Files
pos-system/scripts/final-cleanup.py
Ho Ngoc Hai 9b6c585f57 Enhance documentation structure and improve bilingual support across skills
- Updated skill documentation files to include structured metadata for better organization.
- Enhanced bilingual descriptions and guidelines for clarity in both English and Vietnamese.
- Refined sections on usage, best practices, and related skills to ensure consistency across all documentation.
- Improved formatting and removed outdated references to streamline the documentation experience.
- Added best practices checklists to relevant skills for better usability and adherence to standards.
2026-01-01 07:35:44 +07:00

105 lines
4.5 KiB
Python

#!/usr/bin/env python3
"""
Final cleanup script to ensure complete separation of bilingual documentation.
"""
import os
import re
import glob
# Phrases that mark a blockquote ('>') description line as Vietnamese.
_EN_FILE_VI_QUOTE_PHRASES = (
    'thực hành', 'mẫu', 'nền tảng', 'sử dụng', 'triển khai',
    'bảo vệ', 'xác thực', 'giới hạn', 'quản lý', 'kiểm tra',
)

# Words and phrases whose presence marks a plain prose line as Vietnamese.
# Every entry must be non-empty: an empty string is a substring of every
# line, so it would flag all prose as Vietnamese and delete it.
_EN_FILE_VI_WORDS = (
    'các', 'cho', 'để', 'trong', 'với', 'này', 'không',
    'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
    'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
    'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật',
    'kiểm tra',
)

# Lines starting with one of these are Markdown structure (headings, quotes,
# code fences, tables, list items) and are never treated as plain prose.
_EN_FILE_STRUCTURAL_PREFIXES = ('#', '>', '```', '|', '-', '*')

# Whole-word matcher for the indicator list. Plain substring tests make
# 'trong' match "strong" and 'cho' match "choose", which deletes ordinary
# English sentences; the lookarounds require a non-word character (or the
# line edge) on both sides of the match.
_EN_FILE_VI_WORD_RE = re.compile(
    r'(?<!\w)(?:'
    + '|'.join(re.escape(word) for word in _EN_FILE_VI_WORDS if word)
    + r')(?!\w)'
)


def _en_file_is_vietnamese_line(line):
    """Return True when *line* should be dropped from an English file."""
    stripped = line.strip()
    if not stripped:
        # Blank lines are always kept.
        return False
    lowered = stripped.lower()
    if stripped.startswith('>'):
        # Blockquote descriptions are judged by the phrase list only.
        return any(phrase in lowered for phrase in _EN_FILE_VI_QUOTE_PHRASES)
    if stripped.startswith(_EN_FILE_STRUCTURAL_PREFIXES):
        return False
    return _EN_FILE_VI_WORD_RE.search(lowered) is not None


def clean_english_file(filepath):
    """Remove Vietnamese lines from an English Markdown file, in place.

    A line is dropped when it is either:

    * a blockquote line (starts with ``>``) containing one of the Vietnamese
      description phrases, or
    * a plain prose line (one that does not start with ``#``, ``>``, a code
      fence, ``|``, ``-`` or ``*``) containing a Vietnamese indicator word
      as a whole word, compared case-insensitively.

    Every other line, including blank lines, is written back unchanged.

    Args:
        filepath: Path of the UTF-8 Markdown file to rewrite.
    """
    print(f"Cleaning English file: {filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    cleaned_lines = [
        line for line in lines if not _en_file_is_vietnamese_line(line)
    ]

    # Write back the cleaned content
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(cleaned_lines)
# Words that mark a blockquote ('>') description line as English.
_VI_FILE_EN_QUOTE_WORDS = (
    'security', 'best practices', 'patterns', 'platform', 'use when',
    'implementing', 'authentication',
)

# Words and phrases whose presence marks a plain prose line as Vietnamese
# (plus a few technical terms that are allowed to stay). Every entry must be
# non-empty: an empty string is a substring of every line, so it would make
# every line count as Vietnamese and nothing would ever be removed.
_VI_FILE_KEEP_WORDS = (
    'các', 'cho', 'để', 'trong', 'với', 'này', 'không',
    'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
    'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
    'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật',
    'kiểm tra',
    'tổng quan', 'khi nào sử dụng', 'khái niệm chính', 'patterns', 'cache',
)

# Lines starting with one of these are Markdown structure (headings, quotes,
# code fences, tables, list items) and are never treated as plain prose.
_VI_FILE_STRUCTURAL_PREFIXES = ('#', '>', '```', '|', '-', '*')

# Whole-word matcher for the keep list. Plain substring tests make 'trong'
# match "strong" and 'cho' match "choose", so English sentences would be
# mistaken for Vietnamese; the lookarounds require a non-word character (or
# the line edge) on both sides of the match.
_VI_FILE_KEEP_RE = re.compile(
    r'(?<!\w)(?:'
    + '|'.join(re.escape(word) for word in _VI_FILE_KEEP_WORDS if word)
    + r')(?!\w)'
)


def _vi_file_front_matter_end(lines):
    """Return the index just past a leading ``---`` ... ``---`` block, or 0.

    A leading ``---`` with no closing ``---`` is not treated as front matter.
    """
    if not lines or lines[0].strip() != '---':
        return 0
    for index in range(1, len(lines)):
        if lines[index].strip() == '---':
            return index + 1
    return 0


def _vi_file_is_english_line(stripped):
    """Return True when the non-empty, stripped line should be dropped."""
    lowered = stripped.lower()
    if stripped.startswith('>'):
        # Blockquote descriptions are judged by the English word list only.
        return any(word in lowered for word in _VI_FILE_EN_QUOTE_WORDS)
    if stripped.startswith(_VI_FILE_STRUCTURAL_PREFIXES):
        return False
    # Plain prose with no Vietnamese indicator is English, whatever its
    # first word is.
    return _VI_FILE_KEEP_RE.search(lowered) is None


def clean_vietnamese_file(filepath):
    """Remove English lines from a Vietnamese Markdown file, in place.

    A line is dropped when it is either:

    * a blockquote line (starts with ``>``) containing one of the English
      description words, or
    * a plain prose line (one that does not start with ``#``, ``>``, a code
      fence, ``|``, ``-`` or ``*``) containing none of the Vietnamese
      indicator words as a whole word, compared case-insensitively.

    Left untouched: blank lines, a leading ``---`` front-matter block, and
    everything between code fences. Code and metadata keys carry no
    Vietnamese words and would otherwise be deleted as "English".

    Args:
        filepath: Path of the UTF-8 Markdown file to rewrite.
    """
    print(f"Cleaning Vietnamese file: {filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    body_start = _vi_file_front_matter_end(lines)
    cleaned_lines = lines[:body_start]
    in_fence = False
    for line in lines[body_start:]:
        stripped = line.strip()
        if stripped.startswith('```'):
            # Fence delimiters are kept and toggle the "inside code" state.
            in_fence = not in_fence
        elif stripped and not in_fence and _vi_file_is_english_line(stripped):
            continue
        cleaned_lines.append(line)

    # Write back the cleaned content
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(cleaned_lines)
def main():
    """Run the cleanup over the English, then the Vietnamese, skill docs.

    Each pass globs ``docs/<lang>/skills/*.md`` relative to the current
    working directory and hands every match that is a regular file to the
    cleaner for that language. Progress messages are printed along the way.
    """
    print("Starting final cleanup of bilingual documentation...")

    # English pass
    print("Cleaning English documentation files...")
    for english_doc in filter(os.path.isfile, glob.glob('docs/en/skills/*.md')):
        clean_english_file(english_doc)

    # Vietnamese pass
    print("Cleaning Vietnamese documentation files...")
    for vietnamese_doc in filter(os.path.isfile, glob.glob('docs/vi/skills/*.md')):
        clean_vietnamese_file(vietnamese_doc)

    print("Final cleanup completed!")
# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()