- Updated skill documentation files to include structured metadata for better organization. - Enhanced bilingual descriptions and guidelines for clarity in both English and Vietnamese. - Refined sections on usage, best practices, and related skills to ensure consistency across all documentation. - Improved formatting and removed outdated references to streamline the documentation experience. - Added best practices checklists to relevant skills for better usability and adherence to standards.
105 lines
4.5 KiB
Python
105 lines
4.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Final cleanup script to ensure complete separation of bilingual documentation.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import glob
|
|
|
|
# Markdown prefixes for structural lines (headings, quotes, fences, tables,
# bullets).  Lines starting with one of these are never judged as prose.
_EN_STRUCTURAL_PREFIXES = ('#', '>', '```', '|', '-', '*')

# Vietnamese phrases that identify a translated blockquote description.
_EN_QUOTE_VIETNAMESE_WORDS = (
    'thực hành', 'mẫu', 'nền tảng', 'sử dụng', 'triển khai',
    'bảo vệ', 'xác thực', 'giới hạn', 'quản lý', 'kiểm tra',
)

# Vietnamese words/phrases that identify a prose line as Vietnamese.
_EN_PROSE_VIETNAMESE_WORDS = (
    'các', 'cho', 'để', 'và', 'là', 'trong', 'với', 'này', 'có', 'không',
    'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
    'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
    'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật', 'kiểm tra',
)


def _en_whole_word_pattern(words):
    """Compile a regex that matches any of *words* as a whole word/phrase."""
    alternatives = '|'.join(re.escape(word) for word in words)
    # Lookarounds instead of plain substring search: a bare ``in`` test makes
    # 'trong' hit "strong" and 'cho' hit "choose", deleting English prose.
    return re.compile(rf'(?<!\w)(?:{alternatives})(?!\w)')


_EN_QUOTE_VIETNAMESE_RE = _en_whole_word_pattern(_EN_QUOTE_VIETNAMESE_WORDS)
_EN_PROSE_VIETNAMESE_RE = _en_whole_word_pattern(_EN_PROSE_VIETNAMESE_WORDS)


def clean_english_file(filepath):
    """Remove Vietnamese content from an English Markdown file, in place.

    The file is read as UTF-8, filtered line by line and written back to the
    same path.  A line is dropped when:

    * it is a blockquote (starts with ``>``) containing one of the Vietnamese
      description phrases, or
    * it is a non-empty prose line (not a heading, quote, fence, table row or
      bullet) containing one of the Vietnamese indicator words.

    Indicators are matched as whole words on the lower-cased line.  Everything
    between a pair of ``` fences is copied through untouched so code samples
    are never altered.  Headings, table rows, bullets and blank lines are
    always kept.

    Args:
        filepath: Path of the Markdown file to clean.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    print(f"Cleaning English file: {filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    cleaned_lines = []
    in_code_fence = False

    for line in lines:
        stripped = line.strip()

        # Fence delimiters toggle code mode and are always kept.
        if stripped.startswith('```'):
            in_code_fence = not in_code_fence
            cleaned_lines.append(line)
            continue

        # Code samples are not prose; never filter them.
        if in_code_fence:
            cleaned_lines.append(line)
            continue

        lowered = stripped.lower()
        if stripped.startswith('>'):
            # Translated description lines live in blockquotes.
            if _EN_QUOTE_VIETNAMESE_RE.search(lowered):
                continue
        elif stripped and not stripped.startswith(_EN_STRUCTURAL_PREFIXES):
            # Plain prose (this also covers any non-structural line such as
            # numbered list items): drop it when it looks Vietnamese.
            if _EN_PROSE_VIETNAMESE_RE.search(lowered):
                continue

        cleaned_lines.append(line)

    # Write back the cleaned content
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(cleaned_lines)
|
|
|
|
# Markdown prefixes for structural lines (headings, quotes, fences, tables,
# bullets).  Lines starting with one of these are never judged as prose.
_VI_STRUCTURAL_PREFIXES = ('#', '>', '```', '|', '-', '*')

# English fragments that identify an untranslated blockquote description.
_VI_QUOTE_ENGLISH_WORDS = (
    'security', 'best practices', 'patterns', 'platform',
    'use when', 'implementing', 'authentication',
)

# Words/phrases whose presence marks a prose line as Vietnamese.  The list
# also carries the technical terms 'patterns' and 'cache', so lines that
# mention them are kept.
_VI_KEEP_INDICATORS = (
    'các', 'cho', 'để', 'và', 'là', 'trong', 'với', 'này', 'có', 'không',
    'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
    'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
    'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật', 'kiểm tra',
    'tổng quan', 'khi nào sử dụng', 'khái niệm chính', 'patterns', 'cache',
)

# Whole-word matching: a bare substring test lets 'trong' hit "strong" and
# 'cho' hit "choose", which made English sentences look Vietnamese.
_VI_KEEP_RE = re.compile(
    r'(?<!\w)(?:'
    + '|'.join(re.escape(word) for word in _VI_KEEP_INDICATORS)
    + r')(?!\w)'
)


def clean_vietnamese_file(filepath):
    """Remove English content from a Vietnamese Markdown file, in place.

    The file is read as UTF-8, filtered line by line and written back to the
    same path.  A line is dropped when:

    * it is a blockquote (starts with ``>``) containing one of the English
      description fragments, or
    * it is a non-empty prose line (not a heading, quote, fence, table row or
      bullet) that contains none of the Vietnamese indicator words.

    Indicators are matched as whole words on the lower-cased line.  Everything
    between a pair of ``` fences is copied through untouched so code samples
    survive.  Headings, table rows, bullets and blank lines are always kept.

    NOTE(review): any other non-structural line without an indicator (for
    example a ``key: value`` metadata field) is treated as English prose and
    removed -- confirm this is acceptable for the target files.

    Args:
        filepath: Path of the Markdown file to clean.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    print(f"Cleaning Vietnamese file: {filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    cleaned_lines = []
    in_code_fence = False

    for line in lines:
        stripped = line.strip()

        # Fence delimiters toggle code mode and are always kept.
        if stripped.startswith('```'):
            in_code_fence = not in_code_fence
            cleaned_lines.append(line)
            continue

        # Code has no Vietnamese indicators; without this guard every code
        # line would be deleted as "English".
        if in_code_fence:
            cleaned_lines.append(line)
            continue

        lowered = stripped.lower()
        if stripped.startswith('>'):
            # Untranslated description lines live in blockquotes.
            if any(fragment in lowered for fragment in _VI_QUOTE_ENGLISH_WORDS):
                continue
        elif stripped and not stripped.startswith(_VI_STRUCTURAL_PREFIXES):
            # Prose is kept only when it shows a Vietnamese indicator.  The
            # previous exemption for lines starting with "The", "Use", ...
            # was inverted and preserved the most obviously English lines.
            if not _VI_KEEP_RE.search(lowered):
                continue

        cleaned_lines.append(line)

    # Write back the cleaned content
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(cleaned_lines)
|
|
|
|
def main(docs_root='docs'):
    """Clean every English and Vietnamese skill document under *docs_root*.

    Processes ``<docs_root>/en/skills/*.md`` with ``clean_english_file`` and
    ``<docs_root>/vi/skills/*.md`` with ``clean_vietnamese_file``.  Files are
    handled in sorted order so the progress output is reproducible; matches
    that are not regular files (e.g. a directory named ``x.md``) are skipped.

    Args:
        docs_root: Directory containing the ``en`` and ``vi`` trees.
            Defaults to ``'docs'``, resolved against the current working
            directory.
    """
    print("Starting final cleanup of bilingual documentation...")

    # glob.escape keeps wildcard characters in docs_root literal.
    root = glob.escape(docs_root)

    # Clean English files
    print("Cleaning English documentation files...")
    for filepath in sorted(glob.glob(f"{root}/en/skills/*.md")):
        if os.path.isfile(filepath):
            clean_english_file(filepath)

    # Clean Vietnamese files
    print("Cleaning Vietnamese documentation files...")
    for filepath in sorted(glob.glob(f"{root}/vi/skills/*.md")):
        if os.path.isfile(filepath):
            clean_vietnamese_file(filepath)

    print("Final cleanup completed!")
|
|
|
|
# Run the cleanup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()