- Updated skill documentation files to include structured metadata for better organization. - Enhanced bilingual descriptions and guidelines for clarity in both English and Vietnamese. - Refined sections on usage, best practices, and related skills to ensure consistency across all documentation. - Improved formatting and removed outdated references to streamline the documentation experience. - Added best practices checklists to relevant skills for better usability and adherence to standards.
154 lines
5.7 KiB
Python
154 lines
5.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Script to separate bilingual documentation into language-specific files.
|
|
Removes Vietnamese content from English docs and English content from Vietnamese docs.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import glob
|
|
|
|
def _keep_english(content):
    """Return ``content`` with the Vietnamese half of each bilingual construct removed.

    Pure string transformation (no I/O). Bilingual constructs are written
    "English / Vietnamese" or as ``**EN**:`` / ``**VI**:`` marked text.
    """
    # Block-quote description pair: keep the English line, drop its VI partner.
    content = re.sub(r'^> \*\*EN\*\*: (.+)\n> \*\*VI\*\*: .+', r'> \1', content, flags=re.MULTILINE)

    # Vietnamese description lines that had no English line directly above.
    content = re.sub(r'^> \*\*VI\*\*: .+\n', '', content, flags=re.MULTILINE)

    # Headings: keep the text BEFORE the last " / " (the English part).
    # The greedy group splits at the same " / " that the Vietnamese pass uses.
    content = re.sub(r'^(#{1,6}) (.+) / .+$', r'\1 \2', content, flags=re.MULTILINE)

    # Single-line pair "**EN**: text" followed by "**VI**: text": keep EN text.
    content = re.sub(r'\*\*EN\*\*: ([^\n]+)\n\*\*VI\*\*: [^\n]+', r'\1', content)

    # Multi-line "**VI**:" block: drop the marker and every following line up
    # to the next EN/VI marker or heading. re.MULTILINE is required so that
    # "^" in the lookahead matches at each line start; without it the heading
    # stop never fires and the match eats the headings that follow.
    content = re.sub(
        r'\*\*VI\*\*:\n((?:(?!\*\*EN\*\*:|\*\*VI\*\*:|^#{1,6}).*\n)*)',
        '',
        content,
        flags=re.MULTILINE,
    )

    # Leftover one-line Vietnamese entries.
    content = re.sub(r'^\*\*VI\*\*: .+\n', '', content, flags=re.MULTILINE)

    # The English text stays; only its marker is removed.
    content = re.sub(r'\*\*EN\*\*: ', '', content)

    # Fixed bilingual labels -> English label.
    label_map = (
        ('**Reference / Tham Khảo**:', '**Reference**:'),
        ('**Patterns / Các Patterns**:', '**Patterns**:'),
        ('**Key Generators / Bộ Tạo Key**:', '**Key Generators**:'),
        ('**Speed / Tốc Độ**:', '**Speed**:'),
        ('**Capacity / Dung Lượng**:', '**Capacity**:'),
        ('**Scope / Phạm Vi**:', '**Scope**:'),
        ('**Use Case / Trường Hợp Sử Dụng**:', '**Use Case**:'),
        ('**L1 Cache (Memory) / Cache Bộ Nhớ**:', '**L1 Cache (Memory)**:'),
        ('**L2 Cache (Redis) / Cache Redis**:', '**L2 Cache (Redis)**:'),
    )
    for bilingual, english in label_map:
        content = content.replace(bilingual, english)

    # "// english / vietnamese" comment lines: keep everything before the
    # first " / ", which also keeps the original indentation and "//".
    return '\n'.join(
        line.split(' / ', 1)[0]
        if line.strip().startswith('//') and ' / ' in line
        else line
        for line in content.split('\n')
    )


def process_english_file(filepath):
    """Rewrite the file at ``filepath`` in place, keeping only English content.

    Args:
        filepath: Path of a UTF-8 text file containing bilingual markup.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    print(f"Processing English file: {filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    content = _keep_english(content)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
|
|
|
|
def _keep_vietnamese(content):
    """Return ``content`` with the English half of each bilingual construct removed.

    Pure string transformation (no I/O). Bilingual constructs are written
    "English / Vietnamese" or as ``**EN**:`` / ``**VI**:`` marked text.
    """
    # Block-quote description pair: keep the Vietnamese line, drop the EN one.
    content = re.sub(r'^> \*\*EN\*\*: .+\n> \*\*VI\*\*: (.+)', r'> \1', content, flags=re.MULTILINE)

    # English description lines that had no Vietnamese line directly below.
    content = re.sub(r'^> \*\*EN\*\*: .+\n', '', content, flags=re.MULTILINE)

    # Headings: keep the text AFTER the last " / " (the Vietnamese part).
    content = re.sub(r'^(#{1,6}) .+ / (.+)$', r'\1 \2', content, flags=re.MULTILINE)

    # Single-line pair "**EN**: text" followed by "**VI**: text": keep VI text.
    content = re.sub(r'\*\*EN\*\*: [^\n]+\n\*\*VI\*\*: ([^\n]+)', r'\1', content)

    # Multi-line "**EN**:" block: drop the marker and every following line up
    # to the next EN/VI marker or heading. re.MULTILINE is required so that
    # "^" in the lookahead matches at each line start; without it the heading
    # stop never fires and the match eats the headings that follow.
    content = re.sub(
        r'\*\*EN\*\*:\n((?:(?!\*\*EN\*\*:|\*\*VI\*\*:|^#{1,6}).*\n)*)',
        '',
        content,
        flags=re.MULTILINE,
    )

    # Leftover one-line English entries.
    content = re.sub(r'^\*\*EN\*\*: .+\n', '', content, flags=re.MULTILINE)

    # The Vietnamese text stays; only its marker is removed.
    content = re.sub(r'\*\*VI\*\*: ', '', content)

    # Fixed bilingual labels -> Vietnamese label.
    label_map = (
        ('**Reference / Tham Khảo**:', '**Tham Khảo**:'),
        ('**Patterns / Các Patterns**:', '**Các Patterns**:'),
        ('**Key Generators / Bộ Tạo Key**:', '**Bộ Tạo Key**:'),
        ('**Speed / Tốc Độ**:', '**Tốc Độ**:'),
        ('**Capacity / Dung Lượng**:', '**Dung Lượng**:'),
        ('**Scope / Phạm Vi**:', '**Phạm Vi**:'),
        ('**Use Case / Trường Hợp Sử Dụng**:', '**Trường Hợp Sử Dụng**:'),
        ('**L1 Cache (Memory) / Cache Bộ Nhớ**:', '**Cache Bộ Nhớ**:'),
        ('**L2 Cache (Redis) / Cache Redis**:', '**Cache Redis**:'),
    )
    for bilingual, vietnamese in label_map:
        content = content.replace(bilingual, vietnamese)

    return '\n'.join(_vietnamese_comment(line) for line in content.split('\n'))


def _vietnamese_comment(line):
    """Rewrite a "// english / vietnamese" comment line to "// vietnamese".

    Lines that are not such comments are returned unchanged.
    """
    if not (line.strip().startswith('//') and ' / ' in line):
        return line
    # Re-emit the leading whitespace so the comment stays aligned with the
    # surrounding code sample instead of jumping to column 0.
    indent = line[:len(line) - len(line.lstrip())]
    vietnamese = line.split(' / ', 1)[1]
    return f"{indent}// {vietnamese}"


def process_vietnamese_file(filepath):
    """Rewrite the file at ``filepath`` in place, keeping only Vietnamese content.

    Args:
        filepath: Path of a UTF-8 text file containing bilingual markup.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    print(f"Processing Vietnamese file: {filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    content = _keep_vietnamese(content)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
|
|
|
|
def main():
    """Separate every skill page under ``docs/en`` and ``docs/vi`` by language.

    Glob patterns are relative to the current working directory. English
    pages are processed first, then Vietnamese pages; progress is printed.
    """
    def _regular_files(pattern):
        # Lazily yield glob matches that are regular files.
        return filter(os.path.isfile, glob.glob(pattern))

    print("Starting bilingual documentation separation...")

    print("Processing English documentation files...")
    for doc_path in _regular_files('docs/en/skills/*.md'):
        process_english_file(doc_path)

    print("Processing Vietnamese documentation files...")
    for doc_path in _regular_files('docs/vi/skills/*.md'):
        process_vietnamese_file(doc_path)

    print("Bilingual documentation separation completed!")
|
|
|
|
# Run the separation only when this file is executed as a script, so that
# importing the module does not rewrite any documentation files.
if __name__ == '__main__':
    main()