Enhance documentation structure and improve bilingual support across skills

- Updated skill documentation files to include structured metadata for better organization.
- Enhanced bilingual descriptions and guidelines for clarity in both English and Vietnamese.
- Refined sections on usage, best practices, and related skills to ensure consistency across all documentation.
- Improved formatting and removed outdated references to streamline the documentation experience.
- Added best-practice checklists to relevant skills for better usability and adherence to standards.
2026-01-01 07:35:44 +07:00
parent fb5434188a
commit 9b6c585f57
42 changed files with 9388 additions and 4470 deletions
--- a/scripts/final-cleanup.py
+++ b/scripts/final-cleanup.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+"""
+Final cleanup script to ensure complete separation of bilingual documentation.
+"""
+
+import os
+import re
+import glob
+
+def clean_english_file(filepath):
+    """Remove Vietnamese content from an English Markdown file, in place.
+
+    Two kinds of lines are dropped:
+
+    * blockquote lines (starting with ``>``) that contain one of a small set
+      of Vietnamese description phrases, and
+    * plain prose lines (not headings, blockquotes, fences, table rows or
+      list items) that contain a Vietnamese indicator as a whole word.
+
+    Lines inside fenced code blocks are copied through untouched.
+
+    Args:
+        filepath: Path of the UTF-8 Markdown file to rewrite.
+    """
+    print(f"Cleaning English file: {filepath}")
+
+    with open(filepath, 'r', encoding='utf-8') as f:
+        lines = f.readlines()
+
+    description_phrases = (
+        'thực hành', 'mẫu', 'nền tảng', 'sử dụng', 'triển khai',
+        'bảo vệ', 'xác thực', 'giới hạn', 'quản lý', 'kiểm tra',
+    )
+    vietnamese_indicators = (
+        'các', 'cho', 'để', 'và', 'là', 'trong', 'với', 'này', 'có', 'không',
+        'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
+        'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
+        'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật', 'kiểm tra',
+    )
+    # Match indicators as whole words only. Plain substring matching deletes
+    # English lines, e.g. 'cho' occurs in "choose"/"echo" and 'trong' in "strong".
+    indicator_re = re.compile(
+        r'\b(?:' + '|'.join(re.escape(word) for word in vietnamese_indicators) + r')\b'
+    )
+    # Lines starting with these markers are Markdown structure, not prose.
+    structural_prefixes = ('#', '>', '```', '|', '-', '*')
+
+    cleaned_lines = []
+    in_code_fence = False
+
+    for line in lines:
+        stripped = line.strip()
+
+        # Code samples must survive verbatim, so track fence state and pass
+        # everything between the fences straight through.
+        if stripped.startswith('```'):
+            in_code_fence = not in_code_fence
+            cleaned_lines.append(line)
+            continue
+        if in_code_fence:
+            cleaned_lines.append(line)
+            continue
+
+        lowered = stripped.lower()
+
+        # Skip Vietnamese description lines
+        if stripped.startswith('>') and any(phrase in lowered for phrase in description_phrases):
+            continue
+
+        # Skip prose lines that contain a Vietnamese indicator word
+        if stripped and not stripped.startswith(structural_prefixes) and indicator_re.search(lowered):
+            continue
+
+        cleaned_lines.append(line)
+
+    # Write back the cleaned content
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.writelines(cleaned_lines)
+
+def clean_vietnamese_file(filepath):
+    """Remove English content from a Vietnamese Markdown file, in place.
+
+    Two kinds of lines are dropped:
+
+    * blockquote lines (starting with ``>``) that contain one of a small set
+      of English description keywords, and
+    * plain prose lines (not headings, blockquotes, fences, table rows or
+      list items) that contain no Vietnamese indicator word and begin with a
+      common English sentence opener ("The", "Use", "This", ...).
+
+    Lines inside fenced code blocks are copied through untouched.
+
+    Args:
+        filepath: Path of the UTF-8 Markdown file to rewrite.
+    """
+    print(f"Cleaning Vietnamese file: {filepath}")
+
+    with open(filepath, 'r', encoding='utf-8') as f:
+        lines = f.readlines()
+
+    english_description_words = (
+        'security', 'best practices', 'patterns', 'platform',
+        'use when', 'implementing', 'authentication',
+    )
+    vietnamese_indicators = (
+        'các', 'cho', 'để', 'và', 'là', 'trong', 'với', 'này', 'có', 'không',
+        'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
+        'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
+        'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật', 'kiểm tra',
+        'tổng quan', 'khi nào sử dụng', 'khái niệm chính', 'patterns', 'cache',
+    )
+    # Whole-word matching, so that e.g. 'cho' inside "choose" does not make
+    # an English sentence look Vietnamese.
+    indicator_re = re.compile(
+        r'\b(?:' + '|'.join(re.escape(word) for word in vietnamese_indicators) + r')\b'
+    )
+    # Openers must be complete words: "For" should not match "Format".
+    english_opener_re = re.compile(r'(?:The|Use|This|For|In|When|How|What|Why)\b')
+    # Lines starting with these markers are Markdown structure, not prose.
+    structural_prefixes = ('#', '>', '```', '|', '-', '*')
+
+    cleaned_lines = []
+    in_code_fence = False
+
+    for line in lines:
+        stripped = line.strip()
+
+        # Code samples contain no Vietnamese words and would otherwise be
+        # mistaken for English prose; pass fenced content straight through.
+        if stripped.startswith('```'):
+            in_code_fence = not in_code_fence
+            cleaned_lines.append(line)
+            continue
+        if in_code_fence:
+            cleaned_lines.append(line)
+            continue
+
+        lowered = stripped.lower()
+
+        # Skip English description lines
+        if stripped.startswith('>') and any(word in lowered for word in english_description_words):
+            continue
+
+        # Skip lines that are clearly English content: no Vietnamese indicator
+        # AND an English sentence opener. (The opener test used to be negated,
+        # which kept "The ..." sentences and deleted everything else.)
+        if stripped and not stripped.startswith(structural_prefixes):
+            has_vietnamese = indicator_re.search(lowered) is not None
+            if not has_vietnamese and english_opener_re.match(stripped):
+                continue
+
+        cleaned_lines.append(line)
+
+    # Write back the cleaned content
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.writelines(cleaned_lines)
+
+def main():
+    """Run the cleanup pass over the English, then the Vietnamese skill docs."""
+    print("Starting final cleanup of bilingual documentation...")
+
+    # (banner, glob pattern, cleaner) for each language, in processing order.
+    passes = (
+        ("Cleaning English documentation files...", 'docs/en/skills/*.md', clean_english_file),
+        ("Cleaning Vietnamese documentation files...", 'docs/vi/skills/*.md', clean_vietnamese_file),
+    )
+
+    for banner, pattern, cleaner in passes:
+        print(banner)
+        for candidate in glob.glob(pattern):
+            # A directory could match the glob too; only regular files are cleaned.
+            if os.path.isfile(candidate):
+                cleaner(candidate)
+
+    print("Final cleanup completed!")
+
+if __name__ == '__main__':
+    main()
--- a/scripts/fix-bilingual-docs.py
+++ b/scripts/fix-bilingual-docs.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+"""
+Script to separate bilingual documentation into language-specific files.
+Removes Vietnamese content from English docs and English content from Vietnamese docs.
+"""
+
+import os
+import re
+import glob
+
+def process_english_file(filepath):
+    """Rewrite a bilingual Markdown file in place, keeping only English.
+
+    The file is expected to use these bilingual conventions:
+
+    * ``> **EN**: ...`` / ``> **VI**: ...`` description lines,
+    * ``# English / Vietnamese`` headings,
+    * ``**EN**: ...`` / ``**VI**: ...`` inline pairs and ``**VI**:`` blocks,
+    * ``**English / Vietnamese**:`` labels, and
+    * ``// English / Vietnamese`` code comments.
+
+    Args:
+        filepath: Path of the UTF-8 Markdown file to rewrite.
+    """
+    print(f"Processing English file: {filepath}")
+
+    with open(filepath, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    # First, handle the description line - keep only English
+    content = re.sub(r'^> \*\*EN\*\*: (.+)\n> \*\*VI\*\*: .+', r'> \1', content, flags=re.MULTILINE)
+
+    # Remove standalone Vietnamese description lines
+    content = re.sub(r'^> \*\*VI\*\*: .+\n', '', content, flags=re.MULTILINE)
+
+    # Convert bilingual titles to English only. English is written first, so
+    # keep the text BEFORE the last " / " separator.
+    content = re.sub(r'^(#{1,6}) (.+) / .+$', r'\1 \2', content, flags=re.MULTILINE)
+
+    # For sections with both EN and VI blocks, keep only EN content
+    # Pattern: **EN**: content\n**VI**: content
+    content = re.sub(r'\*\*EN\*\*: ([^\n]+)\n\*\*VI\*\*: [^\n]+', r'\1', content)
+
+    # Remove remaining **VI** blocks and their content. MULTILINE is required
+    # so that "^#" stops the block at the next heading; without it the match
+    # runs through every following section.
+    content = re.sub(
+        r'\*\*VI\*\*:\n((?:(?!\*\*EN\*\*:|\*\*VI\*\*:|^#{1,6} ).*\n)*)',
+        '',
+        content,
+        flags=re.MULTILINE,
+    )
+
+    # Remove standalone **VI**: lines
+    content = re.sub(r'^\*\*VI\*\*: .+\n', '', content, flags=re.MULTILINE)
+
+    # Remove **EN**: markers
+    content = re.sub(r'\*\*EN\*\*: ', '', content)
+
+    # Convert bilingual labels to English
+    replacements = [
+        ('**Reference / Tham Khảo**:', '**Reference**:'),
+        ('**Patterns / Các Patterns**:', '**Patterns**:'),
+        ('**Key Generators / Bộ Tạo Key**:', '**Key Generators**:'),
+        ('**Speed / Tốc Độ**:', '**Speed**:'),
+        ('**Capacity / Dung Lượng**:', '**Capacity**:'),
+        ('**Scope / Phạm Vi**:', '**Scope**:'),
+        ('**Use Case / Trường Hợp Sử Dụng**:', '**Use Case**:'),
+        ('**L1 Cache (Memory) / Cache Bộ Nhớ**:', '**L1 Cache (Memory)**:'),
+        ('**L2 Cache (Redis) / Cache Redis**:', '**L2 Cache (Redis)**:'),
+    ]
+
+    for old, new in replacements:
+        content = content.replace(old, new)
+
+    # Convert bilingual comments in code to English: for "// EN / VI" keep
+    # everything before the first " / " (indentation included).
+    content = '\n'.join(
+        line.split(' / ', 1)[0]
+        if line.strip().startswith('//') and ' / ' in line
+        else line
+        for line in content.split('\n')
+    )
+
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.write(content)
+
+def process_vietnamese_file(filepath):
+    """Rewrite a bilingual Markdown file in place, keeping only Vietnamese.
+
+    The file is expected to use these bilingual conventions:
+
+    * ``> **EN**: ...`` / ``> **VI**: ...`` description lines,
+    * ``# English / Vietnamese`` headings,
+    * ``**EN**: ...`` / ``**VI**: ...`` inline pairs and ``**EN**:`` blocks,
+    * ``**English / Vietnamese**:`` labels, and
+    * ``// English / Vietnamese`` code comments.
+
+    Args:
+        filepath: Path of the UTF-8 Markdown file to rewrite.
+    """
+    print(f"Processing Vietnamese file: {filepath}")
+
+    with open(filepath, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    # First, handle the description line - keep only Vietnamese
+    content = re.sub(r'^> \*\*EN\*\*: .+\n> \*\*VI\*\*: (.+)', r'> \1', content, flags=re.MULTILINE)
+
+    # Remove standalone English description lines
+    content = re.sub(r'^> \*\*EN\*\*: .+\n', '', content, flags=re.MULTILINE)
+
+    # Convert bilingual titles to Vietnamese only (text after the last " / ")
+    content = re.sub(r'^(#{1,6}) .+ / (.+)$', r'\1 \2', content, flags=re.MULTILINE)
+
+    # For sections with both EN and VI blocks, keep only VI content
+    # Pattern: **EN**: content\n**VI**: content
+    content = re.sub(r'\*\*EN\*\*: [^\n]+\n\*\*VI\*\*: ([^\n]+)', r'\1', content)
+
+    # Remove remaining **EN** blocks and their content. MULTILINE is required
+    # so that "^#" stops the block at the next heading; without it the match
+    # runs through every following section.
+    content = re.sub(
+        r'\*\*EN\*\*:\n((?:(?!\*\*EN\*\*:|\*\*VI\*\*:|^#{1,6} ).*\n)*)',
+        '',
+        content,
+        flags=re.MULTILINE,
+    )
+
+    # Remove standalone **EN**: lines
+    content = re.sub(r'^\*\*EN\*\*: .+\n', '', content, flags=re.MULTILINE)
+
+    # Remove **VI**: markers
+    content = re.sub(r'\*\*VI\*\*: ', '', content)
+
+    # Convert bilingual labels to Vietnamese
+    replacements = [
+        ('**Reference / Tham Khảo**:', '**Tham Khảo**:'),
+        ('**Patterns / Các Patterns**:', '**Các Patterns**:'),
+        ('**Key Generators / Bộ Tạo Key**:', '**Bộ Tạo Key**:'),
+        ('**Speed / Tốc Độ**:', '**Tốc Độ**:'),
+        ('**Capacity / Dung Lượng**:', '**Dung Lượng**:'),
+        ('**Scope / Phạm Vi**:', '**Phạm Vi**:'),
+        ('**Use Case / Trường Hợp Sử Dụng**:', '**Trường Hợp Sử Dụng**:'),
+        ('**L1 Cache (Memory) / Cache Bộ Nhớ**:', '**Cache Bộ Nhớ**:'),
+        ('**L2 Cache (Redis) / Cache Redis**:', '**Cache Redis**:'),
+    ]
+
+    for old, new in replacements:
+        content = content.replace(old, new)
+
+    # Convert bilingual comments in code to Vietnamese: for "// EN / VI" keep
+    # the text after the first " / ", re-attaching the original indentation so
+    # the comment stays aligned with the surrounding code.
+    processed_lines = []
+    for line in content.split('\n'):
+        if line.strip().startswith('//') and ' / ' in line:
+            indent = line[:len(line) - len(line.lstrip())]
+            vietnamese_part = line.split(' / ', 1)[1]
+            processed_lines.append(f"{indent}// {vietnamese_part}")
+        else:
+            processed_lines.append(line)
+
+    content = '\n'.join(processed_lines)
+
+    with open(filepath, 'w', encoding='utf-8') as f:
+        f.write(content)
+
+def main():
+    """Separate the English, then the Vietnamese skill docs by language."""
+    print("Starting bilingual documentation separation...")
+
+    # (banner, glob pattern, processor) for each language, in processing order.
+    passes = (
+        ("Processing English documentation files...", 'docs/en/skills/*.md', process_english_file),
+        ("Processing Vietnamese documentation files...", 'docs/vi/skills/*.md', process_vietnamese_file),
+    )
+
+    for banner, pattern, processor in passes:
+        print(banner)
+        for candidate in glob.glob(pattern):
+            # A directory could match the glob too; only regular files are processed.
+            if os.path.isfile(candidate):
+                processor(candidate)
+
+    print("Bilingual documentation separation completed!")
+
+if __name__ == '__main__':
+    main()
--- a/scripts/fix-bilingual-docs.sh
+++ b/scripts/fix-bilingual-docs.sh
@@ -0,0 +1,138 @@
+#!/bin/bash
+
+# Script to separate bilingual documentation into language-specific files
+# Removes Vietnamese content from English docs and English content from Vietnamese docs
+
+# Abort on the first failing command so a broken edit is never applied silently.
+set -o errexit
+
+printf '%s\n' "Starting bilingual documentation separation..."
+
+# Function to process English files (remove Vietnamese content).
+#
+# Arguments:
+#   $1 - path of the Markdown file to rewrite in place
+#
+# All edits run in a single sed pass. Only POSIX basic regular expressions
+# are used, and -i is always given a suffix, so the same command works with
+# both GNU and BSD sed. Every back-reference has a matching \( \) group;
+# a \1 without a group makes sed exit with "invalid reference", which under
+# `set -e` aborts the whole script.
+process_en_file() {
+    local file="$1"
+    echo "Processing English file: $file"
+
+    # Expressions, in order:
+    #   1. drop **VI** lines and strip leftover **VI**: markers
+    #   2. bilingual headings "# EN / VI"        -> "# EN"
+    #   3. bilingual labels "**EN / VI**"        -> "**EN**"
+    #   4. strip **EN**: markers
+    #   5. bilingual code comments "// EN / VI"  -> "// EN"
+    #   6. bilingual parentheticals " (EN / VI):" -> " (EN):"
+    sed -i.bak \
+        -e '/^> \*\*VI\*\*:/d' \
+        -e '/^\*\*VI\*\*:/d' \
+        -e 's/\*\*VI\*\*: //' \
+        -e 's/^\(#\{1,6\}\) \(..*\) \/ ..*$/\1 \2/' \
+        -e 's/\*\*Reference \/ Tham Khảo\*\*/**Reference**/g' \
+        -e 's/\*\*Patterns \/ Các Patterns\*\*/**Patterns**/g' \
+        -e 's/\*\*Key Generators \/ Bộ Tạo Key\*\*/**Key Generators**/g' \
+        -e 's/\*\*Speed \/ Tốc Độ\*\*/**Speed**/g' \
+        -e 's/\*\*Capacity \/ Dung Lượng\*\*/**Capacity**/g' \
+        -e 's/\*\*Scope \/ Phạm Vi\*\*/**Scope**/g' \
+        -e 's/\*\*Use Case \/ Trường Hợp Sử Dụng\*\*/**Use Case**/g' \
+        -e 's/\*\*L1 Cache (Memory) \/ Cache Bộ Nhớ\*\*/**L1 Cache (Memory)**/g' \
+        -e 's/\*\*L2 Cache (Redis) \/ Cache Redis\*\*/**L2 Cache (Redis)**/g' \
+        -e 's/\*\*EN\*\*: //' \
+        -e 's|^\([[:space:]]*//\) \(..*\) / ..*$|\1 \2|' \
+        -e 's| (\([^()]*\) / [^()]*):| (\1):|g' \
+        "$file"
+
+    # Remove backup file
+    rm -f "${file}.bak"
+}
+
+# Function to process Vietnamese files (remove English content).
+#
+# Arguments:
+#   $1 - path of the Markdown file to rewrite in place
+#
+# All edits run in a single sed pass. Only POSIX basic regular expressions
+# are used, and -i is always given a suffix, so the same command works with
+# both GNU and BSD sed. This replaces the earlier awk steps, which relied on
+# the three-argument match() that only gawk provides.
+process_vi_file() {
+    local file="$1"
+    echo "Processing Vietnamese file: $file"
+
+    # Expressions, in order:
+    #   1. drop **EN** lines and strip leftover **EN**: markers
+    #   2. bilingual headings "# EN / VI"        -> "# VI"
+    #   3. bilingual labels "**EN / VI**"        -> "**VI**"
+    #   4. strip **VI**: markers
+    #   5. bilingual code comments "// EN / VI"  -> "// VI" (indentation kept)
+    #   6. bilingual parentheticals " (EN / VI):" -> " (VI):"
+    sed -i.bak \
+        -e '/^> \*\*EN\*\*:/d' \
+        -e '/^\*\*EN\*\*:/d' \
+        -e 's/\*\*EN\*\*: //' \
+        -e 's/^\(#\{1,6\}\) ..* \/ \(..*\)$/\1 \2/' \
+        -e 's/\*\*Reference \/ Tham Khảo\*\*/**Tham Khảo**/g' \
+        -e 's/\*\*Patterns \/ Các Patterns\*\*/**Các Patterns**/g' \
+        -e 's/\*\*Key Generators \/ Bộ Tạo Key\*\*/**Bộ Tạo Key**/g' \
+        -e 's/\*\*Speed \/ Tốc Độ\*\*/**Tốc Độ**/g' \
+        -e 's/\*\*Capacity \/ Dung Lượng\*\*/**Dung Lượng**/g' \
+        -e 's/\*\*Scope \/ Phạm Vi\*\*/**Phạm Vi**/g' \
+        -e 's/\*\*Use Case \/ Trường Hợp Sử Dụng\*\*/**Trường Hợp Sử Dụng**/g' \
+        -e 's/\*\*L1 Cache (Memory) \/ Cache Bộ Nhớ\*\*/**Cache Bộ Nhớ**/g' \
+        -e 's/\*\*L2 Cache (Redis) \/ Cache Redis\*\*/**Cache Redis**/g' \
+        -e 's/\*\*VI\*\*: //' \
+        -e 's|^\([[:space:]]*\)// ..* / \(..*\)$|\1// \2|' \
+        -e 's| ([^()]* / \([^()]*\)):| (\1):|g' \
+        "$file"
+
+    # Remove backup file
+    rm -f "${file}.bak"
+}
+
+# English pass: every Markdown file directly under docs/en/skills.
+echo "Processing English documentation files..."
+for doc in docs/en/skills/*.md; do
+    # An unmatched glob stays literal, so skip anything that is not a real file.
+    [[ -f "$doc" ]] || continue
+    process_en_file "$doc"
+done
+
+# Vietnamese pass: every Markdown file directly under docs/vi/skills.
+echo "Processing Vietnamese documentation files..."
+for doc in docs/vi/skills/*.md; do
+    [[ -f "$doc" ]] || continue
+    process_vi_file "$doc"
+done
+
+echo "Bilingual documentation separation completed!"