Enhance documentation structure and improve bilingual support across skills

- Updated skill documentation files to include structured metadata for better organization.
- Enhanced bilingual descriptions and guidelines for clarity in both English and Vietnamese.
- Refined sections on usage, best practices, and related skills to ensure consistency across all documentation.
- Improved formatting and removed outdated references to streamline the documentation experience.
- Added best practices checklists to relevant skills for better usability and adherence to standards.
This commit is contained in:
Ho Ngoc Hai
2026-01-01 07:35:44 +07:00
parent fb5434188a
commit 9b6c585f57
42 changed files with 9388 additions and 4470 deletions

105
scripts/final-cleanup.py Normal file
View File

@@ -0,0 +1,105 @@
#!/usr/bin/env python3
"""
Final cleanup script to ensure complete separation of bilingual documentation.
"""
import os
import re
import glob
def clean_english_file(filepath):
    """Remove Vietnamese lines from an English Markdown file, in place.

    Two kinds of lines are dropped:

    * blockquote lines (starting with ``>``) that contain one of a fixed
      set of Vietnamese phrases;
    * plain text lines (not a heading, blockquote, code fence, table row
      or list item) that contain a Vietnamese indicator as a whole word.

    Every other line is written back unchanged.

    Args:
        filepath: Path of the UTF-8 Markdown file to rewrite.
    """
    print(f"Cleaning English file: {filepath}")

    # Phrases that mark a blockquote description line as Vietnamese.
    description_phrases = (
        'thực hành', 'mẫu', 'nền tảng', 'sử dụng', 'triển khai',
        'bảo vệ', 'xác thực', 'giới hạn', 'quản lý', 'kiểm tra',
    )
    # Words/phrases that mark a plain text line as Vietnamese.  The list
    # must not contain empty strings: '' is a substring of every string
    # and would cause every plain line to be deleted.
    vietnamese_indicators = (
        'các', 'cho', 'để', 'trong', 'với', 'này', 'không',
        'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
        'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
        'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật',
        'kiểm tra',
    )
    # Match indicators as whole words only, so that e.g. 'cho' does not
    # fire on the English words "choose" or "echo".  Compiled once, outside
    # the per-line loop.
    vietnamese_word = re.compile(
        r'(?<!\w)(?:'
        + '|'.join(re.escape(word) for word in vietnamese_indicators)
        + r')(?!\w)'
    )
    # Lines with these prefixes are Markdown structure and are never
    # examined by the plain-text rule.
    structural_prefixes = ('#', '>', '```', '|', '-', '*')

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    cleaned_lines = []
    for line in lines:
        stripped = line.strip()
        lowered = stripped.lower()

        # Vietnamese description (blockquote) line.
        if stripped.startswith('>') and any(
            phrase in lowered for phrase in description_phrases
        ):
            continue

        # Plain text line containing a Vietnamese word.
        if (
            stripped
            and not stripped.startswith(structural_prefixes)
            and vietnamese_word.search(lowered)
        ):
            continue

        cleaned_lines.append(line)

    # Write back the cleaned content.
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(cleaned_lines)
def clean_vietnamese_file(filepath):
    """Remove English lines from a Vietnamese Markdown file, in place.

    Two kinds of lines are dropped:

    * blockquote lines (starting with ``>``) that contain one of a fixed
      set of English phrases;
    * plain text lines (not a heading, blockquote, code fence, table row
      or list item) that contain no Vietnamese indicator AND begin with a
      common English sentence opener ("The", "Use", "This", ...).

    Every other line, including code that matches neither rule, is written
    back unchanged.

    Args:
        filepath: Path of the UTF-8 Markdown file to rewrite.
    """
    print(f"Cleaning Vietnamese file: {filepath}")

    # Phrases that mark a blockquote description line as English.
    # NOTE(review): 'patterns' and 'platform' may also occur in Vietnamese
    # descriptions; confirm this rule is not too eager for the real docs.
    description_phrases = (
        'security', 'best practices', 'patterns', 'platform', 'use when',
        'implementing', 'authentication',
    )
    # Words/phrases that mark a plain text line as Vietnamese.  The list
    # must not contain empty strings: '' is a substring of every string,
    # which made every line count as Vietnamese and disabled the filter.
    vietnamese_indicators = (
        'các', 'cho', 'để', 'trong', 'với', 'này', 'không',
        'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
        'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
        'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật',
        'kiểm tra', 'tổng quan', 'khi nào sử dụng', 'khái niệm chính',
        'patterns', 'cache',
    )
    # Whole-word matching, compiled once outside the per-line loop.
    vietnamese_word = re.compile(
        r'(?<!\w)(?:'
        + '|'.join(re.escape(word) for word in vietnamese_indicators)
        + r')(?!\w)'
    )
    # Whole-word match on the first word, so the Vietnamese word "Theo"
    # is not mistaken for the English opener "The".
    english_opener = re.compile(r'(?:The|Use|This|For|In|When|How|What|Why)\b')
    # Lines with these prefixes are Markdown structure and are never
    # examined by the plain-text rule.
    structural_prefixes = ('#', '>', '```', '|', '-', '*')

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    cleaned_lines = []
    for line in lines:
        stripped = line.strip()
        lowered = stripped.lower()

        # English description (blockquote) line.
        if stripped.startswith('>') and any(
            phrase in lowered for phrase in description_phrases
        ):
            continue

        # Plain text line that looks like English prose.  The original
        # condition was inverted (it dropped lines that did NOT start with
        # an English opener), which would have removed code lines instead.
        if (
            stripped
            and not stripped.startswith(structural_prefixes)
            and not vietnamese_word.search(lowered)
            and english_opener.match(stripped)
        ):
            continue

        cleaned_lines.append(line)

    # Write back the cleaned content.
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(cleaned_lines)
def main():
    """Run the final cleanup over every English and Vietnamese skill doc.

    English files are matched by ``docs/en/skills/*.md`` and Vietnamese
    files by ``docs/vi/skills/*.md``, both relative to the current working
    directory.  Only regular files are processed.
    """
    print("Starting final cleanup of bilingual documentation...")

    # English pass.
    print("Cleaning English documentation files...")
    for md_path in filter(os.path.isfile, glob.glob('docs/en/skills/*.md')):
        clean_english_file(md_path)

    # Vietnamese pass.
    print("Cleaning Vietnamese documentation files...")
    for md_path in filter(os.path.isfile, glob.glob('docs/vi/skills/*.md')):
        clean_vietnamese_file(md_path)

    print("Final cleanup completed!")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,154 @@
#!/usr/bin/env python3
"""
Script to separate bilingual documentation into language-specific files.
Removes Vietnamese content from English docs and English content from Vietnamese docs.
"""
import os
import re
import glob
def process_english_file(filepath):
    """Rewrite a bilingual Markdown file in place, keeping English only.

    The file is expected to use these bilingual conventions:

    * ``> **EN**: ...`` / ``> **VI**: ...`` description lines;
    * headings of the form ``# English / Vietnamese``;
    * ``**EN**: ...`` / ``**VI**: ...`` inline pairs and ``**VI**:`` blocks;
    * bold labels such as ``**Speed / Tốc Độ**:``;
    * ``// English / Vietnamese`` comment lines.

    Args:
        filepath: Path of the UTF-8 Markdown file to rewrite.
    """
    print(f"Processing English file: {filepath}")
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Description pair: keep only the English line.
    content = re.sub(
        r'^> \*\*EN\*\*: (.+)\n> \*\*VI\*\*: .+', r'> \1', content,
        flags=re.MULTILINE,
    )
    # Standalone Vietnamese description lines.
    content = re.sub(r'^> \*\*VI\*\*: .+\n', '', content, flags=re.MULTILINE)

    # Bilingual headings: keep the text BEFORE the first " / ", which is
    # the English half (the labels and comment handling below use the same
    # "English / Vietnamese" order).
    content = re.sub(
        r'^(#{1,6}) (.+?) / .+$', r'\1 \2', content, flags=re.MULTILINE,
    )

    # Inline pair "**EN**: x" followed by "**VI**: y": keep x.
    content = re.sub(
        r'\*\*EN\*\*: ([^\n]+)\n\*\*VI\*\*: [^\n]+', r'\1', content,
    )
    # "**VI**:" block: drop the marker and the lines after it, up to the
    # next marker or heading.  re.MULTILINE is required so that the
    # "^#{1,6}" alternative can stop the block at a heading; without it
    # "^" only matches at the very start of the file and headings are
    # swallowed together with the block.
    content = re.sub(
        r'\*\*VI\*\*:\n((?:(?!\*\*EN\*\*:|\*\*VI\*\*:|^#{1,6}).*\n)*)',
        '', content, flags=re.MULTILINE,
    )
    # Standalone "**VI**: ..." lines.
    content = re.sub(r'^\*\*VI\*\*: .+\n', '', content, flags=re.MULTILINE)
    # Remaining "**EN**: " markers.
    content = re.sub(r'\*\*EN\*\*: ', '', content)

    # Bilingual bold labels -> English label.
    replacements = [
        ('**Reference / Tham Khảo**:', '**Reference**:'),
        ('**Patterns / Các Patterns**:', '**Patterns**:'),
        ('**Key Generators / Bộ Tạo Key**:', '**Key Generators**:'),
        ('**Speed / Tốc Độ**:', '**Speed**:'),
        ('**Capacity / Dung Lượng**:', '**Capacity**:'),
        ('**Scope / Phạm Vi**:', '**Scope**:'),
        ('**Use Case / Trường Hợp Sử Dụng**:', '**Use Case**:'),
        ('**L1 Cache (Memory) / Cache Bộ Nhớ**:', '**L1 Cache (Memory)**:'),
        ('**L2 Cache (Redis) / Cache Redis**:', '**L2 Cache (Redis)**:'),
    ]
    for old, new in replacements:
        content = content.replace(old, new)

    # "// English / Vietnamese" comment lines: keep the English half.
    # Splitting on the first " / " leaves the indentation in place.
    processed_lines = []
    for line in content.split('\n'):
        if line.strip().startswith('//') and ' / ' in line:
            processed_lines.append(line.split(' / ', 1)[0])
        else:
            processed_lines.append(line)
    content = '\n'.join(processed_lines)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
def process_vietnamese_file(filepath):
    """Rewrite a bilingual Markdown file in place, keeping Vietnamese only.

    The file is expected to use these bilingual conventions:

    * ``> **EN**: ...`` / ``> **VI**: ...`` description lines;
    * headings of the form ``# English / Vietnamese``;
    * ``**EN**: ...`` / ``**VI**: ...`` inline pairs and ``**EN**:`` blocks;
    * bold labels such as ``**Speed / Tốc Độ**:``;
    * ``// English / Vietnamese`` comment lines.

    Args:
        filepath: Path of the UTF-8 Markdown file to rewrite.
    """
    print(f"Processing Vietnamese file: {filepath}")
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Description pair: keep only the Vietnamese line.
    content = re.sub(
        r'^> \*\*EN\*\*: .+\n> \*\*VI\*\*: (.+)', r'> \1', content,
        flags=re.MULTILINE,
    )
    # Standalone English description lines.
    content = re.sub(r'^> \*\*EN\*\*: .+\n', '', content, flags=re.MULTILINE)

    # Bilingual headings: keep the text after " / " (the Vietnamese half).
    content = re.sub(
        r'^(#{1,6}) .+ / (.+)$', r'\1 \2', content, flags=re.MULTILINE,
    )

    # Inline pair "**EN**: x" followed by "**VI**: y": keep y.
    content = re.sub(
        r'\*\*EN\*\*: [^\n]+\n\*\*VI\*\*: ([^\n]+)', r'\1', content,
    )
    # "**EN**:" block: drop the marker and the lines after it, up to the
    # next marker or heading.  re.MULTILINE is required so that the
    # "^#{1,6}" alternative can stop the block at a heading; without it
    # "^" only matches at the very start of the file and headings are
    # swallowed together with the block.
    content = re.sub(
        r'\*\*EN\*\*:\n((?:(?!\*\*EN\*\*:|\*\*VI\*\*:|^#{1,6}).*\n)*)',
        '', content, flags=re.MULTILINE,
    )
    # Standalone "**EN**: ..." lines.
    content = re.sub(r'^\*\*EN\*\*: .+\n', '', content, flags=re.MULTILINE)
    # Remaining "**VI**: " markers.
    content = re.sub(r'\*\*VI\*\*: ', '', content)

    # Bilingual bold labels -> Vietnamese label.
    replacements = [
        ('**Reference / Tham Khảo**:', '**Tham Khảo**:'),
        ('**Patterns / Các Patterns**:', '**Các Patterns**:'),
        ('**Key Generators / Bộ Tạo Key**:', '**Bộ Tạo Key**:'),
        ('**Speed / Tốc Độ**:', '**Tốc Độ**:'),
        ('**Capacity / Dung Lượng**:', '**Dung Lượng**:'),
        ('**Scope / Phạm Vi**:', '**Phạm Vi**:'),
        ('**Use Case / Trường Hợp Sử Dụng**:', '**Trường Hợp Sử Dụng**:'),
        ('**L1 Cache (Memory) / Cache Bộ Nhớ**:', '**Cache Bộ Nhớ**:'),
        ('**L2 Cache (Redis) / Cache Redis**:', '**Cache Redis**:'),
    ]
    for old, new in replacements:
        content = content.replace(old, new)

    # "// English / Vietnamese" comment lines: keep the Vietnamese half.
    # The comment is rebuilt, so the original indentation is re-applied
    # explicitly; otherwise comments inside indented code would be
    # shifted to column 0.
    processed_lines = []
    for line in content.split('\n'):
        if line.strip().startswith('//') and ' / ' in line:
            indent = line[:len(line) - len(line.lstrip())]
            vietnamese_part = line.split(' / ', 1)[1]
            processed_lines.append(f"{indent}// {vietnamese_part}")
        else:
            processed_lines.append(line)
    content = '\n'.join(processed_lines)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
def main():
    """Separate every bilingual skill doc into its single-language form.

    English files are matched by ``docs/en/skills/*.md`` and Vietnamese
    files by ``docs/vi/skills/*.md``, both relative to the current working
    directory.  Only regular files are processed.
    """
    print("Starting bilingual documentation separation...")

    # English pass.
    print("Processing English documentation files...")
    for md_path in filter(os.path.isfile, glob.glob('docs/en/skills/*.md')):
        process_english_file(md_path)

    # Vietnamese pass.
    print("Processing Vietnamese documentation files...")
    for md_path in filter(os.path.isfile, glob.glob('docs/vi/skills/*.md')):
        process_vietnamese_file(md_path)

    print("Bilingual documentation separation completed!")


if __name__ == '__main__':
    main()

138
scripts/fix-bilingual-docs.sh Executable file
View File

@@ -0,0 +1,138 @@
#!/bin/bash
# Script to separate bilingual documentation into language-specific files.
# Removes Vietnamese content from English docs (docs/en/skills/*.md) and
# English content from Vietnamese docs (docs/vi/skills/*.md).
# Files are rewritten in place; paths are relative to the working directory.

# Abort on the first failing command so a broken rewrite step does not go
# unnoticed.
set -e
echo "Starting bilingual documentation separation..."
# Strip Vietnamese content from one English Markdown file, in place.
#
# Arguments:
#   $1 - path of the Markdown file to rewrite
#
# All rules are line-local, so they run in a single sed pass, in this order:
#   1. delete lines that start with a **VI**: marker (plain or blockquote)
#      and strip any inline "**VI**: " marker;
#   2. headings "# English / Vietnamese"        -> "# English";
#   3. bold labels "**Speed / Tốc Độ**"         -> "**Speed**";
#   4. strip "**EN**: " markers;
#   5. comments "// English / Vietnamese"       -> "// English";
#   6. parentheticals " (English / Vietnamese):" -> " (English):".
#
# Extended regular expressions (-E) are used so that the capture groups
# referenced by \1 actually exist; a \1 without a group is rejected by sed
# as an invalid reference and aborts the script under `set -e`.
# `-i.bak` (suffix attached) is accepted by both GNU and BSD sed; the backup
# is removed once the rewrite has succeeded.
#
# Headings and comments are cut at the first "/" only when it is written as
# " / "; a line whose English half itself contains "/" is left untouched.
process_en_file() {
    local file="$1"
    echo "Processing English file: $file"

    sed -i.bak -E \
        -e '/^> \*\*VI\*\*:/d' \
        -e '/^\*\*VI\*\*:/d' \
        -e 's/\*\*VI\*\*: //' \
        -e 's|^(#{1,6} [^/]+) / .+$|\1|' \
        -e 's|\*\*Reference / Tham Khảo\*\*|**Reference**|g' \
        -e 's|\*\*Patterns / Các Patterns\*\*|**Patterns**|g' \
        -e 's|\*\*Key Generators / Bộ Tạo Key\*\*|**Key Generators**|g' \
        -e 's|\*\*Speed / Tốc Độ\*\*|**Speed**|g' \
        -e 's|\*\*Capacity / Dung Lượng\*\*|**Capacity**|g' \
        -e 's|\*\*Scope / Phạm Vi\*\*|**Scope**|g' \
        -e 's|\*\*Use Case / Trường Hợp Sử Dụng\*\*|**Use Case**|g' \
        -e 's/\*\*EN\*\*: //' \
        -e 's|(// [^/]+) / .+$|\1|' \
        -e 's| \(([^()/]+) / [^()]+\):| (\1):|g' \
        "$file"

    # Remove backup file
    rm -f "${file}.bak"
}
# Strip English content from one Vietnamese Markdown file, in place.
#
# Arguments:
#   $1 - path of the Markdown file to rewrite
#
# All rules are line-local, so they run in a single sed pass, in this order:
#   1. delete lines that start with an **EN**: marker (plain or blockquote)
#      and strip any inline "**EN**: " marker;
#   2. headings "# English / Vietnamese"        -> "# Vietnamese";
#   3. bold labels "**Speed / Tốc Độ**"         -> "**Tốc Độ**";
#   4. strip "**VI**: " markers;
#   5. comments "// English / Vietnamese"       -> "// Vietnamese"
#      (leading indentation is kept);
#   6. parentheticals " (English / Vietnamese):" -> " (Vietnamese):".
#
# sed -E replaces the earlier awk steps, which relied on the three-argument
# match() that only gawk provides, and gives the final rule a real capture
# group (a \1 without a group is an invalid reference and aborts the script
# under `set -e`).
# `-i.bak` (suffix attached) is accepted by both GNU and BSD sed; the backup
# is removed once the rewrite has succeeded.
process_vi_file() {
    local file="$1"
    echo "Processing Vietnamese file: $file"

    sed -i.bak -E \
        -e '/^> \*\*EN\*\*:/d' \
        -e '/^\*\*EN\*\*:/d' \
        -e 's/\*\*EN\*\*: //' \
        -e 's|^(#+) .+ / (.+)$|\1 \2|' \
        -e 's|\*\*Reference / Tham Khảo\*\*|**Tham Khảo**|g' \
        -e 's|\*\*Patterns / Các Patterns\*\*|**Các Patterns**|g' \
        -e 's|\*\*Key Generators / Bộ Tạo Key\*\*|**Bộ Tạo Key**|g' \
        -e 's|\*\*Speed / Tốc Độ\*\*|**Tốc Độ**|g' \
        -e 's|\*\*Capacity / Dung Lượng\*\*|**Dung Lượng**|g' \
        -e 's|\*\*Scope / Phạm Vi\*\*|**Phạm Vi**|g' \
        -e 's|\*\*Use Case / Trường Hợp Sử Dụng\*\*|**Trường Hợp Sử Dụng**|g' \
        -e 's/\*\*VI\*\*: //' \
        -e 's|^([[:space:]]*)// .+ / (.+)$|\1// \2|' \
        -e 's| \([^()/]+ / ([^()]+)\):| (\1):|g' \
        "$file"

    # Remove backup file
    rm -f "${file}.bak"
}
# English pass: rewrite every regular file matching docs/en/skills/*.md.
# When the glob matches nothing it stays a literal pattern, which the -f
# test then rejects.
echo "Processing English documentation files..."
for en_doc in docs/en/skills/*.md; do
    [[ -f "$en_doc" ]] || continue
    process_en_file "$en_doc"
done

# Vietnamese pass: same for docs/vi/skills/*.md.
echo "Processing Vietnamese documentation files..."
for vi_doc in docs/vi/skills/*.md; do
    [[ -f "$vi_doc" ]] || continue
    process_vi_file "$vi_doc"
done

echo "Bilingual documentation separation completed!"