Enhance documentation structure and improve bilingual support across skills
- Updated skill documentation files to include structured metadata for better organization.
- Enhanced bilingual descriptions and guidelines for clarity in both English and Vietnamese.
- Refined sections on usage, best practices, and related skills to ensure consistency across all documentation.
- Improved formatting and removed outdated references to streamline the documentation experience.
- Added best-practice checklists to relevant skills for better usability and adherence to standards.
This commit is contained in:
105
scripts/final-cleanup.py
Normal file
105
scripts/final-cleanup.py
Normal file
@@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Final cleanup script to ensure complete separation of bilingual documentation.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import glob
|
||||
|
||||
def clean_english_file(filepath):
    """Remove Vietnamese content from an English Markdown file, in place.

    The file is read as UTF-8, filtered line by line and written back to the
    same path. Two kinds of lines are dropped:

    * blockquote lines (starting with ``>``) that contain one of a fixed set
      of Vietnamese phrases;
    * plain paragraph lines (not headings, blockquotes, code fences, table
      rows or list items) that contain a Vietnamese indicator as a whole
      word.

    Every other line, including blank lines, is kept unchanged.

    Args:
        filepath: Path of the Markdown file to rewrite.
    """
    print(f"Cleaning English file: {filepath}")

    # Phrases that mark a blockquote description line as Vietnamese.
    vietnamese_quote_phrases = (
        'thực hành', 'mẫu', 'nền tảng', 'sử dụng', 'triển khai',
        'bảo vệ', 'xác thực', 'giới hạn', 'quản lý', 'kiểm tra',
    )
    vietnamese_indicators = (
        'các', 'cho', 'để', 'và', 'là', 'trong', 'với', 'này', 'có', 'không',
        'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
        'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
        'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật', 'kiểm tra',
    )
    # Match indicators as whole words only. A plain substring test also hits
    # English text ('cho' occurs inside "choose", "echo" and "anchor") and
    # would delete legitimate English lines.
    indicator_re = re.compile(
        r'(?<!\w)(?:'
        + '|'.join(re.escape(word) for word in vietnamese_indicators)
        + r')(?!\w)'
    )
    # Lines starting with these are Markdown structure, never plain prose.
    structural_prefixes = ('#', '>', '```', '|', '-', '*')

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    cleaned_lines = []
    for line in lines:
        stripped = line.strip()
        lowered = line.lower()

        # Vietnamese description lines inside blockquotes.
        if stripped.startswith('>') and any(
            phrase in lowered for phrase in vietnamese_quote_phrases
        ):
            continue

        # Plain prose lines that contain a Vietnamese word.
        if (
            stripped
            and not stripped.startswith(structural_prefixes)
            and indicator_re.search(lowered)
        ):
            continue

        cleaned_lines.append(line)

    # Write back the cleaned content.
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(cleaned_lines)
|
||||
|
||||
def clean_vietnamese_file(filepath):
    """Remove English prose from a Vietnamese Markdown file, in place.

    The file is read as UTF-8, filtered line by line and written back to the
    same path. Outside fenced code blocks, two kinds of lines are dropped:

    * blockquote lines (starting with ``>``) that contain one of a fixed set
      of English phrases;
    * plain paragraph lines (not headings, blockquotes, code fences, table
      rows or list items) that contain none of the Vietnamese indicators and
      do not begin with one of the exempted sentence starters.

    Fence lines and everything between a pair of fences are always kept, so
    code samples survive the cleanup.

    Args:
        filepath: Path of the Markdown file to rewrite.
    """
    print(f"Cleaning Vietnamese file: {filepath}")

    # Phrases that mark a blockquote description line as English.
    english_quote_phrases = (
        'security', 'best practices', 'patterns', 'platform',
        'use when', 'implementing', 'authentication',
    )
    # A line containing any of these substrings counts as Vietnamese.
    vietnamese_indicators = (
        'các', 'cho', 'để', 'và', 'là', 'trong', 'với', 'này', 'có', 'không',
        'từ', 'được', 'sẽ', 'nên', 'cần', 'khi', 'thì', 'lại', 'rất', 'đã',
        'thực', 'hiện', 'dụng', 'bảo', 'mật', 'xác', 'thức', 'phân', 'quyền',
        'dữ liệu', 'nhạy cảm', 'giới hạn', 'tốc độ', 'quản lý', 'bí mật', 'kiểm tra',
        'tổng quan', 'khi nào sử dụng', 'khái niệm chính', 'patterns', 'cache',
    )
    # NOTE(review): lines that start with these English words are KEPT even
    # without any Vietnamese indicator. That looks inverted for a function
    # that removes English, but the intent is unclear, so the behavior is
    # preserved as-is -- confirm with the author.
    exempt_sentence_starts = (
        'The', 'Use', 'This', 'For', 'In', 'When', 'How', 'What', 'Why',
    )
    # Lines starting with these are Markdown structure, never plain prose.
    structural_prefixes = ('#', '>', '```', '|', '-', '*')

    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    cleaned_lines = []
    in_code_block = False
    for line in lines:
        stripped = line.strip()

        # Track fenced code blocks. Without this, ordinary code lines are
        # judged as prose and deleted for lacking Vietnamese words.
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            cleaned_lines.append(line)
            continue
        if in_code_block:
            cleaned_lines.append(line)
            continue

        lowered = line.lower()

        # English description lines inside blockquotes.
        if stripped.startswith('>') and any(
            phrase in lowered for phrase in english_quote_phrases
        ):
            continue

        # Plain prose lines with no Vietnamese indicator.
        if stripped and not stripped.startswith(structural_prefixes):
            has_vietnamese = any(
                indicator in lowered for indicator in vietnamese_indicators
            )
            if not has_vietnamese and not stripped.startswith(exempt_sentence_starts):
                continue

        cleaned_lines.append(line)

    # Write back the cleaned content.
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(cleaned_lines)
|
||||
|
||||
def main():
    """Run the final cleanup over the English, then the Vietnamese skill docs.

    Each pass prints a banner, globs its ``docs/<lang>/skills/*.md`` pattern
    relative to the current working directory and hands every regular file
    it finds to the matching cleaner.
    """
    print("Starting final cleanup of bilingual documentation...")

    # (banner, glob pattern, cleaner) for each language pass, in run order.
    passes = (
        ("Cleaning English documentation files...",
         'docs/en/skills/*.md', clean_english_file),
        ("Cleaning Vietnamese documentation files...",
         'docs/vi/skills/*.md', clean_vietnamese_file),
    )
    for banner, pattern, cleaner in passes:
        print(banner)
        for filepath in filter(os.path.isfile, glob.glob(pattern)):
            cleaner(filepath)

    print("Final cleanup completed!")


# Run the cleanup only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
154
scripts/fix-bilingual-docs.py
Normal file
154
scripts/fix-bilingual-docs.py
Normal file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to separate bilingual documentation into language-specific files.
|
||||
Removes Vietnamese content from English docs and English content from Vietnamese docs.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import glob
|
||||
|
||||
def process_english_file(filepath):
    """Rewrite a bilingual Markdown file in place, keeping only English.

    The file is read as UTF-8, transformed and written back to the same
    path. The transformations, in order:

    1. ``> **EN**: .. / > **VI**: ..`` description pairs become ``> <English>``;
       leftover ``> **VI**:`` lines are removed.
    2. ``# English / Vietnamese`` headings keep only the English part.
    3. ``**EN**: .. / **VI**: ..`` line pairs keep only the English text.
    4. Multi-line ``**VI**:`` blocks are removed up to the next ``**EN**:`` /
       ``**VI**:`` marker line or heading line.
    5. Remaining ``**VI**: ..`` lines and ``**EN**: `` markers are removed.
    6. Known bilingual labels are replaced by their English form.
    7. ``// English / Vietnamese`` comment lines keep only the English part.

    Args:
        filepath: Path of the Markdown file to rewrite.
    """
    print(f"Processing English file: {filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Description line: keep only the English sentence.
    content = re.sub(
        r'^> \*\*EN\*\*: (.+)\n> \*\*VI\*\*: .+', r'> \1', content,
        flags=re.MULTILINE,
    )

    # Remove standalone Vietnamese description lines.
    content = re.sub(r'^> \*\*VI\*\*: .+\n', '', content, flags=re.MULTILINE)

    # Bilingual titles: the labels below use "English / Vietnamese" order, so
    # the English title is the text BEFORE the slash. Capturing the part
    # after the slash would leave the Vietnamese title in the English file.
    content = re.sub(
        r'^(#{1,6}) (.+?) / .+$', r'\1 \2', content, flags=re.MULTILINE,
    )

    # **EN**: content\n**VI**: content  ->  English content only.
    content = re.sub(r'\*\*EN\*\*: ([^\n]+)\n\*\*VI\*\*: [^\n]+', r'\1', content)

    # Remove multi-line **VI** blocks. re.MULTILINE is required so that the
    # '^#{1,6}' alternative can match at the start of a line; without it a
    # heading never terminates the block and later sections are deleted too.
    content = re.sub(
        r'\*\*VI\*\*:\n((?:(?!\*\*EN\*\*:|\*\*VI\*\*:|^#{1,6}).*\n)*)', '',
        content, flags=re.MULTILINE,
    )

    # Remove standalone **VI**: lines.
    content = re.sub(r'^\*\*VI\*\*: .+\n', '', content, flags=re.MULTILINE)

    # Remove **EN**: markers, keeping their text.
    content = re.sub(r'\*\*EN\*\*: ', '', content)

    # Bilingual labels -> English label.
    replacements = [
        ('**Reference / Tham Khảo**:', '**Reference**:'),
        ('**Patterns / Các Patterns**:', '**Patterns**:'),
        ('**Key Generators / Bộ Tạo Key**:', '**Key Generators**:'),
        ('**Speed / Tốc Độ**:', '**Speed**:'),
        ('**Capacity / Dung Lượng**:', '**Capacity**:'),
        ('**Scope / Phạm Vi**:', '**Scope**:'),
        ('**Use Case / Trường Hợp Sử Dụng**:', '**Use Case**:'),
        ('**L1 Cache (Memory) / Cache Bộ Nhớ**:', '**L1 Cache (Memory)**:'),
        ('**L2 Cache (Redis) / Cache Redis**:', '**L2 Cache (Redis)**:'),
    ]
    for old, new in replacements:
        content = content.replace(old, new)

    # Bilingual '//' comment lines: keep the text before the first ' / '
    # (this also keeps the line's original indentation).
    processed_lines = [
        line.split(' / ', 1)[0]
        if line.strip().startswith('//') and ' / ' in line
        else line
        for line in content.split('\n')
    ]
    content = '\n'.join(processed_lines)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
|
||||
|
||||
def process_vietnamese_file(filepath):
    """Rewrite a bilingual Markdown file in place, keeping only Vietnamese.

    The file is read as UTF-8, transformed and written back to the same
    path. The transformations, in order:

    1. ``> **EN**: .. / > **VI**: ..`` description pairs become
       ``> <Vietnamese>``; leftover ``> **EN**:`` lines are removed.
    2. ``# English / Vietnamese`` headings keep only the Vietnamese part.
    3. ``**EN**: .. / **VI**: ..`` line pairs keep only the Vietnamese text.
    4. Multi-line ``**EN**:`` blocks are removed up to the next ``**EN**:`` /
       ``**VI**:`` marker line or heading line.
    5. Remaining ``**EN**: ..`` lines and ``**VI**: `` markers are removed.
    6. Known bilingual labels are replaced by their Vietnamese form.
    7. ``// English / Vietnamese`` comment lines keep only the Vietnamese part.

    Args:
        filepath: Path of the Markdown file to rewrite.
    """
    print(f"Processing Vietnamese file: {filepath}")

    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Description line: keep only the Vietnamese sentence.
    content = re.sub(
        r'^> \*\*EN\*\*: .+\n> \*\*VI\*\*: (.+)', r'> \1', content,
        flags=re.MULTILINE,
    )

    # Remove standalone English description lines.
    content = re.sub(r'^> \*\*EN\*\*: .+\n', '', content, flags=re.MULTILINE)

    # Bilingual titles: keep the Vietnamese part (after the slash).
    content = re.sub(
        r'^(#{1,6}) .+ / (.+)$', r'\1 \2', content, flags=re.MULTILINE,
    )

    # **EN**: content\n**VI**: content  ->  Vietnamese content only.
    content = re.sub(r'\*\*EN\*\*: [^\n]+\n\*\*VI\*\*: ([^\n]+)', r'\1', content)

    # Remove multi-line **EN** blocks. re.MULTILINE is required so that the
    # '^#{1,6}' alternative can match at the start of a line; without it a
    # heading never terminates the block and later sections are deleted too.
    content = re.sub(
        r'\*\*EN\*\*:\n((?:(?!\*\*EN\*\*:|\*\*VI\*\*:|^#{1,6}).*\n)*)', '',
        content, flags=re.MULTILINE,
    )

    # Remove standalone **EN**: lines.
    content = re.sub(r'^\*\*EN\*\*: .+\n', '', content, flags=re.MULTILINE)

    # Remove **VI**: markers, keeping their text.
    content = re.sub(r'\*\*VI\*\*: ', '', content)

    # Bilingual labels -> Vietnamese label.
    replacements = [
        ('**Reference / Tham Khảo**:', '**Tham Khảo**:'),
        ('**Patterns / Các Patterns**:', '**Các Patterns**:'),
        ('**Key Generators / Bộ Tạo Key**:', '**Bộ Tạo Key**:'),
        ('**Speed / Tốc Độ**:', '**Tốc Độ**:'),
        ('**Capacity / Dung Lượng**:', '**Dung Lượng**:'),
        ('**Scope / Phạm Vi**:', '**Phạm Vi**:'),
        ('**Use Case / Trường Hợp Sử Dụng**:', '**Trường Hợp Sử Dụng**:'),
        ('**L1 Cache (Memory) / Cache Bộ Nhớ**:', '**Cache Bộ Nhớ**:'),
        ('**L2 Cache (Redis) / Cache Redis**:', '**Cache Redis**:'),
    ]
    for old, new in replacements:
        content = content.replace(old, new)

    # Bilingual '//' comment lines: keep the text after the first ' / '.
    processed_lines = []
    for line in content.split('\n'):
        if line.strip().startswith('//') and ' / ' in line:
            # Re-apply the original leading whitespace so comments inside
            # indented code samples stay aligned with the surrounding code.
            indent = line[:len(line) - len(line.lstrip())]
            vietnamese_part = line.split(' / ', 1)[1]
            processed_lines.append(f"{indent}// {vietnamese_part}")
        else:
            processed_lines.append(line)
    content = '\n'.join(processed_lines)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
|
||||
|
||||
def main():
    """Separate the bilingual skill docs: English files first, then Vietnamese.

    Each pass prints a banner, globs its ``docs/<lang>/skills/*.md`` pattern
    relative to the current working directory and hands every regular file
    it finds to the matching processor.
    """
    print("Starting bilingual documentation separation...")

    # (banner, glob pattern, processor) for each language pass, in run order.
    passes = (
        ("Processing English documentation files...",
         'docs/en/skills/*.md', process_english_file),
        ("Processing Vietnamese documentation files...",
         'docs/vi/skills/*.md', process_vietnamese_file),
    )
    for banner, pattern, processor in passes:
        print(banner)
        for filepath in filter(os.path.isfile, glob.glob(pattern)):
            processor(filepath)

    print("Bilingual documentation separation completed!")


# Run the separation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
138
scripts/fix-bilingual-docs.sh
Executable file
138
scripts/fix-bilingual-docs.sh
Executable file
@@ -0,0 +1,138 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Script to separate bilingual documentation into language-specific files
|
||||
# Removes Vietnamese content from English docs and English content from Vietnamese docs
|
||||
|
||||
# Abort on the first command that exits non-zero.
set -o errexit

printf '%s\n' "Starting bilingual documentation separation..."
|
||||
|
||||
# Function to process English files (remove Vietnamese content)
|
||||
# Rewrite one bilingual Markdown file in place so that only English remains.
#   $1 - path of the Markdown file to rewrite
# Every edit is line-local sed. `-i.bak` (suffix attached) is used for all
# calls because that form is accepted by both GNU and BSD sed; the backup is
# deleted at the end.
process_en_file() {
    local file="$1"
    echo "Processing English file: $file"

    # Drop Vietnamese description/marker lines, then strip any inline marker.
    sed -i.bak \
        -e '/^> \*\*VI\*\*:/d' \
        -e '/^\*\*VI\*\*:/d' \
        -e 's/\*\*VI\*\*: //' \
        "$file"

    # "# English / Vietnamese" headings -> "# English".
    # The groups are captured explicitly: a \1 in the replacement without a
    # matching \( \) in the pattern is an invalid reference and makes sed
    # fail, which aborts the whole script under `set -e`.
    sed -i.bak \
        -e 's|^\(#\{1,6\}\) \(..*\) / ..*$|\1 \2|' \
        "$file"

    # Bilingual labels -> English label.
    sed -i.bak \
        -e 's/\*\*Reference \/ Tham Khảo\*\*/**Reference**/g' \
        -e 's/\*\*Patterns \/ Các Patterns\*\*/**Patterns**/g' \
        -e 's/\*\*Key Generators \/ Bộ Tạo Key\*\*/**Key Generators**/g' \
        -e 's/\*\*Speed \/ Tốc Độ\*\*/**Speed**/g' \
        -e 's/\*\*Capacity \/ Dung Lượng\*\*/**Capacity**/g' \
        -e 's/\*\*Scope \/ Phạm Vi\*\*/**Scope**/g' \
        -e 's/\*\*Use Case \/ Trường Hợp Sử Dụng\*\*/**Use Case**/g' \
        "$file"

    # Remove **EN**: markers, keeping their text.
    sed -i.bak 's/\*\*EN\*\*: //' "$file"

    # "// English / Vietnamese" comment lines -> "// English"
    # (leading whitespace is kept).
    sed -i.bak 's|^\([[:space:]]*//\) \(..*\) / ..*$|\1 \2|' "$file"

    # " (English / Vietnamese):" -> " (English):".
    sed -i.bak 's| (\([^()/]*\) / [^()]*):| (\1):|g' "$file"

    # Remove backup file.
    rm -f "${file}.bak"
}
|
||||
|
||||
# Function to process Vietnamese files (remove English content)
|
||||
# Rewrite one bilingual Markdown file in place so that only Vietnamese remains.
#   $1 - path of the Markdown file to rewrite
# Every edit is line-local sed with explicit capture groups. This replaces
# the three-argument awk match(), which is a gawk extension and fails under
# other awk implementations. `-i.bak` (suffix attached) is accepted by both
# GNU and BSD sed; the backup is deleted at the end.
process_vi_file() {
    local file="$1"
    echo "Processing Vietnamese file: $file"

    # Drop English description/marker lines, then strip any inline marker.
    sed -i.bak \
        -e '/^> \*\*EN\*\*:/d' \
        -e '/^\*\*EN\*\*:/d' \
        -e 's/\*\*EN\*\*: //' \
        "$file"

    # "# English / Vietnamese" headings -> "# Vietnamese".
    sed -i.bak \
        -e 's|^\(#\{1,6\}\) ..* / \(..*\)$|\1 \2|' \
        "$file"

    # Bilingual labels -> Vietnamese label.
    sed -i.bak \
        -e 's/\*\*Reference \/ Tham Khảo\*\*/**Tham Khảo**/g' \
        -e 's/\*\*Patterns \/ Các Patterns\*\*/**Các Patterns**/g' \
        -e 's/\*\*Key Generators \/ Bộ Tạo Key\*\*/**Bộ Tạo Key**/g' \
        -e 's/\*\*Speed \/ Tốc Độ\*\*/**Tốc Độ**/g' \
        -e 's/\*\*Capacity \/ Dung Lượng\*\*/**Dung Lượng**/g' \
        -e 's/\*\*Scope \/ Phạm Vi\*\*/**Phạm Vi**/g' \
        -e 's/\*\*Use Case \/ Trường Hợp Sử Dụng\*\*/**Trường Hợp Sử Dụng**/g' \
        "$file"

    # Remove **VI**: markers, keeping their text.
    sed -i.bak 's/\*\*VI\*\*: //' "$file"

    # "// English / Vietnamese" comment lines -> "// Vietnamese"
    # (leading whitespace is kept).
    sed -i.bak 's|^\([[:space:]]*//\) ..* / \(..*\)$|\1 \2|' "$file"

    # " (English / Vietnamese):" -> " (Vietnamese):".
    # The group is captured explicitly; a \1 without \( \) is an invalid
    # reference and makes sed fail.
    sed -i.bak 's| ([^()]* / \([^()]*\)):| (\1):|g' "$file"

    # Remove backup file.
    rm -f "${file}.bak"
}
|
||||
|
||||
# English pass: hand every regular file under docs/en/skills to process_en_file.
echo "Processing English documentation files..."
for file in docs/en/skills/*.md; do
    # Skip anything that is not a regular file (e.g. the literal pattern
    # when the glob matches nothing and nullglob is off).
    [[ -f "$file" ]] || continue
    process_en_file "$file"
done

# Vietnamese pass: same, with process_vi_file over docs/vi/skills.
echo "Processing Vietnamese documentation files..."
for file in docs/vi/skills/*.md; do
    [[ -f "$file" ]] || continue
    process_vi_file "$file"
done

echo "Bilingual documentation separation completed!"
|
||||
Reference in New Issue
Block a user