#!/bin/bash
#
# Separate bilingual (English / Vietnamese) Markdown documentation into
# language-specific files:
#   * docs/en/skills/*.md  -> Vietnamese content removed
#   * docs/vi/skills/*.md  -> English content removed
#
# Files are rewritten in place. Requires GNU sed (uses `-i` without a suffix
# argument together with `-E`).

set -euo pipefail

#######################################
# Strip Vietnamese content from one English Markdown file, in place.
#
# All rules run in a single sed pass, so the file is either fully rewritten
# or (if sed fails) left untouched; no .bak/.tmp artefacts are created.
#
# Arguments: $1 - path to the Markdown file to rewrite
# Outputs:   one progress line on stdout
# Returns:   non-zero if sed fails
#######################################
process_en_file() {
  local file="${1:?process_en_file: missing file argument}"
  printf 'Processing English file: %s\n' "$file"

  # Rule order matters:
  #  1. Drop whole lines that are Vietnamese blocks, then stray **VI** markers.
  #  2. "(English / Vietnamese):" -> "(English):". This runs before the header
  #     rule, which would otherwise cut such a heading at its last " / ".
  #  3. "# English / Vietnamese" headings -> "# English" (any heading level).
  #  4. Known bilingual bold labels -> English label.
  #  5. Drop the "**EN**: " marker, keeping the English text that follows it.
  #  6. "// English / Vietnamese" code comments -> "// English".
  sed -E -i \
    -e '/^> \*\*VI\*\*:/d' \
    -e '/^\*\*VI\*\*:/d' \
    -e 's/\*\*VI\*\*: //' \
    -e 's| \(([^()/]+) / [^()]+\):| (\1):|g' \
    -e 's|^(#+) (.+) / .+$|\1 \2|' \
    -e 's|\*\*Reference / Tham Khảo\*\*|**Reference**|g' \
    -e 's|\*\*Patterns / Các Patterns\*\*|**Patterns**|g' \
    -e 's|\*\*Key Generators / Bộ Tạo Key\*\*|**Key Generators**|g' \
    -e 's|\*\*Speed / Tốc Độ\*\*|**Speed**|g' \
    -e 's|\*\*Capacity / Dung Lượng\*\*|**Capacity**|g' \
    -e 's|\*\*Scope / Phạm Vi\*\*|**Scope**|g' \
    -e 's|\*\*Use Case / Trường Hợp Sử Dụng\*\*|**Use Case**|g' \
    -e 's/\*\*EN\*\*: //' \
    -e 's|// (.+) / .+$|// \1|' \
    -- "$file"
}

#######################################
# Strip English content from one Vietnamese Markdown file, in place.
#
# Mirror image of process_en_file: the part after the last " / " is kept
# instead of the part before it. Implemented with sed only, so it does not
# depend on gawk's three-argument match() extension.
#
# Arguments: $1 - path to the Markdown file to rewrite
# Outputs:   one progress line on stdout
# Returns:   non-zero if sed fails
#######################################
process_vi_file() {
  local file="${1:?process_vi_file: missing file argument}"
  printf 'Processing Vietnamese file: %s\n' "$file"

  # Same rule order as process_en_file. The code-comment rule only applies to
  # lines that start with "//" in column 0.
  sed -E -i \
    -e '/^> \*\*EN\*\*:/d' \
    -e '/^\*\*EN\*\*:/d' \
    -e 's/\*\*EN\*\*: //' \
    -e 's| \([^()/]+ / ([^()]+)\):| (\1):|g' \
    -e 's|^(#+) .+ / (.+)$|\1 \2|' \
    -e 's|\*\*Reference / Tham Khảo\*\*|**Tham Khảo**|g' \
    -e 's|\*\*Patterns / Các Patterns\*\*|**Các Patterns**|g' \
    -e 's|\*\*Key Generators / Bộ Tạo Key\*\*|**Bộ Tạo Key**|g' \
    -e 's|\*\*Speed / Tốc Độ\*\*|**Tốc Độ**|g' \
    -e 's|\*\*Capacity / Dung Lượng\*\*|**Dung Lượng**|g' \
    -e 's|\*\*Scope / Phạm Vi\*\*|**Phạm Vi**|g' \
    -e 's|\*\*Use Case / Trường Hợp Sử Dụng\*\*|**Trường Hợp Sử Dụng**|g' \
    -e 's/\*\*VI\*\*: //' \
    -e 's|^// .+ / (.+)$|// \1|' \
    -- "$file"
}

#######################################
# Entry point: process every English and Vietnamese skills document found
# relative to the current working directory.
#######################################
main() {
  local file

  echo "Starting bilingual documentation separation..."

  echo "Processing English documentation files..."
  for file in docs/en/skills/*.md; do
    # An unmatched glob stays literal; the -f test skips it.
    [[ -f "$file" ]] || continue
    process_en_file "$file"
  done

  echo "Processing Vietnamese documentation files..."
  for file in docs/vi/skills/*.md; do
    [[ -f "$file" ]] || continue
    process_vi_file "$file"
  done

  echo "Bilingual documentation separation completed!"
}

main "$@"