diff --git a/.github/workflows/sync-docs-cn-to-en.yml b/.github/workflows/sync-docs-cn-to-en.yml new file mode 100644 index 0000000000000..5fe0aa9e3913b --- /dev/null +++ b/.github/workflows/sync-docs-cn-to-en.yml @@ -0,0 +1,134 @@ +name: Sync Docs Changes from ZH PR to EN PR + +on: + workflow_dispatch: + inputs: + source_pr_url: + description: 'Source PR URL (Chinese docs repository)' + required: true + type: string + default: '' + target_pr_url: + description: 'Target PR URL (English docs repository)' + required: true + type: string + default: '' + ai_provider: + description: 'AI Provider to use for translation' + required: false + type: choice + options: + - deepseek + - gemini + default: 'gemini' + +jobs: + sync-docs: + runs-on: ubuntu-latest + + steps: + - name: Checkout current repository + uses: actions/checkout@v4 + with: + token: ${{ secrets.GITHUB_TOKEN }} + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r scripts/translate_doc_pr/requirements.txt + + - name: Extract PR information + id: extract_info + run: | + # Extract source repo info + SOURCE_URL="${{ github.event.inputs.source_pr_url }}" + SOURCE_OWNER=$(echo $SOURCE_URL | cut -d'/' -f4) + SOURCE_REPO=$(echo $SOURCE_URL | cut -d'/' -f5) + SOURCE_PR=$(echo $SOURCE_URL | cut -d'/' -f7) + + # Extract target repo info + TARGET_URL="${{ github.event.inputs.target_pr_url }}" + TARGET_OWNER=$(echo $TARGET_URL | cut -d'/' -f4) + TARGET_REPO=$(echo $TARGET_URL | cut -d'/' -f5) + TARGET_PR=$(echo $TARGET_URL | cut -d'/' -f7) + + echo "source_owner=${SOURCE_OWNER}" >> $GITHUB_OUTPUT + echo "source_repo=${SOURCE_REPO}" >> $GITHUB_OUTPUT + echo "source_pr=${SOURCE_PR}" >> $GITHUB_OUTPUT + echo "target_owner=${TARGET_OWNER}" >> $GITHUB_OUTPUT + echo "target_repo=${TARGET_REPO}" >> $GITHUB_OUTPUT + echo "target_pr=${TARGET_PR}" >> $GITHUB_OUTPUT + + echo "Source: ${SOURCE_OWNER}/${SOURCE_REPO}#${SOURCE_PR}" + echo "Target: ${TARGET_OWNER}/${TARGET_REPO}#${TARGET_PR}" + + - name: Get target PR branch info + id: target_branch + run: | + # Get target PR branch name + TARGET_BRANCH=$(curl -s \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + "https://api.github.com/repos/${{ steps.extract_info.outputs.target_owner }}/${{ steps.extract_info.outputs.target_repo }}/pulls/${{ steps.extract_info.outputs.target_pr }}" \ + | jq -r '.head.ref') + + echo "target_branch=${TARGET_BRANCH}" >> $GITHUB_OUTPUT + echo "Target branch: ${TARGET_BRANCH}" + + - name: Clone target repository + run: | + # Clone target repository with the PR branch + git clone https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ steps.extract_info.outputs.target_owner }}/${{ steps.extract_info.outputs.target_repo }}.git target_repo + cd target_repo + git checkout ${{ steps.target_branch.outputs.target_branch }} + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + - name: Run sync script + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DEEPSEEK_API_TOKEN: ${{ secrets.DEEPSEEK_API_TOKEN }} + GEMINI_API_TOKEN: ${{ secrets.GEMINI_API_TOKEN }} + SOURCE_PR_URL: ${{ github.event.inputs.source_pr_url }} + TARGET_PR_URL: ${{ github.event.inputs.target_pr_url }} + AI_PROVIDER: ${{ github.event.inputs.ai_provider }} + TARGET_REPO_PATH: ${{ github.workspace }}/target_repo + run: | + cd 
scripts/translate_doc_pr + python main_workflow.py + + - name: Commit and push changes + run: | + cd target_repo + git add . + if git diff --staged --quiet; then + echo "No changes to commit" + else + git commit -m "Auto-sync: Update English docs from Chinese PR ${{ github.event.inputs.source_pr_url }} + + Synced from: ${{ github.event.inputs.source_pr_url }} + Target PR: ${{ github.event.inputs.target_pr_url }} + AI Provider: ${{ github.event.inputs.ai_provider }} + + Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>" + + git push origin ${{ steps.target_branch.outputs.target_branch }} + echo "Changes pushed to target PR branch: ${{ steps.target_branch.outputs.target_branch }}" + fi + + - name: Add comment to target PR + run: | + # Add a comment to the target PR about the sync + curl -X POST \ + -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ + -H "Accept: application/vnd.github.v3+json" \ + "https://api.github.com/repos/${{ steps.extract_info.outputs.target_owner }}/${{ steps.extract_info.outputs.target_repo }}/issues/${{ steps.extract_info.outputs.target_pr }}/comments" \ + -d "{ + \"body\": \"šŸ¤– **Auto-sync completed**\\n\\nšŸ“„ **Source PR**: ${{ github.event.inputs.source_pr_url }}\\nšŸŽÆ **Target PR**: ${{ github.event.inputs.target_pr_url }}\\nāœ… English documentation has been updated based on Chinese documentation changes.\\n\\n_This comment was generated automatically by the sync workflow._\" + }" diff --git a/scripts/translate_doc_pr/__init__.py b/scripts/translate_doc_pr/__init__.py new file mode 100644 index 0000000000000..b272696e2e394 --- /dev/null +++ b/scripts/translate_doc_pr/__init__.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 +""" +Auto-Sync PR Changes - Refactored Modular Version + +This package contains the refactored version of the auto-sync-pr-changes script, +split into logical modules for better maintainability and testing.
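+ +Typical usage (a sketch; assumes this directory is the working directory, as in the sync +workflow above, or that the package is importable from the repository root): run +`python main_workflow.py`, or use the package-level `main` entry point re-exported below.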
+ +Modules: +- pr_analyzer: PR analysis, diff parsing, content getting, hierarchy building +- section_matcher: Section matching (direct matching + AI matching) +- file_adder: New file processing and translation +- file_deleter: Deleted file processing +- file_updater: Updated file processing and translation +- toc_processor: TOC file special processing +- main: Main orchestration function +""" + +# Import main functionality for easy access (relative import so the package works when imported) +from .main import main + +# Make main function available at package level +__all__ = ["main"] diff --git a/scripts/translate_doc_pr/file_adder.py b/scripts/translate_doc_pr/file_adder.py new file mode 100644 index 0000000000000..57e93b2fb1c63 --- --- /dev/null +++ b/scripts/translate_doc_pr/file_adder.py @@ -0,0 +1,193 @@ +""" +File Adder Module +Handles processing and translation of newly added files +""" + +import os +import re +import json +import threading +from github import Github +from openai import OpenAI + +# Thread-safe printing +print_lock = threading.Lock() + +def thread_safe_print(*args, **kwargs): + with print_lock: + print(*args, **kwargs) + +def create_section_batches(file_content, max_lines_per_batch=200): + """Create batches of file content for translation, respecting section boundaries""" + lines = file_content.split('\n') + + # Find all section headers + section_starts = [] + for i, line in enumerate(lines): + line = line.strip() + if line.startswith('#'): + match = re.match(r'^(#{1,10})\s+(.+)', line) + if match: + section_starts.append(i + 1) # 1-based line numbers + + # If no sections found, just batch by line count + if not section_starts: + batches = [] + for i in range(0, len(lines), max_lines_per_batch): + batch_lines = lines[i:i + max_lines_per_batch] + batches.append('\n'.join(batch_lines)) + return batches + + # Create batches respecting section boundaries + batches = [] + current_batch_start = 0 + + for i, section_start in enumerate(section_starts): + section_start_idx = section_start - 1 # Convert to 0-based + + # Check if adding this section would exceed the line limit + if (section_start_idx - current_batch_start) > max_lines_per_batch: + # Close current batch at the previous section boundary + if current_batch_start < section_start_idx: + batch_lines = lines[current_batch_start:section_start_idx] + batches.append('\n'.join(batch_lines)) + current_batch_start = section_start_idx + + # If this is the last section, or the next section would create a batch too large + if i == len(section_starts) - 1: + # Add remaining content as final batch + batch_lines = lines[current_batch_start:] + batches.append('\n'.join(batch_lines)) + else: + next_section_start = section_starts[i + 1] - 1 # 0-based + if (next_section_start - current_batch_start) > max_lines_per_batch: + # Close current batch at current section boundary + batch_lines = lines[current_batch_start:section_start_idx] + if batch_lines: # Only add non-empty batches + batches.append('\n'.join(batch_lines)) + current_batch_start = section_start_idx + + # Clean up any empty batches + batches = [batch for batch in batches if batch.strip()] + + return batches + +def translate_file_batch(batch_content, ai_client, source_language="English", target_language="Chinese"): + """Translate a single batch of file content using AI""" + if not batch_content.strip(): + return batch_content + + thread_safe_print(f" šŸ¤– Translating batch ({len(batch_content.split())} words)...") + + prompt = f"""You are a professional technical writer.
Please translate the following {source_language} content to {target_language}. + +IMPORTANT INSTRUCTIONS: +1. Preserve ALL Markdown formatting (headers, links, code blocks, tables, etc.) +2. Do NOT translate: + - Code examples, SQL queries, configuration values + - Technical terms like "TiDB", "TiKV", "PD", API names, etc. + - File paths, URLs, and command line examples + - Variable names and system configuration parameters +3. Translate only the descriptive text and explanations +4. Maintain the exact structure and indentation +5. Keep all special characters and formatting intact + +Content to translate: +{batch_content} + +Please provide the translated content maintaining all formatting and structure.""" + + # Add token estimation + try: + from main import print_token_estimation + print_token_estimation(prompt, "File addition translation") + except ImportError: + # Fallback if import fails - use tiktoken + try: + import tiktoken + enc = tiktoken.get_encoding("cl100k_base") + tokens = enc.encode(prompt) + actual_tokens = len(tokens) + char_count = len(prompt) + print(f" šŸ’° File addition translation") + print(f" šŸ“ Input: {char_count:,} characters") + print(f" šŸ”¢ Actual tokens: {actual_tokens:,} (using tiktoken cl100k_base)") + except Exception: + # Final fallback to character approximation + estimated_tokens = len(prompt) // 4 + char_count = len(prompt) + print(f" šŸ’° File addition translation") + print(f" šŸ“ Input: {char_count:,} characters") + print(f" šŸ”¢ Estimated tokens: ~{estimated_tokens:,} (fallback: 4 chars/token approximation)") + + try: + translated_content = ai_client.chat_completion( + messages=[{"role": "user", "content": prompt}], + temperature=0.1 + ) + thread_safe_print(f" āœ… Batch translation completed") + return translated_content + + except Exception as e: + thread_safe_print(f" āŒ Batch translation failed: {e}") + return batch_content # Return original content if translation fails + +def process_added_files(added_files, pr_url, github_client, ai_client, repo_config): + """Process newly added files by translating and creating them in target repository""" + if not added_files: + thread_safe_print("\nšŸ“„ No new files to process") + return + + thread_safe_print(f"\nšŸ“„ Processing {len(added_files)} newly added files...") + + target_local_path = repo_config['target_local_path'] + source_language = repo_config['source_language'] + target_language = repo_config['target_language'] + + for file_path, file_content in added_files.items(): + thread_safe_print(f"\nšŸ“ Processing new file: {file_path}") + + # Create target file path + target_file_path = os.path.join(target_local_path, file_path) + target_dir = os.path.dirname(target_file_path) + + # Create directory if it doesn't exist + if not os.path.exists(target_dir): + os.makedirs(target_dir, exist_ok=True) + thread_safe_print(f" šŸ“ Created directory: {target_dir}") + + # Check if file already exists + if os.path.exists(target_file_path): + thread_safe_print(f" āš ļø Target file already exists: {target_file_path}") + continue + + # Create section batches for translation + batches = create_section_batches(file_content, max_lines_per_batch=200) + thread_safe_print(f" šŸ“¦ Created {len(batches)} batches for translation") + + # Translate each batch + translated_batches = [] + for i, batch in enumerate(batches): + thread_safe_print(f" šŸ”„ Processing batch {i+1}/{len(batches)}") + translated_batch = translate_file_batch( + batch, + ai_client, + source_language, + target_language + ) + 
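# Note: translate_file_batch returns the original batch content when a translation call fails + # (see its except handler above), so the join below never drops a batch. +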
translated_batches.append(translated_batch) + + # Combine translated batches + translated_content = '\n'.join(translated_batches) + + # Write translated content to target file + try: + with open(target_file_path, 'w', encoding='utf-8') as f: + f.write(translated_content) + + thread_safe_print(f" āœ… Created translated file: {target_file_path}") + + except Exception as e: + thread_safe_print(f" āŒ Error creating file {target_file_path}: {e}") + + thread_safe_print(f"\nāœ… Completed processing all new files") diff --git a/scripts/translate_doc_pr/file_deleter.py b/scripts/translate_doc_pr/file_deleter.py new file mode 100644 index 0000000000000..c2064fe568cf3 --- /dev/null +++ b/scripts/translate_doc_pr/file_deleter.py @@ -0,0 +1,45 @@ +""" +File Deleter Module +Handles processing of deleted files and deleted sections +""" + +import os +import threading +from github import Github + +# Thread-safe printing +print_lock = threading.Lock() + +def thread_safe_print(*args, **kwargs): + with print_lock: + print(*args, **kwargs) + +def process_deleted_files(deleted_files, github_client, repo_config): + """Process deleted files by removing them from target repository""" + if not deleted_files: + thread_safe_print("\nšŸ—‘ļø No files to delete") + return + + thread_safe_print(f"\nšŸ—‘ļø Processing {len(deleted_files)} deleted files...") + + target_local_path = repo_config['target_local_path'] + + for file_path in deleted_files: + thread_safe_print(f"\nšŸ—‘ļø Processing deleted file: {file_path}") + + # Create target file path + target_file_path = os.path.join(target_local_path, file_path) + + # Check if file exists in target + if os.path.exists(target_file_path): + try: + os.remove(target_file_path) + thread_safe_print(f" āœ… Deleted file: {target_file_path}") + except Exception as e: + thread_safe_print(f" āŒ Error deleting file {target_file_path}: {e}") + else: + thread_safe_print(f" āš ļø Target file not found: {target_file_path}") + + thread_safe_print(f"\nāœ… Completed processing deleted files") + +# Section deletion logic moved to file_updater.py diff --git a/scripts/translate_doc_pr/file_updater.py b/scripts/translate_doc_pr/file_updater.py new file mode 100644 index 0000000000000..82addd7cc6881 --- /dev/null +++ b/scripts/translate_doc_pr/file_updater.py @@ -0,0 +1,1692 @@ +""" +File Updater Module +Handles processing and translation of updated files and sections +""" + +import os +import re +import json +import threading +from concurrent.futures import ThreadPoolExecutor +from github import Github +from openai import OpenAI + +# Thread-safe printing +print_lock = threading.Lock() + +def thread_safe_print(*args, **kwargs): + with print_lock: + print(*args, **kwargs) + +def get_updated_sections_from_ai(pr_diff, target_sections, source_old_content_dict, ai_client, source_language, target_language, target_file_name=None): + """Use AI to update target sections based on source old content, PR diff, and target sections""" + if not source_old_content_dict or not target_sections: + return {} + + # Filter out deleted sections and prepare source sections from old content + source_sections = {} + for key, old_content in source_old_content_dict.items(): + # Skip deleted sections + if 'deleted' in key: + continue + + # Handle null values by using empty string + content = old_content if old_content is not None else "" + source_sections[key] = content + + # Keep the original order from match_source_diff_to_target.json (no sorting needed) + formatted_source_sections = json.dumps(source_sections, 
ensure_ascii=False, indent=2) + formatted_target_sections = json.dumps(target_sections, ensure_ascii=False, indent=2) + + thread_safe_print(f" šŸ“Š Source sections: {len(source_sections)} sections") + thread_safe_print(f" šŸ“Š Target sections: {len(target_sections)} sections") + + # Calculate total content size + total_source_chars = sum(len(str(content)) for content in source_sections.values()) + total_target_chars = sum(len(str(content)) for content in target_sections.values()) + thread_safe_print(f" šŸ“ Content size: Source={total_source_chars:,} chars, Target={total_target_chars:,} chars") + + thread_safe_print(f" šŸ¤– Getting AI translation for {len(source_sections)} sections...") + + diff_content = source_sections + + prompt = f"""You are a professional technical writer in the Database domain. I will provide you with: + +1. Source sections in {source_language}: +{formatted_source_sections} + +2. GitHub PR changes (Diff): +{pr_diff} + +3. Current target sections in {target_language}: +{formatted_target_sections} + +Task: Update the target sections in {target_language} according to the diff in {source_language}. + +Instructions: +1. Carefully analyze the PR diff to understand what changes were made (additions, deletions, modifications) +2. Find the corresponding positions in the {target_language} sections and make the same changes. Do not change any content that is not modified in the diff, especially the format. +3. Keep the JSON structure unchanged, only modify the section content +4. Ensure the updated {target_language} content is logically consistent with the {source_language} changes +5. Maintain proper technical writing style and terminology in {target_language}. If a sentence in the diff is unchanged in content but only reordered in {source_language}, reuse its existing translation in {target_language}. 
+ +Please return the complete updated JSON in the same format as target sections, without any additional explanatory text.""" + + # Save prompt to file for reference with target file prefix + target_file_prefix = "unknown" + if target_file_name: + # Use provided target file name + target_file_prefix = target_file_name.replace('/', '_').replace('.md', '') + elif target_sections: + # Try to extract filename from the first section key or content + first_key = next(iter(target_sections.keys()), "") + if "_" in first_key: + # If key contains underscore, it might have target file info + parts = first_key.split("_") + if len(parts) > 1: + target_file_prefix = parts[0] + + # Ensure temp_output directory exists + script_dir = os.path.dirname(os.path.abspath(__file__)) + temp_dir = os.path.join(script_dir, "temp_output") + os.makedirs(temp_dir, exist_ok=True) + + prompt_file = os.path.join(temp_dir, f"{target_file_prefix}_prompt-for-ai-translation.txt") + with open(prompt_file, 'w', encoding='utf-8') as f: + f.write(prompt) + + thread_safe_print(f"\nšŸ’¾ Prompt saved to {prompt_file}") + thread_safe_print(f"šŸ“ Prompt length: {len(prompt)} characters") + thread_safe_print(f"šŸ“Š Source sections: {len(source_sections)}") + thread_safe_print(f"šŸ“Š Target sections: {len(target_sections)}") + thread_safe_print(f"šŸ¤– Sending prompt to AI...") + + thread_safe_print(f"\n šŸ“¤ AI Update Prompt ({source_language} → {target_language}):") + thread_safe_print(f" " + "="*80) + thread_safe_print(f" Source Sections: {formatted_source_sections[:500]}...") + thread_safe_print(f" PR Diff (first 500 chars): {pr_diff[:500]}...") + thread_safe_print(f" Target Sections: {formatted_target_sections[:500]}...") + thread_safe_print(f" " + "="*80) + + try: + from main import print_token_estimation + print_token_estimation(prompt, f"Document translation ({source_language} → {target_language})") + except ImportError: + # Fallback if import fails - use tiktoken + try: + import tiktoken + enc = tiktoken.get_encoding("cl100k_base") + tokens = enc.encode(prompt) + actual_tokens = len(tokens) + char_count = len(prompt) + thread_safe_print(f" šŸ’° Document translation ({source_language} → {target_language})") + thread_safe_print(f" šŸ“ Input: {char_count:,} characters") + thread_safe_print(f" šŸ”¢ Actual tokens: {actual_tokens:,} (using tiktoken cl100k_base)") + except Exception: + # Final fallback to character approximation + estimated_tokens = len(prompt) // 4 + char_count = len(prompt) + thread_safe_print(f" šŸ’° Document translation ({source_language} → {target_language})") + thread_safe_print(f" šŸ“ Input: {char_count:,} characters") + thread_safe_print(f" šŸ”¢ Estimated tokens: ~{estimated_tokens:,} (fallback: 4 chars/token approximation)") + + try: + ai_response = ai_client.chat_completion( + messages=[{"role": "user", "content": prompt}], + temperature=0.1 + ) + thread_safe_print(f" šŸ“ AI translation response received") + thread_safe_print(f" šŸ“‹ AI response (first 500 chars): {ai_response[:500]}...") + + result = parse_updated_sections(ai_response) + thread_safe_print(f" šŸ“Š Parsed {len(result)} sections from AI response") + + # Save AI results to file with target file prefix + ai_results_file = os.path.join(temp_dir, f"{target_file_prefix}_updated_sections_from_ai.json") + with open(ai_results_file, 'w', encoding='utf-8') as f: + json.dump(result, f, ensure_ascii=False, indent=2) + + thread_safe_print(f" šŸ’¾ AI results saved to {ai_results_file}") + return result + + except Exception as e: + thread_safe_print(f" 
āŒ AI translation failed: {e}") + return {} + +def parse_updated_sections(ai_response): + """Parse AI response and extract JSON (from get-updated-target-sections.py)""" + # Ensure temp_output directory exists for debug files + script_dir = os.path.dirname(os.path.abspath(__file__)) + temp_dir = os.path.join(script_dir, "temp_output") + os.makedirs(temp_dir, exist_ok=True) + + try: + print(f"\n šŸ”§ Parsing AI response...") + print(f" Raw response length: {len(ai_response)} characters") + + # Try to extract JSON from AI response + cleaned_response = ai_response.strip() + + # Remove markdown code blocks if present + if cleaned_response.startswith('```json'): + cleaned_response = cleaned_response[7:] + print(f" šŸ“ Removed '```json' prefix") + elif cleaned_response.startswith('```'): + cleaned_response = cleaned_response[3:] + print(f" šŸ“ Removed '```' prefix") + + if cleaned_response.endswith('```'): + cleaned_response = cleaned_response[:-3] + print(f" šŸ“ Removed '```' suffix") + + cleaned_response = cleaned_response.strip() + + print(f" šŸ“ Cleaned response length: {len(cleaned_response)} characters") + print(f" šŸ“ First 200 chars: {cleaned_response[:200]}...") + print(f" šŸ“ Last 200 chars: ...{cleaned_response[-200:]}") + + # Try to find JSON content between curly braces + start_idx = cleaned_response.find('{') + end_idx = cleaned_response.rfind('}') + + if start_idx != -1 and end_idx != -1 and end_idx > start_idx: + json_content = cleaned_response[start_idx:end_idx+1] + print(f" šŸ“ Extracted JSON content length: {len(json_content)} characters") + + try: + # Parse JSON + updated_sections = json.loads(json_content) + print(f" āœ… Successfully parsed JSON with {len(updated_sections)} sections") + return updated_sections + except json.JSONDecodeError as e: + print(f" āš ļø JSON seems incomplete, trying to fix...") + + # Try to fix incomplete JSON by finding the last complete entry + lines = json_content.split('\n') + fixed_lines = [] + in_value = False + quote_count = 0 + + for line in lines: + if '"' in line: + quote_count += line.count('"') + + fixed_lines.append(line) + + # If we have an even number of quotes, we might have a complete entry + if quote_count % 2 == 0 and (line.strip().endswith(',') or line.strip().endswith('"')): + # Try to parse up to this point + potential_json = '\n'.join(fixed_lines) + if not potential_json.rstrip().endswith('}'): + # Remove trailing comma and add closing brace + if potential_json.rstrip().endswith(','): + potential_json = potential_json.rstrip()[:-1] + '\n}' + else: + potential_json += '\n}' + + try: + partial_sections = json.loads(potential_json) + print(f" šŸ”§ Fixed JSON with {len(partial_sections)} sections") + return partial_sections + except: + continue + + # If all else fails, return the original error + raise e + else: + print(f" āŒ Could not find valid JSON structure in response") + return None + + except json.JSONDecodeError as e: + print(f" āŒ Error parsing AI response as JSON: {e}") + print(f" šŸ“ Error at position: {e.pos if hasattr(e, 'pos') else 'unknown'}") + + # Save debug info + debug_file = os.path.join(temp_dir, f"ai_response_debug_{os.getpid()}.txt") + with open(debug_file, 'w', encoding='utf-8') as f: + f.write("Original AI Response:\n") + f.write("="*80 + "\n") + f.write(ai_response) + f.write("\n" + "="*80 + "\n") + f.write("Cleaned Response:\n") + f.write("-"*80 + "\n") + f.write(cleaned_response if 'cleaned_response' in locals() else "Not available") + + print(f" šŸ“ Debug info saved to: {debug_file}") + 
return None + except Exception as e: + print(f" āŒ Unexpected error parsing AI response: {e}") + return None + + +def replace_frontmatter_content(lines, new_content): + """Replace content from beginning of file to first top-level header""" + # Find the first top-level header + first_header_idx = None + for i, line in enumerate(lines): + if line.strip().startswith('# '): + first_header_idx = i + break + + if first_header_idx is None: + # No top-level header found, replace entire content + return new_content.split('\n') + + # Replace content from start to before first header + new_lines = new_content.split('\n') + return new_lines + lines[first_header_idx:] + + +def replace_toplevel_section_content(lines, target_line_num, new_content): + """Replace content from top-level header to first next-level header""" + start_idx = target_line_num - 1 # Convert to 0-based index + + # Find the end of top-level section (before first ## header) + end_idx = len(lines) + for i in range(start_idx + 1, len(lines)): + line = lines[i].strip() + if line.startswith('##'): # Found first next-level header + end_idx = i + break + + # Replace the top-level section content (from start_idx to end_idx) + new_lines = new_content.split('\n') + return lines[:start_idx] + new_lines + lines[end_idx:] + + +def update_local_document(file_path, updated_sections, hierarchy_dict, target_local_path): + """Update local document using hierarchy-based section identification (from update-target-doc-v2.py)""" + local_path = os.path.join(target_local_path, file_path) + + if not os.path.exists(local_path): + print(f" āŒ Local file not found: {local_path}") + return False + + try: + # Read document content + with open(local_path, 'r', encoding='utf-8') as f: + document_content = f.read() + + lines = document_content.split('\n') + + replacements_made = [] + + # Use a unified approach: build a complete replacement plan first, then execute it + # This avoids line number shifts during the replacement process + + # Find section boundaries for ALL sections + section_boundaries = find_section_boundaries(lines, hierarchy_dict) + + # Create a comprehensive replacement plan + replacement_plan = [] + + for line_num, new_content in updated_sections.items(): + if line_num == "0": + # Special handling for frontmatter + first_header_idx = None + for i, line in enumerate(lines): + if line.strip().startswith('# '): + first_header_idx = i + break + + replacement_plan.append({ + 'type': 'frontmatter', + 'start': 0, + 'end': first_header_idx if first_header_idx is not None else len(lines), + 'new_content': new_content, + 'line_num': line_num + }) + + elif line_num in hierarchy_dict: + hierarchy = hierarchy_dict[line_num] + if ' > ' not in hierarchy: # Top-level section + # Special handling for top-level sections + start_idx = int(line_num) - 1 + end_idx = len(lines) + for i in range(start_idx + 1, len(lines)): + line = lines[i].strip() + if line.startswith('##'): + end_idx = i + break + + replacement_plan.append({ + 'type': 'toplevel', + 'start': start_idx, + 'end': end_idx, + 'new_content': new_content, + 'line_num': line_num + }) + else: + # Regular section + if line_num in section_boundaries: + boundary = section_boundaries[line_num] + replacement_plan.append({ + 'type': 'regular', + 'start': boundary['start'], + 'end': boundary['end'], + 'new_content': new_content, + 'line_num': line_num, + 'hierarchy': boundary['hierarchy'] + }) + else: + print(f" āš ļø Section at line {line_num} not found in hierarchy") + + # Sort replacement plan: process from bottom to top of
the document to avoid line shifts + # Sort by start line in reverse order (highest line number first) + replacement_plan.sort(key=lambda x: -x['start']) + + # Execute replacements in the planned order (from bottom to top) + print(f" šŸ“‹ Executing {len(replacement_plan)} replacements from bottom to top:") + for i, replacement in enumerate(replacement_plan): + print(f" {i+1}. {replacement['type']} (line {replacement.get('line_num', '0')}, start: {replacement['start']})") + + for replacement in replacement_plan: + start = replacement['start'] + end = replacement['end'] + new_content = replacement['new_content'] + new_lines = new_content.split('\n') + + # Replace the content + lines = lines[:start] + new_lines + lines[end:] + + # Record the replacement + original_line_count = end - start + line_diff = len(new_lines) - original_line_count + + replacements_made.append({ + 'type': replacement['type'], + 'line_num': replacement.get('line_num', 'N/A'), + 'hierarchy': replacement.get('hierarchy', 'N/A'), + 'start': start, + 'end': end, + 'original_lines': original_line_count, + 'new_lines': len(new_lines), + 'line_diff': line_diff + }) + + print(f" āœ… Updated {replacement['type']} section: {replacement.get('line_num', 'frontmatter')}") + + # Save updated document + with open(local_path, 'w', encoding='utf-8') as f: + f.write('\n'.join(lines)) + + print(f" āœ… Updated {len(replacements_made)} sections") + for replacement in replacements_made: + print(f" šŸ“ Line {replacement['line_num']}: {replacement['hierarchy']}") + + return True + + except Exception as e: + thread_safe_print(f" āŒ Error updating file: {e}") + return False + +def find_section_boundaries(lines, hierarchy_dict): + """Find the start and end line for each section based on hierarchy (from update-target-doc-v2.py)""" + section_boundaries = {} + + # Sort sections by line number + sorted_sections = sorted(hierarchy_dict.items(), key=lambda x: int(x[0])) + + for i, (line_num, hierarchy) in enumerate(sorted_sections): + start_line = int(line_num) - 1 # Convert to 0-based index + + # Find end line (start of next section at same or higher level) + end_line = len(lines) # Default to end of document + + if start_line >= len(lines): + continue + + # Get current section level + current_line = lines[start_line].strip() + if not current_line.startswith('#'): + continue + + current_level = len(current_line.split()[0]) # Count # characters + + # Look for next section at same or higher level + for j in range(start_line + 1, len(lines)): + line = lines[j].strip() + if line.startswith('#'): + line_level = len(line.split()[0]) if line.split() else 0 + if line_level <= current_level: + end_line = j + break + + section_boundaries[line_num] = { + 'start': start_line, + 'end': end_line, + 'hierarchy': hierarchy, + 'level': current_level + } + + return section_boundaries + +def insert_sections_into_document(file_path, translated_sections, target_insertion_points, target_local_path): + """Insert translated sections into the target document at specified points""" + + if not translated_sections or not target_insertion_points: + thread_safe_print(f" āš ļø No sections or insertion points provided") + return False + + local_path = os.path.join(target_local_path, file_path) + + if not os.path.exists(local_path): + thread_safe_print(f" āŒ Local file not found: {local_path}") + return False + + try: + # Read document content + with open(local_path, 'r', encoding='utf-8') as f: + document_content = f.read() + + lines = document_content.split('\n') + 
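# Worked example (illustrative): with insertion points after lines 40 and 251, applying 251 first + # leaves line 40 unshifted; ascending order would invalidate every later insertion point. +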
thread_safe_print(f" šŸ“„ Document has {len(lines)} lines") + + # Sort insertion points by line number in descending order to avoid position shifts + sorted_insertions = sorted( + target_insertion_points.items(), + key=lambda x: x[1]['insertion_after_line'], + reverse=True + ) + + insertions_made = [] + + for group_id, point_data in sorted_insertions: + insertion_after_line = point_data['insertion_after_line'] + new_sections = point_data['new_sections'] + insertion_type = point_data['insertion_type'] + + thread_safe_print(f" šŸ“Œ Inserting {len(new_sections)} sections after line {insertion_after_line}") + + # Convert 1-based line number to 0-based index for insertion point + # insertion_after_line is 1-based, so insertion_index should be insertion_after_line - 1 + insertion_index = insertion_after_line - 1 + + # Prepare new content to insert + new_content_lines = [] + + # Add an empty line before the new sections if not already present + if insertion_index < len(lines) and lines[insertion_index].strip(): + new_content_lines.append("") + + # Add each translated section + for section_line_num in new_sections: + # Find the corresponding translated content + section_hierarchy = None + section_content = None + + # Search for the section in translated_sections by line number or hierarchy + for hierarchy, content in translated_sections.items(): + # Try to match by hierarchy or find the content + if str(section_line_num) in hierarchy or content: # This is a simplified matching + section_hierarchy = hierarchy + section_content = content + break + + if section_content: + # Split content into lines and add to insertion + content_lines = section_content.split('\n') + new_content_lines.extend(content_lines) + + # Add spacing between sections + if section_line_num != new_sections[-1]: # Not the last section + new_content_lines.append("") + + thread_safe_print(f" āœ… Added section: {section_hierarchy}") + else: + thread_safe_print(f" āš ļø Could not find translated content for section at line {section_line_num}") + + # Add an empty line after the new sections if not already present + # Check if the new content already ends with an empty line + if new_content_lines and not new_content_lines[-1].strip(): + # Content already ends with empty line, don't add another + pass + elif insertion_index + 1 < len(lines) and lines[insertion_index + 1].strip(): + # Next line has content and our content doesn't end with empty line, add one + new_content_lines.append("") + + # Insert the new content (insert after insertion_index line, before the next line) + # If insertion_after_line is 251, we want to insert at position 252 (0-based index 251) + lines = lines[:insertion_index + 1] + new_content_lines + lines[insertion_index + 1:] + + insertions_made.append({ + 'group_id': group_id, + 'insertion_after_line': insertion_after_line, + 'sections_count': len(new_sections), + 'lines_added': len(new_content_lines), + 'insertion_type': insertion_type + }) + + # Save updated document + with open(local_path, 'w', encoding='utf-8') as f: + f.write('\n'.join(lines)) + + thread_safe_print(f" āœ… Successfully inserted {len(insertions_made)} section groups") + for insertion in insertions_made: + thread_safe_print(f" šŸ“ {insertion['group_id']}: {insertion['sections_count']} sections, {insertion['lines_added']} lines after line {insertion['insertion_after_line']}") + + return True + + except Exception as e: + thread_safe_print(f" āŒ Error inserting sections: {e}") + return False + +def process_modified_sections(modified_sections, 
pr_diff, pr_url, github_client, ai_client, repo_config, max_non_system_sections=120): + """Process modified sections with full data structure support""" + results = [] + + for file_path, file_data in modified_sections.items(): + thread_safe_print(f"\nšŸ“„ Processing {file_path}") + + try: + # Call process_single_file with the complete data structure + success, message = process_single_file( + file_path, + file_data, # Pass the complete data structure (includes 'sections', 'original_hierarchy', etc.) + pr_diff, + pr_url, + github_client, + ai_client, + repo_config, + max_non_system_sections + ) + + if success: + thread_safe_print(f" āœ… Successfully processed {file_path}") + results.append((file_path, True, message)) + else: + thread_safe_print(f" āŒ Failed to process {file_path}: {message}") + results.append((file_path, False, message)) + + except Exception as e: + thread_safe_print(f" āŒ Error processing {file_path}: {e}") + results.append((file_path, False, f"Error processing {file_path}: {e}")) + + return results + +def process_deleted_sections(deleted_sections, pr_url, github_client, ai_client, repo_config, max_non_system_sections=120): + """Process deleted sections with full data structure support""" + results = [] + + for file_path, source_sections in deleted_sections.items(): + thread_safe_print(f"\nšŸ—‘ļø Processing deleted sections in {file_path}") + + try: + # Call process_single_file_deletion with the complete data structure + success, message = process_single_file_deletion( + file_path, + source_sections, + pr_url, + github_client, + ai_client, + repo_config, + max_non_system_sections + ) + + if success: + thread_safe_print(f" āœ… Successfully processed deletions in {file_path}") + results.append((file_path, True, message)) + else: + thread_safe_print(f" āŒ Failed to process deletions in {file_path}: {message}") + results.append((file_path, False, message)) + + except Exception as e: + thread_safe_print(f" āŒ Error processing deletions in {file_path}: {e}") + results.append((file_path, False, f"Error processing deletions in {file_path}: {e}")) + + return results + +def process_single_file_deletion(file_path, source_sections, pr_url, github_client, ai_client, repo_config, max_non_system_sections=120): + """Process deletion of sections in a single file""" + + # Import needed functions + from pr_analyzer import get_target_hierarchy_and_content + from section_matcher import ( + find_direct_matches_for_special_files, + filter_non_system_sections, + get_corresponding_sections, + is_system_variable_or_config, + clean_title_for_matching, + parse_ai_response, + find_matching_line_numbers + ) + + # Get target file hierarchy and content + target_hierarchy, target_lines = get_target_hierarchy_and_content( + file_path, github_client, repo_config['target_repo'] + ) + + if not target_hierarchy: + return False, f"Could not get target hierarchy for {file_path}" + + # Separate system variables from regular sections for hybrid mapping + system_sections = {} + regular_sections = {} + + for line_num, hierarchy in source_sections.items(): + # Extract title for checking + if ' > ' in hierarchy: + title = hierarchy.split(' > ')[-1] + else: + title = hierarchy + + cleaned_title = clean_title_for_matching(title) + if is_system_variable_or_config(cleaned_title): + system_sections[line_num] = hierarchy + else: + regular_sections[line_num] = hierarchy + + sections_to_delete = [] + + # Process system variables with direct matching + if system_sections: + thread_safe_print(f" šŸŽÆ Direct matching for 
{len(system_sections)} system sections...") + matched_dict, failed_matches, skipped_sections = find_direct_matches_for_special_files( + system_sections, target_hierarchy, target_lines + ) + + for target_line_num, hierarchy_string in matched_dict.items(): + sections_to_delete.append(int(target_line_num)) + thread_safe_print(f" āœ… Marked system section for deletion: line {target_line_num}") + + if failed_matches: + thread_safe_print(f" āŒ Failed to match {len(failed_matches)} system sections") + for failed_line in failed_matches: + thread_safe_print(f" - Line {failed_line}: {system_sections[failed_line]}") + + # Process regular sections with AI matching + if regular_sections: + thread_safe_print(f" šŸ¤– AI matching for {len(regular_sections)} regular sections...") + + # Filter target hierarchy for AI + filtered_target_hierarchy = filter_non_system_sections(target_hierarchy) + + # Check if filtered hierarchy is reasonable for AI + if len(filtered_target_hierarchy) > max_non_system_sections: + thread_safe_print(f" āŒ Target hierarchy too large for AI: {len(filtered_target_hierarchy)} > {max_non_system_sections}") + else: + # Get AI mapping (convert dict values to lists as expected by the function) + source_list = list(regular_sections.values()) + target_list = list(filtered_target_hierarchy.values()) + + ai_mapping = get_corresponding_sections( + source_list, + target_list, + ai_client, + repo_config['source_language'], + repo_config['target_language'], + max_tokens=20000 # Use default value for now, can be made configurable later + ) + + if ai_mapping: + # Parse AI response and find matching line numbers + ai_sections = parse_ai_response(ai_mapping) + ai_matched = find_matching_line_numbers(ai_sections, target_hierarchy) + + for source_line, target_line in ai_matched.items(): + try: + sections_to_delete.append(int(target_line)) + thread_safe_print(f" āœ… Marked regular section for deletion: line {target_line}") + except ValueError as e: + thread_safe_print(f" āŒ Error converting target_line to int: {target_line}, error: {e}") + # If target_line is not a number, try to find it in target_hierarchy + for line_num, hierarchy in target_hierarchy.items(): + if target_line in hierarchy or hierarchy in target_line: + sections_to_delete.append(int(line_num)) + thread_safe_print(f" āœ… Found matching section at line {line_num}: {hierarchy}") + break + + # Delete the sections from local document + if sections_to_delete: + success = delete_sections_from_document(file_path, sections_to_delete, repo_config['target_local_path']) + if success: + return True, f"Successfully deleted {len(sections_to_delete)} sections from {file_path}" + else: + return False, f"Failed to delete sections from {file_path}" + else: + return False, f"No sections to delete in {file_path}" + +def delete_sections_from_document(file_path, sections_to_delete, target_local_path): + """Delete specified sections from the local document""" + target_file_path = os.path.join(target_local_path, file_path) + + if not os.path.exists(target_file_path): + thread_safe_print(f" āŒ Target file not found: {target_file_path}") + return False + + try: + # Read current file content + with open(target_file_path, 'r', encoding='utf-8') as f: + content = f.read() + + lines = content.split('\n') + + # Import needed function + from pr_analyzer import build_hierarchy_dict + + # Build hierarchy to understand section boundaries + target_hierarchy = build_hierarchy_dict(content) + + # Sort sections to delete in reverse order to maintain line numbers + 
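# Deleting bottom-up keeps the remaining line numbers valid; each deletion below spans from the + # section header to the line before the next header at the same or a higher level (or to end of file). +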
sections_to_delete.sort(reverse=True) + + thread_safe_print(f" šŸ—‘ļø Deleting {len(sections_to_delete)} sections from {file_path}") + + for section_line in sections_to_delete: + section_start = section_line - 1 # Convert to 0-based index + + if section_start < 0 or section_start >= len(lines): + thread_safe_print(f" āŒ Invalid section line: {section_line}") + continue + + # Find section end + section_end = len(lines) - 1 # Default to end of file + + # Look for next header at same or higher level + current_line = lines[section_start].strip() + if current_line.startswith('#'): + current_level = len(current_line) - len(current_line.lstrip('#')) # Count leading '#' characters + + for i in range(section_start + 1, len(lines)): + line = lines[i].strip() + if line.startswith('#'): + line_level = len(line) - len(line.lstrip('#')) + if line_level <= current_level: + section_end = i - 1 + break + + # Delete section (from section_start to section_end inclusive) + thread_safe_print(f" šŸ—‘ļø Deleting lines {section_start + 1} to {section_end + 1}") + del lines[section_start:section_end + 1] + + # Write updated content back to file + updated_content = '\n'.join(lines) + with open(target_file_path, 'w', encoding='utf-8') as f: + f.write(updated_content) + + thread_safe_print(f" āœ… Updated file: {target_file_path}") + return True + + except Exception as e: + thread_safe_print(f" āŒ Error deleting sections from {target_file_path}: {e}") + return False + +def process_single_file(file_path, source_sections, pr_diff, pr_url, github_client, ai_client, repo_config, max_non_system_sections=120): + """Process a single file - thread-safe function for parallel processing""" + thread_id = threading.current_thread().name + thread_safe_print(f"\nšŸ“„ [{thread_id}] Processing {file_path}") + + try: + # Check if this is a TOC file with special operations + if isinstance(source_sections, dict) and 'type' in source_sections and source_sections['type'] == 'toc': + from toc_processor import process_toc_file + return process_toc_file(file_path, source_sections, pr_url, github_client, ai_client, repo_config) + + # Check if this is enhanced sections + if isinstance(source_sections, dict) and 'sections' in source_sections: + if source_sections.get('type') == 'enhanced_sections': + # Skip all the matching logic and directly extract data + thread_safe_print(f" [{thread_id}] šŸš€ Using enhanced sections data, skipping matching logic") + enhanced_sections = source_sections['sections'] + + # Extract target sections and source old content from enhanced sections + # Maintain the exact order from match_source_diff_to_target.json + from collections import OrderedDict + target_sections = OrderedDict() + source_old_content_dict = OrderedDict() + + # Process in the exact order they appear in enhanced_sections (which comes from match_source_diff_to_target.json) + for key, section_info in enhanced_sections.items(): + if isinstance(section_info, dict): + operation = section_info.get('source_operation', '') + + # Skip deleted sections - they shouldn't be in the enhanced_sections anyway + if operation == 'deleted': + continue + + # For source sections: use old_content for modified, new_content for added + if operation == 'added': + source_content = section_info.get('source_new_content', '') + else: # modified + source_content = section_info.get('source_old_content', '') + + # For target sections: use target_content for modified, empty string for added + if operation == 'added': + target_content = "" # Added sections have no existing target content + else: # modified +
target_content = section_info.get('target_content', '') + + # Add to both dictionaries using the same key from match_source_diff_to_target.json + if source_content is not None: + source_old_content_dict[key] = source_content + target_sections[key] = target_content + + thread_safe_print(f" [{thread_id}] šŸ“Š Extracted: {len(target_sections)} target sections, {len(source_old_content_dict)} source old content entries") + + # Update sections with AI (get-updated-target-sections.py logic) + thread_safe_print(f" [{thread_id}] šŸ¤– Getting updated sections from AI...") + updated_sections = get_updated_sections_from_ai(pr_diff, target_sections, source_old_content_dict, ai_client, repo_config['source_language'], repo_config['target_language'], file_path) + if not updated_sections: + thread_safe_print(f" [{thread_id}] āš ļø Could not get AI update") + return False, f"Could not get AI update for {file_path}" + + # Return the AI results for further processing + thread_safe_print(f" [{thread_id}] āœ… Successfully got AI translation results for {file_path}") + return True, updated_sections # Return the actual AI results + + else: + # New format: complete data structure + actual_sections = source_sections['sections'] + + # Regular (non-enhanced) file processing continues here + # Get target hierarchy and content (get-target-affected-hierarchy.py logic) + from pr_analyzer import get_target_hierarchy_and_content + target_hierarchy, target_lines = get_target_hierarchy_and_content(file_path, github_client, repo_config['target_repo']) + if not target_hierarchy: + thread_safe_print(f" [{thread_id}] āš ļø Could not get target content") + return False, f"Could not get target content for {file_path}" + else: + # Old format: direct dict + actual_sections = source_sections + + # The old format also needs the target hierarchy and content for the matching below + from pr_analyzer import get_target_hierarchy_and_content + target_hierarchy, target_lines = get_target_hierarchy_and_content(file_path, github_client, repo_config['target_repo']) + if not target_hierarchy: + thread_safe_print(f" [{thread_id}] āš ļø Could not get target content") + return False, f"Could not get target content for {file_path}" + + # Only do mapping if we don't have enhanced sections + if 'enhanced_sections' not in locals() or not enhanced_sections: + # Separate different types of sections + from section_matcher import is_system_variable_or_config + system_var_sections = {} + toplevel_sections = {} + frontmatter_sections = {} + regular_sections = {} + + for line_num, hierarchy in actual_sections.items(): + if line_num == "0" and hierarchy == "frontmatter": + # Special handling for frontmatter + frontmatter_sections[line_num] = hierarchy + else: + # Extract the leaf title from hierarchy + leaf_title = hierarchy.split(' > ')[-1] if ' > ' in hierarchy else hierarchy + + if is_system_variable_or_config(leaf_title): + system_var_sections[line_num] = hierarchy + elif leaf_title.startswith('# '): + # Top-level titles need special handling + toplevel_sections[line_num] = hierarchy + else: + regular_sections[line_num] = hierarchy + + thread_safe_print(f" [{thread_id}] šŸ“Š Found {len(system_var_sections)} system variable/config, {len(toplevel_sections)} top-level, {len(frontmatter_sections)} frontmatter, and {len(regular_sections)} regular sections") + + target_affected = {} + + # Process frontmatter sections with special handling + if frontmatter_sections: + thread_safe_print(f" [{thread_id}] šŸ“„ Processing frontmatter section...") + # For frontmatter, we simply map it to line 0 in target + for line_num, hierarchy in frontmatter_sections.items(): + target_affected[line_num] = hierarchy + thread_safe_print(f" [{thread_id}] āœ… Mapped {len(frontmatter_sections)} frontmatter section") + + # Process top-level titles with special matching + if toplevel_sections: + thread_safe_print(f" [{thread_id}] šŸ” Top-level title matching for {len(toplevel_sections)} sections...") + from
section_matcher import find_toplevel_title_matches + toplevel_matched, toplevel_failed, toplevel_skipped = find_toplevel_title_matches(toplevel_sections, target_lines) + + if toplevel_matched: + target_affected.update(toplevel_matched) + thread_safe_print(f" [{thread_id}] āœ… Top-level matched {len(toplevel_matched)} sections") + + if toplevel_failed: + thread_safe_print(f" [{thread_id}] āš ļø {len(toplevel_failed)} top-level sections failed matching") + for failed in toplevel_failed: + thread_safe_print(f" āŒ {failed['hierarchy']}: {failed['reason']}") + + # Process system variables/config sections with direct matching + if system_var_sections: + thread_safe_print(f" [{thread_id}] šŸŽÆ Direct matching {len(system_var_sections)} system variable/config sections...") + from section_matcher import find_direct_matches_for_special_files + direct_matched, failed_matches, skipped_sections = find_direct_matches_for_special_files(system_var_sections, target_hierarchy, target_lines) + + if direct_matched: + target_affected.update(direct_matched) + thread_safe_print(f" [{thread_id}] āœ… Direct matched {len(direct_matched)} system variable/config sections") + + if failed_matches: + thread_safe_print(f" [{thread_id}] āš ļø {len(failed_matches)} system variable/config sections failed direct matching") + for failed in failed_matches: + thread_safe_print(f" āŒ {failed['hierarchy']}: {failed['reason']}") + + # Process regular sections with AI mapping using filtered target hierarchy + if regular_sections: + thread_safe_print(f" [{thread_id}] šŸ¤– AI mapping {len(regular_sections)} regular sections...") + + # Filter target hierarchy to only include non-system sections for AI mapping + from section_matcher import filter_non_system_sections + filtered_target_hierarchy = filter_non_system_sections(target_hierarchy) + + # Check if filtered target hierarchy exceeds the maximum allowed for AI mapping + MAX_NON_SYSTEM_SECTIONS_FOR_AI = 120 + if len(filtered_target_hierarchy) > MAX_NON_SYSTEM_SECTIONS_FOR_AI: + thread_safe_print(f" [{thread_id}] āŒ Too many non-system sections ({len(filtered_target_hierarchy)} > {MAX_NON_SYSTEM_SECTIONS_FOR_AI})") + thread_safe_print(f" [{thread_id}] āš ļø Skipping AI mapping for regular sections to avoid complexity") + + # If no system sections were matched either, return error + if not target_affected: + error_message = f"File {file_path} has too many non-system sections ({len(filtered_target_hierarchy)} > {MAX_NON_SYSTEM_SECTIONS_FOR_AI}) and no system variable sections were matched" + return False, error_message + + # Continue with only system variable matches if available + thread_safe_print(f" [{thread_id}] āœ… Proceeding with {len(target_affected)} system variable/config sections only") + else: + # Proceed with AI mapping using filtered hierarchy + source_list = list(regular_sections.values()) + target_list = list(filtered_target_hierarchy.values()) + + from section_matcher import get_corresponding_sections + ai_response = get_corresponding_sections(source_list, target_list, ai_client, repo_config['source_language'], repo_config['target_language'], max_tokens=20000) + if ai_response: + # Parse AI response and find matching line numbers in the original (unfiltered) hierarchy + from section_matcher import parse_ai_response, find_matching_line_numbers + ai_sections = parse_ai_response(ai_response) + ai_matched = find_matching_line_numbers(ai_sections, target_hierarchy) # Use original hierarchy for line number lookup + + if ai_matched: + target_affected.update(ai_matched) + 
thread_safe_print(f" [{thread_id}] āœ… AI mapped {len(ai_matched)} regular sections") + else: + thread_safe_print(f" [{thread_id}] āš ļø AI mapping failed for regular sections") + else: + thread_safe_print(f" [{thread_id}] āš ļø Could not get AI response for regular sections") + + # Summary of mapping results + thread_safe_print(f" [{thread_id}] šŸ“Š Total mapped: {len(target_affected)} out of {len(actual_sections)} sections") + + if not target_affected: + thread_safe_print(f" [{thread_id}] āš ļø Could not map sections") + return False, f"Could not map sections for {file_path}" + + thread_safe_print(f" [{thread_id}] āœ… Mapped {len(target_affected)} sections") + + # Extract target sections (get-target-affected-sections.py logic) + thread_safe_print(f" [{thread_id}] šŸ“ Extracting target sections...") + from pr_analyzer import extract_affected_sections + target_sections = extract_affected_sections(target_affected, target_lines) + + # Extract source old content from the enhanced data structure + thread_safe_print(f" [{thread_id}] šŸ“– Extracting source old content...") + source_old_content_dict = {} + + # Handle different data structures for source_sections + if isinstance(source_sections, dict) and 'sections' in source_sections: + # New format: complete data structure with enhanced matching info + for key, section_info in source_sections.items(): + if isinstance(section_info, dict) and 'source_old_content' in section_info: + source_old_content_dict[key] = section_info['source_old_content'] + else: + # Fallback: if we don't have the enhanced structure, we need to get it differently + thread_safe_print(f" [{thread_id}] āš ļø Source sections missing enhanced structure, using fallback") + # For now, create empty dict to avoid errors - this should be addressed in the calling code + source_old_content_dict = {} + + # Update sections with AI (get-updated-target-sections.py logic) + thread_safe_print(f" [{thread_id}] šŸ¤– Getting updated sections from AI...") + updated_sections = get_updated_sections_from_ai(pr_diff, target_sections, source_old_content_dict, ai_client, repo_config['source_language'], repo_config['target_language'], file_path) + if not updated_sections: + thread_safe_print(f" [{thread_id}] āš ļø Could not get AI update") + return False, f"Could not get AI update for {file_path}" + + # Update local document (update-target-doc-v2.py logic) + thread_safe_print(f" [{thread_id}] šŸ’¾ Updating local document...") + success = update_local_document(file_path, updated_sections, target_affected, repo_config['target_local_path']) + + if success: + thread_safe_print(f" [{thread_id}] šŸŽ‰ Successfully updated {file_path}") + return True, f"Successfully updated {file_path}" + else: + thread_safe_print(f" [{thread_id}] āŒ Failed to update {file_path}") + return False, f"Failed to update {file_path}" + + except Exception as e: + thread_safe_print(f" [{thread_id}] āŒ Error processing {file_path}: {e}") + return False, f"Error processing {file_path}: {e}" + +def process_added_sections(added_sections, pr_diff, pr_url, github_client, ai_client, repo_config, max_non_system_sections=120): + """Process added sections by translating and inserting them""" + if not added_sections: + thread_safe_print("\nāž• No added sections to process") + return + + thread_safe_print(f"\nāž• Processing added sections from {len(added_sections)} files...") + + # Import needed functions + from section_matcher import map_insertion_points_to_target + from pr_analyzer import get_target_hierarchy_and_content + + for 
file_path, section_data in added_sections.items(): + thread_safe_print(f"\nāž• Processing added sections in {file_path}") + + source_sections = section_data['sections'] + insertion_points = section_data['insertion_points'] + + # Get target file hierarchy and content + target_hierarchy, target_lines = get_target_hierarchy_and_content( + file_path, github_client, repo_config['target_repo'] + ) + + if not target_hierarchy: + thread_safe_print(f" āŒ Could not get target hierarchy for {file_path}") + continue + + # Map insertion points to target language + target_insertion_points = map_insertion_points_to_target( + insertion_points, target_hierarchy, target_lines, file_path, pr_url, github_client, ai_client, repo_config, max_non_system_sections + ) + + if not target_insertion_points: + thread_safe_print(f" āŒ No insertion points mapped for {file_path}") + continue + + # Use AI to translate/update new sections (similar to modified sections) + # Since we're now using source_old_content, we need to extract it from the added sections + source_old_content_dict = {} + for key, content in source_sections.items(): + # For added sections, source_old_content is typically None or empty + # We use the new content (from the source file) as the content to translate + source_old_content_dict[key] = content if content is not None else "" + + # Get target sections (empty for new sections, but we need the structure) + target_sections = {} # New sections don't have existing target content + + # Use the same AI function to translate the new sections + translated_sections = get_updated_sections_from_ai( + pr_diff, + target_sections, + source_old_content_dict, + ai_client, + repo_config['source_language'], + repo_config['target_language'], + file_path + ) + + if translated_sections: + # Insert translated sections into document + insert_sections_into_document(file_path, translated_sections, target_insertion_points, repo_config['target_local_path']) + thread_safe_print(f" āœ… Successfully inserted {len(translated_sections)} sections in {file_path}") + else: + thread_safe_print(f" āš ļø No sections were translated for {file_path}") + +def process_files_in_batches(source_changes, pr_diff, pr_url, github_client, ai_client, repo_config, operation_type="modified", batch_size=5, max_non_system_sections=120): + """Process files in parallel batches""" + # Handle different data formats + if isinstance(source_changes, dict): + files = [] + for path, data in source_changes.items(): + if isinstance(data, dict): + if 'type' in data and data['type'] == 'toc': + # TOC file with special operations + files.append((path, data)) + elif 'sections' in data: + # New format: extract sections for processing + files.append((path, data['sections'])) + else: + # Old format: direct dict + files.append((path, data)) + else: + # Old format: direct dict + files.append((path, data)) + else: + files = list(source_changes.items()) + + total_files = len(files) + + if total_files == 0: + return [] + + thread_safe_print(f"\nšŸ”„ Processing {total_files} files in batches of {batch_size}") + + results = [] + + # Process files in batches + for i in range(0, total_files, batch_size): + batch = files[i:i + batch_size] + batch_num = (i // batch_size) + 1 + total_batches = (total_files + batch_size - 1) // batch_size + + thread_safe_print(f"\nšŸ“¦ Batch {batch_num}/{total_batches}: Processing {len(batch)} files") + + # Process current batch in parallel + with ThreadPoolExecutor(max_workers=len(batch), thread_name_prefix=f"Batch{batch_num}") as executor: + # 
Submit all files in current batch + future_to_file = {} + for file_path, source_sections in batch: + future = executor.submit( + process_single_file, + file_path, + source_sections, + pr_diff, + pr_url, + github_client, + ai_client, + repo_config, + max_non_system_sections + ) + future_to_file[future] = file_path + + # Collect results as they complete + from concurrent.futures import as_completed + batch_results = [] + for future in as_completed(future_to_file): + file_path = future_to_file[future] + try: + success, message = future.result() + batch_results.append((file_path, success, message)) + except Exception as e: + batch_results.append((file_path, False, f"Exception in thread: {e}")) + + results.extend(batch_results) + + # Brief pause between batches to avoid overwhelming the APIs + if i + batch_size < total_files: + thread_safe_print(f" āøļø Waiting 2 seconds before next batch...") + import time + time.sleep(2) + + return results + +def update_target_document_from_match_data(match_file_path, target_local_path, target_file_name=None): + """ + Update target document using data from match_source_diff_to_target.json + This integrates the logic from test_target_update.py + + Args: + match_file_path: Path to the match_source_diff_to_target.json file + target_local_path: Local path to the target repository + target_file_name: Optional target file name (if not provided, will be extracted from match_file_path) + """ + import json + import os + from pathlib import Path + + # Load match data + if not os.path.exists(match_file_path): + thread_safe_print(f"āŒ {match_file_path} file does not exist") + return False + + with open(match_file_path, 'r', encoding='utf-8') as f: + match_data = json.load(f) + + thread_safe_print(f"āœ… Loaded {len(match_data)} section matching data from {match_file_path}") + thread_safe_print(f" Reading translation results directly from target_new_content field") + + if not match_data: + thread_safe_print("āŒ No matching data found") + return False + + # Sort sections by target_line from large to small (modify from back to front) + sections_with_line = [] + + for key, section_data in match_data.items(): + operation = section_data.get('source_operation', '') + target_new_content = section_data.get('target_new_content') + + # For deleted sections, target_new_content should be null + if operation == 'deleted': + if target_new_content is not None: + thread_safe_print(f" āš ļø Deleted section {key} has non-null target_new_content, should be fixed") + thread_safe_print(f" šŸ—‘ļø Including deleted section: {key}") + elif not target_new_content: + thread_safe_print(f" āš ļø Skipping section without target_new_content: {key}") + continue + + target_line = section_data.get('target_line') + if target_line and target_line != 'unknown': + try: + # Handle special case for bottom sections + if target_line == "-1": + line_num = -1 # Special marker for bottom sections + else: + line_num = int(target_line) + sections_with_line.append((key, section_data, line_num)) + except ValueError: + thread_safe_print(f"āš ļø Skipping invalid target_line: {target_line} for {key}") + + # Separate sections into different processing groups + bottom_modified_sections = [] # Process first: modify existing content at document end + regular_sections = [] # Process second: normal operations from back to front + bottom_added_sections = [] # Process last: append new content to document end + + for key, section_data, line_num in sections_with_line: + target_hierarchy = section_data.get('target_hierarchy', 
'') + + if target_hierarchy.startswith('bottom-modified-'): + bottom_modified_sections.append((key, section_data, line_num)) + elif target_hierarchy.startswith('bottom-added-'): + bottom_added_sections.append((key, section_data, line_num)) + else: + regular_sections.append((key, section_data, line_num)) + + # Sort each group appropriately + def get_source_line_num(item): + key, section_data, line_num = item + if '_' in key and key.split('_')[1].isdigit(): + return int(key.split('_')[1]) + return 0 + + # Bottom modified: sort by source line number (large to small) + bottom_modified_sections.sort(key=lambda x: -get_source_line_num(x)) + + # Regular sections: sort by target_line (large to small), then by source line number + regular_sections.sort(key=lambda x: (-x[2], -get_source_line_num(x))) + + # Bottom added: sort by source line number (small to large) for proper document order + bottom_added_sections.sort(key=lambda x: get_source_line_num(x)) + + # Combine all sections in processing order + all_sections = bottom_modified_sections + regular_sections + bottom_added_sections + + thread_safe_print(f"\nšŸ“Š Processing order: bottom-modified -> regular -> bottom-added") + thread_safe_print(f" šŸ“‹ Bottom modified sections: {len(bottom_modified_sections)}") + thread_safe_print(f" šŸ“‹ Regular sections: {len(regular_sections)}") + thread_safe_print(f" šŸ“‹ Bottom added sections: {len(bottom_added_sections)}") + + if not all_sections: + thread_safe_print("āŒ No valid sections found for update") + return False + + thread_safe_print(f"\nšŸ“Š Detailed processing order:") + for i, (key, section_data, line_num) in enumerate(all_sections, 1): + operation = section_data.get('source_operation', '') + hierarchy = section_data.get('target_hierarchy', '') + insertion_type = section_data.get('insertion_type', '') + + # Extract source line number for display + source_line_num = int(key.split('_')[1]) if '_' in key and key.split('_')[1].isdigit() else 'N/A' + + # Display target_line with special handling for bottom sections + target_display = "END" if line_num == -1 else str(line_num) + + # Determine section group + if hierarchy.startswith('bottom-modified-'): + group = "BotMod" + elif hierarchy.startswith('bottom-added-'): + group = "BotAdd" + else: + group = "Regular" + + if operation == 'deleted': + action = "delete" + elif insertion_type == "before_reference": + action = "insert" + elif line_num == -1: + action = "append" + else: + action = "replace" + + thread_safe_print(f" {i:2}. 
[{group:7}] Target:{target_display:>3} Src:{source_line_num:3} | {key:15} ({operation:8}) | {action:7} | {hierarchy}") + + # Determine target file name + if target_file_name is None: + # Extract target file name from match file path + # e.g., "tikv-configuration-file-match_source_diff_to_target.json" -> "tikv-configuration-file.md" + match_filename = os.path.basename(match_file_path) + if match_filename.endswith('-match_source_diff_to_target.json'): + extracted_name = match_filename[:-len('-match_source_diff_to_target.json')] + '.md' + target_file_name = extracted_name + thread_safe_print(f" šŸ“‚ Extracted target file name from match file: {target_file_name}") + else: + # Fallback: try to determine from source hierarchy + first_entry = next(iter(match_data.values())) + source_hierarchy = first_entry.get('source_original_hierarchy', '') + + if 'TiFlash' in source_hierarchy or 'tiflash' in source_hierarchy.lower(): + target_file_name = "tiflash/tiflash-configuration.md" + else: + # Default to command-line flags for other cases + target_file_name = "command-line-flags-for-tidb-configuration.md" + thread_safe_print(f" šŸ“‚ Determined target file name from hierarchy: {target_file_name}") + else: + thread_safe_print(f" šŸ“‚ Using provided target file name: {target_file_name}") + + target_file_path = os.path.join(target_local_path, target_file_name) + thread_safe_print(f"\nšŸ“„ Target file path: {target_file_path}") + + # Update target document + thread_safe_print(f"\nšŸš€ Starting target document update, will modify {len(all_sections)} sections...") + success = update_target_document_sections(all_sections, target_file_path) + + return success + +def update_target_document_sections(all_sections, target_file_path): + """ + Update target document sections - integrated from test_target_update.py + """ + thread_safe_print(f"\nšŸš€ Starting target document update: {target_file_path}") + + # Read target document + if not os.path.exists(target_file_path): + thread_safe_print(f"āŒ Target file does not exist: {target_file_path}") + return False + + with open(target_file_path, 'r', encoding='utf-8') as f: + target_lines = f.readlines() + + thread_safe_print(f"šŸ“„ Target document total lines: {len(target_lines)}") + + # Process modifications in order (bottom-modified -> regular -> bottom-added) + for i, (key, section_data, target_line_num) in enumerate(all_sections, 1): + operation = section_data.get('source_operation', '') + insertion_type = section_data.get('insertion_type', '') + target_hierarchy = section_data.get('target_hierarchy', '') + target_new_content = section_data.get('target_new_content') + + thread_safe_print(f"\nšŸ“ {i}/{len(all_sections)} Processing {key} (Line {target_line_num})") + thread_safe_print(f" Operation type: {operation}") + thread_safe_print(f" Target section: {target_hierarchy}") + + if operation == 'deleted': + # Delete logic: remove the specified section + if target_line_num == -1: + thread_safe_print(f" āŒ Invalid delete operation for bottom section") + continue + + thread_safe_print(f" šŸ—‘ļø Delete mode: removing section starting at line {target_line_num}") + + # Find section end position + start_line = target_line_num - 1 # Convert to 0-based index + + if start_line >= len(target_lines): + thread_safe_print(f" āŒ Line number out of range: {target_line_num} > {len(target_lines)}") + continue + + # Find section end position + end_line = find_section_end_for_update(target_lines, start_line, target_hierarchy) + + thread_safe_print(f" šŸ“ Delete range: line {start_line 
+ 1} to {end_line}") + thread_safe_print(f" šŸ“„ Delete content: {target_lines[start_line].strip()[:50]}...") + + # Delete content + deleted_lines = target_lines[start_line:end_line] + target_lines[start_line:end_line] = [] + + thread_safe_print(f" āœ… Deleted {len(deleted_lines)} lines of content") + + elif target_new_content is None: + thread_safe_print(f" āš ļø Skipping: target_new_content is null") + continue + + elif not target_new_content: + thread_safe_print(f" āš ļø Skipping: target_new_content is empty") + continue + + else: + # Handle content format + thread_safe_print(f" šŸ“„ Content preview: {repr(target_new_content[:80])}...") + + if target_hierarchy.startswith('bottom-'): + # Bottom section special handling + if target_hierarchy.startswith('bottom-modified-'): + # Bottom modified: find and replace existing content at document end + thread_safe_print(f" šŸ”„ Bottom modified section: replacing existing content at document end") + + # Get the old content to search for + source_operation_data = section_data.get('source_operation_data', {}) + old_content = source_operation_data.get('old_content', '').strip() + + if old_content: + # Search backwards from end to find the matching section + found_line = None + for idx in range(len(target_lines) - 1, -1, -1): + line_content = target_lines[idx].strip() + if line_content == old_content: + found_line = idx + thread_safe_print(f" šŸ“ Found target section at line {found_line + 1}: {line_content[:50]}...") + break + + if found_line is not None: + # Find section end + end_line = find_section_end_for_update(target_lines, found_line, target_hierarchy) + + # Ensure content format is correct + if not target_new_content.endswith('\n'): + target_new_content += '\n' + + # Split content by lines + new_lines = target_new_content.splitlines(keepends=True) + + # Replace content + target_lines[found_line:end_line] = new_lines + + thread_safe_print(f" āœ… Replaced {end_line - found_line} lines with {len(new_lines)} lines") + else: + thread_safe_print(f" āš ļø Could not find target section, appending to end instead") + # Fallback: append to end + if not target_new_content.endswith('\n'): + target_new_content += '\n' + if target_lines and target_lines[-1].strip(): + target_new_content = '\n' + target_new_content + new_lines = target_new_content.splitlines(keepends=True) + target_lines.extend(new_lines) + thread_safe_print(f" āœ… Appended {len(new_lines)} lines to end of document") + else: + thread_safe_print(f" āš ļø No old_content found, appending to end instead") + # Fallback: append to end + if not target_new_content.endswith('\n'): + target_new_content += '\n' + if target_lines and target_lines[-1].strip(): + target_new_content = '\n' + target_new_content + new_lines = target_new_content.splitlines(keepends=True) + target_lines.extend(new_lines) + thread_safe_print(f" āœ… Appended {len(new_lines)} lines to end of document") + + elif target_hierarchy.startswith('bottom-added-'): + # Bottom added: append new content to end of document + thread_safe_print(f" šŸ”š Bottom added section: appending new content to end") + + # Ensure content format is correct + if not target_new_content.endswith('\n'): + target_new_content += '\n' + + # Add spacing before new section if needed + if target_lines and target_lines[-1].strip(): + target_new_content = '\n' + target_new_content + + # Split content by lines + new_lines = target_new_content.splitlines(keepends=True) + + # Append to end of document + target_lines.extend(new_lines) + + thread_safe_print(f" āœ… 
Appended {len(new_lines)} lines to end of document")
+                else:
+                    # Other bottom sections: append to end
+                    thread_safe_print(f"   šŸ”š Other bottom section: appending to end of document")
+
+                    # Ensure content format is correct
+                    if not target_new_content.endswith('\n'):
+                        target_new_content += '\n'
+
+                    # Add spacing before new section if needed
+                    if target_lines and target_lines[-1].strip():
+                        target_new_content = '\n' + target_new_content
+
+                    # Split content by lines
+                    new_lines = target_new_content.splitlines(keepends=True)
+
+                    # Append to end of document
+                    target_lines.extend(new_lines)
+
+                    thread_safe_print(f"   āœ… Appended {len(new_lines)} lines to end of document")
+
+            elif target_hierarchy == "frontmatter":
+                # Frontmatter special handling: directly replace front lines
+                thread_safe_print(f"   šŸ“„ Frontmatter mode: directly replacing document beginning")
+
+                # Find the first top-level heading position
+                first_header_line = 0
+                for line_idx, line in enumerate(target_lines):  # distinct name to avoid shadowing the outer section counter
+                    if line.strip().startswith('# '):
+                        first_header_line = line_idx
+                        break
+
+                thread_safe_print(f"   šŸ“ Frontmatter range: line 1 to {first_header_line}")
+
+                # Split new content by lines, preserving original structure including trailing empty lines
+                new_lines = target_new_content.splitlines(keepends=True)
+
+                # If the original content ends with \n, there should be an empty line after the last content line.
+                # splitlines() doesn't create this empty line, so we need to add it manually.
+                if target_new_content.endswith('\n'):
+                    new_lines.append('\n')
+                elif target_new_content:
+                    # If content doesn't end with a newline, ensure the last line has one
+                    if not new_lines[-1].endswith('\n'):
+                        new_lines[-1] += '\n'
+
+                # Replace frontmatter
+                target_lines[0:first_header_line] = new_lines
+
+                thread_safe_print(f"   āœ… Replaced {first_header_line} lines of frontmatter with {len(new_lines)} lines")
+
+            elif insertion_type == "before_reference":
+                # Insert logic: insert before specified line
+                if target_line_num == -1:
+                    thread_safe_print(f"   āŒ Invalid insert operation for bottom section")
+                    continue
+
+                thread_safe_print(f"   šŸ“ Insert mode: inserting before line {target_line_num}")
+
+                # Ensure content format is correct
+                if not target_new_content.endswith('\n'):
+                    target_new_content += '\n'
+
+                # Ensure spacing between sections
+                if not target_new_content.endswith('\n\n'):
+                    target_new_content += '\n'
+
+                # Split content by lines
+                new_lines = target_new_content.splitlines(keepends=True)
+
+                # Insert at specified position
+                insert_position = target_line_num - 1  # Convert to 0-based index
+                if insert_position < 0:
+                    insert_position = 0
+                elif insert_position > len(target_lines):
+                    insert_position = len(target_lines)
+
+                # Execute insertion
+                for j, line in enumerate(new_lines):
+                    target_lines.insert(insert_position + j, line)
+
+                thread_safe_print(f"   āœ… Inserted {len(new_lines)} lines of content")
+
+            else:
+                # Replace logic: find target section and replace
+                if target_line_num == -1:
+                    thread_safe_print(f"   āŒ Invalid replace operation for bottom section")
+                    continue
+
+                thread_safe_print(f"   šŸ”„ Replace mode: replacing section starting at line {target_line_num}")
+
+                # Ensure content format is correct
+                if not target_new_content.endswith('\n'):
+                    target_new_content += '\n'
+
+                # Ensure spacing between sections
+                if not target_new_content.endswith('\n\n'):
+                    target_new_content += '\n'
+
+                # Find section end position
+                start_line = target_line_num - 1  # Convert to 0-based index
+
+                if start_line >= len(target_lines):
+                    thread_safe_print(f"   āŒ Line number out of range: {target_line_num} > {len(target_lines)}")
+                    continue
+
+                # Find section end position
+                end_line = find_section_end_for_update(target_lines, start_line, target_hierarchy)
+
+                thread_safe_print(f"   šŸ“ Replace range: line {start_line + 1} to {end_line}")
+
+                # Split new content by lines
+                new_lines = target_new_content.splitlines(keepends=True)
+
+                # Replace content
+                target_lines[start_line:end_line] = new_lines
+
+                thread_safe_print(f"   āœ… Replaced {end_line - start_line} lines with {len(new_lines)} lines")
+
+    with open(target_file_path, 'w', encoding='utf-8') as f:
+        f.writelines(target_lines)
+
+    thread_safe_print(f"\nāœ… Target document update completed!")
+    thread_safe_print(f"šŸ“„ Updated file: {target_file_path}")
+
+    return True
+
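+# Behaviour sketch for find_section_end_for_update below: given, for example,
+# lines = ["## A\n", "text\n", "### B\n"] and start_line = 0, the function
+# returns 2, so a replace touches only "## A" and its direct text and leaves
+# the "### B" subsection intact.
+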
thread_safe_print(f" āŒ Line number out of range: {target_line_num} > {len(target_lines)}") + continue + + # Find section end position + end_line = find_section_end_for_update(target_lines, start_line, target_hierarchy) + + thread_safe_print(f" šŸ“ Replace range: line {start_line + 1} to {end_line}") + + # Split new content by lines + new_lines = target_new_content.splitlines(keepends=True) + + # Replace content + target_lines[start_line:end_line] = new_lines + + thread_safe_print(f" āœ… Replaced {end_line - start_line} lines with {len(new_lines)} lines") + + + with open(target_file_path, 'w', encoding='utf-8') as f: + f.writelines(target_lines) + + thread_safe_print(f"\nāœ… Target document update completed!") + thread_safe_print(f"šŸ“„ Updated file: {target_file_path}") + + return True + +def find_section_end_for_update(lines, start_line, target_hierarchy): + """Find section end position - based on test_target_update.py logic""" + current_line = lines[start_line].strip() + + if target_hierarchy == "frontmatter": + # Frontmatter special handling: from --- to second ---, then to first top-level heading + if start_line == 0 and current_line.startswith('---'): + # Find second --- + for i in range(start_line + 1, len(lines)): + if lines[i].strip() == '---': + # Found frontmatter end, but need to include up to next content start + # Look for first non-empty line or first heading + for j in range(i + 1, len(lines)): + line = lines[j].strip() + if line and line.startswith('# '): + thread_safe_print(f" šŸ“ Frontmatter ends at line {j} (before first top-level heading)") + return j + elif line and not line.startswith('#'): + # If there's other content, end there + thread_safe_print(f" šŸ“ Frontmatter ends at line {j} (before other content)") + return j + # If no other content found, end after second --- + thread_safe_print(f" šŸ“ Frontmatter ends at line {i+1} (after second ---)") + return i + 1 + # If not standard frontmatter format, find first top-level heading + for i in range(start_line + 1, len(lines)): + if lines[i].strip().startswith('# '): + thread_safe_print(f" šŸ“ Frontmatter ends at line {i} (before first top-level heading)") + return i + # If no top-level heading found, process entire file + return len(lines) + + if current_line.startswith('#'): + # Use file_updater.py method to calculate heading level + current_level = len(current_line.split()[0]) if current_line.split() else 0 + thread_safe_print(f" šŸ” Current heading level: {current_level} (heading: {current_line[:50]}...)") + + # Special handling for top-level headings: only process until first second-level heading + if current_level == 1: + for i in range(start_line + 1, len(lines)): + line = lines[i].strip() + if line.startswith('##'): # Find first second-level heading + thread_safe_print(f" šŸ“ Top-level heading ends at line {i} (before first second-level heading)") + return i + # If no second-level heading found, look for next top-level heading + for i in range(start_line + 1, len(lines)): + line = lines[i].strip() + if line.startswith('#') and not line.startswith('##'): + thread_safe_print(f" šŸ“ Top-level heading ends at line {i} (before next top-level heading)") + return i + else: + # For other level headings, stop at ANY header to get only direct content + # This prevents including sub-sections in the update range + for i in range(start_line + 1, len(lines)): + line = lines[i].strip() + if line.startswith('#'): + # Stop at ANY header to get only direct content + thread_safe_print(f" šŸ“ Found header at line {i}: 
{line[:30]}... (stopping for direct content only)") + return i + + # If not found, return file end + thread_safe_print(f" šŸ“ No end position found, using file end") + return len(lines) + + # Non-heading line, only replace current line + return start_line + 1 diff --git a/scripts/translate_doc_pr/main_workflow.py b/scripts/translate_doc_pr/main_workflow.py new file mode 100644 index 0000000000000..12260334ec206 --- /dev/null +++ b/scripts/translate_doc_pr/main_workflow.py @@ -0,0 +1,691 @@ +""" +Main Entry Point for GitHub Workflow +Orchestrates the entire auto-sync workflow in GitHub Actions environment +""" + +import sys +import os +import json +import threading +import tiktoken +from github import Github, Auth + +# Conditional import for Gemini +try: + from google import genai + GEMINI_AVAILABLE = True +except ImportError: + GEMINI_AVAILABLE = False + +# Import all modules +from pr_analyzer import analyze_source_changes, get_repo_config, get_target_hierarchy_and_content, parse_pr_url +from file_adder import process_added_files +from file_deleter import process_deleted_files +from file_updater import process_files_in_batches, process_added_sections, process_modified_sections, process_deleted_sections +from toc_processor import process_toc_files +from section_matcher import match_source_diff_to_target + +# Configuration from environment variables +SOURCE_PR_URL = os.getenv("SOURCE_PR_URL") +TARGET_PR_URL = os.getenv("TARGET_PR_URL") +GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") +AI_PROVIDER = os.getenv("AI_PROVIDER", "deepseek") +TARGET_REPO_PATH = os.getenv("TARGET_REPO_PATH") + +# AI configuration +DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_TOKEN") +DEEPSEEK_BASE_URL = "https://api.deepseek.com" +GEMINI_API_KEY = os.getenv("GEMINI_API_TOKEN") +GEMINI_MODEL_NAME = "gemini-2.0-flash" + +# Processing limit configuration +MAX_NON_SYSTEM_SECTIONS_FOR_AI = 120 +SOURCE_TOKEN_LIMIT = 5000 # Maximum tokens for source new_content before skipping file processing + +# AI configuration +AI_MAX_TOKENS = 20000 # Maximum tokens for AI translation requests + +# Special file configuration +SPECIAL_FILES = ["TOC.md"] +IGNORE_FILES = ["faq/ddl-faq.md","command-line-flags-for-tidb-configuration.md","pd-configuration-file.md"] + +# Repository configuration for workflow +def get_workflow_repo_configs(): + """Get repository configuration based on environment variables""" + if not SOURCE_PR_URL or not TARGET_PR_URL: + raise ValueError("SOURCE_PR_URL and TARGET_PR_URL must be set") + + # Parse source and target repo info + source_parts = SOURCE_PR_URL.split('/') + target_parts = TARGET_PR_URL.split('/') + + source_owner, source_repo = source_parts[-4], source_parts[-3] + target_owner, target_repo = target_parts[-4], target_parts[-3] + + source_repo_key = f"{source_owner}/{source_repo}" + target_repo_key = f"{target_owner}/{target_repo}" + + # Determine language direction based on repo names + if source_repo.endswith('-cn') and not target_repo.endswith('-cn'): + # Chinese to English + source_language = "Chinese" + target_language = "English" + elif not source_repo.endswith('-cn') and target_repo.endswith('-cn'): + # English to Chinese + source_language = "English" + target_language = "Chinese" + else: + # Default fallback + source_language = "English" + target_language = "Chinese" + + return { + source_repo_key: { + "target_repo": target_repo_key, + "target_local_path": TARGET_REPO_PATH, + "source_language": source_language, + "target_language": target_language + } + } + +# Thread-safe printing function +print_lock 
= threading.Lock()
+
+def thread_safe_print(*args, **kwargs):
+    with print_lock:
+        print(*args, **kwargs)
+
+def ensure_temp_output_dir():
+    """Ensure the temp_output directory exists"""
+    # Get the directory of the current script
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    temp_dir = os.path.join(script_dir, "temp_output")
+    os.makedirs(temp_dir, exist_ok=True)
+    return temp_dir
+
+def clean_temp_output_dir():
+    """Clean the temp_output directory at the start of execution"""
+    import shutil
+    # Get the directory of the current script
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    temp_dir = os.path.join(script_dir, "temp_output")
+    if os.path.exists(temp_dir):
+        if os.path.isdir(temp_dir):
+            shutil.rmtree(temp_dir)
+            print(f"🧹 Cleaned existing temp_output directory")
+        else:
+            # Remove file if it exists
+            os.remove(temp_dir)
+            print(f"🧹 Removed existing temp_output file")
+    os.makedirs(temp_dir, exist_ok=True)
+    print(f"šŸ“ Created temp_output directory: {temp_dir}")
+    return temp_dir
+
+def estimate_tokens(text):
+    """Calculate accurate token count using tiktoken (GPT-4/3.5 encoding)"""
+    if not text:
+        return 0
+    try:
+        enc = tiktoken.get_encoding("cl100k_base")  # GPT-4/3.5 encoding
+        tokens = enc.encode(text)
+        return len(tokens)
+    except Exception as e:
+        # Fall back to character approximation if tiktoken fails
+        thread_safe_print(f"   āš ļø Tiktoken encoding failed: {e}, using character approximation")
+        return len(text) // 4
+
+def print_token_estimation(prompt_text, context="AI translation"):
+    """Print accurate token consumption for a request"""
+    actual_tokens = estimate_tokens(prompt_text)
+    char_count = len(prompt_text)
+    thread_safe_print(f"   šŸ’° {context}")
+    thread_safe_print(f"      šŸ“ Input: {char_count:,} characters")
+    thread_safe_print(f"      šŸ”¢ Actual tokens: {actual_tokens:,} (using tiktoken cl100k_base)")
+    return actual_tokens
+
+class UnifiedAIClient:
+    """Unified interface for different AI providers"""
+
+    def __init__(self, provider="deepseek"):
+        self.provider = provider
+        if provider == "deepseek":
+            from openai import OpenAI
+            self.client = OpenAI(api_key=DEEPSEEK_API_KEY, base_url=DEEPSEEK_BASE_URL)
+            self.model = "deepseek-chat"
+        elif provider == "gemini":
+            if not GEMINI_AVAILABLE:
+                raise ImportError("google-genai package not installed. Run: pip install google-genai")
+            if not GEMINI_API_KEY:
+                raise ValueError("GEMINI_API_TOKEN environment variable must be set")
+            self.client = genai.Client(api_key=GEMINI_API_KEY)
+            self.model = GEMINI_MODEL_NAME
+        else:
+            raise ValueError(f"Unsupported AI provider: {provider}")
+
+    def chat_completion(self, messages, temperature=0.1, max_tokens=20000):
+        """Unified chat completion interface"""
+        if self.provider == "deepseek":
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                temperature=temperature,
+                max_tokens=max_tokens
+            )
+            return response.choices[0].message.content.strip()
+        elif self.provider == "gemini":
+            try:
+                # Convert OpenAI-style messages to Gemini format
+                prompt = self._convert_messages_to_prompt(messages)
+                thread_safe_print(f"   šŸ”„ Calling Gemini API...")
+
+                # google-genai SDK call format: generate_content(model=..., contents=...)
+                response = self.client.models.generate_content(
+                    model=self.model,
+                    contents=prompt
+                )
+
+                if response and response.text:
+                    thread_safe_print(f"   āœ… Gemini response received")
+                    return response.text.strip()
+                else:
+                    thread_safe_print(f"   āš ļø Gemini response was empty or blocked")
+                    return "No response from Gemini"
+
+            except Exception as e:
+                thread_safe_print(f"   āŒ Gemini API error: {str(e)}")
+                # Fallback: suggest switching to DeepSeek
+                thread_safe_print(f"   šŸ’” Consider switching to DeepSeek by setting AI_PROVIDER=deepseek")
+                raise e
+
+    def _convert_messages_to_prompt(self, messages):
+        """Convert OpenAI-style messages to a single prompt for Gemini"""
+        prompt_parts = []
+        for message in messages:
+            role = message.get("role", "user")
+            content = message.get("content", "")
+            if role == "user":
+                prompt_parts.append(content)
+            elif role == "system":
+                prompt_parts.append(f"System: {content}")
+        return "\n\n".join(prompt_parts)
+
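+# Usage sketch for UnifiedAIClient (illustrative only; assumes the API keys
+# configured above are set in the environment):
+#
+#   client = UnifiedAIClient(provider="deepseek")
+#   reply = client.chat_completion(
+#       [{"role": "user", "content": "Translate to English: ä½ å„½"}],
+#       temperature=0.1,
+#       max_tokens=200,
+#   )
+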
+def check_source_token_limit(source_diff_dict_file, token_limit=SOURCE_TOKEN_LIMIT):
+    """Check if the total tokens of all new_content in source-diff-dict exceeds the limit"""
+    try:
+        with open(source_diff_dict_file, 'r', encoding='utf-8') as f:
+            source_diff_dict = json.load(f)
+
+        total_new_content = ""
+        section_count = 0
+
+        for key, section_data in source_diff_dict.items():
+            if isinstance(section_data, dict):
+                new_content = section_data.get('new_content', '')
+                if new_content:
+                    total_new_content += new_content + "\n"
+                    section_count += 1
+
+        if not total_new_content.strip():
+            thread_safe_print(f"   āš ļø No new_content found in {source_diff_dict_file}")
+            return True, 0, 0  # Allow processing if no content to check
+
+        total_tokens = estimate_tokens(total_new_content)
+        char_count = len(total_new_content)
+
+        thread_safe_print(f"   šŸ“Š Source token limit check:")
+        thread_safe_print(f"      šŸ“ Total new_content: {char_count:,} characters from {section_count} sections")
+        thread_safe_print(f"      šŸ”¢ Total tokens: {total_tokens:,}")
+        thread_safe_print(f"      🚧 Token limit: {token_limit:,}")
+
+        if total_tokens > token_limit:
+            thread_safe_print(f"      āŒ Token limit exceeded! 
({total_tokens:,} > {token_limit:,})") + return False, total_tokens, token_limit + else: + thread_safe_print(f" āœ… Within token limit ({total_tokens:,} ≤ {token_limit:,})") + return True, total_tokens, token_limit + + except Exception as e: + thread_safe_print(f" āŒ Error checking token limit for {source_diff_dict_file}: {e}") + return True, 0, 0 # Allow processing on error to avoid blocking + +def get_pr_diff(pr_url, github_client): + """Get the diff content from a GitHub PR (from auto-sync-pr-changes.py)""" + try: + from pr_analyzer import parse_pr_url + owner, repo, pr_number = parse_pr_url(pr_url) + repository = github_client.get_repo(f"{owner}/{repo}") + pr = repository.get_pull(pr_number) + + # Get files and their patches + files = pr.get_files() + diff_content = [] + + for file in files: + if file.filename.endswith('.md') and file.patch: + diff_content.append(f"File: {file.filename}") + diff_content.append(file.patch) + diff_content.append("-" * 80) + + return "\n".join(diff_content) + + except Exception as e: + thread_safe_print(f" āŒ Error getting PR diff: {e}") + return None + +def filter_diff_by_operation_type(pr_diff, operation_type, target_sections=None): + """Filter PR diff to only include changes relevant to specific operation type""" + + if not pr_diff: + return "" + + if operation_type == "modified": + # For modified sections, we want the full diff but focus on changed content + return pr_diff + elif operation_type == "added": + # For added sections, we want to show what was added + filtered_lines = [] + for line in pr_diff.split('\n'): + if line.startswith('+') and not line.startswith('+++'): + filtered_lines.append(line) + elif line.startswith('@@') or line.startswith('File:'): + filtered_lines.append(line) + return '\n'.join(filtered_lines) + elif operation_type == "deleted": + # For deleted sections, we want to show what was removed + filtered_lines = [] + for line in pr_diff.split('\n'): + if line.startswith('-') and not line.startswith('---'): + filtered_lines.append(line) + elif line.startswith('@@') or line.startswith('File:'): + filtered_lines.append(line) + return '\n'.join(filtered_lines) + + return pr_diff + +def filter_diff_for_target_file(pr_diff, target_file, source_diff_dict): + """Extract file-specific diff from the complete PR diff based on source files that map to the target file""" + if not pr_diff or not source_diff_dict: + return pr_diff + + # Extract source files that contribute to this target file + source_files = set() + for key, section_data in source_diff_dict.items(): + if isinstance(section_data, dict): + source_file = section_data.get('source_file', '') + if source_file: + source_files.add(source_file) + + if not source_files: + print(f" āš ļø No source files found in source_diff_dict, using complete PR diff") + return pr_diff + + print(f" šŸ“„ Source files contributing to {target_file}: {list(source_files)}") + + # Filter PR diff to only include changes from these source files + filtered_lines = [] + current_file = None + include_section = False + + for line in pr_diff.split('\n'): + if line.startswith('File: '): + current_file = line.replace('File: ', '').strip() + include_section = current_file in source_files + if include_section: + filtered_lines.append(line) + elif line.startswith('-' * 80): + if include_section: + filtered_lines.append(line) + elif include_section: + filtered_lines.append(line) + + file_specific_diff = '\n'.join(filtered_lines) + print(f" šŸ“Š Filtered diff: {len(file_specific_diff)} chars (from {len(pr_diff)} 
chars)") + + return file_specific_diff if file_specific_diff.strip() else pr_diff + +def extract_file_diff_from_pr(pr_diff, source_file_path): + """Extract diff content for a specific source file from the complete PR diff""" + if not pr_diff: + return "" + + filtered_lines = [] + current_file = None + include_section = False + + for line in pr_diff.split('\n'): + if line.startswith('File: '): + current_file = line.replace('File: ', '').strip() + include_section = (current_file == source_file_path) + if include_section: + filtered_lines.append(line) + elif line.startswith('-' * 80): + if include_section: + filtered_lines.append(line) + include_section = False # End of this file's section + elif include_section: + filtered_lines.append(line) + + return '\n'.join(filtered_lines) + +def determine_file_processing_type(source_file_path, file_sections, special_files=None): + """Determine how to process a file based on operation type and file characteristics""" + + # Check if this is a special file (like TOC.md) + if special_files and os.path.basename(source_file_path) in special_files: + return "special_file_toc" + + # For all other modified files, use regular processing + return "regular_modified" + +def process_regular_modified_file(source_file_path, file_sections, file_diff, pr_url, github_client, ai_client, repo_config, max_sections): + """Process a regular markdown file that has been modified""" + try: + print(f" šŸ“ Processing as regular modified file: {source_file_path}") + + # Extract the actual sections from the file_sections structure + # file_sections contains: {'sections': {...}, 'original_hierarchy': {...}, 'current_hierarchy': {...}} + if isinstance(file_sections, dict) and 'sections' in file_sections: + actual_sections = file_sections['sections'] + else: + # Fallback: assume file_sections is already the sections dict + actual_sections = file_sections + + print(f" šŸ“Š Extracted sections: {len(actual_sections)} sections") + + # CRITICAL: Load the source-diff-dict.json and perform matching + import json + import os + from section_matcher import match_source_diff_to_target + from pr_analyzer import get_target_hierarchy_and_content + + # Load source-diff-dict.json with file prefix + temp_dir = ensure_temp_output_dir() + file_prefix = source_file_path.replace('/', '-').replace('.md', '') + source_diff_dict_file = os.path.join(temp_dir, f"{file_prefix}-source-diff-dict.json") + if os.path.exists(source_diff_dict_file): + with open(source_diff_dict_file, 'r', encoding='utf-8') as f: + source_diff_dict = json.load(f) + print(f" šŸ“‚ Loaded source diff dict with {len(source_diff_dict)} sections from {source_diff_dict_file}") + + # Check source token limit before proceeding with processing + print(f" šŸ” Checking source token limit...") + within_limit, total_tokens, token_limit = check_source_token_limit(source_diff_dict_file) + if not within_limit: + print(f" 🚫 Skipping file processing: source content exceeds token limit") + print(f" šŸ“Š Total tokens: {total_tokens:,} > Limit: {token_limit:,}") + print(f" ā­ļø File {source_file_path} will not be processed") + return False + + else: + print(f" āŒ {source_diff_dict_file} not found") + return False + + # Get target file hierarchy and content + target_repo = repo_config['target_repo'] + target_hierarchy, target_lines = get_target_hierarchy_and_content(source_file_path, github_client, target_repo) + + if not target_hierarchy or not target_lines: + print(f" āŒ Could not get target file content for {source_file_path}") + return False + + 
print(f" šŸ“– Target file: {len(target_hierarchy)} sections, {len(target_lines)} lines") + + # Perform source diff to target matching + print(f" šŸ”— Matching source diff to target...") + enhanced_sections = match_source_diff_to_target( + source_diff_dict, + target_hierarchy, + target_lines, + ai_client, + repo_config, + max_sections, + AI_MAX_TOKENS + ) + + if not enhanced_sections: + print(f" āŒ No sections matched") + return False + + print(f" āœ… Matched {len(enhanced_sections)} sections") + + # Save the match result for reference + match_file = os.path.join(temp_dir, f"{source_file_path.replace('/', '-').replace('.md', '')}-match_source_diff_to_target.json") + with open(match_file, 'w', encoding='utf-8') as f: + json.dump(enhanced_sections, f, ensure_ascii=False, indent=2) + print(f" šŸ’¾ Saved match result to: {match_file}") + + # Step 2: Get AI translation for the matched sections + print(f" šŸ¤– Getting AI translation for matched sections...") + + # Create file data structure with enhanced matching info + # Wrap enhanced_sections in the expected format for process_single_file + file_data = { + source_file_path: { + 'type': 'enhanced_sections', + 'sections': enhanced_sections + } + } + + # Call the existing process_modified_sections function to get AI translation + results = process_modified_sections(file_data, file_diff, pr_url, github_client, ai_client, repo_config, max_sections) + + # Step 3: Update match_source_diff_to_target.json with AI results + if results and len(results) > 0: + file_path, success, ai_updated_sections = results[0] # Get first result + if success and isinstance(ai_updated_sections, dict): + print(f" šŸ“ Step 3: Updating {match_file} with AI results...") + + # Load current match_source_diff_to_target.json + with open(match_file, 'r', encoding='utf-8') as f: + match_data = json.load(f) + + # Add target_new_content field to each section based on AI results + updated_count = 0 + for key, section_data in match_data.items(): + operation = section_data.get('source_operation', '') + + if operation == 'deleted': + # For deleted sections, set target_new_content to null + section_data['target_new_content'] = None + elif key in ai_updated_sections: + # For modified/added sections with AI translation + section_data['target_new_content'] = ai_updated_sections[key] + updated_count += 1 + else: + # For sections not translated, keep original content + section_data['target_new_content'] = section_data.get('target_content', '') + + # Save updated match_source_diff_to_target.json + with open(match_file, 'w', encoding='utf-8') as f: + json.dump(match_data, f, ensure_ascii=False, indent=2) + + print(f" āœ… Updated {updated_count} sections with AI translations in {match_file}") + + # Step 4: Apply updates to target document using update_target_document_from_match_data + print(f" šŸ“ Step 4: Applying updates to target document...") + from file_updater import update_target_document_from_match_data + + success = update_target_document_from_match_data(match_file, repo_config['target_local_path'], source_file_path) + if success: + print(f" šŸŽ‰ Target document successfully updated!") + return True + else: + print(f" āŒ Failed to update target document") + return False + + else: + print(f" āš ļø AI translation failed or returned invalid results") + return False + else: + print(f" āš ļø No results from process_modified_sections") + return False + + except Exception as e: + print(f" āŒ Error processing regular modified file {source_file_path}: {e}") + return False + + +def 
get_workflow_repo_config(pr_url, repo_configs): + """Get repository configuration for workflow environment""" + from pr_analyzer import parse_pr_url + + owner, repo, pr_number = parse_pr_url(pr_url) + source_repo = f"{owner}/{repo}" + + if source_repo not in repo_configs: + raise ValueError(f"Unsupported source repository: {source_repo}. Supported: {list(repo_configs.keys())}") + + config = repo_configs[source_repo].copy() + config['source_repo'] = source_repo + config['pr_number'] = pr_number + + return config + +def main(): + """Main function - orchestrates the entire workflow for GitHub Actions""" + + # Validate environment variables + if not all([SOURCE_PR_URL, TARGET_PR_URL, GITHUB_TOKEN, TARGET_REPO_PATH]): + print("āŒ Missing required environment variables:") + print(f" SOURCE_PR_URL: {SOURCE_PR_URL}") + print(f" TARGET_PR_URL: {TARGET_PR_URL}") + print(f" GITHUB_TOKEN: {'Set' if GITHUB_TOKEN else 'Not set'}") + print(f" TARGET_REPO_PATH: {TARGET_REPO_PATH}") + return + + print(f"šŸ”§ Auto PR Sync Tool (GitHub Workflow Version)") + print(f"šŸ“ Source PR URL: {SOURCE_PR_URL}") + print(f"šŸ“ Target PR URL: {TARGET_PR_URL}") + print(f"šŸ¤– AI Provider: {AI_PROVIDER}") + print(f"šŸ“ Target Repo Path: {TARGET_REPO_PATH}") + + # Clean and prepare temp_output directory + clean_temp_output_dir() + + # Get repository configuration using workflow config + try: + repo_configs = get_workflow_repo_configs() + repo_config = get_workflow_repo_config(SOURCE_PR_URL, repo_configs) + print(f"šŸ“ Source Repo: {repo_config['source_repo']} ({repo_config['source_language']})") + print(f"šŸ“ Target Repo: {repo_config['target_repo']} ({repo_config['target_language']})") + print(f"šŸ“ Target Path: {repo_config['target_local_path']}") + except ValueError as e: + print(f"āŒ {e}") + return + + # Initialize clients + auth = Auth.Token(GITHUB_TOKEN) + github_client = Github(auth=auth) + + # Initialize unified AI client + try: + ai_client = UnifiedAIClient(provider=AI_PROVIDER) + thread_safe_print(f"šŸ¤– AI Provider: {AI_PROVIDER.upper()} ({ai_client.model})") + except Exception as e: + thread_safe_print(f"āŒ Failed to initialize AI client: {e}") + return + + print(f"\nšŸš€ Starting auto-sync for PR: {SOURCE_PR_URL}") + + # Step 1: Get PR diff + print(f"\nšŸ“‹ Step 1: Getting PR diff...") + pr_diff = get_pr_diff(SOURCE_PR_URL, github_client) + if not pr_diff: + print("āŒ Could not get PR diff") + return + print(f"āœ… Got PR diff: {len(pr_diff)} characters") + + # Step 2: Analyze source changes with operation categorization + print(f"\nšŸ“Š Step 2: Analyzing source changes...") + added_sections, modified_sections, deleted_sections, added_files, deleted_files, toc_files = analyze_source_changes( + SOURCE_PR_URL, github_client, + special_files=SPECIAL_FILES, + ignore_files=IGNORE_FILES, + repo_configs=repo_configs, + max_non_system_sections=MAX_NON_SYSTEM_SECTIONS_FOR_AI, + pr_diff=pr_diff # Pass the PR diff to avoid re-fetching + ) + + # Step 3: Process different types of files based on operation type + print(f"\nšŸ“‹ Step 3: Processing files based on operation type...") + + # Import necessary functions + from file_updater import process_modified_sections, update_target_document_from_match_data + from toc_processor import process_toc_files + + # Step 3.1: Process deleted files (file-level deletions) + if deleted_files: + print(f"\nšŸ—‘ļø Step 3.1: Processing {len(deleted_files)} deleted files...") + process_deleted_files(deleted_files, github_client, repo_config) + print(f" āœ… Deleted files processed") + 
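+    # Note: files are processed as deletions first, then additions, then TOC
+    # files, then section-level modifications, presumably so the later steps
+    # never operate on files that no longer exist in the target repository.
+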
+ # Step 3.2: Process added files (file-level additions) + if added_files: + print(f"\nšŸ“„ Step 3.2: Processing {len(added_files)} added files...") + process_added_files(added_files, SOURCE_PR_URL, github_client, ai_client, repo_config) + print(f" āœ… Added files processed") + + # Step 3.3: Process special files (TOC.md and similar) + if toc_files: + print(f"\nšŸ“‹ Step 3.3: Processing {len(toc_files)} special files (TOC)...") + process_toc_files(toc_files, SOURCE_PR_URL, github_client, ai_client, repo_config) + print(f" āœ… Special files processed") + + # Step 3.4: Process modified files (section-level modifications) + if modified_sections: + print(f"\nšŸ“ Step 3.4: Processing {len(modified_sections)} modified files...") + + # Process each modified file separately + for source_file_path, file_sections in modified_sections.items(): + print(f"\nšŸ“„ Processing modified file: {source_file_path}") + + # Extract file-specific diff from the complete PR diff + print(f" šŸ” Extracting file-specific diff for: {source_file_path}") + file_specific_diff = extract_file_diff_from_pr(pr_diff, source_file_path) + + if not file_specific_diff: + print(f" āš ļø No diff found for {source_file_path}, skipping...") + continue + + print(f" šŸ“Š File-specific diff: {len(file_specific_diff)} chars") + + # Determine file processing approach for modified files + file_type = determine_file_processing_type(source_file_path, file_sections, SPECIAL_FILES) + print(f" šŸ” File processing type: {file_type}") + + if file_type == "special_file_toc": + # Special files should have been processed in Step 3.3, skip here + print(f" ā­ļø Special file already processed in Step 3.3, skipping...") + continue + + elif file_type == "regular_modified": + # Regular markdown files with modifications + success = process_regular_modified_file( + source_file_path, + file_sections, + file_specific_diff, + SOURCE_PR_URL, + github_client, + ai_client, + repo_config, + MAX_NON_SYSTEM_SECTIONS_FOR_AI + ) + + if success: + print(f" āœ… Successfully processed {source_file_path}") + else: + print(f" āŒ Failed to process {source_file_path}") + + else: + print(f" āš ļø Unknown file processing type: {file_type} for {source_file_path}, skipping...") + + # Final summary + print(f"šŸ“Š Summary:") + print(f" šŸ“„ Added files: {len(added_files)} processed") + print(f" šŸ—‘ļø Deleted files: {len(deleted_files)} processed") + print(f" šŸ“‹ TOC files: {len(toc_files)} processed") + print(f" šŸ“ Modified files: {len(modified_sections)} processed") + print(f"šŸŽ‰ Workflow completed successfully!") + +if __name__ == "__main__": + main() diff --git a/scripts/translate_doc_pr/pr_analyzer.py b/scripts/translate_doc_pr/pr_analyzer.py new file mode 100644 index 0000000000000..c164da1520163 --- /dev/null +++ b/scripts/translate_doc_pr/pr_analyzer.py @@ -0,0 +1,1447 @@ +#!/usr/bin/env python3 +""" +PR Analyzer Module +Handles PR analysis, diff parsing, content getting, hierarchy building, and section getting +""" + +import json +import os +import re +import threading +from github import Github + +# Thread-safe printing +print_lock = threading.Lock() + +def thread_safe_print(*args, **kwargs): + """Thread-safe print function""" + with print_lock: + print(*args, **kwargs) + + +def parse_pr_url(pr_url): + """Parse PR URL to get repo info""" + parts = pr_url.split('/') + return parts[-4], parts[-3], int(parts[-1]) # owner, repo, pr_number + +def get_repo_config(pr_url, repo_configs): + """Get repository configuration based on source repo""" + owner, repo, 
pr_number = parse_pr_url(pr_url) + source_repo = f"{owner}/{repo}" + + if source_repo not in repo_configs: + raise ValueError(f"Unsupported source repository: {source_repo}. Supported: {list(repo_configs.keys())}") + + config = repo_configs[source_repo].copy() + config['source_repo'] = source_repo + config['pr_number'] = pr_number + + return config + +def get_pr_diff(pr_url, github_client): + """Get the diff content from a GitHub PR""" + try: + owner, repo, pr_number = parse_pr_url(pr_url) + repository = github_client.get_repo(f"{owner}/{repo}") + pr = repository.get_pull(pr_number) + + # Get files and their patches + files = pr.get_files() + diff_content = [] + + for file in files: + if file.filename.endswith('.md') and file.patch: + diff_content.append(f"File: {file.filename}") + diff_content.append(file.patch) + diff_content.append("-" * 80) + + return "\n".join(diff_content) + + except Exception as e: + print(f" āŒ Error getting PR diff: {e}") + return None + +def get_changed_line_ranges(file): + """Get the ranges of lines that were changed in the PR""" + changed_ranges = [] + patch = file.patch + if not patch: + return changed_ranges + + lines = patch.split('\n') + current_line = 0 + + for line in lines: + if line.startswith('@@'): + # Parse the hunk header to get line numbers + match = re.search(r'\+(\d+),?(\d+)?', line) + if match: + current_line = int(match.group(1)) + elif line.startswith('+') and not line.startswith('+++'): + # This is an added line + changed_ranges.append(current_line) + current_line += 1 + elif line.startswith('-') and not line.startswith('---'): + # This is a deleted line, also consider as changed + changed_ranges.append(current_line) + # Don't increment current_line for deleted lines + continue + elif line.startswith(' '): + # Context line + current_line += 1 + + return changed_ranges + +def analyze_diff_operations(file): + """Analyze diff to categorize operations as added, modified, or deleted (improved GitHub-like approach)""" + operations = { + 'added_lines': [], # Lines that were added + 'deleted_lines': [], # Lines that were deleted + 'modified_lines': [] # Lines that were modified (both added and deleted content) + } + + patch = file.patch + if not patch: + return operations + + lines = patch.split('\n') + current_line = 0 + deleted_line = 0 + + # Parse diff and keep track of sequence order for better modification detection + diff_sequence = [] # Track the order of operations in diff + + for i, line in enumerate(lines): + if line.startswith('@@'): + # Parse the hunk header to get line numbers + # Format: @@ -old_start,old_count +new_start,new_count @@ + match = re.search(r'-(\d+),?(\d+)?\s+\+(\d+),?(\d+)?', line) + if match: + deleted_line = int(match.group(1)) + current_line = int(match.group(3)) + elif line.startswith('+') and not line.startswith('+++'): + # This is an added line + added_entry = { + 'line_number': current_line, + 'content': line[1:], # Remove the '+' prefix + 'is_header': line[1:].strip().startswith('#'), + 'diff_index': i # Track position in diff + } + operations['added_lines'].append(added_entry) + diff_sequence.append(('added', added_entry)) + current_line += 1 + elif line.startswith('-') and not line.startswith('---'): + # This is a deleted line + deleted_entry = { + 'line_number': deleted_line, + 'content': line[1:], # Remove the '-' prefix + 'is_header': line[1:].strip().startswith('#'), + 'diff_index': i # Track position in diff + } + operations['deleted_lines'].append(deleted_entry) + diff_sequence.append(('deleted', 
deleted_entry))
+            deleted_line += 1
+        elif line.startswith(' '):
+            # Context line (unchanged)
+            current_line += 1
+            deleted_line += 1
+
+    # GitHub-like modification detection: based on diff sequence proximity
+    modified_pairs = []
+    deleted_headers = [d for d in operations['deleted_lines'] if d['is_header']]
+    added_headers = [a for a in operations['added_lines'] if a['is_header']]
+
+    used_added_indices = set()
+    used_deleted_indices = set()
+
+    # Helper function for semantic similarity
+    def are_headers_similar(old, new):
+        # Remove markdown markers
+        old_clean = old.replace('#', '').replace('`', '').strip()
+        new_clean = new.replace('#', '').replace('`', '').strip()
+
+        # Check if one is a substring/extension of the other
+        if old_clean in new_clean or new_clean in old_clean:
+            return True
+
+        # Check for similar patterns (like appending -pu, -new, etc.)
+        old_base = old_clean.split('-')[0]
+        new_base = new_clean.split('-')[0]
+        if old_base and new_base and old_base == new_base:
+            return True
+
+        return False
+
+    # GitHub-like approach: look for adjacent or close operations in the diff sequence
+    for i, deleted_header in enumerate(deleted_headers):
+        if i in used_deleted_indices:
+            continue
+
+        for j, added_header in enumerate(added_headers):
+            if j in used_added_indices:
+                continue
+
+            deleted_content = deleted_header['content'].strip()
+            added_content = added_header['content'].strip()
+
+            # Check if they are close in the diff sequence (GitHub's approach)
+            diff_distance = abs(added_header['diff_index'] - deleted_header['diff_index'])
+            is_close_in_diff = diff_distance <= 5  # Allow small gap for context lines
+
+            # Check semantic similarity
+            is_similar = are_headers_similar(deleted_content, added_content)
+
+            # GitHub-like logic: prioritize diff proximity + semantic similarity
+            if is_close_in_diff and is_similar:
+                modified_pairs.append({
+                    'deleted': deleted_header,
+                    'added': added_header,
+                    'original_content': deleted_header['content']
+                })
+                used_added_indices.add(j)
+                used_deleted_indices.add(i)
+                break
+            # Fallback: strong semantic similarity even if not adjacent
+            elif is_similar and abs(added_header['line_number'] - deleted_header['line_number']) <= 20:
+                modified_pairs.append({
+                    'deleted': deleted_header,
+                    'added': added_header,
+                    'original_content': deleted_header['content']
+                })
+                used_added_indices.add(j)
+                used_deleted_indices.add(i)
+                break
+
+    # Remove identified modifications from pure additions/deletions
+    for pair in modified_pairs:
+        if pair['deleted'] in operations['deleted_lines']:
+            operations['deleted_lines'].remove(pair['deleted'])
+        if pair['added'] in operations['added_lines']:
+            operations['added_lines'].remove(pair['added'])
+        # Store both new and original content for modified headers
+        modified_entry = pair['added'].copy()
+        modified_entry['original_content'] = pair['original_content']
+        operations['modified_lines'].append(modified_entry)
+
+    return operations
+
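+# Pairing sketch (hypothetical input): if a patch removes the header line
+# "## `tidb_enable_foo`" and adds "## `tidb_enable_foo-v2`" within five diff
+# lines of it, are_headers_similar() matches on the shared "tidb_enable_foo"
+# base, so the pair is recorded as one modified header rather than as a
+# separate deletion and addition.
+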
a code block + in_code_block = True + code_block_delimiter = line[:3] # Store the delimiter type + continue + elif line.startswith(code_block_delimiter): + # Exiting a code block + in_code_block = False + code_block_delimiter = None + continue + + # Skip processing if we're inside a code block + if in_code_block: + continue + + # Process headers only if not in code block + if line.startswith('#'): + match = re.match(r'^(#{1,10})\s+(.+)', line) + if match: + level = len(match.group(1)) + title = match.group(2).strip() + + # Remove items from stack that are at same or deeper level + while level_stack and level_stack[-1][0] >= level: + level_stack.pop() + + # Build hierarchy with special handling for top-level titles + if level == 1: + # Top-level titles are included directly without hierarchy path + hierarchy_line = line + elif level_stack: + # For other levels, build path but skip the top-level title (level 1) + path_parts = [item[1] for item in level_stack if item[0] > 1] # Skip level 1 items + path_parts.append(line) + hierarchy_line = " > ".join(path_parts) + else: + # Fallback for other cases + hierarchy_line = line + + if hierarchy_line: # Only add non-empty hierarchies + all_hierarchy_dict[line_num] = hierarchy_line + + level_stack.append((level, line)) + + return all_hierarchy_dict + +def build_hierarchy_path(lines, line_num, all_headers): + """Build the full hierarchy path for a header at given line""" + if line_num not in all_headers: + return [] + + current_header = all_headers[line_num] + current_level = current_header['level'] + hierarchy_path = [] + + # Find all parent headers + for check_line in sorted(all_headers.keys()): + if check_line >= line_num: + break + + header = all_headers[check_line] + if header['level'] < current_level: + # This is a potential parent + # Remove any headers at same or deeper level + while hierarchy_path and hierarchy_path[-1]['level'] >= header['level']: + hierarchy_path.pop() + hierarchy_path.append(header) + + # Add current header + hierarchy_path.append(current_header) + + return hierarchy_path + +def build_hierarchy_for_modified_section(file_content, target_line_num, original_line, base_hierarchy_dict): + """Build hierarchy path for a modified section using original content""" + lines = file_content.split('\n') + + # Get the level of the original header + original_match = re.match(r'^(#{1,10})\s+(.+)', original_line) + if not original_match: + return None + + original_level = len(original_match.group(1)) + original_title = original_match.group(2).strip() + + # Find parent sections by looking backwards from target line + level_stack = [] + + for line_num in range(1, target_line_num): + if line_num in base_hierarchy_dict: + # This is a header line + line_content = lines[line_num - 1].strip() + if line_content.startswith('#'): + match = re.match(r'^(#{1,10})\s+(.+)', line_content) + if match: + level = len(match.group(1)) + title = match.group(2).strip() + + # Remove items from stack that are at same or deeper level + while level_stack and level_stack[-1][0] >= level: + level_stack.pop() + + # Add this header to stack if it's a potential parent + if level < original_level: + level_stack.append((level, line_content)) + + # Build hierarchy path using original content + if level_stack: + path_parts = [item[1] for item in level_stack[1:]] # Skip first level + path_parts.append(original_line) + hierarchy_line = " > ".join(path_parts) + else: + hierarchy_line = original_line if original_level > 1 else "" + + return hierarchy_line if hierarchy_line else 
None + +def find_section_boundaries(lines, hierarchy_dict): + """Find the start and end line for each section based on hierarchy""" + section_boundaries = {} + + # Sort sections by line number + sorted_sections = sorted(hierarchy_dict.items(), key=lambda x: int(x[0])) + + for i, (line_num, hierarchy) in enumerate(sorted_sections): + start_line = int(line_num) - 1 # Convert to 0-based index + + # Find end line (start of next section at same or higher level) + end_line = len(lines) # Default to end of document + + if start_line >= len(lines): + continue + + # Get current section level + current_line = lines[start_line].strip() + if not current_line.startswith('#'): + continue + + current_level = len(current_line.split()[0]) # Count # characters + + # Look for next section at same or higher level + for j in range(start_line + 1, len(lines)): + line = lines[j].strip() + if line.startswith('#'): + line_level = len(line.split()[0]) if line.split() else 0 + if line_level <= current_level: + end_line = j + break + + section_boundaries[line_num] = { + 'start': start_line, + 'end': end_line, + 'hierarchy': hierarchy, + 'level': current_level + } + + return section_boundaries + +def extract_section_content(lines, start_line, hierarchy_dict): + """Extract the content of a section starting from start_line (includes sub-sections)""" + if not lines or start_line < 1 or start_line > len(lines): + return "" + + start_index = start_line - 1 # Convert to 0-based index + section_content = [] + + # Find the header at start_line + current_line = lines[start_index].strip() + if not current_line.startswith('#'): + return "" + + # Get the level of current header + current_level = len(current_line.split()[0]) # Count # characters + section_content.append(current_line) + + # Special handling for top-level titles (level 1) + if current_level == 1: + # For top-level titles, only extract content until the first next-level header (##) + for i in range(start_index + 1, len(lines)): + line = lines[i].strip() + + if line.startswith('#'): + # Check if this is a header of next level (##, ###, etc.) 
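+                # Worked example of the level-counting idiom used below
+                # (illustrative values): '### Usage notes'.split()[0] is '###',
+                # so len(...) gives level 3; a bare '#' line gives level 1. A
+                # header written without a space, e.g. '#Title', would count
+                # the whole token, a known limitation of this idiom.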
+ line_level = len(line.split()[0]) if line.split() else 0 + if line_level > current_level: + # Found first subsection, stop here for top-level titles + break + elif line_level <= current_level: + # Found same or higher level header, also stop + break + + section_content.append(lines[i].rstrip()) # Keep original line without trailing whitespace + else: + # For non-top-level titles, use the original logic + # Extract content until we hit the next header of same or higher level + for i in range(start_index + 1, len(lines)): + line = lines[i].strip() + + if line.startswith('#'): + # Check if this is a header of same or higher level + line_level = len(line.split()[0]) if line.split() else 0 + if line_level <= current_level: + # Found a header of same or higher level, stop here regardless + # Each section should be extracted individually + break + + section_content.append(lines[i].rstrip()) # Keep original line without trailing whitespace + + return '\n'.join(section_content) + +def extract_section_direct_content(lines, start_line): + """Extract ONLY the direct content of a section (excluding sub-sections) - for source diff dict""" + if not lines or start_line < 1 or start_line > len(lines): + return "" + + start_index = start_line - 1 # Convert to 0-based index + section_content = [] + + # Find the header at start_line + current_line = lines[start_index].strip() + if not current_line.startswith('#'): + return "" + + # Add the header line + section_content.append(current_line) + + # Only extract until the first header (any level) + # This means we stop at ANY header - whether it's a sub-section OR same/higher level + for i in range(start_index + 1, len(lines)): + line = lines[i].strip() + if line.startswith('#'): + # Stop at ANY header to get only direct content + break + section_content.append(lines[i].rstrip()) + + return '\n'.join(section_content) + +def extract_frontmatter_content(file_lines): + """Extract content from the beginning of file to the first top-level header""" + if not file_lines: + return "" + + frontmatter_lines = [] + for i, line in enumerate(file_lines): + line_stripped = line.strip() + # Stop when we hit the first top-level header + if line_stripped.startswith('# '): + break + frontmatter_lines.append(line.rstrip()) + + return '\n'.join(frontmatter_lines) + + +def extract_affected_sections(hierarchy_dict, file_lines): + """Extract all affected sections based on hierarchy dict""" + affected_sections = {} + + for line_num, hierarchy in hierarchy_dict.items(): + if line_num == "0" and hierarchy == "frontmatter": + # Special handling for frontmatter + frontmatter_content = extract_frontmatter_content(file_lines) + if frontmatter_content: + affected_sections[line_num] = frontmatter_content + else: + line_number = int(line_num) + section_content = extract_section_content(file_lines, line_number, hierarchy_dict) + + if section_content: + affected_sections[line_num] = section_content + + return affected_sections + +def find_containing_section(line_num, all_headers): + """Find which section a line belongs to""" + current_section = None + for header_line_num in sorted(all_headers.keys()): + if header_line_num <= line_num: + current_section = header_line_num + else: + break + return current_section + +def find_affected_sections(lines, changed_lines, all_headers): + """Find which sections are affected by the changes""" + affected_sections = set() + + for changed_line in changed_lines: + # Find the section this changed line belongs to + current_section = None + + # Find the most recent 
header before or at the changed line + for line_num in sorted(all_headers.keys()): + if line_num <= changed_line: + current_section = line_num + else: + break + + if current_section: + # Only add the directly affected section (the one that directly contains the change) + affected_sections.add(current_section) + + return affected_sections + +def find_sections_by_operation_type(lines, operations, all_headers, base_hierarchy_dict=None): + """Find sections affected by different types of operations""" + sections = { + 'added': set(), + 'modified': set(), + 'deleted': set() + } + + # Process added lines + for added_line in operations['added_lines']: + line_num = added_line['line_number'] + if added_line['is_header']: + # This is a new header - only mark the section as added if the header itself is new + sections['added'].add(line_num) + # Note: We don't mark sections as "added" just because they contain new non-header content + # That would be a "modified" section, not an "added" section + + # Process modified lines + for modified_line in operations['modified_lines']: + line_num = modified_line['line_number'] + if modified_line['is_header']: + sections['modified'].add(line_num) + else: + section = find_containing_section(line_num, all_headers) + if section: + sections['modified'].add(section) + + # Process deleted lines - use base hierarchy to find deleted sections + for deleted_line in operations['deleted_lines']: + if deleted_line['is_header']: + # Find this header in the base file hierarchy (before deletion) + deleted_title = clean_title_for_matching(deleted_line['content']) + # Use base hierarchy if available, otherwise fall back to current headers + search_hierarchy = base_hierarchy_dict if base_hierarchy_dict else all_headers + + found_deleted = False + for line_num, hierarchy_line in search_hierarchy.items(): + # Extract title from hierarchy line + if ' > ' in hierarchy_line: + original_title = clean_title_for_matching(hierarchy_line.split(' > ')[-1]) + else: + original_title = clean_title_for_matching(hierarchy_line) + + if deleted_title == original_title: + sections['deleted'].add(line_num) + print(f" šŸ—‘ļø Detected deleted section: {deleted_line['content']} (line {line_num})") + found_deleted = True + break + + if not found_deleted: + # If not found by exact match, try partial matching for renamed sections + print(f" āš ļø Could not find deleted section: {deleted_line['content']}") + + return sections + + +def get_target_hierarchy_and_content(file_path, github_client, target_repo): + """Get target hierarchy and content""" + try: + repository = github_client.get_repo(target_repo) + file_content = repository.get_contents(file_path, ref="master").decoded_content.decode('utf-8') + lines = file_content.split('\n') + + # Build hierarchy using same method + hierarchy = build_hierarchy_dict(file_content) + + return hierarchy, lines + except Exception as e: + print(f" āŒ Error getting target file: {e}") + return {}, [] + +def get_source_sections_content(pr_url, file_path, source_affected, github_client): + """Get the content of source sections for better context""" + try: + owner, repo, pr_number = parse_pr_url(pr_url) + repository = github_client.get_repo(f"{owner}/{repo}") + pr = repository.get_pull(pr_number) + + # Get the source file content + file_content = repository.get_contents(file_path, ref=pr.head.sha).decoded_content.decode('utf-8') + lines = file_content.split('\n') + + # Extract source sections + source_sections = {} + + for line_num, hierarchy in source_affected.items(): + if 
line_num == "0" and hierarchy == "frontmatter":
+                # Special handling for frontmatter
+                frontmatter_content = extract_frontmatter_content(lines)
+                if frontmatter_content:
+                    source_sections[line_num] = frontmatter_content
+            else:
+                line_number = int(line_num)
+                section_content = extract_section_content(lines, line_number, source_affected)
+                if section_content:
+                    source_sections[line_num] = section_content
+
+        return source_sections
+    except Exception as e:
+        thread_safe_print(f"   āš ļø Could not get source sections: {e}")
+        return {}
+
+def get_source_file_hierarchy(file_path, pr_url, github_client, get_base_version=False):
+    """Get source file hierarchy from PR head or base"""
+    try:
+        owner, repo, pr_number = parse_pr_url(pr_url)
+        repository = github_client.get_repo(f"{owner}/{repo}")
+        pr = repository.get_pull(pr_number)
+
+        if get_base_version:
+            # Get the source file content before PR changes (base version)
+            source_file_content = repository.get_contents(file_path, ref=pr.base.sha).decoded_content.decode('utf-8')
+        else:
+            # Get the source file content after PR changes (head version)
+            source_file_content = repository.get_contents(file_path, ref=pr.head.sha).decoded_content.decode('utf-8')
+
+        source_hierarchy = build_hierarchy_dict(source_file_content)
+
+        return source_hierarchy
+
+    except Exception as e:
+        thread_safe_print(f"   āŒ Error getting source file hierarchy: {e}")
+        return {}
+
+# Helper function needed for find_sections_by_operation_type
+def clean_title_for_matching(title):
+    """Clean title for matching by removing markdown formatting and span elements"""
+    if not title:
+        return ""
+
+    # Remove span elements like <span class="version-mark">New in v5.0</span>
+    title = re.sub(r'<span[^>]*>.*?</span>', '', title)
+
+    # Remove markdown header prefix (# ## ### etc.)
+    title = re.sub(r'^#{1,6}\s*', '', title.strip())
+
+    # Remove backticks
+    title = title.replace('`', '')
+
+    # Strip whitespace
+    title = title.strip()
+
+    return title
+
+def find_previous_section_for_added(added_sections, hierarchy_dict):
+    """Find the previous section hierarchy for each added section group"""
+    insertion_points = {}
+
+    if not added_sections:
+        return insertion_points
+
+    # Group consecutive added sections
+    added_list = sorted(list(added_sections))
+    groups = []
+    current_group = [added_list[0]]
+
+    for i in range(1, len(added_list)):
+        if added_list[i] - added_list[i-1] <= 10:  # Consider sections within 10 lines as consecutive
+            current_group.append(added_list[i])
+        else:
+            groups.append(current_group)
+            current_group = [added_list[i]]
+    groups.append(current_group)
+
+    # For each group, find the previous section hierarchy
+    for group in groups:
+        first_new_section = min(group)
+
+        # Find the section that comes before this group
+        previous_section_line = None
+        previous_section_hierarchy = None
+
+        for line_num_str in sorted(hierarchy_dict.keys(), key=int):
+            line_num = int(line_num_str)
+            if line_num < first_new_section:
+                previous_section_line = line_num
+                previous_section_hierarchy = hierarchy_dict[line_num_str]
+            else:
+                break
+
+        if previous_section_hierarchy:
+            insertion_points[f"group_{groups.index(group)}"] = {
+                'previous_section_hierarchy': previous_section_hierarchy,
+                'previous_section_line': previous_section_line,
+                'new_sections': group,
+                'insertion_type': 'multiple' if len(group) > 1 else 'single'
+            }
+            print(f"   šŸ“ Added section group: {len(group)} sections after '{previous_section_hierarchy}'")
+        else:
+            print(f"   āš ļø Could not find previous section for added sections starting at line {first_new_section}")
+
+    return
insertion_points + +def build_source_diff_dict(modified_sections, added_sections, deleted_sections, all_hierarchy_dict, base_hierarchy_dict, operations, file_content, base_file_content): + """Build source diff dictionary with correct structure for matching""" + from section_matcher import clean_title_for_matching + source_diff_dict = {} + + # Helper function to extract section content (only direct content, no sub-sections) + def extract_section_content_for_diff(line_num, hierarchy_dict): + if str(line_num) == "0": + # Handle frontmatter + return extract_frontmatter_content(file_content.split('\n')) + else: + return extract_section_direct_content(file_content.split('\n'), line_num) + + # Helper function to extract old content from base file (only direct content, no sub-sections) + def extract_old_content_for_diff(line_num, base_hierarchy_dict, base_file_content): + if str(line_num) == "0": + # Handle frontmatter from base file + return extract_frontmatter_content(base_file_content.split('\n')) + else: + return extract_section_direct_content(base_file_content.split('\n'), line_num) + + # Helper function to extract old content by hierarchy (for modified sections that may have moved) + def extract_old_content_by_hierarchy(original_hierarchy, base_hierarchy_dict, base_file_content): + """Extract old content by finding the section with matching hierarchy in base file (only direct content)""" + if original_hierarchy == "frontmatter": + return extract_frontmatter_content(base_file_content.split('\n')) + + # Find the line number in base file that matches the original hierarchy + for base_line_num_str, base_hierarchy in base_hierarchy_dict.items(): + if base_hierarchy == original_hierarchy: + base_line_num = int(base_line_num_str) if base_line_num_str != "0" else 0 + if base_line_num == 0: + return extract_frontmatter_content(base_file_content.split('\n')) + else: + return extract_section_direct_content(base_file_content.split('\n'), base_line_num) + + # If exact match not found, return empty string + print(f" āš ļø Could not find matching hierarchy in base file: {original_hierarchy}") + return "" + + # Helper function to build complete hierarchy for a section using base file info + def build_complete_original_hierarchy(line_num, current_hierarchy, base_hierarchy_dict, operations): + """Build complete hierarchy path for original section""" + line_num_str = str(line_num) + + # Special cases: frontmatter and top-level titles + if line_num_str == "0": + return "frontmatter" + + # Check if this line was modified and has original content + for modified_line in operations.get('modified_lines', []): + if (modified_line.get('is_header') and + modified_line.get('line_number') == line_num and + 'original_content' in modified_line): + original_line = modified_line['original_content'].strip() + + # For top-level titles, return the original content directly + if ' > ' not in current_hierarchy: + return original_line + + # For nested sections, build the complete hierarchy using original content + # Find the hierarchy path using base hierarchy dict and replace the leaf with original + if line_num_str in base_hierarchy_dict: + base_hierarchy = base_hierarchy_dict[line_num_str] + if ' > ' in base_hierarchy: + # Replace the leaf (last part) with original content + hierarchy_parts = base_hierarchy.split(' > ') + hierarchy_parts[-1] = original_line + return ' > '.join(hierarchy_parts) + else: + # Single level, return original content + return original_line + + # Fallback: return original content + return original_line 
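+        # Worked example (hypothetical data): if a PR renamed the header
+        # '### `--host`' to '### `--hosts`' at line 120 and the base hierarchy
+        # for that line is '## Options > ### `--host`', the branch above
+        # rebuilds '## Options > ### `--host`', so the pre-rename section can
+        # still be located in the base document.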
+ + # If not modified, use base hierarchy if available + if line_num_str in base_hierarchy_dict: + return base_hierarchy_dict[line_num_str] + + # If not found in base (new section), use current hierarchy + return current_hierarchy + + # Process modified sections + for line_num_str, hierarchy in modified_sections.items(): + line_num = int(line_num_str) if line_num_str != "0" else 0 + + # Build complete original hierarchy + original_hierarchy = build_complete_original_hierarchy(line_num, hierarchy, base_hierarchy_dict, operations) + + # Extract both old and new content + new_content = extract_section_content_for_diff(line_num, all_hierarchy_dict) + # Use hierarchy-based lookup for old content instead of line number + old_content = extract_old_content_by_hierarchy(original_hierarchy, base_hierarchy_dict, base_file_content) + + # Only include if content actually changed + if new_content != old_content: + # Check if this is a bottom modified section (no next section in base file) + is_bottom_modified = False + if line_num_str in base_hierarchy_dict: + # Get all sections in base file sorted by line number + base_sections = sorted([(int(ln), hier) for ln, hier in base_hierarchy_dict.items() if ln != "0"]) + + # Check if there's any section after this line in base file + has_next_section = any(base_line > line_num for base_line, _ in base_sections) + + if not has_next_section: + is_bottom_modified = True + print(f" āœ… Bottom modified section detected at line {line_num_str}: no next section in base file") + + # Use special marker for bottom modified sections + if is_bottom_modified: + final_original_hierarchy = f"bottom-modified-{line_num}" + else: + final_original_hierarchy = original_hierarchy + + source_diff_dict[f"modified_{line_num_str}"] = { + "new_line_number": line_num, + "original_hierarchy": final_original_hierarchy, + "operation": "modified", + "new_content": new_content, + "old_content": old_content + } + print(f" āœ… Real modification detected at line {line_num_str}: content changed") + else: + print(f" 🚫 Filtered out false positive at line {line_num_str}: content unchanged (likely line shift artifact)") + + # Process added sections - find next section from current document hierarchy + for line_num_str, hierarchy in added_sections.items(): + line_num = int(line_num_str) + + print(f" šŸ” Finding next section for added section at line {line_num}: {hierarchy}") + + # Strategy: Find the next section directly from the current document (post-PR) + # Get all current sections sorted by line number + current_sections = sorted([(int(ln), curr_hierarchy) for ln, curr_hierarchy in all_hierarchy_dict.items()]) + print(f" šŸ“‹ Current sections around line {line_num}: {[(ln, h.split(' > ')[-1] if ' > ' in h else h) for ln, h in current_sections if abs(ln - line_num) <= 15]}") + + next_section_original_hierarchy = None + + # Find the next section that comes after the added section in the current document + for curr_line_num, curr_hierarchy in current_sections: + if curr_line_num > line_num: + # Found the next section in current document + # Now find its original hierarchy in base document + curr_line_str = str(curr_line_num) + + # Get the original hierarchy for this next section + # Use the same logic as build_complete_original_hierarchy to get original content + if curr_line_str in base_hierarchy_dict: + # Check if this section was modified + was_modified = False + for modified_line in operations.get('modified_lines', []): + if (modified_line.get('is_header') and + modified_line.get('line_number') 
== curr_line_num and + 'original_content' in modified_line): + # This section was modified, use original content + original_line = modified_line['original_content'].strip() + base_hierarchy = base_hierarchy_dict[curr_line_str] + + if ' > ' in base_hierarchy: + # Replace the leaf with original content + hierarchy_parts = base_hierarchy.split(' > ') + hierarchy_parts[-1] = original_line + next_section_original_hierarchy = ' > '.join(hierarchy_parts) + else: + next_section_original_hierarchy = original_line + + print(f" āœ… Found next section (modified): line {curr_line_num} -> {next_section_original_hierarchy.split(' > ')[-1] if ' > ' in next_section_original_hierarchy else next_section_original_hierarchy}") + was_modified = True + break + + if not was_modified: + # Section was not modified, use base hierarchy directly + next_section_original_hierarchy = base_hierarchy_dict[curr_line_str] + print(f" āœ… Found next section (unchanged): line {curr_line_num} -> {next_section_original_hierarchy.split(' > ')[-1] if ' > ' in next_section_original_hierarchy else next_section_original_hierarchy}") + + break + else: + # This next section might also be new or modified + # Try to find it by content matching in base hierarchy + found_match = False + for base_line_str, base_hierarchy in base_hierarchy_dict.items(): + # Compare the leaf titles (last part of hierarchy) + curr_leaf = curr_hierarchy.split(' > ')[-1] if ' > ' in curr_hierarchy else curr_hierarchy + base_leaf = base_hierarchy.split(' > ')[-1] if ' > ' in base_hierarchy else base_hierarchy + + # Clean titles for comparison + curr_clean = clean_title_for_matching(curr_leaf) + base_clean = clean_title_for_matching(base_leaf) + + if curr_clean == base_clean: + next_section_original_hierarchy = base_hierarchy + print(f" āœ… Found next section (by content): {base_hierarchy.split(' > ')[-1] if ' > ' in base_hierarchy else base_hierarchy}") + found_match = True + break + + if found_match: + break + else: + print(f" āš ļø Next section at line {curr_line_num} not found in base, continuing search...") + + # If no next section found, this is being added at the end + if not next_section_original_hierarchy: + print(f" āœ… Bottom section detected: this section is added at the end of document") + # Use special marker for bottom added sections - no matching needed + next_section_original_hierarchy = f"bottom-added-{line_num}" + + source_diff_dict[f"added_{line_num_str}"] = { + "new_line_number": line_num, + "original_hierarchy": next_section_original_hierarchy, + "operation": "added", + "new_content": extract_section_content_for_diff(line_num, all_hierarchy_dict), + "old_content": None # Added sections have no old content + } + + # Process deleted sections - use original hierarchy from base file + for line_num_str, hierarchy in deleted_sections.items(): + line_num = int(line_num_str) + # Use complete hierarchy from base file + original_hierarchy = base_hierarchy_dict.get(line_num_str, hierarchy) + + # Extract old content for deleted sections + old_content = extract_old_content_for_diff(line_num, base_hierarchy_dict, base_file_content) + + source_diff_dict[f"deleted_{line_num_str}"] = { + "new_line_number": line_num, + "original_hierarchy": original_hierarchy, + "operation": "deleted", + "new_content": None, # No new content for deleted sections + "old_content": old_content # Show what was deleted + } + + # Sort the dictionary by new_line_number for better readability + sorted_items = sorted(source_diff_dict.items(), key=lambda x: x[1]['new_line_number']) + 
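+    # Note: dict preserves insertion order on Python 3.7+, so rebuilding the
+    # dictionary from the sorted pairs keeps downstream JSON dumps ordered by
+    # new_line_number.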
source_diff_dict = dict(sorted_items)
+
+    return source_diff_dict
+
+def analyze_source_changes(pr_url, github_client, special_files=None, ignore_files=None, repo_configs=None, max_non_system_sections=120, pr_diff=None):
+    """Analyze source language changes and categorize them as added, modified, or deleted"""
+    # Import modules needed in this function
+    import os
+    import json
+    from toc_processor import process_toc_operations
+
+    # Guard against None defaults so the membership tests below cannot raise TypeError
+    special_files = special_files or []
+    ignore_files = ignore_files or []
+
+    owner, repo, pr_number = parse_pr_url(pr_url)
+    repository = github_client.get_repo(f"{owner}/{repo}")
+    pr = repository.get_pull(pr_number)
+
+    # Get repository configuration for target repo info
+    repo_config = get_repo_config(pr_url, repo_configs)
+
+    print(f"šŸ“‹ Processing PR #{pr_number}: {pr.title}")
+
+    # Get markdown files
+    files = pr.get_files()
+    markdown_files = [f for f in files if f.filename.endswith('.md')]
+
+    print(f"šŸ“„ Found {len(markdown_files)} markdown files")
+
+    # Return dictionaries for different operation types
+    added_sections = {}     # New sections that were added
+    modified_sections = {}  # Existing sections that were modified
+    deleted_sections = {}   # Sections that were deleted
+    added_files = {}        # Completely new files that were added
+    deleted_files = []      # Completely deleted files
+    ignored_files = []      # Files that were ignored
+    toc_files = {}          # Special TOC files requiring special processing
+
+    for file in markdown_files:
+        print(f"\nšŸ” Analyzing {file.filename}")
+
+        # Check if this file should be ignored
+        if file.filename in ignore_files:
+            print(f"   ā­ļø Skipping ignored file: {file.filename}")
+            ignored_files.append(file.filename)
+            continue
+
+        # Check if this is a completely new file or deleted file
+        if file.status == 'added':
+            print(f"   āž• Detected new file: {file.filename}")
+            try:
+                file_content = repository.get_contents(file.filename, ref=pr.head.sha).decoded_content.decode('utf-8')
+                added_files[file.filename] = file_content
+                print(f"   āœ… Added complete file for translation")
+                continue
+            except Exception as e:
+                print(f"   āŒ Error getting new file content: {e}")
+                continue
+
+        elif file.status == 'removed':
+            print(f"   šŸ—‘ļø Detected deleted file: {file.filename}")
+            deleted_files.append(file.filename)
+            print(f"   āœ… Marked file for deletion")
+            continue
+
+        # For modified files, check if it's a special file like TOC.md
+        try:
+            file_content = repository.get_contents(file.filename, ref=pr.head.sha).decoded_content.decode('utf-8')
+        except Exception as e:
+            print(f"   āŒ Error getting content: {e}")
+            continue
+
+        # Check if this is a TOC.md file requiring special processing
+        if os.path.basename(file.filename) in special_files:
+            print(f"   šŸ“‹ Detected special file: {file.filename}")
+
+            # Get target file content for comparison
+            try:
+                target_repository = github_client.get_repo(repo_config['target_repo'])
+                target_file_content = target_repository.get_contents(file.filename, ref="master").decoded_content.decode('utf-8')
+                target_lines = target_file_content.split('\n')
+            except Exception as e:
+                print(f"   āš ļø Could not get target file content: {e}")
+                continue
+
+            # Analyze diff operations for TOC.md
+            operations = analyze_diff_operations(file)
+            source_lines = file_content.split('\n')
+
+            # Process with special TOC logic
+            toc_results = process_toc_operations(file.filename, operations, source_lines, target_lines, "")  # Local path will be determined later
+
+            # Store TOC operations for later processing
+            if any([toc_results['added'], toc_results['modified'], toc_results['deleted']]):
+                # Combine all operations
for processing + all_toc_operations = [] + all_toc_operations.extend(toc_results['added']) + all_toc_operations.extend(toc_results['modified']) + all_toc_operations.extend(toc_results['deleted']) + + # Add to special TOC processing queue (separate from regular sections) + toc_files[file.filename] = { + 'type': 'toc', + 'operations': all_toc_operations + } + + print(f" šŸ“‹ TOC operations queued for processing:") + if toc_results['added']: + print(f" āž• Added: {len(toc_results['added'])} entries") + if toc_results['modified']: + print(f" āœļø Modified: {len(toc_results['modified'])} entries") + if toc_results['deleted']: + print(f" āŒ Deleted: {len(toc_results['deleted'])} entries") + else: + print(f" ā„¹ļø No TOC operations found") + + continue # Skip regular processing for TOC files + + # Analyze diff operations + operations = analyze_diff_operations(file) + print(f" šŸ“ Diff analysis: {len(operations['added_lines'])} added, {len(operations['modified_lines'])} modified, {len(operations['deleted_lines'])} deleted lines") + + lines = file_content.split('\n') + all_headers = {} + + # Track code block state + in_code_block = False + code_block_delimiter = None + + # First pass: collect all headers (excluding those in code blocks) + for line_num, line in enumerate(lines, 1): + original_line = line + line = line.strip() + + # Check for code block delimiters + if line.startswith('```') or line.startswith('~~~'): + if not in_code_block: + # Entering a code block + in_code_block = True + code_block_delimiter = line[:3] + continue + elif line.startswith(code_block_delimiter): + # Exiting a code block + in_code_block = False + code_block_delimiter = None + continue + + # Skip processing if we're inside a code block + if in_code_block: + continue + + # Process headers only if not in code block + if line.startswith('#'): + match = re.match(r'^(#{1,10})\s+(.+)', line) + if match: + level = len(match.group(1)) + title = match.group(2).strip() + all_headers[line_num] = { + 'level': level, + 'title': title, + 'line': line + } + + # Build complete hierarchy from HEAD (after changes) + all_hierarchy_dict = build_hierarchy_dict(file_content) + + # For deletion detection, we also need the base file hierarchy + try: + base_file_content = repository.get_contents(file.filename, ref=f"{repository.default_branch}").decoded_content.decode('utf-8') + base_hierarchy_dict = build_hierarchy_dict(base_file_content) + except Exception as e: + print(f" āš ļø Could not get base file content: {e}") + base_hierarchy_dict = all_hierarchy_dict + base_file_content = file_content # Fallback to current content + + # Find sections by operation type with corrected logic + sections_by_type = find_sections_by_operation_type(lines, operations, all_headers, base_hierarchy_dict) + + # Prioritize modified headers over added ones (fix for header changes like --host -> --hosts) + modified_header_lines = set() + for modified_line in operations['modified_lines']: + if modified_line['is_header']: + modified_header_lines.add(modified_line['line_number']) + + # Remove modified header lines from added set + sections_by_type['added'] = sections_by_type['added'] - modified_header_lines + + # Enhanced logic: check for actual content changes within sections + # This helps detect changes in section content (not just headers) + print(f" šŸ” Enhanced detection: checking for actual section content changes...") + + # Get only lines that have actual content changes (exclude headers) + real_content_changes = set() + + # Added lines (new content, 
excluding headers) + for added_line in operations['added_lines']: + if not added_line['is_header']: + real_content_changes.add(added_line['line_number']) + + # Deleted lines (removed content, excluding headers) + for deleted_line in operations['deleted_lines']: + if not deleted_line['is_header']: + real_content_changes.add(deleted_line['line_number']) + + # Modified lines (changed content, excluding headers) + for modified_line in operations['modified_lines']: + if not modified_line['is_header']: + real_content_changes.add(modified_line['line_number']) + + print(f" šŸ“ Real content changes (non-header): {sorted(real_content_changes)}") + + # Find sections that contain actual content changes + content_affected_sections = set() + for changed_line in real_content_changes: + # Find which section this changed line belongs to + containing_section = None + for line_num in sorted(all_headers.keys()): + if line_num <= changed_line: + containing_section = line_num + else: + break + + if containing_section and containing_section not in sections_by_type['added']: + # Additional check: make sure this is not just a line number shift + # Only add if the change is within reasonable distance from the section header + # AND if the changed line is not part of a completely deleted section header + is_deleted_header = False + for deleted_line in operations['deleted_lines']: + if (deleted_line['is_header'] and + abs(changed_line - deleted_line['line_number']) <= 2): + is_deleted_header = True + print(f" āš ļø Skipping change at line {changed_line} (deleted header near line {deleted_line['line_number']})") + break + + # More precise filtering: check if this change is actually meaningful + # Skip changes that are part of deleted content or line shifts due to deletions + should_include = True + + # Skip exact deleted headers + for deleted_line in operations['deleted_lines']: + if (deleted_line['is_header'] and + changed_line == deleted_line['line_number']): + should_include = False + print(f" āš ļø Skipping change at line {changed_line} (exact deleted header)") + break + + # Skip changes that are very close to deleted content AND far from their containing section + # This helps filter out line shift artifacts while keeping real content changes + if should_include: + for deleted_line in operations['deleted_lines']: + # Only skip if both conditions are met: + # 1. Very close to deleted content (within 5 lines) + # 2. 
The change is far from its containing section (likely a shift artifact) + distance_to_deletion = abs(changed_line - deleted_line['line_number']) + distance_to_section = changed_line - containing_section + + if (distance_to_deletion <= 5 and distance_to_section > 100): + should_include = False + print(f" āš ļø Skipping change at line {changed_line} (likely line shift: {distance_to_deletion} lines from deletion, {distance_to_section} from section)") + break + + if should_include and changed_line - containing_section <= 30: + content_affected_sections.add(containing_section) + print(f" šŸ“ Content change at line {changed_line} affects section at line {containing_section}") + elif should_include: + print(f" āš ļø Skipping distant change at line {changed_line} from section {containing_section}") + + # Add content-modified sections to the modified set, but exclude sections that are already marked as added or deleted + for line_num in content_affected_sections: + if (line_num not in sections_by_type['modified'] and + line_num not in sections_by_type['added'] and + line_num not in sections_by_type['deleted']): # āœ… Critical fix: exclude deleted sections + sections_by_type['modified'].add(line_num) + print(f" šŸ“ Added content-modified section at line {line_num}") + elif line_num in sections_by_type['deleted']: + print(f" 🚫 Skipping content-modified section at line {line_num}: already marked as deleted") + + # Prepare sections data for source_diff_dict + file_modified = {} + file_added = {} + file_deleted = {} + + # Build modified sections + for line_num in sections_by_type['modified']: + if line_num in all_hierarchy_dict: + file_modified[str(line_num)] = all_hierarchy_dict[line_num] + + # Build added sections + for line_num in sections_by_type['added']: + if line_num in all_hierarchy_dict: + file_added[str(line_num)] = all_hierarchy_dict[line_num] + + # Build deleted sections + for line_num in sections_by_type['deleted']: + if line_num in base_hierarchy_dict: + file_deleted[str(line_num)] = base_hierarchy_dict[line_num] + + # Check for frontmatter changes (content before first top-level header) + print(f" šŸ” Checking for frontmatter changes...") + frontmatter_changed = False + + # Check if any changes occur before the first top-level header + first_header_line = None + for line_num in sorted(all_headers.keys()): + header_info = all_headers[line_num] + if header_info['level'] == 1: # First top-level header + first_header_line = line_num + break + + print(f" šŸ“Š First header line: {first_header_line}") + print(f" šŸ“Š Real content changes: {sorted(real_content_changes)}") + + if first_header_line: + # Check if any real content changes are before the first header + for line_num in real_content_changes: + #print(f" šŸ” Checking line {line_num} vs first header {first_header_line}") + if line_num < first_header_line: + frontmatter_changed = True + print(f" šŸ“„ Frontmatter change detected: line {line_num} < {first_header_line}") + break + + print(f" šŸ“Š Frontmatter changed: {frontmatter_changed}") + + if frontmatter_changed: + print(f" šŸ“„ Frontmatter changes detected (before line {first_header_line})") + # Add frontmatter as a special section with line number 0 + file_modified["0"] = "frontmatter" + print(f" āœ… Added frontmatter section to modified sections") + + # Build source diff dictionary + source_diff_dict = build_source_diff_dict( + file_modified, file_added, file_deleted, + all_hierarchy_dict, base_hierarchy_dict, + operations, file_content, base_file_content + ) + + # Breakpoint: 
Output source_diff_dict to file for review with file prefix + + # Ensure temp_output directory exists + script_dir = os.path.dirname(os.path.abspath(__file__)) + temp_dir = os.path.join(script_dir, "temp_output") + os.makedirs(temp_dir, exist_ok=True) + + file_prefix = file.filename.replace('/', '-').replace('.md', '') + output_file = os.path.join(temp_dir, f"{file_prefix}-source-diff-dict.json") + with open(output_file, 'w', encoding='utf-8') as f: + json.dump(source_diff_dict, f, ensure_ascii=False, indent=2) + + print(f" šŸ’¾ Saved source diff dictionary to: {output_file}") + print(f" šŸ“Š Source diff dictionary contains {len(source_diff_dict)} sections:") + for key, diff_info in source_diff_dict.items(): + print(f" {diff_info['operation']}: {key} -> original_hierarchy: {diff_info['original_hierarchy']}") + + # source-diff-dict.json generation is complete, continue to next step in main.py + + # For modified headers, we need to build a mapping using original titles for matching + original_hierarchy_dict = all_hierarchy_dict.copy() + + # Update hierarchy dict to use original content for modified headers when needed for matching + for line_num in sections_by_type['modified']: + if line_num in all_headers: + header_info = all_headers[line_num] + # Check if this header was modified and has original content + for op in operations['modified_lines']: + if (op['is_header'] and + op['line_number'] == line_num and + 'original_content' in op): + # Create hierarchy path using original content for matching + original_line = op['original_content'].strip() + if original_line.startswith('#'): + # Build original hierarchy for matching + original_hierarchy = build_hierarchy_for_modified_section( + file_content, line_num, original_line, all_hierarchy_dict) + if original_hierarchy: + original_hierarchy_dict[line_num] = original_hierarchy + break + + # Process added sections + if sections_by_type['added']: + file_added = {} + # Find insertion points using the simplified logic: + # Record the previous section hierarchy for each added section + insertion_points = find_previous_section_for_added(sections_by_type['added'], all_hierarchy_dict) + + # Get actual content for added sections + for line_num in sections_by_type['added']: + if line_num in all_hierarchy_dict: + file_added[str(line_num)] = all_hierarchy_dict[line_num] + + # Get source sections content (actual content, not just hierarchy) + if file_added: + source_sections_content = get_source_sections_content(pr_url, file.filename, file_added, github_client) + file_added = source_sections_content # Replace hierarchy with actual content + + if file_added: + added_sections[file.filename] = { + 'sections': file_added, + 'insertion_points': insertion_points + } + print(f" āž• Found {len(file_added)} added sections with {len(insertion_points)} insertion points") + + # Process modified sections + if sections_by_type['modified']: + file_modified = {} + for line_num in sections_by_type['modified']: + if line_num in original_hierarchy_dict: + file_modified[str(line_num)] = original_hierarchy_dict[line_num] + + if file_modified: + modified_sections[file.filename] = { + 'sections': file_modified, + 'original_hierarchy': original_hierarchy_dict, + 'current_hierarchy': all_hierarchy_dict + } + print(f" āœļø Found {len(file_modified)} modified sections") + + # Process deleted sections + if sections_by_type['deleted']: + file_deleted = {} + for line_num in sections_by_type['deleted']: + # Use base hierarchy to get the deleted section info + if line_num in 
base_hierarchy_dict:
+                    file_deleted[str(line_num)] = base_hierarchy_dict[line_num]
+
+            if file_deleted:
+                deleted_sections[file.filename] = file_deleted
+                print(f"   āŒ Found {len(file_deleted)} deleted sections")
+
+        # Enhanced logic: also check content-level changes using legacy detection
+        # This helps detect changes in section content (not just headers)
+        print(f"   šŸ” Enhanced detection: checking content-level changes...")
+        changed_lines = get_changed_line_ranges(file)
+        affected_sections = find_affected_sections(lines, changed_lines, all_headers)
+
+        legacy_modified = {}
+        for line_num in affected_sections:
+            if line_num in all_hierarchy_dict:
+                section_hierarchy = all_hierarchy_dict[line_num]
+                # Only add if not already detected by operation-type analysis
+                already_detected = False
+                if file.filename in modified_sections:
+                    for existing_line, existing_hierarchy in modified_sections[file.filename].get('sections', {}).items():
+                        if existing_hierarchy == section_hierarchy:
+                            already_detected = True
+                            break
+
+                if not already_detected:
+                    legacy_modified[str(line_num)] = section_hierarchy
+
+        if legacy_modified:
+            print(f"   āœ… Enhanced detection found {len(legacy_modified)} additional content-modified sections")
+            # Merge with existing modified sections
+            if file.filename in modified_sections:
+                # Merge the sections
+                existing_sections = modified_sections[file.filename].get('sections', {})
+                existing_sections.update(legacy_modified)
+                modified_sections[file.filename]['sections'] = existing_sections
+            else:
+                # Create new entry
+                modified_sections[file.filename] = {
+                    'sections': legacy_modified,
+                    'original_hierarchy': all_hierarchy_dict,
+                    'current_hierarchy': all_hierarchy_dict
+                }
+
+    print(f"\nšŸ“Š Summary:")
+    print(f"   āœļø Modified files: {len(modified_sections)} files")
+    print(f"   šŸ“„ Added files: {len(added_files)} files")
+    print(f"   šŸ—‘ļø Deleted files: {len(deleted_files)} files")
+    print(f"   šŸ“‹ TOC files: {len(toc_files)} files")
+    if ignored_files:
+        print(f"   ā­ļø Ignored files: {len(ignored_files)} files")
+        for ignored_file in ignored_files:
+            print(f"      - {ignored_file}")
+
+    return added_sections, modified_sections, deleted_sections, added_files, deleted_files, toc_files
diff --git a/scripts/translate_doc_pr/requirements.txt b/scripts/translate_doc_pr/requirements.txt
new file mode 100644
index 0000000000000..d8336cf8cebe7
--- /dev/null
+++ b/scripts/translate_doc_pr/requirements.txt
@@ -0,0 +1,4 @@
+PyGithub>=1.55.0
+openai>=1.0.0
+tiktoken>=0.4.0
+google-generativeai>=0.3.0
diff --git a/scripts/translate_doc_pr/section_matcher.py b/scripts/translate_doc_pr/section_matcher.py
new file mode 100644
index 0000000000000..ce4ef61116c89
--- /dev/null
+++ b/scripts/translate_doc_pr/section_matcher.py
@@ -0,0 +1,973 @@
+"""
+Section Matcher Module
+Handles section hierarchy matching including direct matching and AI matching
+"""
+
+import os
+import re
+import json
+import threading
+from github import Github
+from openai import OpenAI
+
+# Thread-safe printing
+print_lock = threading.Lock()
+
+def thread_safe_print(*args, **kwargs):
+    with print_lock:
+        print(*args, **kwargs)
+
+def clean_title_for_matching(title):
+    """Clean title for matching by removing markdown formatting and span elements"""
+    if not title:
+        return ""
+
+    # Remove span elements like <span class="version-mark">New in v5.0</span>
+    title = re.sub(r'<span[^>]*>.*?</span>', '', title)
+
+    # Remove markdown header prefix (# ## ### etc.)
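+    # For example, re.sub(r'^#{1,6}\s*', '', '### `--host`') returns
+    # '`--host`', and the backtick-stripping pass below reduces it to '--host'.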
+ title = re.sub(r'^#{1,6}\s*', '', title.strip()) + + # Remove backticks + title = title.replace('`', '') + + # Strip whitespace + title = title.strip() + + return title + +def is_system_variable_or_config(title): + """Check if a title represents a system variable or configuration item""" + cleaned_title = clean_title_for_matching(title) + + if not cleaned_title: + return False + + # Check if original title had backticks (indicating code/config item) + original_has_backticks = '`' in title + + # System variables and config items are typically: + # 1. Alphanumeric characters with underscores, hyphens, dots, or percent signs + # 2. No spaces in the middle + # 3. Often contain underscores, hyphens, dots, or percent signs + # 4. May contain uppercase letters (like alert rule names) + # 5. Single words wrapped in backticks (like `capacity`, `engine`) + + # Check if it contains only allowed characters (including % for metrics/alerts) + allowed_chars = re.match(r'^[a-zA-Z0-9_\-\.%]+$', cleaned_title) + + # Check if it contains at least one separator (common in system vars/config/alerts) + has_separator = ('_' in cleaned_title or '-' in cleaned_title or + '.' in cleaned_title or '%' in cleaned_title) + + # Check if it doesn't contain spaces (spaces would indicate it's likely a regular title) + no_spaces = ' ' not in cleaned_title + + # Additional patterns for alert rules and metrics + is_alert_rule = (cleaned_title.startswith('PD_') or + cleaned_title.startswith('TiDB_') or + cleaned_title.startswith('TiKV_') or + cleaned_title.endswith('_alert') or + '%' in cleaned_title) + + # NEW: Check if it's a single word in backticks (config/variable name) + # Examples: `capacity`, `engine`, `enable`, `dirname` etc. + is_single_backticked_word = (original_has_backticks and + allowed_chars and + no_spaces and + len(cleaned_title.split()) == 1) + + return bool(allowed_chars and (has_separator or is_alert_rule or is_single_backticked_word) and no_spaces) + +def find_toplevel_title_matches(source_sections, target_lines): + """Find matches for top-level titles (# Level) by direct pattern matching""" + matched_dict = {} + failed_matches = [] + skipped_sections = [] + + thread_safe_print(f"šŸ” Searching for top-level title matches") + + for source_line_num, source_hierarchy in source_sections.items(): + # Extract the leaf title from hierarchy + source_leaf_title = source_hierarchy.split(' > ')[-1] if ' > ' in source_hierarchy else source_hierarchy + + # Only process top-level titles + if not source_leaf_title.startswith('# '): + skipped_sections.append({ + 'line_num': source_line_num, + 'hierarchy': source_hierarchy, + 'reason': 'Not a top-level title' + }) + continue + + thread_safe_print(f" šŸ“ Looking for top-level match: {source_leaf_title}") + + # Find the first top-level title in target document + target_match = None + for line_num, line in enumerate(target_lines, 1): + line = line.strip() + if line.startswith('# '): + target_match = { + 'line_num': line_num, + 'title': line, + 'hierarchy_string': line[2:].strip() # Remove '# ' prefix for hierarchy + } + thread_safe_print(f" āœ“ Found target top-level at line {line_num}: {line}") + break + + if target_match: + matched_dict[str(target_match['line_num'])] = target_match['hierarchy_string'] + thread_safe_print(f" āœ… Top-level match: line {target_match['line_num']}") + else: + thread_safe_print(f" āŒ No top-level title found in target") + failed_matches.append({ + 'line_num': source_line_num, + 'hierarchy': source_hierarchy, + 'reason': 'No top-level title 
found in target' + }) + + thread_safe_print(f"šŸ“Š Top-level matching result: {len(matched_dict)} matches found") + if failed_matches: + thread_safe_print(f"āš ļø {len(failed_matches)} top-level sections failed to match:") + for failed in failed_matches: + thread_safe_print(f" āŒ Line {failed['line_num']}: {failed['hierarchy']} - {failed['reason']}") + + return matched_dict, failed_matches, skipped_sections + + +def find_direct_matches_for_special_files(source_sections, target_hierarchy, target_lines): + """Find direct matches for system variables/config items without using AI""" + matched_dict = {} + failed_matches = [] + skipped_sections = [] + + # Build target headers with hierarchy paths + target_headers = {} + for line_num, line in enumerate(target_lines, 1): + line = line.strip() + if line.startswith('#'): + match = re.match(r'^(#{1,10})\s+(.+)', line) + if match: + level = len(match.group(1)) + title = match.group(2).strip() + target_headers[line_num] = { + 'level': level, + 'title': title, + 'line': line + } + + thread_safe_print(f" šŸ” Searching for direct matches among {len(target_headers)} target headers") + + for source_line_num, source_hierarchy in source_sections.items(): + # Extract the leaf title from hierarchy + source_leaf_title = source_hierarchy.split(' > ')[-1] if ' > ' in source_hierarchy else source_hierarchy + source_clean_title = clean_title_for_matching(source_leaf_title) + + thread_safe_print(f" šŸ“ Looking for match: {source_clean_title}") + + if not is_system_variable_or_config(source_leaf_title): + thread_safe_print(f" āš ļø Not a system variable/config, skipping direct match") + skipped_sections.append({ + 'line_num': source_line_num, + 'hierarchy': source_hierarchy, + 'reason': 'Not a system variable or config item' + }) + continue + + # Find potential matches in target + potential_matches = [] + for target_line_num, target_header in target_headers.items(): + target_clean_title = clean_title_for_matching(target_header['title']) + + if source_clean_title == target_clean_title: + # Build hierarchy path for this target header + hierarchy_path = build_hierarchy_path(target_lines, target_line_num, target_headers) + potential_matches.append({ + 'line_num': target_line_num, + 'header': target_header, + 'hierarchy_path': hierarchy_path, + 'hierarchy_string': ' > '.join([f"{'#' * h['level']} {h['title']}" for h in hierarchy_path if h['level'] > 1 or len(hierarchy_path) == 1]) + }) + thread_safe_print(f" āœ“ Found potential match at line {target_line_num}: {target_header['title']}") + + if len(potential_matches) == 1: + # Single match found + match = potential_matches[0] + matched_dict[str(match['line_num'])] = match['hierarchy_string'] + thread_safe_print(f" āœ… Direct match: line {match['line_num']}") + elif len(potential_matches) > 1: + # Multiple matches, need to use parent hierarchy to disambiguate + thread_safe_print(f" šŸ”€ Multiple matches found ({len(potential_matches)}), using parent hierarchy") + + # Extract parent hierarchy from source + source_parts = source_hierarchy.split(' > ') + if len(source_parts) > 1: + source_parent_titles = [clean_title_for_matching(part) for part in source_parts[:-1]] + + best_match = None + best_score = -1 + + for match in potential_matches: + # Compare parent hierarchy + target_parent_titles = [clean_title_for_matching(h['title']) for h in match['hierarchy_path'][:-1]] + + # Calculate similarity score + score = 0 + min_len = min(len(source_parent_titles), len(target_parent_titles)) + + for i in range(min_len): + if i < 
len(source_parent_titles) and i < len(target_parent_titles):
+                        if source_parent_titles[-(i+1)] == target_parent_titles[-(i+1)]:  # Compare from end
+                            score += 1
+                        else:
+                            break
+
+                    thread_safe_print(f"      šŸ“Š Match at line {match['line_num']} score: {score}")
+
+                    if score > best_score:
+                        best_score = score
+                        best_match = match
+
+                if best_match and best_score > 0:
+                    matched_dict[str(best_match['line_num'])] = best_match['hierarchy_string']
+                    thread_safe_print(f"      āœ… Best match: line {best_match['line_num']} (score: {best_score})")
+                else:
+                    thread_safe_print(f"      āŒ No good parent hierarchy match found")
+                    failed_matches.append({
+                        'line_num': source_line_num,
+                        'hierarchy': source_hierarchy,
+                        'reason': 'Multiple matches found but no good parent hierarchy match'
+                    })
+            else:
+                thread_safe_print(f"      āš ļø No parent hierarchy in source, cannot disambiguate")
+                failed_matches.append({
+                    'line_num': source_line_num,
+                    'hierarchy': source_hierarchy,
+                    'reason': 'Multiple matches found but no parent hierarchy to disambiguate'
+                })
+        else:
+            thread_safe_print(f"      āŒ No matches found for: {source_clean_title}")
+            # Try fuzzy matching for similar titles (e.g., --host vs --hosts)
+            fuzzy_matched = False
+            source_clean_lower = source_clean_title.lower()
+            # target_headers maps line numbers to header dicts, so iterate items()
+            for target_line_num, target_header in target_headers.items():
+                target_clean = clean_title_for_matching(target_header['title'])
+                target_clean_lower = target_clean.lower()
+                # Check for similar titles (handle plural/singular and minor differences)
+                # Case 1: One is a substring of the other (e.g., --host vs --hosts)
+                # Case 2: Small length difference (1-2 characters)
+                len_diff = abs(len(source_clean_lower) - len(target_clean_lower))
+                if (len_diff <= 2 and
+                    (source_clean_lower in target_clean_lower or
+                     target_clean_lower in source_clean_lower)):
+                    thread_safe_print(f"      ā‰ˆ Fuzzy match found: {source_clean_title} ā‰ˆ {target_clean}")
+                    hierarchy_path = build_hierarchy_path(target_lines, target_line_num, target_headers)
+                    hierarchy_string = ' > '.join([f"{'#' * h['level']} {h['title']}" for h in hierarchy_path if h['level'] > 1 or len(hierarchy_path) == 1])
+                    matched_dict[str(target_line_num)] = hierarchy_string
+                    thread_safe_print(f"      āœ… Fuzzy match: line {target_line_num}")
+                    fuzzy_matched = True
+                    break
+
+            if not fuzzy_matched:
+                failed_matches.append({
+                    'line_num': source_line_num,
+                    'hierarchy': source_hierarchy,
+                    'reason': 'No matching section found in target'
+                })
+
+    thread_safe_print(f"   šŸ“Š Direct matching result: {len(matched_dict)} matches found")
+
+    if failed_matches:
+        thread_safe_print(f"   āš ļø {len(failed_matches)} sections failed to match:")
+        for failed in failed_matches:
+            thread_safe_print(f"      āŒ Line {failed['line_num']}: {failed['hierarchy']} - {failed['reason']}")
+
+    if skipped_sections:
+        thread_safe_print(f"   ā„¹ļø {len(skipped_sections)} sections skipped (not system variables/config):")
+        for skipped in skipped_sections:
+            thread_safe_print(f"      ā­ļø Line {skipped['line_num']}: {skipped['hierarchy']} - {skipped['reason']}")
+
+    return matched_dict, failed_matches, skipped_sections
+
+def filter_non_system_sections(target_hierarchy):
+    """Filter out system variable/config sections
+def filter_non_system_sections(target_hierarchy):
+    """Filter out system variable/config sections from the target hierarchy for AI mapping"""
+    filtered_hierarchy = {}
+    system_sections_count = 0
+
+    for line_num, hierarchy in target_hierarchy.items():
+        # Extract the leaf title from the hierarchy
+        leaf_title = hierarchy.split(' > ')[-1] if ' > ' in hierarchy else hierarchy
+
+        if is_system_variable_or_config(leaf_title):
+            system_sections_count += 1
+        else:
+            filtered_hierarchy[line_num] = hierarchy
+
+    thread_safe_print(f"   šŸ”§ Filtered target hierarchy: {len(filtered_hierarchy)} non-system sections (removed {system_sections_count} system sections)")
+
+    return filtered_hierarchy
+
+def get_corresponding_sections(source_sections, target_sections, ai_client, source_language, target_language, max_tokens=20000):
+    """Use AI to find corresponding sections between different languages"""
+
+    # Format the source and target sections
+    source_text = "\n".join(source_sections)
+    target_text = "\n".join(target_sections)
+    number_of_sections = len(source_sections)
+
+    prompt = f"""I am aligning the {source_language} and {target_language} documentation for TiDB. I have modified the following {number_of_sections} sections in the {source_language} file:
+
+{source_text}
+
+Here is the section structure of the corresponding {target_language} file. Please select the corresponding {number_of_sections} sections in {target_language} from the following list that I should modify. Do not output any other text; return the Markdown code block enclosed in three backticks.
+
+{target_text}"""
+
+    thread_safe_print(f"\n   šŸ“¤ AI Mapping Prompt ({source_language} → {target_language}):")
+    thread_safe_print(f"   " + "="*80)
+    thread_safe_print(f"   {prompt}")
+    thread_safe_print(f"   " + "="*80)
+
+    # Import the token estimation helper from main
+    try:
+        from main import print_token_estimation
+        print_token_estimation(prompt, f"Section mapping ({source_language} → {target_language})")
+    except ImportError:
+        # Fallback if the import fails: use tiktoken
+        try:
+            import tiktoken
+            enc = tiktoken.get_encoding("cl100k_base")
+            tokens = enc.encode(prompt)
+            actual_tokens = len(tokens)
+            char_count = len(prompt)
+            thread_safe_print(f"   šŸ’° Section mapping ({source_language} → {target_language})")
+            thread_safe_print(f"   šŸ“ Input: {char_count:,} characters")
+            thread_safe_print(f"   šŸ”¢ Actual tokens: {actual_tokens:,} (using tiktoken cl100k_base)")
+        except Exception:
+            # Final fallback: character-based approximation
+            estimated_tokens = len(prompt) // 4
+            char_count = len(prompt)
+            thread_safe_print(f"   šŸ’° Section mapping ({source_language} → {target_language})")
+            thread_safe_print(f"   šŸ“ Input: {char_count:,} characters")
+            thread_safe_print(f"   šŸ”¢ Estimated tokens: ~{estimated_tokens:,} (fallback: 4 chars/token approximation)")
+
+    try:
+        ai_response = ai_client.chat_completion(
+            messages=[
+                {"role": "user", "content": prompt}
+            ],
+            temperature=0.1,
+            max_tokens=max_tokens
+        )
+
+        thread_safe_print(f"\n   šŸ“„ AI Mapping Response:")
+        thread_safe_print(f"   " + "-"*80)
+        thread_safe_print(f"   {ai_response}")
+        thread_safe_print(f"   " + "-"*80)
+
+        return ai_response
+    except Exception as e:
+        thread_safe_print(f"   āŒ AI mapping error: {e}")
+        return None
+
+def parse_ai_response(ai_response):
+    """Parse the AI response to extract section names"""
+    sections = []
+    lines = ai_response.split('\n')
+
+    for line in lines:
+        line = line.strip()
+        # Skip markdown code block markers and empty lines
+        if line and not line.startswith('```'):
+            # Keep hierarchy lines as-is; strip the leading bullet when the AI returns a list
+            if line.startswith('## '):
+                sections.append(line)
+            elif line.startswith('- '):
+                sections.append(line[2:].strip())
+
+    return sections
+
+def find_matching_line_numbers(ai_sections, target_hierarchy_dict):
+    """Find line numbers in the target hierarchy dict that match AI sections"""
+    matched_dict = {}
+
+    for ai_section in ai_sections:
+        # Look for exact matches first
+        found = False
+        for line_num, hierarchy in target_hierarchy_dict.items():
+            if hierarchy == ai_section:
+                matched_dict[str(line_num)] = hierarchy
+                found = True
+                break
+
+        if not found:
+            # Look for partial matches (in case of slight differences)
+            for line_num, hierarchy in target_hierarchy_dict.items():
+                # Remove common variations and compare
+                ai_clean = ai_section.replace('### ', '').replace('## ', '').strip()
+                hierarchy_clean = hierarchy.replace('### ', '').replace('## ', '').strip()
+
+                if ai_clean in hierarchy_clean or hierarchy_clean in ai_clean:
+                    matched_dict[str(line_num)] = hierarchy
+                    thread_safe_print(f"   ā‰ˆ Partial match found at line {line_num}: {hierarchy}")
+                    found = True
+                    break
+
+        if not found:
+            thread_safe_print(f"   āœ— No match found for: {ai_section}")
+
+    return matched_dict
+
+def build_hierarchy_path(lines, line_num, all_headers):
+    """Build the full hierarchy path for a header at the given line (from auto-sync-pr-changes.py)"""
+    if line_num not in all_headers:
+        return []
+
+    current_header = all_headers[line_num]
+    current_level = current_header['level']
+    hierarchy_path = []
+
+    # Find all parent headers
+    for check_line in sorted(all_headers.keys()):
+        if check_line >= line_num:
+            break
+
+        header = all_headers[check_line]
+        if header['level'] < current_level:
+            # This is a potential parent: drop any collected headers at the same or a deeper level
+            while hierarchy_path and hierarchy_path[-1]['level'] >= header['level']:
+                hierarchy_path.pop()
+            hierarchy_path.append(header)
+
+    # Add the current header
+    hierarchy_path.append(current_header)
+
+    return hierarchy_path
+
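+# Sketch of the expected result (hypothetical headers): for
+#   all_headers = {1: {'level': 1, 'title': 'Guide', ...},
+#                  5: {'level': 2, 'title': 'Install', ...},
+#                  9: {'level': 3, 'title': 'Linux', ...}}
+# build_hierarchy_path(lines, 9, all_headers) walks the earlier headers and
+# returns the dicts for 'Guide', 'Install', and 'Linux' in that order.
+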
+def map_insertion_points_to_target(insertion_points, target_hierarchy, target_lines, file_path, pr_url, github_client, ai_client, repo_config, max_non_system_sections=120):
+    """Map source insertion points to target language locations"""
+    target_insertion_points = {}
+
+    thread_safe_print(f"   šŸ“ Mapping {len(insertion_points)} insertion points to target...")
+
+    for group_key, point_info in insertion_points.items():
+        previous_section_hierarchy = point_info['previous_section_hierarchy']
+        thread_safe_print(f"   šŸ” Finding target location for: {previous_section_hierarchy}")
+
+        # Extract the title for the system variable check
+        if ' > ' in previous_section_hierarchy:
+            title = previous_section_hierarchy.split(' > ')[-1]
+        else:
+            title = previous_section_hierarchy
+
+        # Check whether this is a system variable/config that can be matched directly
+        cleaned_title = clean_title_for_matching(title)
+        if is_system_variable_or_config(cleaned_title):
+            thread_safe_print(f"   šŸŽÆ Direct matching for system var/config: {cleaned_title}")
+
+            # Direct matching for system variables
+            temp_source = {point_info['previous_section_line']: previous_section_hierarchy}
+            matched_dict, failed_matches, skipped_sections = find_direct_matches_for_special_files(
+                temp_source, target_hierarchy, target_lines
+            )
+
+            if matched_dict:
+                # Get the first (and should be the only) matched target line
+                target_line = list(matched_dict.keys())[0]
+
+                # Find the end of this section
+                target_line_num = int(target_line)
+                insertion_after_line = find_section_end_line(target_line_num, target_hierarchy, target_lines)
+
+                target_insertion_points[group_key] = {
+                    'insertion_after_line': insertion_after_line,
+                    'target_hierarchy': target_hierarchy.get(str(target_line_num), ''),
+                    'insertion_type': point_info['insertion_type'],
+                    'new_sections': point_info['new_sections']
+                }
+                thread_safe_print(f"   āœ… Direct match found, insertion after line {insertion_after_line}")
+                continue
+
+        # If this is not a system variable, or direct matching failed, use AI
+        thread_safe_print(f"   šŸ¤– Using AI mapping for: {cleaned_title}")
+
+        # Filter the target hierarchy for AI (remove system sections)
+        filtered_target_hierarchy = filter_non_system_sections(target_hierarchy)
+
+        # Check whether the filtered hierarchy is too large for AI,
+        # using the provided max_non_system_sections parameter
+        if len(filtered_target_hierarchy) > max_non_system_sections:
+            thread_safe_print(f"   āŒ Target hierarchy too large for AI: {len(filtered_target_hierarchy)} > {max_non_system_sections}")
+            continue
+
+        # Prepare the source for AI mapping
+        temp_source = {str(point_info['previous_section_line']): previous_section_hierarchy}
+
+        # Get the AI mapping
+        ai_response = get_corresponding_sections(
+            list(temp_source.values()),
+            list(filtered_target_hierarchy.values()),
+            ai_client,
+            repo_config['source_language'],
+            repo_config['target_language'],
+            max_tokens=20000  # Default value; this function does not accept max_tokens yet
+        )
+
+        if ai_response:
+            # Parse the AI response and find matching line numbers
+            ai_sections = parse_ai_response(ai_response)
+            ai_matched = find_matching_line_numbers(ai_sections, target_hierarchy)
+
+            if ai_matched and len(ai_matched) > 0:
+                # Get the first match (there is only one source section)
+                target_line = list(ai_matched.keys())[0]
+                target_line_num = int(target_line)
+
+                # Find the end of this section
+                insertion_after_line = find_section_end_line(target_line_num, target_hierarchy, target_lines)
+
+                target_insertion_points[group_key] = {
+                    'insertion_after_line': insertion_after_line,
+                    'target_hierarchy': target_hierarchy.get(target_line, ''),
+                    'insertion_type': point_info['insertion_type'],
+                    'new_sections': point_info['new_sections']
+                }
+                thread_safe_print(f"   āœ… AI match found, insertion after line {insertion_after_line}")
+            else:
+                thread_safe_print(f"   āŒ No AI matching sections found for: {previous_section_hierarchy}")
+        else:
+            thread_safe_print(f"   āŒ No AI response received for: {previous_section_hierarchy}")
+
+    return target_insertion_points
+
+def extract_hierarchies_from_diff_dict(source_diff_dict):
+    """Extract original_hierarchy from source_diff_dict for section matching"""
+    extracted_hierarchies = {}
+
+    for key, diff_info in source_diff_dict.items():
+        operation = diff_info.get('operation', '')
+        original_hierarchy = diff_info.get('original_hierarchy', '')
+
+        # Process all sections: modified, deleted, and added
+        if operation in ['modified', 'deleted', 'added'] and original_hierarchy:
+            # Use the key as the identifier for the hierarchy
+            extracted_hierarchies[key] = original_hierarchy
+
+    thread_safe_print(f"šŸ“„ Extracted {len(extracted_hierarchies)} hierarchies from source diff dict:")
+    for key, hierarchy in extracted_hierarchies.items():
+        thread_safe_print(f"   {key}: {hierarchy}")
+
+    return extracted_hierarchies
+
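+# Example of the assumed source_diff_dict shape (values are illustrative only):
+#   {"modified_1": {"operation": "modified",
+#                   "original_hierarchy": "# TiDB FAQ > ## Deployment",
+#                   "old_content": "...", "new_content": "..."}}
+# extract_hierarchies_from_diff_dict returns {"modified_1": "# TiDB FAQ > ## Deployment"}.
+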
+def match_source_diff_to_target(source_diff_dict, target_hierarchy, target_lines, ai_client, repo_config, max_non_system_sections=120, max_tokens=20000):
+    """
+    Match source_diff_dict original_hierarchy to target file sections.
+    Uses direct matching for system variables/config and AI matching for others.
+
+    Returns:
+        dict: Matched sections with enhanced information including:
+            - target_line: Line number in the target file
+            - target_hierarchy: Target section hierarchy
+            - insertion_type: For added sections only
+            - source_original_hierarchy: Original hierarchy from the source
+            - source_operation: Operation type (modified/added/deleted)
+            - source_old_content: Old content from the source diff
+            - source_new_content: New content from the source diff
+    """
+    thread_safe_print(f"šŸ”— Starting source diff to target matching...")
+
+    # Extract the hierarchies from the source diff dict
+    source_hierarchies = extract_hierarchies_from_diff_dict(source_diff_dict)
+
+    if not source_hierarchies:
+        thread_safe_print(f"āš ļø No hierarchies to match")
+        return {}
+
+    # Process sections in their original order to maintain consistency;
+    # use ordered dicts so the final matching results preserve that order
+    from collections import OrderedDict
+    all_matched_sections = OrderedDict()
+
+    # Categorize sections for the processing strategy while maintaining order
+    direct_match_sections = OrderedDict()
+    ai_match_sections = OrderedDict()
+    added_sections = OrderedDict()
+    bottom_sections = OrderedDict()  # Bottom sections need no matching
+
+    for key, hierarchy in source_hierarchies.items():
+        # Check if this is a bottom section (no matching needed)
+        if hierarchy.startswith('bottom-'):
+            bottom_sections[key] = hierarchy
+        # Check if this is an added section
+        elif key.startswith('added_'):
+            added_sections[key] = hierarchy
+        else:
+            # Extract the leaf title from the hierarchy for checking
+            leaf_title = hierarchy.split(' > ')[-1] if ' > ' in hierarchy else hierarchy
+
+            # Check if this section is suitable for direct matching
+            if (hierarchy == "frontmatter" or
+                    leaf_title.startswith('# ') or  # Top-level titles
+                    is_system_variable_or_config(leaf_title)):  # System variables/config
+                direct_match_sections[key] = hierarchy
+            else:
+                ai_match_sections[key] = hierarchy
+
+    thread_safe_print(f"šŸ“Š Section categorization:")
+    thread_safe_print(f"   šŸŽÆ Direct matching: {len(direct_match_sections)} sections")
+    thread_safe_print(f"   šŸ¤– AI matching: {len(ai_match_sections)} sections")
+    thread_safe_print(f"   āž• Added sections: {len(added_sections)} sections")
+    thread_safe_print(f"   šŸ”š Bottom sections: {len(bottom_sections)} sections (no matching needed)")
+
+    # Process each section in the original order
+    thread_safe_print(f"\nšŸ”„ Processing sections in original order...")
+
+    for key, hierarchy in source_hierarchies.items():
+        thread_safe_print(f"   šŸ” Processing {key}: {hierarchy}")
+
+        # Determine the processing strategy based on section type and content
+        if hierarchy.startswith('bottom-'):
+            # Bottom section: no matching needed, append to the end
+            thread_safe_print(f"   šŸ”š Bottom section - append to end of document")
+            result = {
+                "target_line": "-1",  # Special marker for bottom sections
+                "target_hierarchy": hierarchy  # Keep the bottom-xxx marker
+            }
+        elif key.startswith('added_'):
+            # Added section: find the insertion point
+            thread_safe_print(f"   āž• Added section - finding insertion point")
+            result = process_added_section(key, hierarchy, target_hierarchy, target_lines, ai_client, repo_config, max_non_system_sections, max_tokens)
+        else:
+            # Modified or deleted section: find the matching target section
+            operation = source_diff_dict[key].get('operation', 'unknown')
+            thread_safe_print(f"   {operation.capitalize()} section - finding target match")
+            result = process_modified_or_deleted_section(key, hierarchy, target_hierarchy, target_lines, ai_client, repo_config, max_non_system_sections, max_tokens)
+
+        if result:
+            # Add source language information from source_diff_dict
+            source_info = source_diff_dict.get(key, {})
+
+            # Extract the target content from target_lines
+            target_line = result.get('target_line', 'unknown')
+            target_content = ""
+            if target_line != 'unknown' and target_line != '0':
+                try:
+                    target_line_num = int(target_line)
+                    # For ALL operations, only extract the direct content (no sub-sections).
+                    # This avoids duplication when both parent and child sections have operations.
+                    target_content = extract_section_direct_content(target_line_num, target_lines)
+                except (ValueError, IndexError):
+                    target_content = ""
+            elif target_line == '0':
+                # For frontmatter, extract the content from the beginning to the first header
+                target_content = extract_frontmatter_content(target_lines)
+
+            enhanced_result = {
+                **result,  # Include the existing target matching info
+                'target_content': target_content,  # Add the target section content
+                'source_original_hierarchy': source_info.get('original_hierarchy', ''),
+                'source_operation': source_info.get('operation', ''),
+                'source_old_content': source_info.get('old_content', ''),
+                'source_new_content': source_info.get('new_content', '')
+            }
+            all_matched_sections[key] = enhanced_result
+            thread_safe_print(f"   āœ… {key}: -> line {target_line}")
+        else:
+            thread_safe_print(f"   āŒ {key}: matching failed")
+
+    thread_safe_print(f"\nšŸ“Š Final matching results: {len(all_matched_sections)} total matches")
+    return all_matched_sections
+
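+# A matched entry produced above looks roughly like this (field values are
+# illustrative; 'insertion_type' appears only for added sections):
+#   {"modified_1": {"target_line": "88", "target_hierarchy": "## Deployment",
+#                   "target_content": "## Deployment\n...",
+#                   "source_operation": "modified",
+#                   "source_old_content": "...", "source_new_content": "..."}}
+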
+def process_modified_or_deleted_section(key, hierarchy, target_hierarchy, target_lines, ai_client, repo_config, max_non_system_sections, max_tokens=20000):
+    """Process modified or deleted sections to find target matches"""
+    # Extract the leaf title from the hierarchy for checking
+    leaf_title = hierarchy.split(' > ')[-1] if ' > ' in hierarchy else hierarchy
+
+    # Check if this section is suitable for direct matching
+    if (hierarchy == "frontmatter" or
+            leaf_title.startswith('# ') or  # Top-level titles
+            is_system_variable_or_config(leaf_title)):  # System variables/config
+
+        if hierarchy == "frontmatter":
+            return {"target_line": "0", "target_hierarchy": "frontmatter"}
+
+        elif leaf_title.startswith('# '):
+            # Top-level title matching
+            temp_sections = {key: hierarchy}
+            matched_dict, failed_matches, skipped_sections = find_toplevel_title_matches(
+                temp_sections, target_lines
+            )
+            if matched_dict:
+                target_line = list(matched_dict.keys())[0]
+                # For top-level titles, add the # prefix to the hierarchy
+                return {
+                    "target_line": target_line,
+                    "target_hierarchy": f"# {matched_dict[target_line]}"
+                }
+
+        else:
+            # System variable/config matching
+            temp_sections = {key: hierarchy}
+            matched_dict, failed_matches, skipped_sections = find_direct_matches_for_special_files(
+                temp_sections, target_hierarchy, target_lines
+            )
+            if matched_dict:
+                target_line = list(matched_dict.keys())[0]
+                target_hierarchy_str = list(matched_dict.values())[0]
+
+                # Keep only the leaf title with the ## prefix, dropping the top-level title
+                if ' > ' in target_hierarchy_str:
+                    leaf_title = target_hierarchy_str.split(' > ')[-1]
+                    formatted_hierarchy = f"## {leaf_title}"
+                else:
+                    # Single level: add the ## prefix
+                    formatted_hierarchy = f"## {target_hierarchy_str}"
+
+                return {
+                    "target_line": target_line,
+                    "target_hierarchy": formatted_hierarchy
+                }
+    else:
+        # AI matching for non-system sections
+        filtered_target_hierarchy = filter_non_system_sections(target_hierarchy)
+
+        if len(filtered_target_hierarchy) <= max_non_system_sections:
+            temp_sections = {key: hierarchy}
+
+            ai_response = get_corresponding_sections(
+                list(temp_sections.values()),
+                list(filtered_target_hierarchy.values()),
+                ai_client,
+                repo_config['source_language'],
+                repo_config['target_language'],
+                max_tokens
+            )
+
+            if ai_response:
+                ai_sections = parse_ai_response(ai_response)
+                ai_matched = find_matching_line_numbers(ai_sections, target_hierarchy)
+
+                if ai_matched:
+                    target_line = list(ai_matched.keys())[0]
+                    target_hierarchy_str = list(ai_matched.values())[0]
+
+                    # Format the AI-matched hierarchy
+                    formatted_hierarchy = format_target_hierarchy(target_hierarchy_str)
+
+                    return {
+                        "target_line": target_line,
+                        "target_hierarchy": formatted_hierarchy
+                    }
+
+    return None
+
+def format_target_hierarchy(target_hierarchy_str):
+    """Format the target hierarchy while preserving the complete hierarchy structure"""
+    if target_hierarchy_str.startswith('#'):
+        # Already formatted (covers # and ## prefixes), return as-is
+        return target_hierarchy_str
+    elif ' > ' in target_hierarchy_str:
+        # Keep the complete hierarchy structure as-is
+        return target_hierarchy_str
+    else:
+        # Single level: add the ## prefix for compatibility
+        return f"## {target_hierarchy_str}"
+
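+# Expected behavior of format_target_hierarchy (doctest-style, illustrative inputs):
+#   >>> format_target_hierarchy("## `tidb_mem_quota_query`")
+#   '## `tidb_mem_quota_query`'
+#   >>> format_target_hierarchy("# Overview > ## Architecture")
+#   '# Overview > ## Architecture'
+#   >>> format_target_hierarchy("Architecture")
+#   '## Architecture'
+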
+def process_added_section(key, reference_hierarchy, target_hierarchy, target_lines, ai_client, repo_config, max_non_system_sections, max_tokens=20000):
+    """Process added sections to find insertion points"""
+    # For added sections, the hierarchy points to the next section (the one to insert before)
+    reference_leaf = reference_hierarchy.split(' > ')[-1] if ' > ' in reference_hierarchy else reference_hierarchy
+
+    if (reference_hierarchy == "frontmatter" or
+            reference_leaf.startswith('# ') or
+            is_system_variable_or_config(reference_leaf)):
+
+        # Use direct matching for the reference section
+        temp_reference = {f"ref_{key}": reference_hierarchy}
+
+        if reference_hierarchy == "frontmatter":
+            return {
+                "target_line": "0",
+                "target_hierarchy": "frontmatter",
+                "insertion_type": "before_reference"
+            }
+
+        elif reference_leaf.startswith('# '):
+            matched_dict, failed_matches, skipped_sections = find_toplevel_title_matches(
+                temp_reference, target_lines
+            )
+            if matched_dict:
+                target_line = list(matched_dict.keys())[0]
+                formatted_hierarchy = f"# {matched_dict[target_line]}"
+                return {
+                    "target_line": target_line,
+                    "target_hierarchy": formatted_hierarchy,
+                    "insertion_type": "before_reference"
+                }
+
+        else:
+            # System variable/config
+            matched_dict, failed_matches, skipped_sections = find_direct_matches_for_special_files(
+                temp_reference, target_hierarchy, target_lines
+            )
+            if matched_dict:
+                target_line = list(matched_dict.keys())[0]
+                target_hierarchy_str = list(matched_dict.values())[0]
+                formatted_hierarchy = format_target_hierarchy(target_hierarchy_str)
+                return {
+                    "target_line": target_line,
+                    "target_hierarchy": formatted_hierarchy,
+                    "insertion_type": "before_reference"
+                }
+    else:
+        # Use AI matching for the reference section
+        filtered_target_hierarchy = filter_non_system_sections(target_hierarchy)
+
+        if len(filtered_target_hierarchy) <= max_non_system_sections:
+            temp_reference = {f"ref_{key}": reference_hierarchy}
+
+            ai_response = get_corresponding_sections(
+                list(temp_reference.values()),
+                list(filtered_target_hierarchy.values()),
+                ai_client,
+                repo_config['source_language'],
+                repo_config['target_language'],
+                max_tokens
+            )
+
+            if ai_response:
+                ai_sections = parse_ai_response(ai_response)
+                ai_matched = find_matching_line_numbers(ai_sections, target_hierarchy)
+
+                if ai_matched:
+                    target_line = list(ai_matched.keys())[0]
+                    target_hierarchy_str = list(ai_matched.values())[0]
+                    formatted_hierarchy = format_target_hierarchy(target_hierarchy_str)
+                    return {
+                        "target_line": target_line,
+                        "target_hierarchy": formatted_hierarchy,
+                        "insertion_type": "before_reference"
+                    }
+
+    return None
+
+def extract_target_section_content(target_line_num, target_lines):
+    """Extract target section content from target_lines (includes sub-sections)"""
+    if target_line_num > len(target_lines):
+        return ""
+
+    start_line = target_line_num - 1  # Convert to a 0-based index
+
+    # Find the end of the section by looking for the next header
+    current_line = target_lines[start_line].strip()
+    if not current_line.startswith('#'):
+        return current_line
+
+    current_level = len(current_line.split()[0])  # Count the # characters
+    end_line = len(target_lines)  # Default to the end of the file
+
+    # For top-level headers (# level 1), stop at the first sublevel (## level 2).
+    # For other headers, stop at the same or a higher level.
+    if current_level == 1:
+        # Top-level header: stop at the first ## (level 2) or deeper header
+        for i in range(start_line + 1, len(target_lines)):
+            line = target_lines[i].strip()
+            if line.startswith('#'):
+                line_level = len(line.split()[0])
+                if line_level >= 2:  # Stop at ## or a deeper level
+                    end_line = i
+                    break
+    else:
+        # Sub-level header: stop at the same or a higher level (traditional behavior)
+        for i in range(start_line + 1, len(target_lines)):
+            line = target_lines[i].strip()
+            if line.startswith('#'):
+                line_level = len(line.split()[0])
+                if line_level <= current_level:
+                    end_line = i
+                    break
+
+    # Extract the content from start_line to end_line
+    section_content = '\n'.join(target_lines[start_line:end_line])
+    return section_content.strip()
+
+def extract_section_direct_content(target_line_num, target_lines):
+    """Extract ONLY the direct content of a section (excluding sub-sections)"""
+    if target_line_num > len(target_lines):
+        return ""
+
+    start_line = target_line_num - 1  # Convert to a 0-based index
+
+    # Find the end of the section by looking for the next header
+    current_line = target_lines[start_line].strip()
+    if not current_line.startswith('#'):
+        return current_line
+
+    end_line = len(target_lines)  # Default to the end of the file
+
+    # Only extract until the first header of ANY level: this keeps the direct
+    # content and stops at both sub-sections and same/higher-level sections
+    for i in range(start_line + 1, len(target_lines)):
+        line = target_lines[i].strip()
+        if line.startswith('#'):
+            end_line = i
+            break
+
+    # Extract the content from start_line to end_line
+    section_content = '\n'.join(target_lines[start_line:end_line])
+    return section_content.strip()
+
+def extract_frontmatter_content(target_lines):
+    """Extract the frontmatter content from the beginning to the first header"""
+    if not target_lines:
+        return ""
+
+    frontmatter_lines = []
+    for i, line in enumerate(target_lines):
+        line_stripped = line.strip()
+        # Stop when we hit the first top-level header
+        if line_stripped.startswith('# '):
+            break
+        frontmatter_lines.append(line.rstrip())
+
+    return '\n'.join(frontmatter_lines)
+
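+# Contrast between the two extractors above (hypothetical document):
+#   lines = ["## Parent", "text", "### Child", "more"]
+#   extract_target_section_content(1, lines)  -> "## Parent\ntext\n### Child\nmore"
+#   extract_section_direct_content(1, lines)  -> "## Parent\ntext"
+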
+def find_section_end_line(section_start_line, target_hierarchy, target_lines):
+    """Find the end line of a section to determine the insertion point (from auto-sync-pr-changes.py)"""
+
+    # Get the current section's level
+    current_section_line = target_lines[section_start_line - 1].strip()
+    current_level = len(current_section_line.split()[0]) if current_section_line.startswith('#') else 5
+
+    # Find the next section at the same level or higher (a lower number)
+    next_section_line = None
+    for line_num_str in sorted(target_hierarchy.keys(), key=int):
+        line_num = int(line_num_str)
+        if line_num > section_start_line:
+            # Check the level of this section
+            section_line = target_lines[line_num - 1].strip()
+            if section_line.startswith('#'):
+                section_level = len(section_line.split()[0])
+                if section_level <= current_level:
+                    next_section_line = line_num
+                    break
+
+    if next_section_line:
+        # Insert before the next same-level or higher-level section
+        return next_section_line - 1
+    else:
+        # This is the last section at this level; insert at the end of the file
+        return len(target_lines)
diff --git a/scripts/translate_doc_pr/toc_processor.py b/scripts/translate_doc_pr/toc_processor.py
new file mode 100644
index 0000000000000..71cce4a17f8bb
--- /dev/null
+++ b/scripts/translate_doc_pr/toc_processor.py
@@ -0,0 +1,434 @@
+"""
+TOC Processor Module
+Handles the special processing logic for TOC.md files
+"""
+
+import os
+import re
+import json
+import threading
+from github import Github
+from openai import OpenAI
+
+# Thread-safe printing
+print_lock = threading.Lock()
+
+def thread_safe_print(*args, **kwargs):
+    with print_lock:
+        print(*args, **kwargs)
+
+def extract_toc_link_from_line(line):
+    """Extract the link part (including parentheses) from a TOC line"""
+    # Pattern to match the [text](link) format
+    pattern = r'\[([^\]]+)\]\(([^)]+)\)'
+    match = re.search(pattern, line)
+    if match:
+        return f"({match.group(2)})"  # Return (link) including the parentheses
+    return None
+
+def is_toc_translation_needed(line):
+    """Check if a TOC line needs translation based on the content in square brackets"""
+    # Extract the content within square brackets [content]
+    pattern = r'\[([^\]]+)\]'
+    match = re.search(pattern, line)
+    if match:
+        content = match.group(1)
+        has_chinese = bool(re.search(r'[\u4e00-\u9fff]', content))
+        has_spaces = ' ' in content
+
+        # Translation is needed if the content has Chinese OR spaces;
+        # skip it only for alphanumeric/technical terms without spaces
+        return has_chinese or has_spaces
+    return True  # Default to translating if the line cannot be parsed
+
+def find_best_toc_match(target_link, target_lines, source_line_num):
+    """Find the best matching line in the target TOC based on link content and line proximity"""
+    matches = []
+
+    for i, line in enumerate(target_lines):
+        line_link = extract_toc_link_from_line(line.strip())
+        if line_link and line_link == target_link:
+            matches.append({
+                'line_num': i + 1,  # Convert to 1-based
+                'line': line.strip(),
+                'distance': abs((i + 1) - source_line_num)
+            })
+
+    if not matches:
+        return None
+
+    # Sort by distance to the source line number and choose the closest one
+    matches.sort(key=lambda x: x['distance'])
+    return matches[0]
+
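+# Doctest-style sketch for the helpers above (illustrative TOC lines):
+#   >>> extract_toc_link_from_line("  - [TiDB 简介](/overview.md)")
+#   '(/overview.md)'
+#   >>> is_toc_translation_needed("  - [TiDB 简介](/overview.md)")       # contains Chinese
+#   True
+#   >>> is_toc_translation_needed("  - [tidb_server](/tidb-server.md)")  # technical term
+#   False
+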
+def group_consecutive_lines(lines):
+    """Group consecutive lines together"""
+    if not lines:
+        return []
+
+    # Sort the lines by line number
+    sorted_lines = sorted(lines, key=lambda x: x['line_number'])
+
+    groups = []
+    current_group = [sorted_lines[0]]
+
+    for i in range(1, len(sorted_lines)):
+        current_line = sorted_lines[i]
+        prev_line = sorted_lines[i-1]
+
+        # Consider lines consecutive if they are within 2 lines of each other
+        if current_line['line_number'] - prev_line['line_number'] <= 2:
+            current_group.append(current_line)
+        else:
+            groups.append(current_group)
+            current_group = [current_line]
+
+    groups.append(current_group)
+    return groups
+
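+# Grouping sketch (assumed line-dict shape): lines at 3, 4, and 9 form two
+# groups because the jump from 4 to 9 exceeds 2:
+#   >>> group_consecutive_lines([{'line_number': 3}, {'line_number': 4}, {'line_number': 9}])
+#   [[{'line_number': 3}, {'line_number': 4}], [{'line_number': 9}]]
+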
+def process_toc_operations(file_path, operations, source_lines, target_lines, target_local_path):
+    """Process TOC.md file operations with the special TOC logic"""
+    thread_safe_print(f"\nšŸ“‹ Processing TOC.md with special logic...")
+
+    results = {
+        'added': [],
+        'modified': [],
+        'deleted': []
+    }
+
+    # Process deleted lines first
+    for deleted_line in operations['deleted_lines']:
+        if not deleted_line['is_header']:  # TOC lines are not headers
+            deleted_content = deleted_line['content']
+            deleted_link = extract_toc_link_from_line(deleted_content)
+
+            if deleted_link:
+                thread_safe_print(f"   šŸ—‘ļø Processing deleted TOC line with link: {deleted_link}")
+
+                # Find the matching line in the target
+                match = find_best_toc_match(deleted_link, target_lines, deleted_line['line_number'])
+                if match:
+                    thread_safe_print(f"   āœ… Found target line {match['line_num']}: {match['line']}")
+                    results['deleted'].append({
+                        'source_line': deleted_line['line_number'],
+                        'target_line': match['line_num'],
+                        'content': deleted_content
+                    })
+                else:
+                    thread_safe_print(f"   āŒ No matching line found for {deleted_link}")
+
+    # Process added lines
+    added_groups = group_consecutive_lines(operations['added_lines'])
+    for group in added_groups:
+        if group:  # Skip empty groups
+            first_added_line = group[0]
+            thread_safe_print(f"   āž• Processing added TOC group starting at line {first_added_line['line_number']}")
+
+            # Find the previous line in the source to determine the insertion point
+            previous_line_num = first_added_line['line_number'] - 1
+            if previous_line_num > 0 and previous_line_num <= len(source_lines):
+                previous_line_content = source_lines[previous_line_num - 1]
+                previous_link = extract_toc_link_from_line(previous_line_content)
+
+                if previous_link:
+                    thread_safe_print(f"   šŸ“ Previous line link: {previous_link}")
+
+                    # Find the matching previous line in the target
+                    match = find_best_toc_match(previous_link, target_lines, previous_line_num)
+                    if match:
+                        thread_safe_print(f"   āœ… Found target insertion point after line {match['line_num']}")
+
+                        # Process each line in the group
+                        for added_line in group:
+                            added_content = added_line['content']
+                            if is_toc_translation_needed(added_content):
+                                results['added'].append({
+                                    'source_line': added_line['line_number'],
+                                    'target_insertion_after': match['line_num'],
+                                    'content': added_content,
+                                    'needs_translation': True
+                                })
+                                thread_safe_print(f"   šŸ“ Added for translation: {added_content.strip()}")
+                            else:
+                                results['added'].append({
+                                    'source_line': added_line['line_number'],
+                                    'target_insertion_after': match['line_num'],
+                                    'content': added_content,
+                                    'needs_translation': False
+                                })
+                                thread_safe_print(f"   ā­ļø Added without translation: {added_content.strip()}")
+                    else:
+                        thread_safe_print(f"   āŒ No target insertion point found for {previous_link}")
+                else:
+                    thread_safe_print(f"   āŒ No link found in previous line: {previous_line_content.strip()}")
+
+    # Process modified lines
+    modified_groups = group_consecutive_lines(operations['modified_lines'])
+    for group in modified_groups:
+        if group:  # Skip empty groups
+            first_modified_line = group[0]
+            thread_safe_print(f"   āœļø Processing modified TOC group starting at line {first_modified_line['line_number']}")
+
+            # Find the previous line in the source to determine the target location
+            previous_line_num = first_modified_line['line_number'] - 1
+            if previous_line_num > 0 and previous_line_num <= len(source_lines):
+                previous_line_content = source_lines[previous_line_num - 1]
+                previous_link = extract_toc_link_from_line(previous_line_content)
+
+                if previous_link:
+                    thread_safe_print(f"   šŸ“ Previous line link: {previous_link}")
+
+                    # Find the matching previous line in the target
+                    match = find_best_toc_match(previous_link, target_lines, previous_line_num)
+                    if match:
+                        # Process each line in the group
+                        for modified_line in group:
+                            modified_content = modified_line['content']
+                            if is_toc_translation_needed(modified_content):
+                                results['modified'].append({
+                                    'source_line': modified_line['line_number'],
+                                    'target_line_context': match['line_num'],
+                                    'content': modified_content,
+                                    'needs_translation': True
+                                })
+                                thread_safe_print(f"   šŸ“ Modified for translation: {modified_content.strip()}")
+                            else:
+                                results['modified'].append({
+                                    'source_line': modified_line['line_number'],
+                                    'target_line_context': match['line_num'],
+                                    'content': modified_content,
+                                    'needs_translation': False
+                                })
+                                thread_safe_print(f"   ā­ļø Modified without translation: {modified_content.strip()}")
+                    else:
+                        thread_safe_print(f"   āŒ No target context found for {previous_link}")
+                else:
+                    thread_safe_print(f"   āŒ No link found in previous line: {previous_line_content.strip()}")
+
+    return results
+
+def find_toc_modification_line(mod_op, target_lines):
+    """Find the actual line number to modify in the target TOC based on context"""
+    # This helper locates the exact line to modify in the target TOC
+    # based on the modification operation's context
+
+    target_line_context = mod_op.get('target_line_context', 0)
+
+    # Look for the line after the context line that should be modified.
+    # This is a simplified approach; in practice, more sophisticated logic may be needed.
+    if target_line_context > 0 and target_line_context < len(target_lines):
+        # Check if the next line is the one to modify
+        return target_line_context + 1
+
+    return target_line_context
+
+def translate_toc_lines(toc_operations, ai_client, repo_config):
+    """Translate multiple TOC lines at once"""
+    lines_to_translate = []
+
+    # Collect all lines that need translation
+    for op in toc_operations:
+        if op.get('needs_translation', False):
+            lines_to_translate.append({
+                'operation_type': 'added' if 'target_insertion_after' in op else 'modified',
+                'content': op['content'],
+                'source_line': op['source_line']
+            })
+
+    if not lines_to_translate:
+        thread_safe_print(f"   ā­ļø No TOC lines need translation")
+        return {}
+
+    thread_safe_print(f"   šŸ¤– Translating {len(lines_to_translate)} TOC lines...")
+
+    # Prepare the content for AI translation
+    content_dict = {}
+    for i, line_info in enumerate(lines_to_translate):
+        content_dict[f"line_{i}"] = line_info['content']
+
+    source_lang = repo_config['source_language']
+    target_lang = repo_config['target_language']
+
+    prompt = f"""You are a professional translator. Please translate the following TOC (Table of Contents) lines from {source_lang} to {target_lang}.
+
+IMPORTANT INSTRUCTIONS:
+1. Preserve ALL formatting, indentation, spaces, and dashes exactly as they appear
+2. Only translate the text content within square brackets [text]
+3. Keep all markdown links, parentheses, and special characters unchanged
+4. Maintain the exact same indentation and spacing structure
+
+Input lines to translate:
+{json.dumps(content_dict, indent=2, ensure_ascii=False)}
+
+Please return the translated lines in the same JSON format, preserving all formatting and only translating the text within square brackets.
+
+Return format:
+{{
+    "line_0": "translated line with preserved formatting",
+    "line_1": "translated line with preserved formatting"
+}}"""
+
+    # Add token estimation
+    try:
+        from main import print_token_estimation
+        print_token_estimation(prompt, "TOC translation")
+    except ImportError:
+        # Fallback if the import fails: use tiktoken
+        try:
+            import tiktoken
+            enc = tiktoken.get_encoding("cl100k_base")
+            tokens = enc.encode(prompt)
+            actual_tokens = len(tokens)
+            char_count = len(prompt)
+            thread_safe_print(f"   šŸ’° TOC translation")
+            thread_safe_print(f"   šŸ“ Input: {char_count:,} characters")
+            thread_safe_print(f"   šŸ”¢ Actual tokens: {actual_tokens:,} (using tiktoken cl100k_base)")
+        except Exception:
+            # Final fallback: character-based approximation
+            estimated_tokens = len(prompt) // 4
+            char_count = len(prompt)
+            thread_safe_print(f"   šŸ’° TOC translation")
+            thread_safe_print(f"   šŸ“ Input: {char_count:,} characters")
+            thread_safe_print(f"   šŸ”¢ Estimated tokens: ~{estimated_tokens:,} (fallback: 4 chars/token approximation)")
+
+    try:
+        ai_response = ai_client.chat_completion(
+            messages=[{"role": "user", "content": prompt}],
+            temperature=0.1
+        )
+        thread_safe_print(f"   šŸ“ AI translation response received")
+
+        # Parse the AI response
+        try:
+            json_start = ai_response.find('{')
+            json_end = ai_response.rfind('}') + 1
+
+            if json_start != -1 and json_end > json_start:
+                json_str = ai_response[json_start:json_end]
+                translated_lines = json.loads(json_str)
+
+                # Map the translations back to the original operations
+                translation_mapping = {}
+                for i, line_info in enumerate(lines_to_translate):
+                    key = f"line_{i}"
+                    if key in translated_lines:
+                        translation_mapping[line_info['source_line']] = translated_lines[key]
+
+                thread_safe_print(f"   āœ… Successfully translated {len(translation_mapping)} TOC lines")
+                return translation_mapping
+
+            # No JSON object found in the response
+            thread_safe_print(f"   āŒ No JSON object found in AI translation response")
+            return {}
+
+        except json.JSONDecodeError as e:
+            thread_safe_print(f"   āŒ Failed to parse AI translation response: {e}")
+            return {}
+
+    except Exception as e:
+        thread_safe_print(f"   āŒ AI translation failed: {e}")
+        return {}
+
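+# Illustrative input/output for translate_toc_lines (values assumed): given one
+# operation {'source_line': 21, 'content': '- [ęµ‹čƕ](/test.md)',
+# 'needs_translation': True, 'target_insertion_after': 20}, a successful call
+# returns a mapping keyed by the source line, e.g. {21: '- [Test](/test.md)'}.
+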
+def process_toc_file(file_path, toc_data, pr_url, github_client, ai_client, repo_config):
+    """Process a single TOC.md file with the special TOC logic"""
+    thread_safe_print(f"\nšŸ“‹ Processing TOC file: {file_path}")
+
+    try:
+        target_local_path = repo_config['target_local_path']
+        target_file_path = os.path.join(target_local_path, file_path)
+
+        # Read the current target file
+        with open(target_file_path, 'r', encoding='utf-8') as f:
+            target_content = f.read()
+
+        target_lines = target_content.split('\n')
+        operations = toc_data['operations']
+
+        # Separate the operations by type
+        deleted_ops = [op for op in operations if 'target_line' in op]
+        added_ops = [op for op in operations if 'target_insertion_after' in op]
+        modified_ops = [op for op in operations if 'target_line_context' in op]
+
+        thread_safe_print(f"   šŸ“Š TOC operations: {len(deleted_ops)} deleted, {len(added_ops)} added, {len(modified_ops)} modified")
+
+        # Process deletions first (work backwards to maintain line numbers)
+        if deleted_ops:
+            thread_safe_print(f"   šŸ—‘ļø Processing {len(deleted_ops)} deletions...")
+            deleted_ops.sort(key=lambda x: x['target_line'], reverse=True)
+
+            for del_op in deleted_ops:
+                target_line_num = del_op['target_line'] - 1  # Convert to 0-based
+                if 0 <= target_line_num < len(target_lines):
+                    thread_safe_print(f"   āŒ Deleting line {del_op['target_line']}: {target_lines[target_line_num].strip()}")
+                    del target_lines[target_line_num]
+
+        # Process modifications
+        if modified_ops:
+            thread_safe_print(f"   āœļø Processing {len(modified_ops)} modifications...")
+
+            # Get translations for the operations that need them
+            translations = translate_toc_lines(modified_ops, ai_client, repo_config)
+
+            for mod_op in modified_ops:
+                target_line_num = find_toc_modification_line(mod_op, target_lines) - 1  # Convert to 0-based
+
+                if 0 <= target_line_num < len(target_lines):
+                    if mod_op.get('needs_translation', False) and mod_op['source_line'] in translations:
+                        new_content = translations[mod_op['source_line']]
+                        thread_safe_print(f"   āœļø Modifying line {target_line_num + 1} with translation")
+                    else:
+                        new_content = mod_op['content']
+                        thread_safe_print(f"   āœļø Modifying line {target_line_num + 1} without translation")
+
+                    target_lines[target_line_num] = new_content
+
+        # Process additions last
+        if added_ops:
+            thread_safe_print(f"   āž• Processing {len(added_ops)} additions...")
+
+            # Get translations for the operations that need them
+            translations = translate_toc_lines(added_ops, ai_client, repo_config)
+
+            # Sort the additions by insertion point and process them in reverse order
+            added_ops.sort(key=lambda x: x['target_insertion_after'], reverse=True)
+
+            for add_op in added_ops:
+                insertion_after = add_op['target_insertion_after']
+
+                if add_op.get('needs_translation', False) and add_op['source_line'] in translations:
+                    new_content = translations[add_op['source_line']]
+                    thread_safe_print(f"   āž• Inserting after line {insertion_after} with translation")
+                else:
+                    new_content = add_op['content']
+                    thread_safe_print(f"   āž• Inserting after line {insertion_after} without translation")
+
+                # Insert the new line
+                if insertion_after < len(target_lines):
+                    target_lines.insert(insertion_after, new_content)
+                else:
+                    target_lines.append(new_content)
+
+        # Write the updated content back to the file
+        updated_content = '\n'.join(target_lines)
+        with open(target_file_path, 'w', encoding='utf-8') as f:
+            f.write(updated_content)
+
+        thread_safe_print(f"   āœ… TOC file updated: {file_path}")
+
+    except Exception as e:
+        thread_safe_print(f"   āŒ Error processing TOC file {file_path}: {e}")
+
+def process_toc_files(toc_files, pr_url, github_client, ai_client, repo_config):
+    """Process all TOC files"""
+    if not toc_files:
+        return
+
+    thread_safe_print(f"\nšŸ“‹ Processing {len(toc_files)} TOC files...")
+
+    for file_path, toc_data in toc_files.items():
+        if toc_data['type'] == 'toc':
+            process_toc_file(file_path, toc_data, pr_url, github_client, ai_client, repo_config)
+        else:
+            thread_safe_print(f"   āš ļø Unknown TOC data type: {toc_data['type']} for {file_path}")
+
+    thread_safe_print(f"   āœ… All TOC files processed")
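+# Usage sketch (illustrative only; the real entry point lives in main_workflow.py,
+# and the exact shapes of toc_files and repo_config are assumptions here):
+#
+#   toc_files = {
+#       "TOC.md": {
+#           "type": "toc",
+#           "operations": [
+#               {"target_line": 12, "content": "- [旧条ē›®](/old.md)"},
+#               {"target_insertion_after": 20, "source_line": 21,
+#                "content": "- [ꖰ条ē›®](/new.md)", "needs_translation": True},
+#           ],
+#       },
+#   }
+#   process_toc_files(toc_files, pr_url, github_client, ai_client, repo_config)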