Refactor: Rebase and cleanup.

MarcosBorgesPhD · MarcosBorgesPhD · commit 5b4416d2ee07 · 2025-12-02T21:56:53.000+01:00
This branch has been rebased onto the latest upstream/main to integrate recent dependency fixes (spec.lock update) and fix CI failures.

The history has been cleaned to contain only the changes relevant to this feature.
diff --git a/.github/workflows/auto-pr-on-issue.yml b/.github/workflows/auto-pr-on-issue.yml
@@ -26,6 +26,9 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v6
 
+      - name: Install Pandoc
+        uses: pandoc/actions/setup@v1
+
       - name: Set up Git
         run: |
           git config --global user.email "action@github.com"
diff --git a/.github/workflows/snapshot-ci.yml b/.github/workflows/snapshot-ci.yml
@@ -18,6 +18,9 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v6
 
+      - name: Install Pandoc
+        uses: pandoc/actions/setup@v1
+
       - name: Run snapshot tests
         run: |
           uv run python .github/auto-pr-tests/test_runner.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,7 +7,7 @@ requires-python = ">=3.12,<3.13"
 dependencies = [
     "builder",
     "tqdm",
-    "m2r",
+    "pypandoc",
     "sphinx>=8.2.3",
     "sphinx-autobuild>=2024.10.3",
     "sphinx-needs>=5.1.0",
diff --git a/scripts/auto-pr-helper.py b/scripts/auto-pr-helper.py
@@ -3,9 +3,9 @@
 import os
 import re
 import sys
-from textwrap import indent
+from textwrap import dedent, indent
 
-from m2r import convert
+import pypandoc
 
 scriptpath = "../"
 script_dir = os.path.dirname(os.path.abspath(__file__))
@@ -19,7 +19,67 @@
 
 
 def md_to_rst(markdown: str) -> str:
-    return convert(markdown)
+    """Convert a Markdown string to reStructuredText using Pandoc."""
+    # --wrap=none stops Pandoc from re-wrapping long lines in its output
+    pandoc_args = ['--wrap=none']
+    rst_text = pypandoc.convert_text(
+        markdown,
+        'rst',
+        format='markdown',
+        extra_args=pandoc_args,
+    )
+    return rst_text
+
+
+def normalize_list_separation(text: str) -> str:
+    """
+    Ensure every new list block is preceded by a blank line,
+    required for robust parsing by Pandoc when targeting RST.
+
+    Lines inside fenced code blocks (``` or ~~~) are copied through
+    untouched, so code that merely looks like a list item (for example
+    the " * " continuation lines of a block comment) is never split up.
+
+    Args:
+        text: Markdown text to normalize.
+
+    Returns:
+        The text with a blank line inserted before each list block that
+        directly follows a non-blank, non-list line. Lines are re-joined
+        with single newline characters, so a trailing newline in the
+        input is not kept.
+    """
+    # Regex to identify any line that starts a Markdown list item (* or -)
+    _list_item_re = re.compile(r"^[ \t]*[*-][ \t]+")
+    # Regex to identify a code-fence line: 3+ backticks or 3+ tildes
+    _fence_re = re.compile(r"^[ \t]*(`{3,}|~{3,})")
+
+    output_buffer = []
+    open_fence = ""  # Marker of the fence currently open, "" when outside
+    for line in text.splitlines():
+        fence = _fence_re.match(line)
+
+        if open_fence:
+            # Inside a code block: never touch the content. The block ends
+            # at a bare fence of the same character that is at least as
+            # long as the opening one.
+            if (
+                fence
+                and fence.group(1)[0] == open_fence[0]
+                and len(fence.group(1)) >= len(open_fence)
+                and not line[fence.end():].strip()
+            ):
+                open_fence = ""
+            output_buffer.append(line)
+            continue
+
+        # A backtick run followed by more backticks on the same line is
+        # inline code (e.g. ```x```), not the start of a fenced block.
+        if fence and not (
+            fence.group(1)[0] == "`" and "`" in line[fence.end():]
+        ):
+            open_fence = fence.group(1)
+            output_buffer.append(line)
+            continue
+
+        # Get the last line appended to the output buffer
+        prev = output_buffer[-1] if output_buffer else ""
+
+        # Insert a blank line before a list item whose predecessor is
+        # neither blank nor itself a list item
+        if (
+            _list_item_re.match(line)
+            and prev.strip()
+            and not _list_item_re.match(prev)
+        ):
+            output_buffer.append("")
+
+        output_buffer.append(line)
+
+    return "\n".join(output_buffer)
+
+
+def normalize_md(issue_body: str) -> str:
+    """
+    Simplify links and mixed bold/code markup that confuse the
+    Markdown parser.
+
+    The rewrites are applied in order, each one on the result of the
+    previous one, and the rewritten text is returned.
+    """
+    rewrites = (
+        # Links with inline-code: [`link`](url) => [link](url)
+        (r"\[\s*`([^`]+)`\s*\]\(([^)]+)\)", r"[\1](\2)"),
+        # Bold wrapped around code: **`code`** => `code`
+        (r"\*\*`([^`]+)`\*\*", r"`\1`"),
+        # Bold markers inside code: `**code**` => `code`
+        (r"`\*\*([^`]+)\*\*`", r"`\1`"),
+    )
+
+    normalized = issue_body
+    for pattern, replacement in rewrites:
+        normalized = re.sub(pattern, replacement, normalized)
+
+    return normalized
 
 
 def extract_form_fields(issue_body: str) -> dict:
@@ -85,9 +145,15 @@ def format_code_block(code: str, lang: str = "rust") -> str:
             lines = lines[1:]
             if lines and lines[-1].strip() == "```":
                 lines = lines[:-1]
+
+        # Dedent before adding indentation
+        dedented_code = dedent("\n".join(lines))
+
+        # Add required indentation
         indented_code = "\n".join(
-            f"         {line}" for line in lines
-        )  # Adds the required indentation
+            f"       {line}" for line in dedented_code.splitlines()
+        )
+
         return f"\n\n{indented_code}\n"
 
     amplification_text = indent(md_to_rst(get("amplification")), " " * 12)
@@ -139,7 +205,11 @@ def format_code_block(code: str, lang: str = "rust") -> str:
 
     issue_number = json_issue["number"]
     issue_title = json_issue["title"]
+
     issue_body = json_issue["body"]
+    issue_body = normalize_md(issue_body)
+    issue_body = normalize_list_separation(issue_body)
+
     fields = extract_form_fields(issue_body)
     chapter = fields["chapter"]
     content = guideline_template(fields)
diff --git a/uv.lock b/uv.lock