# Matches a line that starts a Markdown bullet item using "*" or "-".
_LIST_ITEM_RE = re.compile(r"^[ \t]*[*-][ \t]+")

# Matches a potential code-fence line: optional indent, a run of three or
# more backticks or tildes, then the rest of the line (info string).
_FENCE_RE = re.compile(r"^[ \t]*(`{3,}|~{3,})(.*)$")


def md_to_rst(markdown: str) -> str:
    """Convert Markdown text to reStructuredText using Pandoc.

    Args:
        markdown: Markdown source text.

    Returns:
        The reStructuredText produced by Pandoc.

    Note:
        ``--wrap=none`` asks Pandoc not to re-wrap output lines.
        pypandoc drives the ``pandoc`` executable, which the CI workflows
        install in a dedicated "Install Pandoc" step.
    """
    # Imported here so this block carries its own third-party dependency.
    import pypandoc

    return pypandoc.convert_text(
        markdown,
        "rst",
        format="markdown",
        extra_args=["--wrap=none"],
    )


def _opening_fence(line: str) -> str:
    """Return the fence marker if *line* opens a fenced code block, else ""."""
    found = _FENCE_RE.match(line)
    if found is None:
        return ""
    marker, info = found.group(1), found.group(2)
    # A backtick fence may not carry backticks in its info string; such a
    # line is inline code (e.g. "```x``` text"), not a block opener.
    if marker[0] == "`" and "`" in info:
        return ""
    return marker


def _closes_fence(line: str, open_marker: str) -> bool:
    """Return True if *line* closes the fence opened by *open_marker*."""
    found = _FENCE_RE.match(line)
    if found is None:
        return False
    marker, trailing = found.group(1), found.group(2)
    # Same fence character, at least as long, and nothing but whitespace after.
    return (
        marker[0] == open_marker[0]
        and len(marker) >= len(open_marker)
        and not trailing.strip()
    )


def normalize_list_separation(text: str) -> str:
    """Ensure every new bullet-list block is preceded by a blank line.

    Pandoc needs the blank line to recognise a list that directly follows
    a paragraph when converting Markdown to RST. The script applies this to
    the issue body after ``normalize_md`` and before ``extract_form_fields``.

    A blank line is inserted before a line that starts a ``*`` or ``-`` item
    when the previous output line is neither blank nor itself a list item.

    Lines inside fenced code blocks (backtick or tilde fences) are copied
    verbatim. Without this, code such as a block comment (`` * text``) or a
    wrapped expression (``    - b;``) would have blank lines injected into it.
    An unclosed fence extends to the end of the text.

    Args:
        text: Markdown text to normalise.

    Returns:
        The text with separating blank lines added. Lines are re-joined with
        ``"\\n"``, so other line endings are normalised and a trailing
        newline is not preserved.
    """
    output_buffer: list[str] = []
    open_marker = ""  # fence marker of the code block we are inside, or ""

    for line in text.splitlines():
        if open_marker:
            # Inside a fenced code block: never touch the content.
            if _closes_fence(line, open_marker):
                open_marker = ""
            output_buffer.append(line)
            continue

        open_marker = _opening_fence(line)
        if open_marker:
            output_buffer.append(line)
            continue

        # Last line already emitted; "" at the very start of the text.
        prev = output_buffer[-1] if output_buffer else ""

        # (current is item) AND (prev is not blank) AND (prev is not an item)
        if (
            _LIST_ITEM_RE.match(line)
            and prev.strip()
            and not _LIST_ITEM_RE.match(prev)
        ):
            # Separate the new list block from the preceding paragraph.
            output_buffer.append("")

        output_buffer.append(line)

    return "\n".join(output_buffer)