|
3 | 3 | import os |
4 | 4 | import re |
5 | 5 | import sys |
6 | | -from textwrap import indent |
| 6 | +from textwrap import dedent, indent |
7 | 7 |
|
8 | | -from m2r import convert |
| 8 | +import pypandoc |
9 | 9 |
|
10 | 10 | scriptpath = "../" |
11 | 11 | script_dir = os.path.dirname(os.path.abspath(__file__)) |
|
19 | 19 |
|
20 | 20 |
|
def md_to_rst(markdown: str) -> str:
    """
    Convert a Markdown string to reStructuredText with Pandoc (via pypandoc).

    The input is read with Pandoc's ``markdown`` reader and written with the
    ``rst`` writer. ``--wrap=none`` is passed through to Pandoc so the output
    is not re-wrapped to a fixed column width.
    """
    pandoc_options = ['--wrap=none']
    rst_text = pypandoc.convert_text(
        markdown,
        'rst',
        format='markdown',
        extra_args=pandoc_options,
    )
    return rst_text
| 28 | + |
| 29 | + |
# A line that starts a Markdown bullet item: optional indentation, then
# "*" or "-", then at least one space or tab.
_LIST_ITEM_RE = re.compile(r"^[ \t]*[*-][ \t]+")

# A line that opens (or closes) a fenced code block: optional indentation,
# then a run of three or more backticks or tildes. A backtick fence may not
# have further backticks later on the same line (that would be inline code).
_CODE_FENCE_RE = re.compile(r"^[ \t]*(`{3,}(?=[^`]*$)|~{3,})")


def normalize_list_separation(text: str) -> str:
    """
    Ensures every new list block is preceded by a blank line,
    required for robust parsing by Pandoc when targeting RST.

    A blank line is inserted before a bullet item ("*" or "-") when the
    previous line is non-blank and is not itself a bullet item.

    Lines inside fenced code blocks (``` or ~~~) are copied verbatim, so
    code that merely looks like a bullet (e.g. " * text" inside a block
    comment, or a continuation line starting with "- ") is never altered.
    An unclosed fence protects everything up to the end of the text.

    Lines are split with ``str.splitlines()`` and re-joined with "\\n", so
    the result has no trailing newline and uses "\\n" line endings.
    """
    output_buffer = []
    open_fence = ""  # marker of the fence we are currently inside, "" if none

    for line in text.splitlines():
        fence = _CODE_FENCE_RE.match(line)

        if open_fence:
            # Inside a code block: only a bare fence of the same character,
            # at least as long as the opening one, closes it.
            marker = fence.group(1) if fence else ""
            if marker.startswith(open_fence) and line.strip() == marker:
                open_fence = ""
            output_buffer.append(line)
            continue

        if fence:
            open_fence = fence.group(1)
            output_buffer.append(line)
            continue

        # Last line already emitted ("" when nothing has been emitted yet)
        prev = output_buffer[-1] if output_buffer else ""

        # (Current is item) AND (Prev is not blank) AND (Prev is not an item)
        if _LIST_ITEM_RE.match(line) and prev.strip() and not _LIST_ITEM_RE.match(prev):
            # Insert a blank line to clearly separate the new list block
            output_buffer.append("")

        output_buffer.append(line)

    return "\n".join(output_buffer)
| 54 | + |
| 55 | + |
# [`link`](url): a link whose text is a single inline-code span
_CODE_LINK_RE = re.compile(r"\[\s*`([^`]+)`\s*\]\(([^)]+)\)")
# **`code`**: bold wrapped around inline code
_BOLD_AROUND_CODE_RE = re.compile(r"\*\*`([^`]+)`\*\*")
# `**code**`: bold markers inside inline code
_BOLD_INSIDE_CODE_RE = re.compile(r"`\*\*([^`]+)\*\*`")

# A whole fenced code block: an opening ``` or ~~~ fence line, the content,
# and the matching closing fence (or end of text when the fence is unclosed).
_FENCED_CODE_BLOCK_RE = re.compile(
    r"^[ \t]*(?P<fence>`{3,}(?=[^`\n]*$)|~{3,})[^\n]*$"
    r".*?"
    r"(?:^[ \t]*(?P=fence)[`~]*[ \t\r]*$|\Z)",
    re.MULTILINE | re.DOTALL,
)


def _normalize_md_prose(segment: str) -> str:
    """Apply the link and bold/code rewrites to text outside code fences."""
    # [`link`](url) => [link](url)
    segment = _CODE_LINK_RE.sub(r"[\1](\2)", segment)
    # **`code`** => `code`
    segment = _BOLD_AROUND_CODE_RE.sub(r"`\1`", segment)
    # `**code**` => `code`
    segment = _BOLD_INSIDE_CODE_RE.sub(r"`\1`", segment)
    return segment


def normalize_md(issue_body: str) -> str:
    """
    Fix links and mixed bold/code that confuse Markdown parser

    Three rewrites are applied, in this order:

    * ``[`link`](url)`` becomes ``[link](url)``
    * ``**`code`**`` becomes ```code```
    * ```**code**``` becomes ```code```

    Fenced code blocks (``` or ~~~) are passed through untouched, so code
    examples that happen to contain these patterns (for instance Rust doc
    comments with intra-doc links) are preserved exactly.
    """
    pieces = []
    cursor = 0
    for block in _FENCED_CODE_BLOCK_RE.finditer(issue_body):
        # Text before this code block gets normalized; the block does not.
        pieces.append(_normalize_md_prose(issue_body[cursor:block.start()]))
        pieces.append(block.group(0))
        cursor = block.end()
    pieces.append(_normalize_md_prose(issue_body[cursor:]))
    return "".join(pieces)
23 | 83 |
|
24 | 84 |
|
25 | 85 | def extract_form_fields(issue_body: str) -> dict: |
@@ -85,9 +145,15 @@ def format_code_block(code: str, lang: str = "rust") -> str: |
85 | 145 | lines = lines[1:] |
86 | 146 | if lines and lines[-1].strip() == "```": |
87 | 147 | lines = lines[:-1] |
| 148 | + |
| 149 | + # Dedent before adding indentation |
| 150 | + dedented_code = dedent("\n".join(lines)) |
| 151 | + |
| 152 | + # Add required indentation |
88 | 153 | indented_code = "\n".join( |
89 | | - f" {line}" for line in lines |
90 | | - ) # Adds the required indentation |
| 154 | + f" {line}" for line in dedented_code.splitlines() |
| 155 | + ) |
| 156 | + |
91 | 157 | return f"\n\n{indented_code}\n" |
92 | 158 |
|
93 | 159 | amplification_text = indent(md_to_rst(get("amplification")), " " * 12) |
@@ -139,7 +205,11 @@ def format_code_block(code: str, lang: str = "rust") -> str: |
139 | 205 |
|
140 | 206 | issue_number = json_issue["number"] |
141 | 207 | issue_title = json_issue["title"] |
| 208 | + |
142 | 209 | issue_body = json_issue["body"] |
| 210 | + issue_body = normalize_md(issue_body) |
| 211 | + issue_body = normalize_list_separation(issue_body) |
| 212 | + |
143 | 213 | fields = extract_form_fields(issue_body) |
144 | 214 | chapter = fields["chapter"] |
145 | 215 | content = guideline_template(fields) |
|
0 commit comments