Skip to content

Commit e1b8d06

Browse files
authored
Merge pull request #97 from idlip/parser
add parser script to get filled in issue template
2 parents 25eb4ed + 0213bce commit e1b8d06

File tree

1 file changed

+168
-0
lines changed

1 file changed

+168
-0
lines changed

parse-maintainer.py

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
"""
2+
Script to parse maintainer data for a Forklore PR.
3+
Intended both for maintainers who want to self-contribute and for anyone else who wants to help out!
4+
"""
5+
6+
import re
7+
import json
8+
from html import escape
9+
10+
def is_image(url):
    """Return True if *url* ends with a known image file extension.

    The comparison is case-insensitive. Besides the whole string, the URL
    with any ``#fragment`` and ``?query`` removed is also checked, so links
    such as ``https://host/logo.png?raw=true`` are recognised as images.
    """
    image_exts = ('.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp')
    lowered = url.lower()
    # Drop "#fragment" first, then "?query", to look at the path portion only.
    path = lowered.split('#', 1)[0].split('?', 1)[0]
    return lowered.endswith(image_exts) or path.endswith(image_exts)
12+
13+
def format_response(value: str) -> str:
    """Turn a free-text answer into a small, safe HTML fragment.

    The text is stripped, ``http(s)://`` URLs become ``<img>`` tags (when
    ``is_image`` says so) or ``<a>`` links, every other piece of text is
    HTML-escaped, and newlines are converted to ``<br>``.

    Args:
        value: Raw answer text taken from the issue body.

    Returns:
        The HTML-formatted answer.
    """
    value = value.strip()

    def convert_url(url):
        # Escape quotes as well: the URL is placed inside an attribute value.
        href = escape(url, quote=True)
        if is_image(url):
            return f'<img src="{href}" alt="image" />'
        return f'<a href="{href}">{escape(url, quote=False)}</a>'

    pieces = []
    cursor = 0
    for match in re.finditer(r'https?://\S+', value):
        # Sentence punctuation glued to the end of a URL is not part of it;
        # leave it in the surrounding text instead of the link target.
        url = match.group(0).rstrip('.,;:!?')
        pieces.append(escape(value[cursor:match.start()], quote=False))
        pieces.append(convert_url(url))
        cursor = match.start() + len(url)
    pieces.append(escape(value[cursor:], quote=False))

    # Newlines are converted last so the inserted <br> tags are not escaped.
    return ''.join(pieces).replace('\n', '<br>')
28+
29+
def parse_multiline_field(lines, start_index):
    """Collect the value lines that follow the label at ``start_index``.

    Scanning begins on the line after the label and stops at the first line
    that is either a bold field label (starts and ends with ``**``) or a
    ``##`` / ``###`` section heading, or at the end of ``lines``.

    Returns:
        A ``(value, next_index)`` tuple: the collected lines joined with
        single spaces and stripped, and the index where scanning stopped.
    """
    collected = []
    cursor = start_index + 1
    while cursor < len(lines):
        candidate = lines[cursor]
        is_label = candidate.startswith("**") and candidate.endswith("**")
        is_heading = candidate.startswith(("### ", "## "))
        if is_label or is_heading:
            break
        collected.append(candidate)
        cursor += 1
    joined = " ".join(collected)
    return joined.strip(), cursor
42+
43+
44+
def _is_field_boundary(line):
    """Return True if *line* starts a bold field label or a section heading."""
    return line.startswith("**") or line.startswith(("## ", "### "))


def _single_line_value(lines, index):
    """Return ``(value, next_index)`` for the one-line field labelled at *index*.

    The value is the line right after the label. If that line is missing or
    is itself a label/heading (the field was left blank), the value is ``""``
    and ``next_index`` points at that line so it is still parsed normally.
    """
    following = index + 1
    if following < len(lines) and not _is_field_boundary(lines[following]):
        return lines[following].strip(), following + 1
    return "", following


def _project_to_json(project):
    """Map a project's template field names onto the output JSON keys."""
    return {
        "name": project.get("Name", ""),
        "project_link": project.get("Project Link", ""),
        "website_link": project.get("Website Link", ""),
        "logo": project.get("Logo URL", ""),
        "description": project.get("Full Description", ""),
        "short_description": project.get("Short Description", ""),
    }


def parse_issue(markdown: str):
    """Parse a filled-in maintainer issue template into a dictionary.

    HTML comments and blank lines are dropped, then the remaining lines are
    scanned for bold ``**Label:**`` fields, ``### Project`` headings and
    numbered bold questions (``**1. ...**``).

    Args:
        markdown: Full text of the issue body.

    Returns:
        A dict with the keys ``username``, ``full_name``, ``photo``,
        ``designation`` (strings), ``socials`` (list of ``{"label", "link"}``
        dicts), ``projects`` (list of dicts with ``name``, ``project_link``,
        ``website_link``, ``logo``, ``description``, ``short_description``)
        and ``form`` (list of ``{"question", "response"}`` dicts whose
        responses were passed through ``format_response``).
    """
    # Remove all comments (<!-- ... -->)
    markdown = re.sub(r'<!--.*?-->', '', markdown, flags=re.DOTALL)

    lines = [line.strip() for line in markdown.strip().splitlines()]
    lines = [line for line in lines if line != '']  # remove empty lines

    data = {
        "username": "",
        "full_name": "",
        "photo": "",
        "designation": "",
        "socials": [],
        "projects": [],
        "form": []
    }

    # One-line user fields: label prefix -> key in the output dict.
    user_fields = {
        "**Username:**": "username",
        "**Full Name:**": "full_name",
        "**Photo URL:**": "photo",
        "**Designation / Role:**": "designation",
    }
    project_fields = ["Name", "Project Link", "Website Link", "Logo URL", "Short Description", "Full Description"]
    current_project = {}
    form_questions = []

    i = 0
    while i < len(lines):
        line = lines[i]

        # ==== USER DETAILS ====
        user_key = next(
            (key for label, key in user_fields.items() if line.startswith(label)),
            None,
        )
        if user_key is not None:
            data[user_key], i = _single_line_value(lines, i)
            continue

        if line.startswith("**Social Profiles:**"):
            i += 1
            # Stop at the next label/heading: bold labels contain ':' too and
            # must not be mistaken for a "label: link" entry.
            while i < len(lines) and ':' in lines[i] and not _is_field_boundary(lines[i]):
                label, link = lines[i].split(":", 1)
                data["socials"].append({"label": label.strip(), "link": link.strip()})
                i += 1
            continue

        # ==== PROJECTS ====
        if line.startswith("### Project"):
            # A new heading closes the project collected so far; convert it
            # the same way as the final one so every entry has the same keys.
            if current_project:
                data["projects"].append(_project_to_json(current_project))
                current_project = {}
            i += 1
            continue

        project_field = next(
            (field for field in project_fields if line.startswith(f"**{field}:**")),
            None,
        )
        if project_field is not None:
            value, i = parse_multiline_field(lines, i)
            current_project[project_field] = value
            continue

        # ==== FORM QUESTIONS ====
        question_match = re.match(r"\*\*(\d+\..*?)\*\*", line)
        if question_match:
            question = question_match.group(1).strip()
            # Capture all lines under this until we hit another bold or end
            i += 1
            response_lines = []
            while i < len(lines) and not lines[i].startswith("**"):
                response_lines.append(lines[i])
                i += 1
            response_text = "\n".join(response_lines).strip()
            form_questions.append({
                "question": re.sub(r"^\d+\.\s*", "", question),
                "response": format_response(response_text)
            })
            continue

        i += 1

    # Append last project
    if current_project:
        data["projects"].append(_project_to_json(current_project))

    # Append form Q&A
    data["form"] = form_questions

    return data
153+
154+
155+
if __name__ == "__main__":
    # Command-line entry point: read the markdown file given as the first
    # argument, parse it and print the result as pretty-printed JSON.
    import sys

    if len(sys.argv) < 2:
        # Report the name the script was actually invoked with, and keep
        # stdout clean so it only ever carries the JSON result.
        print(f"Usage: python {sys.argv[0]} <input_file.md>", file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]

    with open(input_file, "r", encoding="utf-8") as f:
        md = f.read()

    result = parse_issue(md)
    # ensure_ascii=False keeps non-ASCII names and text readable in the output.
    print(json.dumps(result, indent=2, ensure_ascii=False))
168+

0 commit comments

Comments
 (0)