-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsilkrowparser.py
142 lines (121 loc) · 5.73 KB
/
silkrowparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import re
def parse_line(lines, prefix):
    """Return the value of the first line in *lines* that starts with *prefix*.

    Args:
        lines: Iterable of text lines (for example the result of
            ``file.readlines()``).
        prefix: Metadata marker expected at the very start of the line,
            such as ``"title:"``.

    Returns:
        The text following the prefix with surrounding whitespace removed,
        or ``""`` when no line starts with the prefix.
    """
    for line in lines:
        if line.startswith(prefix):
            # Slice the prefix off the front only: str.replace() would also
            # delete any later occurrence of the prefix inside the value.
            return line[len(prefix):].strip()
    return ""
# Lines starting with one of these prefixes carry page metadata and are
# excluded from the article body.
_METADATA_PREFIXES = (
    "title:",
    "subtitle:",
    "founder:",
    "created:",
    "last-updated:",
    "last-updater:",
    "topics:",
)

# Replacement for the image marker /@/caption/file/@/. The attributes are
# separated by a space, as HTML attribute syntax requires.
_IMAGE_TAG = r"""<img onclick="inform('\1')" src='../img/\2'>"""

# (compiled pattern, replacement) pairs, applied to every body line in this
# exact order. Compiled once here instead of once per line.
_MARKER_RULES = tuple(
    (re.compile(pattern), replacement)
    for pattern, replacement in (
        (r'\/\*\/(.*?)\/\*\/', r'<b>\1</b>'),        # Bold
        (r'\/\/\/(.*?)\/\/\/', r'<i>\1</i>'),        # Italics
        (r'\/\_\/(.*?)\/\_\/', r'<u>\1</u>'),        # Underline
        (r'\/\~\/(.*?)\/\~\/', r'<del>\1</del>'),    # Crossed
        (r'\/\#\/(.*?)\/\#\/', r'<code>\1</code>'),  # Monospace
        (r'\/\!\/(.*?)\/\!\/', r'<mark>\1</mark>'),  # Marked
        # The trailing lazy group always matches the empty string, so these
        # two rules simply replace the marker itself.
        # NOTE(review): the indentation replacement is whitespace copied
        # verbatim from the original; confirm the intended characters.
        (r'\/\§\/ (.*?)', r'  \1'),                  # Indentation
        (r'\/\,\/(.*?)', r'<hr>'),                   # HR Line
        (r'\/\@\/(.*?)\/(.*?)\/\@\/', _IMAGE_TAG),   # Image
        (r'\/\#1\/(.*?)\/\#1\/', r'<h1>\1</h1>'),    # h1
        (r'\/\#2\/(.*?)\/\#2\/', r'<h2>\1</h2>'),    # h2
        (r'\/\#3\/(.*?)\/\#3\/', r'<h3>\1</h3>'),    # h3
        (r'\/\#4\/(.*?)\/\#4\/', r'<h4>\1</h4>'),    # h4
        (r'\/\#5\/(.*?)\/\#5\/', r'<h5>\1</h5>'),    # h5
        (r'\/\#6\/(.*?)\/\#6\/', r'<h6>\1</h6>'),    # h6
    )
)


def parse_content(lines):
    """Convert the body lines of a marked-up text file into HTML.

    Metadata lines (those starting with ``title:``, ``subtitle:`` and so on)
    are skipped. Inline markers on the remaining lines are rewritten to HTML
    tags, and list markers are handled per line:

    * a line starting with ``/=/`` opens a ``<ul>`` unless one is already
      open; the rest of that line is discarded,
    * a line starting with ``/-/`` becomes an ``<li>`` holding the rest of
      the line,
    * any other line closes an open ``<ul>`` and is emitted as is.

    Args:
        lines: Iterable of text lines, normally still carrying their
            trailing newlines.

    Returns:
        The generated HTML with leading and trailing whitespace stripped.
    """
    pieces = []
    ul_open = False  # tracks whether a <ul> tag is currently open

    for line in lines:
        if line.startswith(_METADATA_PREFIXES):
            continue

        for pattern, replacement in _MARKER_RULES:
            line = pattern.sub(replacement, line)

        if line.startswith("/=/"):  # UL
            if not ul_open:
                pieces.append("<ul>\n")
                ul_open = True
        elif line.startswith("/-/"):  # LI
            # Strip the item so the source line's newline does not end up
            # inside the <li> element.
            pieces.append(f"<li>{line[3:].strip()}</li>\n")
        else:
            # Any ordinary line ends the current list.
            if ul_open:
                pieces.append("</ul>\n")
                ul_open = False
            pieces.append(line)

    # Close a list that runs to the end of the input.
    if ul_open:
        pieces.append("</ul>\n")

    return "".join(pieces).strip()
def format_paragraphs(content):
    """Wrap each blank-line-separated chunk of *content* in ``<p>`` tags.

    The text is split on double newlines; every resulting chunk, including
    an empty one, is emitted as ``<p>chunk</p>`` followed by a blank line.

    Args:
        content: Body text whose paragraphs are separated by ``"\\n\\n"``.

    Returns:
        The concatenated paragraph markup as a single string.
    """
    return "".join(f"<p>{chunk}</p>\n\n" for chunk in content.split('\n\n'))
def _splice_section(document, open_tag, close_tag, replacement):
    """Replace the text between *open_tag* and *close_tag* in *document*.

    Returns the updated document, or ``None`` when the opening tag is
    missing or no closing tag follows it.
    """
    open_at = document.find(open_tag)
    if open_at == -1:
        return None
    start = open_at + len(open_tag)
    # Search after the opening tag so a stray earlier closing tag is ignored.
    end = document.find(close_tag, start)
    if end == -1:
        return None
    return document[:start] + replacement + document[end:]


def create_html_from_text(text_file, existing_html_file):
    """Render a marked-up text file into an existing HTML page, in place.

    The metadata and body parsed from *text_file* replace everything between
    ``<article>`` and ``</article>`` in *existing_html_file*, and everything
    between ``<head>`` and ``</head>`` is replaced by a generated set of
    link, script, title and meta tags. The HTML file is then overwritten.

    If either file does not exist, or the HTML file lacks one of the two
    sections, a message is printed and nothing is written.

    Args:
        text_file: Path of the marked-up source text file (read as UTF-8).
        existing_html_file: Path of the HTML page to update (UTF-8).

    Returns:
        None.
    """
    # checks if the files exist
    if not os.path.exists(text_file):
        print(f"file '{text_file}' not found")
        return
    if not os.path.exists(existing_html_file):
        print(f"file '{existing_html_file}' not found")
        return

    # reads the content of the text file
    with open(text_file, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    # reads the HTML file
    with open(existing_html_file, 'r', encoding='utf-8') as html_file:
        html_content = html_file.read()

    # Fail fast on a template without the expected sections: str.find()
    # returns -1 for a missing tag, and slicing with that offset would
    # silently corrupt the page when it is written back.
    for open_tag, close_tag in (("<article>", "</article>"), ("<head>", "</head>")):
        if _splice_section(html_content, open_tag, close_tag, "") is None:
            print(f"file '{existing_html_file}' has no {open_tag}...{close_tag} section")
            return

    # parses tags
    title = parse_line(lines, "title:")
    subtitle = parse_line(lines, "subtitle:")
    founder = parse_line(lines, "founder:")
    created = parse_line(lines, "created:")
    lastupdated = parse_line(lines, "last-updated:")
    lastupdater = parse_line(lines, "last-updater:")
    topics = parse_line(lines, "topics:")
    content = parse_content(lines)
    formatted_content = format_paragraphs(content)

    # content inserted into the <article> tag of the HTML file
    article_html = f"""
<p class="toc" onclick="informtoc();">ToC</p>
<h1 class="title">{title}</h1>
<p class="subtitle">{subtitle}</p>
<div class="topics"><p>{topics}</p></div>
<p class="created">Created by {founder} | {created}</p>
<p class="lastupdated">Last updated by {lastupdater} | {lastupdated}</p>
<hr>
{formatted_content}
"""

    # meta tags that replace the entire head
    meta_tags = f"""
<link rel="stylesheet" type="text/css" href="../styles/font.css">
<link rel="stylesheet" type="text/css" href="../styles/style.css">
<script src="../js/info.js"></script>
<script src="../js/toc.js"></script>
<link rel="icon" type="x-icon" href="../img/encycloplate.png">
<title>Encycloplate - {title}</title>
<meta name="description" content="{subtitle}">
<meta name="author" content="{founder}">
<meta name="keywords" content="{topics}">
"""

    # The article is replaced first and the head second; the head is looked
    # up again in the already-updated document.
    sections = (
        ("<article>", "</article>", article_html),
        ("<head>", "</head>", meta_tags),
    )
    for open_tag, close_tag, replacement in sections:
        updated = _splice_section(html_content, open_tag, close_tag, replacement)
        if updated is None:
            print(f"file '{existing_html_file}' has no {open_tag}...{close_tag} section")
            return
        html_content = updated

    # writes updated HTML content into the HTML file
    with open(existing_html_file, 'w', encoding='utf-8') as html_file:
        html_file.write(html_content)
    print(f"content has been inserted into '{existing_html_file}' successfully")
if __name__ == "__main__":
    # Ask for the marked-up source file first, then the HTML page to update.
    source_name = input("the input file name: ")
    page_name = input("the html file name: ")
    create_html_from_text(source_name, page_name)
    # Block until Enter is pressed (presumably so a console window opened
    # just for this script does not close before the messages can be read).
    input("")