Skip to content

Commit 0a28ec7

Browse files
committed
Added library
1 parent 033c3d6 commit 0a28ec7

File tree

7 files changed

+557
-0
lines changed

7 files changed

+557
-0
lines changed

sbapp/md2bbcode/__init__.py

Whitespace-only changes.

sbapp/md2bbcode/html2bbcode.py

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# converts some HTML tags to BBCode
2+
# pass --debug to save the output to readme.finalpass
3+
# may be better off replacing this with html to markdown (and then to bbcode). Lepture recommends a JS html to markdown converter: sundown
4+
from bs4 import BeautifulSoup, NavigableString
5+
import argparse
6+
7+
def handle_font_tag(tag, replacements):
    """Convert a ``<font>`` tag's color/size/face attributes to nested BBCode.

    The attributes are nested in a fixed order, outermost first:
    ``[COLOR=..][SIZE=..][FONT=..]content[/FONT][/SIZE][/COLOR]``.
    A tag carrying none of these attributes yields just its converted
    children.

    Args:
        tag: Element exposing ``attrs`` (mapping) and ``children``
            (iterable), e.g. a BeautifulSoup ``Tag``.
        replacements: Tag-name -> BBCode map. Not used by this handler; kept
            so the existing signature stays valid for callers.

    Returns:
        The BBCode string for the tag and its descendants.
    """
    # (HTML attribute, BBCode tag) pairs, listed outermost wrapper first.
    attribute_map = (('color', 'COLOR'), ('size', 'SIZE'), ('face', 'FONT'))
    wrappers = [
        (bb_name, tag.attrs[html_name])
        for html_name, bb_name in attribute_map
        if html_name in tag.attrs
    ]

    # Pass only the node: forwarding `replacements` as a second positional
    # argument raised TypeError against the one-parameter converter.
    inner_content = ''.join(
        recursive_html_to_bbcode(child) for child in tag.children
    )

    # Wrap from the innermost attribute outwards so the first one ends up
    # outermost. Example: [COLOR=red][SIZE=5]content[/SIZE][/COLOR]
    for bb_name, value in reversed(wrappers):
        inner_content = f"[{bb_name}={value}]{inner_content}[/{bb_name}]"
    return inner_content
23+
24+
def handle_style_tag(tag, replacements):
    """Convert a tag's inline ``style`` attribute into nested BBCode.

    Recognised declarations, in nesting order (outermost first):

    * ``color``            -> ``[COLOR=value]``
    * ``font-size``        -> ``[SIZE=value]``
    * ``font-family``      -> ``[FONT=value]``
    * ``text-decoration``  -> ``[S]`` if it contains ``line-through``,
      ``[U]`` if it contains ``underline``
    * ``font-weight``      -> ``[B]`` for ``bold`` or a number >= 700

    Args:
        tag: Element exposing ``attrs`` (mapping) and ``children``
            (iterable), e.g. a BeautifulSoup ``Tag``.
        replacements: Tag-name -> BBCode map. Not used by this handler; kept
            so the existing signature stays valid for callers.

    Returns:
        The BBCode string for the tag and its descendants.
    """
    style = tag.attrs.get('style', '')

    # Parse "name: value" declarations. partition() splits on the FIRST colon
    # only, so values that contain ':' (e.g. url(http://...)) are not
    # truncated. Property names are case-insensitive in CSS, so normalise.
    css_properties = {}
    for declaration in style.split(';'):
        name, colon, value = declaration.partition(':')
        if colon:
            css_properties[name.strip().lower()] = value.strip()

    # (opening tag, closing tag name) pairs, outermost wrapper first.
    wrappers = []
    if 'color' in css_properties:
        wrappers.append((f"COLOR={css_properties['color']}", "COLOR"))
    if 'font-size' in css_properties:
        wrappers.append((f"SIZE={css_properties['font-size']}", "SIZE"))
    if 'font-family' in css_properties:
        wrappers.append((f"FONT={css_properties['font-family']}", "FONT"))

    decoration = css_properties.get('text-decoration', '')
    if 'line-through' in decoration:
        wrappers.append(("S", "S"))
    if 'underline' in decoration:
        wrappers.append(("U", "U"))

    weight = css_properties.get('font-weight', '').lower()
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    if weight == 'bold' or (weight.isdecimal() and int(weight) >= 700):
        wrappers.append(("B", "B"))

    # Pass only the node: forwarding `replacements` as a second positional
    # argument raised TypeError against the one-parameter converter.
    inner_content = ''.join(
        recursive_html_to_bbcode(child) for child in tag.children
    )

    # Wrap from the innermost entry outwards so the first one ends up outermost.
    for opening, closing in reversed(wrappers):
        inner_content = f"[{opening}]{inner_content}[/{closing}]"
    return inner_content
56+
57+
def recursive_html_to_bbcode(element, replacements=None):
    """Recursively convert a parsed HTML node to BBCode.

    * Text nodes are emitted verbatim.
    * ``<details>`` becomes ``[SPOILER]...[/SPOILER]``; when the element has
      a ``<summary>`` child, its converted content is used as the title
      (``[SPOILER=title]``).
    * A ``<summary>`` reached on its own produces nothing.
    * Every other element is transparent: its tag is dropped and only its
      converted children are kept.

    Note: the ``<summary>`` of a ``<details>`` is removed from the parsed
    tree (``decompose()``) while converting, so the tree is mutated.

    Args:
        element: A BeautifulSoup node (text node, tag, or the soup root).
        replacements: Optional tag-name -> BBCode map. It is not consulted
            here; it is accepted (and passed down) so that callers supplying
            it as a second argument do not fail with TypeError.

    Returns:
        The BBCode text for ``element`` and its descendants.
    """
    if isinstance(element, NavigableString):
        return str(element)

    if element.name == 'summary':
        # Summaries are consumed by their parent <details>; skip strays.
        return ''

    if element.name == 'details':
        # Only look at DIRECT children: a recursive search would let an outer
        # <details> without a summary steal the summary of a nested one.
        summary = element.find('summary', recursive=False)
        spoiler_title = ''
        if summary is not None:
            spoiler_title = '=' + ''.join(
                recursive_html_to_bbcode(child, replacements)
                for child in summary.contents
            )
            # Drop the summary so it is not repeated in the spoiler body.
            summary.decompose()

        content = ''.join(
            recursive_html_to_bbcode(child, replacements)
            for child in element.contents
        )
        return f'[SPOILER{spoiler_title}]{content}[/SPOILER]'

    # Any other tag: pass its children through unchanged.
    return ''.join(
        recursive_html_to_bbcode(child, replacements)
        for child in element.contents
    )
84+
85+
def html_to_bbcode(html):
    """Parse an HTML string and return its BBCode rendering.

    The input is parsed with BeautifulSoup's ``html.parser`` backend and the
    resulting tree is handed to ``recursive_html_to_bbcode``.

    Args:
        html: HTML (or HTML-containing) text to convert.

    Returns:
        The value produced by ``recursive_html_to_bbcode`` for the parsed tree.
    """
    # HTML tag name -> BBCode tag name table. It is built here but not read
    # anywhere in this function.
    tag_map = {
        # inline formatting
        'b': 'B',
        'strong': 'B',
        'i': 'I',
        'em': 'I',
        'u': 'U',
        's': 'S',
        'sub': 'SUB',
        'sup': 'SUP',
        # block structure
        'p': '',
        'ul': 'LIST',
        'ol': 'LIST=1',
        'li': '*',
        'font': '',
        'blockquote': 'QUOTE',
        'pre': 'CODE',
        'code': 'ICODE',
        # elements whose attributes carry the payload
        'a': 'URL',
        'img': 'IMG',
    }

    parsed_tree = BeautifulSoup(html, 'html.parser')
    return recursive_html_to_bbcode(parsed_tree)
109+
110+
def process_html(input_html, debug=False, output_file=None):
    """Convert HTML to BBCode, optionally dumping the result to a file.

    Args:
        input_html: HTML text to convert.
        debug: When true, the converted BBCode is also written to
            ``output_file``.
        output_file: Path of the debug dump. If omitted while ``debug`` is
            true, ``readme.finalpass`` is used instead of failing on
            ``open(None)``.

    Returns:
        The converted BBCode. It is returned in debug mode as well, so
        callers that use the result do not receive ``None`` when debugging.
    """
    converted_bbcode = html_to_bbcode(input_html)

    if debug:
        dump_path = output_file or 'readme.finalpass'
        with open(dump_path, 'w', encoding='utf-8') as file:
            file.write(converted_bbcode)

    return converted_bbcode
118+
119+
if __name__ == "__main__":
    # Command-line entry point: convert one HTML file to BBCode.
    #   --debug  -> write the result to readme.finalpass
    #   default  -> print the result to stdout
    parser = argparse.ArgumentParser(description="Convert HTML to BBCode with optional debugging output.")
    parser.add_argument('input_file', type=str, help='Input HTML file path')
    parser.add_argument('--debug', action='store_true', help='Save output to readme.finalpass for debugging')

    args = parser.parse_args()
    input_file = args.input_file
    output_file = 'readme.finalpass' if args.debug else None

    with open(input_file, 'r', encoding='utf-8') as file:
        html_content = file.read()

    # Call the processing function
    converted = process_html(html_content, debug=args.debug, output_file=output_file)

    # Without --debug nothing is written to disk, so the result would
    # otherwise be silently discarded; emit it on stdout instead.
    if not args.debug:
        print(converted)

sbapp/md2bbcode/main.py

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# uses a custom mistune renderer to convert Markdown to BBCode. The custom renderer is defined in the bbcode.py file.
2+
# pass --debug to save the output to readme.1stpass (main.py) and readme.finalpass (html2bbcode)
3+
# for further debugging, you can convert the markdown file to AST using md2ast.py. Remember to load the plugin(s) you want to test.
4+
5+
#standard library
6+
import argparse
7+
import sys
8+
9+
# mistune
10+
import mistune
11+
from mistune.plugins.formatting import strikethrough, mark, superscript, subscript, insert
12+
from mistune.plugins.table import table, table_in_list
13+
from mistune.plugins.footnotes import footnotes
14+
from mistune.plugins.task_lists import task_lists
15+
from mistune.plugins.def_list import def_list
16+
from mistune.plugins.abbr import abbr
17+
from mistune.plugins.spoiler import spoiler
18+
19+
# local
20+
from md2bbcode.plugins.merge_lists import merge_ordered_lists
21+
from md2bbcode.renderers.bbcode import BBCodeRenderer
22+
from md2bbcode.html2bbcode import process_html
23+
24+
def convert_markdown_to_bbcode(markdown_text, domain):
    """Render Markdown text through mistune using the BBCode renderer.

    Args:
        markdown_text: Markdown source to convert.
        domain: Passed to ``BBCodeRenderer`` as its ``domain`` argument.

    Returns:
        Whatever the configured mistune parser returns for ``markdown_text``.
    """
    bbcode_renderer = BBCodeRenderer(domain=domain)

    # Plugins are passed in this exact order.
    enabled_plugins = [
        strikethrough,
        mark,
        superscript,
        subscript,
        insert,
        table,
        footnotes,
        task_lists,
        def_list,
        abbr,
        spoiler,
        table_in_list,
        merge_ordered_lists,
    ]

    render = mistune.create_markdown(renderer=bbcode_renderer, plugins=enabled_plugins)
    return render(markdown_text)
30+
31+
def process_readme(markdown_text, domain=None, debug=False):
    """Run the two-pass Markdown -> BBCode conversion.

    Pass 1 renders the Markdown with ``convert_markdown_to_bbcode``; pass 2
    feeds that output to ``process_html`` to convert remaining HTML.

    Args:
        markdown_text: Markdown source to convert.
        domain: Forwarded to ``convert_markdown_to_bbcode``.
        debug: When true, the pass-1 output is written to ``readme.1stpass``
            and ``process_html`` is asked to write ``readme.finalpass``.

    Returns:
        The value returned by ``process_html``.
    """
    first_pass = convert_markdown_to_bbcode(markdown_text, domain)

    if debug:
        # Keep the intermediate result around for inspection.
        with open('readme.1stpass', 'w', encoding='utf-8') as dump:
            dump.write(first_pass)

    return process_html(first_pass, debug, 'readme.finalpass')
44+
45+
def main():
    """Command-line entry point: convert a Markdown file (or stdin) to BBCode.

    An ``input`` of ``-`` reads the Markdown from standard input; anything
    else is opened as a UTF-8 file. The final BBCode is printed unless
    ``--debug`` is given.
    """
    cli = argparse.ArgumentParser(description='Convert Markdown file to BBCode with HTML processing.')
    cli.add_argument('input', help='Input Markdown file path')
    cli.add_argument('--domain', help='Domain to prepend to relative URLs')
    cli.add_argument('--debug', action='store_true', help='Output intermediate results to files for debugging')
    options = cli.parse_args()

    if options.input != '-':
        with open(options.input, 'r', encoding='utf-8') as md_file:
            source_text = md_file.read()
    else:
        # "-" selects standard input as the Markdown source.
        source_text = sys.stdin.read()

    rendered = process_readme(source_text, options.domain, options.debug)

    # In debug mode nothing is printed.
    if options.debug:
        return
    print(rendered)
65+
66+
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

sbapp/md2bbcode/md2ast.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# this is for debugging the custom mistune renderer bbcode.py
2+
import argparse
3+
import mistune
4+
import json # Import the json module for serialization
5+
from mistune.plugins.formatting import strikethrough, mark, superscript, subscript, insert
6+
from mistune.plugins.table import table, table_in_list
7+
from mistune.plugins.footnotes import footnotes
8+
from mistune.plugins.task_lists import task_lists
9+
from mistune.plugins.def_list import def_list
10+
from mistune.plugins.abbr import abbr
11+
from mistune.plugins.spoiler import spoiler
12+
13+
#local
14+
from md2bbcode.plugins.merge_lists import merge_ordered_lists
15+
16+
def convert_markdown_to_ast(input_filepath, output_filepath):
    """Parse a Markdown file and write the parser's output as JSON.

    The mistune parser is created with ``renderer=None`` and the same plugin
    list used for the BBCode conversion, so its result is the token tree
    rather than rendered text.

    Args:
        input_filepath: Path of the UTF-8 Markdown file to read.
        output_filepath: Path of the JSON file to write (4-space indent).
    """
    ast_parser = mistune.create_markdown(
        renderer=None,
        plugins=[
            strikethrough,
            mark,
            superscript,
            subscript,
            insert,
            table,
            footnotes,
            task_lists,
            def_list,
            abbr,
            spoiler,
            table_in_list,
            merge_ordered_lists,
        ],
    )

    with open(input_filepath, 'r', encoding='utf-8') as source:
        document = source.read()

    # Parse, then serialise the resulting structure in one step.
    serialized = json.dumps(ast_parser(document), indent=4)

    with open(output_filepath, 'w', encoding='utf-8') as sink:
        sink.write(serialized)
33+
34+
def main():
    """Command-line entry point: dump a Markdown file's AST as JSON.

    Takes two positional arguments, the input Markdown path and the output
    JSON path, and hands them to ``convert_markdown_to_ast``.
    """
    cli = argparse.ArgumentParser(description='Convert Markdown file to AST file (JSON format).')
    cli.add_argument('input', help='Input Markdown file path')
    cli.add_argument('output', help='Output AST file path (JSON format)')
    options = cli.parse_args()

    convert_markdown_to_ast(options.input, options.output)
45+
46+
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
from typing import Dict, Any, List
2+
3+
def merge_ordered_lists(md):
    """
    Plugin that fuses runs of top-level ordered lists into a single list.

    Each depth-0 ordered list in ``state.tokens`` is replaced by a new list
    token. Ordered lists at the same depth that follow it, separated only by
    code blocks or blank lines, contribute their items to that token; the
    code blocks / blank lines in between are appended to the children of the
    item that precedes them. Any other token ends the run.

    Assumptions this relies on:
      1) Only code blocks or blank lines sit between lists that should be
         merged (a normal paragraph stops the merge).
      2) A code block directly after a list item belongs to that item.

    The rewrite is registered on ``md.before_render_hooks``; ``md`` is
    returned.
    """
    absorbable_types = ("block_code", "blank_line")

    def _is_ordered_list_at(tok, depth):
        # True for a list token that is ordered and sits at `depth`.
        if tok["type"] != "list":
            return False
        if not tok.get("attrs", {}).get("ordered", False):
            return False
        return tok.get("attrs", {}).get("depth", 0) == depth

    def _absorb_followers(tokens, start, items, depth):
        # Fold mergeable tokens from tokens[start:] into `items` and return
        # the index of the first token that was NOT consumed.
        pos = start
        for follower in tokens[start:]:
            if _is_ordered_list_at(follower, depth):
                # Same-depth ordered list: take over its items.
                items.extend(follower["children"])
            elif follower["type"] in absorbable_types:
                # Code block / blank line: hang it on the latest item.
                if items:
                    items[-1]["children"].append(follower)
            else:
                break
            pos += 1
        return pos

    def rewrite_tokens(md, state):
        tokens = state.tokens
        total = len(tokens)
        rebuilt = []
        cursor = 0

        while cursor < total:
            head = tokens[cursor]
            cursor += 1

            if not _is_ordered_list_at(head, 0):
                # Anything other than a top-level ordered list is kept as-is.
                rebuilt.append(head)
                continue

            depth = head["attrs"]["depth"]
            # Copy so the original token's item list is not extended in place.
            items = list(head["children"])
            cursor = _absorb_followers(tokens, cursor, items, depth)

            rebuilt.append(
                {
                    "type": "list",
                    "children": items,
                    "attrs": {
                        "ordered": True,
                        "depth": depth,
                    },
                }
            )

        # Swap in the rewritten token stream.
        state.tokens = rebuilt

    # Hook in before rendering so the renderer sees the merged tokens.
    md.before_render_hooks.append(rewrite_tokens)
    return md

sbapp/md2bbcode/renderers/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)