1
+ # converts some HTML tags to BBCode
2
+ # pass --debug to save the output to readme.finalpass
3
+ # may be better off replacing this with html to markdown (and then to bbcode). Lepture recommends a JS html to markdown converter: sundown
4
+ from bs4 import BeautifulSoup , NavigableString
5
+ import argparse
6
+
7
def handle_font_tag(tag, replacements):
    """Convert a <font> tag into nested BBCode COLOR/SIZE/FONT tags.

    Args:
        tag: The BeautifulSoup <font> element to convert.
        replacements: Tag-to-BBCode table. Accepted so the handler keeps its
            two-argument interface; this handler does not read it.

    Returns:
        The converted children of ``tag`` wrapped in one BBCode tag per
        recognised attribute, outermost first in the order color, size, face,
        e.g. ``[COLOR=red][SIZE=5]text[/SIZE][/COLOR]``. When none of those
        attributes is present the converted children are returned unwrapped.
    """
    # (HTML attribute, BBCode tag) pairs, listed outermost wrapper first.
    font_attrs = (('color', 'COLOR'), ('size', 'SIZE'), ('face', 'FONT'))
    wrappers = [
        (bb_tag, tag[html_attr])
        for html_attr, bb_tag in font_attrs
        if html_attr in tag.attrs
    ]

    # Children are rendered by the shared recursive converter, which needs
    # only the element; passing extra positional arguments is not required.
    inner_content = ''.join(
        recursive_html_to_bbcode(child) for child in tag.children
    )

    # Wrap from the innermost attribute outwards so the first attribute ends
    # up as the outermost BBCode tag.
    for bb_tag, value in reversed(wrappers):
        inner_content = f"[{bb_tag}={value}]{inner_content}[/{bb_tag}]"
    return inner_content
23
+
24
def handle_style_tag(tag, replacements):
    """Convert a tag's inline ``style`` attribute into nested BBCode tags.

    Recognised declarations and the BBCode they produce:

    * ``color``            -> ``[COLOR=value]``
    * ``font-size``        -> ``[SIZE=value]``
    * ``font-family``      -> ``[FONT=value]``
    * ``text-decoration``  -> ``[S]`` for line-through, ``[U]`` for underline
    * ``font-weight``      -> ``[B]`` for ``bold`` or a numeric weight >= 700

    Args:
        tag: The BeautifulSoup element whose ``style`` attribute is read.
        replacements: Tag-to-BBCode table. Accepted so the handler keeps its
            two-argument interface; this handler does not read it.

    Returns:
        The converted children of ``tag`` wrapped in the BBCode tags listed
        above (first listed is outermost), or the converted children alone
        when no recognised declaration is present.
    """
    # Parse "name: value; name: value" declarations. partition() splits on the
    # first colon only, so values that themselves contain ':' stay intact.
    # Property names are lower-cased because CSS matches them
    # case-insensitively. A later duplicate declaration overrides an earlier
    # one.
    css_properties = {}
    for declaration in tag.attrs.get('style', '').split(';'):
        name, colon, value = declaration.partition(':')
        if colon:
            css_properties[name.strip().lower()] = value.strip()

    attributes = []
    for css_name, bb_tag in (('color', 'COLOR'),
                             ('font-size', 'SIZE'),
                             ('font-family', 'FONT')):
        if css_name in css_properties:
            attributes.append(f"{bb_tag}={css_properties[css_name]}")

    text_decoration = css_properties.get('text-decoration', '').lower()
    if 'line-through' in text_decoration:
        attributes.append("S")  # strike-through
    if 'underline' in text_decoration:
        attributes.append("U")

    font_weight = css_properties.get('font-weight', '').lower()
    # isdecimal() only accepts strings that int() can parse.
    if font_weight == 'bold' or (font_weight.isdecimal() and int(font_weight) >= 700):
        attributes.append("B")

    # Children are rendered by the shared recursive converter, which needs
    # only the element; passing extra positional arguments is not required.
    inner_content = ''.join(
        recursive_html_to_bbcode(child) for child in tag.children
    )

    # Wrap innermost-first so the first collected attribute is outermost.
    for attr in reversed(attributes):
        # "COLOR=red" closes as [/COLOR]; plain tags such as "B" close as [/B].
        closing = attr.partition('=')[0]
        inner_content = f"[{attr}]{inner_content}[/{closing}]"
    return inner_content
56
+
57
def recursive_html_to_bbcode(element, replacements=None):
    """Recursively convert a parsed HTML node to BBCode text.

    Only ``<details>``/``<summary>`` are translated: a ``<details>`` element
    becomes ``[SPOILER]...[/SPOILER]``, with the converted content of its
    ``<summary>`` child (if any) used as the spoiler title
    (``[SPOILER=title]``). Every other tag is dropped and only its converted
    children are kept. Text nodes are emitted as-is.

    Args:
        element: A BeautifulSoup node (text node, tag, or the soup itself).
            NOTE: a ``<summary>`` used as a spoiler title is removed from the
            tree via ``decompose()``, so the input tree is mutated.
        replacements: Optional tag-to-BBCode table. It is forwarded to the
            recursive calls so callers that pass it are supported, but this
            function does not otherwise read it.

    Returns:
        The BBCode string for ``element`` and all of its descendants.
    """
    if isinstance(element, NavigableString):
        return str(element)

    if element.name == 'summary':
        # A summary belonging to a <details> is consumed while handling that
        # <details>; any summary reached here is skipped.
        return ''

    if element.name == 'details':
        # Look at direct children only: a recursive search would pick up (and
        # then destroy) the <summary> of a nested <details> when this element
        # has none of its own.
        summary = element.find('summary', recursive=False)
        spoiler_title = ''
        if summary is not None:
            spoiler_title = '=' + ''.join(
                recursive_html_to_bbcode(child, replacements)
                for child in summary.contents
            )
            # Remove the summary so it is not rendered again in the body.
            summary.decompose()

        content = ''.join(
            recursive_html_to_bbcode(child, replacements)
            for child in element.contents
        )
        return f'[SPOILER{spoiler_title}]{content}[/SPOILER]'

    # Any other tag: keep only the converted children.
    return ''.join(
        recursive_html_to_bbcode(child, replacements)
        for child in element.contents
    )
84
+
85
def html_to_bbcode(html):
    """Parse an HTML string and return its BBCode conversion.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend and the
    resulting tree is handed to ``recursive_html_to_bbcode``.
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Intended HTML tag -> BBCode tag table.
    # NOTE(review): nothing in this function reads the table; it is built and
    # then discarded. Confirm whether it should be passed to the converter.
    replacements = {
        # inline formatting
        'b': 'B', 'strong': 'B',
        'i': 'I', 'em': 'I',
        'u': 'U',
        's': 'S',
        'sub': 'SUB',
        'sup': 'SUP',
        # block-level elements
        'p': '',
        'ul': 'LIST',
        'ol': 'LIST=1',
        'li': '*',
        'font': '',  # attributes (color/size/face) need dedicated handling
        'blockquote': 'QUOTE',
        'pre': 'CODE',
        'code': 'ICODE',
        # elements whose attributes carry the payload
        'a': 'URL',
        'img': 'IMG',
    }

    return recursive_html_to_bbcode(soup)
109
+
110
def process_html(input_html, debug=False, output_file=None):
    """Convert HTML to BBCode, optionally saving the result for debugging.

    Args:
        input_html: The HTML markup to convert.
        debug: When true, the converted text is also written to a file.
        output_file: Path of the debug file. Only used when ``debug`` is
            true; defaults to ``readme.finalpass`` when omitted.

    Returns:
        The converted BBCode string, in both debug and non-debug mode.
    """
    converted_bbcode = html_to_bbcode(input_html)

    if debug:
        # Fall back to the documented debug file name instead of failing in
        # open(None) when the caller enables debug without naming a file.
        target = output_file if output_file is not None else 'readme.finalpass'
        with open(target, 'w', encoding='utf-8') as file:
            file.write(converted_bbcode)

    return converted_bbcode
118
+
119
if __name__ == "__main__":
    # Command-line entry point: read an HTML file and convert it to BBCode.
    parser = argparse.ArgumentParser(description="Convert HTML to BBCode with optional debugging output.")
    parser.add_argument('input_file', type=str, help='Input HTML file path')
    parser.add_argument('--debug', action='store_true', help='Save output to readme.finalpass for debugging')

    args = parser.parse_args()
    input_file = args.input_file
    output_file = 'readme.finalpass' if args.debug else None

    with open(input_file, 'r', encoding='utf-8') as file:
        html_content = file.read()

    converted = process_html(html_content, debug=args.debug, output_file=output_file)

    # Without --debug nothing is written to disk, so emit the result on
    # stdout; otherwise the conversion would be silently discarded.
    if not args.debug:
        print(converted)
0 commit comments