@@ -6,20 +6,30 @@ class ExcelParser(Parser):
6
6
def __init__(self, file_path: str):
    """Create the parser for ``file_path``.

    All set-up is delegated to the parent class initialiser; this
    subclass adds no state of its own here.
    """
    super().__init__(file_path)
8
8
9
def load_workbook(self):
    """Parse the HTML source and write every top-level table into the workbook.

    The direct children of ``<body>`` are walked in document order. Each
    ``<br>`` advances the output by one blank row, and each ``<table>`` is
    written row by row below whatever has already been emitted. Other
    elements are ignored.

    Raises:
        Exception: if ``<body>`` has no ``<table>`` as a direct child.
    """
    data = self._read_file()
    soup = BeautifulSoup(data, features='html5lib')

    # NOTE(review): relies on the html5lib parser always producing
    # <html>/<body> wrappers -- confirm if the parser is ever changed.
    top_level = soup.html.body.find_all(recursive=False)

    # find_all() returns an empty result list (never None) when nothing
    # matches, so test for the presence of a table explicitly.
    if not any(node.name == 'table' for node in top_level):
        raise Exception("No table found")

    # Number of worksheet rows consumed so far (tables plus <br> spacers).
    offset = 0
    for node in top_level:
        if node.name == 'br':
            offset += 1
        elif node.name == 'table':
            # Reset per table so an empty table contributes zero rows
            # instead of re-using the previous table's last index.
            row_count = 0
            for row_count, row in enumerate(self._get_row(node, ["tr"]), 1):
                sheet_row = offset + row_count
                # The helper returns the column at which the next cell must
                # start, so the column position is driven by it rather than
                # by a plain counter.
                next_col = 1
                for cell in self._get_row(row, ["th", "td"]):
                    col_index = next_col
                    next_col, cell_data = self._pre_validate_and_format(
                        sheet_row, col_index, cell)
                    self._write_cell(sheet_row, col_index, cell_data)
            # Advance by the rows of THIS table only; adding the absolute
            # row index would count the previous offset twice.
            offset += row_count
33
def to_excel(self, save_file_path: str, ignore_merged_row: bool = True) -> bool:
    """Save the workbook to ``save_file_path``.

    This method only saves; it does not parse the input itself.

    Args:
        save_file_path: Destination path handed to ``_save_workbook``.
        ignore_merged_row: Currently unused; kept for interface
            compatibility until merged rows are handled.

    Returns:
        ``True`` once ``_save_workbook`` has returned without raising.
    """
    # TODO: handle case when rows are merged
    self._save_workbook(save_file_path)
    # Honour the declared ``-> bool`` return type instead of falling off
    # the end of the function and returning None.
    return True
0 commit comments