@@ -132,6 +132,7 @@ def get_manga_info(url):
132132 author_name = driver .find_element (By .XPATH , "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/div[1]/div[1]/div[2]/p[1]/a[1]" )
133133 author_name = author_name .text .replace ("\u014d " , '0254' )
134134 print (author_name )
135+ author_name = author_name .replace ("0254" , "ō" )
135136 try :
136137 author_name2 = driver .find_element (By .XPATH , "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/div[1]/div[1]/div[2]/p[1]/a[2]" )
137138 print (author_name2 .text )
@@ -140,7 +141,7 @@ def get_manga_info(url):
140141 print ("Pas d'auteur 2" )
141142
142143 # Récupérer le prix du manga
143- price = driver .find_element (By .CLASS_NAME , "price " )
144+ price = driver .find_element (By .XPATH , "/html/body/div[2]/main/div[2]/div/div[2]/section[1]/div[2]/div/div[1]/div[1]/div[2]/div/span[2]/span " )
144145 price = price .text
145146 print (price )
146147
@@ -154,8 +155,8 @@ def get_manga_info(url):
154155 print (available )
155156
156157 # Récupérer la date de sortie du manga
157- releaseDate = driver .find_element (By .XPATH , "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/section [1]/div[2 ]/div[1 ]/div[12 ]/div[2 ]" )
158- releaseDate = releaseDate .text .replace ("Date de parution : " , "" )
158+ releaseDate = driver .find_element (By .XPATH , "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/div [1]/div[1 ]/div[4 ]/div[1 ]/div[1]/span[1 ]" )
159+ releaseDate = releaseDate .text # .replace("Date de parution : ", "")
159160 releaseDate = releaseDate .split (" " )
160161
161162 # Remplacer le mois par son numéro associé
@@ -171,18 +172,30 @@ def get_manga_info(url):
171172 "septembre" : "09" ,
172173 "octobre" : "10" ,
173174 "novembre" : "11" ,
174- "décembre" : "12"
175+ "décembre" : "12" ,
176+ "janv." : "01" ,
177+ "févr." : "02" ,
178+ "mars" : "03" ,
179+ "avri." : "04" ,
180+ "mai" : "05" ,
181+ "juin" : "06" ,
182+ "juil." : "07" ,
183+ "août" : "08" ,
184+ "sept." : "09" ,
185+ "octo." : "10" ,
186+ "nove." : "11" ,
187+ "déce." : "12"
175188 }
176- releaseDate1 = releaseDate [1 ].toLowerCase ()
177- releaseDate [1 ] = date_dico [releaseDate1 ]
189+ releaseDate [1 ] = date_dico [releaseDate [1 ]]
178190 # Remise en forme de la date
179- releaseDate = releaseDate1 + "/" + releaseDate [0 ] + "/" + releaseDate [2 ]
191+ releaseDate = releaseDate [ 1 ] + "/" + releaseDate [0 ] + "/" + releaseDate [2 ]
180192 print (releaseDate )
181193
182194 # Récupérer le genre du manga
183195 try :
184196 type = driver .find_element (By .XPATH , "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/section[1]/div[2]/div[1]/div[3]/div[2]/a[1]" )
185197 type = type .text #.replace("Genre : ", "")
198+ print (type )
186199 type = type .split (" " )
187200 # Remplacer le genre par son nom japonais si dans le dico
188201 type_dico = {
@@ -193,14 +206,13 @@ def get_manga_info(url):
193206 }
194207
195208 if type [0 ] in type_dico :
209+ print (type [0 ])
196210 type = type_dico [type [0 ]]
197211 else :
198212 type = type [0 ]
199213 except Exception as ex :
200214 type = "Genre inconnu"
201215 return "Genre inconnu"
202-
203- print (type )
204216
205217 # Récupérer le résumé du manga
206218 resume = driver .find_element (By .XPATH , "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/div[1]/div[1]/div[5]/div[1]/div[1]/div[1]/div[1]" )
@@ -224,7 +236,7 @@ def get_manga_info(url):
224236 print (editor )
225237
226238 # Récupérer le nombre de pages du manga
227- pageNumber = driver .find_element (By .XPATH , "/html[1] /body[1] /div[2]/main[1] /div[2]/div[1] /div[2]/section[1]/div[1]/section[1] /div[2]/div[1] /div[8 ]/div[2]" )
239+ pageNumber = driver .find_element (By .XPATH , "/html/body/div[2]/main/div[2]/div/div[2]/section[1]/div[1]/section/div[2]/div/div[9 ]/div[2]" )
228240 pageNumber = pageNumber .text #.replace("Nombre de pages : ", "")
229241 print (pageNumber )
230242
@@ -234,9 +246,7 @@ def get_manga_info(url):
234246 print (ean )
235247
236248 # Récupérer le lien de l'image du manga
237- # driver.find_element(By.ID, "media-top-zoom").click()
238- img = driver .find_element (By .CLASS_NAME , "element-slideshow" ).find_element (By .TAG_NAME , "img" )
239- # driver.find_element(By.ID, "media-popup-close").click()
249+ img = driver .find_element (By .XPATH , "/html/body/div[2]/main/div[2]/div/div[2]/section[1]/div[1]/div[1]/div/div[1]/div/div[2]/div[1]/picture[2]/img" )
240250 img = img .get_attribute ("src" )
241251 print (img )
242252
@@ -271,17 +281,12 @@ def get_manga_info(url):
271281 # Lecture de toute les données du fichier json
272282 with open ('data_scrap.json' , "r" ) as data_scrap :
273283 data = json .load (data_scrap )
274-
275- print (data )
276-
277- get_manga_info ("https://www.bdfugue.com/my-hero-my-hero-academia-tome-24" )
278284
279- """
285+
280286with open ('url_ok.json' , 'r' , encoding = 'utf-8' ) as urls :
281287 url_from_file = json .load (urls )
282288 for i in url_from_file :
283289 url = i ['url' ]
284290 get_manga_info (url )
285291with open ('url_ok.json' , 'w' , encoding = 'utf-8' ) as urls :
286- json.dump([], urls)
287- """
292+ json .dump ([], urls )
0 commit comments