Skip to content

Commit ace4466

Browse files
committed
Update FromWeb-to-Json.py
1 parent e3b351e commit ace4466

File tree

1 file changed

+25
-20
lines changed

1 file changed

+25
-20
lines changed

FromWeb-to-Json.py

Lines changed: 25 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -132,6 +132,7 @@ def get_manga_info(url):
132132
author_name = driver.find_element(By.XPATH, "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/div[1]/div[1]/div[2]/p[1]/a[1]")
133133
author_name = author_name.text.replace("\u014d", '0254')
134134
print(author_name)
135+
author_name = author_name.replace("0254", "ō")
135136
try :
136137
author_name2 = driver.find_element(By.XPATH, "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/div[1]/div[1]/div[2]/p[1]/a[2]")
137138
print(author_name2.text)
@@ -140,7 +141,7 @@ def get_manga_info(url):
140141
print("Pas d'auteur 2")
141142

142143
# Récupérer le prix du manga
143-
price = driver.find_element(By.CLASS_NAME, "price")
144+
price = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[2]/div/div[2]/section[1]/div[2]/div/div[1]/div[1]/div[2]/div/span[2]/span")
144145
price = price.text
145146
print(price)
146147

@@ -154,8 +155,8 @@ def get_manga_info(url):
154155
print(available)
155156

156157
# Récupérer la date de sortie du manga
157-
releaseDate = driver.find_element(By.XPATH, "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/section[1]/div[2]/div[1]/div[12]/div[2]")
158-
releaseDate = releaseDate.text.replace("Date de parution : ", "")
158+
releaseDate = driver.find_element(By.XPATH, "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/div[1]/div[1]/div[4]/div[1]/div[1]/span[1]")
159+
releaseDate = releaseDate.text#.replace("Date de parution : ", "")
159160
releaseDate = releaseDate.split(" ")
160161

161162
# Remplacer le mois par son numéro associé
@@ -171,18 +172,30 @@ def get_manga_info(url):
171172
"septembre": "09",
172173
"octobre": "10",
173174
"novembre": "11",
174-
"décembre": "12"
175+
"décembre": "12",
176+
"janv.": "01",
177+
"févr.": "02",
178+
"mars": "03",
179+
"avri.": "04",
180+
"mai": "05",
181+
"juin": "06",
182+
"juil.": "07",
183+
"août": "08",
184+
"sept.": "09",
185+
"octo.": "10",
186+
"nove.": "11",
187+
"déce.": "12"
175188
}
176-
releaseDate1 = releaseDate[1].toLowerCase()
177-
releaseDate[1] = date_dico[releaseDate1]
189+
releaseDate[1] = date_dico[releaseDate[1]]
178190
# Remise en forme de la date
179-
releaseDate = releaseDate1 + "/" + releaseDate[0] + "/" + releaseDate[2]
191+
releaseDate = releaseDate[1] + "/" + releaseDate[0] + "/" + releaseDate[2]
180192
print(releaseDate)
181193

182194
# Récupérer le genre du manga
183195
try:
184196
type = driver.find_element(By.XPATH, "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/section[1]/div[2]/div[1]/div[3]/div[2]/a[1]")
185197
type = type.text#.replace("Genre : ", "")
198+
print(type)
186199
type = type.split(" ")
187200
# Remplacer le genre par son nom japonais si dans le dico
188201
type_dico = {
@@ -193,14 +206,13 @@ def get_manga_info(url):
193206
}
194207

195208
if type[0] in type_dico:
209+
print(type[0])
196210
type = type_dico[type[0]]
197211
else:
198212
type = type[0]
199213
except Exception as ex:
200214
type = "Genre inconnu"
201215
return "Genre inconnu"
202-
203-
print(type)
204216

205217
# Récupérer le résumé du manga
206218
resume = driver.find_element(By.XPATH, "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/div[1]/div[1]/div[5]/div[1]/div[1]/div[1]/div[1]")
@@ -224,7 +236,7 @@ def get_manga_info(url):
224236
print(editor)
225237

226238
# Récupérer le nombre de pages du manga
227-
pageNumber = driver.find_element(By.XPATH, "/html[1]/body[1]/div[2]/main[1]/div[2]/div[1]/div[2]/section[1]/div[1]/section[1]/div[2]/div[1]/div[8]/div[2]")
239+
pageNumber = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[2]/div/div[2]/section[1]/div[1]/section/div[2]/div/div[9]/div[2]")
228240
pageNumber = pageNumber.text#.replace("Nombre de pages : ", "")
229241
print(pageNumber)
230242

@@ -234,9 +246,7 @@ def get_manga_info(url):
234246
print(ean)
235247

236248
# Récupérer le lien de l'image du manga
237-
# driver.find_element(By.ID, "media-top-zoom").click()
238-
img = driver.find_element(By.CLASS_NAME, "element-slideshow").find_element(By.TAG_NAME, "img")
239-
# driver.find_element(By.ID, "media-popup-close").click()
249+
img = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[2]/div/div[2]/section[1]/div[1]/div[1]/div/div[1]/div/div[2]/div[1]/picture[2]/img")
240250
img = img.get_attribute("src")
241251
print(img)
242252

@@ -271,17 +281,12 @@ def get_manga_info(url):
271281
# Lecture de toute les données du fichier json
272282
with open('data_scrap.json', "r") as data_scrap:
273283
data = json.load(data_scrap)
274-
275-
print(data)
276-
277-
get_manga_info("https://www.bdfugue.com/my-hero-my-hero-academia-tome-24")
278284

279-
"""
285+
280286
with open('url_ok.json', 'r', encoding='utf-8') as urls:
281287
url_from_file = json.load(urls)
282288
for i in url_from_file:
283289
url = i['url']
284290
get_manga_info(url)
285291
with open('url_ok.json', 'w', encoding='utf-8') as urls:
286-
json.dump([], urls)
287-
"""
292+
json.dump([], urls)

0 commit comments

Comments
 (0)