-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate.py
87 lines (63 loc) · 2.61 KB
/
generate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from typing import List
from bs4 import BeautifulSoup, Tag
from podgen import Episode, Media, Podcast
import requests
import pytz
import datetime
# Root of the original site: the playlist page (group1.htm) is fetched from
# here, and mp3 files are looked up here when the mirror lookup fails.
BASE_URL = "http://www.evan-doorbell.com/production"
# archive.org copy of the recordings; tried first when resolving episode media.
ARCHIVE_ORG_MIRROR = "https://archive.org/download/evan-doorbell"
def get_playlist() -> BeautifulSoup:
    """
    Return a BS4 of the playlist page.

    Downloads ``group1.htm`` from ``BASE_URL`` and parses it with the
    built-in ``html.parser``.

    Raises:
        Exception: if the server answers with a non-success status code.
    """
    resp = requests.get(f"{BASE_URL}/group1.htm")
    if not resp.ok:
        # The HTTP code lives on `status_code`; `Response` has no `status`
        # attribute, so the old message raised AttributeError instead.
        raise Exception(
            f"non 200 error code downloading playlist page {resp.status_code}"
        )
    return BeautifulSoup(resp.text, "html.parser")
def episode_from_tr(row: Tag) -> Episode:
    """Parse out the title and mp3 columns (0, 2) to create an episode.

    The title is the text of the first cell with tabs and newlines removed;
    the file name is the ``href`` of the first link in the third cell. The
    media is resolved against the archive.org mirror first and, if that
    fails, against the original site.

    Args:
        row: a ``<tr>`` element of the playlist table.

    Returns:
        An Episode with ``title`` and ``media`` set.
    """
    tds = row.find_all("td")
    title = tds[0].text.replace("\t", "").replace("\n", "").strip()
    file_name = tds[2].find("a")["href"]  # filename.mp3

    print(f"building episode \"{title}\"")

    try:
        url = f"{ARCHIVE_ORG_MIRROR}/{file_name}"
        media = Media.create_from_server_response(url)
    except Exception:
        # Only ordinary errors trigger the fallback. A bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit, so Ctrl-C during the
        # mirror lookup would start a second download instead of stopping.
        print("retrying from evan-doorbell.com")
        url = f"{BASE_URL}/{file_name}"
        media = Media.create_from_server_response(url)

    ep = Episode()
    ep.title = title
    ep.media = media
    return ep
def generate_episodes() -> List[Episode]:
    """Build one Episode per data row of the playlist table.

    The table is the element with id ``table21`` on the playlist page; its
    first row is skipped. Each title is prefixed with its 1-based position
    ("#1: ", "#2: ", ...) and publication dates are spaced one hour apart,
    ending one hour before the current UTC time, so later rows get later
    dates.
    """
    table = get_playlist().find(id="table21")
    data_rows = table.find_all("tr")[1:]
    episodes: List[Episode] = [episode_from_tr(tr) for tr in data_rows]

    # Set episode order
    total = len(episodes)
    now = datetime.datetime.now(tz=pytz.utc)
    for index, episode in enumerate(episodes):
        episode.title = f"#{index + 1}: " + episode.title
        hours_back = total - index
        episode.publication_date = now - datetime.timedelta(hours=hours_back)

    return episodes
def generate_podcast() -> Podcast:
    """Assemble the Podcast: feed metadata plus every scraped episode.

    Episodes are generated first, then added in order, after which
    ``apply_episode_order()`` is called on the podcast.
    """
    scraped = generate_episodes()

    # Feed-level metadata, passed as constructor keywords.
    feed = Podcast(
        name="Evan Doorbell's Phone Tapes (Group 1)",
        description='Evan Doorbell\'s Phone Tapes are a well known "documentary" of how the phone system used to be like in the 1970s. Evan has recorded many hours of "phone tapes" of the old phone network.',
        website="http://www.evan-doorbell.com",
        explicit=False,
        image="https://github.com/tsujamin/evan-doorbell-podcast/blob/main/logo-3.png?raw=true&dummy=.png",
        withhold_from_itunes=True,
        complete=True,
    )

    for item in scraped:
        feed.add_episode(item)
    feed.apply_episode_order()

    return feed
# Build and serialise the feed BEFORE opening the output file: opening with
# "w" truncates podcast.xml immediately, so a scraping/network failure would
# otherwise wipe the previous feed and leave an empty file behind.
podcast = generate_podcast()
rss_xml = podcast.rss_str()

# Write as UTF-8 explicitly rather than the platform default, so the bytes on
# disk do not depend on the machine's locale.
# NOTE(review): assumes rss_str() declares UTF-8 in the XML header — confirm.
with open("podcast.xml", "w", encoding="utf-8") as f:
    f.write(rss_xml)