Skip to content

Commit

Permalink
misc uncommitted changes
Browse files Browse the repository at this point in the history
  • Loading branch information
saerdnaer committed Oct 26, 2023
1 parent 2954dd4 commit 33d0861
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 62 deletions.
4 changes: 2 additions & 2 deletions archive/csv2schedule_34C3_DLF.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def process(acronym, base_id, source_csv_url):

for event in csv_schedule:
id = str(base_id + int(event['ID']))
guid = voc.tools.gen_uuid(hashlib.md5((acronym + id).encode('utf-8')).hexdigest())
guid = voc.tools.gen_uuid(acronym + id).hexdigest())
duration = (event['end_time'] - event['start_time']).seconds/60

title = event['Was']
Expand Down Expand Up @@ -210,7 +210,7 @@ def process(acronym, base_id, source_csv_url):
('do_not_record', event.get('Aufzeichnung?', '') == 'nein'),
('persons', [ OrderedDict([
('id', 0),
('full_public_name', p.strip()),
('public_name', p.strip()),
#('#text', p),
]) for p in event.get('Wer', '').split(',') ]),
('links', [])
Expand Down
5 changes: 1 addition & 4 deletions csv2schedule_deu.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,7 @@ def process(acronym, base_id, source_csv_url):
version = None

filename = 'schedule-' + acronym + '.csv'
if sys.version_info[0] < 3:
infile = open(filename, 'rb')
else:
infile = open(filename, 'r', newline='', encoding='utf8')
infile = open(filename, 'r', newline='', encoding='utf8')

with infile as f:
reader = csv.reader(f)
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ gitpython
bs4
html5lib
ics
dataclasses
dataclasses
beautifulsoup4
5 changes: 3 additions & 2 deletions voc/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,7 @@ def remove_event(self, id=None, guid=None):
# * check links conversion
# * ' vs " in xml
# * logo is in json but not in xml
# formerly named dict_to_schedule_xml()
def xml(self, method="string"):
root_node = None

Expand All @@ -616,7 +617,7 @@ def _to_etree(d, node, parent=""):
elif isinstance(d, int):
node.text = str(d)
elif parent == "person":
node.text = d["public_name"]
node.text = d.get("public_name", d.get("name"))
_set_attrib(node, "id", d["id"])
elif (
isinstance(d, dict)
Expand Down Expand Up @@ -660,7 +661,7 @@ def _to_etree(d, node, parent=""):
if parent == "room":
# create room tag for each instance of a room name
node_ = ET.SubElement(node, "room")
node_.set("name", k)
node_.set("name", k or '')
if k in self._room_ids and self._room_ids[k]:
node_.set("guid", self._room_ids[k])

Expand Down
165 changes: 112 additions & 53 deletions wikitable2schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
import re
import sys
import json
from collections import OrderedDict
from datetime import datetime, timedelta
import locale
import traceback
import requests
from sys import environ as env
from bs4 import BeautifulSoup

import voc.tools
Expand All @@ -16,29 +16,73 @@

days = []
local = False
debug = False
locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')

voc.tools.set_base_id(2000)

year = 2022
wiki_url = 'https://di.c3voc.de/sessions-liste?do=export_xhtml#liste_der_self-organized_sessions'
output_dir = "/srv/www/divoc"
secondary_output_dir = "./divoc"
year = 2023
wiki_url = 'https://eh20.easterhegg.eu/self-organized-sessions?do=export_xhtml'
output_dir = "/srv/www/eh20"
secondary_output_dir = "./eh20"


template = {"schedule": {
"version": "1.0",
"version": "0.20",
"conference": {
"title": "DiVOC Bridging Bubbles",
"acronym": "divoc_bb3",
"acronym": "eh20",
"title": "Easterhegg 20 - Back to root",
"start": "2023-04-07",
"end": "2023-04-10",
"daysCount": 4,
"start": "2022-04-15",
"end": "2022-04-18",
"timeslot_duration": "00:15",
"timeslot_duration": "00:05",
"time_zone_name": "Europe/Amsterdam",
"days": [],
"base_url": "https://di.c3voc.de/",
},
"rooms": [
{
"name": "K2 Rahel Liebeschütz-Plaut",
"guid": "69865dca-0a39-42fc-b3d3-44663a947ccf",
"description": "Vortragssaal, [https://de.wikipedia.org/wiki/Rahel_Liebeschütz-Plaut](Rahel Liebeschütz-Plaut)",
"capacity": 400
},
{
"name": "K1/1 Lötwerkstatt Knott-ter Meer",
"guid": "9eeb1601-955a-4f37-a910-0568b7429598",
"description": "Löt- und Bastelraum. https://de.wikipedia.org/wiki/Ilse_Knott-ter_Meer",
"capacity": 20
},
{
"name": "K1/1b Lötwerkstatt Knott-ter Meer",
"guid": "8a448869-b221-4210-a925-a01abe99c12e",
"description": "Zweiter, paralleler Bastelworkshop",
"capacity": None
},
{
"name": "K1/2 Workshop Valerie Thomas",
"guid": "3e18429d-771a-4e47-9993-9dbfdcc8ebe2",
"description": "[Valerie Thomas](https://de.wikipedia.org/wiki/Valerie_Thomas)",
"capacity": 30
},
{
"name": "K1/3 Workshop Marge Piercy",
"guid": "39784595-0f78-4be7-8d2e-69597bcfa2c6",
"description": "[Marge Piercy](https://de.wikipedia.org/wiki/Marge_Piercy)",
"capacity": 30
},
{
"name": "P1 Workshop Mary G. Ross",
"guid": "e51e46fe-df65-45d8-977e-10f7edbe24bb",
"description": "[Mary G. Ross](https://de.wikipedia.org/wiki/Valerie_Thomas)",
"capacity": 120
},
{
"name": "Lounge",
"guid": "320846da-1985-4fc1-98ca-40410863149b",
"description": None,
"capacity": 100
},
],
"days": []
}
}}


Expand All @@ -54,9 +98,9 @@ def fetch_schedule(wiki_url):
schedule = Schedule.from_dict(template, start_hour=9)
tz = schedule.tz()
conference_start_date = tz.localize(schedule.conference_start())

print("Requesting wiki events")

soup = BeautifulSoup(requests.get(wiki_url).text, 'html5lib')
# soup = BeautifulSoup(open("divoc-sessions.xhtml"), 'lxml')

Expand All @@ -65,16 +109,23 @@ def fetch_schedule(wiki_url):

print('Processing sections')
section_title = None
room = None
sections_to_ignore = [
'durchgehende_treffpunkte_und_assemblies',
'wochentag_datum',
'regelmaessige_treffen'
'regelmaessige_treffen',
'raeume'
]
for element in elements:
if element.name == 'h3' or element.name == 'h2':
section_title = element
continue

if element.name == 'h4':
room = element.text
continue


# ignore some sections
if element.name == 'table':
if section_title.attrs['id'] in sections_to_ignore:
Expand All @@ -96,17 +147,16 @@ def fetch_schedule(wiki_url):
next(rows_iter)

for row in rows_iter:
event = process_row(row, tz, day)
event = process_row(row, tz, day, room or 'other')
if event is not None:
schedule.add_event(event)

# print(json.dumps(out, indent=2))

print()
print()
return schedule


def process_row(row, tz, day):
def process_row(row, tz, day, room):
event_n = None
data = {}
external_links = {}
Expand All @@ -115,7 +165,13 @@ def process_row(row, tz, day):
key = td.attrs['class'][0]
data[key] = re.compile(r'\s*\n\s*').split(td.get_text().strip())
external_links = voc.tools.parse_html_formatted_links(td)


# ignore events which are already in pretalx
if len(external_links) > 0:
urls = external_links.keys()
if list(urls)[0].startswith('https://cfp.eh20.easterhegg.eu/eh20/talk/'):
return None

try:
time = re.compile(r'\s*(?:-|–)\s*').split(data['col0'][0])
title = data['col1'][0]
Expand All @@ -135,10 +191,16 @@ def process_row(row, tz, day):
print(f'\n end time {time[1]} is invalid, assuming duration of 2h for event: {title}')
end = start + timedelta(hours=2)

guid = gen_uuid('{}-{}'.format(start, next(iter(links), title)))
local_id = voc.tools.get_id(guid)
duration = (end - start).total_seconds() / 60

# ignore dummy events
if duration == 0 or title == 'Beispielüberschrift' or persons == 'EH-Orga':
return None

guid = gen_uuid(f'{start}-{next(iter(links), title)}')
local_id = voc.tools.get_id(guid)

'''
if 'Workshop3' in title or 'Workshop3' in abstract:
room = 'Workshop 3'
elif 'Workshop2' in title or 'Workshop2' in abstract:
Expand All @@ -147,34 +209,31 @@ def process_row(row, tz, day):
room = 'Workshop 1'
else:
room = 'Self-organized'

event = Event(OrderedDict([
('id', local_id),
('guid', guid),
# ('logo', None),
('date', start.isoformat()),
('start', start.strftime('%H:%M')),
('duration', '%d:%02d' % divmod(duration, 60)),
('room', room),
('slug', None),
('url', wiki_url.split('?')[0]),
('title', title),
('subtitle', ''),
('track', 'Workshop'),
('type', 'Workshop'),
('language', 'de'),
('abstract', abstract or ''),
('description', ''),
('persons', [OrderedDict([
('id', 0),
('public_name', p.strip()),
# ('#text', p),
]) for p in persons and persons.split(',')]),
('links', [
{'url': link_url, 'title': link_title} for link_url, link_title in external_links.items()
])
]), start)
'''

event = Event({
'id': local_id,
'guid': guid,
# 'logo': None,
'date': start.isoformat(),
'start': start.strftime('%H:%M'),
'duration': '%d:%02d' % divmod(duration, 60),
'room': room,
'slug': None,
'url': wiki_url.split('?')[0],
'title': title,
'subtitle': '',
'track': 'Workshop',
'type': 'Workshop',
'language': 'de',
'abstract': abstract or '',
'description': '',
'persons': [{'id': 0, 'name': p.strip()} for p in persons and persons.split(',')],
'links': [{'url': link_url, 'title': link_title} for link_url, link_title in external_links.items()]
}, start)
write('.')
if debug:
print(event)
return event

except Exception as e:
Expand Down Expand Up @@ -206,15 +265,15 @@ def main():

if not os.path.exists(output_dir):
if not os.path.exists(secondary_output_dir):
os.mkdir(output_dir)
os.mkdir(output_dir)
else:
output_dir = secondary_output_dir
local = True
os.chdir(output_dir)

main()

if not local:
if not local:
os.system("git add *.json *.xml")
os.system("git commit -m 'updates from " + str(datetime.now()) + "'")
os.system("git push")

0 comments on commit 33d0861

Please sign in to comment.