misc uncommitted changes

voc · Oct 26, 2023 · 33d0861 · 33d0861
1 parent 2954dd4
commit 33d0861
Show file tree

Hide file tree

Showing 5 changed files with 120 additions and 62 deletions.
diff --git a/archive/csv2schedule_34C3_DLF.py b/archive/csv2schedule_34C3_DLF.py
@@ -178,7 +178,7 @@ def process(acronym, base_id, source_csv_url):
 
     for event in csv_schedule:
         id = str(base_id + int(event['ID']))
-        guid = voc.tools.gen_uuid(hashlib.md5((acronym + id).encode('utf-8')).hexdigest())
+        guid = voc.tools.gen_uuid(acronym + id).hexdigest())
         duration = (event['end_time'] - event['start_time']).seconds/60
 
         title = event['Was']
@@ -210,7 +210,7 @@ def process(acronym, base_id, source_csv_url):
             ('do_not_record', event.get('Aufzeichnung?', '') == 'nein'),
             ('persons', [ OrderedDict([
                 ('id', 0),
-                ('full_public_name', p.strip()),
+                ('public_name', p.strip()),
                 #('#text', p),
             ]) for p in event.get('Wer', '').split(',') ]),
             ('links', [])

diff --git a/csv2schedule_deu.py b/csv2schedule_deu.py
@@ -107,10 +107,7 @@ def process(acronym, base_id, source_csv_url):
     version = None
 
     filename = 'schedule-' + acronym + '.csv'
-    if sys.version_info[0] < 3:
-        infile = open(filename, 'rb')
-    else:
-        infile = open(filename, 'r', newline='', encoding='utf8')
+    infile = open(filename, 'r', newline='', encoding='utf8')
 
     with infile as f:
         reader = csv.reader(f)

diff --git a/requirements.txt b/requirements.txt
@@ -9,4 +9,5 @@ gitpython
 bs4
 html5lib
 ics
-dataclasses
+dataclasses
+beautifulsoup4
diff --git a/voc/schedule.py b/voc/schedule.py
@@ -592,6 +592,7 @@ def remove_event(self, id=None, guid=None):
     #  * check links conversion
     #  * ' vs " in xml
     #  * logo is in json but not in xml
+    # formerly named dict_to_schedule_xml()
     def xml(self, method="string"):
         root_node = None
 
@@ -616,7 +617,7 @@ def _to_etree(d, node, parent=""):
             elif isinstance(d, int):
                 node.text = str(d)
             elif parent == "person":
-                node.text = d["public_name"]
+                node.text = d.get("public_name", d.get("name"))
                 _set_attrib(node, "id", d["id"])
             elif (
                 isinstance(d, dict)
@@ -660,7 +661,7 @@ def _to_etree(d, node, parent=""):
                         if parent == "room":
                             # create room tag for each instance of a room name
                             node_ = ET.SubElement(node, "room")
-                            node_.set("name", k)
+                            node_.set("name", k or '')
                             if k in self._room_ids and self._room_ids[k]:
                                 node_.set("guid", self._room_ids[k])
 

diff --git a/wikitable2schedule.py b/wikitable2schedule.py
@@ -3,11 +3,11 @@
 import re
 import sys
 import json
-from collections import OrderedDict
 from datetime import datetime, timedelta
 import locale
 import traceback
 import requests
+from sys import environ as env
 from bs4 import BeautifulSoup
 
 import voc.tools
@@ -16,29 +16,73 @@
 
 days = []
 local = False
+debug = False
 locale.setlocale(locale.LC_ALL, 'de_DE.UTF-8')
 
 voc.tools.set_base_id(2000)
 
-year = 2022
-wiki_url = 'https://di.c3voc.de/sessions-liste?do=export_xhtml#liste_der_self-organized_sessions'
-output_dir = "/srv/www/divoc"
-secondary_output_dir = "./divoc"
+year = 2023
+wiki_url = 'https://eh20.easterhegg.eu/self-organized-sessions?do=export_xhtml'
+output_dir = "/srv/www/eh20"
+secondary_output_dir = "./eh20"
 
 
 template = {"schedule": {
-    "version": "1.0",
+    "version": "0.20",
     "conference": {
-        "title": "DiVOC Bridging Bubbles",
-        "acronym": "divoc_bb3",
+        "acronym": "eh20",
+        "title": "Easterhegg 20 - Back to root",
+        "start": "2023-04-07",
+        "end": "2023-04-10",
         "daysCount": 4,
-        "start": "2022-04-15",
-        "end":   "2022-04-18",
-        "timeslot_duration": "00:15",
+        "timeslot_duration": "00:05",
         "time_zone_name": "Europe/Amsterdam",
-        "days": [],
-        "base_url": "https://di.c3voc.de/",
-    },
+        "rooms": [
+            {
+                "name": "K2 Rahel Liebeschütz-Plaut",
+                "guid": "69865dca-0a39-42fc-b3d3-44663a947ccf",
+                "description": "Vortragssaal, [https://de.wikipedia.org/wiki/Rahel_Liebeschütz-Plaut](Rahel Liebeschütz-Plaut)",
+                "capacity": 400
+            },
+            {
+                "name": "K1/1 Lötwerkstatt Knott-ter Meer",
+                "guid": "9eeb1601-955a-4f37-a910-0568b7429598",
+                "description": "Löt- und Bastelraum. https://de.wikipedia.org/wiki/Ilse_Knott-ter_Meer",
+                "capacity": 20
+            },
+            {
+                "name": "K1/1b Lötwerkstatt Knott-ter Meer",
+                "guid": "8a448869-b221-4210-a925-a01abe99c12e",
+                "description": "Zweiter, paralleler Bastelworkshop",
+                "capacity": None
+            },
+            {
+                "name": "K1/2 Workshop Valerie Thomas",
+                "guid": "3e18429d-771a-4e47-9993-9dbfdcc8ebe2",
+                "description": "[Valerie Thomas](https://de.wikipedia.org/wiki/Valerie_Thomas)",
+                "capacity": 30
+            },
+            {
+                "name": "K1/3 Workshop Marge Piercy",
+                "guid": "39784595-0f78-4be7-8d2e-69597bcfa2c6",
+                "description": "[Marge Piercy](https://de.wikipedia.org/wiki/Marge_Piercy)",
+                "capacity": 30
+            },
+            {
+                "name": "P1 Workshop Mary G. Ross",
+                "guid": "e51e46fe-df65-45d8-977e-10f7edbe24bb",
+                "description": "[Mary G. Ross](https://de.wikipedia.org/wiki/Valerie_Thomas)",
+                "capacity": 120
+            },
+            {
+                "name": "Lounge",
+                "guid": "320846da-1985-4fc1-98ca-40410863149b",
+                "description": None,
+                "capacity": 100
+            },
+        ],
+        "days": []
+    }
 }}
 
 
@@ -54,9 +98,9 @@ def fetch_schedule(wiki_url):
     schedule = Schedule.from_dict(template, start_hour=9)
     tz = schedule.tz()
     conference_start_date = tz.localize(schedule.conference_start())
-    
+
     print("Requesting wiki events")
-    
+
     soup = BeautifulSoup(requests.get(wiki_url).text, 'html5lib')
     # soup = BeautifulSoup(open("divoc-sessions.xhtml"), 'lxml')
 
@@ -65,16 +109,23 @@ def fetch_schedule(wiki_url):
 
     print('Processing sections')
     section_title = None
+    room = None
     sections_to_ignore = [
         'durchgehende_treffpunkte_und_assemblies',
         'wochentag_datum',
-        'regelmaessige_treffen'
+        'regelmaessige_treffen',
+        'raeume'
     ]
     for element in elements:
         if element.name == 'h3' or element.name == 'h2':
             section_title = element
             continue
 
+        if element.name == 'h4':
+            room = element.text
+            continue
+
+
         # ignore some sections
         if element.name == 'table':
             if section_title.attrs['id'] in sections_to_ignore:
@@ -96,17 +147,16 @@ def fetch_schedule(wiki_url):
         next(rows_iter)
 
         for row in rows_iter:
-            event = process_row(row, tz, day)
+            event = process_row(row, tz, day, room or 'other')
             if event is not None:
                 schedule.add_event(event)
-
-    # print(json.dumps(out, indent=2))
+
     print()
     print()
     return schedule
 
 
-def process_row(row, tz, day):
+def process_row(row, tz, day, room):
     event_n = None
     data = {}
     external_links = {}
@@ -115,7 +165,13 @@ def process_row(row, tz, day):
         key = td.attrs['class'][0]
         data[key] = re.compile(r'\s*\n\s*').split(td.get_text().strip())
         external_links = voc.tools.parse_html_formatted_links(td)
-
+
+    # ignore events which are already in pretalx
+    if len(external_links) > 0:
+        urls = external_links.keys()
+        if list(urls)[0].startswith('https://cfp.eh20.easterhegg.eu/eh20/talk/'):
+            return None
+
     try:
         time = re.compile(r'\s*(?:-|–)\s*').split(data['col0'][0])
         title = data['col1'][0]
@@ -135,10 +191,16 @@ def process_row(row, tz, day):
             print(f'\n end time {time[1]} is invalid, assuming duration of 2h for event: {title}')
             end = start + timedelta(hours=2)
 
-        guid = gen_uuid('{}-{}'.format(start, next(iter(links), title)))
-        local_id = voc.tools.get_id(guid)
         duration = (end - start).total_seconds() / 60
 
+        # ignore dummy events
+        if duration == 0 or title == 'Beispielüberschrift' or persons == 'EH-Orga':
+            return None
+
+        guid = gen_uuid(f'{start}-{next(iter(links), title)}')
+        local_id = voc.tools.get_id(guid)
+
+        '''
         if 'Workshop3' in title or 'Workshop3' in abstract:
             room = 'Workshop 3'
         elif 'Workshop2' in title or 'Workshop2' in abstract:
@@ -147,34 +209,31 @@ def process_row(row, tz, day):
             room = 'Workshop 1'
         else:
             room = 'Self-organized'
-
-        event = Event(OrderedDict([
-            ('id', local_id),
-            ('guid', guid),
-            # ('logo', None),
-            ('date', start.isoformat()),
-            ('start', start.strftime('%H:%M')),
-            ('duration', '%d:%02d' % divmod(duration, 60)),
-            ('room', room),
-            ('slug', None),
-            ('url', wiki_url.split('?')[0]),
-            ('title', title),
-            ('subtitle', ''),
-            ('track', 'Workshop'),
-            ('type', 'Workshop'),
-            ('language', 'de'),
-            ('abstract', abstract or ''),
-            ('description', ''),
-            ('persons', [OrderedDict([
-                ('id', 0),
-                ('public_name', p.strip()),
-                # ('#text', p),
-            ]) for p in persons and persons.split(',')]),
-            ('links', [ 
-                {'url': link_url, 'title': link_title} for link_url, link_title in external_links.items()
-            ])
-        ]), start)
+        '''
+
+        event = Event({
+            'id': local_id,
+            'guid': guid,
+            # 'logo': None,
+            'date': start.isoformat(),
+            'start': start.strftime('%H:%M'),
+            'duration': '%d:%02d' % divmod(duration, 60),
+            'room': room,
+            'slug': None,
+            'url': wiki_url.split('?')[0],
+            'title': title,
+            'subtitle': '',
+            'track': 'Workshop',
+            'type': 'Workshop',
+            'language': 'de',
+            'abstract': abstract or '',
+            'description': '',
+            'persons': [{'id': 0, 'name': p.strip()} for p in persons and persons.split(',')],
+            'links': [{'url': link_url, 'title': link_title} for link_url, link_title in external_links.items()]
+        }, start)
         write('.')
+        if debug:
+            print(event)
         return event
 
     except Exception as e:
@@ -206,15 +265,15 @@ def main():
 
     if not os.path.exists(output_dir):
         if not os.path.exists(secondary_output_dir):
-            os.mkdir(output_dir) 
+            os.mkdir(output_dir)
         else:
             output_dir = secondary_output_dir
             local = True
     os.chdir(output_dir)
 
     main()
 
-    if not local:  
+    if not local:
         os.system("git add *.json *.xml")
         os.system("git commit -m 'updates from " + str(datetime.now()) + "'")
         os.system("git push")