-
Notifications
You must be signed in to change notification settings - Fork 0
/
import-old.py
60 lines (40 loc) · 1.37 KB
/
import-old.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from nomads_pb2 import AppoinmentList, Appointment
from datetime import datetime
import requests
import re
# this is a simple scraping script for the archived appointments
# from the old nomaden.org site
def scrape_url(url, cb, timeout=30):
    """Download ``url`` and report every archived appointment line to ``cb``.

    A line counts as an appointment when it looks like
    ``DD.MM.YYYY, <name>, <address><BR>``.  Only the first two commas act as
    separators, so the address itself may contain commas.  A line that has
    a name but no address is reported with an empty address.

    Args:
        url: Address of the page to download.
        cb: Callable invoked as ``cb(date, name, address)`` once per
            matching line.  All three values are UTF-8 encoded; leading
            whitespace is removed from name and address.
        timeout: Seconds to wait for the server before ``requests`` gives
            up, so an unresponsive host cannot block the import forever.

    Nothing is reported when the server answers with a non-OK status.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != requests.codes.ok:
        return

    # Force UTF-8 decoding of the body instead of the encoding guessed
    # by requests.
    response.encoding = 'utf-8'

    # Compiled once, outside the loop.  A plain raw string is used because
    # the ur'' prefix is a syntax error on Python 3.
    entry_re = re.compile(r'^([0-9]{2}\.[0-9]{2}\.[0-9]{4}, .*)<BR>$')

    for line in response.text.split('\n'):
        match = entry_re.match(line)
        if not match:
            continue
        # partition() never fails to unpack, unlike split(',', 2) which
        # raised ValueError when the address part was missing.
        date, _, rest = match.group(1).partition(',')
        name, _, addr = rest.partition(',')
        cb(date.encode('utf-8'),
           name.lstrip().encode('utf-8'),
           addr.lstrip().encode('utf-8'))
# Module-level accumulator: put_appointment() adds every imported
# appointment to it and import_old() serializes it to disk.
applis = AppoinmentList()
def convert_date(datum):
    """Convert a ``DD.MM.YYYY`` date into an ISO 8601 timestamp string.

    Args:
        datum: The date as text or as UTF-8 encoded bytes (the scraper
            hands its callback encoded values).

    Returns:
        The date at midnight in ISO 8601 form, e.g.
        ``"2010-12-24T00:00:00"``.

    Raises:
        ValueError: If ``datum`` is not a valid ``DD.MM.YYYY`` date.
    """
    if isinstance(datum, bytes):
        # strptime() rejects bytes on Python 3, so decode first.
        datum = datum.decode("utf-8")
    return datetime.strptime(datum, "%d.%m.%Y").isoformat()
def put_appointment(datum, name, addr):
    """Record one scraped appointment in the module-level ``applis`` list.

    Args:
        datum: Date of the appointment; passed through ``convert_date``
            before being stored in the ``setdate`` field.
        name: Stored in the ``name`` field.
        addr: Stored in the ``street`` field.

    The new entry's ``source`` field is set to ``"import"``.
    """
    entry = Appointment()
    entry.name = name
    entry.street = addr

    iso_date = convert_date(datum)
    entry.setdate = iso_date
    entry.source = "import"

    # Add the populated message to the list that import_old() serializes.
    applis.apps.extend([entry])
def import_old():
    """Scrape the old appointment archive and write it to ``archive.pb``.

    Every appointment found at the archive URL is handed to
    ``put_appointment``, which collects it in the module-level ``applis``
    list.  The list is then serialized and written in binary mode to
    ``archive.pb`` in the current working directory.
    """
    scrape_url("http://www.nomaden.org/cgi-bin/termine/olddates.cgi",
               put_appointment)
    # The with-block closes the file on exit, so no explicit close() call
    # is needed.
    with open("archive.pb", "wb") as f:
        f.write(applis.SerializeToString())
# Run the import only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    import_old()