-
Notifications
You must be signed in to change notification settings - Fork 3
/
get_historic_data.py
84 lines (74 loc) · 2.63 KB
/
get_historic_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import requests, argparse
import time
from requests.exceptions import HTTPError
from dateutil import rrule, parser
import os
# Command-line interface: date range, verbosity, and sensor id list.
aparser = argparse.ArgumentParser(description='Scrape data from luftdaten')
_cli_options = [
    (('-sd', '--startdate'),
     dict(dest='start_date', action='store',
          help='start date for scrape, format: yyyy-mm-dd')),
    (('-ed', '--enddate'),
     dict(dest='end_date', action='store',
          help='end date for scrape, format: yyyy-mm-dd')),
    (('-v', '--v'),
     dict(dest='verbose', action='store_true',
          help='verbose output')),
    (('-id', '--id'),
     dict(nargs='+', dest='sensor_ids', type=str,
          help='ID list, use -id 123456 234567 345678')),
]
for _flags, _kwargs in _cli_options:
    aparser.add_argument(*_flags, **_kwargs)
# Parsed once at import time; the rest of the script reads this global.
args = aparser.parse_args()
def main():
    """Download daily SDS011 sensor CSVs from the luftdaten archive.

    Resolves the scrape date range and sensor id list from the parsed
    command-line arguments (with defaults when omitted), then fetches one
    CSV per sensor per day into ``data/luftdaten/<sensor_id>/`` via
    ``create_urls``.

    Exits with status 1 when the date range is invalid (start after end).
    """
    if args.start_date:
        start_date = args.start_date
    else:
        start_date = '2019-05-17'
        print('INFO: using default start date, ' + start_date)

    if args.end_date:
        end_date = args.end_date
    else:
        end_date = time.strftime("%Y-%m-%d")  # today
        print('INFO: using default end date, ' + end_date)

    # One datetime per day in [start_date, end_date]; empty when start > end.
    date_list = list(rrule.rrule(rrule.DAILY,
                                 dtstart=parser.parse(start_date),
                                 until=parser.parse(end_date)))
    if not date_list:
        print('ERROR: dates not valid')
        # Non-zero exit: the original bare exit() reported success (0)
        # even though this is an error path.
        raise SystemExit(1)

    if args.sensor_ids:
        sensor_ids = args.sensor_ids
    else:
        sensor_ids = ["5331", "7789", "8554", "8733", "15092", "15462",
                      "16422", "17079", "22068", "22449", "22480", "22523",
                      "22549", "22597", "22612", "22618", "22691", "22879",
                      "22885", "23007", "23628", "24543", "25095", "25555",
                      "26616"]
    print('Using sensor ids: ' + ', '.join(sensor_ids))

    for sid in sensor_ids:
        print('INFO: downloading from SID ' + sid)
        # Avoid shadowing the builtin `dir`; exist_ok replaces the old
        # try/except FileExistsError dance.
        out_dir = "data/luftdaten/" + sid
        os.makedirs(out_dir, exist_ok=True)
        for dy in date_list:
            create_urls(sid, dy, out_dir)
def create_urls(sid, dy, dir):
    """Download one day's SDS011 CSV for sensor *sid* into directory *dir*.

    Builds the archive URL for the date *dy* (a datetime), streams the
    file to ``<dir>/<date>_sds011_sensor_<sid>.csv``, and logs an error
    (without raising) when the server returns an HTTP error status.
    """
    day = dy.strftime('%Y-%m-%d')
    file_add = ("http://archive.luftdaten.info/" + day + "/" + day
                + "_sds011_sensor_" + sid + ".csv")
    fname = dir + "/" + day + "_sds011_sensor_" + sid + ".csv"
    # NOTE(review): could skip the request entirely if fname already exists.
    try:
        # Single streamed request. The original issued requests.get twice —
        # once just to check the status and again to download — fetching
        # every file over the network two times. The `with` block also
        # guarantees the connection is released.
        with requests.get(file_add, stream=True) as r:
            r.raise_for_status()
            with open(fname, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
        if args.verbose:
            print('INFO: completed file (' + fname + ')')
    except HTTPError:
        print('ERROR: Could not download file (' + fname + ')')
main()