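"""get_madavi.py -- scrape archived sensor CSV files from www.madavi.de.

For every sensor ID and every day in the requested date range, try the
per-day CSV first; if it is no longer served, fall back to the monthly ZIP
archive and extract the day's file from it. Files land in data/madavi/<id>/.
"""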
import argparse
import datetime
import io
import os
import time
import zipfile

import requests
from requests.exceptions import HTTPError
from dateutil import rrule, parser
from dateutil.relativedelta import relativedelta

# Needed only by the legacy directory-listing scrape kept (commented out)
# at the bottom of this file:
# from bs4 import BeautifulSoup
# from urllib.request import urlopen
aparser = argparse.ArgumentParser(description='Scrape data from madavi')
aparser.add_argument(
    '-sd', '--startdate', dest='start_date', action='store',
    help='start date for scrape, format: yyyy-mm-dd')
aparser.add_argument(
    '-ed', '--enddate', dest='end_date', action='store',
    help='end date for scrape, format: yyyy-mm-dd')
aparser.add_argument(
    '-v', '--v', dest='verbose', action='store_true',
    help='verbose output')
aparser.add_argument(
    '-id', '--id', nargs='+', dest='sensor_ids', type=str,
    help='ID list, use -id 123456 234567 345678')
args = aparser.parse_args()
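# Example invocation (the dates and IDs below are only illustrative):
#   python get_madavi.py -sd 2018-01-01 -ed 2018-01-31 -v -id 3654427 12017738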
# Cache the most recently fetched monthly ZIP so each archive is downloaded
# once per month rather than once per day (see create_urls below).
zip_file_url = None
zip_file_data = None
def main():
    if args.start_date:
        start_date = args.start_date
    else:
        # default start date: one year before today
        start_date = (datetime.date.today() - relativedelta(years=1)).strftime('%Y-%m-%d')
        print('INFO: using default start date, ' + start_date)
    if args.end_date:
        end_date = args.end_date
    else:
        # default end date: today
        end_date = time.strftime('%Y-%m-%d')
        print('INFO: using default end date, ' + end_date)
    date_list = list(rrule.rrule(rrule.DAILY, dtstart=parser.parse(start_date),
                                 until=parser.parse(end_date)))
    if not date_list:
        print('ERROR: dates not valid')
        return
    if args.sensor_ids:
        sensor_ids = args.sensor_ids
    else:
        # Default to a single known sensor. The full set of IDs this script
        # was originally run against:
        # ['3654427', '12017738', '3654335', '7367238', '8472622', '8072719',
        #  '11694256', '6107040', '8070061', '6103410', '4859280', '562184',
        #  '6136913', '6115415', '4860136', '6152969', '6095511', '4862700',
        #  '10312986', '10350495', '10312970']
        sensor_ids = ['3654427']
    print('Using sensor ids: ' + ', '.join(sensor_ids))
    for sid in sensor_ids:
        print('INFO: downloading from SID ' + sid)
        dir = 'data/madavi/' + sid
        os.makedirs(dir, exist_ok=True)
        for dy in date_list:
            create_urls(sid, dy, dir)
def create_urls(sid, dy, dir):
    """Download the CSV for sensor `sid` on day `dy` into `dir`."""
    global zip_file_url, zip_file_data
    csv_file = 'data-esp8266-' + sid + '-' + dy.strftime('%Y-%m-%d') + '.csv'
    # e.g. https://www.madavi.de/sensor/data_csv/data-esp8266-3654427-2019-02-13.csv
    csv_add = 'https://www.madavi.de/sensor/data_csv/' + csv_file
    # e.g. https://www.madavi.de/sensor/data_csv/2018/02/data-esp8266-3654427-2018-02.zip
    zip_add = ('https://www.madavi.de/sensor/data_csv/' + dy.strftime('%Y/%m') +
               '/data-esp8266-' + sid + '-' + dy.strftime('%Y-%m') + '.zip')
    fname = dir + '/' + csv_file
    # Could check here whether fname already exists and skip the download.
    # Recent days are served as plain per-day CSV files; try that first.
    try:
        r = requests.get(csv_add, stream=True)
        r.raise_for_status()
    except HTTPError:
        # Older days are only available inside the ZIP for the whole month.
        try:
            if zip_file_url != zip_add:
                # New month: fetch the archive once and keep it for the
                # remaining days of that month.
                r = requests.get(zip_add)
                r.raise_for_status()
                zip_file_data = zipfile.ZipFile(io.BytesIO(r.content))
                zip_file_url = zip_add
            try:
                zip_file_data.extract(csv_file, path=dir)
                if args.verbose:
                    print('INFO: completed file (' + fname + ')')
            except KeyError:
                # The monthly archive has no entry for this day.
                print('ERROR: Could not download file (' + fname + ')')
        except HTTPError:
            print('ERROR: Could not download file (' + fname + ')')
    else:
        # Stream the response body to disk.
        with open(fname, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive chunks
                    f.write(chunk)
        if args.verbose:
            print('INFO: completed file (' + fname + ')')
# ---------------------------------------------------------------------------
# Legacy approach, kept for reference: scrape the directory listing with
# BeautifulSoup instead of building the URLs directly (requires the
# commented-out bs4/urlopen imports at the top of the file).
#
# html_page = urlopen(file_add)
# soup = BeautifulSoup(html_page, "lxml")
# for link in soup.findAll('a'):
#     if link.get('href')[-14:] == dy.strftime('%Y-%m-%d') + ".csv":
#         # csv files contain data for just one day
#         target = "https://www.madavi.de/sensor/" + link.get('href')[9:]
#         r = requests.get(target, stream=True)
#         r.raise_for_status()
#         with open(fname + link.get('href'), 'wb') as f:
#             for chunk in r.iter_content(chunk_size=1024):
#                 if chunk:  # filter out keep-alive chunks
#                     f.write(chunk)
#     elif link.get('href')[-11:] == dy.strftime('%Y-%m') + ".zip":
#         # zip files contain a whole month of data
#         zip_file_url = "https://www.madavi.de/sensor/" + link.get('href')
#         r = requests.get(zip_file_url)
#         z = zipfile.ZipFile(io.BytesIO(r.content))
#         z.extractall(fname)
#         print('INFO: completed file (' + link.get('href') + ')')
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    main()