Skip to content

Commit 5811a53

Browse files
committed
DD-111 adding dcc and smcg gates downloader
1 parent 16343f7 commit 5811a53

File tree

4 files changed

+88
-0
lines changed

4 files changed

+88
-0
lines changed

dms_datastore/download_dcc.py

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Delta Cross Channel
2+
# Download the gate operations from https://www.usbr.gov/mp/cvo/vungvari/Ccgates.pdf
3+
4+
import os
5+
import pandas as pd
6+
import datetime
7+
import tabula # for PDF parsing
8+
import requests
9+
from . import store_utils as utils
10+
import click
11+
12+
@click.command()
@click.option(
    "--base-dir",
    default="data/raw/dxc_gate",
    help="Directory that receives the downloaded PDF.",
)
def main(base_dir="data/raw/dxc_gate"):
    """
    Download the Delta Cross Channel gate log from the US Bureau of Reclamation
    https://www.usbr.gov/mp/cvo/vungvari/Ccgates.pdf

    The PDF is written to ``base_dir`` and then parsed with tabula into a
    table indexed by datetime with two columns:

    * ``action``: integer code derived from the first word of the log entry
      (open -> 2, partially -> 1, closed/close/gate/"-" -> 0)
    * ``comments``: the remaining words of the log entry

    The table is saved as CSV in the directory obtained by replacing
    ``/raw/`` with ``/converted/`` in the PDF's directory. If that directory
    does not contain ``/raw/`` the CSV is written next to the PDF.

    Raises ``requests.HTTPError`` when the download fails and ``ValueError``
    when no table can be read from the PDF or a log entry starts with a word
    that has no known action code.
    """
    url = "https://www.usbr.gov/mp/cvo/vungvari/Ccgates.pdf"
    action_codes = {
        "open": 2,
        "closed": 0,
        "gate": 0,
        "partially": 1,
        "-": 0,
        "close": 0,
    }

    utils.ensure_dir(base_dir)

    # Fail loudly on HTTP errors; unlike ``assert`` this is not stripped when
    # Python runs with -O. The timeout keeps a stalled server from hanging
    # the job forever.
    response = requests.get(url, timeout=120)
    response.raise_for_status()

    stem = os.path.splitext(url.split("/")[-1])[0]
    pdfname = os.path.join(base_dir, stem + ".pdf")
    with open(pdfname, "wb") as fh:
        fh.write(response.content)

    pages = tabula.read_pdf(
        pdfname, pages="all", guess=False, encoding="ISO-8859-1"  # for windows maybe?
    )
    if not pages:
        raise ValueError(f"No tables could be read from {pdfname}")

    df = pd.concat(pages)
    df.columns = ["date", "time", "value"]
    df = df.dropna()
    df["datetime"] = df["date"] + " " + df["time"]
    # Header rows parsed as data show up as the literal "DATE TIME"; drop
    # them. The copy avoids chained-assignment warnings on the next line.
    df = df[df["datetime"] != "DATE TIME"].copy()
    df["datetime"] = pd.to_datetime(df["datetime"])
    df = df[["datetime", "value"]].set_index("datetime").sort_index()

    # First word of each entry is the gate action, the rest is free text.
    words = df["value"].str.split()
    df["action"] = words.str[0]
    df["comments"] = words.map(lambda x: " ".join(x[1:]))
    df = df.drop(columns=["value"])

    codes = df["action"].map(action_codes)
    unknown = codes.isna().to_numpy()
    if unknown.any():
        # Report the offending tokens instead of an opaque NaN-to-int error.
        bad = sorted(set(df["action"][unknown].astype(str)))
        raise ValueError(f"Unrecognized gate action(s) in {pdfname}: {bad}")
    df["action"] = codes.astype("int")

    conv_dir = os.path.dirname(pdfname).replace("/raw/", "/converted/")
    utils.ensure_dir(conv_dir)
    df.to_csv(os.path.join(conv_dir, stem + ".csv"))
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import requests
2+
import pandas as pd
3+
from . import store_utils as utils
4+
import datetime
5+
import os
6+
import click
7+
8+
9+
@click.command()
@click.option(
    "--base-dir",
    default="data/raw/montezuma_gate_log",
    help="Directory that receives the downloaded workbook.",
)
def main(base_dir="data/raw/montezuma_gate_log"):
    """
    Download the Montezuma Slough Salinity Control Gates log from the California Natural Resources Agency
    https://data.cnra.ca.gov/dataset/suisun-marsh-salinity-control-gates-log/resource/265729e9-4ac0-469e-828b-2564ac077689

    The workbook is written to ``base_dir``, read with its first column as a
    date-parsed index, sorted by that index and saved as CSV in the directory
    obtained by replacing ``/raw/`` with ``/converted/`` in the workbook's
    directory. If that directory does not contain ``/raw/`` the CSV is
    written next to the workbook.

    Raises ``requests.HTTPError`` when the download fails.
    """
    url = "https://data.cnra.ca.gov/dataset/e76622ca-b6e9-4e78-a08e-deb9580d49b3/resource/265729e9-4ac0-469e-828b-2564ac077689/download/smscg-log.xlsx"

    utils.ensure_dir(base_dir)

    # Fail loudly on HTTP errors; unlike ``assert`` this is not stripped when
    # Python runs with -O. The timeout keeps a stalled server from hanging
    # the job forever.
    response = requests.get(url, timeout=120)
    response.raise_for_status()

    stem = os.path.splitext(url.split("/")[-1])[0]
    xlsfname = os.path.join(base_dir, stem + ".xlsx")
    with open(xlsfname, "wb") as fh:
        fh.write(response.content)

    df = pd.read_excel(xlsfname, parse_dates=True, index_col=0)
    df = df.sort_index()

    conv_dir = os.path.dirname(xlsfname).replace("/raw/", "/converted/")
    utils.ensure_dir(conv_dir)
    df.to_csv(os.path.join(conv_dir, stem + ".csv"))

environment.yml

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ dependencies:
1414
- openpyxl
1515
- lxml # for pandas html parsing
1616
- paramiko # for sftp cimis requests
17+
- tabula-py # for pdf parsing
1718
- pandas>=2
1819
- numpy<2
1920
- cfgrib

setup.py

+2
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@
6969
"download_mokelumne=dms_datastore.download_mokelumne:main",
7070
"download_ucdipm=dms_datastore.download_ucdipm:main",
7171
"download_cimis=dms_datastore.download_cimis:main",
72+
"download_dcc=dms_datastore.download_dcc:main",
73+
"download_montezuma_gates=dms_datastore.download_montezuma_gates:main",
7274
"compare_directories=dms_datastore.compare_directories:main",
7375
"populate_repo=dms_datastore.populate_repo:main",
7476
"station_info=dms_datastore.station_info:main",

0 commit comments

Comments
 (0)