|
| 1 | +# Delta Cross Channel |
| 2 | +# Download the gate operations from https://www.usbr.gov/mp/cvo/vungvari/Ccgates.pdf |
| 3 | + |
| 4 | +import os |
| 5 | +import pandas as pd |
| 6 | +import datetime |
| 7 | +import tabula # for PDF parsing |
| 8 | +import requests |
| 9 | +from . import store_utils as utils |
| 10 | +import click |
| 11 | + |
# NOTE(review): this call is missing a leading "@", so the decorator it builds
# is discarded and ``main`` below stays a plain function (``click.option`` on
# its own only attaches option metadata to the function). ``@click.command()``
# was probably intended, but switching would change how ``main`` is invoked
# from Python code, so the statement is kept as-is pending confirmation.
click.command()


@click.option("--base-dir", default="data/raw/dxc_gate")
def main(base_dir="data/raw/dxc_gate"):
    """
    Download the Delta Cross Channel gate log from the US Bureau of Reclamation
    https://www.usbr.gov/mp/cvo/vungvari/Ccgates.pdf

    The PDF is saved into ``base_dir``, its tables are read with tabula, and
    the result is written as ``Ccgates.csv`` into the matching "converted"
    directory (``base_dir`` with ``/raw/`` replaced by ``/converted/``).
    The CSV is indexed by ``datetime`` and has two columns: ``action``
    (integer code for the first word of the log entry) and ``comments``
    (the remaining words of the entry).

    Parameters
    ----------
    base_dir : str
        Directory that receives the downloaded PDF. Created if missing.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If a log entry starts with a word that has no action code.
    """
    utils.ensure_dir(base_dir)
    url = "https://www.usbr.gov/mp/cvo/vungvari/Ccgates.pdf"

    # Bounded wait so a stalled server cannot hang the download forever.
    response = requests.get(url, timeout=60)
    # Explicit status check; an ``assert`` would be stripped under ``python -O``.
    response.raise_for_status()

    fname = url.split("/")[-1]
    stem = fname.split(".")[0]
    pdfname = os.path.join(base_dir, stem + ".pdf")
    with open(pdfname, "wb") as fh:
        fh.write(response.content)

    pages = tabula.read_pdf(
        pdfname, pages="all", guess=False, encoding="ISO-8859-1"  # for windows maybe?
    )
    df = pd.concat(pages)
    df.columns = ["date", "time", "value"]
    df = df.dropna()
    df["datetime"] = df["date"] + " " + df["time"]
    # Rows reading "DATE TIME" are presumably the table header picked up from
    # the PDF. Copy the filtered frame so the column assignments below act on
    # an independent object rather than a possible view.
    df = df[df["datetime"] != "DATE TIME"].copy()
    df["datetime"] = pd.to_datetime(df["datetime"])
    df = df[["datetime", "value"]]
    df = df.set_index("datetime")
    df = df.sort_index()

    # Split each entry once: the first word is the action, the rest is comment.
    tokens = df["value"].str.split()
    df["action"] = tokens.str[0]
    df["comments"] = tokens.map(lambda words: " ".join(words[1:]))
    df = df.drop(columns=["value"])

    action_codes = {
        "open": 2,
        "closed": 0,
        "gate": 0,
        "partially": 1,
        "-": 0,
        "close": 0,
    }
    coded = df["action"].map(action_codes)
    # Unmapped words become NaN; report them by name instead of letting the
    # integer cast fail with an opaque message.
    unknown = df.loc[coded.isna(), "action"].unique()
    if len(unknown) > 0:
        raise ValueError(
            "Unrecognized gate action(s) in {}: {}".format(pdfname, list(unknown))
        )
    df["action"] = coded.astype("int")

    # If the path has no "/raw/" segment the replace is a no-op and the CSV
    # lands next to the PDF.
    conv_dir = os.path.dirname(pdfname).replace("/raw/", "/converted/")
    utils.ensure_dir(conv_dir)
    df.to_csv(os.path.join(conv_dir, stem + ".csv"))
0 commit comments