Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FEAT: Nevada data scraper #108

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 82 additions & 2 deletions src/cmdc_tools/datasets/official/NV/data.py
Original file line number Diff line number Diff line change
@@ -17,14 +17,18 @@ class NevadaFips(DatasetBaseNoDate):
def get(self):
    """Fetch cases, tests, and hospitalization data and stack them together.

    Returns
    -------
    pd.DataFrame
        The tests, cases, and hospitalization frames concatenated row-wise
        (in that order) without sorting the columns.
    """
    cases = self._get_cases()
    tests = self._get_tests()
    hosp = self._get_hosp()
    # A stale early return of only [tests, cases] previously made the
    # hospitalization frame unreachable; all three are returned now.
    return pd.concat([tests, cases, hosp], sort=False)

def _get_tests(self):
return asyncio.run(self._get_tests_async())

def _get_cases(self):
return asyncio.run(self._get_cases_async())

def _get_hosp(self):
return asyncio.run(self._get_hosp_async())

async def _get_tests_async(self):
async with with_page() as page:
await page.goto(self.source)
@@ -78,7 +82,7 @@ async def _get_labels_from_graph(self, page):
elems = await visual_modern.Jx(
"//*[@class='series']//*[@class='column setFocusRing']"
)

# print("elems\n", elems)
labels = [
(await page.evaluate("(el) => el.getAttribute('aria-label')", e))
for e in elems
@@ -136,6 +140,82 @@ async def _get_cases_async(self):
vintage=pd.Timestamp.utcnow(), fips=self.state_fips
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use the `_retrieve_vintage` method so that, if we ever change how we collect vintages, we only need to update that one method rather than hunt down every place a vintage is created.

)

async def _get_hosp_async(self):
async with with_page() as page:
await page.goto(self.source)
# Wait for dashboard to load
await page.waitForXPath("//span[text()='COVID-19 ']")
# Get next page button
button = await _get_next_page_button(page)
await button.click()
# Wait for dashboard to load (2)
await page.waitForXPath("//div[text()='COVID-19 Statistics by County']")
# Go to next page
await button.click()
# Wait for dashboard to laod (3)
await page.waitForXPath("//div[text()='Results Filter for Demographics']")
# Go to next page
await button.click()
# Wait for dashboard to laod (3)
await page.waitForXPath("//div[text()='Cumulative Tests Reported']")
# Go to next page
await button.click()
# Wait for dashboard to laod (3)
await page.waitForXPath("//div[text()='Cumulative Cases']")
# Go to next page
await button.click()
# Wait for dashboard to laod (3)
await page.waitForXPath("//div[text()='Cumulative Deaths']")
# Go to next page
await button.click()
# Wait for dashboard to laod (3)
await page.waitForXPath(
"//div[text()='Daily Growth Rate, Cases (April 1st to Current)']"
)
# Go to next page
await button.click()
# Find graph
graph = await page.waitForXPath("//*[@class='cartesianChart']")
# return graph
# all_labels = []
# labels = await self._get_labels_from_single_graph(page, graph[0])
# icu_labels = await self._get_labels_from_single_graph(page, graph[1])

await graph.click(button="right")
# Get table button
table_button = await page.waitForXPath("//h6[text()='Show as a table']")
await table_button.click()

labels = await self._get_labels_from_graph(page)
# parse labels
data = {"Suspected": [], "Confirmed": []}
for label in labels:
split = label.split(". ")
date = split[0].split("Date")[1].strip()
tests = split[1].split(" ")
tests_type = tests[0].strip()
# Skip all new tests

tests_num = int(tests[1][:-1].strip().replace(",", ""))
{"Date": date, f"{tests_type}": tests_num}

# data.append({"Date": date, f"{tests_type}": tests_num})
data[tests_type].append({"dt": date, "value": tests_num})
suspected = pd.DataFrame(data["Suspected"]).assign(
variable_name="hospital_beds_in_use_covid_suspected"
)
confirmed = pd.DataFrame(data["Confirmed"]).assign(
variable_name="hospital_beds_in_use_covid_confirmed"
)

df = pd.concat([suspected, confirmed])

df.dt = pd.to_datetime(df.dt)
return df.assign(vintage=pd.Timestamp.utcnow(), fips=self.state_fips)

def _parse_hospitalizations(self, labels):
pass


class NevadaCounty(DatasetBaseNoDate):
state_fips = int(us.states.lookup("Nevada").fips)