Skip to content
This repository has been archived by the owner on Nov 27, 2022. It is now read-only.

Commit

Permalink
Changed data/source file structure
Browse files Browse the repository at this point in the history
  • Loading branch information
xander-hirsch committed Aug 21, 2021
1 parent 7f5e0a2 commit fcde362
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 30 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
*.csv
*.pickle
data/*.pickle
sources/*.csv
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ Data is provided by the Los Angeles Times and Los Angeles County Department of P
### Python Environment
Clone the repository and install the pipenv environment with `$python3 -m pipenv install`.
### Acquire Data Sources
1. Navigate to the LACDPH [COVID-19 Data Dashboard](http://dashboard.publichealth.lacounty.gov/covid19_surveillance_dashboard/) and download the "14-Day Community Cases" and "7-Day Community Cases" tables into this directory.
2. Use the `fetch-latimes-place-totals.sh` Bash script to get the latest COVID-19 case totals compiled by the Los Angeles Times.
1. Navigate to the LACDPH [COVID-19 Data Dashboard](http://dashboard.publichealth.lacounty.gov/covid19_surveillance_dashboard/) and download the "14-Day Community Cases" and "7-Day Community Cases" tables into the `sources/` directory.
2. Run the `./fetch-latimes-place-totals.sh` Bash script to get the latest COVID-19 case totals compiled by the Los Angeles Times.
3. Convert the CSV files to Pandas DataFrame pickle files by running `./parse-sources.sh`.

## Deploy
Run the dashboard app with `$python3 -m pipenv run python app.py`.
The live dashboard is hosted at [`localhost:8050`](http://localhost:8050).
The live dashboard is hosted at [`localhost:8050`](http://localhost:8050).
23 changes: 2 additions & 21 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,32 +20,13 @@
# First Known COVID-19 Case in California
ABSOLUTE_FIRST_DAY = pd.to_datetime('2020-01-26')

df_times = pd.read_pickle('latimes-places-ts.pickle')
df_times = pd.read_pickle('data/latimes-places-ts.pickle')
ABSOLUTE_LAST_DAY = df_times[DATE].max()

df_dph_7day, df_dph_14day = [
pd.read_csv(f'LA_County_Covid19_CSA_{x}day_case_death_table.csv',
parse_dates=[EP_DATE],
infer_datetime_format=True) for x in (7, 14)
pd.read_pickle(f'data/lacdph-{x}day.pickle') for x in (7, 14)
]

dph_last_day = df_dph_7day[EP_DATE].max() - pd.Timedelta(7, 'days')
df_dph_7day = df_dph_7day[(df_dph_7day[EP_DATE].notna()) &
(df_dph_7day[EP_DATE] <= dph_last_day)].copy()
df_dph_14day = df_dph_14day[(df_dph_14day[EP_DATE].notna()) &
(df_dph_14day[EP_DATE] <= dph_last_day)].copy()

for df in df_dph_7day, df_dph_14day:
df.drop(columns=['Unnamed: 0'], inplace=True)
df.rename(columns={'geo_merge': CSA}, inplace=True)
df.sort_values([EP_DATE, CSA], inplace=True)
df.reset_index(drop=True, inplace=True)
df[CSA] = df[CSA].convert_dtypes()
df['population'] = df['population'].astype('int')
for stat in 'case', 'death':
df[f'{stat}_rate_unstable'] = df[f'{stat}_rate_unstable'].apply(
lambda x: x == '^')

lacdph_csa_list = list(df_dph_7day['csa'].unique())
lacdph_csa_list.sort()

Expand Down
Empty file added data/README.md
Empty file.
3 changes: 1 addition & 2 deletions fetch-latimes-place-totals.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
#!/bin/bash
wget -O latimes-place-totals.csv https://github.com/datadesk/california-coronavirus-data/raw/master/latimes-place-totals.csv
pipenv run python latimes-places-import.py
wget -O sources/latimes-place-totals.csv https://github.com/datadesk/california-coronavirus-data/raw/master/latimes-place-totals.csv
36 changes: 36 additions & 0 deletions import-lacdph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""Prepare the LACDPH community case/death tables for the dashboard.

Reads the 7-day and 14-day CSV exports from ``sources/``, cleans them into
the module-level frames ``df_dph_7day`` and ``df_dph_14day`` and, when run
as a script, pickles them to ``data/lacdph-{7,14}day.pickle``.
"""
import pandas as pd

EP_DATE = 'ep_date'
CSA = 'csa'
# Column-name templates; ``{}`` is replaced by the window length (7 or 14).
DPH_CASE_COLS = 'cases_{}day', 'case_{}day_rate', 'adj_case_{}day_rate'


def _read_source(duration):
    """Load the ``duration``-day source CSV with ``EP_DATE`` parsed as dates.

    ``infer_datetime_format`` is intentionally not passed: it is deprecated
    since pandas 2.0, where the strict format inference it used to enable is
    already the default.
    """
    return pd.read_csv(
        f'sources/LA_County_Covid19_CSA_{duration}day_case_death_table.csv',
        parse_dates=[EP_DATE])


def _clean(raw, last_day):
    """Return a cleaned copy of ``raw`` limited to dates up to ``last_day``.

    Rows without an episode date, or with one after ``last_day``, are
    dropped. The result is sorted by date and CSA with a fresh 0..n-1 index.
    """
    in_window = raw[EP_DATE].notna() & (raw[EP_DATE] <= last_day)
    df = (
        raw[in_window]
        # 'Unnamed: 0' is presumably the row index written by the exporter.
        .drop(columns=['Unnamed: 0'])
        .rename(columns={'geo_merge': CSA})
        .sort_values([EP_DATE, CSA])
        .reset_index(drop=True)
    )
    df[CSA] = df[CSA].convert_dtypes()
    df['population'] = df['population'].astype('int')
    for stat in 'case', 'death':
        flag = f'{stat}_rate_unstable'
        # The flag becomes True exactly where the source cell is '^'.
        df[flag] = df[flag] == '^'
    return df


_raw_7day, _raw_14day = (_read_source(x) for x in (7, 14))

# Both tables are cut off 7 days before the newest episode date found in the
# 7-day table (presumably to skip the still-incomplete latest week - confirm).
dph_last_day = _raw_7day[EP_DATE].max() - pd.Timedelta(7, 'days')
df_dph_7day = _clean(_raw_7day, dph_last_day)
df_dph_14day = _clean(_raw_14day, dph_last_day)

for col in DPH_CASE_COLS:
    # NOTE(review): the int cast also truncates the two 7-day *rate* columns;
    # confirm that whole-number rates are intended.
    df_dph_7day[col.format(7)] = df_dph_7day[col.format(7)].astype('int')
    # 14-day figures are halved, presumably to put them on the same
    # per-7-day scale as the other table - confirm.
    df_dph_14day[col.format(14)] = df_dph_14day[col.format(14)] / 2

if __name__ == '__main__':
    for df, duration in (df_dph_7day, 7), (df_dph_14day, 14):
        df.to_pickle(f'data/lacdph-{duration}day.pickle')
5 changes: 3 additions & 2 deletions latimes-places-import.py → import-latimes-places.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
NEW_CASES_7DAY, NEW_CASES_14DAY = [f'new_cases_{x}day' for x in (7, 14)]
CASE_RATE_7DAY, CASE_RATE_14DAY = [f'case_rate_{x}day' for x in (7, 14)]

df = pd.read_csv('latimes-place-totals.csv',
df = pd.read_csv('sources/latimes-place-totals.csv',
parse_dates=[DATE],
infer_datetime_format=True)

Expand All @@ -35,4 +35,5 @@
(df.loc[id_mask, NEW_CASES_14DAY] / df.loc[id_mask, POPULATION]) *
100_000).round(1)

df.to_pickle('latimes-places-ts.pickle')
if __name__ == '__main__':
df.to_pickle('data/latimes-places-ts.pickle')
3 changes: 3 additions & 0 deletions parse-sources.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Convert the downloaded CSV sources into DataFrame pickle files by running
# both import scripts in turn.

# Stop at the first failing import so its error is not masked by the exit
# status of the one that follows.
set -e

# The import scripts use paths relative to the repository root (sources/,
# data/), so run them from the directory this script lives in.
cd "$(dirname "${BASH_SOURCE[0]}")"

pipenv run python import-latimes-places.py
pipenv run python import-lacdph.py
Empty file added sources/README.md
Empty file.

0 comments on commit fcde362

Please sign in to comment.