fix: combined -- counties and retargeted combined counties (#57)

* chore: updating master (#55) * refactor: change Aaron to standard feat: adding basic test - Changed APP to app according to Flask/FastAPI standard - Changed config_ to _config. trailing space is for python default conflicts not import - Adding basic tests for endpoints * test: added test structures - test_app.py: testing app.py app title, description, version - test_config.py: tested ProductionConfig, DevelopmentConfig, config loader - test_endpoints.py: tested root test * test: added test structures - test_app.py: testing app.py app title, description, version - test_config.py: tested ProductionConfig, DevelopmentConfig, config loader - test_endpoints.py: tested root test * tests: add testing for custom error messages * chore: updated README * chore: fixed PR template * chore: updated README * chore: updated README * tests: adds test cases for endpoints - '/' done - '/news' only testing for 422 and 405, no validation yet - '/twitter': tested 422, 404, 405, and random data sample validation - '/county': tested 404, 405, 422. no data return validation yet - '/state': tested 405, 422. - '/country': tested 405, 422. - '/stats': tested 405, 422. * style: fix for codefactor. 
- test_config.py: keeping assert == True/False for code readability - test_endpoints.py: keeping TODO as a reminder to fix endpoints.py * style: changed config_ to app_config after technical discussion * style: chore: - added .pylintrc - fixed all files for pylint - added .github/workflow/pythonapp.yml - added pipenv, pylint and pytest * chore: - streamlining pythonapp.yml - triggers pythonapp.yml on all push and pull_request * chore: updated pythonapp.yml * chore: updated pythonapp.yml * chore: updated pythonapp.yml, readme.md * chore: updated pythonapp.yml * chore: updated pythonapp.yml * chore: updated pythonapp.yml * chore: updated pythonapp.yml * chore: updated pythonapp.yml * chore: updated pythonapp.yml * chore: updated pythonapp.yml * chore: updated pythonapp.yml * chore: updated pythonapp.yml * feat: rerouting root endpoint to postman * feat: rerouting root endpoint to postman * feat: added redirect to postman, added test * fix: added uvloop * feat: adding coverall * feat: installed coveralls for coverall.io * feat: installed coveralls for coverall.io * feat: installed coveralls for coverall.io * feat: installed coveralls for coverall.io attempt 6 * feat: installed coveralls for coverall.io attempt 8 * feat: installed coveralls for coverall.io attempt 9 * feat: settingup coverall attempt #11 * feat: coverall badge attempt #12 * feat: coverall added, feat: routing root to redoc * fix: default config logic * fix: default config logic * Update LICENSE * chore: updating README again 🤧 (#46) * chore: updating READMEs * chore: updating READMEs * chore: updating READMEs (#48) * fix: post county new_death nan error (#50) * fix: /post county new_death nan error * fix: /post county new_death nan error * feat: test: (#52) * feat: adding zip route * feat: zip route #1 * feat: removed uszipcode, added zipcodes * feat: - feat: added zip endpoint to return county data given zip code - test: added tests for the zip endpoint - feat: modified github actions to trigger on push, and on pr to master/staging * 
han: attempt to fix codefactor #1 * feat: zip endpoint - added custom exception handlers - mal-formed zip codes now return 422 instead of 404 - changed mal-formed zip codes test cases from 404 to 422 * fixed zip endpoint for nyc (#54) Co-authored-by: leehanchung <[email protected]> Co-authored-by: Hanchung Lee <[email protected]> * fix: benton, wa * fix: github actions * fix: github actions to pr only * fix: counties yo counties * fix: merge confliictassz round 2 Co-authored-by: Harsh Desai <[email protected]>
ncov19-us · Apr 23, 2020 · 44d3d84 · 44d3d84
1 parent 14f963e
commit 44d3d84
Show file tree

Hide file tree

Showing 2 changed files with 130 additions and 7 deletions.
diff --git a/api/tests/test_endpoints.py b/api/tests/test_endpoints.py
@@ -389,10 +389,18 @@ def test_post_nyc_borough_zip(test_app):
 
 def test_post_zip(test_app):
  """Test problematic zip codes:
+ 04098 -> Cumberland, Maine
  63163 -> Saint Louis, MO
  70030 -> St. Charles, LA
  70341 -> Assumption Parish, LA
  """
+ payload = {'zip_code': '04098'}
+ response = test_app.post("/zip", data=json.dumps(payload))
+ assert response.status_code == 200
+ data = response.json()['message']
+ assert data['state_name'] == "Maine"
+ assert data['county_name'] == "Cumberland"
+
  payload = {'zip_code': '63163'}
  response = test_app.post("/zip", data=json.dumps(payload))
  assert response.status_code == 200
@@ -422,6 +430,66 @@ def test_post_zip(test_app):
  assert data['county_name'] == "Benton and Franklin"
 
 
def test_post_zip_data_source_adjustments(test_app):
    """Check zip codes whose county is reported differently by the data source.

    Each entry posts a zip code to ``/zip`` and expects a 200 response whose
    ``message`` carries the state and county name listed below:

    02552 -> Dukes County, MA (Dukes and Nantucket in data source)
    02584 -> Nantucket County, MA (Dukes and Nantucket in data source)
    48212 -> Detroit, MI (Wayne County)
    77003 -> Houston, TX (Harris County)
    75847 -> Houston County, TX (which is not Houston city)
    99326 -> Franklin County, WA (Benton and Franklin in data source)
    99352 -> Benton County, WA (Benton and Franklin in data source)
    """
    # (zip code, expected state_name, expected county_name), checked in order.
    expectations = (
        ('02552', "Massachusetts", "Dukes and Nantucket"),
        ('02584', "Massachusetts", "Dukes and Nantucket"),
        ('48212', "Michigan", "Wayne"),
        ('77003', "Texas", "Harris"),
        ('75847', "Texas", "Houston"),
        ('99326', "Washington", "Benton and Franklin"),
        ('99352', "Washington", "Benton and Franklin"),
    )

    for zip_code, state_name, county_name in expectations:
        body = json.dumps({'zip_code': zip_code})
        response = test_app.post("/zip", data=body)
        assert response.status_code == 200
        message = response.json()['message']
        assert message['state_name'] == state_name
        assert message['county_name'] == county_name
+
+
 def test_post_zip_validation(test_app):
  """Unprocessable entity"""
  response = test_app.post("/zip")
@@ -457,6 +525,10 @@ def test_post_zip_not_found(test_app):
  response = test_app.post("/zip", data=json.dumps(payload))
  assert response.status_code == 422
 
+ payload = {'zip_code': '75874'}
+ response = test_app.post("/zip", data=json.dumps(payload))
+ assert response.status_code == 422
+
  payload = {'zip_code': '57400'}
  response = test_app.post("/zip", data=json.dumps(payload))
  assert response.status_code == 422

diff --git a/api/utils/county.py b/api/utils/county.py
@@ -1,5 +1,7 @@
 from typing import Dict
 import pandas as pd
+from cachetools import cached, TTLCache
+
 from api.config import DataReadingError, DataValidationError
 from api.config import app_config
 from api.utils import reverse_states_map
@@ -15,28 +17,76 @@ def read_county_data() -> pd.DataFrame:
  df.columns = map(str.lower, df.columns)
  df.columns = df.columns.str.replace(" ", "_")
  df = pd.DataFrame.to_dict(df, orient="records")
+
+ return df
+
+
@cached(cache=TTLCache(maxsize=1, ttl=3600))
def ingest_county_data(*, url: str) -> pd.DataFrame:
    """Load the county CSV at ``url`` and normalise it for lookups.

    Column names are lower-cased and their spaces replaced with underscores,
    then the frame is passed through ``wrangle_df``. The result is memoised
    by the ``TTLCache`` above (one entry, ttl=3600), keyed on the call
    arguments, so repeated calls with the same ``url`` reuse the frame
    instead of re-reading it.

    NOTE(review): a cache hit returns the same DataFrame object to every
    caller -- callers should filter/copy rather than mutate it in place.

    :param url: location of the county CSV, passed to ``pd.read_csv``.
    :return: the normalised county data as a ``pd.DataFrame``.
    """
    county_frame = pd.read_csv(url)

    # "County Name" -> "county_name"
    county_frame.columns = [
        column.lower().replace(" ", "_") for column in county_frame.columns
    ]

    return wrangle_df(df=county_frame)
+
+
def wrangle_df(*, df: pd.DataFrame) -> pd.DataFrame:
    """Append combined rows for counties the data source splits in two.

    The data source reports Harris County, TX and Wayne County, MI as
    separate "--" entries (matching ``Harris.*Houston`` and
    ``Wayne.*Detroit``). For each of them one extra row named "Harris" /
    "Wayne" is appended that carries the summed counts; the original split
    rows are left in place.

    :param df: county data with at least the columns ``county_name``,
        ``state_name``, ``confirmed``, ``new``, ``death`` and ``new_death``.
    :return: a new DataFrame with the combined rows appended at the end
        (Harris first, then Wayne). If a county has no matching split rows
        nothing is appended for it.
    """
    df = _combine_split_county(
        df, pattern='Harris.*Houston', state='Texas', county='Harris')
    df = _combine_split_county(
        df, pattern='Wayne.*Detroit', state='Michigan', county='Wayne')
    return df


def _combine_split_county(df: pd.DataFrame, pattern: str, state: str,
                          county: str) -> pd.DataFrame:
    """Append one row summing every ``pattern`` match within ``state``."""
    # na=False: rows with a missing county name simply do not match.
    mask = (
        df['county_name'].str.contains(pattern, regex=True, na=False)
        & (df['state_name'] == state)
    )
    parts = df[mask]
    if parts.empty:
        # The data source no longer splits this county; nothing to combine.
        return df

    # Start from the first matching row so the non-numeric columns are kept,
    # then overwrite the name and the counts.
    combined = parts.iloc[[0]].reset_index(drop=True)
    combined.loc[0, 'county_name'] = county
    for column in ('confirmed', 'new', 'death', 'new_death'):
        combined.loc[0, column] = parts[column].sum()

    confirmed = combined.loc[0, 'confirmed']
    death = combined.loc[0, 'death']
    # Express the rate as a percentage (it is rendered with a '%' suffix)
    # and avoid dividing by zero when there are no confirmed cases.
    rate = death / confirmed * 100 if confirmed else 0.0
    combined.loc[0, 'fatality_rate'] = f"{rate:.2f}%"

    return pd.concat([df, combined], ignore_index=True)
 
 
 def read_county_stats(state: str, county: str) -> Dict:
 
- # 2020-04-22 patch Benton, WA
+ # 2020-04-22 patch counties
  if (state == "WA") and (county in ['Benton', 'Franklin']):
  county = "Benton and Franklin"
 
+ if (state == "MA") and (county in ['Dukes', 'Nantucket']):
+ county = "Dukes and Nantucket"
+
  try:
- df = pd.read_csv(app_config.COUNTY_URL)
- #deaths = pd.read_csv(app_config.STATE_DEATH)
+ df = ingest_county_data(url=app_config.COUNTY_URL)
  except:
  raise DataReadingError(
  f"Data reading error State: {state}, and County: {county}."
  )
 
  try:
- df.columns = map(str.lower, df.columns)
- df.columns = df.columns.str.replace(" ", "_")
-
-
  # # used data source 2 for new death number
  # deaths = deaths[deaths['Province_State'] == reverse_states_map[state]]
  # deaths = deaths[deaths['Admin2'] == county]
@@ -55,6 +105,7 @@ def read_county_stats(state: str, county: str) -> Dict:
  raise DataValidationError(
  f"Can't find State: {state}, and County: {county} combination."
  )
+
  return df