tests: unit test for US districts and all territories added
hurshd0 committed Apr 26, 2020
1 parent e08cf8b commit 2857b8d
Showing 3 changed files with 66 additions and 20 deletions.
61 changes: 53 additions & 8 deletions api/tests/test_endpoints.py
@@ -499,15 +499,60 @@ def test_post_zip_us_districs_and_territories(test_app):
00801 -> St. Thomas, US Virgin Islands
00830 -> St. John, US Virgin Islands
00820 -> St. Croix, US Virgin Islands (City - Christiansted)
00840 -> , US Virgin Islands (City - Frederiksted)
00850 -> , US Virgin Islands (City - Kingshill)
00840 -> St. Croix, US Virgin Islands (City - Frederiksted)
00850 -> St. Croix, US Virgin Islands (City - Kingshill)
"""
payload = {"zip_code": "20037"}
response = test_app.post("/zip", data=json.dumps(payload))
assert response.status_code == 200
data = response.json()["message"]
assert data["state_name"] == "District of Columbia"
assert data["county_name"] == "District of Columbia"

expected_data = [
{
"zip_code": "20037",
"state_name": "District of Columbia",
"county_name": "District of Columbia",
},
{
"zip_code": "00601",
"state_name": "Puerto Rico",
"county_name": "Puerto Rico",
},
{"zip_code": "96910", "state_name": "Guam", "county_name": "Guam"},
{
"zip_code": "96950",
"state_name": "Northern Mariana Islands",
"county_name": "Northern Mariana Islands",
},
{
"zip_code": "00801",
"state_name": "St. Thomas",
"county_name": "US Virgin Islands",
},
{
"zip_code": "00830",
"state_name": "St. John",
"county_name": "US Virgin Islands",
},
{
"zip_code": "00820",
"state_name": "St. Croix",
"county_name": "US Virgin Islands",
},
{
"zip_code": "00840",
"state_name": "St. Croix",
"county_name": "US Virgin Islands",
},
{
"zip_code": "00850",
"state_name": "St. Croix",
"county_name": "US Virgin Islands",
},
]
for expected in expected_data:
    payload = {"zip_code": expected["zip_code"]}
    response = test_app.post("/zip", data=json.dumps(payload))
    assert response.status_code == 200
    data = response.json()["message"]
    assert data["state_name"] == expected["state_name"]
    assert data["county_name"] == expected["county_name"]


def test_post_zip_validation(test_app):
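For reference, the territory checks added above could also be expressed with pytest.mark.parametrize so that each ZIP code reports as its own test case. This is only a sketch of an alternative layout, not part of the commit; it assumes the existing test_app fixture is the same test client used in the test above, and the expected values are copied verbatim from the expected_data list.

import json

import pytest

TERRITORY_CASES = [
    ("20037", "District of Columbia", "District of Columbia"),
    ("00601", "Puerto Rico", "Puerto Rico"),
    ("96910", "Guam", "Guam"),
    ("96950", "Northern Mariana Islands", "Northern Mariana Islands"),
    ("00801", "St. Thomas", "US Virgin Islands"),
    ("00830", "St. John", "US Virgin Islands"),
    ("00820", "St. Croix", "US Virgin Islands"),
    ("00840", "St. Croix", "US Virgin Islands"),
    ("00850", "St. Croix", "US Virgin Islands"),
]


@pytest.mark.parametrize("zip_code,state_name,county_name", TERRITORY_CASES)
def test_post_zip_territory(test_app, zip_code, state_name, county_name):
    # Each case posts a single ZIP code and checks the resolved state/county names.
    response = test_app.post("/zip", data=json.dumps({"zip_code": zip_code}))
    assert response.status_code == 200
    message = response.json()["message"]
    assert message["state_name"] == state_name
    assert message["county_name"] == county_name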
8 changes: 6 additions & 2 deletions api/utils/county.py
@@ -99,7 +99,9 @@ def read_county_stats(state: str, county: str) -> Dict:
full_state_name = reverse_states_map[state]
df = df[df["state_name"] == full_state_name]
if len(df) == 0:
raise DataValidationError(f"No records found for {full_state_name} in our database.")
raise DataValidationError(
f"No records found for {full_state_name} in our database."
)
except:
raise DataReadingError(f"Can't find {full_state_name} in our database.")

@@ -110,7 +112,9 @@ def read_county_stats(state: str, county: str) -> Dict:
else:
df = df[df["county_name"] == county]
if len(df) == 0:
raise DataValidationError(f"No records found for {full_state_name} in our database.")
raise DataValidationError(
f"No records found for {full_state_name} in our database."
)
except:
raise DataValidationError(
f"Can't find State: {full_state_name}, and County: {county} combination."
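Taken together, the two hunks above implement a filter-then-validate flow: filter the stats DataFrame by state, then by county, and raise DataValidationError as soon as a filter comes back empty. A minimal, self-contained sketch of that flow is below. The exception classes are stand-ins for the project's own DataValidationError and DataReadingError, and catching KeyError rather than using a bare except is an assumption about intent, not what the file currently does.

import pandas as pd


class DataReadingError(Exception):
    """Stand-in for the project's data-reading exception."""


class DataValidationError(Exception):
    """Stand-in for the project's data-validation exception."""


def filter_county_stats(df: pd.DataFrame, full_state_name: str, county: str) -> pd.DataFrame:
    try:
        subset = df[df["state_name"] == full_state_name]
    except KeyError as exc:
        # A missing column means the source data could not be read as expected.
        raise DataReadingError(f"Can't find {full_state_name} in our database.") from exc
    if len(subset) == 0:
        raise DataValidationError(
            f"No records found for {full_state_name} in our database."
        )
    subset = subset[subset["county_name"] == county]
    if len(subset) == 0:
        raise DataValidationError(
            f"Can't find State: {full_state_name}, and County: {county} combination."
        )
    return subset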
17 changes: 7 additions & 10 deletions api/utils/gnews.py
@@ -1,16 +1,14 @@
import gc
from typing import Dict

import pandas as pd
import requests
import pandas as pd

from bs4 import BeautifulSoup
from api.config import app_config


def get_state_topic_google_news(state: str,
topic: str,
max_rows: int = 10) -> Dict:
def get_state_topic_google_news(state: str, topic: str, max_rows: int = 10) -> Dict:
"""This function takes a US State name (string dtype) and a topic of
interest (string dtype). The output is a pandas DataFrame with articles,
urls, and publishing times for articles containing the state and topic
@@ -23,8 +21,9 @@ def get_state_topic_google_news(state: str,
type checking.
"""

url = ("https://news.google.com/rss/search?"
f"q={state}+{topic}&hl=en-US&gl=US&ceid=US:en"
url = (
"https://news.google.com/rss/search?"
f"q={state}+{topic}&hl=en-US&gl=US&ceid=US:en"
)
list_of_titles = []
list_of_article_links = []
@@ -46,8 +45,7 @@ def get_state_topic_google_news(state: str,
state_id_for_articles.append(state)

df = pd.DataFrame(
[list_of_titles, list_of_article_links,
list_of_pubdates, state_id_for_articles]
[list_of_titles, list_of_article_links, list_of_pubdates, state_id_for_articles]
).T
df.columns = ["title", "url", "published", "state"]
df["source"] = df["title"].str.split("-").str[-1]
@@ -74,8 +72,7 @@ def get_us_news(max_rows: int = 50) -> Dict:
df = pd.DataFrame(df[["title", "url", "publishedAt"]])
df = df.rename(columns={"publishedAt": "published"})
# Infer datetime
df["published"] = pd.to_datetime(df["published"],
infer_datetime_format=True)
df["published"] = pd.to_datetime(df["published"], infer_datetime_format=True)
# Assume a 5-hour offset, based on comparing CNN timestamps with the API.
df["published"] = df["published"] - pd.Timedelta("5 hours")

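The timestamp handling in get_us_news shown in the last hunk can be illustrated in isolation: parse the API's publishedAt strings into datetimes and shift them by the five-hour offset described in the comment. The sample values below are made up for the illustration; infer_datetime_format mirrors the diff even though newer pandas versions deprecate it.

import pandas as pd

df = pd.DataFrame({"published": ["2020-04-26T14:30:00Z", "2020-04-26T18:05:00Z"]})
df["published"] = pd.to_datetime(df["published"], infer_datetime_format=True)
df["published"] = df["published"] - pd.Timedelta("5 hours")
print(df["published"])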
