Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Breaking Change]: Remove save file func #159

Merged
merged 10 commits into from
May 25, 2022
Next commit
refactor!: Allow only dataframe return format
  • Loading branch information
Milind220 committed May 23, 2022
commit 49d49e2f3c3d5c16d5e82816706221d0c952ad0e
79 changes: 8 additions & 71 deletions src/ozone/ozone.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ class Ozone:
output_dir_path (str): The path to the directory where
any output artifacts will be created
"""

_search_aqi_url: str = URLs.search_aqi_url
_find_stations_url: str = URLs.find_stations_url
_default_params: List[str] = [
Expand Down Expand Up @@ -86,15 +85,6 @@ def __init__(
self.token: str = token
self._check_token_validity()

self.output_dir_path: Path = Path(output_path, "ozone_output")
self.file_name = file_name

if self.file_name == "air_quality":
warnings.warn(
"You have not specified a custom save file name. "
"Existing files with the same name may be overwritten!"
)

def _check_token_validity(self) -> None:
"""Check if the token is valid"""
test_city: str = "london"
Expand Down Expand Up @@ -142,52 +132,6 @@ def reset_token(self, token: str) -> None:
self.token = token
self._check_token_validity()

def _format_output(
self,
data_format: str = "df",
df: pandas.DataFrame = pandas.DataFrame(),
) -> pandas.DataFrame:
"""Format output data

Args:
data_format (str): File format. Defaults to 'df'.
Choose from 'csv', 'json', 'xlsx'.
df (pandas.DataFrame,): Dataframe object of air quality data.

Returns:
pandas.DataFrame: The dataframe containing the air quality data.
None: print the string response of file type created.
"""
if data_format == "df":
return df

if data_format not in ["csv", "xlsx", "json"]:
raise Exception(
f"Invalid file format {data_format}. Use any of: csv, json, xlsx, df"
)

self.output_dir_path.mkdir(exist_ok=True)

if data_format == "csv":
df.to_csv(Path(self.output_dir_path, f"{self.file_name}.csv"), index=False)
print(
f"File saved to disk at {self.output_dir_path} as {self.file_name}.csv"
)
elif data_format == "json":
df.to_json(Path(self.output_dir_path, f"{self.file_name}.json"))
print(
f"File saved to disk at {self.output_dir_path} as {self.file_name}.json"
)
elif data_format == "xlsx":
df.to_excel(
Path(self.output_dir_path, f"{self.file_name}.xlsx"),
)
print(
f"File saved to disk at {self.output_dir_path} as {self.file_name}.xlsx"
)

return pandas.DataFrame()

def _extract_live_data(
self, data_obj: Any, params: List[str] = [""]
) -> Dict[str, Union[str, float]]:
Expand Down Expand Up @@ -410,7 +354,6 @@ def get_coordinate_air(
self,
lat: float,
lon: float,
data_format: str = "df",
df: pandas.DataFrame = pandas.DataFrame(),
params: List[str] = [""],
) -> pandas.DataFrame:
Expand Down Expand Up @@ -443,12 +386,11 @@ def get_coordinate_air(

row = self._extract_live_data(data_obj, params=params)
df = pandas.concat([df, pandas.DataFrame([row])], ignore_index=True)
return self._format_output(data_format, df)
return df

def get_city_air(
self,
city: str,
data_format: str = "df",
df: pandas.DataFrame = pandas.DataFrame(),
params: List[str] = [""],
) -> pandas.DataFrame:
Expand Down Expand Up @@ -480,12 +422,11 @@ def get_city_air(
row["city"] = city

df = pandas.concat([df, pandas.DataFrame([row])], ignore_index=True)
return self._format_output(data_format, df)
return df

def get_multiple_coordinate_air(
self,
locations: List[Tuple],
data_format: str = "df",
df: pandas.DataFrame = pandas.DataFrame(),
params: List[str] = [""],
) -> pandas.DataFrame:
Expand Down Expand Up @@ -522,13 +463,12 @@ def get_multiple_coordinate_air(
df = pandas.concat([df, empty_row], ignore_index=True)

df.reset_index(inplace=True, drop=True)
return self._format_output(data_format, df)
return df

def get_range_coordinates_air(
self,
lower_bound: Tuple[float, float],
upper_bound: Tuple[float, float],
data_format: str = "df",
df: pandas.DataFrame = pandas.DataFrame(),
params: List[str] = [""],
) -> pandas.DataFrame:
Expand All @@ -555,13 +495,12 @@ def get_range_coordinates_air(
lower_bound=lower_bound, upper_bound=upper_bound
)
return self.get_multiple_coordinate_air(
locations, data_format=data_format, df=df, params=params
locations, df=df, params=params
)

def get_multiple_city_air(
self,
cities: List[str],
data_format: str = "df",
df: pandas.DataFrame = pandas.DataFrame(),
params: List[str] = [""],
) -> pandas.DataFrame:
Expand Down Expand Up @@ -596,7 +535,7 @@ def get_multiple_city_air(
df = pandas.concat([df, empty_row], ignore_index=True)

df.reset_index(inplace=True, drop=True)
return self._format_output(data_format, df)
return df

def get_specific_parameter(
self,
Expand Down Expand Up @@ -638,7 +577,6 @@ def get_city_station_options(self, city: str) -> pandas.DataFrame:

Returns:
pandas.DataFrame: Table of stations and their relevant information.

"""
# NOTE, HACK, FIXME:
# This functionality was born together with historical data feature.
Expand Down Expand Up @@ -669,7 +607,7 @@ def get_city_station_options(self, city: str) -> pandas.DataFrame:
).sort_values(by=["score"], ascending=False)

def get_historical_data(
self, data_format: str = "df", *, city: str = None, city_id: int = None
self, *, city: str = None, city_id: int = None
) -> pandas.DataFrame:
"""Get historical air quality data for a city

Expand Down Expand Up @@ -725,12 +663,11 @@ def get_historical_data(
# Reset date index and rename the column appropriately
df = df.reset_index().rename(columns={"index": "date"})

return self._format_output(data_format, df)
return df

def get_city_forecast(
self,
city: str,
data_format: str = "df",
df: pandas.DataFrame = pandas.DataFrame(),
) -> pandas.DataFrame:
"""Get a city's air quality forecast
Expand All @@ -753,7 +690,7 @@ def get_city_forecast(
# This ensures that pm25 data is labelled correctly.
df.rename(columns={"pm25": "pm2.5"}, inplace=True)

return self._format_output(data_format, df)
return df


if __name__ == "__main__":
Expand Down