diff --git a/.streamlit/config.toml b/.streamlit/config.toml deleted file mode 100644 index 03496a9..0000000 --- a/.streamlit/config.toml +++ /dev/null @@ -1,4 +0,0 @@ -[logger] - -# Level of logging: 'error', 'warning', 'info', or 'debug'. -level = "info" \ No newline at end of file diff --git a/Docker-requirements.txt b/Docker-requirements.txt index d5206db..9c4deff 100644 --- a/Docker-requirements.txt +++ b/Docker-requirements.txt @@ -44,5 +44,4 @@ sphinxcontrib-serializinghtml sphinx_rtd_theme google-api-python-client google-auth-httplib2 -google-auth-oauthlib -psutil \ No newline at end of file +google-auth-oauthlib \ No newline at end of file diff --git a/surveyweathertool/Home.py b/surveyweathertool/Home.py index ed8fd17..0c29b8c 100644 --- a/surveyweathertool/Home.py +++ b/surveyweathertool/Home.py @@ -2,9 +2,7 @@ from io import StringIO from PIL import Image import streamlit as st -from src.dashboard.utils import read_logos, check_memory_and_disk_usage -import psutil - +from src.dashboard.utils import read_logos def run_dashboard(): # # Home Page Configuration @@ -87,7 +85,4 @@ def run_dashboard(): unsafe_allow_html=True, ) -logging.info(f"The number of CPUs of the web app: {psutil.cpu_count()}") -print(f"The number of CPUs of the web app: {psutil.cpu_count()}") -check_memory_and_disk_usage() run_dashboard() diff --git "a/surveyweathertool/pages/1_\360\237\223\210_LSMS-ISA_Dashboard.py" "b/surveyweathertool/pages/1_\360\237\223\210_LSMS-ISA_Dashboard.py" index e273c56..eb2e1a4 100644 --- "a/surveyweathertool/pages/1_\360\237\223\210_LSMS-ISA_Dashboard.py" +++ "b/surveyweathertool/pages/1_\360\237\223\210_LSMS-ISA_Dashboard.py" @@ -7,8 +7,7 @@ filter_survey, filter_weather, load_data_from_google_drive, - dataframe_reader, - check_memory_and_disk_usage + preprocess_weather_data ) from src.weather.weather_pipeline import ( aggr_monthly, @@ -19,16 +18,17 @@ from src.weather.utils import read_shape_file from src.weather.create_visuals import ( generate_choropleth, - generate_interactive_time_series, + # generate_interactive_time_series, generate_bivariate_map, plot_poverty_index, ) -from src.weather.weather_pipeline import plot_heatmap_grid_on_map +# from src.weather.weather_pipeline import plot_heatmap_grid_on_map from src.weather.constants import ( TEMPERATURE_FILE, PRECIPITATION_FILE, NIGERIA_SHAPE_PATH_FILE, LSMS_SURVEY_FILE, + JOINED_WEATHER_DATA_FILE ) from src.weather_x_survey.weather_survey import combine_with_poverty_index @@ -51,7 +51,7 @@ # Add filters/input widgets with tooltips st.sidebar.markdown("Select Filters:") -year_list = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019] +year_list = [2010, 2011, 2012, 2013, 2014, 2015, 2016] year_choice_dropdown = st.sidebar.selectbox("Year", year_list, help="Year Selection") # Time Aggregation Choice Widget time_list = ["Yearly", "Seasonaly", "Monthly", "Survey-Dependent"] @@ -66,8 +66,6 @@ disable_dropdown = True poverty_legend = "0: Not Deprived, 1: Moderately Deprived, 2: Severely Deprived" -check_memory_and_disk_usage() - # Use forms and submit button to batch input widgets with st.sidebar.form(key="columns_in_form"): if "Seasonaly" in time_choice_dropdown: @@ -158,39 +156,37 @@ if submitted: st.toast("Weather data is being read and preprocessed", icon="⌛") - check_memory_and_disk_usage() with st.spinner("Weather data is being read and preprocessed..."): # Read Data for Dashboard (Once and st.caches it) nigeria_shape_df = read_shape_file(data_path=NIGERIA_SHAPE_PATH_FILE) - check_memory_and_disk_usage() - precipitation_indicators_data = load_data_from_google_drive( - file_to_load=PRECIPITATION_FILE - ) - check_memory_and_disk_usage() - precipitation_indicators = pd.read_parquet(precipitation_indicators_data) - temperature_indicators_data = load_data_from_google_drive( - file_to_load=TEMPERATURE_FILE + # precipitation_indicators_data = load_data_from_google_drive( + # file_to_load=PRECIPITATION_FILE + # ) + # precipitation_indicators = pd.read_parquet(precipitation_indicators_data) + # temperature_indicators_data = load_data_from_google_drive( + # file_to_load=TEMPERATURE_FILE + # ) + # temperature_indicators = pd.read_parquet(temperature_indicators_data) + weather_data = load_data_from_google_drive( + file_to_load=JOINED_WEATHER_DATA_FILE ) - check_memory_and_disk_usage() - temperature_indicators = pd.read_parquet(temperature_indicators_data) - check_memory_and_disk_usage() + weather_data_df = pd.read_parquet(weather_data) + weather_data_df = preprocess_weather_data(weather_data_df) st.toast("Survey data is being read and preprocessed", icon="⌛") with st.spinner("Survey data is being read and preprocessed..."): lsms_survey_data = load_data_from_google_drive(file_to_load=LSMS_SURVEY_FILE) - check_memory_and_disk_usage() survey_data_df = pd.read_pickle(lsms_survey_data).reset_index() - check_memory_and_disk_usage() target_epsg = 4326 if disable_dropdown == True: poverty_index_dropdown = None dict_value_cols = { - "Precipitation (mm)": (precipitation_indicators, "Blues"), - "Temperature (°C)": (temperature_indicators, "Reds"), - "Drought": (precipitation_indicators, "Blues"), - "Heavy Rain": (precipitation_indicators, "Blues"), - "Heat Wave": (temperature_indicators, "Blues"), + "Precipitation (mm)": (weather_data_df, "Blues"), + "Temperature (°C)": (weather_data_df, "Reds"), + "Drought": (weather_data_df, "Blues"), + "Heavy Rain": (weather_data_df, "Blues"), + "Heat Wave": (weather_data_df, "Blues"), } weather_indicators = { @@ -365,32 +361,32 @@ # ) # ) - st.markdown( - f"

Heatmap for {weather_dropdown[0]}

", - unsafe_allow_html=True, - ) - st.pyplot( - plot_heatmap_grid_on_map( - df=filtered_grid_1, - value_col="mean", - geo_df=nigeria_shape_df, - legend_title=legends[weather_dropdown[0]], - cmap=dict_value_cols[weather_dropdown[0]][1], - ) - ) - st.markdown( - f"

Heatmap for {weather_dropdown[1]}

", - unsafe_allow_html=True, - ) - st.pyplot( - plot_heatmap_grid_on_map( - df=filtered_grid_2, - value_col="mean", - geo_df=nigeria_shape_df, - legend_title=legends[weather_dropdown[1]], - cmap=dict_value_cols[weather_dropdown[1]][1], - ) - ) + # st.markdown( + # f"

Heatmap for {weather_dropdown[0]}

", + # unsafe_allow_html=True, + # ) + # st.pyplot( + # plot_heatmap_grid_on_map( + # df=filtered_grid_1, + # value_col="mean", + # geo_df=nigeria_shape_df, + # legend_title=legends[weather_dropdown[0]], + # cmap=dict_value_cols[weather_dropdown[0]][1], + # ) + # ) + # st.markdown( + # f"

Heatmap for {weather_dropdown[1]}

", + # unsafe_allow_html=True, + # ) + # st.pyplot( + # plot_heatmap_grid_on_map( + # df=filtered_grid_2, + # value_col="mean", + # geo_df=nigeria_shape_df, + # legend_title=legends[weather_dropdown[1]], + # cmap=dict_value_cols[weather_dropdown[1]][1], + # ) + # ) st.markdown( f"

Univariate map for {weather_dropdown[0]}

", @@ -490,19 +486,19 @@ # ) # ) - st.markdown( - f"

Heatmap for {weather_dropdown[0]}

", - unsafe_allow_html=True, - ) - st.pyplot( - plot_heatmap_grid_on_map( - df=aggregated_prec_grid_1_year.copy(), - geo_df=nigeria_shape_df, - value_col="mean", - legend_title=legends[weather_dropdown[0]], - cmap=dict_value_cols[weather_dropdown[0]][1], - ) - ) + # st.markdown( + # f"

Heatmap for {weather_dropdown[0]}

", + # unsafe_allow_html=True, + # ) + # st.pyplot( + # plot_heatmap_grid_on_map( + # df=aggregated_prec_grid_1_year.copy(), + # geo_df=nigeria_shape_df, + # value_col="mean", + # legend_title=legends[weather_dropdown[0]], + # cmap=dict_value_cols[weather_dropdown[0]][1], + # ) + # ) st.markdown( f"

Univariate map for {poverty_index_dropdown}

", @@ -610,19 +606,19 @@ # ) # ) - st.markdown( - f"

Heatmap for {weather_dropdown[0]}

", - unsafe_allow_html=True, - ) - st.pyplot( - plot_heatmap_grid_on_map( - df=filtered_grid_1, - geo_df=nigeria_shape_df, - value_col="mean", - legend_title=legends[weather_dropdown[0]], - cmap=dict_value_cols[weather_dropdown[0]][1], - ) - ) + # st.markdown( + # f"

Heatmap for {weather_dropdown[0]}

", + # unsafe_allow_html=True, + # ) + # st.pyplot( + # plot_heatmap_grid_on_map( + # df=filtered_grid_1, + # geo_df=nigeria_shape_df, + # value_col="mean", + # legend_title=legends[weather_dropdown[0]], + # cmap=dict_value_cols[weather_dropdown[0]][1], + # ) + # ) st.markdown( f"

Univariate map for {weather_dropdown[0]}

", @@ -705,20 +701,20 @@ # Side Bar Set Up st.sidebar.markdown( """ - - """, + + """, unsafe_allow_html=True, ) @@ -736,4 +732,4 @@ st.sidebar.markdown( f"
Copyright (c) 2023 Data Science for Social Good (RPTU and DFKI)
", unsafe_allow_html=True, -) +) \ No newline at end of file diff --git "a/surveyweathertool/pages/2_\360\237\223\212_Weather_Data_Enhancement.py" "b/surveyweathertool/pages/2_\360\237\223\212_Weather_Data_Enhancement.py" index 508acdf..dc3b141 100644 --- "a/surveyweathertool/pages/2_\360\237\223\212_Weather_Data_Enhancement.py" +++ "b/surveyweathertool/pages/2_\360\237\223\212_Weather_Data_Enhancement.py" @@ -13,13 +13,11 @@ check_if_df_has_lat_long, preprocess_data_input, preprocess_weather_data, - load_data_from_google_drive, - dataframe_reader, - check_memory_and_disk_usage + load_data_from_google_drive ) from src.weather_x_survey.weather_survey import merge_weather_household -from src.weather.constants import PRECIPITATION_FILE, TEMPERATURE_FILE +from src.weather.constants import PRECIPITATION_FILE, TEMPERATURE_FILE, JOINED_WEATHER_DATA_FILE # Page Configuration st.set_page_config(page_title="Weather Data Enhancement", page_icon="📊") @@ -59,8 +57,6 @@ # Add file upload element uploaded_file = st.file_uploader("Choose a file to add weather columns", type="csv") -check_memory_and_disk_usage() - # Run the following code only if a file is uploaded if uploaded_file is not None: @@ -68,8 +64,6 @@ start_time = time.time() # Read the file input = pd.read_csv(uploaded_file) - check_memory_and_disk_usage() - # Display the top 5 rows of the file st.write("This is the uploaded input data:") st.write(input.head(5)) @@ -77,32 +71,31 @@ input = preprocess_data_input(input) # Error checks file check_if_df_has_lat_long(input) - check_memory_and_disk_usage() # Read Weather data (Temp and Precip with climate columns) and preprocess it st.toast("Weather data is being read and preprocessed", icon="⌛") with st.spinner("Weather data is being read and preprocessed..."): - check_memory_and_disk_usage() - precipitation_indicators_data = load_data_from_google_drive( - file_to_load=PRECIPITATION_FILE - ) - precipitation_indicators = pd.read_parquet(precipitation_indicators_data) - precipitation_indicators = preprocess_weather_data(precipitation_indicators) - check_memory_and_disk_usage() - temperature_indicators_data = load_data_from_google_drive( - file_to_load=TEMPERATURE_FILE - ) - temperature_indicators = pd.read_parquet(temperature_indicators_data) - temperature_indicators = preprocess_weather_data(temperature_indicators) - - check_memory_and_disk_usage() + # precipitation_indicators_data = load_data_from_google_drive( + # file_to_load=PRECIPITATION_FILE + # ) + # precipitation_indicators = pd.read_parquet(precipitation_indicators_data) + # precipitation_indicators = preprocess_weather_data(precipitation_indicators) + # temperature_indicators_data = load_data_from_google_drive( + # file_to_load=TEMPERATURE_FILE + # ) + # temperature_indicators = pd.read_parquet(temperature_indicators_data) + # temperature_indicators = preprocess_weather_data(temperature_indicators) # Defining all indicators to aggregate and return attached to uploaded data weather_data_indicators_dict = { "precipitation": ["precipitation", "heavy_rain_index", "spi_index"], "temperature": ["temperature", "heatwave_index"], } - + weather_data = load_data_from_google_drive( + file_to_load=JOINED_WEATHER_DATA_FILE + ) + weather_data_df = pd.read_parquet(weather_data) + weather_data_df = preprocess_weather_data(weather_data_df) merged_data = input.copy() st.toast("Weather features are being created", icon="⌛") @@ -110,14 +103,13 @@ with st.spinner("Weather features are being created..."): for key, value_cols in weather_data_indicators_dict.items(): if key == "precipitation": - weather_df = precipitation_indicators.copy() + weather_df = weather_data_df elif key == "temperature": - weather_df = temperature_indicators.copy() + weather_df = weather_data_df for indicator in value_cols: # Retrieve weather information for the input using interpolated weather data - check_memory_and_disk_usage() merged_weather_data = merge_weather_household( - input, weather_df.copy(), indicator + input, weather_df, indicator ) merged_data = pd.merge( merged_data, @@ -125,7 +117,6 @@ how="left", on=["lat", "lon", "date"], ) - check_memory_and_disk_usage() st.toast("Weather features have been successfully created", icon="⌛") # Print out new dataset and get it download-ready @@ -144,20 +135,20 @@ # Side Bar Set Up st.sidebar.markdown( """ - - """, + + """, unsafe_allow_html=True, ) @@ -175,4 +166,4 @@ st.sidebar.markdown( f"
Copyright (c) 2023 Data Science for Social Good (RPTU and DFKI)
", unsafe_allow_html=True, -) +) \ No newline at end of file diff --git a/surveyweathertool/requirements.txt b/surveyweathertool/requirements.txt index b321cd1..90b61b8 100644 --- a/surveyweathertool/requirements.txt +++ b/surveyweathertool/requirements.txt @@ -42,5 +42,4 @@ rioxarray google-api-python-client # google-auth-httplib2 # google-auth-oauthlib -streamlit -psutil \ No newline at end of file +streamlit \ No newline at end of file diff --git a/surveyweathertool/src/dashboard/utils.py b/surveyweathertool/src/dashboard/utils.py index ce0a56f..125434e 100644 --- a/surveyweathertool/src/dashboard/utils.py +++ b/surveyweathertool/src/dashboard/utils.py @@ -7,13 +7,10 @@ from PIL import Image from pathlib import Path from typing import List, Optional -import psutil -import logging from src.weather.weather_pipeline import convert_point_crs -@st.cache_data def read_logos(logos_path): """ Reads and returns the STC, UNICEF, DSA, and NOAA logos from the provided path. @@ -229,13 +226,3 @@ def dataframe_reader( if reset_index: data = data.reset_index() return data - -def check_memory_and_disk_usage(): - gb_size = 1024 * 1024 * 1000 - # used_memory = psutil.virtual_memory().used - # calculated_used_memory = psutil.virtual_memory().total - psutil.virtual_memory().available - logging.basicConfig(level=logging.INFO) - logging.info(f"The used memory: {psutil.virtual_memory().percent}") - print(f"The used memory: {psutil.virtual_memory().percent}") - # print(f"The used swap memory: {psutil.swap_memory().percent}") - # print(f"The used disk space: {psutil.disk_usage('/').percent}") diff --git a/surveyweathertool/src/weather/constants.py b/surveyweathertool/src/weather/constants.py index 8d7c85b..748274a 100644 --- a/surveyweathertool/src/weather/constants.py +++ b/surveyweathertool/src/weather/constants.py @@ -37,5 +37,9 @@ # Dasbhoard input data on Google Drive LSMS_SURVEY_FILE = "16K6TFFe9nZD_tos5YDBrEVWHAA4zNx4P" +JOINED_WEATHER_DATA_FILE = "1WLaf5ywBGJLhpr8VvLr67VRK3tL_KQSN" + +# THESE ARE THE VERY FINE-GRANULAR WEATHER FILES -> CAN NOT BE USED WITH STREAMLIT CLOUD DUE TO OOM ISSUES PRECIPITATION_FILE = "1L4htbJs3spx-Ojy2sRz82YAmO4nKBkc1" TEMPERATURE_FILE = "1_6PtYtEbneuSeq4wywQDvKbrpOqZpOvt" + diff --git a/surveyweathertool/src/weather_x_survey/weather_survey.py b/surveyweathertool/src/weather_x_survey/weather_survey.py index e985340..b6162a9 100644 --- a/surveyweathertool/src/weather_x_survey/weather_survey.py +++ b/surveyweathertool/src/weather_x_survey/weather_survey.py @@ -149,6 +149,9 @@ def get_weather_for_household_survey( # Select the weather data for this date and reset index weather_df_date = weather_df[weather_df["date"] == date].reset_index(drop=True) + if len(weather_df_date) == 0: + print(f"WARNING: For the date {date} there is no weather data!") + # Calculate the nearest points and distances