diff --git a/.streamlit/config.toml b/.streamlit/config.toml
deleted file mode 100644
index 03496a9..0000000
--- a/.streamlit/config.toml
+++ /dev/null
@@ -1,4 +0,0 @@
-[logger]
-
-# Level of logging: 'error', 'warning', 'info', or 'debug'.
-level = "info"
\ No newline at end of file
diff --git a/Docker-requirements.txt b/Docker-requirements.txt
index d5206db..9c4deff 100644
--- a/Docker-requirements.txt
+++ b/Docker-requirements.txt
@@ -44,5 +44,4 @@ sphinxcontrib-serializinghtml
sphinx_rtd_theme
google-api-python-client
google-auth-httplib2
-google-auth-oauthlib
-psutil
\ No newline at end of file
+google-auth-oauthlib
\ No newline at end of file
diff --git a/surveyweathertool/Home.py b/surveyweathertool/Home.py
index ed8fd17..0c29b8c 100644
--- a/surveyweathertool/Home.py
+++ b/surveyweathertool/Home.py
@@ -2,9 +2,7 @@
from io import StringIO
from PIL import Image
import streamlit as st
-from src.dashboard.utils import read_logos, check_memory_and_disk_usage
-import psutil
-
+from src.dashboard.utils import read_logos
def run_dashboard():
# # Home Page Configuration
@@ -87,7 +85,4 @@ def run_dashboard():
unsafe_allow_html=True,
)
-logging.info(f"The number of CPUs of the web app: {psutil.cpu_count()}")
-print(f"The number of CPUs of the web app: {psutil.cpu_count()}")
-check_memory_and_disk_usage()
run_dashboard()
diff --git "a/surveyweathertool/pages/1_\360\237\223\210_LSMS-ISA_Dashboard.py" "b/surveyweathertool/pages/1_\360\237\223\210_LSMS-ISA_Dashboard.py"
index e273c56..eb2e1a4 100644
--- "a/surveyweathertool/pages/1_\360\237\223\210_LSMS-ISA_Dashboard.py"
+++ "b/surveyweathertool/pages/1_\360\237\223\210_LSMS-ISA_Dashboard.py"
@@ -7,8 +7,7 @@
filter_survey,
filter_weather,
load_data_from_google_drive,
- dataframe_reader,
- check_memory_and_disk_usage
+ preprocess_weather_data
)
from src.weather.weather_pipeline import (
aggr_monthly,
@@ -19,16 +18,17 @@
from src.weather.utils import read_shape_file
from src.weather.create_visuals import (
generate_choropleth,
- generate_interactive_time_series,
+ # generate_interactive_time_series,
generate_bivariate_map,
plot_poverty_index,
)
-from src.weather.weather_pipeline import plot_heatmap_grid_on_map
+# from src.weather.weather_pipeline import plot_heatmap_grid_on_map
from src.weather.constants import (
TEMPERATURE_FILE,
PRECIPITATION_FILE,
NIGERIA_SHAPE_PATH_FILE,
LSMS_SURVEY_FILE,
+ JOINED_WEATHER_DATA_FILE
)
from src.weather_x_survey.weather_survey import combine_with_poverty_index
@@ -51,7 +51,7 @@
# Add filters/input widgets with tooltips
st.sidebar.markdown("Select Filters:")
-year_list = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]
+year_list = [2010, 2011, 2012, 2013, 2014, 2015, 2016]
year_choice_dropdown = st.sidebar.selectbox("Year", year_list, help="Year Selection")
# Time Aggregation Choice Widget
time_list = ["Yearly", "Seasonaly", "Monthly", "Survey-Dependent"]
@@ -66,8 +66,6 @@
disable_dropdown = True
poverty_legend = "0: Not Deprived, 1: Moderately Deprived, 2: Severely Deprived"
-check_memory_and_disk_usage()
-
# Use forms and submit button to batch input widgets
with st.sidebar.form(key="columns_in_form"):
if "Seasonaly" in time_choice_dropdown:
@@ -158,39 +156,37 @@
if submitted:
st.toast("Weather data is being read and preprocessed", icon="⌛")
- check_memory_and_disk_usage()
with st.spinner("Weather data is being read and preprocessed..."):
# Read Data for Dashboard (Once and st.caches it)
nigeria_shape_df = read_shape_file(data_path=NIGERIA_SHAPE_PATH_FILE)
- check_memory_and_disk_usage()
- precipitation_indicators_data = load_data_from_google_drive(
- file_to_load=PRECIPITATION_FILE
- )
- check_memory_and_disk_usage()
- precipitation_indicators = pd.read_parquet(precipitation_indicators_data)
- temperature_indicators_data = load_data_from_google_drive(
- file_to_load=TEMPERATURE_FILE
+ # precipitation_indicators_data = load_data_from_google_drive(
+ # file_to_load=PRECIPITATION_FILE
+ # )
+ # precipitation_indicators = pd.read_parquet(precipitation_indicators_data)
+ # temperature_indicators_data = load_data_from_google_drive(
+ # file_to_load=TEMPERATURE_FILE
+ # )
+ # temperature_indicators = pd.read_parquet(temperature_indicators_data)
+ weather_data = load_data_from_google_drive(
+ file_to_load=JOINED_WEATHER_DATA_FILE
)
- check_memory_and_disk_usage()
- temperature_indicators = pd.read_parquet(temperature_indicators_data)
- check_memory_and_disk_usage()
+ weather_data_df = pd.read_parquet(weather_data)
+ weather_data_df = preprocess_weather_data(weather_data_df)
st.toast("Survey data is being read and preprocessed", icon="⌛")
with st.spinner("Survey data is being read and preprocessed..."):
lsms_survey_data = load_data_from_google_drive(file_to_load=LSMS_SURVEY_FILE)
- check_memory_and_disk_usage()
survey_data_df = pd.read_pickle(lsms_survey_data).reset_index()
- check_memory_and_disk_usage()
target_epsg = 4326
if disable_dropdown == True:
poverty_index_dropdown = None
dict_value_cols = {
- "Precipitation (mm)": (precipitation_indicators, "Blues"),
- "Temperature (°C)": (temperature_indicators, "Reds"),
- "Drought": (precipitation_indicators, "Blues"),
- "Heavy Rain": (precipitation_indicators, "Blues"),
- "Heat Wave": (temperature_indicators, "Blues"),
+ "Precipitation (mm)": (weather_data_df, "Blues"),
+ "Temperature (°C)": (weather_data_df, "Reds"),
+ "Drought": (weather_data_df, "Blues"),
+ "Heavy Rain": (weather_data_df, "Blues"),
+ "Heat Wave": (weather_data_df, "Blues"),
}
weather_indicators = {
@@ -365,32 +361,32 @@
# )
# )
- st.markdown(
- f"
Heatmap for {weather_dropdown[0]}
",
- unsafe_allow_html=True,
- )
- st.pyplot(
- plot_heatmap_grid_on_map(
- df=filtered_grid_1,
- value_col="mean",
- geo_df=nigeria_shape_df,
- legend_title=legends[weather_dropdown[0]],
- cmap=dict_value_cols[weather_dropdown[0]][1],
- )
- )
- st.markdown(
- f"Heatmap for {weather_dropdown[1]}
",
- unsafe_allow_html=True,
- )
- st.pyplot(
- plot_heatmap_grid_on_map(
- df=filtered_grid_2,
- value_col="mean",
- geo_df=nigeria_shape_df,
- legend_title=legends[weather_dropdown[1]],
- cmap=dict_value_cols[weather_dropdown[1]][1],
- )
- )
+ # st.markdown(
+ # f"Heatmap for {weather_dropdown[0]}
",
+ # unsafe_allow_html=True,
+ # )
+ # st.pyplot(
+ # plot_heatmap_grid_on_map(
+ # df=filtered_grid_1,
+ # value_col="mean",
+ # geo_df=nigeria_shape_df,
+ # legend_title=legends[weather_dropdown[0]],
+ # cmap=dict_value_cols[weather_dropdown[0]][1],
+ # )
+ # )
+ # st.markdown(
+ # f"Heatmap for {weather_dropdown[1]}
",
+ # unsafe_allow_html=True,
+ # )
+ # st.pyplot(
+ # plot_heatmap_grid_on_map(
+ # df=filtered_grid_2,
+ # value_col="mean",
+ # geo_df=nigeria_shape_df,
+ # legend_title=legends[weather_dropdown[1]],
+ # cmap=dict_value_cols[weather_dropdown[1]][1],
+ # )
+ # )
st.markdown(
f"Univariate map for {weather_dropdown[0]}
",
@@ -490,19 +486,19 @@
# )
# )
- st.markdown(
- f"Heatmap for {weather_dropdown[0]}
",
- unsafe_allow_html=True,
- )
- st.pyplot(
- plot_heatmap_grid_on_map(
- df=aggregated_prec_grid_1_year.copy(),
- geo_df=nigeria_shape_df,
- value_col="mean",
- legend_title=legends[weather_dropdown[0]],
- cmap=dict_value_cols[weather_dropdown[0]][1],
- )
- )
+ # st.markdown(
+ # f"Heatmap for {weather_dropdown[0]}
",
+ # unsafe_allow_html=True,
+ # )
+ # st.pyplot(
+ # plot_heatmap_grid_on_map(
+ # df=aggregated_prec_grid_1_year.copy(),
+ # geo_df=nigeria_shape_df,
+ # value_col="mean",
+ # legend_title=legends[weather_dropdown[0]],
+ # cmap=dict_value_cols[weather_dropdown[0]][1],
+ # )
+ # )
st.markdown(
f"Univariate map for {poverty_index_dropdown}
",
@@ -610,19 +606,19 @@
# )
# )
- st.markdown(
- f"Heatmap for {weather_dropdown[0]}
",
- unsafe_allow_html=True,
- )
- st.pyplot(
- plot_heatmap_grid_on_map(
- df=filtered_grid_1,
- geo_df=nigeria_shape_df,
- value_col="mean",
- legend_title=legends[weather_dropdown[0]],
- cmap=dict_value_cols[weather_dropdown[0]][1],
- )
- )
+ # st.markdown(
+ # f"Heatmap for {weather_dropdown[0]}
",
+ # unsafe_allow_html=True,
+ # )
+ # st.pyplot(
+ # plot_heatmap_grid_on_map(
+ # df=filtered_grid_1,
+ # geo_df=nigeria_shape_df,
+ # value_col="mean",
+ # legend_title=legends[weather_dropdown[0]],
+ # cmap=dict_value_cols[weather_dropdown[0]][1],
+ # )
+ # )
st.markdown(
f"Univariate map for {weather_dropdown[0]}
",
@@ -705,20 +701,20 @@
# Side Bar Set Up
st.sidebar.markdown(
"""
-
- """,
+
+ """,
unsafe_allow_html=True,
)
@@ -736,4 +732,4 @@
st.sidebar.markdown(
f"Copyright (c) 2023 Data Science for Social Good (RPTU and DFKI) ",
unsafe_allow_html=True,
-)
+)
\ No newline at end of file
diff --git "a/surveyweathertool/pages/2_\360\237\223\212_Weather_Data_Enhancement.py" "b/surveyweathertool/pages/2_\360\237\223\212_Weather_Data_Enhancement.py"
index 508acdf..dc3b141 100644
--- "a/surveyweathertool/pages/2_\360\237\223\212_Weather_Data_Enhancement.py"
+++ "b/surveyweathertool/pages/2_\360\237\223\212_Weather_Data_Enhancement.py"
@@ -13,13 +13,11 @@
check_if_df_has_lat_long,
preprocess_data_input,
preprocess_weather_data,
- load_data_from_google_drive,
- dataframe_reader,
- check_memory_and_disk_usage
+ load_data_from_google_drive
)
from src.weather_x_survey.weather_survey import merge_weather_household
-from src.weather.constants import PRECIPITATION_FILE, TEMPERATURE_FILE
+from src.weather.constants import PRECIPITATION_FILE, TEMPERATURE_FILE, JOINED_WEATHER_DATA_FILE
# Page Configuration
st.set_page_config(page_title="Weather Data Enhancement", page_icon="📊")
@@ -59,8 +57,6 @@
# Add file upload element
uploaded_file = st.file_uploader("Choose a file to add weather columns", type="csv")
-check_memory_and_disk_usage()
-
# Run the following code only if a file is uploaded
if uploaded_file is not None:
@@ -68,8 +64,6 @@
start_time = time.time()
# Read the file
input = pd.read_csv(uploaded_file)
- check_memory_and_disk_usage()
-
# Display the top 5 rows of the file
st.write("This is the uploaded input data:")
st.write(input.head(5))
@@ -77,32 +71,31 @@
input = preprocess_data_input(input)
# Error checks file
check_if_df_has_lat_long(input)
- check_memory_and_disk_usage()
# Read Weather data (Temp and Precip with climate columns) and preprocess it
st.toast("Weather data is being read and preprocessed", icon="⌛")
with st.spinner("Weather data is being read and preprocessed..."):
- check_memory_and_disk_usage()
- precipitation_indicators_data = load_data_from_google_drive(
- file_to_load=PRECIPITATION_FILE
- )
- precipitation_indicators = pd.read_parquet(precipitation_indicators_data)
- precipitation_indicators = preprocess_weather_data(precipitation_indicators)
- check_memory_and_disk_usage()
- temperature_indicators_data = load_data_from_google_drive(
- file_to_load=TEMPERATURE_FILE
- )
- temperature_indicators = pd.read_parquet(temperature_indicators_data)
- temperature_indicators = preprocess_weather_data(temperature_indicators)
-
- check_memory_and_disk_usage()
+ # precipitation_indicators_data = load_data_from_google_drive(
+ # file_to_load=PRECIPITATION_FILE
+ # )
+ # precipitation_indicators = pd.read_parquet(precipitation_indicators_data)
+ # precipitation_indicators = preprocess_weather_data(precipitation_indicators)
+ # temperature_indicators_data = load_data_from_google_drive(
+ # file_to_load=TEMPERATURE_FILE
+ # )
+ # temperature_indicators = pd.read_parquet(temperature_indicators_data)
+ # temperature_indicators = preprocess_weather_data(temperature_indicators)
# Defining all indicators to aggregate and return attached to uploaded data
weather_data_indicators_dict = {
"precipitation": ["precipitation", "heavy_rain_index", "spi_index"],
"temperature": ["temperature", "heatwave_index"],
}
-
+ weather_data = load_data_from_google_drive(
+ file_to_load=JOINED_WEATHER_DATA_FILE
+ )
+ weather_data_df = pd.read_parquet(weather_data)
+ weather_data_df = preprocess_weather_data(weather_data_df)
merged_data = input.copy()
st.toast("Weather features are being created", icon="⌛")
@@ -110,14 +103,13 @@
with st.spinner("Weather features are being created..."):
for key, value_cols in weather_data_indicators_dict.items():
if key == "precipitation":
- weather_df = precipitation_indicators.copy()
+ weather_df = weather_data_df
elif key == "temperature":
- weather_df = temperature_indicators.copy()
+ weather_df = weather_data_df
for indicator in value_cols:
# Retrieve weather information for the input using interpolated weather data
- check_memory_and_disk_usage()
merged_weather_data = merge_weather_household(
- input, weather_df.copy(), indicator
+ input, weather_df, indicator
)
merged_data = pd.merge(
merged_data,
@@ -125,7 +117,6 @@
how="left",
on=["lat", "lon", "date"],
)
- check_memory_and_disk_usage()
st.toast("Weather features have been successfully created", icon="⌛")
# Print out new dataset and get it download-ready
@@ -144,20 +135,20 @@
# Side Bar Set Up
st.sidebar.markdown(
"""
-
- """,
+
+ """,
unsafe_allow_html=True,
)
@@ -175,4 +166,4 @@
st.sidebar.markdown(
f"Copyright (c) 2023 Data Science for Social Good (RPTU and DFKI) ",
unsafe_allow_html=True,
-)
+)
\ No newline at end of file
diff --git a/surveyweathertool/requirements.txt b/surveyweathertool/requirements.txt
index b321cd1..90b61b8 100644
--- a/surveyweathertool/requirements.txt
+++ b/surveyweathertool/requirements.txt
@@ -42,5 +42,4 @@ rioxarray
google-api-python-client
# google-auth-httplib2
# google-auth-oauthlib
-streamlit
-psutil
\ No newline at end of file
+streamlit
\ No newline at end of file
diff --git a/surveyweathertool/src/dashboard/utils.py b/surveyweathertool/src/dashboard/utils.py
index ce0a56f..125434e 100644
--- a/surveyweathertool/src/dashboard/utils.py
+++ b/surveyweathertool/src/dashboard/utils.py
@@ -7,13 +7,10 @@
from PIL import Image
from pathlib import Path
from typing import List, Optional
-import psutil
-import logging
from src.weather.weather_pipeline import convert_point_crs
-@st.cache_data
def read_logos(logos_path):
"""
Reads and returns the STC, UNICEF, DSA, and NOAA logos from the provided path.
@@ -229,13 +226,3 @@ def dataframe_reader(
if reset_index:
data = data.reset_index()
return data
-
-def check_memory_and_disk_usage():
- gb_size = 1024 * 1024 * 1000
- # used_memory = psutil.virtual_memory().used
- # calculated_used_memory = psutil.virtual_memory().total - psutil.virtual_memory().available
- logging.basicConfig(level=logging.INFO)
- logging.info(f"The used memory: {psutil.virtual_memory().percent}")
- print(f"The used memory: {psutil.virtual_memory().percent}")
- # print(f"The used swap memory: {psutil.swap_memory().percent}")
- # print(f"The used disk space: {psutil.disk_usage('/').percent}")
diff --git a/surveyweathertool/src/weather/constants.py b/surveyweathertool/src/weather/constants.py
index 8d7c85b..748274a 100644
--- a/surveyweathertool/src/weather/constants.py
+++ b/surveyweathertool/src/weather/constants.py
@@ -37,5 +37,9 @@
# Dasbhoard input data on Google Drive
LSMS_SURVEY_FILE = "16K6TFFe9nZD_tos5YDBrEVWHAA4zNx4P"
+JOINED_WEATHER_DATA_FILE = "1WLaf5ywBGJLhpr8VvLr67VRK3tL_KQSN"
+
+# NOTE: These are the very fine-granular weather files; they cannot be used with Streamlit Cloud due to out-of-memory (OOM) issues.
PRECIPITATION_FILE = "1L4htbJs3spx-Ojy2sRz82YAmO4nKBkc1"
TEMPERATURE_FILE = "1_6PtYtEbneuSeq4wywQDvKbrpOqZpOvt"
+
diff --git a/surveyweathertool/src/weather_x_survey/weather_survey.py b/surveyweathertool/src/weather_x_survey/weather_survey.py
index e985340..b6162a9 100644
--- a/surveyweathertool/src/weather_x_survey/weather_survey.py
+++ b/surveyweathertool/src/weather_x_survey/weather_survey.py
@@ -149,6 +149,9 @@ def get_weather_for_household_survey(
# Select the weather data for this date and reset index
weather_df_date = weather_df[weather_df["date"] == date].reset_index(drop=True)
+ if len(weather_df_date) == 0:
+ print(f"WARNING: For the date {date} there is no weather data!")
+
# Calculate the nearest points and distances