clean up

acse-ww721 · Sep 12, 2023 · d57e0de · d57e0de
1 parent 063d0af
commit d57e0de
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 32 deletions.
diff --git a/src/assimilation/assimilation.py b/src/assimilation/assimilation.py
@@ -1,3 +1,6 @@
+# The function implementation below is a modification version from Tensorflow
+# Original code link: https://github.com/ashesh6810/DDWP-DA/blob/master/EnKF_DD_all_time.py
+
 import numpy as np
 import netCDF4 as nc
 import scipy.io as sio
@@ -47,6 +50,30 @@
 
 
 def ENKF(x, n, P, Q, R, obs, model, u_ensemble):
+ """
+ Perform Ensemble Kalman Filter (EnKF) data assimilation.
+
+ Parameters:
+ x (ndarray): The state vector to be updated.
+ n (int): The size of the state vector.
+ P (ndarray): The covariance matrix of the state vector.
+ Q (ndarray): The process noise covariance matrix.
+ R (ndarray): The observation noise covariance matrix.
+ obs (ndarray): The observed data.
+ model: The predictive model used for forecasting.
+ u_ensemble (ndarray): An array to store the ensemble of forecasts.
+
+ Returns:
+ x_updated (ndarray): The updated state vector.
+ P_updated (ndarray): The updated covariance matrix.
+
+ This function performs the Ensemble Kalman Filter (EnKF) data assimilation to update
+ the state vector 'x' and its covariance matrix 'P' based on observations 'obs'.
+ The process noise covariance 'Q' and observation noise covariance 'R' are also used.
+ The predictive model 'model' is used for forecasting, and the ensemble of forecasts
+ is stored in 'u_ensemble'.
+ """
+
  obs = np.reshape(obs, [n, 1])
  x = np.reshape(x, [n, 1])
  [U, S, V] = np.linalg.svd(P)
@@ -82,35 +109,6 @@ def ENKF(x, n, P, Q, R, obs, model, u_ensemble):
  return x, P
 
 
-import tensorflow
-import keras.backend as K
-
-# from data_manager import ClutteredMNIST
-# from visualizer import plot_mnist_sample
-# from visualizer import print_evaluation
-# from visualizer import plot_mnist_grid
-import netCDF4
-import numpy as np
-from keras.layers import (
- Input,
- Convolution2D,
- Convolution1D,
- MaxPooling2D,
- Dense,
- Dropout,
- Flatten,
- concatenate,
- Activation,
- Reshape,
- UpSampling2D,
- ZeroPadding2D,
-)
-import keras
-from keras.callbacks import History
-
-history = History()
-
-
 model = stn()
 model.load_weights("best_weights_lead1.h5")
 ### This code performs DA at every 24 hrs with a model that is forecasting every hour. So lead will always be 1 ######

diff --git a/src/assimilation/calculation.py b/src/assimilation/calculation.py
@@ -6,8 +6,28 @@
 from sklearn.metrics import mean_squared_error
 from tqdm import tqdm
 
+# Name: Wenqi Wang
+# Github username: acse-ww721
+
 
 def calculate_acc_and_rmse(era5_data, model_weight):
+ """
+ Calculate accuracy (correlation) and RMSE between ERA5 data and model predictions.
+
+ Parameters:
+ era5_data (str): Path to ERA5 dataset in NetCDF format.
+ model_weight (str): Path to the pre-trained model weights.
+
+ Returns:
+ stepwise_acc_values (ndarray): Array of accuracy values for each time step.
+ stepwise_rmse_values (ndarray): Array of RMSE values for each time step.
+
+ This function loads a pre-trained model, ERA5 data, and calculates the accuracy (correlation)
+ and RMSE between ERA5 data and model predictions. It randomly selects 50 initial conditions
+ and predicts data for 24 hours. It then calculates accuracy and RMSE for each time step
+ in the prediction.
+ """
+
  # Load model
  model = stn()
  model.load_weights(model_weight)
@@ -19,7 +39,9 @@ def calculate_acc_and_rmse(era5_data, model_weight):
  # Select 50 initial conditions randomly
  initial_conditions = 50
  random_indices = np.random.choice(
- era5_t.shape[0] - 240, size=initial_conditions, replace=False # -240 to ensure enough range for prediction
+ era5_t.shape[0] - 240,
+ size=initial_conditions,
+ replace=False, # -240 to ensure enough range for prediction
  )
 
  # Parameters
@@ -44,7 +66,7 @@ def calculate_acc_and_rmse(era5_data, model_weight):
  acc_values_for_this_step = []
  rmse_values_for_this_step = []
  for i in range(initial_conditions):
- actual_data = era5_t[random_indices[i] + s*dt]
+ actual_data = era5_t[random_indices[i] + s * dt]
  predicted_data = pred_data[i, s]
 
  correlation, _ = pearsonr(actual_data.flatten(), predicted_data.flatten())
@@ -56,4 +78,4 @@ def calculate_acc_and_rmse(era5_data, model_weight):
  stepwise_acc_values[s] = np.mean(acc_values_for_this_step)
  stepwise_rmse_values[s] = np.mean(rmse_values_for_this_step)
 
- return stepwise_acc_values, stepwise_rmse_values
+ return stepwise_acc_values, stepwise_rmse_values