minor refactor
GiovanniPasserello committed May 21, 2021
1 parent 9528a65 commit 4f31419
Showing 2 changed files with 113 additions and 33 deletions.
56 changes: 31 additions & 25 deletions shgp/data/metadata_metrics.py
@@ -25,14 +25,19 @@ class MetricsMetaDataset:
opt_iters: int
ci_iters: int

# TODO: Get metrics results for PGPR greedy var (non-hetero)
# TODO: Get metrics results for PGPR uniform, k_means
# TODO: Get metrics results for PGPR gradient optim (initialised at hgv)
# This is the best way to phrase it (grad optim, initialised at hgv)


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default)
SVGP Distribution: (kmeans++, no grad-optim)
ELBO - max: -89.519647, min: -103.250001, median: -98.954888, mean: -98.629271, std: 3.787107.
ACC - max: 0.975000, min: 0.825000, median: 0.912500, mean: 0.902500, std: 0.039449.
NLL - max: 0.538161, min: 0.061780, median: 0.202625, mean: 0.222576, std: 0.123889.
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default)
PGPR Distribution: (hetero greedy var, no grad-optim)
ELBO - max: -103.146880, min: -115.827680, median: -110.754296, mean: -110.913423, std: 3.235203.
ACC - max: 1.000000, min: 0.825000, median: 0.925000, mean: 0.907500, std: 0.044791.
NLL - max: 0.457047, min: 0.069402, median: 0.215834, mean: 0.216745, std: 0.097789.
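Each distribution line in these docstrings is a summary of one metric over the individual experiment runs. A minimal sketch of how such a line could be produced, assuming the per-run values have been collected in a list (the helper and the numbers below are hypothetical):

import numpy as np

def summarise(name, per_run_values):
    # Summary over the runs, printed in the format used in these docstrings.
    v = np.asarray(per_run_values)
    print("{} - max: {:f}, min: {:f}, median: {:f}, mean: {:f}, std: {:f}.".format(
        name, v.max(), v.min(), np.median(v), v.mean(), v.std()))

summarise("ELBO", [-89.52, -103.25, -98.95, -97.80])  # hypothetical per-run ELBOs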
@@ -46,12 +51,12 @@ def __init__(self):


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default)
SVGP Distribution: (kmeans++, no grad-optim)
ELBO - max: -22.581493, min: -54.522554, median: -29.843940, mean: -32.031536, std: 8.377061.
ACC - max: 1.000000, min: 1.000000, median: 1.000000, mean: 1.000000, std: 0.000000.
NLL - max: 0.104137, min: 0.001727, median: 0.021329, mean: 0.029044, std: 0.028902.
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default)
PGPR Distribution: (hetero greedy var, no grad-optim)
ELBO - max: -29.895069, min: -30.130667, median: -29.984135, mean: -30.000083, std: 0.078844.
ACC - max: 1.000000, min: 1.000000, median: 1.000000, mean: 1.000000, std: 0.000000.
NLL - max: 0.030897, min: 0.003004, median: 0.008187, mean: 0.011842, std: 0.009417.
@@ -66,12 +71,12 @@ def __init__(self):


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default)
SVGP Distribution: (kmeans++, no grad-optim)
ELBO - max: -102.457024, min: -147.645443, median: -105.172674, mean: -116.764816, std: 19.141206.
ACC - max: 0.888889, min: 0.740741, median: 0.814815, mean: 0.825926, std: 0.037222.
NLL - max: 0.541615, min: 0.234183, median: 0.394648, mean: 0.394743, std: 0.095801.
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default)
PGPR Distribution: (hetero greedy var, no grad-optim)
ELBO - max: -105.712986, min: -109.829346, median: -107.190899, mean: -107.372922, std: 1.172509.
ACC - max: 0.888889, min: 0.814815, median: 0.851852, mean: 0.844444, std: 0.027716.
NLL - max: 0.424670, min: 0.240716, median: 0.347886, mean: 0.347235, std: 0.050955.
@@ -84,23 +89,24 @@ def __init__(self):
MetricsMetaDataset.__init__(self, 10, 35, 250, 10, 250, 10)


# TODO: Rerun?
""" Most likely use M=30 to show the benefits of sparse PGPR
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=30)
SVGP Distribution: (kmeans++, no grad-optim, M=30)
ELBO - max: -107.877620, min: -187.498914, median: -115.158968, mean: -121.753272, std: 22.224797.
ACC - max: 0.972222, min: 0.611111, median: 0.875000, mean: 0.847222, std: 0.096425.
NLL - max: 0.581576, min: 0.175762, median: 0.285526, mean: 0.334639, std: 0.137892.
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=50)
SVGP Distribution: (kmeans++, no grad-optim, M=50)
ELBO - max: -102.320602, min: -111.255612, median: -107.341194, mean: -107.392428, std: 2.878623.
ACC - max: 0.972222, min: 0.861111, median: 0.888889, mean: 0.911111, std: 0.042673.
NLL - max: 0.539788, min: 0.087637, median: 0.269310, mean: 0.276174, std: 0.127492.
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=30)
PGPR Distribution: (hetero greedy var, no grad-optim, M=30)
ELBO - max: -119.951628, min: -131.741912, median: -126.352503, mean: -125.994329, std: 3.639924.
ACC - max: 0.972222, min: 0.750000, median: 0.888889, mean: 0.883333, std: 0.059317.
NLL - max: 0.493626, min: 0.176245, median: 0.302106, mean: 0.317360, std: 0.106654.
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=50)
PGPR Distribution: (hetero greedy var, no grad-optim, M=50)
ELBO - max: -116.037903, min: -125.215633, median: -120.372931, mean: -120.675359, std: 2.760629.
ACC - max: 0.972222, min: 0.833333, median: 0.861111, mean: 0.886111, std: 0.042035.
NLL - max: 0.502648, min: 0.121410, median: 0.301423, mean: 0.307200, std: 0.112117.
@@ -114,17 +120,17 @@ def __init__(self):


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default)
SVGP Distribution: (kmeans++, no grad-optim)
ELBO - max: -51.035899, min: -260.193332, median: -79.258723, mean: -128.164729, std: 87.821823.
ACC - max: 1.000000, min: 0.877193, median: 0.956140, mean: 0.954386, std: 0.034379.
NLL - max: 0.438799, min: 0.035690, median: 0.147612, mean: 0.198864, std: 0.145042.
SVGP Distribution: (kmeans++, with grad-optim, with unconstrained/default)
SVGP Distribution: (kmeans++, with grad-optim)
ELBO - max: -49.402746, min: -263.220134, median: -55.021090, mean: -75.629275, std: 62.565196.
ACC - max: 1.000000, min: 0.807018, median: 0.982456, mean: 0.963158, std: 0.053473.
NLL - max: 0.436694, min: 0.038113, median: 0.088166, mean: 0.121176, std: 0.110232.
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default)
PGPR Distribution: (hetero greedy var, no grad-optim)
ELBO - max: -65.263036, min: -71.668893, median: -69.381602, mean: -69.548874, std: 1.810755.
ACC - max: 1.000000, min: 0.947368, median: 0.982456, mean: 0.980702, std: 0.014573.
NLL - max: 0.156879, min: 0.037506, median: 0.085697, mean: 0.082185, std: 0.033648.
@@ -138,12 +144,12 @@ def __init__(self):


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default)
SVGP Distribution: (kmeans++, no grad-optim)
ELBO - max: -335.880773, min: -346.814701, median: -342.072790, mean: -341.638486, std: 3.397007.
ACC - max: 0.844156, min: 0.714286, median: 0.785714, mean: 0.780519, std: 0.035065.
NLL - max: 0.522449, min: 0.364078, median: 0.443357, mean: 0.441000, std: 0.047642.
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default)
PGPR Distribution: (hetero greedy var, no grad-optim)
ELBO - max: -339.673734, min: -350.845679, median: -345.761043, mean: -345.529780, std: 3.339615.
ACC - max: 0.857143, min: 0.727273, median: 0.785714, mean: 0.789610, std: 0.032233.
NLL - max: 0.518303, min: 0.370971, median: 0.440707, mean: 0.441000, std: 0.044599.
@@ -157,12 +163,12 @@ def __init__(self):


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=300)
SVGP Distribution: (kmeans++, no grad-optim, M=300)
ELBO - max: -440.252890, min: -4480.603436, median: -4454.929089, mean: -3581.029505, std: 1314.972465.
ACC - max: 0.982432, min: 0.810811, median: 0.968243, mean: 0.943649, std: 0.057986.
NLL - max: 0.667848, min: 0.054345, median: 0.663046, mean: 0.516713, std: 0.204524.
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=300)
PGPR Distribution: (hetero greedy var, no grad-optim, M=300)
ELBO - max: -441.419710, min: -464.018613, median: -452.083285, mean: -451.609530, std: 7.613344.
ACC - max: 0.986486, min: 0.972973, median: 0.979730, mean: 0.979324, std: 0.004799.
NLL - max: 0.075884, min: 0.043305, median: 0.052011, mean: 0.054395, std: 0.010713.
@@ -176,12 +182,12 @@ def __init__(self):


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=300)
SVGP Distribution: (kmeans++, no grad-optim, M=300)
ELBO - max: -768.285139, min: -4332.227797, median: -2597.246881, mean: -2260.877601, std: 1301.823368.
ACC - max: 0.979730, min: 0.495946, median: 0.871622, mean: 0.803243, std: 0.183625.
NLL - max: 0.646989, min: 0.043059, median: 0.315875, mean: 0.290692, std: 0.209444.
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=300)
PGPR Distribution: (hetero greedy var, no grad-optim, M=300)
ELBO - max: -933.852979, min: -967.672191, median: -952.890675, mean: -953.277250, std: 9.537135.
ACC - max: 0.989189, min: 0.964865, median: 0.980405, mean: 0.976622, std: 0.007023.
NLL - max: 0.097374, min: 0.037707, median: 0.054798, mean: 0.062037, std: 0.015969.
@@ -195,13 +201,13 @@ def __init__(self):


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=500)
SVGP Distribution: (kmeans++, no grad-optim, M=500)
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=500)
PGPR Distribution: (hetero greedy var, no grad-optim, M=500)
"""


# TODO: Run experiment
# TODO: Run experiment on Colab
# TODO: M=300?
class MagicMetricsMetaDataset(MagicDataset, MetricsMetaDataset):
def __init__(self):
@@ -210,13 +216,13 @@ def __init__(self):


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=500)
SVGP Distribution: (kmeans++, no grad-optim, M=500)
PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=500)
PGPR Distribution: (hetero greedy var, no grad-optim, M=500)
"""


# TODO: Run experiment
# TODO: Run experiment on Colab
# TODO: M=300?
class ElectricityMetricsMetaDataset(ElectricityDataset, MetricsMetaDataset):
def __init__(self):
90 changes: 82 additions & 8 deletions shgp/utilities/train_pgpr_svgp.py
@@ -90,8 +90,8 @@ def _train_sparse_pgpr_svgp(X, Y, M, opt_iters, kernel_type, reinit_method, rein

# Initialise SVGP
pgpr_svgp = gpflow.models.SVGP(
kernel=pgpr.kernel,
likelihood=gpflow.likelihoods.Bernoulli(tf.sigmoid), # TODO: inversion error with tf.sigmoid?
kernel=kernel_type(),
likelihood=gpflow.likelihoods.Bernoulli(tf.sigmoid),
inducing_variable=pgpr.inducing_variable,
q_mu=q_mu,
q_sqrt=q_sqrt
@@ -130,18 +130,92 @@ def _train_sparse_pgpr_svgp(X, Y, M, opt_iters, kernel_type, reinit_method, rein
f_mu, f_var = pgpr_svgp.predict_f(X)
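# Update the PGPR local variational parameters: c_i = sqrt(f_mu^2 + f_var), the square root of the second moment of q(f) at each training point.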
pgpr.likelihood.c_i = tf.math.sqrt(tf.math.square(f_mu) + f_var)

# Update SVGP with reinitialised Z
# Reinitialised Z
pgpr.kernel = pgpr_svgp.kernel # TODO: Performs worse with this line included?
reinit_method(pgpr, M, reinit_metadata.selection_threshold)
pgpr_svgp.inducing_variable = pgpr.inducing_variable
gpflow.set_trainable(pgpr_svgp.inducing_variable, optimise_Z)

# Update SVGP with optimal q_mu, q_sqrt from PGPR
# Compute optimal q_mu, q_sqrt
q_mu, q_var = pgpr.compute_qu()
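# SVGP expects q_sqrt as lower-triangular Cholesky factors of shape [num_latent_gps, M, M], hence the leading axis added below.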
q_sqrt = tf.expand_dims(tf.linalg.cholesky(q_var), axis=0)
pgpr_svgp.q_mu = Parameter(q_mu, dtype=default_float())
pgpr_svgp.q_sqrt = Parameter(q_sqrt, transform=triangular())

# Restart SVGP with optimal q_mu, q_sqrt and reinitialised Z from PGPR
pgpr_svgp = gpflow.models.SVGP(
kernel=kernel_type(),
likelihood=gpflow.likelihoods.Bernoulli(tf.sigmoid),
inducing_variable=pgpr.inducing_variable,
q_mu=q_mu,
q_sqrt=q_sqrt
)
gpflow.set_trainable(pgpr_svgp.inducing_variable, optimise_Z)

if return_metrics:
return pgpr_svgp, np.max(results.results) # return the metrics with the highest ELBO
else:
return pgpr_svgp, np.max(elbos) # return the highest ELBO


# TODO: With greedy variance reinit (no PGPR)
# def _train_sparse_pgpr_svgp(X, Y, M, opt_iters, kernel_type, reinit_method, reinit_metadata, optimise_Z, X_test, Y_test):
# """
# Train a sparse PGPR-SVGP model with a given reinitialisation method.
# For example: greedy_variance() or h_greedy_variance().
# """
# return_metrics = X_test is not None
# if return_metrics: # track ELBO, ACC, NLL
# results = ExperimentResults()
#
# # Initialise PGPR
# # pgpr = PGPR(data=(X, Y), kernel=kernel_type())
# # reinit_method(pgpr, M, reinit_metadata.selection_threshold)
# # q_mu, q_var = pgpr.compute_qu()
# # q_sqrt = tf.expand_dims(tf.linalg.cholesky(q_var), axis=0)
#
# # Initialise SVGP
# pgpr_svgp = gpflow.models.SVGP(
# kernel=kernel_type(),
# likelihood=gpflow.likelihoods.Bernoulli(tf.sigmoid),
# inducing_variable=X.copy()
# )
#
# opt = gpflow.optimizers.Scipy()
# outer_iters = reinit_metadata.outer_iters
# prev_elbo, elbos = -float("inf"), []
# while True:
# # Reinitialise Z
# inducing_locs, _ = greedy_variance(X, M, pgpr_svgp.kernel)
# inducing_vars = gpflow.inducing_variables.InducingPoints(inducing_locs)
# pgpr_svgp = gpflow.models.SVGP(
# kernel=kernel_type(),
# likelihood=gpflow.likelihoods.Bernoulli(tf.sigmoid),
# inducing_variable=inducingpoint_wrapper(inducing_vars)
# )
# gpflow.set_trainable(pgpr_svgp.inducing_variable, optimise_Z)
#
# # Optimise SVGP
# opt.minimize(
# pgpr_svgp.training_loss_closure((X, Y)),
# variables=pgpr_svgp.trainable_variables,
# options=dict(maxiter=opt_iters)
# )
#
# # Evaluate metrics
# next_elbo = pgpr_svgp.elbo((X, Y))
# print("PGPR-SVGP ELBO:", next_elbo)
# elbos.append(next_elbo)
# if return_metrics: # track ELBO, ACC, NLL
# results.add_result(ExperimentResult(next_elbo, *compute_test_metrics(pgpr_svgp, X_test, Y_test)))
#
# # Check convergence
# outer_iters -= 1
# if np.abs(next_elbo - prev_elbo) <= reinit_metadata.conv_threshold: # if ELBO fails to significantly improve.
# break
# elif outer_iters == 0: # it is likely that M is too low, and we will not further converge.
# if reinit_metadata.conv_threshold > 0:
# print("PGPR-SVGP ELBO failed to converge: prev {}, next {}.".format(prev_elbo, next_elbo))
# break
# prev_elbo = next_elbo
#
# if return_metrics:
# return pgpr_svgp, np.max(results.results) # return the metrics with the highest ELBO
# else:
# return pgpr_svgp, np.max(elbos) # return the highest ELBO
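For orientation, the active _train_sparse_pgpr_svgp above could be driven as in the following minimal sketch. The toy data, the SimpleNamespace stand-in for the reinitialisation metadata (exposing the selection_threshold, outer_iters and conv_threshold fields read by the training loop), and the h_greedy_variance import (the routine named in the docstring) are assumptions, not part of this commit:

import numpy as np
from types import SimpleNamespace

import gpflow

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 2))
Y = (X[:, :1] + X[:, 1:] > 0).astype(float)  # toy binary labels in {0, 1}

reinit_metadata = SimpleNamespace(selection_threshold=0.0, outer_iters=10, conv_threshold=1e-3)

model, best_elbo = _train_sparse_pgpr_svgp(
    X, Y, M=30, opt_iters=250,
    kernel_type=gpflow.kernels.SquaredExponential,
    reinit_method=h_greedy_variance,  # assumed to be importable from shgp
    reinit_metadata=reinit_metadata,
    optimise_Z=False,
    X_test=None, Y_test=None  # no held-out metrics tracked
)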
