From 697ae882b69d19a9a331802daac762ee0724affe Mon Sep 17 00:00:00 2001
From: Giovanni Passerello
Date: Tue, 8 Jun 2021 17:21:11 +0100
Subject: [PATCH] update convergence experiments to use svgp

---
 .../experiments/convergence_experiment.py | 121 +++++++++---------
 shgp/data/metadata_convergence.py         |  20 ++-
 2 files changed, 66 insertions(+), 75 deletions(-)

diff --git a/shgp/classification/experiments/convergence_experiment.py b/shgp/classification/experiments/convergence_experiment.py
index 836c664..32c2a26 100644
--- a/shgp/classification/experiments/convergence_experiment.py
+++ b/shgp/classification/experiments/convergence_experiment.py
@@ -4,11 +4,10 @@
 import tensorflow as tf
 
 from shgp.data.metadata_reinit import ReinitMetaDataset
-from shgp.data.metadata_convergence import IonosphereConvergenceMetaDataset
-from shgp.inducing.initialisation_methods import h_reinitialise_PGPR, k_means
-from shgp.models.pgpr import PGPR
-from shgp.robustness.contrained_kernels import ConstrainedExpSEKernel
+from shgp.data.metadata_convergence import PimaConvergenceMetaDataset
+from shgp.inducing.initialisation_methods import h_reinitialise_PGPR, k_means, uniform_subsample
 from shgp.utilities.train_pgpr import train_pgpr
+from shgp.utilities.train_svgp import train_svgp
 
 np.random.seed(42)
 tf.random.set_seed(42)
@@ -21,10 +20,22 @@
 """
 
 
-def run_convergence_experiment(X, Y, M, inner_iters, opt_iters, ci_iters):
-    ################################################
-    # PGPR with Different Reinitialisation Methods #
-    ################################################
+def run_convergence_experiment(X, Y, M, opt_iters):
+    ##############################################
+    # SVGP with Full Inducing Points ('Optimal') #
+    ##############################################
+
+    print("Training non-sparse model...")
+    svgp, elbo_svgp = train_svgp(X, Y, M=len(X), train_iters=250, init_method=uniform_subsample)
+    optimal_kernel = svgp.kernel
+    optimal = np.full(opt_iters + 1, elbo_svgp)
+
+    print("Non-sparse result:")
+    print("optimal = {}".format(elbo_svgp))
+
+    ############################################
+    # Z for Different Reinitialisation Methods #
+    ############################################
 
     # Get the initial inducing points for hgv and kmeans
     print("Selecting inducing points...")
@@ -37,8 +48,12 @@ def run_convergence_experiment(X, Y, M, inner_iters, opt_iters, ci_iters):
     kmeans_Z = k_means(X, M)
     print("Inducing points selected.")
 
+    ####################
+    # Convergence test #
+    ####################
+
     results_hgv_bfgs, results_hgv_adam, results_kmeans_bfgs, results_kmeans_adam = \
-        test_convergence(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, kmeans_Z)
+        test_convergence((X, Y), opt_iters, hgv_Z, kmeans_Z, optimal_kernel)
 
     results = list(zip(results_hgv_bfgs, results_hgv_adam, results_kmeans_bfgs, results_kmeans_adam))
 
@@ -48,78 +63,59 @@ def run_convergence_experiment(X, Y, M, inner_iters, opt_iters, ci_iters):
     print("results_kmeans_bfgs = {}".format(results_kmeans_bfgs))
     print("results_kmeans_adam = {}".format(results_kmeans_adam))
 
-    ##############################################
-    # PGPR with Full Inducing Points ('Optimal') #
-    ##############################################
-
-    print("Training non-sparse model...")
-    _, elbo_pgpr = train_pgpr(X, Y, inner_iters, opt_iters, ci_iters)
-    optimal = np.full(len(results), elbo_pgpr)
-
-    print("Non-sparse result:")
-    print("optimal = {}".format(elbo_pgpr))
-
     return results, optimal
 
 
 # Test the performance of a model with a given number of inducing points, M.
-def test_convergence(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, kmeans_Z):
+def test_convergence(data, opt_iters, hgv_Z, kmeans_Z, optimal_kernel):
     print("Training HGV BFGS...")
     bfgs_opt = gpflow.optimizers.Scipy()
-    results_hgv_bfgs = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, bfgs_opt)
+    results_hgv_bfgs = train_convergence_svgp(data, opt_iters, optimal_kernel, hgv_Z, bfgs_opt)
     print("HGV BFGS trained: ELBO = {}".format(results_hgv_bfgs[-1]))
 
     print("Training HGV Adam...")
     adam_opt = tf.optimizers.Adam(beta_1=0.5, beta_2=0.5)
-    results_hgv_adam = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, adam_opt)
+    results_hgv_adam = train_convergence_svgp(data, opt_iters, optimal_kernel, hgv_Z, adam_opt)
     print("HGV Adam trained: ELBO = {}".format(results_hgv_adam[-1]))
 
     print("Training K-means BFGS...")
     bfgs_opt = gpflow.optimizers.Scipy()
-    results_kmeans_bfgs = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, kmeans_Z, bfgs_opt)
+    results_kmeans_bfgs = train_convergence_svgp(data, opt_iters, optimal_kernel, kmeans_Z, bfgs_opt)
     print("K-means BFGS trained: ELBO = {}".format(results_kmeans_bfgs[-1]))
 
     print("Training K-means BFGS...")
     adam_opt = tf.optimizers.Adam(beta_1=0.5, beta_2=0.5)
-    results_kmeans_adam = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, kmeans_Z, adam_opt)
+    results_kmeans_adam = train_convergence_svgp(data, opt_iters, optimal_kernel, kmeans_Z, adam_opt)
    print("K-means Adam trained: ELBO = {}".format(results_kmeans_adam[-1]))
 
     return results_hgv_bfgs, results_hgv_adam, results_kmeans_bfgs, results_kmeans_adam
 
 
-def train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, Z, opt):
+def train_convergence_svgp(data, opt_iters, kernel, Z, opt):
     """
-    Train a PGPR model until completion, recording the ELBO every 'elbo_period' iterations.
+    Train an SVGP model until completion, recording the ELBO after every optimisation step.
     If we error, keep retrying until success - this is due to a spurious Cholesky error.
     """
-    model = PGPR(
-        data=(X, Y),
-        kernel=ConstrainedExpSEKernel(),
-        inducing_variable=Z  # gradient-optimised inducing points, initialised at Z
+    model = gpflow.models.SVGP(
+        kernel=kernel,
+        likelihood=gpflow.likelihoods.Bernoulli(),
+        inducing_variable=Z
     )
-    gpflow.set_trainable(model.inducing_variable, True)
-    results = [model.elbo().numpy()]
+    gpflow.set_trainable(model.kernel, False)  # fixed hyperparameters
+    gpflow.set_trainable(model.inducing_variable, True)  # optimised Z
+    results = [model.elbo(data).numpy()]
 
     # Try to run the full optimisation cycle.
     try:
         if isinstance(opt, gpflow.optimizers.Scipy):
-            for _ in range(inner_iters):
-                # Optimise kernel hyperparameters, recording the ELBO after each step.
-                for _ in range(opt_iters):
-                    opt.minimize(model.training_loss, variables=model.trainable_variables, options=dict(maxiter=1))
-                    results.append(model.elbo().numpy())
-                # Update local variational parameters.
-                model.optimise_ci(num_iters=ci_iters)
+            # Optimise the variational parameters and inducing points, recording the ELBO after each step.
+            for _ in range(opt_iters):
+                opt.minimize(model.training_loss_closure(data), variables=model.trainable_variables, options=dict(maxiter=1))
+                results.append(model.elbo(data).numpy())
         else:
-            step = 0
-            decay_steps = inner_iters * opt_iters
-            for _ in range(inner_iters):
-                # Optimise kernel hyperparameters, recording the ELBO after each step.
-                for i in range(opt_iters):
-                    opt.lr.assign(decayed_learning_rate(step, decay_steps))
-                    opt.minimize(model.training_loss, var_list=model.trainable_variables)
-                    results.append(model.elbo().numpy())
-                    step += 1
-                # Update local variational parameters.
-                model.optimise_ci(num_iters=ci_iters)
+            # Optimise the variational parameters and inducing points, recording the ELBO after each step.
+            for step in range(opt_iters):
+                opt.lr.assign(decayed_learning_rate(step, opt_iters))
+                opt.minimize(model.training_loss_closure(data), var_list=model.trainable_variables)
+                results.append(model.elbo(data).numpy())
 
     # If we fail due to a (spurious) Cholesky error, restart.
     except tf.errors.InvalidArgumentError as error:
@@ -128,7 +124,7 @@ def train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, Z, opt):
             raise error
         else:
             print("Cholesky error caught, retrying...")
-            return train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, Z, opt)
+            return train_convergence_svgp(data, opt_iters, kernel, Z, opt)
 
     return results
 
@@ -140,7 +136,7 @@ def decayed_learning_rate(step, decay_steps, initial_learning_rate=0.5, alpha=0.
     return initial_learning_rate * decayed
 
 
-def plot_results(name, max_iters, results, optimal):
+def plot_results(name, opt_iters, results, optimal):
     plt.rcParams["figure.figsize"] = (12, 6)
     plt.tick_params(labelright=True)
 
@@ -149,10 +145,12 @@ def plot_results(name, max_iters, results, optimal):
     plt.ylabel('ELBO')
     plt.xlabel('Iterations')
     # Axis limits
-    plt.xlim(0, max_iters)
+    plt.xlim(0, opt_iters)
+    plt.xticks([0, 50, 100, 150])
+    plt.yscale('symlog')
 
     # Setup each subplot
-    iter_array = np.arange(max_iters + 1)
+    iter_array = np.arange(opt_iters + 1)
     plt.plot(iter_array, optimal, color='k', linestyle='dashed', label='Optimal', zorder=101)
     plt.plot(iter_array, results[:, 0], label='HGV GO - BFGS', zorder=100)
     plt.plot(iter_array, results[:, 1], label='HGV GO - Adam', zorder=98)
@@ -165,14 +163,11 @@
 if __name__ == '__main__':
     # Load data
-    dataset = IonosphereConvergenceMetaDataset()  # to test another dataset, just change this definition
+    dataset = PimaConvergenceMetaDataset()  # to test another dataset, just change this definition
     X, Y = dataset.load_data()
 
-    results, optimal = run_convergence_experiment(
-        X, Y,
-        dataset.M, dataset.inner_iters, dataset.opt_iters, dataset.ci_iters
-    )
-
-    max_iters = dataset.inner_iters * dataset.opt_iters
+    # Get convergence results
+    results, optimal = run_convergence_experiment(X, Y, dataset.M, dataset.opt_iters)
 
-    plot_results(dataset.name, max_iters, np.array(results), optimal)
+    # Plot results
+    plot_results(dataset.name, dataset.opt_iters, np.array(results), optimal)
diff --git a/shgp/data/metadata_convergence.py b/shgp/data/metadata_convergence.py
index f846548..eaeb8f7 100644
--- a/shgp/data/metadata_convergence.py
+++ b/shgp/data/metadata_convergence.py
@@ -10,47 +10,43 @@ class ConvergenceMetaDataset:
     Please ensure that elbo_period is an integer divisor of (inner_iters * opt_iters).
 
     :param M: The number of inducing points.
-    :param inner_iters: The number of iterations of the inner optimisation loop.
-    :param opt_iters: The number of iterations of gradient-based optimisation of the kernel hyperparameters.
-    :param ci_iters: The number of iterations of update for the local variational parameters.
+    :param opt_iters: The number of iterations of gradient-based optimisation.
""" M: int - inner_iters: int = 10 - opt_iters: int = 20 - ci_iters: int = 10 + opt_iters: int = 100 class BananaConvergenceMetaDataset(BananaDataset, ConvergenceMetaDataset): def __init__(self): BananaDataset.__init__(self) - ConvergenceMetaDataset.__init__(self, 40, 5, 5, 10) + ConvergenceMetaDataset.__init__(self, 40, 150) class CrabsConvergenceMetaDataset(CrabsDataset, ConvergenceMetaDataset): def __init__(self): CrabsDataset.__init__(self) - ConvergenceMetaDataset.__init__(self, 10, 10, 10, 10) + ConvergenceMetaDataset.__init__(self, 10, 200) class HeartConvergenceMetaDataset(HeartDataset, ConvergenceMetaDataset): def __init__(self): HeartDataset.__init__(self) - ConvergenceMetaDataset.__init__(self, 35, 3, 10, 10) + ConvergenceMetaDataset.__init__(self, 35, 150) class IonosphereConvergenceMetaDataset(IonosphereDataset, ConvergenceMetaDataset): def __init__(self): IonosphereDataset.__init__(self) - ConvergenceMetaDataset.__init__(self, 50, 5, 10, 10) + ConvergenceMetaDataset.__init__(self, 80, 250) class BreastCancerConvergenceMetaDataset(BreastCancerDataset, ConvergenceMetaDataset): def __init__(self): BreastCancerDataset.__init__(self) - ConvergenceMetaDataset.__init__(self, 50, 5, 10, 10) + ConvergenceMetaDataset.__init__(self, 50, 150) class PimaConvergenceMetaDataset(PimaDataset, ConvergenceMetaDataset): def __init__(self): PimaDataset.__init__(self) - ConvergenceMetaDataset.__init__(self, 60, 4, 5, 10) + ConvergenceMetaDataset.__init__(self, 60, 150)