add convergence experiments
GiovanniPasserello committed Jun 8, 2021
1 parent c705884 commit e5c99ac
Showing 2 changed files with 288 additions and 0 deletions.
178 changes: 178 additions & 0 deletions shgp/classification/experiments/convergence_experiment.py
@@ -0,0 +1,178 @@
import gpflow
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from shgp.data.metadata_reinit import ReinitMetaDataset
from shgp.data.metadata_convergence import IonosphereConvergenceMetaDataset
from shgp.inducing.initialisation_methods import h_reinitialise_PGPR, k_means
from shgp.models.pgpr import PGPR
from shgp.robustness.contrained_kernels import ConstrainedExpSEKernel
from shgp.utilities.train_pgpr import train_pgpr

np.random.seed(42)
tf.random.set_seed(42)

"""
A comparison of PGPR under two different inducing point initialisation procedures. Here we investigate
the convergence of the ELBO under Adam vs BFGS for both k_means and hgv initialisation. This allows us
to analyse whether hgv initialises the inducing points closer to the gradient-optimised result than
k_means does. To evaluate other inducing point selection methods, change the model definitions below
(a sketch of one alternative follows this docstring).
"""


def run_convergence_experiment(X, Y, M, inner_iters, opt_iters, ci_iters):
################################################
# PGPR with Different Reinitialisation Methods #
################################################

# Get the initial inducing points for hgv and kmeans
print("Selecting inducing points...")
pgpr_hgv, _ = train_pgpr(
X, Y,
10, 500, 10,
M=M, init_method=h_reinitialise_PGPR, reinit_metadata=ReinitMetaDataset()
)
hgv_Z = pgpr_hgv.inducing_variable.Z.variables[0].numpy()
kmeans_Z = k_means(X, M)
print("Inducing points selected.")

results_hgv_bfgs, results_hgv_adam, results_kmeans_bfgs, results_kmeans_adam = \
test_convergence(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, kmeans_Z)

results = list(zip(results_hgv_bfgs, results_hgv_adam, results_kmeans_bfgs, results_kmeans_adam))

print("Sparse results:")
print("results_hgv_bfgs = {}".format(results_hgv_bfgs))
print("results_hgv_adam = {}".format(results_hgv_adam))
print("results_kmeans_bfgs = {}".format(results_kmeans_bfgs))
print("results_kmeans_adam = {}".format(results_kmeans_adam))

##############################################
# PGPR with Full Inducing Points ('Optimal') #
##############################################

print("Training non-sparse model...")
_, elbo_pgpr = train_pgpr(X, Y, inner_iters, opt_iters, ci_iters)
optimal = np.full(len(results), elbo_pgpr)

print("Non-sparse result:")
print("optimal = {}".format(elbo_pgpr))

return results, optimal


# Train PGPR with each combination of initialisation (HGV, k-means) and optimiser (BFGS, Adam),
# returning the ELBO trace of each run.
def test_convergence(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, kmeans_Z):
print("Training HGV BFGS...")
bfgs_opt = gpflow.optimizers.Scipy()
results_hgv_bfgs = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, bfgs_opt)
print("HGV BFGS trained: ELBO = {}".format(results_hgv_bfgs[-1]))
print("Training HGV Adam...")
adam_opt = tf.optimizers.Adam()
results_hgv_adam = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, adam_opt)
print("HGV Adam trained: ELBO = {}".format(results_hgv_adam[-1]))

print("Training K-means BFGS...")
bfgs_opt = gpflow.optimizers.Scipy()
results_kmeans_bfgs = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, kmeans_Z, bfgs_opt)
print("K-means BFGS trained: ELBO = {}".format(results_kmeans_bfgs[-1]))
print("Training K-means BFGS...")
adam_opt = tf.optimizers.Adam()
results_kmeans_adam = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, kmeans_Z, adam_opt)
print("K-means Adam trained: ELBO = {}".format(results_kmeans_adam[-1]))

return results_hgv_bfgs, results_hgv_adam, results_kmeans_bfgs, results_kmeans_adam


def train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, Z, opt):
"""
Train a PGPR model until completion, recording the ELBO after every optimisation step.
If an error occurs, keep retrying until success - this guards against a spurious Cholesky error.
"""
model = PGPR(
data=(X, Y),
kernel=ConstrainedExpSEKernel(),
inducing_variable=Z # gradient-optimised inducing points, initialised at Z
)
gpflow.set_trainable(model.inducing_variable, True)
results = [model.elbo().numpy()]

# Try to run the full optimisation cycle.
try:
if isinstance(opt, gpflow.optimizers.Scipy):
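# Scipy/BFGS: limit each call to a single iteration (maxiter=1) so the ELBO can be recorded per step.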
for _ in range(inner_iters):
# Optimise kernel hyperparameters, recording the ELBO after each step.
for _ in range(opt_iters):
opt.minimize(model.training_loss, variables=model.trainable_variables, options=dict(maxiter=1))
results.append(model.elbo().numpy())
# Update local variational parameters.
model.optimise_ci(num_iters=ci_iters)
else:
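# Adam: anneal the learning rate with a cosine decay over the total number of optimisation steps
# (see decayed_learning_rate below).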
step = 0
decay_steps = inner_iters * opt_iters
for _ in range(inner_iters):
# Optimise kernel hyperparameters, recording the ELBO after each step.
for i in range(opt_iters):
opt.lr.assign(decayed_learning_rate(step, decay_steps))
opt.minimize(model.training_loss, var_list=model.trainable_variables)
results.append(model.elbo().numpy())
step += 1
# Update local variational parameters.
model.optimise_ci(num_iters=ci_iters)

# If we fail due to a (spurious) Cholesky error, restart.
except tf.errors.InvalidArgumentError as error:
msg = error.message
if "Cholesky" not in msg and "invertible" not in msg:
raise error
else:
print("Cholesky error caught, retrying...")
return train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, Z, opt)

return results


def decayed_learning_rate(step, decay_steps, initial_learning_rate=0.5, alpha=0.05):
step = min(step, decay_steps)
cosine_decay = 0.5 * (1 + np.cos(np.pi * step / decay_steps))
decayed = (1 - alpha) * cosine_decay + alpha
return initial_learning_rate * decayed
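# Worked example with the defaults above (initial_learning_rate=0.5, alpha=0.05) over 100 decay steps:
#   decayed_learning_rate(0, 100)   -> 0.5      (cosine_decay = 1.0)
#   decayed_learning_rate(50, 100)  -> 0.2625   (cosine_decay = 0.5)
#   decayed_learning_rate(100, 100) -> 0.025    (cosine_decay = 0.0, i.e. the floor alpha * initial_learning_rate)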


def plot_results(name, max_iters, results, optimal):
plt.rcParams["figure.figsize"] = (12, 6)
plt.tick_params(labelright=True)

# Axis labels
plt.title('Convergence of Inducing Point Methods - {} Dataset'.format(name))
plt.ylabel('ELBO')
plt.xlabel('Iterations')
# Axis limits
plt.xlim(0, max_iters)

# Plot each convergence curve, with the non-sparse ELBO as a dashed reference line
iter_array = np.arange(max_iters + 1)
plt.plot(iter_array, optimal, color='k', linestyle='dashed', label='Optimal', zorder=101)
plt.plot(iter_array, results[:, 0], label='HGV GO - BFGS', zorder=100)
plt.plot(iter_array, results[:, 1], label='HGV GO - Adam', zorder=98)
plt.plot(iter_array, results[:, 2], label='K-means GO - BFGS', zorder=99)
plt.plot(iter_array, results[:, 3], label='K-means GO - Adam', zorder=97)

plt.legend(loc='lower right')
plt.show()


if __name__ == '__main__':
# Load data
dataset = IonosphereConvergenceMetaDataset() # to test another dataset, just change this definition
X, Y = dataset.load_data()

results, optimal = run_convergence_experiment(
X, Y,
dataset.M, dataset.inner_iters, dataset.opt_iters, dataset.ci_iters
)

max_iters = dataset.inner_iters * dataset.opt_iters

plot_results(dataset.name, max_iters, np.array(results), optimal)
110 changes: 110 additions & 0 deletions shgp/data/metadata_convergence.py
@@ -0,0 +1,110 @@
from dataclasses import dataclass

from shgp.data.dataset import BananaDataset, BreastCancerDataset, CrabsDataset, HeartDataset, IonosphereDataset, PimaDataset


@dataclass
class ConvergenceMetaDataset:
"""
A dataset utilities class specifically for convergence experiments.
The convergence experiment records the ELBO before optimisation and after every optimisation step,
so each run produces 1 + (inner_iters * opt_iters) recorded values.
:param M: The number of inducing points.
:param inner_iters: The number of iterations of the inner optimisation loop.
:param opt_iters: The number of iterations of gradient-based optimisation of the kernel hyperparameters per inner iteration.
:param ci_iters: The number of update iterations for the local variational parameters.
"""
M: int
inner_iters: int = 10
opt_iters: int = 20
ci_iters: int = 10
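# A usage sketch (hypothetical values): a dataset-specific subclass pairs a dataset with its experiment budget, e.g.
#   ConvergenceMetaDataset(M=30, inner_iters=5, opt_iters=10, ci_iters=10)
# gives 1 + 5 * 10 = 51 recorded ELBO values per optimiser/initialisation pair in the convergence experiment.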


""" Banana with Exp kernel:
results_hgv_bfgs = -120.12304219158608
results_hgv_adam = -120.74396144822651
results_kmeans_bfgs = -120.92400722500008
results_kmeans_adam = -121.4123171648204
optimal = -120.06609320829034
"""


class BananaConvergenceMetaDataset(BananaDataset, ConvergenceMetaDataset):
def __init__(self):
BananaDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 40, 5, 5, 10)


""" Crabs with Exp kernel:
results_hgv_bfgs = -30.83096017301088
results_hgv_adam = -30.91271745579118
results_kmeans_bfgs = -30.893175906038067
results_kmeans_adam = -30.592522689905365
optimal = -30.176934247928045
"""


class CrabsConvergenceMetaDataset(CrabsDataset, ConvergenceMetaDataset):
def __init__(self):
CrabsDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 10, 10, 10, 10)


""" Heart with Exp kernel:
results_hgv_bfgs = -116.87896126803733
results_hgv_adam = -120.50981894487643
results_kmeans_bfgs = -117.49246260411502
results_kmeans_adam = -120.1986022919203
optimal = -116.40517503711263
"""


class HeartConvergenceMetaDataset(HeartDataset, ConvergenceMetaDataset):
def __init__(self):
HeartDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 35, 3, 10, 10)


""" Ionosphere with Exp kernel:
results_hgv_bfgs = -136.5843232435876
results_hgv_adam = -132.4442299062226
results_kmeans_bfgs = -134.31545532969926
results_kmeans_adam = -133.37755384748468
optimal = -127.33302466735934
"""


class IonosphereConvergenceMetaDataset(IonosphereDataset, ConvergenceMetaDataset):
def __init__(self):
IonosphereDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 50, 5, 10, 10)


""" Breast Cancer with Exp kernel:
results_hgv_bfgs = -80.05559903201078
results_hgv_adam = -88.1640662775219
results_kmeans_bfgs = -87.11531148182786
results_kmeans_adam = -85.66347679514195
optimal = -80.14058112260301
"""


class BreastCancerConvergenceMetaDataset(BreastCancerDataset, ConvergenceMetaDataset):
def __init__(self):
BreastCancerDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 50, 5, 10, 10)


""" Pima Diabetes with Exp kernel:
results_hgv_bfgs = -379.6071485735707
results_hgv_adam = -386.11236963470503
results_kmeans_bfgs = -378.93046287023844
results_kmeans_adam = -383.08885975956173
optimal = -377.604760027815
"""


class PimaConvergenceMetaDataset(PimaDataset, ConvergenceMetaDataset):
def __init__(self):
PimaDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 60, 5, 5, 10)
