Commit
update convergence experiments to use svgp
GiovanniPasserello committed Jun 8, 2021
1 parent b1efb1c commit 697ae88
Showing 2 changed files with 66 additions and 75 deletions.
121 changes: 58 additions & 63 deletions shgp/classification/experiments/convergence_experiment.py
@@ -4,11 +4,10 @@
import tensorflow as tf

from shgp.data.metadata_reinit import ReinitMetaDataset
from shgp.data.metadata_convergence import IonosphereConvergenceMetaDataset
from shgp.inducing.initialisation_methods import h_reinitialise_PGPR, k_means
from shgp.models.pgpr import PGPR
from shgp.robustness.contrained_kernels import ConstrainedExpSEKernel
from shgp.data.metadata_convergence import PimaConvergenceMetaDataset
from shgp.inducing.initialisation_methods import h_reinitialise_PGPR, k_means, uniform_subsample
from shgp.utilities.train_pgpr import train_pgpr
from shgp.utilities.train_svgp import train_svgp

np.random.seed(42)
tf.random.set_seed(42)
@@ -21,10 +20,22 @@
"""


def run_convergence_experiment(X, Y, M, inner_iters, opt_iters, ci_iters):
################################################
# PGPR with Different Reinitialisation Methods #
################################################
def run_convergence_experiment(X, Y, M, opt_iters):
##############################################
# SVGP with Full Inducing Points ('Optimal') #
##############################################

print("Training non-sparse model...")
svgp, elbo_svgp = train_svgp(X, Y, M=len(X), train_iters=250, init_method=uniform_subsample)
optimal_kernel = svgp.kernel
optimal = np.full(opt_iters + 1, elbo_svgp)

print("Non-sparse result:")
print("optimal = {}".format(elbo_svgp))

############################################
# Z for Different Reinitialisation Methods #
############################################

# Get the initial inducing points for hgv and kmeans
print("Selecting inducing points...")
@@ -37,8 +48,12 @@ def run_convergence_experiment(X, Y, M, inner_iters, opt_iters, ci_iters):
kmeans_Z = k_means(X, M)
print("Inducing points selected.")

####################
# Convergence test #
####################

results_hgv_bfgs, results_hgv_adam, results_kmeans_bfgs, results_kmeans_adam = \
test_convergence(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, kmeans_Z)
test_convergence((X, Y), opt_iters, hgv_Z, kmeans_Z, optimal_kernel)

results = list(zip(results_hgv_bfgs, results_hgv_adam, results_kmeans_bfgs, results_kmeans_adam))

@@ -48,78 +63,59 @@ def run_convergence_experiment(X, Y, M, inner_iters, opt_iters, ci_iters):
print("results_kmeans_bfgs = {}".format(results_kmeans_bfgs))
print("results_kmeans_adam = {}".format(results_kmeans_adam))

##############################################
# PGPR with Full Inducing Points ('Optimal') #
##############################################

print("Training non-sparse model...")
_, elbo_pgpr = train_pgpr(X, Y, inner_iters, opt_iters, ci_iters)
optimal = np.full(len(results), elbo_pgpr)

print("Non-sparse result:")
print("optimal = {}".format(elbo_pgpr))

return results, optimal


# Test the performance of a model with a given number of inducing points, M.
def test_convergence(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, kmeans_Z):
def test_convergence(data, opt_iters, hgv_Z, kmeans_Z, optimal_kernel):
print("Training HGV BFGS...")
bfgs_opt = gpflow.optimizers.Scipy()
results_hgv_bfgs = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, bfgs_opt)
results_hgv_bfgs = train_convergence_svgp(data, opt_iters, optimal_kernel, hgv_Z, bfgs_opt)
print("HGV BFGS trained: ELBO = {}".format(results_hgv_bfgs[-1]))
print("Training HGV Adam...")
adam_opt = tf.optimizers.Adam(beta_1=0.5, beta_2=0.5)
results_hgv_adam = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, hgv_Z, adam_opt)
results_hgv_adam = train_convergence_svgp(data, opt_iters, optimal_kernel, hgv_Z, adam_opt)
print("HGV Adam trained: ELBO = {}".format(results_hgv_adam[-1]))

print("Training K-means BFGS...")
bfgs_opt = gpflow.optimizers.Scipy()
results_kmeans_bfgs = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, kmeans_Z, bfgs_opt)
results_kmeans_bfgs = train_convergence_svgp(data, opt_iters, optimal_kernel, kmeans_Z, bfgs_opt)
print("K-means BFGS trained: ELBO = {}".format(results_kmeans_bfgs[-1]))
print("Training K-means BFGS...")
adam_opt = tf.optimizers.Adam(beta_1=0.5, beta_2=0.5)
results_kmeans_adam = train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, kmeans_Z, adam_opt)
results_kmeans_adam = train_convergence_svgp(data, opt_iters, optimal_kernel, kmeans_Z, adam_opt)
print("K-means Adam trained: ELBO = {}".format(results_kmeans_adam[-1]))

return results_hgv_bfgs, results_hgv_adam, results_kmeans_bfgs, results_kmeans_adam


def train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, Z, opt):
def train_convergence_svgp(data, opt_iters, kernel, Z, opt):
"""
Train a PGPR model until completion, recording the ELBO every 'elbo_period' iterations.
Train an SVGP model to completion, recording the ELBO after each optimisation step.
If a spurious Cholesky error is raised, keep retrying until the optimisation succeeds.
"""
model = PGPR(
data=(X, Y),
kernel=ConstrainedExpSEKernel(),
inducing_variable=Z # gradient-optimised inducing points, initialised at Z
model = gpflow.models.SVGP(
kernel=kernel,
likelihood=gpflow.likelihoods.Bernoulli(),
inducing_variable=Z
)
gpflow.set_trainable(model.inducing_variable, True)
results = [model.elbo().numpy()]
gpflow.set_trainable(model.kernel, False) # fixed hyperparameters
gpflow.set_trainable(model.inducing_variable, True) # optimised Z
results = [model.elbo(data).numpy()]

# Try to run the full optimisation cycle.
try:
if isinstance(opt, gpflow.optimizers.Scipy):
for _ in range(inner_iters):
# Optimise kernel hyperparameters, recording the ELBO after each step.
for _ in range(opt_iters):
opt.minimize(model.training_loss, variables=model.trainable_variables, options=dict(maxiter=1))
results.append(model.elbo().numpy())
# Update local variational parameters.
model.optimise_ci(num_iters=ci_iters)
# Optimise kernel hyperparameters, recording the ELBO after each step.
for _ in range(opt_iters):
opt.minimize(model.training_loss_closure(data), variables=model.trainable_variables, options=dict(maxiter=1))
results.append(model.elbo(data).numpy())
else:
step = 0
decay_steps = inner_iters * opt_iters
for _ in range(inner_iters):
# Optimise kernel hyperparameters, recording the ELBO after each step.
for i in range(opt_iters):
opt.lr.assign(decayed_learning_rate(step, decay_steps))
opt.minimize(model.training_loss, var_list=model.trainable_variables)
results.append(model.elbo().numpy())
step += 1
# Update local variational parameters.
model.optimise_ci(num_iters=ci_iters)
# Optimise kernel hyperparameters, recording the ELBO after each step.
for step in range(opt_iters):
opt.lr.assign(decayed_learning_rate(step, opt_iters))
opt.minimize(model.training_loss_closure(data), var_list=model.trainable_variables)
results.append(model.elbo(data).numpy())

# If we fail due to a (spurious) Cholesky error, restart.
except tf.errors.InvalidArgumentError as error:
@@ -128,7 +124,7 @@ def train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, Z, opt):
raise error
else:
print("Cholesky error caught, retrying...")
return train_convergence_pgpr(X, Y, inner_iters, opt_iters, ci_iters, Z, opt)
return train_convergence_svgp(data, opt_iters, kernel, Z, opt)

return results

@@ -140,7 +136,7 @@ def decayed_learning_rate(step, decay_steps, initial_learning_rate=0.5, alpha=0.
return initial_learning_rate * decayed


def plot_results(name, max_iters, results, optimal):
def plot_results(name, opt_iters, results, optimal):
plt.rcParams["figure.figsize"] = (12, 6)
plt.tick_params(labelright=True)

@@ -149,10 +145,12 @@ def plot_results(name, max_iters, results, optimal):
plt.ylabel('ELBO')
plt.xlabel('Iterations')
# Axis limits
plt.xlim(0, max_iters)
plt.xlim(0, opt_iters)
plt.xticks([0, 50, 100, 150])
plt.yscale('symlog')

# Setup each subplot
iter_array = np.arange(max_iters + 1)
iter_array = np.arange(opt_iters + 1)
plt.plot(iter_array, optimal, color='k', linestyle='dashed', label='Optimal', zorder=101)
plt.plot(iter_array, results[:, 0], label='HGV GO - BFGS', zorder=100)
plt.plot(iter_array, results[:, 1], label='HGV GO - Adam', zorder=98)
@@ -165,14 +163,11 @@

if __name__ == '__main__':
# Load data
dataset = IonosphereConvergenceMetaDataset() # to test another dataset, just change this definition
dataset = PimaConvergenceMetaDataset() # to test another dataset, just change this definition
X, Y = dataset.load_data()

results, optimal = run_convergence_experiment(
X, Y,
dataset.M, dataset.inner_iters, dataset.opt_iters, dataset.ci_iters
)

max_iters = dataset.inner_iters * dataset.opt_iters
# Get convergence results
results, optimal = run_convergence_experiment(X, Y, dataset.M, dataset.opt_iters)

plot_results(dataset.name, max_iters, np.array(results), optimal)
# Plot results
plot_results(dataset.name, dataset.opt_iters, np.array(results), optimal)
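The hunks above show only the signature and return statement of `decayed_learning_rate`; its body is unchanged by this commit and therefore collapsed, and its `alpha` default is cut off in the hunk header. Assuming it implements the standard cosine-decay schedule suggested by its name and parameters (the `alpha` value below is a guess), a minimal sketch would be:

```python
import numpy as np

def decayed_learning_rate(step, decay_steps, initial_learning_rate=0.5, alpha=0.1):
    # Hypothetical reconstruction: cosine-anneal the learning rate from
    # initial_learning_rate down to alpha * initial_learning_rate over decay_steps.
    cosine_decay = 0.5 * (1 + np.cos(np.pi * step / decay_steps))
    decayed = (1 - alpha) * cosine_decay + alpha
    return initial_learning_rate * decayed
```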
20 changes: 8 additions & 12 deletions shgp/data/metadata_convergence.py
@@ -10,47 +10,43 @@ class ConvergenceMetaDataset:
Please ensure that elbo_period is an integer divisor of (inner_iters * opt_iters).
:param M: The number of inducing points.
:param inner_iters: The number of iterations of the inner optimisation loop.
:param opt_iters: The number of iterations of gradient-based optimisation of the kernel hyperparameters.
:param ci_iters: The number of iterations of update for the local variational parameters.
:param opt_iters: The number of iterations of gradient-based optimisation.
"""
M: int
inner_iters: int = 10
opt_iters: int = 20
ci_iters: int = 10
opt_iters: int = 100


class BananaConvergenceMetaDataset(BananaDataset, ConvergenceMetaDataset):
def __init__(self):
BananaDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 40, 5, 5, 10)
ConvergenceMetaDataset.__init__(self, 40, 150)


class CrabsConvergenceMetaDataset(CrabsDataset, ConvergenceMetaDataset):
def __init__(self):
CrabsDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 10, 10, 10, 10)
ConvergenceMetaDataset.__init__(self, 10, 200)


class HeartConvergenceMetaDataset(HeartDataset, ConvergenceMetaDataset):
def __init__(self):
HeartDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 35, 3, 10, 10)
ConvergenceMetaDataset.__init__(self, 35, 150)


class IonosphereConvergenceMetaDataset(IonosphereDataset, ConvergenceMetaDataset):
def __init__(self):
IonosphereDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 50, 5, 10, 10)
ConvergenceMetaDataset.__init__(self, 80, 250)


class BreastCancerConvergenceMetaDataset(BreastCancerDataset, ConvergenceMetaDataset):
def __init__(self):
BreastCancerDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 50, 5, 10, 10)
ConvergenceMetaDataset.__init__(self, 50, 150)


class PimaConvergenceMetaDataset(PimaDataset, ConvergenceMetaDataset):
def __init__(self):
PimaDataset.__init__(self)
ConvergenceMetaDataset.__init__(self, 60, 4, 5, 10)
ConvergenceMetaDataset.__init__(self, 60, 150)
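Each `*ConvergenceMetaDataset` above simply bundles a dataset with its inducing-point count `M` and iteration budget `opt_iters`. A minimal usage sketch, mirroring the `__main__` block of `convergence_experiment.py` (assuming the experiment module is importable as a package, and using the Crabs settings from this commit), would be:

```python
import numpy as np

from shgp.classification.experiments.convergence_experiment import (
    plot_results,
    run_convergence_experiment,
)
from shgp.data.metadata_convergence import CrabsConvergenceMetaDataset

dataset = CrabsConvergenceMetaDataset()  # M=10, opt_iters=200 per this commit
X, Y = dataset.load_data()

results, optimal = run_convergence_experiment(X, Y, dataset.M, dataset.opt_iters)
plot_results(dataset.name, dataset.opt_iters, np.array(results), optimal)
```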
