fix k_means large N subsampling
GiovanniPasserello committed May 21, 2021
1 parent 19ca76b commit 9528a65
Showing 3 changed files with 14 additions and 19 deletions.
4 changes: 3 additions & 1 deletion shgp/classification/experiments/metrics_experiment.py
@@ -19,6 +19,8 @@
likelihood and fixed inducing points initialised with kmeans.
These are the 'best-case' scenarios: the best method for each model, used for a thorough evaluation of PGPR vs SVGP.
In particular, we evaluate the performance of very sparse models to see whether HGV is beneficial.
+Please note that many of the experiments were run on a GPU, so they are not entirely reproducible on CPU.
"""


@@ -92,4 +94,4 @@ def run_iteration(metadata, X, Y, X_test, Y_test):


if __name__ == '__main__':
-    run_metrics_experiment(TwonormMetricsMetaDataset())
+    run_metrics_experiment(MagicMetricsMetaDataset())
27 changes: 10 additions & 17 deletions shgp/data/metadata_metrics.py
@@ -26,9 +26,6 @@ class MetricsMetaDataset:
    ci_iters: int


-# TODO: Fertility experiment?


"""
SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default)
ELBO - max: -89.519647, min: -103.250001, median: -98.954888, mean: -98.629271, std: 3.787107.
@@ -198,34 +195,30 @@ def __init__(self):


"""
-SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=300)
+SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=500)
-PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=300)
+PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=500)
"""


-# TODO: Doesn't fit in memory -> crashes
-# TODO: Try fix memory errors (maybe I can run PGPR, but not svgp?)
-# This is why it won't fit in memory???
+# This experiment was run on a GPU so is not reproducible on CPU.
-# TODO: Run experiment
-# TODO: M=300?
class MagicMetricsMetaDataset(MagicDataset, MetricsMetaDataset):
    def __init__(self):
        MagicDataset.__init__(self)
-        MetricsMetaDataset.__init__(self, 10, 200, 500, 20, 500, 20)
+        MetricsMetaDataset.__init__(self, 10, 500, 500, 10, 250, 10)


"""
-SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=300)
+SVGP Distribution: (kmeans++, no grad-optim, with unconstrained/default, M=500)
-PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=300)
+PGPR Distribution: (hetero greedy var, no grad-optim, with unconstrained/default, M=500)
"""


-# TODO: Doesn't fit in memory -> crashes
-# TODO: Try fix memory errors (maybe I can run PGPR, but not svgp?)
-# This is why it won't fit in memory???
+# This experiment was run on a GPU so is not reproducible on CPU.
-# TODO: Run experiment
-# TODO: M=300?
class ElectricityMetricsMetaDataset(ElectricityDataset, MetricsMetaDataset):
    def __init__(self):
        ElectricityDataset.__init__(self)
-        MetricsMetaDataset.__init__(self, 10, 300, 500, 10, 250, 10)
+        MetricsMetaDataset.__init__(self, 10, 500, 500, 10, 250, 10)
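To make the positional arguments above easier to read, here is a hypothetical sketch of how such a meta-dataset could be declared. Only ci_iters is visible in the hunk above; the other field names are illustrative assumptions, not the repository's actual definitions.

from dataclasses import dataclass

@dataclass
class MetricsMetaDataset:
    # Only ci_iters appears in the diff above; the remaining names are assumed.
    num_cycles: int     # independent train/test cycles to average over (assumed)
    M: int              # number of inducing points (assumed)
    svgp_iters: int     # SVGP optimisation iterations (assumed)
    svgp_ci_iters: int  # SVGP confidence-interval iterations (assumed)
    pgpr_iters: int     # PGPR optimisation iterations (assumed)
    ci_iters: int       # confidence-interval iterations (visible in the hunk)

# Under this reading, the updated Magic configuration would be:
# MetricsMetaDataset(num_cycles=10, M=500, svgp_iters=500,
#                    svgp_ci_iters=10, pgpr_iters=250, ci_iters=10)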
2 changes: 1 addition & 1 deletion shgp/inducing/initialisation_methods.py
@@ -43,7 +43,7 @@ def k_means(training_inputs: np.ndarray, M: int):

    # If N is large, take a uniform subset
    if N > 20000:
-        training_inputs, _ = uniform_subsample(training_inputs, 20000)
+        training_inputs = uniform_subsample(training_inputs, 20000)

    # Scipy k-means++
    centroids, _ = scipy.cluster.vq.kmeans(training_inputs, M)
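Pieced together from the hunk above, a minimal self-contained sketch of the corrected initialisation path. The uniform_subsample helper here is an assumption, written only to match the single-return-value call that this commit introduces; the repository's actual helper may differ.

import numpy as np
import scipy.cluster.vq


def uniform_subsample(training_inputs: np.ndarray, subset_size: int) -> np.ndarray:
    # Hypothetical helper: draw subset_size rows uniformly, without replacement.
    indices = np.random.choice(len(training_inputs), subset_size, replace=False)
    return training_inputs[indices]


def k_means(training_inputs: np.ndarray, M: int) -> np.ndarray:
    N = len(training_inputs)

    # If N is large, take a uniform subset so k-means stays tractable.
    if N > 20000:
        # The fix: uniform_subsample returns a single array here, so the old
        # two-value unpacking (training_inputs, _ = ...) no longer matched
        # its return value and crashed.
        training_inputs = uniform_subsample(training_inputs, 20000)

    # scipy's kmeans returns (centroids, distortion); keep only the centroids.
    centroids, _ = scipy.cluster.vq.kmeans(training_inputs, M)
    return centroids

Note that scipy.cluster.vq.kmeans can return fewer than M centroids when clusters lose all their members, so callers may need to handle a short codebook.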
