diff --git a/.gitignore b/.gitignore index 3a56c7f..4d3658a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,36 @@ -synth_env -__pycache__ -*.png -*.pt -*.pkl +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Auxiliary .DS_Store -.ipynb_checkpoints -/data diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index d10971a..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,35 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -Instructions on how to update this Changelog are available in the `Updating the Changelog` section of the [`CONTRIBUTING.md`](./CONTRIBUTING.md). This project follows [semantic versioning](https://semver.org/spec/v2.0.0.html). - -## [Unreleased v2.0.0] - -### Breaking Changes - -- Added Gaussian Mixture Modelling as a pre-processing feature for non-gaussian continuous variables - -### New Features - -- Introduced hyperparameter tuning using Optuna -- Expanded SynthVAE use cases to include MIMIC-III dataset -- Introduced plotting functionality & training logging for SynthVAE training -- Expanded `argparse` selections to allow more user flexibility -- Added MIMIC-III pre-processing notebook -- Introduced `random_state` changes as well as other seed changes to allow for reproducibility of results - -## [Unreleased v1.0.0] - -### New Features - -- Added project from Dom's [(djdnx)](https://github.com/djdnx) working repository -- Added missing project files -- Added `argparse` approach to running experiments - -### Fixed - -- Fixed black and flake8 adherence - - -[Unreleased]: https://github.com/nhsx/SynthVAE/tree/main diff --git a/Hyperparameter_Tuning/Hyperparameter_Tuning_MIMIC.py b/Hyperparameter_Tuning/Hyperparameter_Tuning_MIMIC.py deleted file mode 100644 index 034e7dd..0000000 --- a/Hyperparameter_Tuning/Hyperparameter_Tuning_MIMIC.py +++ /dev/null @@ -1,291 +0,0 @@ -#%% -------- Import Libraries -------- # - -# Standard imports -from selectors import EpollSelector -from tokenize import String -import numpy as np -import pandas as pd -import torch - -# VAE is in other folder -import sys - -sys.path.append("../") - -# Opacus support for differential privacy -from opacus.utils.uniform_sampler import UniformWithReplacementSampler - -# For VAE dataset formatting -from torch.utils.data import TensorDataset, DataLoader - -# VAE functions -from VAE import Decoder, Encoder, VAE - -# For datetime columns we need a transformer -from rdt.transformers import datetime - -# Utility file contains all functions required to run notebook -from utils import ( - set_seed, - mimic_pre_proc, - constraint_filtering, - plot_elbo, - plot_likelihood_breakdown, - plot_variable_distributions, - reverse_transformers, -) -from metrics import distribution_metrics - -import optuna -import pickle - -import warnings - -warnings.filterwarnings("ignore") # We suppress warnings to avoid SDMETRICS throwing unique synthetic data warnings (i.e. 
-# data in synthetic set is not in the real data set) as well as SKLEARN throwing convergence warnings (pre-processing uses -# GMM from sklearn and this throws non convergence warnings) - -set_seed(0) - -filepath = ".../Private MIMIC Data/table_one_synthvae.csv" - -# Load in the MIMIC dataset -data_supp = pd.read_csv(filepath) - -# Save the original columns - -original_categorical_columns = [ - "ETHNICITY", - "DISCHARGE_LOCATION", - "GENDER", - "FIRST_CAREUNIT", - "VALUEUOM", - "LABEL", -] -original_continuous_columns = ["SUBJECT_ID", "VALUE", "age"] -original_datetime_columns = ["ADMITTIME", "DISCHTIME", "DOB", "CHARTTIME"] - -# Drop DOD column as it contains NANS - for now - -# data_supp = data_supp.drop('DOD', axis = 1) - -original_columns = ( - original_categorical_columns - + original_continuous_columns - + original_datetime_columns -) -#%% -------- Data Pre-Processing -------- # - -pre_proc_method = "GMM" - -( - x_train, - original_metric_set, - reordered_dataframe_columns, - continuous_transformers, - categorical_transformers, - datetime_transformers, - num_categories, - num_continuous, -) = mimic_pre_proc(data_supp=data_supp, pre_proc_method=pre_proc_method) - -#%% -------- Create & Train VAE -------- # - -# User defined parameters - -# General training -batch_size = 32 -n_epochs = 5 -logging_freq = 1 # Number of epochs we should log the results to the user -patience = 5 # How many epochs should we allow the model train to see if -# improvement is made -delta = 10 # The difference between elbo values that registers an improvement -filepath = None # Where to save the best model - - -# Privacy params -differential_privacy = False # Do we want to implement differential privacy -sample_rate = 0.1 # Sampling rate -noise_scale = None # Noise multiplier - influences how much noise to add -target_eps = 1 # Target epsilon for privacy accountant -target_delta = 1e-5 # Target delta for privacy accountant - -# Define the metrics you want the model to evaluate - -# Define distributional metrics required - for sdv_baselines this is set by default -distributional_metrics = [ - "SVCDetection", - "GMLogLikelihood", - "CSTest", - "KSTest", - "KSTestExtended", - "ContinuousKLDivergence", - "DiscreteKLDivergence", -] - -gower = False - -# Prepare data for interaction with torch VAE -Y = torch.Tensor(x_train) -dataset = TensorDataset(Y) - -generator = None -sample_rate = batch_size / len(dataset) -data_loader = DataLoader( - dataset, - batch_sampler=UniformWithReplacementSampler( - num_samples=len(dataset), sample_rate=sample_rate, generator=generator - ), - pin_memory=True, - generator=generator, -) - - -# -------- Define our Optuna trial -------- # - - -def objective( - trial, - gower, - distributional_metrics, - differential_privacy=False, - target_delta=1e-3, - target_eps=10.0, - n_epochs=50, -): - - latent_dim = trial.suggest_int("Latent Dimension", 2, 128, step=2) # Hyperparam - hidden_dim = trial.suggest_int("Hidden Dimension", 32, 1024, step=32) # Hyperparam - - encoder = Encoder(x_train.shape[1], latent_dim, hidden_dim=hidden_dim) - decoder = Decoder(latent_dim, num_continuous, num_categories=num_categories) - - lr = trial.suggest_float("Learning Rate", 1e-3, 1e-2, step=1e-5) - vae = VAE(encoder, decoder, lr=1e-3) # lr hyperparam - - C = trial.suggest_int("C", 10, 1e4, step=50) - - if differential_privacy == True: - ( - training_epochs, - log_elbo, - log_reconstruction, - log_divergence, - log_categorical, - log_numerical, - ) = vae.diff_priv_train( - data_loader, - n_epochs=n_epochs, - C=C, # 
Hyperparam - target_eps=target_eps, - target_delta=target_delta, - sample_rate=sample_rate, - ) - print(f"(epsilon, delta): {vae.get_privacy_spent(target_delta)}") - - else: - - ( - training_epochs, - log_elbo, - log_reconstruction, - log_divergence, - log_categorical, - log_numerical, - ) = vae.train(data_loader, n_epochs=n_epochs) - - # -------- Generate Synthetic Data -------- # - - synthetic_supp = constraint_filtering( - n_rows=data_supp.shape[0], - vae=vae, - reordered_cols=reordered_dataframe_columns, - data_supp_columns=data_supp.columns, - cont_transformers=continuous_transformers, - cat_transformers=categorical_transformers, - date_transformers=datetime_transformers, - pre_proc_method=pre_proc_method, - ) - - # -------- Datetime Handling -------- # - - # If the dataset has datetimes then we need to re-convert these to a numerical - # Value representing seconds, this is so we can evaluate the metrics on them - - metric_synthetic_supp = synthetic_supp.copy() - - for index, column in enumerate(original_datetime_columns): - - # Fit datetime transformer - converts to seconds - temp_datetime = datetime.DatetimeTransformer() - temp_datetime.fit(metric_synthetic_supp, columns=column) - - metric_synthetic_supp = temp_datetime.transform(metric_synthetic_supp) - - # -------- SDV Metrics -------- # - # Calculate the sdv metrics for SynthVAE - - metrics = distribution_metrics( - gower_bool=gower, - distributional_metrics=distributional_metrics, - data_supp=data_supp, - synthetic_supp=synthetic_supp, - categorical_columns=original_categorical_columns, - continuous_columns=original_continuous_columns, - saving_filepath=None, - pre_proc_method=pre_proc_method, - ) - - # Optuna wants a list of values in float form - - list_metrics = [metrics[i] for i in metrics.columns] - - print(list_metrics) - - return list_metrics - - -#%% -------- Run Hyperparam Optimisation -------- # - -# If there is no study object in your folder then run and save the study so -# It can be resumed if needed - -first_run = True # First run indicates if we are creating a new hyperparam study - -if first_run == True: - - if gower == True: - directions = ["maximize" for i in range(distributional_metrics.shape[0] + 1)] - else: - directions = ["maximize" for i in range(distributional_metrics.shape[0])] - - study = optuna.create_study(directions=directions) - -else: - - with open("no_dp_MIMIC.pkl", "rb") as f: - study = pickle.load(f) - -study.optimize( - lambda trial: objective( - trial, - gower=gower, - distributional_metrics=distributional_metrics, - differential_privacy=differential_privacy, - target_delta=target_delta, - target_eps=target_eps, - n_epochs=n_epochs, - ), - n_trials=3, - gc_after_trial=True, -) # GC to avoid OOM -#%% - -study.best_trials -#%% -------- Save The Study -------- # - -# For a multi objective study we need to find the best trials and basically -# average between the 3 metrics to get the best trial - -with open("no_dp_MIMIC.pkl", "wb") as f: - pickle.dump(study, f) diff --git a/Hyperparameter_Tuning/Hyperparameter_Tuning_SUPPORT.py b/Hyperparameter_Tuning/Hyperparameter_Tuning_SUPPORT.py deleted file mode 100644 index 2d36ee5..0000000 --- a/Hyperparameter_Tuning/Hyperparameter_Tuning_SUPPORT.py +++ /dev/null @@ -1,268 +0,0 @@ -#%% -------- Import Libraries -------- # - -# Standard imports -from webbrowser import GenericBrowser -import numpy as np -import pandas as pd -import torch - -# VAE is in other folder as well as opacus adapted library -import sys - -sys.path.append("../") - -# Opacus support 
for differential privacy -from opacus.utils.uniform_sampler import UniformWithReplacementSampler - -# For the SUPPORT dataset -from pycox.datasets import support - -# For VAE dataset formatting -from torch.utils.data import TensorDataset, DataLoader - -# VAE functions -from VAE import Decoder, Encoder, VAE - -# Utility file contains all functions required to run notebook -from utils import ( - set_seed, - support_pre_proc, - plot_elbo, - plot_likelihood_breakdown, - plot_variable_distributions, - reverse_transformers, -) -from metrics import distribution_metrics - -import optuna -import pickle - -import warnings - -warnings.filterwarnings("ignore") # We suppress warnings to avoid SDMETRICS throwing unique synthetic data warnings (i.e. -# data in synthetic set is not in the real data set) as well as SKLEARN throwing convergence warnings (pre-processing uses -# GMM from sklearn and this throws non convergence warnings) - -set_seed(0) - -# Load in the support data -data_supp = support.read_df() - -# Save the original columns - -original_continuous_columns = ["duration"] + [f"x{i}" for i in range(7, 15)] -original_categorical_columns = ["event"] + [f"x{i}" for i in range(1, 7)] - -original_columns = original_categorical_columns + original_continuous_columns -#%% -------- Data Pre-Processing -------- # - -pre_proc_method = "GMM" - -( - x_train, - data_supp, - reordered_dataframe_columns, - continuous_transformers, - categorical_transformers, - num_categories, - num_continuous, -) = support_pre_proc(data_supp=data_supp, pre_proc_method=pre_proc_method) - -#%% -------- Create & Train VAE -------- # - -# User defined parameters - -# General training -batch_size = 32 -n_epochs = 5 -logging_freq = 1 # Number of epochs we should log the results to the user -patience = 5 # How many epochs should we allow the model train to see if -# improvement is made -delta = 10 # The difference between elbo values that registers an improvement -filepath = None # Where to save the best model - - -# Privacy params -differential_privacy = False # Do we want to implement differential privacy -sample_rate = 0.1 # Sampling rate -noise_scale = None # Noise multiplier - influences how much noise to add -target_eps = 1 # Target epsilon for privacy accountant -target_delta = 1e-5 # Target delta for privacy accountant - -# Define the metrics you want the model to evaluate - -# Define distributional metrics required - for sdv_baselines this is set by default -distributional_metrics = [ - "SVCDetection", - "GMLogLikelihood", - "CSTest", - "KSTest", - "KSTestExtended", - "ContinuousKLDivergence", - "DiscreteKLDivergence", -] - -gower = False - -# Prepare data for interaction with torch VAE -Y = torch.Tensor(x_train) -dataset = TensorDataset(Y) - -generator = None -sample_rate = batch_size / len(dataset) -data_loader = DataLoader( - dataset, - batch_sampler=UniformWithReplacementSampler( - num_samples=len(dataset), sample_rate=sample_rate, generator=generator - ), - pin_memory=True, - generator=generator, -) - -# -------- Define our Optuna trial -------- # - - -def objective( - trial, - gower, - distributional_metrics, - differential_privacy=False, - target_delta=1e-3, - target_eps=10.0, - n_epochs=50, -): - - latent_dim = trial.suggest_int("Latent Dimension", 2, 128, step=2) # Hyperparam - hidden_dim = trial.suggest_int("Hidden Dimension", 32, 1024, step=32) # Hyperparam - - encoder = Encoder(x_train.shape[1], latent_dim, hidden_dim=hidden_dim) - decoder = Decoder(latent_dim, num_continuous, num_categories=num_categories) - - lr 
= trial.suggest_float("Learning Rate", 1e-3, 1e-2, step=1e-5) - vae = VAE(encoder, decoder, lr=1e-3) # lr hyperparam - - C = trial.suggest_int("C", 10, 1e4, step=50) # Clipping hyperparam - - if differential_privacy == True: - ( - training_epochs, - log_elbo, - log_reconstruction, - log_divergence, - log_categorical, - log_numerical, - ) = vae.diff_priv_train( - data_loader, - n_epochs=n_epochs, - C=C, # Hyperparam - target_eps=target_eps, - target_delta=target_delta, - sample_rate=sample_rate, - ) - print(f"(epsilon, delta): {vae.get_privacy_spent(target_delta)}") - - else: - - ( - training_epochs, - log_elbo, - log_reconstruction, - log_divergence, - log_categorical, - log_numerical, - ) = vae.train(data_loader, n_epochs=n_epochs) - - # -------- Synthetic Data Generation -------- # - - synthetic_sample = vae.generate(data_supp.shape[0]) - - if torch.cuda.is_available(): - synthetic_sample = pd.DataFrame( - synthetic_sample.cpu().detach(), columns=reordered_dataframe_columns - ) - else: - synthetic_sample = pd.DataFrame( - synthetic_sample.detach(), columns=reordered_dataframe_columns - ) - - # Reverse the transformations - - synthetic_supp = reverse_transformers( - synthetic_set=synthetic_sample, - data_supp_columns=data_supp.columns, - cont_transformers=continuous_transformers, - cat_transformers=categorical_transformers, - pre_proc_method=pre_proc_method, - ) - # -------- SDV Metrics -------- # - - metrics = distribution_metrics( - gower_bool=gower, - distributional_metrics=distributional_metrics, - data_supp=data_supp, - synthetic_supp=synthetic_supp, - categorical_columns=original_categorical_columns, - continuous_columns=original_continuous_columns, - saving_filepath=None, - pre_proc_method=pre_proc_method, - ) - - # Optuna wants a list of values in float form - - list_metrics = [metrics[i] for i in metrics.columns] - - return list_metrics - - -#%% -------- Run Hyperparam Optimisation -------- # - -# If there is no study object in your folder then run and save the study so -# It can be resumed if needed - -first_run = True # First run indicates if we are creating a new hyperparam study - -if first_run == True: - - if gower == True: - directions = ["maximize" for i in range(distributional_metrics.shape[0] + 1)] - else: - directions = ["maximize" for i in range(distributional_metrics.shape[0])] - - study = optuna.create_study(directions=directions) - -else: - - with open("dp_SUPPORT.pkl", "rb") as f: - study = pickle.load(f) - -study.optimize( - lambda trial: objective( - trial, - gower=gower, - distributional_metrics=distributional_metrics, - differential_privacy=differential_privacy, - target_delta=target_delta, - target_eps=target_eps, - n_epochs=n_epochs, - ), - n_trials=3, - gc_after_trial=True, -) # GC to avoid OOM -#%% - -study.best_trials - -#%% -------- Save The Study -------- # - -# For a multi objective study we need to find the best trials and basically -# average between the 3 metrics to get the best trial - -with open("dp_SUPPORT.pkl", "wb") as f: - pickle.dump(study, f) - -trial_averages = [] - -for trials in study.best_trials: - - metrics = trials.values - trial_averages.append(np.mean(metrics)) diff --git a/Hyperparameter_Tuning/MIMIC.ipynb b/Hyperparameter_Tuning/MIMIC.ipynb deleted file mode 100644 index cafef7d..0000000 --- a/Hyperparameter_Tuning/MIMIC.ipynb +++ /dev/null @@ -1,491 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# MIMIC Hyperparameter Tuning\n", - "\n", - "This notebook runs through hyperparameter tuning 
for the internal MIMIC dataset. For this we use the Optuna library.\n", - "\n", - "The notebook that produces our single table is found here . If you want to create a single table yourself then follow the example csv file given at \n", - "\n", - "We validate our hyperparameter tuning results on our training dataset metrics - This is isn't optimal as usually it would be validated on a separate validation set. Hard to create an appropriate validation set in this instance as we would require the distributions for each variable column to look similar between training & validation.\n", - "\n", - "NOTE: There are known limitations that are explained as they come up in these markdown cells." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# Standard imports\n", - "from tokenize import String\n", - "import numpy as np\n", - "import pandas as pd\n", - "import torch\n", - "\n", - "# VAE is in other folder\n", - "import sys\n", - "\n", - "sys.path.append(\"../\")\n", - "\n", - "# Opacus support for differential privacy\n", - "from opacus.utils.uniform_sampler import UniformWithReplacementSampler\n", - "\n", - "# For VAE dataset formatting\n", - "from torch.utils.data import TensorDataset, DataLoader\n", - "\n", - "# VAE functions\n", - "from VAE import Decoder, Encoder, VAE\n", - "\n", - "# For datetime columns we need a transformer\n", - "from rdt.transformers import datetime\n", - "\n", - "# Utility file contains all functions required to run notebook\n", - "from utils import (\n", - " set_seed,\n", - " mimic_pre_proc,\n", - " constraint_filtering,\n", - " plot_elbo,\n", - " plot_likelihood_breakdown,\n", - " plot_variable_distributions,\n", - " reverse_transformers,\n", - ")\n", - "from metrics import distribution_metrics\n", - "\n", - "# Hyperparameter tuning library as well as pickle to save study objects\n", - "import optuna\n", - "\n", - "import pickle\n", - "\n", - "import warnings\n", - "\n", - "warnings.filterwarnings(\"ignore\") # We suppress warnings to avoid SDMETRICS throwing unique synthetic data warnings (i.e.\n", - "# data in synthetic set is not in the real data set) as well as SKLEARN throwing convergence warnings (pre-processing uses\n", - "# GMM from sklearn and this throws non convergence warnings)\n", - "\n", - "set_seed(0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Loading & Column Definitions\n", - "\n", - "First we need to load in the MIMIC dataset from a specified filepath. \n", - "\n", - "We then need to create lists indicating which columns are:\n", - "a) continuous\n", - "b) categorical\n", - "c) datetime\n", - "\n", - "Currently other data types are not supported. Importantly if columns contain missing data then they need to be dropped - Do not include these in original column lists & instead drop them from the loaded set in the cell below." 
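To make the missing-data rule above concrete, here is a minimal sketch. The helper name `drop_missing_columns` is hypothetical and not part of the repository; it simply automates "drop any column containing NaNs before defining the column lists".

```python
# Hypothetical helper illustrating the rule above: the current SynthVAE
# implementation cannot handle NaNs, so any column containing them is
# dropped before the categorical/continuous/datetime lists are defined.
import pandas as pd

def drop_missing_columns(df: pd.DataFrame) -> pd.DataFrame:
    missing = df.columns[df.isna().any()].tolist()
    if missing:
        print(f"Dropping columns with missing values: {missing}")
    return df.drop(columns=missing)

# Toy frame standing in for the loaded MIMIC single table; the real DOD
# column is dropped for exactly this reason elsewhere in this diff.
toy = pd.DataFrame({"age": [70, 64], "DOD": [None, "2155-01-01"]})
print(drop_missing_columns(toy).columns.tolist())  # -> ['age']
```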
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load in the mimic single table data\n", - "\n", - "filepath = \"\"\n", - "\n", - "data_supp = pd.read_csv(filepath)\n", - "\n", - "original_categorical_columns = [\n", - " \"ETHNICITY\",\n", - " \"DISCHARGE_LOCATION\",\n", - " \"GENDER\",\n", - " \"FIRST_CAREUNIT\",\n", - " \"VALUEUOM\",\n", - " \"LABEL\",\n", - "]\n", - "original_continuous_columns = [\"SUBJECT_ID\", \"VALUE\", \"age\"]\n", - "original_datetime_columns = [\"ADMITTIME\", \"DISCHTIME\", \"DOB\", \"CHARTTIME\"]\n", - "\n", - "# Drop DOD column as it contains NANS - for now\n", - "\n", - "# data_supp = data_supp.drop('DOD', axis = 1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Drop columns that have missing data as these cannot be handled in the current implementation\n", - "\n", - "#data_supp = data_supp.drop(\"Missing_Column_1\", axis=1) # etc ..." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Pre-Processing\n", - "\n", - "Data can be pre-processed in 2 ways. Either we use \"standard\" option which performs a standard scaler on continuous variables - This has known limitations as:\n", - "\n", - "- Data in tables is usually non-gaussian and SynthVAE implements a gaussian loss, so this will perform worse unless the data is KNOWN to follow a gaussian distribution already.\n", - "\n", - "Or we use the second option of \"GMM\". This performs a variational gaussian mixture model to scale the data & transform it to a gaussian distribution. We use a maximum number of clusters of 10 but the variational method will select the best number of clusters for that continuous variable. This also has known limitations:\n", - "\n", - "- 10 Clusters is arbitrary and may not be enough for certain variables.\n", - "- We are fitting a model to transform the data and hence we are approximating before model is trained. This will lose fidelity as the distribution will not be transformed perfectly.\n", - "\n", - "\n", - "For datasets that include datetime columns, original_metric_set returns the initial dataset after these columns have been transformed. This is because:\n", - "\n", - "- Our evaluation suite cannot calculate certain metrics on datetime objects so these need to be converted to continuous values first" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pre_proc_method = \"GMM\" # Select pre-processing method standard or GMM\n", - "\n", - "#%% -------- Data Pre-Processing -------- #\n", - "\n", - "(\n", - " x_train,\n", - " original_metric_set,\n", - " reordered_dataframe_columns,\n", - " continuous_transformers,\n", - " categorical_transformers,\n", - " datetime_transformers,\n", - " num_categories,\n", - " num_continuous,\n", - ") = mimic_pre_proc(data_supp=data_supp, pre_proc_method=pre_proc_method)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creation & Training of VAE.\n", - "\n", - "We can adapt certain parameters of the model e.g. batch size, latent dimension size etc. This model implements early stopping and these values can be adapted.\n", - "\n", - "We can also activate differential privacy by implementing dp-sgd through the opacus library.\n", - "\n", - "The user defined parameters are defined first and these are arbitrary. 
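The "GMM" pre-processing option described above can be sketched with scikit-learn's variational Bayesian mixture. This is an illustration, not the repository's `mimic_pre_proc`/`support_pre_proc` code: the toy column, the `weight_concentration_prior` value, and the final `/ 4` scaling (a common mode-specific-normalisation convention) are all assumptions.

```python
# Illustrative sketch (not the repository's implementation) of the "GMM"
# option: fit a variational Bayesian GMM with up to 10 clusters, let it
# switch off unused components, then standardise each value against its
# most likely component.
import numpy as np
from sklearn.mixture import BayesianGaussianMixture

rng = np.random.default_rng(0)
values = rng.gamma(shape=2.0, scale=3.0, size=(1000, 1))  # toy non-gaussian column

bgm = BayesianGaussianMixture(
    n_components=10,                  # the arbitrary maximum discussed above
    weight_concentration_prior=1e-3,  # assumption: encourages unused components to switch off
    max_iter=200,
    random_state=0,
)
bgm.fit(values)

modes = bgm.predict(values)  # most likely cluster for each value
means = bgm.means_[modes, 0]
stds = np.sqrt(bgm.covariances_[modes]).reshape(-1)
normalised = (values[:, 0] - means) / (4 * stds)  # assumption: /4 keeps most mass in [-1, 1]
print(normalised[:5], np.bincount(modes, minlength=10))  # bincount shows surviving clusters
```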
For example you could change batch size as well as other variables and if you wanted to do this then you simply move batch size into the objective function in the cell below and then follow the Optuna guidelines on creating a hyperparameter selection.\n", - "\n", - "NOTE: training can be fail and cause errors if the hyperparameter values are not chosen carefully. In this example learning rate was left as 1e-3 rather than adapted as giving it a selection lead to errors in the training of the encoder - something to watch out for" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# User defined parameters\n", - "\n", - "# General training\n", - "batch_size = 32\n", - "n_epochs = 5\n", - "logging_freq = 1 # Number of epochs we should log the results to the user\n", - "patience = 5 # How many epochs should we allow the model train to see if\n", - "# improvement is made\n", - "delta = 10 # The difference between elbo values that registers an improvement\n", - "filepath = None # Where to save the best model\n", - "\n", - "\n", - "# Privacy params\n", - "differential_privacy = False # Do we want to implement differential privacy\n", - "sample_rate = 0.1 # Sampling rate\n", - "noise_scale = None # Noise multiplier - influences how much noise to add\n", - "target_eps = 1 # Target epsilon for privacy accountant\n", - "target_delta = 1e-5 # Target delta for privacy accountant\n", - "\n", - "# Define the metrics you want the model to evaluate\n", - "\n", - "# Define distributional metrics required - for sdv_baselines this is set by default\n", - "distributional_metrics = [\n", - " \"SVCDetection\",\n", - " \"GMLogLikelihood\",\n", - " \"CSTest\",\n", - " \"KSTest\",\n", - " \"KSTestExtended\",\n", - " \"ContinuousKLDivergence\",\n", - " \"DiscreteKLDivergence\",\n", - "]\n", - "\n", - "gower = False\n", - "\n", - "# Prepare data for interaction with torch VAE\n", - "Y = torch.Tensor(x_train)\n", - "dataset = TensorDataset(Y)\n", - "\n", - "generator = None\n", - "sample_rate = batch_size / len(dataset)\n", - "data_loader = DataLoader(\n", - " dataset,\n", - " batch_sampler=UniformWithReplacementSampler(\n", - " num_samples=len(dataset), sample_rate=sample_rate, generator=generator\n", - " ),\n", - " pin_memory=True,\n", - " generator=generator,\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Setting Up Optuna Hyperparameter Tuning Objective Function\n", - "\n", - "See markdown above for details" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# -------- Define our Optuna trial -------- #\n", - "\n", - "\n", - "def objective(\n", - " trial,\n", - " gower,\n", - " distributional_metrics,\n", - " differential_privacy=False,\n", - " target_delta=1e-3,\n", - " target_eps=10.0,\n", - " n_epochs=50,\n", - "):\n", - "\n", - " latent_dim = trial.suggest_int(\"Latent Dimension\", 2, 128, step=2) # Hyperparam\n", - " hidden_dim = trial.suggest_int(\"Hidden Dimension\", 32, 1024, step=32) # Hyperparam\n", - "\n", - " encoder = Encoder(x_train.shape[1], latent_dim, hidden_dim=hidden_dim)\n", - " decoder = Decoder(latent_dim, num_continuous, num_categories=num_categories)\n", - "\n", - " lr = trial.suggest_float(\"Learning Rate\", 1e-3, 1e-2, step=1e-5)\n", - " vae = VAE(encoder, decoder, lr=1e-3) # lr hyperparam\n", - "\n", - " C = trial.suggest_int(\"C\", 10, 1e4, step=50)\n", - "\n", - " if differential_privacy == True:\n", - " (\n", - " 
training_epochs,\n", - " log_elbo,\n", - " log_reconstruction,\n", - " log_divergence,\n", - " log_categorical,\n", - " log_numerical,\n", - " ) = vae.diff_priv_train(\n", - " data_loader,\n", - " n_epochs=n_epochs,\n", - " C=C, # Hyperparam\n", - " target_eps=target_eps,\n", - " target_delta=target_delta,\n", - " sample_rate=sample_rate,\n", - " )\n", - " print(f\"(epsilon, delta): {vae.get_privacy_spent(target_delta)}\")\n", - "\n", - " else:\n", - "\n", - " (\n", - " training_epochs,\n", - " log_elbo,\n", - " log_reconstruction,\n", - " log_divergence,\n", - " log_categorical,\n", - " log_numerical,\n", - " ) = vae.train(data_loader, n_epochs=n_epochs)\n", - "\n", - " # -------- Generate Synthetic Data -------- #\n", - "\n", - " synthetic_supp = constraint_filtering(\n", - " n_rows=data_supp.shape[0],\n", - " vae=vae,\n", - " reordered_cols=reordered_dataframe_columns,\n", - " data_supp_columns=data_supp.columns,\n", - " cont_transformers=continuous_transformers,\n", - " cat_transformers=categorical_transformers,\n", - " date_transformers=datetime_transformers,\n", - " pre_proc_method=pre_proc_method,\n", - " )\n", - "\n", - " # -------- Datetime Handling -------- #\n", - "\n", - " # If the dataset has datetimes then we need to re-convert these to a numerical\n", - " # Value representing seconds, this is so we can evaluate the metrics on them\n", - "\n", - " metric_synthetic_supp = synthetic_supp.copy()\n", - "\n", - " for index, column in enumerate(original_datetime_columns):\n", - "\n", - " # Fit datetime transformer - converts to seconds\n", - " temp_datetime = datetime.DatetimeTransformer()\n", - " temp_datetime.fit(metric_synthetic_supp, columns=column)\n", - "\n", - " metric_synthetic_supp = temp_datetime.transform(metric_synthetic_supp)\n", - "\n", - " # -------- SDV Metrics -------- #\n", - " # Calculate the sdv metrics for SynthVAE\n", - "\n", - " metrics = distribution_metrics(\n", - " gower_bool=gower,\n", - " distributional_metrics=distributional_metrics,\n", - " data_supp=data_supp,\n", - " synthetic_supp=synthetic_supp,\n", - " categorical_columns=original_categorical_columns,\n", - " continuous_columns=original_continuous_columns,\n", - " saving_filepath=None,\n", - " pre_proc_method=pre_proc_method,\n", - " )\n", - "\n", - " # Optuna wants a list of values in float form\n", - "\n", - " list_metrics = [metrics[i] for i in metrics.columns]\n", - "\n", - " print(list_metrics)\n", - "\n", - " return list_metrics\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Hyperparameter Trials\n", - "\n", - "Here we use optuna to set up a study and run it for a predefined number of trials. If the study has not already been created then set first_run to True. This will then create the study for running.\n", - "\n", - "NOTE: directions show if we are maximising or minimising the metrics we are inputting. Most of SDV metrics require maximizing and that is why directions is set up like this. If you are inputting metrics that require minimizing then you need to set up your directions list accordingly." 
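The directions logic just described boils down to the following self-contained sketch; the toy two-metric objective stands in for the full VAE training run and is not the repository's objective function.

```python
# Self-contained sketch of the multi-objective pattern used below. The toy
# objective stands in for the VAE run; both stand-in metrics are maximised,
# mirroring the "directions" list built from the SDV metrics.
import numpy as np
import optuna

def toy_objective(trial):
    latent_dim = trial.suggest_int("Latent Dimension", 2, 128, step=2)
    hidden_dim = trial.suggest_int("Hidden Dimension", 32, 1024, step=32)
    metric_a = 1.0 / (1.0 + abs(latent_dim - 16))   # stand-in metric
    metric_b = 1.0 / (1.0 + abs(hidden_dim - 256))  # stand-in metric
    return [metric_a, metric_b]

study = optuna.create_study(directions=["maximize", "maximize"])
study.optimize(toy_objective, n_trials=10, gc_after_trial=True)

# A multi-objective study returns a Pareto front in study.best_trials rather
# than a single winner; averaging the metric values, as these notebooks do,
# is one simple tie-breaker.
averages = [np.mean(t.values) for t in study.best_trials]
print(study.best_trials[int(np.argmax(averages))].params)
```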
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If there is no study object in your folder then run and save the study so\n", - "# It can be resumed if needed\n", - "\n", - "# User parameters\n", - "\n", - "first_run = True # First run indicates if we are creating a new hyperparam study\n", - "saving_filepath = \"test.pkl\" # To save the study if you wish - needs to be .pkl format\n", - "n_trials = (\n", - " 3 # Number of trials you want to hyperparameter tune for - needs to be .pkl format\n", - ")\n", - "loading_filepath = None # To load any older study if they have already been created\n", - "\n", - "if first_run == True:\n", - "\n", - " if gower == True:\n", - " directions = [\"maximize\" for i in range(distributional_metrics.shape[0] + 1)]\n", - " else:\n", - " directions = [\"maximize\" for i in range(distributional_metrics.shape[0])]\n", - "\n", - " study = optuna.create_study(directions=directions)\n", - "\n", - "else:\n", - "\n", - " with open(\"{}\".format(loading_filepath), \"rb\") as f:\n", - " study = pickle.load(f)\n", - "\n", - "study.optimize(\n", - " lambda trial: objective(\n", - " trial,\n", - " gower=gower,\n", - " distributional_metrics=distributional_metrics,\n", - " differential_privacy=differential_privacy,\n", - " target_delta=target_delta,\n", - " target_eps=target_eps,\n", - " n_epochs=n_epochs,\n", - " ),\n", - " n_trials=n_trials,\n", - " gc_after_trial=True,\n", - ") # GC to avoid OOM" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Saving The Study\n", - "\n", - "Here we use pickle to save the study so that it can be loaded up and run from its current point.\n", - "\n", - "If your study is a multi objective study then it will give you multiple best_trials when you use study.best_trials. Depending your weighting for each metric, you can decide which study you will pick as optimal. An example of this is shown in the second cell where each metric is equally important and we average over them using the mean." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Save The Study -------- #\n", - "\n", - "with open(\"{}\".format(saving_filepath), \"wb\") as f:\n", - " pickle.dump(study, f)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trial_averages = []\n", - "\n", - "for trials in study.best_trials:\n", - "\n", - " metrics = trials.values\n", - " trial_averages.append(np.mean(metrics))\n", - "\n", - "# Now find the best trial\n", - "\n", - "best_trial = np.argmax(np.asarray(trial_averages))\n", - "\n", - "# Best trial hyperparameters\n", - "\n", - "study.best_trials[best_trial].params\n" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "a3faeeb7a141a1b6863ef3d83f2d4891432bfa1117b17a94d8e78eaa2bfb2ea7" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Hyperparameter_Tuning/SUPPORT.ipynb b/Hyperparameter_Tuning/SUPPORT.ipynb deleted file mode 100644 index 5be9ffa..0000000 --- a/Hyperparameter_Tuning/SUPPORT.ipynb +++ /dev/null @@ -1,451 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# SUPPORT Hyperparameter Tuning\n", - "\n", - "This notebook runs through hyperparameter tuning for the SUPPORT dataset. For this we use the Optuna library.\n", - "\n", - "For users with limited computational power OR with no access to MIMIC datasets.\n", - "\n", - "We validate our hyperparameter tuning results on our training dataset metrics - This is isn't optimal as usually it would be validated on a separate validation set. Hard to create an appropriate validation set in this instance as we would require the distributions for each variable column to look similar between training & validation.\n", - "\n", - "NOTE: There are known limitations that are explained as they come up in these markdown cells." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Standard imports\n", - "import numpy as np\n", - "import pandas as pd\n", - "import torch\n", - "\n", - "# VAE is in other folder as well as opacus adapted library\n", - "import sys\n", - "\n", - "sys.path.append(\"../\")\n", - "\n", - "# Opacus support for differential privacy\n", - "from opacus.utils.uniform_sampler import UniformWithReplacementSampler\n", - "\n", - "# For the SUPPORT dataset\n", - "from pycox.datasets import support\n", - "\n", - "# For VAE dataset formatting\n", - "from torch.utils.data import TensorDataset, DataLoader\n", - "\n", - "# VAE functions\n", - "from VAE import Decoder, Encoder, VAE\n", - "\n", - "# Utility file contains all functions required to run notebook\n", - "from utils import (\n", - " set_seed,\n", - " support_pre_proc,\n", - " plot_elbo,\n", - " plot_likelihood_breakdown,\n", - " plot_variable_distributions,\n", - " reverse_transformers,\n", - ")\n", - "from metrics import distribution_metrics\n", - "\n", - "# For hyperparameter tuning as well as saving different trial objects\n", - "import optuna\n", - "import pickle\n", - "\n", - "import warnings\n", - "\n", - "warnings.filterwarnings(\"ignore\") # We suppress warnings to avoid SDMETRICS throwing unique synthetic data warnings (i.e.\n", - "# data in synthetic set is not in the real data set) as well as SKLEARN throwing convergence warnings (pre-processing uses\n", - "# GMM from sklearn and this throws non convergence warnings)\n", - "\n", - "set_seed(0)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Loading & Column Definitions\n", - "\n", - "First we load in the SUPPORT dataset from pycox datasets. Then we define the continuous and categorical columns in that dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Load in the support data\n", - "data_supp = support.read_df()\n", - "\n", - "# Save the original columns\n", - "\n", - "original_continuous_columns = [\"duration\"] + [f\"x{i}\" for i in range(7, 15)]\n", - "original_categorical_columns = [\"event\"] + [f\"x{i}\" for i in range(1, 7)]\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Pre-Processing\n", - "\n", - "Data can be pre-processed in 2 ways. Either we use \"standard\" option which performs a standard scaler on continuous variables - This has known limitations as:\n", - "\n", - "- Data in tables is usually non-gaussian and SynthVAE implements a gaussian loss, so this will perform worse unless the data is KNOWN to follow a gaussian distribution already.\n", - "\n", - "Or we use the second option of \"GMM\". This performs a variational gaussian mixture model to scale the data & transform it to a gaussian distribution. We use a maximum number of clusters of 10 but the variational method will select the best number of clusters for that continuous variable. This also has known limitations:\n", - "\n", - "- 10 Clusters is arbitrary and may not be enough for certain variables.\n", - "- We are fitting a model to transform the data and hence we are approximating before model is trained. This will lose fidelity as the distribution will not be transformed perfectly.\n", - "\n", - "SUPPORT is a limited dataset as it has no missingness (which our model currently does NOT handle) and it has no datetime columns or other data types. 
Be wary drawing any conclusions from this set due to these constraints as well as the dataset size. Testing/training new models with this set could be useful but conclusive results should be tested on other sets." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Data Pre-Processing -------- #\n", - "\n", - "pre_proc_method = \"GMM\"\n", - "\n", - "(\n", - " x_train,\n", - " data_supp,\n", - " reordered_dataframe_columns,\n", - " continuous_transformers,\n", - " categorical_transformers,\n", - " num_categories,\n", - " num_continuous,\n", - ") = support_pre_proc(data_supp=data_supp, pre_proc_method=pre_proc_method)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creation & Training of VAE.\n", - "\n", - "We can adapt certain parameters of the model e.g. batch size, latent dimension size etc. This model implements early stopping and these values can be adapted.\n", - "\n", - "We can also activate differential privacy by implementing dp-sgd through the opacus library.\n", - "\n", - "The user defined parameters are defined first and these are arbitrary. For example you could change batch size as well as other variables and if you wanted to do this then you simply move batch size into the objective function in the cell below and then follow the Optuna guidelines on creating a hyperparameter selection.\n", - "\n", - "NOTE: training can be fail and cause errors if the hyperparameter values are not chosen carefully. In this example learning rate was left as 1e-3 rather than adapted as giving it a selection lead to errors in the training of the encoder - something to watch out for" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Create & Train VAE -------- #\n", - "\n", - "# User defined parameters\n", - "\n", - "# General training\n", - "batch_size = 32\n", - "n_epochs = 5\n", - "logging_freq = 1 # Number of epochs we should log the results to the user\n", - "patience = 5 # How many epochs should we allow the model train to see if\n", - "# improvement is made\n", - "delta = 10 # The difference between elbo values that registers an improvement\n", - "filepath = None # Where to save the best model\n", - "\n", - "\n", - "# Privacy params\n", - "differential_privacy = False # Do we want to implement differential privacy\n", - "sample_rate = 0.1 # Sampling rate\n", - "noise_scale = None # Noise multiplier - influences how much noise to add\n", - "target_eps = 1 # Target epsilon for privacy accountant\n", - "target_delta = 1e-5 # Target delta for privacy accountant\n", - "\n", - "# Define the metrics you want the model to evaluate\n", - "\n", - "# Define distributional metrics required - for sdv_baselines this is set by default\n", - "distributional_metrics = [\n", - " \"SVCDetection\",\n", - " \"GMLogLikelihood\",\n", - " \"CSTest\",\n", - " \"KSTest\",\n", - " \"KSTestExtended\",\n", - " \"ContinuousKLDivergence\",\n", - " \"DiscreteKLDivergence\",\n", - "]\n", - "\n", - "gower = False\n", - "\n", - "# Prepare data for interaction with torch VAE\n", - "Y = torch.Tensor(x_train)\n", - "dataset = TensorDataset(Y)\n", - "\n", - "generator = None\n", - "sample_rate = batch_size / len(dataset)\n", - "data_loader = DataLoader(\n", - " dataset,\n", - " batch_sampler=UniformWithReplacementSampler(\n", - " num_samples=len(dataset), sample_rate=sample_rate, generator=generator\n", - " ),\n", - " 
pin_memory=True,\n", - " generator=generator,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Setting Up Optuna Hyperparameter Tuning Objective Function\n", - "\n", - "See markdown above for details" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# -------- Define our Optuna trial -------- #\n", - "\n", - "\n", - "def objective(\n", - " trial,\n", - " gower,\n", - " distributional_metrics,\n", - " differential_privacy=False,\n", - " target_delta=1e-3,\n", - " target_eps=10.0,\n", - " n_epochs=50,\n", - "):\n", - "\n", - " latent_dim = trial.suggest_int(\"Latent Dimension\", 2, 128, step=2) # Hyperparam\n", - " hidden_dim = trial.suggest_int(\"Hidden Dimension\", 32, 1024, step=32) # Hyperparam\n", - "\n", - " encoder = Encoder(x_train.shape[1], latent_dim, hidden_dim=hidden_dim)\n", - " decoder = Decoder(latent_dim, num_continuous, num_categories=num_categories)\n", - "\n", - " lr = trial.suggest_float(\"Learning Rate\", 1e-3, 1e-2, step=1e-5)\n", - " vae = VAE(encoder, decoder, lr=1e-3) # lr hyperparam\n", - "\n", - " C = trial.suggest_int(\"C\", 10, 1e4, step=50) # Clipping hyperparam\n", - "\n", - " if differential_privacy == True:\n", - " (\n", - " training_epochs,\n", - " log_elbo,\n", - " log_reconstruction,\n", - " log_divergence,\n", - " log_categorical,\n", - " log_numerical,\n", - " ) = vae.diff_priv_train(\n", - " data_loader,\n", - " n_epochs=n_epochs,\n", - " C=C, # Hyperparam\n", - " target_eps=target_eps,\n", - " target_delta=target_delta,\n", - " sample_rate=sample_rate,\n", - " )\n", - " print(f\"(epsilon, delta): {vae.get_privacy_spent(target_delta)}\")\n", - "\n", - " else:\n", - "\n", - " (\n", - " training_epochs,\n", - " log_elbo,\n", - " log_reconstruction,\n", - " log_divergence,\n", - " log_categorical,\n", - " log_numerical,\n", - " ) = vae.train(data_loader, n_epochs=n_epochs)\n", - "\n", - " # -------- Synthetic Data Generation -------- #\n", - "\n", - " synthetic_sample = vae.generate(data_supp.shape[0])\n", - "\n", - " if torch.cuda.is_available():\n", - " synthetic_sample = pd.DataFrame(\n", - " synthetic_sample.cpu().detach(), columns=reordered_dataframe_columns\n", - " )\n", - " else:\n", - " synthetic_sample = pd.DataFrame(\n", - " synthetic_sample.detach(), columns=reordered_dataframe_columns\n", - " )\n", - "\n", - " # Reverse the transformations\n", - "\n", - " synthetic_supp = reverse_transformers(\n", - " synthetic_set=synthetic_sample,\n", - " data_supp_columns=data_supp.columns,\n", - " cont_transformers=continuous_transformers,\n", - " cat_transformers=categorical_transformers,\n", - " pre_proc_method=pre_proc_method,\n", - " )\n", - " # -------- SDV Metrics -------- #\n", - "\n", - " metrics = distribution_metrics(\n", - " gower_bool=gower,\n", - " distributional_metrics=distributional_metrics,\n", - " data_supp=data_supp,\n", - " synthetic_supp=synthetic_supp,\n", - " categorical_columns=original_categorical_columns,\n", - " continuous_columns=original_continuous_columns,\n", - " saving_filepath=None,\n", - " pre_proc_method=pre_proc_method,\n", - " )\n", - "\n", - " # Optuna wants a list of values in float form\n", - "\n", - " list_metrics = [metrics[i] for i in metrics.columns]\n", - "\n", - " print(list_metrics)\n", - "\n", - " return list_metrics\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Hyperparameter Trials\n", - "\n", - "Here we use optuna to set up a study and run it for a predefined number of 
trials. If the study has not already been created then set first_run to True. This will then create the study for running.\n", - "\n", - "NOTE: directions show if we are maximising or minimising the metrics we are inputting. Most of SDV metrics require maximizing and that is why directions is set up like this. If you are inputting metrics that require minimizing then you need to set up your directions list accordingly." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If there is no study object in your folder then run and save the study so\n", - "# It can be resumed if needed\n", - "\n", - "# User parameters\n", - "\n", - "first_run = True # First run indicates if we are creating a new hyperparam study\n", - "saving_filepath = \"\" # To save the study if you wish - needs to be .pkl format\n", - "n_trials = 3 # Number of trials you want to hyperparameter tune for\n", - "loading_filepath = None # For loading any older study objects - needs to be .pkl format\n", - "\n", - "if first_run == True:\n", - "\n", - " if gower == True:\n", - "\n", - " directions = [\"maximize\" for i in range(distributional_metrics.shape[0] + 1)]\n", - "\n", - " else:\n", - "\n", - " directions = [\"maximize\" for i in range(distributional_metrics.shape[0])]\n", - "\n", - " study = optuna.create_study(directions=directions)\n", - "\n", - "else:\n", - "\n", - " with open(\"{}\".format(loading_filepath), \"rb\") as f:\n", - " study = pickle.load(f)\n", - "\n", - "study.optimize(\n", - " lambda trial: objective(\n", - " trial,\n", - " gower=gower,\n", - " distributional_metrics=distributional_metrics,\n", - " differential_privacy=differential_privacy,\n", - " target_delta=target_delta,\n", - " target_eps=target_eps,\n", - " n_epochs=n_epochs,\n", - " ),\n", - " n_trials=n_trials,\n", - " gc_after_trial=True,\n", - ") # GC to avoid OOM\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Saving The Study\n", - "\n", - "Here we use pickle to save the study so that it can be loaded up and run from its current point.\n", - "\n", - "If your study is a multi objective study then it will give you multiple best_trials when you use study.best_trials. Depending your weighting for each metric, you can decide which study you will pick as optimal. An example of this is shown in the second cell where each metric is equally important and we average over them using the mean." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Save The Study -------- #\n", - "\n", - "with open(\"{}\".format(saving_filepath), \"wb\") as f:\n", - " pickle.dump(study, f)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "trial_averages = []\n", - "\n", - "for trials in study.best_trials:\n", - "\n", - " metrics = trials.values\n", - " trial_averages.append(np.mean(metrics))\n", - "\n", - "# Now find the best trial\n", - "\n", - "best_trial = np.argmax(np.asarray(trial_averages))\n", - "\n", - "# Best trial hyperparameters\n", - "\n", - "study.best_trials[best_trial].params\n" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "a3faeeb7a141a1b6863ef3d83f2d4891432bfa1117b17a94d8e78eaa2bfb2ea7" - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/Investigations/MIMIC_Notebook.ipynb b/Investigations/MIMIC_Notebook.ipynb deleted file mode 100644 index b59f021..0000000 --- a/Investigations/MIMIC_Notebook.ipynb +++ /dev/null @@ -1,576 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "c082cd4d", - "metadata": {}, - "source": [ - "# MIMIC Notebook\n", - "\n", - "This notebook runs through investigations on internal NHSX datasets collated from the MIMIC-III dataset.\n", - "\n", - "For users who do not have MIMIC-III access then investigations cannot be run through until access is completed. In the meantime you can access the investigations on the open access SUPPORT dataset.\n", - "\n", - "The notebook that produces our single table is found here . 
If you want to create a single table yourself then follow the example csv file given at " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb1f6f90", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Import Libraries -------- #\n", - "\n", - "# Standard imports\n", - "from tokenize import String\n", - "import numpy as np\n", - "import pandas as pd\n", - "import torch\n", - "\n", - "# VAE is in other folder\n", - "import sys\n", - "\n", - "sys.path.append(\"../\")\n", - "\n", - "# Opacus support for differential privacy\n", - "from opacus.utils.uniform_sampler import UniformWithReplacementSampler\n", - "\n", - "# For VAE dataset formatting\n", - "from torch.utils.data import TensorDataset, DataLoader\n", - "\n", - "# VAE functions\n", - "from VAE import Decoder, Encoder, VAE\n", - "\n", - "# For datetime columns we need a transformer\n", - "from rdt.transformers import datetime\n", - "\n", - "# Utility file contains all functions required to run notebook\n", - "from utils import (\n", - " set_seed,\n", - " mimic_pre_proc,\n", - " constraint_filtering,\n", - " plot_elbo,\n", - " plot_likelihood_breakdown,\n", - " plot_variable_distributions,\n", - " reverse_transformers,\n", - ")\n", - "from metrics import distribution_metrics, privacy_metrics\n", - "\n", - "import warnings\n", - "\n", - "warnings.filterwarnings(\"ignore\") # We suppress warnings to avoid SDMETRICS throwing unique synthetic data warnings (i.e.\n", - "# data in synthetic set is not in the real data set) as well as SKLEARN throwing convergence warnings (pre-processing uses\n", - "# GMM from sklearn and this throws non convergence warnings)\n", - "\n", - "set_seed(0)" - ] - }, - { - "cell_type": "markdown", - "id": "a9d8b521", - "metadata": {}, - "source": [ - "## Data Loading & Column Definitions\n", - "\n", - "First we need to load in the MIMIC dataset from a specified filepath. \n", - "\n", - "We then need to create lists indicating which columns are:\n", - "a) continuous\n", - "b) categorical\n", - "c) datetime\n", - "\n", - "Currently other data types are not supported. Importantly if columns contain missing data then they need to be dropped - Do not include these in original column lists & instead drop them from the loaded set in the cell below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b8629429", - "metadata": {}, - "outputs": [], - "source": [ - "# Load in the mimic single table data\n", - "\n", - "filepath = \"\"\n", - "\n", - "data_supp = pd.read_csv(filepath)\n", - "original_categorical_columns = [\n", - " \"ETHNICITY\",\n", - " \"DISCHARGE_LOCATION\",\n", - " \"GENDER\",\n", - " \"FIRST_CAREUNIT\",\n", - " \"VALUEUOM\",\n", - " \"LABEL\",\n", - "]\n", - "original_continuous_columns = [\"SUBJECT_ID\", \"VALUE\", \"age\"]\n", - "original_datetime_columns = [\"ADMITTIME\", \"DISCHTIME\", \"DOB\", \"CHARTTIME\"]\n", - "\n", - "# Drop DOD column as it contains NANS - for now\n", - "\n", - "# data_supp = data_supp.drop('DOD', axis = 1)" - ] - }, - { - "cell_type": "markdown", - "id": "af1ddb42", - "metadata": {}, - "source": [ - "## Data Pre-Processing\n", - "\n", - "Data can be pre-processed in 2 ways. 
Either we use \"standard\" option which performs a standard scaler on continuous variables - This has known limitations as:\n", - "\n", - "- Data in tables is usually non-gaussian and SynthVAE implements a gaussian loss, so this will perform worse unless the data is KNOWN to follow a gaussian distribution already.\n", - "\n", - "Or we use the second option of \"GMM\". This performs a variational gaussian mixture model to scale the data & transform it to a gaussian distribution. We use a maximum number of clusters of 10 but the variational method will select the best number of clusters for that continuous variable. This also has known limitations:\n", - "\n", - "- 10 Clusters is arbitrary and may not be enough for certain variables.\n", - "- We are fitting a model to transform the data and hence we are approximating before model is trained. This will lose fidelity as the distribution will not be transformed perfectly.\n", - "\n", - "\n", - "For datasets that include datetime columns, original_metric_set returns the initial dataset after these columns have been transformed. This is because:\n", - "\n", - "- Our evaluation suite cannot calculate certain metrics on datetime objects so these need to be converted to continuous values first" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13839178", - "metadata": {}, - "outputs": [], - "source": [ - "pre_proc_method = \"standard\" # Select pre-processing method standard or GMM\n", - "\n", - "#%% -------- Data Pre-Processing -------- #\n", - "\n", - "(\n", - " x_train,\n", - " original_metric_set,\n", - " reordered_dataframe_columns,\n", - " continuous_transformers,\n", - " categorical_transformers,\n", - " datetime_transformers,\n", - " num_categories,\n", - " num_continuous,\n", - ") = mimic_pre_proc(data_supp=data_supp, pre_proc_method=pre_proc_method)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "77c705de", - "metadata": {}, - "source": [ - "## Creation & Training of VAE\n", - "\n", - "We can adapt certain parameters of the model e.g. batch size, latent dimension size etc. This model implements early stopping and these values can be adapted.\n", - "\n", - "We can also activate differential privacy by implementing dp-sgd through the opacus library." 
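For intuition, the dp-sgd step that `vae.diff_priv_train` delegates to Opacus can be sketched in plain PyTorch. This is a conceptual illustration only, not the Opacus API: each per-sample gradient is clipped to the threshold `C`, and Gaussian noise with standard deviation `noise_scale * C` is added before averaging.

```python
# Conceptual dp-sgd step (NOT the Opacus API used by vae.diff_priv_train):
# clip each per-sample gradient to norm C, add Gaussian noise scaled by
# noise_scale * C, then average over the batch.
import torch

def dp_sgd_step(per_sample_grads: torch.Tensor, C: float, noise_scale: float) -> torch.Tensor:
    # per_sample_grads: (batch_size, n_params), one flattened gradient per sample
    norms = per_sample_grads.norm(dim=1, keepdim=True)
    clip_factor = (C / (norms + 1e-6)).clamp(max=1.0)
    clipped = per_sample_grads * clip_factor
    noise = torch.normal(0.0, noise_scale * C, size=clipped.shape[1:])
    return (clipped.sum(dim=0) + noise) / per_sample_grads.shape[0]

grads = torch.randn(32, 10)  # toy batch of per-sample gradients
print(dp_sgd_step(grads, C=1.0, noise_scale=1.1))
```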
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d58de105", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Create & Train VAE -------- #\n", - "\n", - "# User defined hyperparams\n", - "# General training\n", - "batch_size = 32\n", - "latent_dim = 256\n", - "hidden_dim = 256\n", - "n_epochs = 5\n", - "logging_freq = 1 # Number of epochs we should log the results to the user\n", - "patience = 5 # How many epochs should we allow the model train to see if\n", - "# improvement is made\n", - "delta = 10 # The difference between elbo values that registers an improvement\n", - "filepath = None # Where to save the best model\n", - "\n", - "\n", - "# Privacy params\n", - "differential_privacy = False # Do we want to implement differential privacy\n", - "sample_rate = 0.1 # Sampling rate\n", - "C = 1e16 # Clipping threshold - any gradients above this are clipped\n", - "noise_scale = None # Noise multiplier - influences how much noise to add\n", - "target_eps = 1 # Target epsilon for privacy accountant\n", - "target_delta = 1e-5 # Target delta for privacy accountant\n", - "\n", - "\n", - "# Prepare data for interaction with torch VAE\n", - "Y = torch.Tensor(x_train)\n", - "dataset = TensorDataset(Y)\n", - "\n", - "generator = None\n", - "sample_rate = batch_size / len(dataset)\n", - "data_loader = DataLoader(\n", - " dataset,\n", - " batch_sampler=UniformWithReplacementSampler(\n", - " num_samples=len(dataset), sample_rate=sample_rate, generator=generator\n", - " ),\n", - " pin_memory=True,\n", - " generator=generator,\n", - ")\n", - "\n", - "# Create VAE\n", - "\n", - "encoder = Encoder(x_train.shape[1], latent_dim, hidden_dim=hidden_dim)\n", - "decoder = Decoder(latent_dim, num_continuous, num_categories=num_categories)\n", - "\n", - "vae = VAE(encoder, decoder)\n", - "\n", - "print(vae)\n", - "\n", - "if differential_privacy == False:\n", - " (\n", - " training_epochs,\n", - " log_elbo,\n", - " log_reconstruction,\n", - " log_divergence,\n", - " log_categorical,\n", - " log_numerical,\n", - " ) = vae.train(\n", - " data_loader, \n", - " n_epochs=n_epochs,\n", - " logging_freq=logging_freq,\n", - " patience=patience,\n", - " delta=delta,\n", - " )\n", - "\n", - "elif differential_privacy == True:\n", - " (\n", - " training_epochs,\n", - " log_elbo,\n", - " log_reconstruction,\n", - " log_divergence,\n", - " log_categorical,\n", - " log_numerical,\n", - " ) = vae.diff_priv_train(\n", - " data_loader,\n", - " n_epochs=n_epochs,\n", - " logging_freq=logging_freq,\n", - " patience=patience,\n", - " delta=delta,\n", - " C=C,\n", - " target_eps=target_eps,\n", - " target_delta=target_delta,\n", - " sample_rate=sample_rate,\n", - " noise_scale=noise_scale,\n", - " )\n", - " print(f\"(epsilon, delta): {vae.get_privacy_spent(target_delta)}\")\n" - ] - }, - { - "cell_type": "markdown", - "id": "1110e951", - "metadata": {}, - "source": [ - "## Plotting Elbo Functionality\n", - "\n", - "Here we can plot and save the ELBO graph for the training run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1474f100", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Plot Loss Features ELBO Breakdown -------- #\n", - "\n", - "elbo_fig = plot_elbo(\n", - " n_epochs=training_epochs,\n", - " log_elbo=log_elbo,\n", - " log_reconstruction=log_reconstruction,\n", - " log_divergence=log_divergence,\n", - " saving_filepath=\"\",\n", - " pre_proc_method=pre_proc_method,\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "id": "106e3548", - 
"metadata": {}, - "source": [ - "## Plotting Reconstruction Breakdown\n", - "\n", - "Here we can plot the breakdown of reconstruction loss i.e. visualise how the categorical and numerical losses change over training" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e4deaa8e", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Plot Loss Features Reconstruction Breakdown -------- #\n", - "\n", - "likelihood_fig = plot_likelihood_breakdown(\n", - " n_epochs=training_epochs,\n", - " log_categorical=log_categorical,\n", - " log_numerical=log_numerical,\n", - " saving_filepath=\"\",\n", - " pre_proc_method=pre_proc_method,\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "id": "58ae4717", - "metadata": {}, - "source": [ - "## Synthetic Data Generation\n", - "\n", - "Here we create synthetic data ready for metric testing as well as visualisation of variable reconstruction.\n", - "\n", - "If you are using the MIMIC-III internal set from NHSX then constraint sampling here checks to ensure certain constraints are obeyed in the synthetic set. These are:\n", - "\n", - "- age is greater than or equal to 0\n", - "- The admission date is after the date of birth\n", - "- The discharge date is after or equal to the admission date\n", - "- The first chart time is also after or equal to admission date" - ] - }, - { - "cell_type": "markdown", - "id": "f9d88f35", - "metadata": {}, - "source": [ - "## Either run the cell directly below for constraints included in sampling OR run the cell second below to just generate a sample without constraints" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "36881d4d", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Constraint Sampling -------- #\n", - "\n", - "# For NHSX internal MIMIC set OR sets which follow a similar data structure\n", - "\n", - "synthetic_supp = constraint_filtering(\n", - " n_rows=data_supp.shape[0],\n", - " vae=vae,\n", - " reordered_cols=reordered_dataframe_columns,\n", - " data_supp_columns=data_supp.columns,\n", - " cont_transformers=continuous_transformers,\n", - " cat_transformers=categorical_transformers,\n", - " date_transformers=datetime_transformers,\n", - " pre_proc_method=pre_proc_method,\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "76b1dbc0", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Synthetic Data Generation Without Constraints -------- #\n", - "\n", - "# For any other datasets OR for running without constraint sampling\n", - "\n", - "synthetic_sample = vae.generate(data_supp.shape[0])\n", - "\n", - "# Reverse the transformations\n", - "\n", - "synthetic_supp = reverse_transformers(\n", - " synthetic_set=synthetic_sample,\n", - " data_supp_columns=data_supp.columns,\n", - " cont_transformers=continuous_transformers,\n", - " cat_transformers=categorical_transformers,\n", - " date_transformers=datetime_transformers,\n", - " pre_proc_method=pre_proc_method,\n", - ")\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "33445ad7", - "metadata": {}, - "source": [ - "## Synthetic Variable Visualisation\n", - "\n", - "Here we want to visualise the synthetic variables generated and compare them to the original set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9511a251", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Plot Histograms For All The Variable Distributions -------- #\n", - "\n", - "plot_variable_distributions(\n", - " 
categorical_columns=original_categorical_columns,\n", - " continuous_columns=original_continuous_columns,\n", - " data_supp=data_supp,\n", - " synthetic_supp=synthetic_supp,\n", - " saving_filepath=\"\",\n", - " pre_proc_method=pre_proc_method,\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "id": "90f1ebb0", - "metadata": {}, - "source": [ - "## Metric evaluation\n", - "\n", - "For datasets that have datetime columns, we need to re-transform these into a numerical value as our metrics cannot handle datetime objects. We are then inputting original_metric_set alongside the newly transformed synthetic set i.e. metric_synthetic_supp. If datetimes are not included in the set then you can just run data_supp against synthetic_supp and skip the datetime handling.\n", - "\n", - "We use the SDV evaluation framework. Supply the metrics you wish to find in the user_metrics list from SDV guidance. Can start here: https://sdv.dev/SDV/user_guides/evaluation/single_table_metrics.html\n", - "\n", - "Note that not all of these will work, some are hit and miss. We predominantly rely on continuous and discrete KL divergence measures. You can also input \"gower\" and this will calculate the gower distance using the gower library." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d845b202", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Datetime Handling -------- #\n", - "\n", - "# If the dataset has datetimes then we need to re-convert these to a numerical\n", - "# Value representing seconds, this is so we can evaluate the metrics on them\n", - "\n", - "metric_synthetic_supp = synthetic_supp.copy()\n", - "\n", - "for index, column in enumerate(original_datetime_columns):\n", - "\n", - " # Fit datetime transformer - converts to seconds\n", - " temp_datetime = datetime.DatetimeTransformer()\n", - " temp_datetime.fit(metric_synthetic_supp, columns=column)\n", - "\n", - " metric_synthetic_supp = temp_datetime.transform(metric_synthetic_supp)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f7e4174b", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- SDV Metrics -------- #\n", - "\n", - "# Define the metrics you want the model to evaluate\n", - "\n", - "# Define distributional metrics required - for sdv_baselines this is set by default\n", - "distributional_metrics = [\n", - " \"SVCDetection\",\n", - " \"GMLogLikelihood\",\n", - " \"CSTest\",\n", - " \"KSTest\",\n", - " \"KSTestExtended\",\n", - " \"ContinuousKLDivergence\",\n", - " \"DiscreteKLDivergence\",\n", - "]\n", - "\n", - "gower = False\n", - "\n", - "metrics = distribution_metrics(\n", - " gower_bool=gower,\n", - " distributional_metrics=distributional_metrics,\n", - " data_supp=original_metric_set,\n", - " synthetic_supp=metric_synthetic_supp,\n", - " categorical_columns=original_categorical_columns,\n", - " continuous_columns=original_continuous_columns,\n", - " saving_filepath=\"\",\n", - " pre_proc_method=pre_proc_method,\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4e8bba0e", - "metadata": {}, - "outputs": [], - "source": [ - "metrics" - ] - }, - { - "cell_type": "markdown", - "id": "975c5d80", - "metadata": {}, - "source": [ - "# Privacy Metric Evaluation\n", - "\n", - "Using SDV privacy metrics we can get an insight into how privacy is conserved when utilising dp-sgd methods. SDV's privacy metrics are limited in that they can only be used on similar data types. E.g. 
if we choose age to be the sensitive variable, we can build ML based models to predict a users age using the other columns. However we are forced to only use columns that are also continuous variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3e4c9495", - "metadata": {}, - "outputs": [], - "source": [ - "# Specify our private variable\n", - "\n", - "private_variable = \"ETHNICITY\"\n", - "\n", - "privacy_metric = privacy_metrics(\n", - " private_variable=private_variable,\n", - " data_supp=data_supp,\n", - " synthetic_supp=synthetic_supp,\n", - " categorical_columns=original_categorical_columns,\n", - " continuous_columns=original_continuous_columns,\n", - " saving_filepath=None,\n", - " pre_proc_method=pre_proc_method,\n", - ")\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e9b74b6f", - "metadata": {}, - "outputs": [], - "source": [ - "privacy_metric" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Investigations/MIMIC_Runs.py b/Investigations/MIMIC_Runs.py deleted file mode 100644 index ce9246e..0000000 --- a/Investigations/MIMIC_Runs.py +++ /dev/null @@ -1,267 +0,0 @@ -#%% -------- Import Libraries -------- # - -# Standard imports -from tokenize import String -import numpy as np -import pandas as pd -import torch - -# VAE is in other folder -import sys - -sys.path.append("../") - -from opacus.utils.uniform_sampler import UniformWithReplacementSampler - -# For VAE dataset formatting -from torch.utils.data import TensorDataset, DataLoader - -# VAE functions -from VAE import Decoder, Encoder, VAE - -from rdt.transformers import datetime - -from utils import ( - set_seed, - mimic_pre_proc, - constraint_filtering, - plot_elbo, - plot_likelihood_breakdown, - plot_variable_distributions, -) -from metrics import distribution_metrics, privacy_metrics - -import warnings - -warnings.filterwarnings( - "ignore" -) # We suppress warnings to avoid SDMETRICS throwing unique synthetic data warnings (i.e. 
-# data in synthetic set is not in the real data set) as well as SKLEARN throwing convergence warnings (pre-processing uses -# GMM from sklearn and this throws non convergence warnings) - -set_seed(0) - -# Load in the mimic single table data - use variable filepath to gather the data - -filepath = ".../Private MIMIC Data/table_one_synthvae.csv" - -data_supp = pd.read_csv(filepath) -# Save the original columns - -original_categorical_columns = [ - "ETHNICITY", - "DISCHARGE_LOCATION", - "GENDER", - "FIRST_CAREUNIT", - "VALUEUOM", - "LABEL", -] -original_continuous_columns = ["SUBJECT_ID", "VALUE", "age"] -original_datetime_columns = ["ADMITTIME", "DISCHTIME", "DOB", "CHARTTIME"] - -# Drop DOD column as it contains NANS - for now - -# data_supp = data_supp.drop('DOD', axis = 1) - -original_columns = ( - original_categorical_columns - + original_continuous_columns - + original_datetime_columns -) - -#%% -------- Data Pre-Processing -------- # - -pre_proc_method = "standard" -( - x_train, - original_metric_set, - reordered_dataframe_columns, - continuous_transformers, - categorical_transformers, - datetime_transformers, - num_categories, - num_continuous, -) = mimic_pre_proc(data_supp=data_supp, pre_proc_method=pre_proc_method) - -#%% -------- Create & Train VAE -------- # - -# User defined hyperparams -# General training -batch_size = 32 -latent_dim = 256 -hidden_dim = 256 -n_epochs = 5 -logging_freq = 1 # Number of epochs we should log the results to the user -patience = 5 # How many epochs should we allow the model train to see if -# improvement is made -delta = 10 # The difference between elbo values that registers an improvement -filepath = None # Where to save the best model - - -# Privacy params -differential_privacy = False # Do we want to implement differential privacy -sample_rate = 0.1 # Sampling rate -C = 1e16 # Clipping threshold - any gradients above this are clipped -noise_scale = None # Noise multiplier - influences how much noise to add -target_eps = 1 # Target epsilon for privacy accountant -target_delta = 1e-5 # Target delta for privacy accountant - -# Prepare data for interaction with torch VAE -Y = torch.Tensor(x_train) -dataset = TensorDataset(Y) - -generator = None -sample_rate = batch_size / len(dataset) -data_loader = DataLoader( - dataset, - batch_sampler=UniformWithReplacementSampler( - num_samples=len(dataset), sample_rate=sample_rate, generator=generator - ), - pin_memory=True, - generator=generator, -) - -# Create VAE -encoder = Encoder(x_train.shape[1], latent_dim, hidden_dim=hidden_dim) -decoder = Decoder(latent_dim, num_continuous, num_categories=num_categories) - -vae = VAE(encoder, decoder) - -print(vae) - -if differential_privacy == False: - ( - training_epochs, - log_elbo, - log_reconstruction, - log_divergence, - log_categorical, - log_numerical, - ) = vae.train( - data_loader, - n_epochs=n_epochs, - logging_freq=logging_freq, - patience=patience, - delta=delta, - ) - -elif differential_privacy == True: - ( - training_epochs, - log_elbo, - log_reconstruction, - log_divergence, - log_categorical, - log_numerical, - ) = vae.diff_priv_train( - data_loader, - n_epochs=n_epochs, - logging_freq=logging_freq, - patience=patience, - delta=delta, - C=C, - target_eps=target_eps, - target_delta=target_delta, - sample_rate=sample_rate, - noise_scale=noise_scale, - ) - print(f"(epsilon, delta): {vae.get_privacy_spent(target_delta)}") -#%% -------- Plot Loss Features ELBO Breakdown -------- # - -elbo_fig = plot_elbo( - n_epochs=training_epochs, - log_elbo=log_elbo, - 
log_reconstruction=log_reconstruction, - log_divergence=log_divergence, - saving_filepath="", - pre_proc_method=pre_proc_method, -) -#%% -------- Plot Loss Features Reconstruction Breakdown -------- # - -likelihood_fig = plot_likelihood_breakdown( - n_epochs=training_epochs, - log_categorical=log_categorical, - log_numerical=log_numerical, - saving_filepath="", - pre_proc_method=pre_proc_method, -) -#%% -------- Constraint Sampling -------- # - -synthetic_supp = constraint_filtering( - n_rows=data_supp.shape[0], - vae=vae, - reordered_cols=reordered_dataframe_columns, - data_supp_columns=data_supp.columns, - cont_transformers=continuous_transformers, - cat_transformers=categorical_transformers, - date_transformers=datetime_transformers, - pre_proc_method=pre_proc_method, -) -#%% -------- Plot Histograms For All The Variable Distributions -------- # - -plot_variable_distributions( - categorical_columns=original_categorical_columns, - continuous_columns=original_continuous_columns, - data_supp=data_supp, - synthetic_supp=synthetic_supp, - saving_filepath="", - pre_proc_method=pre_proc_method, -) -#%% -------- Datetime Handling -------- # - -# If the dataset has datetimes then we need to re-convert these to a numerical -# Value representing seconds, this is so we can evaluate the metrics on them - -metric_synthetic_supp = synthetic_supp.copy(deep=True) - -for index, column in enumerate(original_datetime_columns): - - # Fit datetime transformer - converts to seconds - temp_datetime = datetime.DatetimeTransformer() - temp_datetime.fit(metric_synthetic_supp, columns=column) - - metric_synthetic_supp = temp_datetime.transform(metric_synthetic_supp) - -#%% -------- SDV Metrics -------- # - -# Define the metrics you want the model to evaluate - -# Define distributional metrics required - for sdv_baselines this is set by default -distributional_metrics = [ - "SVCDetection", - "GMLogLikelihood", - "CSTest", - "KSTest", - "KSTestExtended", - "ContinuousKLDivergence", - "DiscreteKLDivergence", -] - -gower = False - -metrics = distribution_metrics( - gower_bool=gower, - distributional_metrics=distributional_metrics, - data_supp=original_metric_set, - synthetic_supp=metric_synthetic_supp, - categorical_columns=original_categorical_columns, - continuous_columns=original_continuous_columns, - saving_filepath="", - pre_proc_method=pre_proc_method, -) - -#%% -------- Privacy Metrics -------- # - -# Specify our private variable - -private_variable = "ETHNICITY" - -privacy_metric = privacy_metrics( - private_variable=private_variable, - data_supp=data_supp, - synthetic_supp=synthetic_supp, - categorical_columns=original_categorical_columns, - continuous_columns=original_continuous_columns, - saving_filepath=None, - pre_proc_method=pre_proc_method, -) diff --git a/Investigations/SUPPORT_Notebook.ipynb b/Investigations/SUPPORT_Notebook.ipynb deleted file mode 100644 index e8aa017..0000000 --- a/Investigations/SUPPORT_Notebook.ipynb +++ /dev/null @@ -1,499 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "9eca4994", - "metadata": {}, - "source": [ - "\n", - "# SUPPORT Notebook\n", - "\n", - "This notebook runs through investigations on the open access SUPPORT dataset.\n", - "\n", - "For users who do not have lots of computational resources or do not have access to MIMIC-III then this notebook should be used." 
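[Editor's note] A note on the constraint sampling used in the MIMIC workflow above: `constraint_filtering` rejection-samples until the listed constraints hold (age at least 0, admission after date of birth, discharge and first chart time on or after admission). A minimal sketch of that logic follows; the real utils function also reverses the transformers and handles batching, and `generate` here is an assumed callable returning an already reverse-transformed DataFrame.

```python
# Rejection-sampling sketch of constraint_filtering (illustrative only).
import pandas as pd

def satisfies_constraints(df: pd.DataFrame) -> pd.Series:
    # The four MIMIC constraints listed in the notebook text
    return (
        (df["age"] >= 0)
        & (df["ADMITTIME"] > df["DOB"])
        & (df["DISCHTIME"] >= df["ADMITTIME"])
        & (df["CHARTTIME"] >= df["ADMITTIME"])
    )

def sample_with_constraints(generate, n_rows: int) -> pd.DataFrame:
    # Keep generating and filtering until enough valid rows have accumulated
    out = pd.DataFrame()
    while len(out) < n_rows:
        batch = generate(n_rows)
        out = pd.concat([out, batch[satisfies_constraints(batch)]])
    return out.head(n_rows).reset_index(drop=True)
```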
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b724eb8b", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Import Libraries -------- #\n", - "\n", - "# Standard imports\n", - "import numpy as np\n", - "import pandas as pd\n", - "import torch\n", - "\n", - "# VAE is in other folder as well as opacus adapted library\n", - "import sys\n", - "\n", - "sys.path.append(\"../\")\n", - "\n", - "# Opacus support for differential privacy\n", - "from opacus.utils.uniform_sampler import UniformWithReplacementSampler\n", - "\n", - "# For the SUPPORT dataset\n", - "from pycox.datasets import support\n", - "\n", - "# For VAE dataset formatting\n", - "from torch.utils.data import TensorDataset, DataLoader\n", - "\n", - "# VAE functions\n", - "from VAE import Decoder, Encoder, VAE\n", - "\n", - "# Utility file contains all functions required to run notebook\n", - "from utils import (\n", - " set_seed,\n", - " support_pre_proc,\n", - " plot_elbo,\n", - " plot_likelihood_breakdown,\n", - " plot_variable_distributions,\n", - " reverse_transformers,\n", - ")\n", - "from metrics import distribution_metrics, privacy_metrics\n", - "\n", - "import warnings\n", - "\n", - "warnings.filterwarnings(\"ignore\") # We suppress warnings to avoid SDMETRICS throwing unique synthetic data warnings (i.e.\n", - "# data in synthetic set is not in the real data set) as well as SKLEARN throwing convergence warnings (pre-processing uses\n", - "# GMM from sklearn and this throws non convergence warnings)\n" - ] - }, - { - "cell_type": "markdown", - "id": "dc3b1602", - "metadata": {}, - "source": [ - "## Data Loading & Column Definitions\n", - "\n", - "First we load in the SUPPORT dataset from pycox datasets. Then we define the continuous and categorical columns in that dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a7afb3e", - "metadata": {}, - "outputs": [], - "source": [ - "set_seed(0)\n", - "\n", - "# Load in the support data\n", - "data_supp = support.read_df()\n", - "\n", - "# Column Definitions\n", - "original_continuous_columns = [\"duration\"] + [f\"x{i}\" for i in range(7, 15)]\n", - "original_categorical_columns = [\"event\"] + [f\"x{i}\" for i in range(1, 7)]\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "81156c9b", - "metadata": {}, - "source": [ - "## Data Pre-Processing\n", - "\n", - "Data can be pre-processed in 2 ways. Either we use \"standard\" option which performs a standard scaler on continuous variables - This has known limitations as:\n", - "\n", - "- Data in tables is usually non-gaussian and SynthVAE implements a gaussian loss, so this will perform worse unless the data is KNOWN to follow a gaussian distribution already.\n", - "\n", - "Or we use the second option of \"GMM\". This performs a variational gaussian mixture model to scale the data & transform it to a gaussian distribution. We use a maximum number of clusters of 10 but the variational method will select the best number of clusters for that continuous variable. This also has known limitations:\n", - "\n", - "- 10 Clusters is arbitrary and may not be enough for certain variables.\n", - "- We are fitting a model to transform the data and hence we are approximating before model is trained. This will lose fidelity as the distribution will not be transformed perfectly.\n", - "\n", - "SUPPORT is a limited dataset as it has no missingness (which our model currently does NOT handle) and it has no datetime columns or other data types. 
Be wary drawing any conclusions from this set due to these constraints as well as the dataset size. Testing/training new models with this set could be useful but conclusive results should be tested on other sets." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e663fe35", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Data Pre-Processing -------- #\n", - "\n", - "pre_proc_method = \"standard\"\n", - "\n", - "(\n", - " x_train,\n", - " data_supp,\n", - " reordered_dataframe_columns,\n", - " continuous_transformers,\n", - " categorical_transformers,\n", - " num_categories,\n", - " num_continuous,\n", - ") = support_pre_proc(data_supp=data_supp, pre_proc_method=pre_proc_method)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "0fe28857", - "metadata": {}, - "source": [ - "## Creation & Training of VAE\n", - "\n", - "We can adapt certain parameters of the model e.g. batch size, latent dimension size etc. This model implements early stopping and these values can be adapted.\n", - "\n", - "We can also activate differential privacy by implementing dp-sgd through the opacus library." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0eae5190", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Create & Train VAE -------- #\n", - "\n", - "# User defined hyperparams\n", - "# General training\n", - "batch_size = 32\n", - "latent_dim = 8\n", - "hidden_dim = 32\n", - "n_epochs = 150\n", - "logging_freq = 1 # Number of epochs we should log the results to the user\n", - "patience = 50 # How many epochs should we allow the model train to see if\n", - "# improvement is made\n", - "delta = 10 # The difference between elbo values that registers an improvement\n", - "filepath = None # Where to save the best model\n", - "\n", - "\n", - "# Privacy params\n", - "differential_privacy = False # Do we want to implement differential privacy\n", - "sample_rate = 0.1 # Sampling rate\n", - "C = 1e16 # Clipping threshold - any gradients above this are clipped\n", - "noise_scale = None # Noise multiplier - influences how much noise to add\n", - "target_eps = 1 # Target epsilon for privacy accountant\n", - "target_delta = 1e-5 # Target delta for privacy accountant\n", - "\n", - "# Prepare data for interaction with torch VAE\n", - "Y = torch.Tensor(x_train)\n", - "dataset = TensorDataset(Y)\n", - "\n", - "generator = None\n", - "sample_rate = batch_size / len(dataset)\n", - "data_loader = DataLoader(\n", - " dataset,\n", - " batch_sampler=UniformWithReplacementSampler(\n", - " num_samples=len(dataset), sample_rate=sample_rate, generator=generator\n", - " ),\n", - " pin_memory=True,\n", - " generator=generator,\n", - ")\n", - "\n", - "# Create VAE\n", - "encoder = Encoder(x_train.shape[1], latent_dim, hidden_dim=hidden_dim)\n", - "decoder = Decoder(latent_dim, num_continuous, num_categories=num_categories)\n", - "\n", - "vae = VAE(encoder, decoder)\n", - "\n", - "print(vae)\n", - "\n", - "if differential_privacy == False:\n", - " (\n", - " training_epochs,\n", - " log_elbo,\n", - " log_reconstruction,\n", - " log_divergence,\n", - " log_categorical,\n", - " log_numerical,\n", - " ) = vae.train(\n", - " data_loader, \n", - " n_epochs=n_epochs,\n", - " logging_freq=logging_freq,\n", - " patience=patience,\n", - " delta=delta,\n", - " )\n", - "\n", - "elif differential_privacy == True:\n", - " (\n", - " training_epochs,\n", - " log_elbo,\n", - " log_reconstruction,\n", - " log_divergence,\n", - " log_categorical,\n", - " log_numerical,\n", 
- " ) = vae.diff_priv_train(\n", - " data_loader,\n", - " n_epochs=n_epochs,\n", - " logging_freq=logging_freq,\n", - " patience=patience,\n", - " delta=delta,\n", - " C=C,\n", - " target_eps=target_eps,\n", - " target_delta=target_delta,\n", - " sample_rate=sample_rate,\n", - " noise_scale=noise_scale,\n", - " )\n", - " print(f\"(epsilon, delta): {vae.get_privacy_spent(target_delta)}\")\n" - ] - }, - { - "cell_type": "markdown", - "id": "e7c169e7", - "metadata": {}, - "source": [ - "## Plotting Elbo Functionality\n", - "\n", - "Here we can plot and save the ELBO graph for the training run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2208de1e", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Plot Loss Features ELBO Breakdown -------- #\n", - "\n", - "elbo_fig = plot_elbo(\n", - " n_epochs=training_epochs,\n", - " log_elbo=log_elbo,\n", - " log_reconstruction=log_reconstruction,\n", - " log_divergence=log_divergence,\n", - " saving_filepath=\"\",\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "id": "626409ac", - "metadata": {}, - "source": [ - "## Plotting Reconstruction Breakdown\n", - "\n", - "Here we can plot the breakdown of reconstruction loss i.e. visualise how the categorical and numerical losses change over training" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91ffee7c", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Plot Loss Features Reconstruction Breakdown -------- #\n", - "\n", - "likelihood_fig = plot_likelihood_breakdown(\n", - " n_epochs=training_epochs,\n", - " log_categorical=log_categorical,\n", - " log_numerical=log_numerical,\n", - " saving_filepath=\"\",\n", - " pre_proc_method=pre_proc_method,\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "id": "fadfca69", - "metadata": {}, - "source": [ - "## Synthetic Data Generation\n", - "\n", - "Here we create synthetic data ready for metric testing as well as visualisation of variable reconstruction. For this we simply generate from our generative model and then reverse transformations using the prior transformers." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9bcd8be1", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Synthetic Data Generation -------- #\n", - "\n", - "synthetic_sample = vae.generate(data_supp.shape[0])\n", - "\n", - "if torch.cuda.is_available():\n", - " synthetic_sample = pd.DataFrame(\n", - " synthetic_sample.cpu().detach().numpy(), \n", - " columns=reordered_dataframe_columns\n", - " )\n", - "else:\n", - " synthetic_sample = pd.DataFrame(\n", - " synthetic_sample.detach().numpy(), \n", - " columns=reordered_dataframe_columns\n", - " )\n", - "\n", - "# Reverse the transformations\n", - "\n", - "synthetic_supp = reverse_transformers(\n", - " synthetic_set=synthetic_sample,\n", - " data_supp_columns=data_supp.columns,\n", - " cont_transformers=continuous_transformers,\n", - " cat_transformers=categorical_transformers,\n", - " pre_proc_method=pre_proc_method,\n", - ")\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "7a7d8d8c", - "metadata": {}, - "source": [ - "## Synthetic Variable Visualisation\n", - "\n", - "Here we want to visualise the synthetic variables generated and compare them to the original set" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7fdaf4f1", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- Plot Histograms For All The Variable Distributions -------- #\n", - "\n", - "plot_variable_distributions(\n", - " categorical_columns=original_categorical_columns,\n", - " continuous_columns=original_continuous_columns,\n", - " data_supp=data_supp,\n", - " synthetic_supp=synthetic_supp,\n", - " saving_filepath=\"\",\n", - " pre_proc_method=pre_proc_method,\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "id": "09630b17", - "metadata": {}, - "source": [ - "## Metric evaluation\n", - "\n", - "We use the SDV evaluation framework. Supply the metrics you wish to find in the distributional_metrics list from SDV guidance. Can start here: https://sdv.dev/SDV/user_guides/evaluation/single_table_metrics.html\n", - "\n", - "Note that not all of these will work, some are hit and miss. We predominantly rely on continuous and discrete KL divergence measures. You can also input \"gower\" and this will calculate the gower distance using the gower library." 
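[Editor's note] For the \"gower\" option mentioned above, a minimal sketch of computing an average Gower distance with the `gower` package; SynthVAE's `distribution_metrics` wraps this differently, so the function name here is an assumption.

```python
# Illustrative average Gower distance between real and synthetic data; the
# gower package handles mixed categorical/continuous columns natively.
import gower
import numpy as np

def average_gower(real_df, synth_df) -> float:
    dists = gower.gower_matrix(real_df, synth_df)  # pairwise real-vs-synthetic distances
    return float(np.mean(dists))
```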
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0f240e21", - "metadata": {}, - "outputs": [], - "source": [ - "#%% -------- SDV Metrics -------- #\n", - "\n", - "# Define the metrics you want the model to evaluate\n", - "\n", - "# Define distributional metrics required - for sdv_baselines this is set by default\n", - "distributional_metrics = [\n", - " \"SVCDetection\",\n", - " \"GMLogLikelihood\",\n", - " \"CSTest\",\n", - " \"KSTest\",\n", - " \"KSTestExtended\",\n", - " \"ContinuousKLDivergence\",\n", - " \"DiscreteKLDivergence\",\n", - "]\n", - "\n", - "gower = False\n", - "\n", - "metrics = distribution_metrics(\n", - " gower_bool=gower,\n", - " distributional_metrics=distributional_metrics,\n", - " data_supp=data_supp,\n", - " synthetic_supp=synthetic_supp,\n", - " categorical_columns=original_categorical_columns,\n", - " continuous_columns=original_continuous_columns,\n", - " saving_filepath=None,\n", - " pre_proc_method=pre_proc_method,\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f63a661f", - "metadata": {}, - "outputs": [], - "source": [ - "metrics" - ] - }, - { - "cell_type": "markdown", - "id": "ecc9065d", - "metadata": {}, - "source": [ - "# Privacy Metric Evaluation\n", - "\n", - "Using SDV privacy metrics we can get an insight into how privacy is conserved when utilising dp-sgd methods. SDV's privacy metrics are limited in that they can only be used on similar data types. E.g. if we choose age to be the sensitive variably, we can build ML based models to predict a users age using the other columns. However we are forced to only use columns that are also continuous variables." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "de25de2b", - "metadata": {}, - "outputs": [], - "source": [ - "# Specify our private variable\n", - "\n", - "private_variable = \"x14\"\n", - "\n", - "privacy_metric = privacy_metrics(\n", - " private_variable=private_variable,\n", - " data_supp=data_supp,\n", - " synthetic_supp=synthetic_supp,\n", - " categorical_columns=original_categorical_columns,\n", - " continuous_columns=original_continuous_columns,\n", - " saving_filepath=None,\n", - " pre_proc_method=pre_proc_method,\n", - ")\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "48582f42", - "metadata": {}, - "outputs": [], - "source": [ - "privacy_metric" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "717713d2", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Investigations/SUPPORT_Runs.py b/Investigations/SUPPORT_Runs.py deleted file mode 100644 index a0108cf..0000000 --- a/Investigations/SUPPORT_Runs.py +++ /dev/null @@ -1,240 +0,0 @@ -#%% -------- Import Libraries -------- # - -# Standard imports -import numpy as np -import pandas as pd -import torch - -# VAE is in other folder -import sys - -sys.path.append("../") - -from opacus.utils.uniform_sampler import UniformWithReplacementSampler - -# For the SUPPORT dataset -from pycox.datasets import support - -# For VAE dataset formatting -from torch.utils.data import 
TensorDataset, DataLoader - -# VAE functions -from VAE import Decoder, Encoder, VAE - -from utils import ( - set_seed, - support_pre_proc, - plot_elbo, - plot_likelihood_breakdown, - plot_variable_distributions, - reverse_transformers, -) -from metrics import distribution_metrics, privacy_metrics - -import warnings - -warnings.filterwarnings( - "ignore" -) # We suppress warnings to avoid SDMETRICS throwing unique synthetic data warnings (i.e. -# data in synthetic set is not in the real data set) as well as SKLEARN throwing convergence warnings (pre-processing uses -# GMM from sklearn and this throws non convergence warnings) - -set_seed(0) - -# Load in the support data -data_supp = support.read_df() - -# Column Definitions -original_continuous_columns = ["duration"] + [f"x{i}" for i in range(7, 15)] -original_categorical_columns = ["event"] + [f"x{i}" for i in range(1, 7)] -#%% -------- Data Pre-Processing -------- # - -pre_proc_method = "standard" - -( - x_train, - data_supp, - reordered_dataframe_columns, - continuous_transformers, - categorical_transformers, - num_categories, - num_continuous, -) = support_pre_proc(data_supp=data_supp, pre_proc_method=pre_proc_method) -#%% -------- Create & Train VAE -------- # - -# User defined hyperparams -# General training -batch_size = 32 -latent_dim = 8 -hidden_dim = 32 -n_epochs = 5 -logging_freq = 1 # Number of epochs we should log the results to the user -patience = 5 # How many epochs should we allow the model train to see if -# improvement is made -delta = 10 # The difference between elbo values that registers an improvement -filepath = None # Where to save the best model - - -# Privacy params -differential_privacy = False # Do we want to implement differential privacy -sample_rate = 0.1 # Sampling rate -C = 1e16 # Clipping threshold - any gradients above this are clipped -noise_scale = None # Noise multiplier - influences how much noise to add -target_eps = 1 # Target epsilon for privacy accountant -target_delta = 1e-5 # Target delta for privacy accountant - -# Prepare data for interaction with torch VAE -Y = torch.Tensor(x_train) -dataset = TensorDataset(Y) - -# Prepare data for interaction with torch VAE -Y = torch.Tensor(x_train) -dataset = TensorDataset(Y) - -generator = None -sample_rate = batch_size / len(dataset) -data_loader = DataLoader( - dataset, - batch_sampler=UniformWithReplacementSampler( - num_samples=len(dataset), sample_rate=sample_rate, generator=generator - ), - pin_memory=True, - generator=generator, -) - -# Create VAE -encoder = Encoder(x_train.shape[1], latent_dim, hidden_dim=hidden_dim) -decoder = Decoder(latent_dim, num_continuous, num_categories=num_categories) - -vae = VAE(encoder, decoder) - -print(vae) - -if differential_privacy == False: - ( - training_epochs, - log_elbo, - log_reconstruction, - log_divergence, - log_categorical, - log_numerical, - ) = vae.train( - data_loader, - n_epochs=n_epochs, - logging_freq=logging_freq, - patience=patience, - delta=delta, - ) - -elif differential_privacy == True: - ( - training_epochs, - log_elbo, - log_reconstruction, - log_divergence, - log_categorical, - log_numerical, - ) = vae.diff_priv_train( - data_loader, - n_epochs=n_epochs, - logging_freq=logging_freq, - patience=patience, - delta=delta, - C=C, - target_eps=target_eps, - target_delta=target_delta, - sample_rate=sample_rate, - noise_scale=noise_scale, - ) - print(f"(epsilon, delta): {vae.get_privacy_spent(target_delta)}") -#%% -------- Plot Loss Features ELBO Breakdown -------- # - -elbo_fig = plot_elbo( - 
n_epochs=training_epochs, - log_elbo=log_elbo, - log_reconstruction=log_reconstruction, - log_divergence=log_divergence, - saving_filepath="", - pre_proc_method=pre_proc_method, -) -#%% -------- Plot Loss Features Reconstruction Breakdown -------- # - -likelihood_fig = plot_likelihood_breakdown( - n_epochs=training_epochs, - log_categorical=log_categorical, - log_numerical=log_numerical, - saving_filepath="", - pre_proc_method=pre_proc_method, -) -#%% -------- Synthetic Data Generation -------- # - -synthetic_sample = vae.generate(data_supp.shape[0]) - -synthetic_sample = pd.DataFrame( - synthetic_sample.cpu().detach().numpy(), - columns=reordered_dataframe_columns, -) - -# Reverse the transformations - -synthetic_supp = reverse_transformers( - synthetic_set=synthetic_sample, - data_supp_columns=data_supp.columns, - cont_transformers=continuous_transformers, - cat_transformers=categorical_transformers, - pre_proc_method=pre_proc_method, -) - -#%% -------- Plot Histograms For All The Variable Distributions -------- # - -plot_variable_distributions( - categorical_columns=original_categorical_columns, - continuous_columns=original_continuous_columns, - data_supp=data_supp, - synthetic_supp=synthetic_supp, - saving_filepath="", - pre_proc_method=pre_proc_method, -) -#%% -------- SDV Metrics -------- # - -# Define the metrics you want the model to evaluate - -gower = False - -# Define distributional metrics required - for sdv_baselines this is set by default -distributional_metrics = [ - "SVCDetection", - "GMLogLikelihood", - "CSTest", - "KSTest", - "KSTestExtended", - "ContinuousKLDivergence", - "DiscreteKLDivergence", -] - -metrics = distribution_metrics( - gower_bool=gower, - distributional_metrics=distributional_metrics, - data_supp=data_supp, - synthetic_supp=synthetic_supp, - categorical_columns=original_categorical_columns, - continuous_columns=original_continuous_columns, - saving_filepath="", - pre_proc_method=pre_proc_method, -) - -#%% -------- Privacy Metrics -------- # - -# Specify our private variable - -private_variable = "x14" - -privacy_metric = privacy_metrics( - private_variable=private_variable, - data_supp=data_supp, - synthetic_supp=synthetic_supp, - categorical_columns=original_categorical_columns, - continuous_columns=original_continuous_columns, - saving_filepath=None, - pre_proc_method=pre_proc_method, -) diff --git a/README.md b/README.md index d6b5f57..3cadeec 100644 --- a/README.md +++ b/README.md @@ -1,198 +1,72 @@ -# Synthetic Data Exploration: Variational Autoencoders -## NHSX Analytics Unit - PhD Internship Project +# NHS Synth -### About the Project +
-[![status: experimental](https://github.com/GIScience/badges/raw/master/status/experimental.svg)](https://github.com/GIScience/badges#experimental) +[![Python](https://img.shields.io/badge/python-3.8%20--%203.10-blue)](https://www.python.org/downloads/release/python-31010/) +[![MIT license](https://img.shields.io/badge/License-MIT-blue.svg)](https://lbesson.mit-license.org/) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat)](https://pycqa.github.io/isort/) -This repository holds code for the NHSX Analytics Unit PhD internship project (previously known as Synthetic Data Generation - VAE) contextualising and investigating the potential use of Variational AutoEncoders (VAEs) for synthetic health data generation initially undertaken by Dominic Danks ([last commit to the repository: commit 88a4bdf](https://github.com/nhsx/SynthVAE/commit/88a4bdf613f538af45834f22d38e52312cfe24c5)). This has then been further extended through the work undertaken by David Brind. +
-[Project Description - Synthetic Data Exploration: Variational Autoencoders](https://nhsx.github.io/nhsx-internship-projects/synthetic-data-exploration-vae/) - -_**Note:** No data, public or private are shared in this repository._ - -### Project Stucture - -- The main code is found in the root of the repository (see Usage below for more information) -- The accompanying [report](./reports/report.pdf) is also available in the `reports` folder -- More information about the VAE with Differential Privacy can be found in the [model card](./model_card.md) -- `scratch_vae_expts.py` is similar to the files you will find within the `investigations` folder. To re-run our results then `scratch_vae_expts.py` is all you require. If you want an easy way to understand our code and work process then using the respective notebooks within the `investigations` folder helps to run through the work. - -**N.B.** A modified copy of [Opacus](https://github.com/pytorch/opacus) (v0.14.0), a library for training PyTorch models with differential privacy, is contained within the repository. A modified copy of [RDT](https://github.com/sdv-dev/RDT) (v0.6.2) that allows us to set the `random_state` for our GMM transformer method is also contained within the repository. See the [model card](./model_card.md) for more details. +## About the Project -### Built With +The project currently consists of a Python package alongside research and investigative materials covering the effectiveness of the package and synthetic data more generally when applied to NHS use cases. -[![Python v3.8](https://img.shields.io/badge/python-v3.8-blue.svg)](https://www.python.org/downloads/release/python-380/) -- [PyTorch v1.9.0](https://github.com/pytorch) -- [SDV v0.13.1](https://github.com/sdv-dev/SDV) -- [Opacus v0.14.0](https://github.com/pytorch/opacus) - -### Getting Started - -#### Installation +[Project Description - Synthetic Data Exploration: Variational Autoencoders](https://nhsx.github.io/nhsx-internship-projects/synthetic-data-exploration-vae/) -To get a local copy up and running follow these simple steps. +The codebase builds on previous NHSX Analytics Unit PhD internships contextualising and investigating the potential use of Variational Auto Encoders (VAEs) for synthetic data generation. These were undertaken by Dominic Danks ([last commit to the repository: 88a4bdf](https://github.com/nhsx/NHSSynth/commit/88a4bdf613f538af45834f22d38e52312cfe24c5)) and David Brind ([last commit to the repository: ]()). 
-To clone the repo: +_**Note:** No data, public or private are shared in this repository._ -`git clone https://github.com/nhsx/SynthVAE.git` +## Getting Started -To create a suitable environment: -- ```python -m venv synth_env``` -- `source synth_env/bin/activate` -- `pip install -r requirements.txt` -- `pip uninstall rdt` (SDV installs rdt by default, however we have included an added fix so this needs to be uninstalled to avoid conflicts) +### Project Structure -#### GPU Support +- The main package and codebase is found in [`src/nhssynth`]() (see Usage below for more information) +- Accompanying materials are available in the `docs` folder: + - A [report](docs/reports/report.pdf) summarising the previous iteration of this project + - A [model card](docs/model_card.md) providing more information about the VAE with Differential Privacy +- Numerous [exemplar configurations](config) are found in `config` +- Empty `data` and `experiments` folders are provided; these are the default locations for inputs and outputs when running the project using the provided [`cli`](src/nhssynth/cli/) module +- Pre-processing notebooks for specific datasets used to assess the approach and other non-core code can be found in [`auxiliary`](auxiliary/) -This code has been tested on CPU with the given torch v1.9.0, but it has also been run in a GPU environment. The specifications for the device running this are as follows: +### Installation -- NVIDIA GeForce RTX 3070 Laptop GPU -- CUDA v11.1 -- cuDNN v8.1.0 for CUDA v11.1 +As it stands, we recommend the following steps to reproduce our experiments and fully work with this project: -Refer to the [PyTorch documentation](https://pytorch.org/get-started/previous-versions/) under v1.9.0 for the linux & windows CUDA 11.1 wheels required for the pip install. +1. Clone the repo +2. Ensure one of the required versions of Python is installed +3. Install [`poetry`](https://python-poetry.org/docs/#installation) +4. Instantiate a virtual environment, e.g. via `python -m venv nhssynth` +5. Activate the virtual environment, e.g. via `source nhssynth/bin/activate` +6. Install project dependencies with `poetry install` (optionally install `jupyter` and `notebook` to work with some of the preprocessing files in [`auxiliary`](auxiliary/)) +7. Interact with the package in one of two ways: + - Via the [`cli`](src/nhssynth/cli/) module using `poetry run cli` + - Through building the package with `poetry build` and using it in an existing project (`import nhssynth`). However, if you intend on doing the latter it may be preferable to instead follow the second, simpler setup below. -We use: -- `'pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html'` -after the pip install of the requirements file, in order to get the GPU-supported versions of PyTorch that we require. +For more standard usage of the package: -Note that the model used is a simple MLP encoder/decoder and we are working with tabular data. Therefore the speed-up provided by a GPU is not that noticeable and it is easy to train using the CPU version for people without GPU support. +1. Run `pip install nhssynth` within a supported Python installation +2. Use the modules exported by the package as you would any other. _Note that in this setup you will have to work more closely with the configuration and code to ensure you are handling inputs and outputs for each module appropriately. 
The cli handles a lot of this complexity, and interacting with the modules directly is considered advanced usage._ ### Usage -There are three main sections of interest (2 folders and a collation of python files): `Hyperparameter_Tuning`, `Investigations` and files containing `sdv_baselines.py`, `scratch_vae_expts.py` and `plot.py`. `Hyperparameter_Tuning` and `Investigations` can use either SUPPORT or internal MIMIC datasets. `sdv_baselines.py`, `scratch_vae_expts.py` and `plot.py` all use SUPPORT only. If you want to run these quickly then below outlines command line implementation. - -#### SDV Baselines - -To reproduce the experiments contained in the report involving the [SDV](https://github.com/sdv-dev/SDV) baseline models (e.g. CopulaGAN, CTGAN, GaussianCopula and TVAE), run `sdv_baselines.py`. The parameters can be found using the `--help` flag: - -``` -python sdv_baselines.py --help - -usage: sdv_baselines.py [-h] [--n_runs N_RUNS] [--model_type {CopulaGAN,CTGAN,GaussianCopula,TVAE}] [--pre_proc_method {GMM, standard}] [--save_metrics {True, False}] [--gower {True, False}] - -optional arguments: - -h, --help show this help message and exit - --n_runs N_RUNS set number of runs/seeds - --model_type {CopulaGAN,CTGAN,GaussianCopula,TVAE} - set model for baseline experiment - --pre_proc_method {GMM, standard} - set the pre-processing method - --save_metrics {True, False} - set if you wish to save the metrics for this model run - saves default as Metric Breakdown.csv unless changed - --gower {True, False} - calculate the average gower distance -``` - -#### Scratch VAE + Differential Privacy - -To reproduce the experiments contained in the report involving the VAE with/without differential privacy, run `scratch_vae_expts.py`. The parameters can be found using the `--help` flag: - -``` -python scratch_vae_expts.py --help - -usage: scratch_vae_expts.py [-h] [--n_runs N_RUNS] [--diff_priv DIFF_PRIV] [--n_epochs N_EPOCHS] [--save_file {True, False}] [--save_visualisation {True, False}] [--save_metrics {True, False}] [--pre_proc_method {GMM, standard}] [--gower {True, False}] - -optional arguments: - -h, --help show this help message and exit - --n_runs N_RUNS set number of runs/seeds - --diff_priv DIFF_PRIV - run VAE with differential privacy - --n_epochs N_EPOCHS number of epochs to train for (early stopping is used by default) - --save_file {True, False} - save trained model's state_dict as 'trained_SynthVAE.pt' - --save_visualisation {True, False} - save the loss & variable plots - --save_metrics {True, False} - calculate and save the metrics of the training set - --pre_proc_method {GMM, standard} - set the pre-processing method - --gower {True, False} - calculate the average gower distance -``` - -Code to load a saved model and generate correlation heatmaps is contained within `plot.py`. -The file containing the save model's `state_dict` should be provided via a command line argument: - -``` -python plot.py --help - -usage: plot.py [-h] [--save_file SAVEFILE] [--pre_proc_method {GMM, standard}] - -optional arguments: - -h, --help show this help message and exit - --save_file SAVEFILE load trained model's state_dict from file - --pre_proc_method {GMM, standard} - specify the pre-processing method that you wish to employ -``` - -#### Outputs Of The Model - -There are a selection of plots & metrics the model can output. 
These are produced via the command-line arguments `--save_metrics` and `--save_visualisation`, or via the functions `plot_elbo`, `plot_likelihood_breakdown`, `plot_variable_distributions` & `distribution_metrics` in the notebooks provided. These outputs give you a graph displaying the ELBO breakdown during training, the breakdown of the categorical & numerical likelihoods, and a comparison of variable distributions between the original & synthetic data for each variable, as well as a csv file containing all the distributional metrics from SDV.
-[Figure: ELBO Breakdown]
-
-[Figure: Likelihood Breakdown]
-
-[Figure: Example Continuous Variable Comparison]
-
-[Figure: Example Discrete Variable Comparison]
-
-The distributional metrics produce a csv whose columns follow this order, with one row per run/seed: - -| SVCDetection | GMLogLikelihood | CSTest | KSTest | KSTestExtended | ContinuousKLDivergence | DiscreteKLDivergence | -| --- | --- | --- | --- | --- | --- | --- | -| 0.32 | -520.01 | 0.91 | 0.85 | 0.87 | 0.91 | 0.97 | -| 0.31 | -523.21 | 0.90 | 0.86 | 0.88 | 0.92 | 0.99 | - -For information on these metrics, look in the [report](./reports/report.pdf) as well as the [SDV single table metrics documentation](https://sdv.dev/SDV/user_guides/evaluation/single_table_metrics.html) - -#### Note On Reproducibility Of Results - -In order to get reproducible results we have added the `random_state` argument to the RDT transformers in order to set sklearn's `random_state` argument. This results in the GMM pre-processing method producing the same transformation on each run for the same dataset. We also set the PyTorch seed at the top of each file using the `set_seed` function found in utils. If you do not wish to nullify the randomness in training then remove this `set_seed` line at the start of the files. - -#### Dataset - -Experiments are run against the [Study to Understand Prognoses Preferences Outcomes and Risks of Treatment (SUPPORT) dataset](https://biostat.app.vumc.org/wiki/Main/SupportDesc) accessed via the [pycox](https://github.com/havakv/pycox) python library. Further experiments to test the scalability of the model were also performed on a pre-processed single table extracted from the [MIMIC-III dataset](https://physionet.org/content/mimiciii/1.4/). The pre-processing to access this single table can be found within the [SynthVAE files](./MIMIC_preproc.ipynb). - -Your dataset should follow a simple structure as shown in the [example table](./example_input.csv) - continuous, categorical & datetime variables with no missingness or NaN values. The number of columns and rows can be as large as required. +This package comprises a pipeline that is runnable via `poetry run cli pipeline ` or `poetry run cli config `. You can run the modules that make up this pipeline independently via `poetry run cli <module>`. To see the modules that are available and their corresponding arguments and function, run `poetry run cli --help` / `poetry run cli <module> --help`. ### Roadmap -See the [open issues](https://github.com/nhsx/SynthVAE/issues) for a list of proposed features (and known issues). +See the [open issues](https://github.com/nhsx/NHSSynth/issues) for a list of proposed features (and known issues). ### Contributing Contributions are what make the open source community such an amazing place to learn, inspire, and create. Any contributions you make are **greatly appreciated**. -1. Fork the Project -2. Create your Feature Branch (`git checkout -b feature/AmazingFeature`) -3. Commit your Changes (`git commit -m 'Add some AmazingFeature'`) -4. Push to the Branch (`git push origin feature/AmazingFeature`) -5. Open a Pull Request +1. Fork the project +2. Create your branch (`git checkout -b /`) +3. Commit your changes (`git commit -m 'Add some amazing feature'`) +4. Push to the branch (`git push origin /`) +5. Open a PR and we will try to get it merged! 
_See [CONTRIBUTING.md](./CONTRIBUTING.md) for detailed guidance._ diff --git a/MIMIC_preproc.ipynb b/auxiliary/MIMIC_preproc.ipynb similarity index 63% rename from MIMIC_preproc.ipynb rename to auxiliary/MIMIC_preproc.ipynb index 9ac8fa7..4e369d0 100644 --- a/MIMIC_preproc.ipynb +++ b/auxiliary/MIMIC_preproc.ipynb @@ -31,13 +31,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "bd97d146", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", + "import time\n", "from random import randrange, seed\n", "from tqdm import tqdm" ] @@ -54,20 +55,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "6419f34c", "metadata": {}, "outputs": [], "source": [ - "mimic_path = \"../Private MIMIC Data/Original MIMIC Data/\" # Path containing MIMIC data\n", - "output_path = \"../outputs/\" # Path to save the created datasets\n", - "\n", - "admissions = pd.read_csv(mimic_path + \"admissions.csv.gz\")\n", - "chartevents = pd.read_csv(mimic_path + \"CHARTEVENTS.csv.gz\", nrows=10000000)\n", - "icustays = pd.read_csv(mimic_path + \"ICUSTAYS.csv.gz\")\n", - "items = pd.read_csv(mimic_path + \"D_ITEMS.csv.gz\")\n", - "outputevents = pd.read_csv(mimic_path + \"OUTPUTEVENTS.csv.gz\")\n", - "patients = pd.read_csv(mimic_path + \"PATIENTS.csv.gz\")" + "mimic_path = \"../data/\" # Path containing MIMIC data\n", + "output_path = \"../data/\" # Path to save the created datasets\n", + "\n", + "admissions = pd.read_csv(mimic_path + \"ADMISSIONS.csv\")\n", + "chartevents = pd.read_csv(mimic_path + \"CHARTEVENTS.csv\", nrows=10000000)\n", + "icustays = pd.read_csv(mimic_path + \"ICUSTAYS.csv\")\n", + "items = pd.read_csv(mimic_path + \"D_ITEMS.csv\")\n", + "outputevents = pd.read_csv(mimic_path + \"OUTPUTEVENTS.csv\")\n", + "patients = pd.read_csv(mimic_path + \"PATIENTS.csv\")" ] }, { @@ -82,10 +83,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "1ef0408f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 46520/46520 [00:31<00:00, 1493.63it/s]\n" + ] + } + ], "source": [ "seed(2021)\n", "## Augment patient DOBs\n", @@ -155,10 +164,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "8a38da68", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 26927/26927 [00:31<00:00, 865.58it/s] \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Small input table saved, number of columns: 13, number of rows: 11040\n" + ] + } + ], "source": [ "seed(2021)\n", "# Generate small input data file\n", @@ -176,9 +200,9 @@ " admit_min = len(pd.date_range(start=pd.to_datetime(row['DOB'],format='%Y-%m-%d %H:%M:%S'),end=pd.to_datetime('2021-12-01 00:00:00',format='%Y-%m-%d %H:%M:%S'),freq='D'))\n", " stay_len = len(pd.date_range(start=pd.to_datetime(row['ADMITTIME'],format='%Y-%m-%d %H:%M:%S'),end=pd.to_datetime(row['DISCHTIME'],format='%Y-%m-%d %H:%M:%S'),freq='S'))\n", "\n", - " num_days_admit = randrange(np.round(admit_min*0.25,0),np.round(admit_min*0.9,0)+5)\n", + " num_days_admit = randrange(np.round(admit_min*0.25).astype(int),np.round(admit_min*0.9).astype(int)+5)\n", " num_days_disch = randrange(0,50)\n", - " num_secs_chart = randrange(np.round(stay_len*0.01,0)+1,np.round(stay_len*0.99,0)+10)\n", + " num_secs_chart = 
randrange(np.round(stay_len*0.01).astype(int)+1,np.round(stay_len*0.99).astype(int)+10)\n", "    new_admit_date = pd.to_datetime(row['DOB'],format='%Y-%m-%d %H:%M:%S') + pd.DateOffset(days=num_days_admit)\n", "    new_admits.append(new_admit_date)\n", "    new_dischs.append(new_admit_date + pd.DateOffset(days=num_days_disch))\n", @@ -195,16 +219,165 @@ "mimic_table['DOB'] = pd.to_datetime(mimic_table['DOB'], format='%Y-%m-%d %H:%M:%S')\n", "mimic_table['age'] = (age_calc - mimic_table['DOB']).astype('<m8[Y]')\n", + "[HTML rendering of the mimic_table.head() output omitted - it duplicated the text/plain table below]
\n", + "" + ], + "text/plain": [ + " SUBJECT_ID ETHNICITY ADMITTIME DISCHTIME DISCHARGE_LOCATION \\\n", + "0 23 WHITE 2004-07-07 2004-08-16 HOME HEALTH CARE \n", + "1 23 WHITE 1998-10-18 1998-11-02 HOME HEALTH CARE \n", + "2 34 WHITE 1997-04-12 1997-05-12 HOME \n", + "3 34 WHITE 1980-03-31 1980-04-20 HOME HEALTH CARE \n", + "4 107 HISPANIC OR LATINO 1997-12-30 1998-01-24 HOME \n", + "\n", + " GENDER DOB FIRST_CAREUNIT CHARTTIME VALUE VALUEUOM \\\n", + "0 M 1974-07-17 SICU 2004-07-10 08:33:40 58.0 mmHg \n", + "1 M 1974-07-17 SICU 1998-10-18 11:43:14 58.0 mmHg \n", + "2 M 1962-07-18 CCU 1997-04-13 18:17:38 66.0 mmHg \n", + "3 M 1962-07-18 CCU 1980-03-31 20:17:11 66.0 mmHg \n", + "4 M 1975-04-02 MICU 1997-12-30 15:45:56 61.0 mmHg \n", + "\n", + " LABEL age \n", + "0 Non Invasive Blood Pressure diastolic 47.0 \n", + "1 Non Invasive Blood Pressure diastolic 47.0 \n", + "2 Non Invasive Blood Pressure diastolic 59.0 \n", + "3 Non Invasive Blood Pressure diastolic 59.0 \n", + "4 Non Invasive Blood Pressure diastolic 46.0 " + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "mimic_table.head()\n" ] @@ -231,10 +404,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "65ab26b0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/fy/skyvcp4s3tj6z_xhj2sv7jf00000gn/T/ipykernel_42777/1106345372.py:36: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " patient_stays.drop_duplicates(inplace=True)\n", + "100%|██████████| 90891/90891 [01:14<00:00, 1222.72it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mid sized input table saved, number of columns: 13, number of rows: 81665\n" + ] + } + ], "source": [ "seed(2021)\n", "# Generate mid-sized input file\n", @@ -246,8 +439,8 @@ "\n", "df_list = []\n", "\n", - "split_one = int(np.round(len(total_subjects)*0.3,0))\n", - "split_two = int(np.round(len(total_subjects)*0.9,0))\n", + "split_one = int(np.round(len(total_subjects)*0.3).astype(int))\n", + "split_two = int(np.round(len(total_subjects)*0.9).astype(int))\n", "\n", "for sub_index in range(0,split_one):\n", " search_id = total_subjects[sub_index]\n", @@ -277,9 +470,9 @@ " admit_min = len(pd.date_range(start=pd.to_datetime(row['DOB'],format='%Y-%m-%d %H:%M:%S'),end=pd.to_datetime('2021-12-01 00:00:00',format='%Y-%m-%d %H:%M:%S'),freq='D'))\n", " stay_len = len(pd.date_range(start=pd.to_datetime(row['ADMITTIME'],format='%Y-%m-%d %H:%M:%S'),end=pd.to_datetime(row['DISCHTIME'],format='%Y-%m-%d %H:%M:%S'),freq='S'))\n", "\n", - " num_days_admit = randrange(np.round(admit_min*0.25,0),np.round(admit_min*0.9,0)+5)\n", + " num_days_admit = randrange(np.round(admit_min*0.25).astype(int),np.round(admit_min*0.9).astype(int)+5)\n", " num_days_disch = randrange(0,50)\n", - " num_secs_chart = randrange(np.round(stay_len*0.01,0)+1,np.round(stay_len*0.99,0)+10)\n", + " num_secs_chart = randrange(np.round(stay_len*0.01).astype(int)+1,np.round(stay_len*0.99).astype(int)+10)\n", " new_admit_date = pd.to_datetime(row['DOB'],format='%Y-%m-%d %H:%M:%S') + pd.DateOffset(days=num_days_admit)\n", " new_admits.append(new_admit_date)\n", " new_dischs.append(new_admit_date + pd.DateOffset(days=num_days_disch))\n", @@ -295,7 +488,7 @@ 
"age_calc = pd.Timestamp('2021-12-01 00:00:00')\n", "mimic_table['DOB'] = pd.to_datetime(mimic_table['DOB'], format='%Y-%m-%d %H:%M:%S')\n", "mimic_table['age'] = (age_calc - mimic_table['DOB']).astype(' [B, 16, 14, 14] - x = F.max_pool2d(x, 2, 1) # -> [B, 16, 13, 13] - x = F.relu(self.conv2(x)) # -> [B, 32, 5, 5] - x = F.max_pool2d(x, 2, 1) # -> [B, 32, 4, 4] - x = x.view(-1, 32 * 4 * 4) # -> [B, 512] - x = F.relu(self.fc1(x)) # -> [B, 32] - x = self.fc2(x) # -> [B, 10] - return x -``` - -## Limitations of backward hooks -The implementation of gradient clipping in autograd_grad_sample.py uses backward hooks to capture per-sample gradients. -The `register_backward hook` function has a known issue being tracked at https://github.com/pytorch/pytorch/issues/598. However, this is the only known way of implementing this as of now (your suggestions and contributions are very welcome). The behavior has been verified to be correct for the layers currently supported by opacus. diff --git a/opacus/__init__.py b/opacus/__init__.py deleted file mode 100644 index 0e95747..0000000 --- a/opacus/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from . import utils -from .per_sample_gradient_clip import PerSampleGradientClipper -from .privacy_engine import PrivacyEngine -from .version import __version__ - - -__all__ = ["PrivacyEngine", "PerSampleGradientClipper", "utils", "__version__"] diff --git a/opacus/dp_model_inspector.py b/opacus/dp_model_inspector.py deleted file mode 100644 index fd95deb..0000000 --- a/opacus/dp_model_inspector.py +++ /dev/null @@ -1,222 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from opacus.grad_sample import GradSampleModule -from opacus.utils.module_inspection import ModelInspector, get_layer_type -from torch import nn - - -class IncompatibleModuleException(Exception): - r""" - Exception class to be thrown in case - the given model contains incompatible modules. - """ - - pass - - -class DPModelInspector: - r""" - Class to validate if a given module meets the requirements for attaching - ``PrivacyEngine``. - - Active checks are listed in the ``DPModelInspector.inspectors`` attribute. - """ - - def __init__(self, should_throw: bool = True): - r""" - Args: - should_throw: Whether the inspector should throw an exception or - return False in case of validation error - """ - self.should_throw = should_throw - - self.inspectors = [ - # Inspector to check model only consists of sub-modules we support - ModelInspector( - name="validity", - predicate=_is_valid_check, - message="grad_sampler method is not yet supported for this module.", - ), - # Inspector to check for BatchNorms as they could be replaced with groupnorm - ModelInspector( - name="batchnorm", - predicate=_no_batchnorm_check, - message="Model contains BatchNorm layers. 
It is recommended" - "That they are replaced with GroupNorm.", - ), - # Inspector to check that instance norms doesn't track running stats - ModelInspector( - name="running_stats", - predicate=_no_running_stats_instancenorm_check, - message="InstanceNorm layer initialised with track_running_stats=True" - "This is currently not supported.", - ), - # Inspector to check the number of groups in Conv2d layers - ModelInspector( - name="conv_group_number", - predicate=_conv_group_number_check, - message="Number of groups in Conv2d layer must be either 1 or equal to number of channels.", - ), - # Inspector to check for LSTM as it can be replaced with DPLSTM - ModelInspector( - name="lstm", - predicate=_no_lstm, - message="Model contains LSTM layers. It is recommended that they are" - "replaced with DPLSTM.", - ), - # Inspector to check that the module is in training mode - ModelInspector( - name="train_mode", - predicate=_is_in_training_mode, - message=( - "Your model must be in training mode for the PrivacyEngine to compute " - "per-sample gradients. You can put your model in training mode by simply calling " - "module.train(). If you have part of the model that you want to keep frozen, " - "the best approach is to split your model into a frozen backbone and a trainable " - "head and passing only the head to the optimizer and PrivacyEngine. See an example " - "in this colab: https://bit.ly/opacus-dev-day" - ), - ), - ] - - def validate(self, model: nn.Module) -> bool: - r""" - Runs the validation on the model and all its submodules. - - - Validation comprises a series of individual - :class:`ModelInspectors `, - each checking one predicate. Depending on ``should_throw`` flag in - the constructor, will either return False or throw - :class:`~opacus.dp_model_inspector.IncompatibleModuleException` in case of - validation failure. - - Notes: - This method is called in :meth:`opacus.privacy_engine.PrivacyEngine.attach`. - - Args: - model: The model to validate. - - Returns: - True if successful. False if validation fails and ``should_throw == False`` - - Raises: - IncompatibleModuleException - If the validation fails and ``should_throw == True``. Exception message will - contain the details of validation failure reason. - - Example: - >>> inspector = DPModelInspector() - >>> valid_model = nn.Linear(16, 32) - >>> is_valid = inspector.validate(valid_model) - >>> is_valid - True - >>> invalid_model = nn.BatchNorm1d(2) - >>> is_valid = inspector.validate(invalid_model) - # IncompatibleModuleException is thrown. - """ - valid = all(inspector.validate(model) for inspector in self.inspectors) - if self.should_throw and not valid: - message = "Model contains incompatible modules." - for inspector in self.inspectors: - if inspector.violators: - message += f"\n{inspector.message}: {inspector.violators}" - raise IncompatibleModuleException(message) - return valid - - -def _is_valid_check(module: nn.Module) -> bool: - r""" - Checks if the ``module`` is supported by ``autograd_grad_sample`` - - Args: - module: The model to validate. - - Returns: - True if ``module`` is supported by ``autograd_grad_sample`` - """ - return GradSampleModule.is_supported(module) - - -def _is_in_training_mode(module: nn.Module) -> bool: - r""" - Checks if the ``module`` is in train mode - - Args: - module: The model to validate. 
- - Returns: - True if ``module`` is in train mode - """ - return module.training - - -def _conv_group_number_check(module: nn.Module) -> bool: - r""" - Checks if the number of groups in an ``nn.Conv2d`` layer is valid - - Args: - module: The input module (layer) for which the check occurs. - - Returns: - True if the number of groups in the ``nn.Conv2d`` layer of ``module`` is valid - """ - if isinstance(module, nn.Conv2d): - return module.groups == 1 or module.groups == module.in_channels - - return True - - -def _no_batchnorm_check(module: nn.Module) -> bool: - r""" - Checks if the input module is not BatchNorm. - - This check overlaps with ``_is_valid_check``, but provides a more targeted remedy. - - Args: - module: The input module - - Returns: - True if the input module is not BatchNorm - """ - return not isinstance(module, nn.modules.batchnorm._BatchNorm) - - -def _no_running_stats_instancenorm_check(module: nn.Module) -> bool: - r""" - Checks that an ``InstanceNorm`` layer has ``track_running_stats`` set to False - - Args: - module: The input module (layer) for which the check is verified. - - Returns: - True if the module is not ``InstanceNorm``; otherwise, True if the - module (layer) has ``track_running_stats`` set to False, and False otherwise. - - """ - is_instancenorm = get_layer_type(module) in ( - "InstanceNorm1d", - "InstanceNorm2d", - "InstanceNorm3d", - ) - - if is_instancenorm: - return not module.track_running_stats - return True - - -def _no_lstm(module: nn.Module) -> bool: - r""" - Checks if the input module is not LSTM. - - Args: - module: The input module - - Returns: - True if the input module is not LSTM - """ - is_lstm = get_layer_type(module) == "LSTM" - - return not is_lstm diff --git a/opacus/grad_sample/README.md b/opacus/grad_sample/README.md deleted file mode 100644 index b539731..0000000 --- a/opacus/grad_sample/README.md +++ /dev/null @@ -1,16 +0,0 @@ -# Grad Samples -An integral part of Opacus is computing per-sample gradients. In order to make this computation as fast as possible, we provide vectorized code for each of the most common "basic modules" that are the building blocks of most ML models. If your model uses these building blocks, then you don't have to do anything! - -We always welcome PRs to add nn.Modules we don't yet support, but we also support registering custom grad_sample functions that can expand support just for your project, or even override Opacus's default implementations if they don't suit your needs. - -Override as follows: - -```python -from opacus.grad_sample import register_grad_sampler - -@register_grad_sampler(nn.MyCustomClass) -def compute_grad_sample(module, activations, backprops): - pass -``` - -Note that you can also pass a list to the decorator to register one function against multiple nn.Module classes. diff --git a/opacus/grad_sample/__init__.py b/opacus/grad_sample/__init__.py deleted file mode 100644 index 9220555..0000000 --- a/opacus/grad_sample/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -from .conv import compute_conv_grad_sample # noqa -from .dp_lstm import compute_lstm_linear_grad_sample # noqa -from .dp_multihead_attention import compute_sequence_bias_grad_sample # noqa -from .embedding import compute_embedding_grad_sample # noqa -from .grad_sample_module import GradSampleModule -from .group_norm import compute_group_norm_grad_sample # noqa -from .instance_norm import compute_instance_norm_grad_sample # noqa -from .layer_norm import compute_layer_norm_grad_sample # noqa -from .linear import compute_linear_grad_sample # noqa -from .utils import ( - create_or_accumulate_grad_sample, - create_or_extend_grad_sample, - register_grad_sampler, -) - - -__all__ = [ - "GradSampleModule", - "register_grad_sampler", - "create_or_accumulate_grad_sample", - "create_or_extend_grad_sample", -] diff --git a/opacus/grad_sample/conv.py b/opacus/grad_sample/conv.py deleted file mode 100644 index 75f8a41..0000000 --- a/opacus/grad_sample/conv.py +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from typing import Union - -import numpy as np -import torch -import torch.nn as nn -from opacus.utils.tensor_utils import unfold3d - -from .utils import create_or_extend_grad_sample, register_grad_sampler - - -@register_grad_sampler([nn.Conv1d, nn.Conv2d, nn.Conv3d]) -def compute_conv_grad_sample( - layer: Union[nn.Conv2d, nn.Conv1d], - A: torch.Tensor, - B: torch.Tensor, - batch_dim: int = 0, -) -> None: - """ - Computes per sample gradients for convolutional layers - - Args: - layer: Layer - A: Activations - B: Backpropagations - batch_dim: Batch dimension position - """ - n = A.shape[0] - # get A and B in shape depending on the Conv layer - if type(layer) == nn.Conv2d: - A = torch.nn.functional.unfold( - A, - layer.kernel_size, - padding=layer.padding, - stride=layer.stride, - dilation=layer.dilation, - ) - B = B.reshape(n, -1, A.shape[-1]) - elif type(layer) == nn.Conv1d: - # unfold doesn't work for 3D tensors; so force it to be 4D - A = A.unsqueeze(-2) # add the H dimension - # set arguments to tuples with appropriate second element - A = torch.nn.functional.unfold( - A, - (1, layer.kernel_size[0]), - padding=(0, layer.padding[0]), - stride=(1, layer.stride[0]), - dilation=(1, layer.dilation[0]), - ) - B = B.reshape(n, -1, A.shape[-1]) - elif type(layer) == nn.Conv3d: - A = unfold3d( - A, - kernel_size=layer.kernel_size, - padding=layer.padding, - stride=layer.stride, - dilation=layer.dilation, - ) - B = B.reshape(n, -1, A.shape[-1]) - - # n=batch_sz; o=num_out_channels; p=(num_in_channels/groups)*kernel_sz - grad_sample = torch.einsum("noq,npq->nop", B, A) - # rearrange the above tensor and extract diagonals. - grad_sample = grad_sample.view( - n, - layer.groups, - -1, - layer.groups, - int(layer.in_channels / layer.groups), - np.prod(layer.kernel_size), - ) - grad_sample = torch.einsum("ngrg...->ngr...", grad_sample).contiguous() - shape = [n] + list(layer.weight.shape) - - create_or_extend_grad_sample(layer.weight, grad_sample.view(shape), batch_dim) - - if layer.bias is not None: - create_or_extend_grad_sample(layer.bias, torch.sum(B, dim=2), batch_dim) diff --git a/opacus/grad_sample/dp_lstm.py b/opacus/grad_sample/dp_lstm.py deleted file mode 100644 index 39fb662..0000000 --- a/opacus/grad_sample/dp_lstm.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - - -import torch -from opacus.layers.dp_lstm import LSTMLinear - -from .utils import create_or_accumulate_grad_sample, register_grad_sampler - - -@register_grad_sampler(LSTMLinear) -def compute_lstm_linear_grad_sample( - layer: LSTMLinear, A: torch.Tensor, B: torch.Tensor, batch_dim: int = 0 -) -> None: - """ - Computes per sample gradients for ``LSTMLinear`` layer. The DPLSTM class is written using - this layer as its building block. - - class - - Args: - layer: Layer - A: Activations - B: Backpropagations - batch_dim: Batch dimension position - """ - - gs = torch.einsum("n...i,n...j->nij", B, A) - create_or_accumulate_grad_sample(layer.weight, gs, layer) - - if layer.bias is not None: - create_or_accumulate_grad_sample( - layer.bias, torch.einsum("n...k->nk", B), layer, - ) diff --git a/opacus/grad_sample/dp_multihead_attention.py b/opacus/grad_sample/dp_multihead_attention.py deleted file mode 100644 index c1be2e2..0000000 --- a/opacus/grad_sample/dp_multihead_attention.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - - -import torch -from opacus.layers.dp_multihead_attention import SequenceBias - -from .utils import create_or_extend_grad_sample, register_grad_sampler - - -@register_grad_sampler(SequenceBias) -def compute_sequence_bias_grad_sample( - layer: SequenceBias, A: torch.Tensor, B: torch.Tensor, batch_dim: int = 0 -) -> None: - """ - Computes per sample gradients for ``SequenceBias`` layer - - Args: - layer: Layer - A: Activations - B: Backpropagations - batch_dim: Batch dimension position - """ - create_or_extend_grad_sample(layer.bias, B[:, -1], batch_dim) diff --git a/opacus/grad_sample/embedding.py b/opacus/grad_sample/embedding.py deleted file mode 100644 index 4ed19f3..0000000 --- a/opacus/grad_sample/embedding.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import torch -import torch.nn as nn - -from .utils import create_or_extend_grad_sample, register_grad_sampler - - -@register_grad_sampler(nn.Embedding) -def compute_embedding_grad_sample( - layer: nn.Embedding, A: torch.Tensor, B: torch.Tensor, batch_dim: int = 0 -) -> None: - """ - Computes per sample gradients for ``nn.Embedding`` layer. - - Args: - layer: Layer - A: Activations - B: Backpropagations - batch_dim: Batch dimension position - """ - saved = torch.backends.cudnn.deterministic - torch.backends.cudnn.deterministic = True - - batch_size = A.shape[batch_dim] - index = ( - A.unsqueeze(-1) - .expand(*A.shape, layer.embedding_dim) - .reshape(batch_size, -1, layer.embedding_dim) - ) - grad_sample = torch.zeros( - batch_size, *layer.weight.shape, device=layer.weight.device - ) - grad_sample.scatter_add_(1, index, B.reshape(batch_size, -1, layer.embedding_dim)) - torch.backends.cudnn.deterministic = saved - - create_or_extend_grad_sample(layer.weight, grad_sample, batch_dim) diff --git a/opacus/grad_sample/grad_sample_module.py b/opacus/grad_sample/grad_sample_module.py deleted file mode 100644 index c8bdd6e..0000000 --- a/opacus/grad_sample/grad_sample_module.py +++ /dev/null @@ -1,283 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -from functools import partial -from typing import Iterable, List, Tuple - -import torch -import torch.nn as nn -from opacus.layers.dp_lstm import DPLSTM, LSTMLinear -from opacus.utils.module_inspection import requires_grad - - -class UnsupportedModuleError(ValueError): - pass - - -class GradSampleModule(nn.Module): - r""" - Extends nn.Module so that its parameter tensors have an extra field called .grad_sample. - """ - GRAD_SAMPLERS = {} - - def __init__(self, m: nn.Module, *, batch_first=True, loss_reduction="mean"): - super().__init__() - self._module = m - self.hooks_enabled = False - self.batch_first = batch_first - self.loss_reduction = loss_reduction - self.add_hooks(loss_reduction=loss_reduction, batch_first=batch_first) - - def forward(self, x): - return self._module(x) - - def zero_grad(self): - self.del_grad_sample() - super().zero_grad() - - def del_grad_sample(self): - """ - Deletes ``.grad_sample`` from this module's parameters. - - Why del? Normally, ``zero_grad()`` would do ``p.grad.zero_()`` and keep the allocation. - Normal grads can do this, because their shape is always the same. - Grad samples do not behave like this, because they accumulate over the batch dim. - If you have ``batch_size=32`` and size (12, 16) and you backprop twice, you should - expect to have grad_samples of size [64, 12, 16]. If you backprop once more, - then you'll get size [96, 12, 16] and so on. - So when you zero out, you should be left with nothing so you can start over. - """ - for p in self.parameters(): - if hasattr(p, "grad_sample") and p.grad_sample is not None: - if p.grad_sample.grad_fn is not None: - p.grad_sample.detach_() - else: - p.grad_sample.requires_grad_(False) - - del p.grad_sample - - def to_standard_module(self) -> nn.Module: - """ - Returns the standard nn.Module wrapped by this, eliminating all traces - of grad samples and hooks - - Returns: - The wrapped module - """ - self._close() - return self._module - - def add_hooks(self, loss_reduction: str = "mean", batch_first: bool = True) -> None: - """ - Adds hooks to model to save activations and backprop values. - The hooks will - 1. save activations into param.activations during forward pass - 2. compute per-sample gradients in params.grad_sample during backward pass. - Call ``remove_hooks(model)`` to disable this. 
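For orientation, a minimal sketch of the wrapper end to end; `loss_reduction="sum"` is chosen here so that a summed loss yields correctly scaled per-sample gradients:

```python
import torch
import torch.nn as nn
from opacus.grad_sample import GradSampleModule

model = GradSampleModule(nn.Linear(4, 2), loss_reduction="sum")
x = torch.randn(8, 4)
model(x).sum().backward()

for p in model.parameters():
    # torch.Size([8, 2, 4]) for the weight, torch.Size([8, 2]) for the bias:
    # one gradient per sample, stacked along the batch dimension
    print(p.grad_sample.shape)
```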
- - Args: - model: the model to which hooks are added - loss_type: either "mean" or "sum" depending on whether backpropped - loss was averaged or summed over batch (default: "mean") - batch_dim: the batch dimension (default: 0) - """ - if hasattr(self._module, "autograd_grad_sample_hooks"): - raise ValueError("Trying to add hooks twice to the same model") - else: - self._module.autograd_grad_sample_hooks = [] - self.autograd_grad_sample_hooks = self._module.autograd_grad_sample_hooks - - for module in self.trainable_modules(): - if type(module) in self.GRAD_SAMPLERS: - self.autograd_grad_sample_hooks.append( - module.register_forward_hook(self.capture_activations_hook) - ) - - self.autograd_grad_sample_hooks.append( - module.register_full_backward_hook( - partial( - self.capture_backprops_hook, - loss_reduction=loss_reduction, - batch_first=batch_first, - ) - ) - ) - self.enable_hooks() - - def remove_hooks(self) -> None: - """ - Removes hooks added by ``add_hooks()`` - """ - self.disable_hooks() - if not hasattr(self, "autograd_grad_sample_hooks"): - raise ValueError("Asked to remove hooks, but no hooks found") - else: - while self.autograd_grad_sample_hooks: - handle = self.autograd_grad_sample_hooks.pop() - handle.remove() - delattr(self, "autograd_grad_sample_hooks") - delattr(self._module, "autograd_grad_sample_hooks") - - def disable_hooks(self) -> None: - r""" - Globally disable all hooks installed by this library. - Why is this needed? As per https://github.com/pytorch/pytorch/issues/25723, there is - a bug in Autograd that makes removing hooks do nothing if the graph was already - constructed. For this reason, we have this method to at least turn them off. - """ - self.hooks_enabled = False - - def enable_hooks(self) -> None: - r""" - The opposite of ``disable_hooks()``. Hooks are always enabled unless you explicitly - disable them so you don't need to call this unless you want to re-enable them. - """ - self.hooks_enabled = True - - def parametrized_modules(self) -> Iterable[nn.Module]: - """ - Recursively iterates over all submodules, returning those that - have parameters (as opposed to "wrapper modules" that just organize modules). - """ - yield from ( - m - for m in self._module.modules() - if any(p is not None for p in m.parameters(recurse=False)) - ) - - def trainable_modules(self) -> Iterable[nn.Module]: - """ - Recursively iterates over all submodules, returning those that - have parameters and are trainable (ie they want a grad). 
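A short sketch of the frozen-backbone pattern that `trainable_modules()` (and the train_mode message earlier) presupposes; the module names here are illustrative:

```python
import torch.nn as nn

backbone = nn.Linear(16, 16)
head = nn.Linear(16, 2)
for p in backbone.parameters():
    p.requires_grad = False   # frozen: no grad wanted, so no grad_sample needed

model = nn.Sequential(backbone, head)
# Wrapped in GradSampleModule, only `head` is yielded by trainable_modules(),
# so hooks (and per-sample gradients) are attached to the head alone.
```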
- """ - yield from ( - m - for m in self.parametrized_modules() - if any(p.requires_grad for p in m.parameters()) - ) - - def __repr__(self): - return f"GradSample({self._module.__repr__()})" - - def _close(self): - self.del_grad_sample() - self.remove_hooks() - - def capture_activations_hook( - self, - module: nn.Module, - forward_input: List[torch.Tensor], - _forward_output: torch.Tensor, - ): - if ( - not requires_grad(module) - or not module.training - or not torch.is_grad_enabled() - ): - return - - if not self.hooks_enabled: - return - - if not hasattr(module, "activations"): - module.activations = [] - module.activations.append(forward_input[0].detach()) # pyre-ignore - - def capture_backprops_hook( - self, - module: nn.Module, - _forward_input: torch.Tensor, - forward_output: torch.Tensor, - loss_reduction: str, - batch_first: bool, - ): - """Captures backprops in backward pass and store per-sample gradients.""" - if not self.hooks_enabled: - return - - backprops = forward_output[0].detach() - activations, backprops = self.rearrange_grad_samples( - module, backprops, loss_reduction, batch_first - ) - grad_sampler_fn = self.GRAD_SAMPLERS[type(module)] - grad_sampler_fn(module, activations, backprops) - - if ( - not isinstance(module.activations, list) or len(module.activations) == 0 - ) and hasattr(module, "max_batch_len"): - del module.max_batch_len - - def rearrange_grad_samples( - self, - module: nn.Module, - backprops: torch.Tensor, - loss_reduction: str, - batch_first: bool, - ) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Rearrange activations and grad_samples based on loss reduction and batch dim - - Args: - module: the module for which per-sample gradients are computed - backprops: the captured backprops - loss_reduction: either "mean" or "sum" depending on whether backpropped - loss was averaged or summed over batch - batch_first: True is batch dimension is first - """ - if not hasattr(module, "activations"): - raise ValueError( - f"No activations detected for {type(module)}," - " run forward after add_hooks(model)" - ) - - batch_dim = 0 if batch_first or type(module) is LSTMLinear else 1 - - if isinstance(module.activations, list): - A = module.activations.pop() - else: - A = module.activations - - if not hasattr(module, "max_batch_len"): - # For packed sequences, max_batch_len is set in the forward of the model (e.g. the LSTM) - # Otherwise we infer it here - module.max_batch_len = _get_batch_size(module, A, batch_dim) - - n = module.max_batch_len - if loss_reduction == "mean": - B = backprops * n - elif loss_reduction == "sum": - B = backprops - else: - raise ValueError( - f"loss_reduction = {loss_reduction}. Only 'sum' and 'mean' losses are supported" - ) - - # No matter where the batch dimension was, .grad_samples will *always* put it in the first dim - if batch_dim != 0: - A = A.permute([batch_dim] + [x for x in range(A.dim()) if x != batch_dim]) - B = B.permute([batch_dim] + [x for x in range(B.dim()) if x != batch_dim]) - - return A, B - - @classmethod - def is_supported(cls, module: nn.Module) -> bool: - """Check if this module is supported""" - return type(module) in cls.GRAD_SAMPLERS or type(module) is DPLSTM - - -def _get_batch_size( - module: nn.Module, grad_sample: torch.Tensor, batch_dim: int -) -> int: - r""" - Computes and returns the maximum batch size which is the maximum of the dimension values - along 'batch_dim' axis over module.activations + [grad_sample], where module.activations is - a list. 
If module.activations is a not a list, then return grad_sample.shape[batch_dim]. - """ - - max_batch_len = 0 - if isinstance(module.activations, list): - for out in module.activations: - if out.shape[batch_dim] > max_batch_len: - max_batch_len = out.shape[batch_dim] - - max_batch_len = max(max_batch_len, grad_sample.shape[batch_dim]) - return max_batch_len diff --git a/opacus/grad_sample/group_norm.py b/opacus/grad_sample/group_norm.py deleted file mode 100644 index 77082d0..0000000 --- a/opacus/grad_sample/group_norm.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .utils import create_or_extend_grad_sample, register_grad_sampler - - -@register_grad_sampler(nn.GroupNorm) -def compute_group_norm_grad_sample( - layer: nn.GroupNorm, A: torch.Tensor, B: torch.Tensor, batch_dim: int = 0, -) -> None: - """ - Computes per sample gradients for GroupNorm - - Args: - layer: Layer - A: Activations - B: Backpropagations - batch_dim: Batch dimension position - """ - gs = F.group_norm(A, layer.num_groups, eps=layer.eps) * B - create_or_extend_grad_sample(layer.weight, torch.einsum("ni...->ni", gs), batch_dim) - if layer.bias is not None: - create_or_extend_grad_sample( - layer.bias, torch.einsum("ni...->ni", B), batch_dim - ) diff --git a/opacus/grad_sample/instance_norm.py b/opacus/grad_sample/instance_norm.py deleted file mode 100644 index 56e08a4..0000000 --- a/opacus/grad_sample/instance_norm.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from typing import Union - -import torch -import torch.nn as nn -import torch.nn.functional as F - -from .utils import create_or_extend_grad_sample, register_grad_sampler - - -@register_grad_sampler( - [nn.InstanceNorm1d, nn.InstanceNorm2d, nn.InstanceNorm3d,] -) -def compute_instance_norm_grad_sample( - layer: Union[nn.InstanceNorm1d, nn.InstanceNorm2d, nn.InstanceNorm3d,], - A: torch.Tensor, - B: torch.Tensor, - batch_dim: int = 0, -) -> None: - """ - Computes per sample gradients for InstanceNorm layers - - Args: - layer: Layer - A: Activations - B: Backpropagations - batch_dim: Batch dimension position - """ - gs = F.instance_norm(A, eps=layer.eps) * B - create_or_extend_grad_sample(layer.weight, torch.einsum("ni...->ni", gs), batch_dim) - if layer.bias is not None: - create_or_extend_grad_sample( - layer.bias, torch.einsum("ni...->ni", B), batch_dim - ) diff --git a/opacus/grad_sample/layer_norm.py b/opacus/grad_sample/layer_norm.py deleted file mode 100644 index 5346b7c..0000000 --- a/opacus/grad_sample/layer_norm.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - - -import torch -import torch.nn as nn -import torch.nn.functional as F -from opacus.utils.tensor_utils import sum_over_all_but_batch_and_last_n - -from .utils import create_or_extend_grad_sample, register_grad_sampler - - -@register_grad_sampler(nn.LayerNorm) -def compute_layer_norm_grad_sample( - layer: nn.LayerNorm, A: torch.Tensor, B: torch.Tensor, batch_dim: int = 0, -) -> None: - """ - Computes per sample gradients for LayerNorm - - Args: - layer: Layer - A: Activations - B: Backpropagations - batch_dim: Batch dimension position - """ - create_or_extend_grad_sample( - layer.weight, - sum_over_all_but_batch_and_last_n( - F.layer_norm(A, layer.normalized_shape, eps=layer.eps) * B, - layer.weight.dim(), - ), - batch_dim, - ) - create_or_extend_grad_sample( - layer.bias, sum_over_all_but_batch_and_last_n(B, layer.bias.dim()), batch_dim, - ) diff --git a/opacus/grad_sample/linear.py b/opacus/grad_sample/linear.py deleted file mode 100644 index 4ae592a..0000000 --- a/opacus/grad_sample/linear.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import torch -import torch.nn as nn - -from .utils import create_or_extend_grad_sample, register_grad_sampler - - -@register_grad_sampler(nn.Linear) -def compute_linear_grad_sample( - layer: nn.Linear, A: torch.Tensor, B: torch.Tensor, batch_dim: int = 0 -) -> None: - """ - Computes per sample gradients for ``nn.Linear`` layer - - Args: - layer: Layer - A: Activations - B: Backpropagations - batch_dim: Batch dimension position - """ - gs = torch.einsum("n...i,n...j->nij", B, A) - create_or_extend_grad_sample(layer.weight, gs, batch_dim) - if layer.bias is not None: - - create_or_extend_grad_sample( - layer.bias, torch.einsum("n...k->nk", B), batch_dim, - ) diff --git a/opacus/grad_sample/utils.py b/opacus/grad_sample/utils.py deleted file mode 100644 index 4d33e96..0000000 --- a/opacus/grad_sample/utils.py +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from typing import Sequence, Union - -import torch -import torch.nn as nn - -from .grad_sample_module import GradSampleModule - - -def register_grad_sampler(target_class_or_classes: Union[type, Sequence[type]]): - """ - Registers the decorated function as the ``grad_sampler`` of ``target_class_or_classes``, which is - the function that will be invoked every time you want to compute a per-sample gradient - of ``target_class_or_classes``. The signature of every grad_sampler is always the same: - - >>> @register_grad_sampler(nn.MyCustomClass) - >>> def compute_grad_sample(module, activations, backprops): - >>> pass - - It may help you to take a look at the existing grad_samplers inside Opacus, under ``opacus.grad_sample.`` - """ - - def decorator(f): - target_classes = ( - target_class_or_classes - if isinstance(target_class_or_classes, Sequence) - else [target_class_or_classes] - ) - for target_class in target_classes: - GradSampleModule.GRAD_SAMPLERS[target_class] = f - return f - - return decorator - - -def create_or_extend_grad_sample( - param: torch.Tensor, grad_sample: torch.Tensor, batch_dim: int -) -> None: - """ - Creates a ``grad_sample`` attribute in the given parameter, or appends to it - if the ``grad_sample`` attribute already exists. - - Args: - param: Parameter to which ``grad_sample`` will be added - grad_sample: Per-sample gradients tensor. 
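Putting `register_grad_sampler` together with `create_or_extend_grad_sample`, a hedged sketch for a hypothetical one-parameter layer (the `Scale` module and its sampler are illustrative, not part of the library; inputs are assumed to be at least 2-D with the batch first):

```python
import torch
import torch.nn as nn
from opacus.grad_sample import create_or_extend_grad_sample, register_grad_sampler

class Scale(nn.Module):
    """Hypothetical toy layer: y = w * x with one learnable scalar."""
    def __init__(self):
        super().__init__()
        self.w = nn.Parameter(torch.ones(1))

    def forward(self, x):
        return self.w * x

@register_grad_sampler(Scale)
def compute_scale_grad_sample(layer, A, B, batch_dim=0):
    # dL_i/dw = sum over all non-batch dims of A_i * B_i, one value per sample i
    gs = (A * B).flatten(start_dim=1).sum(dim=1, keepdim=True)   # [batch, 1]
    create_or_extend_grad_sample(layer.w, gs, batch_dim)
```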
Must be of the same - shape as ``param`` with extra batch dimension - batch_dim: Position of the batch dimension in the shape of - ``grad_sample`` - """ - - if hasattr(param, "grad_sample"): - param.grad_sample = torch.cat((param.grad_sample, grad_sample), batch_dim) - else: - param.grad_sample = grad_sample - - -def create_or_accumulate_grad_sample( - param: torch.Tensor, grad_sample: torch.Tensor, layer: nn.Module -) -> None: - """ - Creates a ``grad_sample`` attribute in the given parameter, or adds to it - if the ``grad_sample`` attribute already exists. - - Args: - param: Parameter to which ``grad_sample`` will be added - grad_sample: Per-sample gradients tensor. Must be of the same - shape as ``param`` with extra batch dimension - """ - - if hasattr(param, "grad_sample"): - param.grad_sample[: grad_sample.shape[0]] += grad_sample - else: - max_batch_len = layer.max_batch_len - param.grad_sample = torch.zeros( - torch.Size([max_batch_len]) + grad_sample.shape[1:], - device=grad_sample.device, - dtype=grad_sample.dtype, - ) - param.grad_sample[: grad_sample.shape[0]] = grad_sample diff --git a/opacus/layers/__init__.py b/opacus/layers/__init__.py deleted file mode 100644 index 1ea7283..0000000 --- a/opacus/layers/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from .dp_ddp import DifferentiallyPrivateDistributedDataParallel -from .dp_lstm import DPLSTM -from .dp_multihead_attention import DPMultiheadAttention, SequenceBias -from .param_rename import ParamRenamedModule - - -__all__ = [ - "DPLSTM", - "DPMultiheadAttention", - "ParamRenamedModule", - "SequenceBias", - "DifferentiallyPrivateDistributedDataParallel", -] diff --git a/opacus/layers/dp_ddp.py b/opacus/layers/dp_ddp.py deleted file mode 100644 index bc10cd9..0000000 --- a/opacus/layers/dp_ddp.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import torch -import torch.nn as nn - - -def average_gradients(model): - world_size = torch.distributed.get_world_size() - for param in model.parameters(): - torch.distributed.all_reduce(param.grad, op=torch.distributed.ReduceOp.SUM) - param.grad /= world_size - - -class DifferentiallyPrivateDistributedDataParallel(nn.Module): - def __init__(self, model): - super().__init__() - self.module = model - - def forward(self, *args, **kwargs): - return self.module(*args, **kwargs) diff --git a/opacus/layers/dp_lstm.py b/opacus/layers/dp_lstm.py deleted file mode 100644 index 09b0ab7..0000000 --- a/opacus/layers/dp_lstm.py +++ /dev/null @@ -1,593 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import math -from typing import List, Optional, Tuple, Union - -import torch -import torch.nn as nn -from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_sequence - -from .param_rename import ParamRenamedModule - - -def _compute_seq_lengths(batch_sizes: torch.Tensor) -> List[int]: - r""" - Computes the sequence lengths (the length parameter used in the packed_padded_sequence function to create a PackedSequence). - - Args: - batch_sizes: Contains the batch sizes as stored in a PackedSequence - - Returns: - running_seq_lengths: the length parameter used in the torch.nn.utils.rnn.packed_padded_sequence function to create a PackedSequence. - It's a list of the same length as batch_sizes. 
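A quick illustration of the `batch_sizes` / sequence-lengths relationship this helper inverts, using torch's own packing utilities:

```python
import torch
from torch.nn.utils.rnn import pack_padded_sequence

lengths = [3, 2, 2, 1]                        # four sequences, sorted by length
padded = torch.zeros(len(lengths), max(lengths), 1)
packed = pack_padded_sequence(padded, lengths, batch_first=True)
print(packed.batch_sizes)                     # tensor([4, 3, 1])
# _compute_seq_lengths(packed.batch_sizes) recovers [3, 2, 2, 1]
```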
- """ - - max_batch_size = batch_sizes[0] - if len(batch_sizes) == 1: - return [1] * max_batch_size - - running_seq = 0 - running_seq_lengths = [] - for i in range(1, len(batch_sizes)): - delta = batch_sizes[i - 1].item() - batch_sizes[i].item() - running_seq += 1 - running_seq_lengths += delta * [running_seq] - - running_seq += 1 - running_seq_lengths += batch_sizes[-1].item() * [running_seq] - running_seq_lengths.reverse() - return running_seq_lengths - - -def _compute_last_states( - h_n: List[torch.Tensor], c_n: List[torch.Tensor], seq_lengths: List[int] -) -> Tuple[torch.Tensor, torch.Tensor]: - r""" - Given h and c values of all time steps, this function computes the h and c values for each sequence at their last timestep (this can vary across sequences with different sequence lengths). - - Args: - h_n: A list of hidden state values across all timesteps. - c_n: A list of cell state values across all timesteps. - seq_lengths: the length parameter used in the torch.nn.utils.rnn.packed_padded_sequence function to create a PackedSequence. This can be computed using the _compute_seq_lengths function. - - Returns: - h_last: Contains the last hidden state values for each of the sequences. - If the i'th sequence has a length of l_i, then h_last[i,:] contains the hidden state corresponding to the i'th sequence at timestep l_i. - c_last: The structure is the same as h_last, except that it contains the last cell state values for each of the sequences. - """ - - max_batch_size = len(seq_lengths) - hidden_size = h_n[0].shape[-1] - h_last = torch.zeros(max_batch_size, hidden_size) - c_last = torch.zeros(max_batch_size, hidden_size) - - for i, seq_len in enumerate(seq_lengths): - h_last[i, :] = h_n[seq_len - 1][i, :] - c_last[i, :] = c_n[seq_len - 1][i, :] - - return h_last, c_last - - -def _concat_sequence_directions( - forward: Union[List[torch.Tensor], Tuple[torch.Tensor]], - reverse: Union[List[torch.Tensor], Tuple[torch.Tensor]], - dim: int, -) -> Tuple[torch.Tensor]: - r""" - Given two list/tuple of same length containing tensors, this function returns a concatenation along dimension d. So, output[i] : concatenation of forward[i] and reverse[i] along dimension dim. - forward[i] and reverse[i] should have the same shape. This function is used for concatenating the outputs of the forward and reverse layer of a bidirectional LSTM. - - Args: - forward: list/tuple containing n tensors, representing the output of the forward layer. - reverse: list/tuple containing n tensors, representing the output of the backward layer. - dim: the dimension along which the sequence of tensors within forward and reverse will be concatenated. - Returns: - output: list/tuple containing n concatenated tensors. - """ - - if len(forward) != len(reverse): - raise ValueError( - "The forward and reverse layer output sequences should have the same length" - ) - - seq_length = len(forward) - output = [0] * seq_length - - for i in range(seq_length): - output[i] = torch.cat((forward[i], reverse[i]), dim=dim) - - return output - - -class LSTMLinear(nn.Linear): - r""" - This function is the same as a nn.Linear layer, except that in the backward pass - the grad_samples get accumulated (instead of being concatenated as in the standard - nn.Linear) - """ - - def __init__(self, in_features: int, out_features: int, bias: bool = True): - super().__init__(in_features, out_features, bias) - - -class DPLSTMCell(nn.Module): - r""" - Internal-only class. 
Implements *one* step of LSTM so that a LSTM layer can be seen as repeated - applications of this class. - """ - - def __init__( - self, input_size: int, hidden_size: int, bias: bool, - ): - super().__init__() - self.input_size = input_size - self.hidden_size = hidden_size - self.bias = bias - - self.ih = LSTMLinear(input_size, 4 * hidden_size, bias=self.bias) - self.hh = LSTMLinear(hidden_size, 4 * hidden_size, bias=self.bias) - - self.reset_parameters() - - def reset_parameters(self): - r""" - Resets parameters by initializing them from an uniform distribution. - """ - stdv = 1.0 / math.sqrt(self.hidden_size) - for weight in self.parameters(): - nn.init.uniform_(weight, -stdv, stdv) - - def set_max_batch_length(self, max_batch_length: int) -> None: - """ - Sets max batch length - """ - self.ih.max_batch_len = max_batch_length - self.hh.max_batch_len = max_batch_length - - def forward( - self, - x: torch.Tensor, - h_prev: torch.Tensor, - c_prev: torch.Tensor, - batch_size_t: Optional[int] = None, - ) -> Tuple[torch.Tensor, torch.Tensor]: - if batch_size_t is None: - gates = self.ih(x) + self.hh(h_prev) # [B, 4*D] - else: - gates = self.ih(x) + self.hh( - h_prev[:batch_size_t, :] - ) # [batch_size_t, 4*D] - - i_t_input, f_t_input, g_t_input, o_t_input = torch.split( - gates, self.hidden_size, 1 - ) - i_t = torch.sigmoid( - i_t_input - ) # [B, D] or [batch_size_t, D] if batch_size_t is not None - f_t = torch.sigmoid( - f_t_input - ) # [B, D] or [batch_size_t, D] if batch_size_t is not None - g_t = torch.tanh( - g_t_input - ) # [B, D] or [batch_size_t, D] if batch_size_t is not None - o_t = torch.sigmoid( - o_t_input - ) # [B, D] or [batch_size_t, D] if batch_size_t is not None - if batch_size_t is None: - c_t = f_t * c_prev + i_t * g_t - else: - c_t = f_t * c_prev[:batch_size_t, :] + i_t * g_t - - h_t = o_t * torch.tanh(c_t) - - return h_t, c_t - - -class DPLSTMLayer(nn.Module): - r""" - Implements *one* layer of LSTM in a way amenable to differential privacy. - We don't expect you to use this directly: use DPLSTM instead :) - """ - - def __init__( - self, - input_size: int, - hidden_size: int, - bias: bool, - dropout: float, - reverse: bool = False, - ): - super().__init__() - self.input_size = input_size - self.hidden_size = hidden_size - self.bias = bias - self.dropout = dropout - self.reverse = reverse - - self.cell = DPLSTMCell( - input_size=input_size, hidden_size=hidden_size, bias=bias - ) - - self.dropout_layer = nn.Dropout(dropout) if dropout > 0 else None - - def set_max_batch_length(self, max_batch_length: int) -> None: - """ - Sets max batch length. Useful for PackedSequences - """ - self.cell.set_max_batch_length(max_batch_length) - - def forward( - self, - x: Union[torch.Tensor, Tuple], - state_init: Tuple[torch.Tensor, torch.Tensor], - batch_sizes: Optional[torch.Tensor] = None, - ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - r""" - Implements the forward pass of the DPLSTMLayer when a sequence is given in input. - - Args: - x: Input sequence to the DPLSTMCell of shape ``[T, B, D]``. - state_init: Initial state of the LSTMCell as a tuple ``(h_0, c_0)`` - where ``h_0`` is the initial hidden state and ``c_0`` is the - initial cell state of the DPLSTMCell - batch_sizes: Contains the batch sizes as stored in PackedSequence - - - Returns: - ``output, (h_n, c_n)`` where, ``output`` is of shape ``[T, B, H]`` and is a - tensor containing the output features (``h_t``) from the last layer of the - DPLSTMCell for each timestep ``t``. 
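The gate arithmetic inside `DPLSTMCell.forward`, spelled out on dummy tensors (a sketch; `D` stands for `hidden_size`):

```python
import torch

B, D = 2, 3
gates = torch.randn(B, 4 * D)                 # stands in for ih(x) + hh(h_prev)
i, f, g, o = torch.split(gates, D, dim=1)     # the four gate pre-activations

c_prev = torch.zeros(B, D)
c_t = torch.sigmoid(f) * c_prev + torch.sigmoid(i) * torch.tanh(g)
h_t = torch.sigmoid(o) * torch.tanh(c_t)      # both [B, D]
```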
``h_n`` is of shape ``[B, H]`` and is a - tensor containing the hidden state for ``t = T``. ``c_n`` is of shape ``[B, H]`` - tensor containing the cell state for ``t = T``. - """ - - if batch_sizes is not None: - seq_length = batch_sizes.size(0) - if self.reverse: - x = tuple(reversed(x)) - batch_sizes = batch_sizes.flip(0) - else: - seq_length, batch_sz, _ = x.shape - if self.reverse: - x = x.flip(0) - x = torch.unbind(x, dim=0) - - h_0, c_0 = state_init - - h_n = [h_0] - c_n = [c_0] - batch_size_prev = h_0.shape[0] - - for t in range(seq_length): - if batch_sizes is not None: - batch_size_t = batch_sizes[t].item() - delta = batch_size_t - batch_size_prev - if delta > 0: - h_cat = torch.cat((h_n[t], h_0[batch_size_prev:batch_size_t, :]), 0) - c_cat = torch.cat((c_n[t], c_0[batch_size_prev:batch_size_t, :]), 0) - h_next, c_next = self.cell(x[t], h_cat, c_cat, batch_size_t) - else: - h_next, c_next = self.cell(x[t], h_n[t], c_n[t], batch_size_t) - else: - h_next, c_next = self.cell(x[t], h_n[t], c_n[t]) - if self.dropout: - h_next = self.dropout_layer(h_next) - h_n.append(h_next) - c_n.append(c_next) - batch_size_prev = h_next.shape[0] - - if batch_sizes is None: - h_n = torch.stack(h_n[1:], dim=0) # [T, B, H], init step not part of output - - return ( - h_n.flip(0) if self.reverse else h_n, # Flip the output... - (h_n[-1], c_n[-1]), # ... But not the states - ) - else: - seq_lengths = _compute_seq_lengths(batch_sizes) - h_temp, c_temp = h_n[1:], c_n[1:] - h_last, c_last = _compute_last_states(h_temp, c_temp, seq_lengths) - if self.reverse: - h_temp = tuple(reversed(h_temp)) - - return h_temp, (h_last, c_last) - - -class BidirectionalDPLSTMLayer(nn.Module): - r""" - Implements *one* layer of Bidirectional LSTM in a way amenable to differential privacy. - We don't expect you to use this directly: use DPLSTM instead :) - """ - - def __init__( - self, input_size: int, hidden_size: int, bias: bool, dropout: float, - ): - super().__init__() - self.input_size = input_size - self.hidden_size = hidden_size - self.bias = bias - self.dropout = dropout - - # nn.LSTM (as of November 2020) only implements a "type 2" multilayer bidirectional LSTM. - # See https://github.com/pytorch/pytorch/issues/4930 for the definition of type 1 and type 2 - # and for discussion. When the PR to extend nn.LSTM to Type 1 lands, we will extend this - # accordingly. - - self.forward_layer = DPLSTMLayer( - input_size=input_size, - hidden_size=hidden_size, - bias=bias, - dropout=dropout, - reverse=False, - ) - self.reverse_layer = DPLSTMLayer( - input_size=input_size, - hidden_size=hidden_size, - bias=bias, - dropout=dropout, - reverse=True, - ) - - def set_max_batch_length(self, max_batch_length: int) -> None: - """ - Sets max batch length - """ - self.forward_layer.set_max_batch_length(max_batch_length) - self.reverse_layer.set_max_batch_length(max_batch_length) - - def forward( - self, - x: torch.Tensor, - state_init: Tuple[torch.Tensor, torch.Tensor], - batch_sizes: Optional[torch.Tensor] = None, - ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - r""" - Implements the forward pass of the DPLSTM when a sequence is input. 
- - Dimensions as follows: - - B: Batch size - - T: Sequence length - - D: LSTM input hidden size (eg from a word embedding) - - H: LSTM output hidden size - - P: number of directions (2 if bidirectional, else 1) - - Args: - x: Input sequence to the DPLSTM of shape ``[T, B, D]`` - state_init: Initial state of the LSTM as a tuple ``(h_0, c_0)``, where - ``h_0`` of shape ``[P, B, H]`` contains the initial hidden state, and - ``c_0`` of shape ``[P, B, H]`` contains the initial cell state. This - argument can be (and defaults to) None, in which case zero tensors - will be used. - - Returns: - ``output, (h_n, c_n)`` where, ``output`` is of shape ``[T, B, H * P]`` and is a - tensor containing the output features (``h_t``) from the last layer of the - DPLSTM for each timestep ``t``. ``h_n`` is of shape ``[P, B, H]`` and contains - the hidden state for ``t = T``. ``c_n`` is of shape ``[P, B, H]`` and contains - the cell state for ``t = T``. - """ - - h0, c0 = state_init - - h0_f, h0_r = h0.unbind(0) # each of shape [B, H] for their layer - c0_f, c0_r = c0.unbind(0) # each of shape [B, H] for their layer - - out_f, (h_f, c_f) = self.forward_layer(x, (h0_f, c0_f), batch_sizes) - out_r, (h_r, c_r) = self.reverse_layer(x, (h0_r, c0_r), batch_sizes) - - if batch_sizes is None: - out = torch.cat([out_f, out_r], dim=-1) # [T, B, H * P] - else: - out = _concat_sequence_directions(out_f, out_r, -1) - - h = torch.stack([h_f, h_r], dim=0) # [P, B, H] - c = torch.stack([c_f, c_r], dim=0) # [P, B, H] - return out, (h, c) - - -class DPLSTM(ParamRenamedModule): - r""" - DP-friendly drop-in replacement of the ``torch.nn.LSTM`` module. - - Its state_dict matches that of nn.LSTM exactly, so that after training it can be exported - and loaded by an nn.LSTM for inference. - - Refer to nn.LSTM's documentation for all parameters and inputs. - """ - - def __init__( - self, - input_size: int, - hidden_size: int, - num_layers: int = 1, - bias: bool = True, - batch_first: bool = False, - dropout: float = 0, - bidirectional: bool = False, - ): - rename_dict = self._make_rename_dict(num_layers, bias, bidirectional) - super().__init__(rename_dict) - self.input_size = input_size - self.hidden_size = hidden_size - self.num_layers = num_layers - self.bias = bias - self.batch_first = batch_first - self.dropout = dropout - self.bidirectional = bidirectional - self.num_directions = 2 if self.bidirectional else 1 - - LayerClass = BidirectionalDPLSTMLayer if bidirectional else DPLSTMLayer - - self.layers = nn.ModuleList( - [ - LayerClass( - input_size=self.input_size - if i == 0 - else self.hidden_size * self.num_directions, - hidden_size=self.hidden_size, - bias=self.bias, - dropout=self.dropout if i < self.num_layers - 1 else 0, - ) - for i in range(num_layers) - ] - ) - - def forward( - self, - x: Union[torch.Tensor, PackedSequence], - state_init: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]: - r""" - Implements the forward pass of the DPLSTM when a sequence is input. - - Dimensions as follows: - - B: Batch size - - T: Sequence length - - D: LSTM input hidden size (eg from a word embedding) - - H: LSTM output hidden size - - L: number of layers in the LSTM - - P: number of directions (2 if bidirectional, else 1) - - Args: - x: Input sequence to the DPLSTM of shape ``[T, B, D]``. Or it can be a PackedSequence. 
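A short sketch of driving the DPLSTM with a `PackedSequence` (shapes and lengths are arbitrary examples):

```python
import torch
from torch.nn.utils.rnn import pack_padded_sequence
from opacus.layers import DPLSTM

lstm = DPLSTM(input_size=8, hidden_size=16, batch_first=True)
x = torch.randn(3, 5, 8)                               # 3 sequences, max length 5
packed = pack_padded_sequence(x, [5, 3, 2], batch_first=True)
out, (h_n, c_n) = lstm(packed)                         # out is a PackedSequence too
```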
- state_init: Initial state of the LSTM as a tuple ``(h_0, c_0)``, where: - - ``h_0`` of shape ``[L*P, B, H]`` contains the initial hidden state - - ``c_0`` of shape ``[L*P, B, H]`` contains the initial cell state - - This argument can be (and defaults to) None, in which case zero tensors will be used. - - Returns: - ``output, (h_n, c_n)`` where, ``output`` is of shape ``[T, B, H * P]`` and is a - tensor containing the output features (``h_t``) from the last layer of the DPLSTM - for each timestep ``t``. ``h_n`` is of shape ``[L * P, B, H]`` and contains the - hidden state for ``t = T``. ``c_n`` is of shape ``[L * P, B, H]`` and contains - the cell state for ``t = T``. - """ - - if isinstance(x, PackedSequence): - x, batch_sizes, sorted_indices, unsorted_indices = x - B = batch_sizes[0].item() - _, D = x.shape - x = x.split(tuple(batch_sizes)) - for layer in self.layers: - layer.set_max_batch_length(B) - else: - sorted_indices = None - unsorted_indices = None - batch_sizes = None - x = self._rearrange_batch_dim(x) - T, B, D = x.shape - - L = self.num_layers - P = 2 if self.bidirectional else 1 - H = self.hidden_size - - h_0s, c_0s = state_init or (None, None) - - if h_0s is None: - h_0s = torch.zeros( - L, P, B, self.hidden_size, dtype=x[0].dtype, device=x[0].device, - ) - else: - h_0s = h_0s.reshape([L, P, B, H]) - h_0s = self._permute_hidden(h_0s, sorted_indices, 2) - - if c_0s is None: - c_0s = torch.zeros( - L, P, B, self.hidden_size, dtype=x[0].dtype, device=x[0].device, - ) - else: - c_0s = c_0s.reshape([L, P, B, H]) - c_0s = self._permute_hidden(c_0s, sorted_indices, 2) - - hs: List[torch.Tensor] = [] - cs: List[torch.Tensor] = [] - - for layer, h0, c0 in zip(self.layers, h_0s, c_0s): - if not self.bidirectional: - h0 = h0.squeeze(0) - c0 = c0.squeeze(0) - x, (h, c) = layer(x, (h0, c0), batch_sizes) - if not self.bidirectional: - h = h.unsqueeze(0) # [1, B, H] - c = c.unsqueeze(0) # [1, B, H] - - hs.append(h) - cs.append(c) - - hs = torch.cat(hs, dim=0) # [L * P, B, H] - cs = torch.cat(cs, dim=0) # [L * P, B, H] - - if batch_sizes is not None: - seq_lengths = _compute_seq_lengths(batch_sizes) - packed_data = pack_padded_sequence( - pad_sequence(x, batch_first=False), seq_lengths, batch_first=True - )[0] - out = PackedSequence( - packed_data, batch_sizes, sorted_indices, unsorted_indices - ) - else: - out = self._rearrange_batch_dim(x) - - return ( - out, - ( - self._permute_hidden(hs, unsorted_indices), - self._permute_hidden(cs, unsorted_indices), - ), - ) - - def _permute_hidden( - self, x: torch.Tensor, permutation: Optional[torch.Tensor] = None, dim: int = 1 - ) -> torch.Tensor: - if permutation is None: - return x - if dim == 1: - return x[:, permutation, :] - elif dim == 2: - return x[:, :, permutation, :] - - def _rearrange_batch_dim(self, x: torch.Tensor) -> torch.Tensor: - if self.batch_first: # batch is by default in second dimension - x = x.transpose(0, 1) - return x - - def __repr__(self): - s = f"DPLSTM({self.input_size}, {self.hidden_size}, bias={self.bias}" - - if self.batch_first: - s += f", batch_first={self.batch_first}" - - if self.num_layers > 1: - s += f", num_layers={self.num_layers}" - - if self.dropout: - s += f", dropout={self.dropout}" - - if self.bidirectional: - s += f", bidirectional={self.bidirectional}" - - return s - - def _make_rename_dict(self, num_layers, bias, bidirectional): - """ - Programmatically constructs a dictionary old_name -> new_name to align with the param - names used in ``torch.nn.LSTM``. 
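And a sketch of the export path the class docstring promises: weights trained in a DPLSTM, served by a plain `nn.LSTM` (the comparison tolerance is an assumption for floating-point arithmetic):

```python
import torch
import torch.nn as nn
from opacus.layers import DPLSTM

dp_lstm = DPLSTM(input_size=8, hidden_size=16, num_layers=2, batch_first=True)
lstm = nn.LSTM(input_size=8, hidden_size=16, num_layers=2, batch_first=True)
lstm.load_state_dict(dp_lstm.state_dict())    # param names line up exactly

x = torch.randn(4, 10, 8)
out_dp, _ = dp_lstm(x)
out_nn, _ = lstm(x)
assert torch.allclose(out_dp, out_nn, atol=1e-5)
```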
- """ - d = {} - components = ["weight"] + ["bias" if bias else []] - matrices = ["ih", "hh"] - for i in range(num_layers): - for c in components: - for m in matrices: - nn_name = f"{c}_{m}_l{i}" - if bidirectional: - d[f"layers.{i}.forward_layer.cell.{m}.{c}"] = nn_name - d[f"layers.{i}.reverse_layer.cell.{m}.{c}"] = ( - nn_name + "_reverse" - ) - else: - d[f"layers.{i}.cell.{m}.{c}"] = nn_name - - return d diff --git a/opacus/layers/dp_multihead_attention.py b/opacus/layers/dp_multihead_attention.py deleted file mode 100644 index e662d62..0000000 --- a/opacus/layers/dp_multihead_attention.py +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import warnings - -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.nn.parameter import Parameter - - -class SequenceBias(nn.Module): - r""" - Adds one bias element to the end of the sequence. - so if the input has a shape ``(L, N, E)``, where - ``L`` is the sequence length, ``N`` is the batch size, and ``E`` is - the embedding dimension, the output will have a shape - ``(L+1, N, E)``. - - Attributes: - bias (:class:`torch.nn.parameter.Parameter`): the learnable bias of - the module of shape ``(E)``, where ``E`` is the embedding dimension. - - Example: - >>> m = SequenceBias(16) - >>> input = torch.randn(20, 4, 16) - >>> output = m(input) - >>> print(output.size()) - torch.Size([21, 4, 16]) - """ - - def __init__(self, embed_dim: int): - r""" - Args: - embed_dim: Embedding dimension - """ - super(SequenceBias, self).__init__() - - self.bias = Parameter(torch.empty(embed_dim)) - self._reset_parameters() - - def _reset_parameters(self): - r""" - assing's Normally distributed random values to bias. - """ - nn.init.normal_(self.bias) - - def forward(self, x): - _, bsz, _ = x.shape - return torch.cat([x, self.bias.repeat(1, bsz, 1)]) - - -class DPMultiheadAttention(nn.Module): - r""" - This is DP-friendly implementation of nn.MultiheadAttention. - For full reference see original module refer to - :class:`torch.nn.MultiheadAttention`. - - Current implementation leverages pytorch modules as building blocks - to allow DP engine to calculate per-sample gradients. - This is in contrast with original implementation based on nn.functional. - """ - - def __init__( - self, - embed_dim, - num_heads, - dropout=0.0, - bias=True, - add_bias_kv=False, - add_zero_attn=False, - kdim=None, - vdim=None, - ): - super(DPMultiheadAttention, self).__init__() - self.embed_dim = embed_dim - self.kdim = kdim if kdim is not None else embed_dim - self.vdim = vdim if vdim is not None else embed_dim - - self.num_heads = num_heads - self.dropout = dropout - self.head_dim = embed_dim // num_heads - assert ( - self.head_dim * num_heads == self.embed_dim - ), "embed_dim must be divisible by num_heads" - - self.qlinear = nn.Linear(embed_dim, embed_dim, bias=bias) - self.klinear = nn.Linear(self.kdim, embed_dim, bias=bias) - self.vlinear = nn.Linear(self.vdim, embed_dim, bias=bias) - - # torch.nn.MultiHeadAttention out_proj is _LinearWithBias - # explicilty setting bias=True for consistent mimicry - self.out_proj = nn.Linear(embed_dim, embed_dim, bias=True) - - self.add_bias_kv = add_bias_kv - if self.add_bias_kv: - self.seq_bias_k = SequenceBias(embed_dim) - self.seq_bias_v = SequenceBias(embed_dim) - - self.add_zero_attn = add_zero_attn - - self.dropout = nn.Dropout(dropout) - - def load_state_dict(self, state_dict): - r""" - Loads module from previously saved state. 
-
-    def load_state_dict(self, state_dict):
-        r"""
-        Loads module from previously saved state.
-
-        Supports loading from both :class:`torch.nn.MultiheadAttention` and
-        :class:`opacus.layers.dp_multihead_attention.DPMultiheadAttention`.
-
-        Args:
-            state_dict: Please refer to
-                https://pytorch.org/tutorials/recipes/recipes/what_is_state_dict.html.
-        """
-        if "in_proj_weight" in state_dict:
-            qweight, kweight, vweight = state_dict["in_proj_weight"].chunk(3, dim=0)
-
-            state_dict["qlinear.weight"] = qweight
-            state_dict["klinear.weight"] = kweight
-            state_dict["vlinear.weight"] = vweight
-            del state_dict["in_proj_weight"]
-
-        if "in_proj_bias" in state_dict:
-            qbias, kbias, vbias = state_dict["in_proj_bias"].chunk(3, dim=0)
-
-            state_dict["qlinear.bias"] = qbias
-            state_dict["klinear.bias"] = kbias
-            state_dict["vlinear.bias"] = vbias
-            del state_dict["in_proj_bias"]
-
-        if "bias_k" in state_dict:
-            state_dict["seq_bias_k.bias"] = state_dict["bias_k"].squeeze()
-            del state_dict["bias_k"]
-
-        if "bias_v" in state_dict:
-            state_dict["seq_bias_v.bias"] = state_dict["bias_v"].squeeze()
-            del state_dict["bias_v"]
-
-        if "q_proj_weight" in state_dict:
-            state_dict["qlinear.weight"] = state_dict["q_proj_weight"]
-            del state_dict["q_proj_weight"]
-
-        if "k_proj_weight" in state_dict:
-            state_dict["klinear.weight"] = state_dict["k_proj_weight"]
-            del state_dict["k_proj_weight"]
-
-        if "v_proj_weight" in state_dict:
-            state_dict["vlinear.weight"] = state_dict["v_proj_weight"]
-            del state_dict["v_proj_weight"]
-
-        super(DPMultiheadAttention, self).load_state_dict(state_dict)
-
-    # flake8: noqa C901
-    def forward(
-        self,
-        query,
-        key,
-        value,
-        key_padding_mask=None,
-        need_weights=True,
-        attn_mask=None,
-    ):
-        tgt_len, bsz, embed_dim = query.size()
-        if embed_dim != self.embed_dim:
-            raise ValueError(
-                f"query has a size of {embed_dim} while the embedding"
-                f" size is {self.embed_dim}"
-            )
-
-        head_dim = embed_dim // self.num_heads
-        if head_dim * self.num_heads != embed_dim:
-            raise ValueError(
-                f"embedding dimension {embed_dim} not divisible "
-                f"by number of heads {self.num_heads}"
-            )
-        scaling = float(head_dim) ** -0.5
-
-        q = self.qlinear(query)
-        k = self.klinear(key)
-        v = self.vlinear(value)
-
-        q = q * scaling
-
-        if attn_mask is not None:
-            if attn_mask.dtype not in (
-                torch.float32,
-                torch.float64,
-                torch.uint8,
-                torch.bool,
-            ):
-                raise ValueError(
-                    f"Only float, byte, and bool types are supported for attn_mask, "
-                    f"not {attn_mask.dtype}."
-                )
-
-            if attn_mask.dtype == torch.uint8:
-                warnings.warn(
-                    "Byte tensor for attn_mask in nn.MultiheadAttention is deprecated. "
-                    "Use bool tensor instead."
-                )
-                attn_mask = attn_mask.to(torch.bool)
-
-            if attn_mask.dim() == 2:
-                attn_mask = attn_mask.unsqueeze(0)
-                if list(attn_mask.size()) != [1, query.size(0), key.size(0)]:
-                    raise ValueError("The size of the 2D attn_mask is not correct.")
-            elif attn_mask.dim() == 3:
-                if list(attn_mask.size()) != [
-                    bsz * self.num_heads,
-                    query.size(0),
-                    key.size(0),
-                ]:
-                    raise ValueError("The size of the 3D attn_mask is not correct.")
-            else:
-                raise ValueError(
-                    "attn_mask's dimension {} is not supported".format(attn_mask.dim())
-                )
-            # attn_mask's dim is 3 now.
-
-        # convert ByteTensor key_padding_mask to bool
-        if key_padding_mask is not None and key_padding_mask.dtype == torch.uint8:
-            warnings.warn(
-                "Byte tensor for key_padding_mask in nn.MultiheadAttention "
-                "is deprecated. Use bool tensor instead."
-            )
-            key_padding_mask = key_padding_mask.to(torch.bool)
-
-        if self.add_bias_kv:
-            k = self.seq_bias_k(k)
-            v = self.seq_bias_v(v)
-            if attn_mask is not None:
-                attn_mask = F.pad(attn_mask, (0, 1))
-            if key_padding_mask is not None:
-                key_padding_mask = F.pad(key_padding_mask, (0, 1))
-
-        q = q.contiguous().view(tgt_len, bsz * self.num_heads, head_dim).transpose(0, 1)
-        if k is not None:
-            k = k.contiguous().view(-1, bsz * self.num_heads, head_dim).transpose(0, 1)
-        if v is not None:
-            v = v.contiguous().view(-1, bsz * self.num_heads, head_dim).transpose(0, 1)
-
-        src_len = k.size(1)
-
-        if key_padding_mask is not None:
-            assert key_padding_mask.size(0) == bsz
-            assert key_padding_mask.size(1) == src_len
-
-        if self.add_zero_attn:
-            src_len += 1
-            k = torch.cat(
-                [
-                    k,
-                    torch.zeros(
-                        (k.size(0), 1) + k.size()[2:], dtype=k.dtype, device=k.device
-                    ),
-                ],
-                dim=1,
-            )
-            v = torch.cat(
-                [
-                    v,
-                    torch.zeros(
-                        (v.size(0), 1) + v.size()[2:], dtype=v.dtype, device=v.device
-                    ),
-                ],
-                dim=1,
-            )
-            if attn_mask is not None:
-                attn_mask = F.pad(attn_mask, (0, 1))
-            if key_padding_mask is not None:
-                key_padding_mask = F.pad(key_padding_mask, (0, 1))
-
-        attn_output_weights = torch.bmm(q, k.transpose(1, 2))
-        assert list(attn_output_weights.size()) == [
-            bsz * self.num_heads,
-            tgt_len,
-            src_len,
-        ]
-
-        if attn_mask is not None:
-            if attn_mask.dtype == torch.bool:
-                attn_output_weights.masked_fill_(attn_mask, float("-inf"))
-            else:
-                attn_output_weights += attn_mask
-
-        if key_padding_mask is not None:
-            attn_output_weights = attn_output_weights.view(
-                bsz, self.num_heads, tgt_len, src_len
-            )
-            attn_output_weights = attn_output_weights.masked_fill(
-                key_padding_mask.unsqueeze(1).unsqueeze(2), float("-inf")
-            )
-            attn_output_weights = attn_output_weights.view(
-                bsz * self.num_heads, tgt_len, src_len
-            )
-
-        attn_output_weights = F.softmax(attn_output_weights, dim=-1)
-        attn_output_weights = self.dropout(attn_output_weights)
-
-        attn_output = torch.bmm(attn_output_weights, v)
-        assert list(attn_output.size()) == [bsz * self.num_heads, tgt_len, head_dim]
-        attn_output = (
-            attn_output.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
-        )
-        attn_output = self.out_proj(attn_output)
-
-        if need_weights:
-            # average attention weights over heads
-            attn_output_weights = attn_output_weights.view(
-                bsz, self.num_heads, tgt_len, src_len
-            )
-            return attn_output, attn_output_weights.sum(dim=1) / self.num_heads
-        else:
-            return attn_output, None
diff --git a/opacus/layers/param_rename.py b/opacus/layers/param_rename.py
deleted file mode 100644
index 7995e38..0000000
--- a/opacus/layers/param_rename.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-
-
-from typing import Dict, Union
-
-import torch.nn as nn
-from torch import Tensor
-from torch.nn.modules.module import _IncompatibleKeys
-
-
-def filter_out_old_keys(self, state_dict, prefix, local_metadata):
-    new_state_dict = {
-        param_name: param_value
-        for param_name, param_value in state_dict.items()
-        if param_name not in self.old_to_new
-    }
-    return new_state_dict
-
-
-class ParamRenamedModule(nn.Module):
-    """
-    This class defines an nn.Module whose parameters are renamed. This is useful when you want to
-    reimplement a layer but make sure its state_dict and list of parameters are exactly the same
-    as another reference layer so that you can have a drop-in replacement that does not depend on
-    how your layer is actually implemented. In Opacus, this is used for DPLSTM, where our
-    implementation leverages submodules and requires alignment to the state_dict of nn.LSTM.
-    """
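A hypothetical sketch of the pattern (class and parameter names invented for illustration): a reimplemented layer passes a rename map to this base class so that its ``state_dict()`` exposes the reference layer's names.

import torch.nn as nn
from opacus.layers.param_rename import ParamRenamedModule  # module path from this diff

class RenamedLinear(ParamRenamedModule):
    def __init__(self, d_in: int, d_out: int):
        # expose "inner.weight"/"inner.bias" under the reference names "weight"/"bias"
        super().__init__(rename_map={"inner.weight": "weight", "inner.bias": "bias"})
        self.inner = nn.Linear(d_in, d_out)

    def forward(self, x):
        return self.inner(x)

# RenamedLinear(8, 4).state_dict() now lists "weight" and "bias", like nn.Linear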
-
-    def __init__(self, rename_map: Dict[str, str]):
-        """
-        Initializes internal state. Subclass this instead of ``torch.nn.Module`` whenever you need
-        to rename your model's state.
-
-        Args:
-            rename_map: mapping from old name -> new name for each parameter you want renamed.
-                Note that this must be a 1:1 mapping!
-        """
-        super().__init__()
-        self.old_to_new = rename_map
-        self.new_to_old = {v: k for k, v in rename_map.items()}
-
-        self._register_state_dict_hook(filter_out_old_keys)
-
-    def _register_renamed_parameters(self):
-        """
-        Internal function. This function simply registers parameters under their new name. They will
-        automatically mask their duplicates coming from submodules. This trick works because
-        self.parameters() proceeds recursively from the top, going into submodules after processing
-        items at the current level, and will not return duplicates.
-        """
-        for old_name, param in super().named_parameters():
-            if old_name in self.old_to_new:
-                new_name = self.old_to_new[old_name]
-                self.register_parameter(new_name, param)
-
-    def __setattr__(self, name: str, value: Union[Tensor, nn.Module]) -> None:
-        """
-        Whenever you set an attribute, e.g. ``self.linear``, this is called to actually register it in
-        any nn.Module. We rely on the masking trick explained in the docs for
-        ``_register_renamed_parameters`` to make sure we replace things only once. If a new parameter
-        in the rename list is detected, we rename and mask it so next time this is called we will
-        no longer find it.
-        """
-        super().__setattr__(name, value)
-        try:
-            self._register_renamed_parameters()
-        except AttributeError:
-            # At the very beginning of instantiation, this will fail because we do not yet have
-            # self._parameters. Safe to ignore.
-            pass
-
-    def load_state_dict(
-        self, state_dict: Dict[str, Tensor], strict: bool = True,
-    ):
-        """
-        Identical to ``torch.nn.Module.load_state_dict()`` but handles the renamed keys.
-        """
-
-        # nn.Module recomputes its state_dict(), without calling the same logic as in self.state_dict()
-        # This means that it will find both the old and the renamed parameters. Both point to the
-        # same parameter object, so either of them will set it correctly. It will however complain
-        # that some keys are missing (the "old" keys). We can safely ignore those and process them
-        # accordingly
-
-        missing_keys, unexpected_keys = super().load_state_dict(
-            state_dict, strict=False
-        )
-        missing_keys = [k for k in missing_keys if k not in self.old_to_new]
-        if strict:
-            error_msgs = []
-            if len(unexpected_keys) > 0:
-                error_msgs.insert(
-                    0,
-                    "Unexpected key(s) in state_dict: {}. ".format(
-                        ", ".join('"{}"'.format(k) for k in unexpected_keys)
-                    ),
-                )
-            if len(missing_keys) > 0:
-                error_msgs.insert(
-                    0,
-                    "Missing key(s) in state_dict: {}. ".format(
-                        ", ".join('"{}"'.format(k) for k in missing_keys)
-                    ),
-                )
-
-            if len(error_msgs) > 0:
-                raise RuntimeError(
-                    "Error(s) in loading state_dict for {}:\n\t{}".format(
-                        self.__class__.__name__, "\n\t".join(error_msgs)
-                    )
-                )
-        return _IncompatibleKeys(missing_keys, unexpected_keys)
diff --git a/opacus/per_sample_gradient_clip.py b/opacus/per_sample_gradient_clip.py
deleted file mode 100644
index a3193b4..0000000
--- a/opacus/per_sample_gradient_clip.py
+++ /dev/null
@@ -1,337 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-r"""
-The process of adding differential privacy to a model involves bounding its sensitivity prior to
-applying the Gaussian mechanism. This is achieved by clipping the per-sample gradients.
-Normally, for a parameterized layer, if you have a tensor of parameters of size ``[m, n]``,
-the size of the gradients will match it; that is, the gradients get aggregated over the batch.
-Here, we will keep them per-sample, i.e., we will have a tensor of size ``[b_sz, m, n]``, where
-the slice ``[i, :, :]`` corresponds to the per-example gradients for the i-th example in the batch.
-
-Per-sample gradient clipping has to be achieved under the following constraints:
-
-1. The norm of the grad_sample of the loss with respect to all model parameters has
-to be clipped as if all the per-sample gradients were put in a single vector together. If ``C`` is
-the clipping threshold, this ensures the total norm will be at most ``C``.
-
-Example:
-    >>> T = torch.cat([p.grad_sample.reshape(B, -1) for p in model.parameters()], dim=1)
-
-    ``T`` will have shape ``[B, N_TOTAL_PARAMS]``. The total L2 norm of each row of ``T``
-    cannot be greater than ``C``.
-
-2. This clipping should not backpropagate. This means that clipping in the layer ``i+1``
-should not affect computing the gradient of layer ``i``. To make sure this is followed,
-we will first compute the grad_sample of all layers **without clipping**. In a second pass, we will
-go back to the per-sample gradients, clip them, and accumulate them in ``.grad``
-(thus replacing the "real" gradients).
-
-Notes:
-    There is only a single .backward() call as the second pass just works on top of
-    the stored grad_sample.
-"""
-
-from typing import Callable, Iterator, Optional, Tuple
-
-import torch
-from opacus.grad_sample import GradSampleModule
-from torch import nn
-
-from .utils.clipping import NormClipper
-from .utils.tensor_utils import calc_sample_norms
-
-
-class PerSampleGradientClipper:
-    r"""
-    Class to define a per-sample gradient clipper for a module. Per-sample gradient clipping
-    bounds the sensitivity of the computation before applying the Gaussian mechanism.
-    """
-
-    def __init__(
-        self,
-        module: GradSampleModule,
-        norm_clipper: NormClipper,
-        batch_first: bool = True,
-        loss_reduction: str = "mean",
-    ):
-        r"""
-        Attaches to a module, and clips all grad_sample in the backward
-        pass. It then puts them in each parameter's ``.grad``.
-
-        Args:
-            module: Module to which backward hooks are added and for which per-sample
-                gradients are clipped
-
-            norm_clipper: A norm clipper object of class
-                :class:`~opacus.utils.clipping.NormClipper` which encapsulates different
-                clipping strategies (such as flat clipping for the entire model, or
-                per-layer clipping)
-
-            batch_first: Flag to indicate if the input tensor to the corresponding module
-                has the first dimension representing the batch, for example of shape
-                [batch_size, ..., ...]. Set to True if batch appears in first
-                dimension else set to False (batch_first=False implies that the batch
-                is always in the second dimension).
-
-            loss_reduction: Indicates if the loss reduction (for aggregating the gradients)
-                is a sum or a mean operation. Can take values ``sum`` or ``mean``
-        """
-        self.module = module
-        self.norm_clipper = norm_clipper
-        self.batch_first = batch_first
-        self.loss_reduction = loss_reduction
-
-        self._reset_aggregated_state()
-
-        self.on_batch_clip_func = None
-
-    def set_on_batch_clip_func(self, on_batch_clip_func: Callable[..., None]) -> None:
-        r"""
-        Sets the function to be called after clipping to the input callable parameter
-        (for example, clipping stats collection)
-
-        Args:
-            on_batch_clip_func: Function to be called after clipping
-        """
-        self.on_batch_clip_func = on_batch_clip_func
-
-    def __repr__(self):
-        return f"PerSampleGradientClipModuleHook on {self.module}"
-
-    def _reset_aggregated_state(self) -> None:
-        r"""
-        Resets the aggregated state of the clipper to be zero for
-        the batch size and zero tensors for the per-layer thresholds
-        """
-        self._aggr_batch_size = 0
-        self._aggr_thresh = torch.zeros(1)
-
-    def _get_aggregated_state(self) -> Tuple[torch.Tensor, int]:
-        r"""
-        Returns an aggregated state of the clipper consisting of the
-        list of layer thresholds (for those providing gradient norms)
-        as well as the aggregate batch size
-
-        Returns:
-            Aggregated state (layer thresholds and batch size)
-        """
-        return self._aggr_thresh, self._aggr_batch_size
-
-    def pre_step(self) -> Tuple[torch.Tensor, int]:
-        r"""
-        Prepares the ``.grad`` field of the parameters and provides statistics on the
-        maximum gradient norm which should be used to scale noise in the privacy engine
-        (:class:``~opacus.privacy_engine.PrivacyEngine``). This function is called before
-        the optimizer ``step()``.
-
-        Returns:
-            The maximum gradient norm per batch (repeated in batch dimension
-            as a tensor) and the batch size
-        """
-
-        # check if we've already accumulated clipped gradients for this batch
-        if self._aggr_batch_size == 0:
-            raise ValueError("You need to call clip_and_accumulate first")
-
-        threshs, batch_size = self._get_aggregated_state()
-        # now that we know the full batch size, we can average the gradients
-        n = 0
-        for _, p in self._named_params():
-            p.grad = self._scale_summed_grad(p.summed_grad, batch_size)
-            n += 1
-            del p.summed_grad
-
-        # NOTE: For Renyi-based epsilon calculation, we will calculate a flat
-        # max norm equal to the norm of all clip values per layer.
-        max_norm = threshs.new_full((n,), threshs.norm(2))
-        self._reset_aggregated_state()
-        return max_norm, batch_size
-
-    def clip_and_accumulate(self) -> None:
-        r"""
-        Clips and sums up per-sample gradients into an accumulator. When this function is called
-        ``N >= 1`` times on mini-batches of size ``B`` (the final batch could be smaller), a call to
-        :meth:`~opacus.per_sample_gradient_clip.PerSampleGradientClipper.pre_step`
-        will populate the ``.grad`` field with the average gradient over the entire batch of size
-        ``(N-1) * B + b`` with ``b <= B``.
-        """
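To make steps 0-2 of the implementation below concrete, here is a standalone sketch of flat clipping (an illustration under stated assumptions, not this module's API): per-sample norms are taken across all parameters, each sample is scaled by ``min(1, C / norm)``, and the clipped per-sample gradients are summed over the batch with the same einsum used by ``_weighted_sum`` further down.

import torch

def flat_clip_and_sum(grad_samples, C):
    # grad_samples: one tensor per parameter, each of shape [B, ...]
    squared = [g.reshape(g.shape[0], -1).norm(2, dim=1) ** 2 for g in grad_samples]
    total_norms = torch.stack(squared).sum(dim=0).sqrt()  # [B], norm over all params
    factors = (C / (total_norms + 1e-6)).clamp(max=1.0)   # [B], min(1, C / norm)
    return [torch.einsum("i,i...", factors, g) for g in grad_samples]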
- """ - # step 0 : calculate the layer norms - all_norms = calc_sample_norms( - named_params=self._named_grad_samples(), - flat=not self.norm_clipper.is_per_layer, - ) - - # step 1: calculate the clipping factors based on the noise - clipping_factor = self.norm_clipper.calc_clipping_factors(all_norms) - - # step 2: update the aggreagated thresholds and batch size - self._aggr_thresh = torch.max( - self._aggr_thresh, self.norm_clipper.thresholds - ) # retain the largest clipping thresholds accross the entire batch - batch_size = next(p.shape[0] for (_, p) in self._named_grad_samples()) - # The size for every param.grad_sample is the batch size - self._aggr_batch_size += batch_size - - for i, (clip_factor, named_param) in enumerate( - zip(clipping_factor, self._named_params()) - ): - # Do the clipping - name, p = named_param - summed_grad = self._weighted_sum(clip_factor, p.grad_sample) - clipping_thresh = self.norm_clipper.thresholds[ - i if len(self.norm_clipper.thresholds) > 1 else 0 - ] - per_sample_norm = all_norms[i if len(all_norms) > 1 else 0] - # print(per_sample_norm) - # accumulate the summed gradient for this mini-batch - if hasattr(p, "summed_grad"): - p.summed_grad += summed_grad - else: - p.summed_grad = summed_grad - - self._on_batch_clip( - name, - clip_factor, - clipping_thresh, - per_sample_norm, - p.grad_sample, - grad_before_clip=p.grad, - grad_after_clip=self._scale_summed_grad(summed_grad, batch_size), - ) - - # remove the per-sample gradients - del p.grad_sample - self._on_batch_clip() # inform analysis of the whole module - - def zero_grad(self): - """ - Deletes the added attributes, ``grad_sample`` and ``summed_grad``. - - The two mentioned attributes are - automatically deleted when ``pre_step`` or - ``clip_and_accumulate`` are properly called. This is a safety measure - to avoid further issues if regular use has not been followed. - """ - for _, param in self._named_params(): - if hasattr(param, "grad_sample"): - del param.grad_sample - if hasattr(param, "summed_grad"): - del param.summed_grad - - def _named_params(self) -> Iterator[Tuple[str, nn.Parameter]]: - r""" - Helper function to get parameter with their names that require grad - - Returns: - Iterator over parameters with their names - """ - return ((n, p) for n, p in self.module.named_parameters() if p.requires_grad) - - def _named_grad_samples(self) -> Iterator[Tuple[str, torch.Tensor]]: - r""" - Helper function to get names and per-sample gradients for parameters - that required grad. - - Returns: - Iterator of parameter names and per-sample gradients - """ - - no_grad_samples = [ - n - for n, p in self.module.named_parameters() - if p.requires_grad and not hasattr(p, "grad_sample") - ] - if len(no_grad_samples) >= 1: - raise AttributeError( - f"The following layers do not have gradients: {no_grad_samples}. Are you sure they were included in the backward pass?" - ) - - return ( - (n, p.grad_sample) - for n, p in self.module.named_parameters() - if p.requires_grad - ) - - def _scale_summed_grad( - self, summed_grad: torch.Tensor, batch_size: int - ) -> torch.Tensor: - r""" - Depending on the loss type, this function averages the summed gradient over batch - if attribute ``loss_reduction`` is set to "mean", else it returns the input summed - gradient tensor. - - Args: - summed_grad: Summed gradient tensor which might be averaged depending on loss_reduction - - batch_size: Batch size of gradient tensor - - Returns: - Summed gradient tensor if loss_reduction is set to sum else averaged over batch. 
-
-        Raises:
-            ValueError
-                If the loss reduction is not defined to be either 'sum' or 'mean'
-        """
-        if self.loss_reduction == "mean":
-            return summed_grad / batch_size
-        elif self.loss_reduction == "sum":
-            return summed_grad.detach()
-        else:
-            raise ValueError(
-                f"Loss reduction must be either sum or mean. Got {self.loss_reduction}"
-            )
-
-    def _weighted_sum(
-        self, batch_weight: torch.Tensor, param: torch.Tensor
-    ) -> torch.Tensor:
-        r"""
-        Helper function to calculate a weighted sum of tensor ``param``
-        along the batch dimension weighted by tensor ``batch_weight``.
-
-        Args:
-            batch_weight: Tensor of shape ``B`` (where ``B`` is the batch size) corresponding
-                to weights along the batch dimension. Each sample in the batch has its own weight.
-            param: Tensor to be weighted, of shape ``[B, ...]`` where ``B`` represents the
-                batch size.
-
-        Returns:
-            Weighted sum tensor for ``param`` along the batch dimension weighted by batch_weight.
-        """
-        return torch.einsum("i,i...", batch_weight, param)
-
-    def _on_batch_clip(
-        self,
-        param_name: Optional[str] = None,
-        clipping_factor: Optional[torch.Tensor] = None,
-        clipping_threshold: Optional[torch.Tensor] = None,
-        per_sample_norm: Optional[torch.Tensor] = None,
-        per_sample_grad: Optional[torch.Tensor] = None,
-        grad_before_clip: Optional[torch.Tensor] = None,
-        grad_after_clip: Optional[torch.Tensor] = None,
-    ):
-        r"""
-        Calls a pre-specified function (for example, for clipping stats computation) and
-        gives that function access to the current parameter state during the backpropagation
-        of each batch.
-
-        Args:
-            param_name: Name of the parameter, the parameter could be accessed by
-                ``self.module.state_dict()[param_name]``. A value of ``None``
-                indicates that all parameters have been processed.
-            clipping_factor: Scaling factor used in gradient clipping.
-            clipping_threshold: Threshold used in gradient clipping.
-            per_sample_norm: Per-sample gradient norms for clipping
-            per_sample_grad: Raw per-sample gradients for the parameter
-            grad_before_clip: Aggregated gradient before clipping (``= per_sample_grad.mean()``)
-            grad_after_clip: Aggregated gradients after clipping
-        """
-        if self.on_batch_clip_func:
-            self.on_batch_clip_func(
                param_name=param_name,
-                clipping_factor=clipping_factor,
-                clipping_threshold=clipping_threshold,
-                per_sample_norm=per_sample_norm,
-                per_sample_grad=per_sample_grad,
-                grad_before_clip=grad_before_clip,
-                grad_after_clip=grad_after_clip,
-            )
diff --git a/opacus/privacy_analysis.py b/opacus/privacy_analysis.py
deleted file mode 100644
index f749833..0000000
--- a/opacus/privacy_analysis.py
+++ /dev/null
@@ -1,311 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-
-r"""
-*Based on Google's TF Privacy:* https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/analysis/rdp_accountant.py.
-*Here, we update this code to Python 3, and optimize dependencies.*
-
-Functionality for computing Renyi Differential Privacy (RDP) of an additive
-Sampled Gaussian Mechanism (SGM).
-
-Example:
-    Suppose that we have run an SGM applied to a function with L2-sensitivity of 1.
-
-    Its parameters are given as a list of tuples
-    ``[(q_1, sigma_1, steps_1), ..., (q_k, sigma_k, steps_k)],``
-    and we wish to compute epsilon for a given target delta.
-
-    The example code would be:
-
-    >>> max_order = 32
-    >>> orders = range(2, max_order + 1)
-    >>> rdp = np.zeros_like(orders, dtype=float)
-    >>> for q, sigma, steps in parameters:
-    >>>     rdp += privacy_analysis.compute_rdp(q, sigma, steps, orders)
-    >>> epsilon, opt_order = privacy_analysis.get_privacy_spent(orders, rdp, delta)
-
-"""
-
-import math
-from typing import List, Tuple, Union
-
-import numpy as np
-from scipy import special
-
-
-########################
-# LOG-SPACE ARITHMETIC #
-########################
-
-
-def _log_add(logx: float, logy: float) -> float:
-    r"""Adds two numbers in the log space.
-
-    Args:
-        logx: First term in log space.
-        logy: Second term in log space.
-
-    Returns:
-        Sum of numbers in log space.
-    """
-    a, b = min(logx, logy), max(logx, logy)
-    if a == -np.inf:  # adding 0
-        return b
-    # Use exp(a) + exp(b) = (exp(a - b) + 1) * exp(b)
-    return math.log1p(math.exp(a - b)) + b  # log1p(x) = log(x + 1)
-
-
-def _log_sub(logx: float, logy: float) -> float:
-    r"""Subtracts two numbers in the log space.
-
-    Args:
-        logx: First term in log space. Expected to be greater than the second term.
-        logy: Second term in log space. Expected to be less than the first term.
-
-    Returns:
-        Difference of numbers in log space.
-
-    Raises:
-        ValueError
-            If the result is negative.
-    """
-    if logx < logy:
-        raise ValueError("The result of subtraction must be non-negative.")
-    if logy == -np.inf:  # subtracting 0
-        return logx
-    if logx == logy:
-        return -np.inf  # 0 is represented as -np.inf in the log space.
-
-    try:
-        # Use exp(x) - exp(y) = (exp(x - y) - 1) * exp(y).
-        return math.log(math.expm1(logx - logy)) + logy  # expm1(x) = exp(x) - 1
-    except OverflowError:
-        return logx
-
-
-def _compute_log_a_for_int_alpha(q: float, sigma: float, alpha: int) -> float:
-    r"""Computes :math:`log(A_\alpha)` for integer ``alpha``.
-
-    Notes:
-        Note that
-        :math:`A_\alpha` is real valued function of ``alpha`` and ``q``,
-        and that 0 < ``q`` < 1.
-
-        Refer to Section 3.3 of https://arxiv.org/pdf/1908.10530.pdf for details.
-
-    Args:
-        q: Sampling rate of SGM.
-        sigma: The standard deviation of the additive Gaussian noise.
-        alpha: The order at which RDP is computed.
-
-    Returns:
-        :math:`log(A_\alpha)` as defined in Section 3.3 of
-        https://arxiv.org/pdf/1908.10530.pdf.
-    """
-
-    # Initialize with 0 in the log space.
-    log_a = -np.inf
-
-    for i in range(alpha + 1):
-        log_coef_i = (
-            math.log(special.binom(alpha, i))
-            + i * math.log(q)
-            + (alpha - i) * math.log(1 - q)
-        )
-
-        s = log_coef_i + (i * i - i) / (2 * (sigma ** 2))
-        log_a = _log_add(log_a, s)
-
-    return float(log_a)
-
-
-def _compute_log_a_for_frac_alpha(q: float, sigma: float, alpha: float) -> float:
-    r"""Computes :math:`log(A_\alpha)` for fractional ``alpha``.
-
-    Notes:
-        Note that
-        :math:`A_\alpha` is real valued function of ``alpha`` and ``q``,
-        and that 0 < ``q`` < 1.
-
-        Refer to Section 3.3 of https://arxiv.org/pdf/1908.10530.pdf for details.
-
-    Args:
-        q: Sampling rate of SGM.
-        sigma: The standard deviation of the additive Gaussian noise.
-        alpha: The order at which RDP is computed.
-
-    Returns:
-        :math:`log(A_\alpha)` as defined in Section 3.3 of
-        https://arxiv.org/pdf/1908.10530.pdf.
-    """
-    # The two parts of A_alpha, integrals over (-inf,z0] and [z0, +inf), are
-    # initialized to 0 in the log space:
-    log_a0, log_a1 = -np.inf, -np.inf
-    i = 0
-
-    z0 = sigma ** 2 * math.log(1 / q - 1) + 0.5
-
-    while True:  # do ... until loop
-        coef = special.binom(alpha, i)
-        log_coef = math.log(abs(coef))
-        j = alpha - i
-
-        log_t0 = log_coef + i * math.log(q) + j * math.log(1 - q)
-        log_t1 = log_coef + j * math.log(q) + i * math.log(1 - q)
-
-        log_e0 = math.log(0.5) + _log_erfc((i - z0) / (math.sqrt(2) * sigma))
-        log_e1 = math.log(0.5) + _log_erfc((z0 - j) / (math.sqrt(2) * sigma))
-
-        log_s0 = log_t0 + (i * i - i) / (2 * (sigma ** 2)) + log_e0
-        log_s1 = log_t1 + (j * j - j) / (2 * (sigma ** 2)) + log_e1
-
-        if coef > 0:
-            log_a0 = _log_add(log_a0, log_s0)
-            log_a1 = _log_add(log_a1, log_s1)
-        else:
-            log_a0 = _log_sub(log_a0, log_s0)
-            log_a1 = _log_sub(log_a1, log_s1)
-
-        i += 1
-        if max(log_s0, log_s1) < -30:
-            break
-
-    return _log_add(log_a0, log_a1)
-
-
-def _compute_log_a(q: float, sigma: float, alpha: float) -> float:
-    r"""Computes :math:`log(A_\alpha)` for any positive finite ``alpha``.
-
-    Notes:
-        Note that
-        :math:`A_\alpha` is real valued function of ``alpha`` and ``q``,
-        and that 0 < ``q`` < 1.
-
-        Refer to Section 3.3 of https://arxiv.org/pdf/1908.10530.pdf
-        for details.
-
-    Args:
-        q: Sampling rate of SGM.
-        sigma: The standard deviation of the additive Gaussian noise.
-        alpha: The order at which RDP is computed.
-
-    Returns:
-        :math:`log(A_\alpha)` as defined in the paper mentioned above.
-    """
-    if float(alpha).is_integer():
-        return _compute_log_a_for_int_alpha(q, sigma, int(alpha))
-    else:
-        return _compute_log_a_for_frac_alpha(q, sigma, alpha)
-
-
-def _log_erfc(x: float) -> float:
-    r"""Computes :math:`log(erfc(x))` with high accuracy for large ``x``.
-
-    Helper function used in computation of :math:`log(A_\alpha)`
-    for a fractional alpha.
-
-    Args:
-        x: The input to the function
-
-    Returns:
-        :math:`log(erfc(x))`
-    """
-    return math.log(2) + special.log_ndtr(-x * 2 ** 0.5)
-
-
-def _compute_rdp(q: float, sigma: float, alpha: float) -> float:
-    r"""Computes RDP of the Sampled Gaussian Mechanism at order ``alpha``.
-
-    Args:
-        q: Sampling rate of SGM.
-        sigma: The standard deviation of the additive Gaussian noise.
-        alpha: The order at which RDP is computed.
-
-    Returns:
-        RDP at order ``alpha``; can be np.inf.
-    """
-    if q == 0:
-        return 0
-
-    # no privacy
-    if sigma == 0:
-        return np.inf
-
-    if q == 1.0:
-        return alpha / (2 * sigma ** 2)
-
-    if np.isinf(alpha):
-        return np.inf
-
-    return _compute_log_a(q, sigma, alpha) / (alpha - 1)
-
-
-def compute_rdp(
-    q: float, noise_multiplier: float, steps: int, orders: Union[List[float], float]
-) -> Union[List[float], float]:
-    r"""Computes Renyi Differential Privacy (RDP) guarantees of the
-    Sampled Gaussian Mechanism (SGM) iterated ``steps`` times.
-
-    Args:
-        q: Sampling rate of SGM.
-        noise_multiplier: The ratio of the standard deviation of the
-            additive Gaussian noise to the L2-sensitivity of the function
-            to which it is added. Note that this is the same as the standard
-            deviation of the additive Gaussian noise when the L2-sensitivity
-            of the function is 1.
-        steps: The number of iterations of the mechanism.
-        orders: An array (or a scalar) of RDP orders.
-
-    Returns:
-        The RDP guarantees at all orders; can be ``np.inf``.
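Example (an illustrative sanity check; not part of the original docstring): with ``q = 1`` every step reduces to a plain Gaussian mechanism, so the RDP at order ``alpha`` is ``steps * alpha / (2 * noise_multiplier ** 2)``:

>>> compute_rdp(q=1.0, noise_multiplier=2.0, steps=10, orders=4.0)
5.0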
- """ - if isinstance(orders, float): - rdp = _compute_rdp(q, noise_multiplier, orders) - else: - rdp = np.array([_compute_rdp(q, noise_multiplier, order) for order in orders]) - - return rdp * steps - - -def get_privacy_spent( - orders: Union[List[float], float], rdp: Union[List[float], float], delta: float -) -> Tuple[float, float]: - r"""Computes epsilon given a list of Renyi Differential Privacy (RDP) values at - multiple RDP orders and target ``delta``. - The computation of epslion, i.e. conversion from RDP to (eps, delta)-DP, - is based on the theorem presented in the following work: - Borja Balle et al. "Hypothesis testing interpretations and Renyi differential privacy." - International Conference on Artificial Intelligence and Statistics. PMLR, 2020. - Particullary, Theorem 21 in the arXiv version https://arxiv.org/abs/1905.09982. - Args: - orders: An array (or a scalar) of orders (alphas). - rdp: A list (or a scalar) of RDP guarantees. - delta: The target delta. - Returns: - Pair of epsilon and optimal order alpha. - Raises: - ValueError - If the lengths of ``orders`` and ``rdp`` are not equal. - """ - orders_vec = np.atleast_1d(orders) - rdp_vec = np.atleast_1d(rdp) - - if len(orders_vec) != len(rdp_vec): - raise ValueError( - f"Input lists must have the same length.\n" - f"\torders_vec = {orders_vec}\n" - f"\trdp_vec = {rdp_vec}\n" - ) - - eps = ( - rdp_vec - - (np.log(delta) + np.log(orders_vec)) / (orders_vec - 1) - + np.log((orders_vec - 1) / orders_vec) - ) - - # special case when there is no privacy - if np.isnan(eps).all(): - return np.inf, np.nan - - idx_opt = np.nanargmin(eps) # Ignore NaNs - return eps[idx_opt], orders_vec[idx_opt] diff --git a/opacus/privacy_engine.py b/opacus/privacy_engine.py deleted file mode 100644 index e85e5dc..0000000 --- a/opacus/privacy_engine.py +++ /dev/null @@ -1,656 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import math -import os -import types -import warnings -from typing import List, Optional, Tuple, Union - -import torch -from opacus.grad_sample import GradSampleModule -from scipy.stats import planck -from torch import nn - -from . 
-from .dp_model_inspector import DPModelInspector
-from .layers.dp_ddp import (
-    DifferentiallyPrivateDistributedDataParallel,
-    average_gradients,
-)
-from .per_sample_gradient_clip import PerSampleGradientClipper
-from .utils import clipping
-
-
-DEFAULT_ALPHAS = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
-
-
-def get_noise_multiplier(
-    target_epsilon: float,
-    target_delta: float,
-    sample_rate: float,
-    epochs: int,
-    alphas: List[float],
-    sigma_min: Optional[float] = 0.01,
-    sigma_max: Optional[float] = 10.0,
-) -> float:
-    r"""
-    Computes the noise level sigma to reach a total budget of (target_epsilon, target_delta)
-    at the end of epochs, with a given sample_rate
-
-    Args:
-        target_epsilon: the privacy budget's epsilon
-        target_delta: the privacy budget's delta
-        sample_rate: the sampling rate (usually batch_size / n_data)
-        epochs: the number of epochs to run
-        alphas: the list of orders at which to compute RDP
-
-    Returns:
-        The noise level sigma to ensure privacy budget of (target_epsilon, target_delta)
-
-    """
-    eps = float("inf")
-    while eps > target_epsilon:
-        sigma_max = 2 * sigma_max
-        rdp = privacy_analysis.compute_rdp(
-            sample_rate, sigma_max, epochs / sample_rate, alphas
-        )
-        eps = privacy_analysis.get_privacy_spent(alphas, rdp, target_delta)[0]
-        if sigma_max > 2000:
-            raise ValueError("The privacy budget is too low.")
-
-    while sigma_max - sigma_min > 0.01:
-        sigma = (sigma_min + sigma_max) / 2
-        rdp = privacy_analysis.compute_rdp(
-            sample_rate, sigma, epochs / sample_rate, alphas
-        )
-        eps = privacy_analysis.get_privacy_spent(alphas, rdp, target_delta)[0]
-
-        if eps < target_epsilon:
-            sigma_max = sigma
-        else:
-            sigma_min = sigma
-
-    return sigma
-
-
-class PrivacyEngine:
-    r"""
-    The main component of Opacus is the ``PrivacyEngine``.
-
-    To train a model with differential privacy, all you need to do
-    is to define a ``PrivacyEngine`` and later attach it to your
-    optimizer before running.
-
-
-    Example:
-        This example shows how to define a ``PrivacyEngine`` and to attach
-        it to your optimizer.
-
-        >>> import torch
-        >>> model = torch.nn.Linear(16, 32)  # An example model
-        >>> optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
-        >>> privacy_engine = PrivacyEngine(model, sample_rate=0.01, noise_multiplier=1.3, max_grad_norm=1.0)
-        >>> privacy_engine.attach(optimizer)  # That's it! Now it's business as usual.
-    """
-
-    # flake8: noqa: C901
-    def __init__(
-        self,
-        module: nn.Module,
-        *,  # As per PEP 3102, this forces clients to specify kwargs explicitly, not positionally
-        sample_rate: Optional[float] = None,
-        batch_size: Optional[int] = None,
-        sample_size: Optional[int] = None,
-        max_grad_norm: Union[float, List[float]],
-        noise_multiplier: Optional[float] = None,
-        alphas: List[float] = DEFAULT_ALPHAS,
-        secure_rng: bool = False,
-        batch_first: bool = True,
-        target_delta: float = 1e-6,
-        target_epsilon: Optional[float] = None,
-        epochs: Optional[float] = None,
-        loss_reduction: str = "mean",
-        poisson: bool = False,
-        **misc_settings,
-    ):
-        r"""
-        Args:
-            module: The Pytorch module to which we are attaching the privacy engine
-            alphas: A list of RDP orders
-            noise_multiplier: The ratio of the standard deviation of the Gaussian noise to
-                the L2-sensitivity of the function to which the noise is added
-            max_grad_norm: The maximum norm of the per-sample gradients. Any gradient with norm
-                higher than this will be clipped to this value.
-            batch_size: Training batch size. Used in the privacy accountant.
-            sample_size: The size of the sample (dataset). Used in the privacy accountant.
-            sample_rate: Sample rate used to build batches. Used in the privacy accountant.
-            secure_rng: If on, it will use ``torchcsprng`` for secure random number generation.
-                Comes with a significant performance cost; therefore, it's recommended that you
-                turn it off when just experimenting.
-            batch_first: Flag to indicate if the input tensor to the corresponding module
-                has the first dimension representing the batch. If set to True, dimensions on
-                input tensor will be ``[batch_size, ..., ...]``.
-            target_delta: The target delta. If unset, we will set it for you.
-            loss_reduction: Indicates if the loss reduction (for aggregating the gradients)
-                is a sum or a mean operation. Can take values "sum" or "mean"
-            **misc_settings: Other arguments to the init
-        """
-
-        self.steps = 0
-        self.poisson = poisson
-        self.loss_reduction = loss_reduction
-        self.batch_size = batch_size
-        self.sample_size = sample_size
-        self.sample_rate = sample_rate
-        self._set_sample_rate()
-
-        if isinstance(module, DifferentiallyPrivateDistributedDataParallel):
-            rank = torch.distributed.get_rank()
-            n_replicas = torch.distributed.get_world_size()
-            self.sample_rate *= n_replicas
-        else:
-            rank = 0
-            n_replicas = 1
-
-        self.module = GradSampleModule(module)
-
-        if poisson:
-            # TODO: Check directly if sampler is UniformSampler when sampler gets passed to the Engine (in the future)
-            if sample_size is None:
-                raise ValueError(
-                    "If using Poisson sampling, sample_size should get passed to the PrivacyEngine."
-                )
-
-            # Number of empty batches follows a geometric distribution
-            # Planck is the same distribution but its parameter is the (negative) log of the geometric's parameter
-            self._poisson_empty_batches_distribution = planck(
-                -math.log(1 - self.sample_rate) * self.sample_size
-            )
-
-        if noise_multiplier is None:
-            if target_epsilon is None or target_delta is None or epochs is None:
-                raise ValueError(
-                    "If noise_multiplier is not specified, (target_epsilon, target_delta, epochs) should be given to the engine."
-                )
-            self.noise_multiplier = get_noise_multiplier(
-                target_epsilon, target_delta, self.sample_rate, epochs, alphas
-            )
-        else:
-            self.noise_multiplier = noise_multiplier
-
-        self.max_grad_norm = max_grad_norm
-        self.alphas = alphas
-        self.target_delta = target_delta
-        self.secure_rng = secure_rng
-        self.batch_first = batch_first
-        self.misc_settings = misc_settings
-        self.n_replicas = n_replicas
-        self.rank = rank
-
-        self.device = next(module.parameters()).device
-        self.steps = 0
-
-        if self.noise_multiplier < 0:
-            raise ValueError(
-                f"noise_multiplier={self.noise_multiplier} is not a valid value. Please provide a float >= 0."
-            )
-
-        if isinstance(self.max_grad_norm, float) and self.max_grad_norm <= 0:
-            raise ValueError(
-                f"max_grad_norm={self.max_grad_norm} is not a valid value. Please provide a float > 0."
-            )
-
-        if not self.target_delta:
-            if self.sample_size:
-                warnings.warn(
-                    "target_delta unset. Setting it to an order of magnitude less than 1/sample_size."
-                )
-                self.target_delta = 0.1 * (1 / self.sample_size)
-            else:
-                raise ValueError("Please provide a target_delta.")
-
-        if self.secure_rng:
-            self.seed = None
-            try:
-                import torchcsprng as csprng
-            except ImportError as e:
-                msg = (
-                    "To use secure RNG, you must install the torchcsprng package! "
" - "Check out the instructions here: https://github.com/pytorch/csprng#installation" - ) - raise ImportError(msg) from e - - self.seed = None - self.random_number_generator = csprng.create_random_device_generator( - "/dev/urandom" - ) - else: - warnings.warn( - "Secure RNG turned off. This is perfectly fine for experimentation as it allows " - "for much faster training performance, but remember to turn it on and retrain " - "one last time before production with ``secure_rng`` turned on." - ) - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - self.seed = int.from_bytes(os.urandom(8), byteorder="big", signed=True) - self.random_number_generator = self._set_seed(self.seed) - - self.validator = DPModelInspector() - self.clipper = None # lazy initialization in attach - - def state_dict(self): - return { - "steps": self.steps, - } - - def load_state_dict(self, state_dict): - self.steps = state_dict["steps"] - - def detach(self): - r""" - Detaches the privacy engine from optimizer. - - To detach the ``PrivacyEngine`` from optimizer, this method returns - the model and the optimizer to their original states (i.e. all - added attributes/methods will be removed). - """ - # 1. Fix optimizer - optim = self.optimizer - optim.step = optim.original_step - delattr(optim, "privacy_engine") - delattr(optim, "original_step") - delattr(optim, "original_zero_grad") - delattr(optim, "virtual_step") - - # 2. Fix module - self.module._close() - - def attach(self, optimizer: torch.optim.Optimizer): - r""" - Attaches the privacy engine to the optimizer. - - Attaches to the ``PrivacyEngine`` an optimizer object,and injects - itself into the optimizer's step. To do that it, - - 1. Validates that the model does not have unsupported layers. - - 2. Adds a pointer to this object (the ``PrivacyEngine``) inside the optimizer. - - 3. Moves optimizer's original ``step()`` function to ``original_step()``. - - 4. Monkeypatches the optimizer's ``step()`` function to call ``step()`` on - the query engine automatically whenever it would call ``step()`` for itself. - - Args: - optimizer: The optimizer to which the privacy engine will attach - """ - if hasattr(optimizer, "privacy_engine"): - if optimizer.privacy_engine != self: - raise ValueError( - f"Trying to attach to optimizer: {optimizer}, but that optimizer is " - f"already attached to a different Privacy Engine: {optimizer.privacy_engine}." - ) - else: - warnings.warn( - "Trying to attach twice to the same optimizer. Nothing to do." 
-                )
-            return
-
-        self.validator.validate(self.module)
-        norm_clipper = (
-            clipping.ConstantFlatClipper(self.max_grad_norm)
-            if not isinstance(self.max_grad_norm, list)
-            else clipping.ConstantPerLayerClipper(self.max_grad_norm)
-        )
-
-        if self.misc_settings.get("experimental", False):
-            norm_clipper = clipping._Dynamic_Clipper_(
-                [self.max_grad_norm],
-                self.misc_settings.get("clip_per_layer", False),
-                self.misc_settings.get(
-                    "clipping_method", clipping.ClippingMethod.STATIC
-                ),
-                self.misc_settings.get("clipping_ratio", 0.0),
-                self.misc_settings.get("clipping_momentum", 0.0),
-            )
-
-        self.clipper = PerSampleGradientClipper(
-            self.module, norm_clipper, self.batch_first, self.loss_reduction,
-        )
-
-        def dp_zero_grad(self):
-            self.privacy_engine.zero_grad()
-            self.original_zero_grad()
-
-        def dp_step(self, closure=None, is_empty=False):
-            self.privacy_engine.step(is_empty)
-            if isinstance(
-                self.privacy_engine.module, DifferentiallyPrivateDistributedDataParallel
-            ):
-                average_gradients(self.privacy_engine.module)
-            self.original_step(closure)
-
-        def poisson_dp_step(self, closure=None):
-            # Perform one step as usual
-            self.dp_step(closure)
-
-            # Taking empty steps to simulate empty batches
-            num_empty_batches = self.privacy_engine._sample_poisson_empty_batches()
-            for _ in range(num_empty_batches):
-                self.zero_grad()
-                self.dp_step(closure, is_empty=True)
-
-        optimizer.privacy_engine = self
-
-        optimizer.dp_step = types.MethodType(dp_step, optimizer)
-        optimizer.original_step = optimizer.step
-        optimizer.step = types.MethodType(
-            poisson_dp_step if self.poisson else dp_step, optimizer
-        )
-
-        optimizer.original_zero_grad = optimizer.zero_grad
-        optimizer.zero_grad = types.MethodType(dp_zero_grad, optimizer)
-
-        def virtual_step(self):
-            self.privacy_engine.virtual_step()
-
-        optimizer.virtual_step = types.MethodType(virtual_step, optimizer)
-
-        # create a cross reference for detaching
-        self.optimizer = optimizer
-
-        if self.poisson:
-            # Optional initial step on empty batch
-            num_empty_batches = self._sample_poisson_empty_batches()
-            for _ in range(num_empty_batches):
-                self.optimizer.zero_grad()
-                for p in self.module.parameters():
-                    if p.requires_grad:
-                        p.grad = torch.zeros_like(p)
-                self.optimizer.dp_step(closure=None, is_empty=True)
-
-    def _sample_poisson_empty_batches(self):
-        """
-        Samples an integer which is equal to the number of (consecutive) empty batches when doing Poisson sampling
-        """
-        return self._poisson_empty_batches_distribution.rvs(size=1)[0]
-
-    def get_renyi_divergence(self):
-        rdp = torch.tensor(
-            privacy_analysis.compute_rdp(
-                self.sample_rate, self.noise_multiplier, 1, self.alphas
-            )
-        )
-        return rdp
-
-    def get_privacy_spent(
-        self, target_delta: Optional[float] = None
-    ) -> Tuple[float, float]:
-        """
-        Computes the (epsilon, delta) privacy budget spent so far.
-
-        This method converts from an (alpha, epsilon)-DP guarantee for all alphas that
-        the ``PrivacyEngine`` was initialized with. It returns the optimal alpha together
-        with the best epsilon.
-
-        Args:
-            target_delta: The target delta. If None, it will default to the privacy
-                engine's target delta.
-
-        Returns:
-            Pair of epsilon and optimal order alpha.
-        """
-        if target_delta is None:
-            if self.target_delta is None:
-                raise ValueError(
-                    "If self.target_delta is not specified, target_delta should be set as argument to get_privacy_spent."
-                )
-            target_delta = self.target_delta
-        rdp = self.get_renyi_divergence() * self.steps
-        eps, best_alpha = privacy_analysis.get_privacy_spent(
-            self.alphas, rdp, target_delta
-        )
-        return float(eps), float(best_alpha)
-
-    def zero_grad(self):
-        """
-        Resets the clipper's status.
-
-        The clipper keeps the per-sample gradients for the batch from each
-        ``forward`` call of the module; they need to be cleaned before the
-        next round.
-
-        If these variables are not cleaned, the per-sample gradients keep
-        being concatenated across batches. If accumulating gradients
-        is the intended behaviour, e.g. simulating a large batch, prefer
-        using the ``virtual_step()`` function.
-        """
-        if self.clipper is not None:
-            self.clipper.zero_grad()
-
-    def step(self, is_empty: bool = False):
-        """
-        Takes a step for the privacy engine.
-
-        Args:
-            is_empty: Whether the step is taken on an empty batch
-                In this case, we do not call clip_and_accumulate since there are no
-                per sample gradients.
-
-        Notes:
-            You should not call this method directly. Rather, by attaching your
-            ``PrivacyEngine`` to the optimizer, the ``PrivacyEngine`` would have
-            the optimizer call this method for you.

-        Raises:
-            ValueError: If the last batch of the training epoch is larger than the others.
-                This ensures the clipper consumed the right amount of gradients.
-                In the last batch of a training epoch, we might get a batch that is
-                smaller than the others, but we should never get a batch that is too large.
-
-        """
-        self.steps += 1
-        if not is_empty:
-            self.clipper.clip_and_accumulate()
-            clip_values, batch_size = self.clipper.pre_step()
-        else:
-            clip_values = (
-                self.max_grad_norm
-                if type(self.max_grad_norm) is list
-                else [
-                    self.max_grad_norm
-                    for p in self.module.parameters()
-                    if p.requires_grad
-                ]
-            )
-            batch_size = self.avg_batch_size
-
-        params = (p for p in self.module.parameters() if p.requires_grad)
-        for p, clip_value in zip(params, clip_values):
-            noise = self._generate_noise(clip_value, p)
-            if self.loss_reduction == "mean":
-                noise /= batch_size
-
-            if self.rank == 0:
-                # Noise only gets added on first worker
-                # This is easy to reason about for loss_reduction=sum
-                # For loss_reduction=mean, noise will get further divided by
-                # world_size as gradients are averaged.
-                p.grad += noise
-
-            # For poisson, we are not supposed to know the batch size
-            # We have to divide by avg_batch_size instead of batch_size
-            if self.poisson and self.loss_reduction == "mean":
-                p.grad *= batch_size / self.avg_batch_size
-
-    def to(self, device: Union[str, torch.device]):
-        """
-        Moves the privacy engine to the target device.
-
-        Args:
-            device : The device on which PyTorch tensors are allocated.
-                See: https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device
-
-        Example:
-            This example shows the usage of this method, on how to move the model
-            after instantiating the ``PrivacyEngine``.
-
-            >>> model = torch.nn.Linear(16, 32)  # An example model. Default device is CPU
-            >>> privacy_engine = PrivacyEngine(model, sample_rate=0.01, noise_multiplier=0.8, max_grad_norm=0.5)
-            >>> device = "cuda:3"  # GPU
-            >>> model.to(device)  # If we move the model to GPU, we should call the to() method of the privacy engine (next line)
-            >>> privacy_engine.to(device)
-
-        Returns:
-            The current ``PrivacyEngine``
-        """
-        self.device = device
-        return self
-
-    def virtual_step(self):
-        r"""
-        Takes a virtual step.
-
-        Virtual batches enable training with arbitrarily large batch sizes, while
-        keeping the memory consumption constant.
-        This is beneficial when training models with batch sizes larger than
-        would otherwise fit in memory.
-
-        Example:
-            Imagine you want to train a model with batch size of 2048, but you can only
-            fit batch size of 128 in your GPU. Then, you can do the following:
-
-            >>> for i, (X, y) in enumerate(dataloader):
-            >>>     logits = model(X)
-            >>>     loss = criterion(logits, y)
-            >>>     loss.backward()
-            >>>     if i % 16 == 15:
-            >>>         optimizer.step()  # this will call privacy engine's step()
-            >>>         optimizer.zero_grad()
-            >>>     else:
-            >>>         optimizer.virtual_step()  # this will call privacy engine's virtual_step()
-
-        The rough idea of virtual step is as follows:
-
-        1. Calling ``loss.backward()`` repeatedly stores the per-sample gradients
-        for all mini-batches. If we call ``loss.backward()`` ``N`` times on
-        mini-batches of size ``B``, then each weight's ``.grad_sample`` field will
-        contain ``NxB`` gradients. Then, when calling ``step()``, the privacy engine
-        clips all ``NxB`` gradients and computes the average gradient for an effective
-        batch of size ``NxB``. A call to ``optimizer.zero_grad()`` erases the
-        per-sample gradients.
-
-        2. By calling ``virtual_step()`` after ``loss.backward()``, the ``B``
-        per-sample gradients for this mini-batch are clipped and summed up into a
-        gradient accumulator. The per-sample gradients can then be discarded. After
-        ``N`` iterations (alternating calls to ``loss.backward()`` and
-        ``virtual_step()``), a call to ``step()`` will compute the average gradient
-        for an effective batch of size ``NxB``.
-
-        The advantage here is that this is memory-efficient: it discards the per-sample
-        gradients after every mini-batch. We can thus handle batches of arbitrary size.
-        """
-        self.clipper.clip_and_accumulate()
-
-    def _generate_noise(
-        self, max_grad_norm: float, reference: nn.parameter.Parameter
-    ) -> torch.Tensor:
-        r"""
-        Generates a tensor of Gaussian noise of the same shape as ``reference``.
-
-        The generated tensor has zero mean and standard deviation
-        sigma = ``noise_multiplier * max_grad_norm``
-
-        Args:
-            max_grad_norm : The maximum norm of the per-sample gradients.
-            reference : The reference, based on which the dimension of the
-                noise tensor will be determined
-
-        Returns:
-            the generated noise, with zero mean and standard
-            deviation of ``noise_multiplier * max_grad_norm``
-        """
-        if self.noise_multiplier > 0 and max_grad_norm > 0:
-            return torch.normal(
-                0,
-                self.noise_multiplier * max_grad_norm,
-                reference.grad.shape,
-                device=self.device,
-                generator=self.random_number_generator,
-            )
-        return torch.zeros(reference.grad.shape, device=self.device)
-
-    def _set_seed(self, seed: int):
-        r"""
-        Allows manually setting the seed for a deterministic run. Useful if you want to
-        debug.
-
-        WARNING: MANUALLY SETTING THE SEED BREAKS THE GUARANTEE OF SECURE RNG.
-        For this reason, this method will raise a ValueError if you had ``secure_rng`` turned on.
-
-        Args:
-            seed : The **unsecure** seed
-        """
-        if self.secure_rng:
-            raise ValueError(
-                "Seed was manually set on a ``PrivacyEngine`` with ``secure_rng`` turned on. "
-                "This fundamentally breaks secure_rng, and cannot be allowed. "
-                "If you do need reproducibility with a fixed seed, first instantiate the PrivacyEngine "
-                "with ``secure_rng`` turned off."
-            )
-        self.seed = seed
-
-        return (
-            torch.random.manual_seed(self.seed)
-            if self.device.type == "cpu"
-            else torch.cuda.manual_seed(self.seed)
-        )
-
-    def _set_sample_rate(self):
-        r"""
-        Determine the ``sample_rate``.
-
-        If a ``sample_rate`` is provided, it will be used.
-        If no ``sample_rate`` is provided, the ``sample_rate`` used will be equal to
-        ``batch_size / sample_size``.
-        """
-        if self.batch_size and not isinstance(self.batch_size, int):
-            raise ValueError(
-                f"batch_size={self.batch_size} is not a valid value. Please provide a positive integer."
-            )
-
-        if self.sample_size and not isinstance(self.sample_size, int):
-            raise ValueError(
-                f"sample_size={self.sample_size} is not a valid value. Please provide a positive integer."
-            )
-
-        if self.sample_rate is None:
-            if self.batch_size is None or self.sample_size is None:
-                raise ValueError(
-                    "You must provide (batch_size and sample_size) or sample_rate."
-                )
-            else:
-                self.sample_rate = self.batch_size / self.sample_size
-                if self.batch_size is not None or self.sample_size is not None:
-                    warnings.warn(
-                        "The sample rate will be defined from ``batch_size`` and ``sample_size``. "
-                        "The returned privacy budget will be incorrect."
-                    )
-
-            self.avg_batch_size = self.sample_rate * self.sample_size
-        else:
-            warnings.warn(
-                "A ``sample_rate`` has been provided. "
-                "Thus, the provided ``batch_size`` and ``sample_size`` will be ignored."
-            )
-            if self.poisson:
-                if self.loss_reduction == "mean" and not self.sample_size:
-                    raise ValueError(
-                        "Sample size has to be provided if using Poisson and loss_reduction=mean."
-                    )
-                self.avg_batch_size = self.sample_rate * self.sample_size
-
-        if self.sample_rate > 1.0:
-            raise ValueError(
-                f"sample_rate={self.sample_rate} is not a valid value. Please provide a float between 0 and 1."
-            )
diff --git a/opacus/scripts/compute_dp_sgd_privacy.py b/opacus/scripts/compute_dp_sgd_privacy.py
deleted file mode 100755
index 9a88a60..0000000
--- a/opacus/scripts/compute_dp_sgd_privacy.py
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates.
-
-"""
-Command-line script for computing the privacy of a model trained with DP-SGD.
-The script applies the RDP accountant to estimate the privacy budget of an iterated
-Sampled Gaussian Mechanism.
-
-The code is mainly based on Google's TF Privacy:
-https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py
-
-
-Example:
-
-    To call this script from command line, you can enter (here the sample rate
-    0.00427 corresponds to a batch size of 256 on a dataset of 60000 examples):
-
-    >>> python compute_dp_sgd_privacy.py --sample-rate=0.00427 --noise-multiplier=1.12 --epochs=60 --delta=1e-5 -a 10 20 100
-
-    The training process with these parameters satisfies (epsilon,delta)-DP of (2.95, 1e-5).
-"""
-import argparse
-import math
-from typing import List, Tuple
-
-from opacus import privacy_analysis
-
-
-def _apply_dp_sgd_analysis(
-    sample_rate: float,
-    noise_multiplier: float,
-    steps: int,
-    alphas: List[float],
-    delta: float,
-    verbose: bool = True,
-) -> Tuple[float, float]:
-    """
-    Computes the privacy epsilon at a given delta via RDP accounting, converting
-    the RDP guarantee to an (epsilon, delta) guarantee for the target delta.
-
-    Args:
-        sample_rate : The sample rate in SGD
-        noise_multiplier : The ratio of the standard deviation of the Gaussian
-            noise to the L2-sensitivity of the function to which the noise is added
-        steps : The number of steps
-        alphas : A list of RDP orders
-        delta : Target delta
-        verbose : If enabled, will print the results of DP-SGD analysis
-
-    Returns:
-        Pair of privacy loss epsilon and optimal order alpha
-    """
-    rdp = privacy_analysis.compute_rdp(sample_rate, noise_multiplier, steps, alphas)
-    eps, opt_alpha = privacy_analysis.get_privacy_spent(alphas, rdp, delta=delta)
-
-    if verbose:
-        print(
-            f"DP-SGD with\n\tsampling rate = {100 * sample_rate:.3g}%,"
-            f"\n\tnoise_multiplier = {noise_multiplier},"
-            f"\n\titerated over {steps} steps,\nsatisfies "
-            f"differential privacy with\n\tepsilon = {eps:.3g},"
-            f"\n\tdelta = {delta}."
-            f"\nThe optimal alpha is {opt_alpha}."
-        )
-
-    if opt_alpha == max(alphas) or opt_alpha == min(alphas):
-        print(
-            "The privacy estimate is likely to be improved by expanding "
-            "the set of alpha orders."
-        )
-    return eps, opt_alpha
-
-
-def compute_dp_sgd_privacy(
-    sample_rate: float,
-    noise_multiplier: float,
-    epochs: int,
-    delta: float,
-    alphas: List[float],
-    verbose: bool = True,
-) -> Tuple[float, float]:
-    """
-    Performs the DP-SGD privacy analysis.
-
-    Finds the number of steps based on the input parameters, and calls the
-    DP-SGD privacy analysis to find the privacy loss epsilon and optimal order alpha.
-
-    Args:
-        sample_rate : probability of each sample from the dataset to be selected for the next batch
-        noise_multiplier : The ratio of the standard deviation of the Gaussian noise
-            to the L2-sensitivity of the function to which the noise is added
-        epochs : Number of epochs
-        delta : Target delta
-        alphas : A list of RDP orders
-        verbose : If enabled, will print the results of DP-SGD analysis
-
-    Returns:
-        Pair of privacy loss epsilon and optimal order alpha
-
-    Raises:
-        ValueError
-            When the sample rate is greater than 1
-    """
-    if sample_rate > 1:
-        raise ValueError("sample_rate must be no greater than 1")
-    steps = epochs * math.ceil(1 / sample_rate)
-
-    return _apply_dp_sgd_analysis(
-        sample_rate, noise_multiplier, steps, alphas, delta, verbose
-    )
-
-
-def main():
-    parser = argparse.ArgumentParser(description="RDP computation")
-    parser.add_argument(
-        "-r",
-        "--sample-rate",
-        type=float,
-        required=True,
-        help="Input sample rate (probability of each sample from the dataset to be selected for the next batch)",
-    )
-    parser.add_argument(
-        "-n", "--noise-multiplier", type=float, required=True, help="Noise multiplier",
-    )
-    parser.add_argument(
-        "-e", "--epochs", type=int, required=True, help="Number of epochs to train",
-    )
-    parser.add_argument(
-        "-d", "--delta", type=float, default=1e-5, help="Target delta (default: 1e-5)"
-    )
-    parser.add_argument(
-        "-a",
-        "--alphas",
-        action="store",
-        dest="alphas",
-        type=float,
-        nargs="+",
-        default=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
-        help="List of alpha values (alpha orders of Renyi-DP evaluation). "
-        "A default list is provided. Else, space-separated numbers. E.g.,"
E.g.," - "-a 10 100", - ) - - args = parser.parse_args() - - compute_dp_sgd_privacy( - args.sample_rate, args.noise_multiplier, args.epochs, args.delta, args.alphas, - ) - - -if __name__ == "__main__": - main() diff --git a/opacus/tests/__init__.py b/opacus/tests/__init__.py deleted file mode 100644 index 4e3dc62..0000000 --- a/opacus/tests/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/opacus/tests/docstring_examples_test.py b/opacus/tests/docstring_examples_test.py deleted file mode 100644 index 37fe94d..0000000 --- a/opacus/tests/docstring_examples_test.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import unittest -from collections import defaultdict - -import numpy as np -import torch -import torch.nn as nn -from opacus import PrivacyEngine, privacy_analysis -from opacus.dp_model_inspector import DPModelInspector, IncompatibleModuleException -from opacus.layers.dp_multihead_attention import SequenceBias -from opacus.utils import stats -from opacus.utils.module_inspection import ModelInspector -from opacus.utils.module_modification import ( - convert_batchnorm_modules, - replace_all_modules, -) -from opacus.utils.tensor_utils import ( - calc_sample_norms, - sum_over_all_but_batch_and_last_n, -) - - -class DocstringExamplesTest(unittest.TestCase): - """ - This test checks the correctness of the code snippets we use across the docstrings in the project. - - We want to make sure code examples are always up-to-date and the quality of the documentation doesn't degrade over time. - This TestCase is a collection of all the examples we use at the moment. - It is intended to catch breaking changes and signal to update the docstring alongside with the code. - """ - - def setUp(self): - self.validator = DPModelInspector() - - def test_dp_model_inspector_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstring for opacus.dp_model_inspector.DPModelInspector.validate() - - inspector = DPModelInspector() - valid_model = nn.Linear(16, 32) - is_valid = inspector.validate(valid_model) - self.assertTrue(is_valid) - - invalid_model = nn.BatchNorm1d(2) - with self.assertRaises(IncompatibleModuleException): - is_valid = inspector.validate(invalid_model) - - def test_privacy_analysis_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstring for opacus.privacy_analysis module - parameters = [(1e-5, 1.0, 10), (1e-4, 3.0, 4)] - delta = 1e-5 - - max_order = 32 - orders = range(2, max_order + 1) - rdp = np.zeros_like(orders, dtype=float) - for q, sigma, steps in parameters: - rdp += privacy_analysis.compute_rdp(q, sigma, steps, orders) - - epsilon, opt_order = privacy_analysis.get_privacy_spent(orders, rdp, delta) - - def test_privacy_engine_class_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstring for opacus.privacy_engine.PrivacyEngine - model = torch.nn.Linear(16, 32) # An example model - optimizer = torch.optim.SGD(model.parameters(), lr=0.05) - privacy_engine = PrivacyEngine( - model, sample_rate=0.01, noise_multiplier=1.3, max_grad_norm=1.0, - ) - privacy_engine.attach(optimizer) # That's it! Now it's business as usual. 
- - def test_privacy_engine_to_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstring for opacus.privacy_engine.PrivacyEngine.to() - model = torch.nn.Linear(16, 32) # An example model. Default device is CPU - privacy_engine = PrivacyEngine( - model, sample_rate=0.01, noise_multiplier=0.8, max_grad_norm=0.5, - ) - device = "cpu" - model.to( - device - ) # If we move the model to GPU, we should call the to() method of the privacy engine (next line) - privacy_engine.to(device) - - def test_privacy_engine_virtual_step_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstring for opacus.privacy_engine.PrivacyEngine.virtual_step() - model = nn.Linear(16, 2) - dataloader = [] - - for _ in range(64): - data = torch.randn(4, 16) - labels = torch.randint(0, 2, (4,)) - dataloader.append((data, labels)) - - criterion = nn.CrossEntropyLoss() - optimizer = torch.optim.SGD(model.parameters(), lr=0.05) - - privacy_engine = PrivacyEngine( - model, sample_rate=0.01, noise_multiplier=0.8, max_grad_norm=0.5, - ) - privacy_engine.attach(optimizer) - - for i, (X, y) in enumerate(dataloader): - logits = model(X) - loss = criterion(logits, y) - loss.backward() - if i % 16 == 15: - optimizer.step() # this will call privacy engine's step() - optimizer.zero_grad() - else: - optimizer.virtual_step() # this will call privacy engine's virtual_step() - - def test_sequence_bias_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstring for opacus.layers.dp_multihead_attention.SequenceBias - m = SequenceBias(16) - input = torch.randn(20, 4, 16) - output = m(input) - self.assertEqual(output.size(), (21, 4, 16)) - - def test_module_inspection_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstring for opacus.utils.module_inspection.ModelInspector - inspector = ModelInspector("simple", lambda x: isinstance(x, nn.Conv2d)) - self.assertTrue(inspector.validate(nn.Conv2d(1, 1, 1))) - - def test_module_modification_replace_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstring for opacus.utils.module_modification.replace_all_modules() - from torchvision.models import resnet18 - - model = resnet18() - self.assertTrue(isinstance(model.layer1[0].bn1, nn.BatchNorm2d)) - - model = replace_all_modules(model, nn.BatchNorm2d, lambda _: nn.Identity()) - self.assertTrue(isinstance(model.layer1[0].bn1, nn.Identity)) - - def test_module_modification_convert_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstring for opacus.utils.module_modification.convert_batchnorm_modules() - from torchvision.models import resnet50 - - model = resnet50() - self.assertTrue(isinstance(model.layer1[0].bn1, nn.BatchNorm2d)) - - model = convert_batchnorm_modules(model) - self.assertTrue(isinstance(model.layer1[0].bn1, nn.GroupNorm)) - - def test_tensor_utils_examples(self): - # IMPORTANT: When changing this code you also need to update - # the docstrings for opacus.utils.tensor_utils - - t1 = torch.rand((2, 5)) - t2 = torch.rand((2, 5)) - - self.assertTrue( - calc_sample_norms([("1", t1), ("2", t2)])[0].shape, torch.Size([1, 2]) - ) - - tensor = torch.ones(1, 2, 3, 4, 5) - self.assertTrue( - sum_over_all_but_batch_and_last_n(tensor, n_dims=2).shape, - torch.Size([1, 4, 5]), - ) - - def test_stats_example(self): - # IMPORTANT: When changing this code you also need to update - # the docstrings for opacus.utils.stats.Stat - class 
MockSummaryWriter: - def __init__(self): - self.logs = defaultdict(dict) - - def add_scalar(self, name, value, iter): - self.logs[name][iter] = value - - mock_summary_writer = MockSummaryWriter() - stats.set_global_summary_writer(mock_summary_writer) - - stat = stats.Stat(stats.StatType.GRAD, "sample_stats", frequency=0.1) - for i in range(21): - stat.log({"val": i}) - - self.assertEqual(len(mock_summary_writer.logs["GRAD:sample_stats/val"]), 2) - - stats.add(stats.Stat(stats.StatType.TEST, "accuracy", frequency=1.0)) - stats.update(stats.StatType.TEST, acc1=1.0) diff --git a/opacus/tests/dp_layers/__init__.py b/opacus/tests/dp_layers/__init__.py deleted file mode 100644 index 4e3dc62..0000000 --- a/opacus/tests/dp_layers/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/opacus/tests/dp_layers/common.py b/opacus/tests/dp_layers/common.py deleted file mode 100644 index 5b54db0..0000000 --- a/opacus/tests/dp_layers/common.py +++ /dev/null @@ -1,375 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import io -import unittest -from typing import Callable, Optional, Sequence, Tuple, Union - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -from torch.nn.utils.rnn import PackedSequence, pad_packed_sequence -from torch.testing import assert_allclose - - -def clone_module(module: nn.Module) -> nn.Module: - """ - Handy utility to clone an nn.Module. PyTorch doesn't always support copy.deepcopy(), so it is - just easier to serialize the model to a BytesIO and read it from there. - - Args: - module: The module to clone - - Returns: - The clone of ``module`` - """ - with io.BytesIO() as bytesio: - torch.save(module, bytesio) - bytesio.seek(0) - module_copy = torch.load(bytesio) - return module_copy - - -def flatten(seq: Sequence) -> Sequence: - """ - Utility function to flatten any sequence ie [1, [2, 3], [4, [5, 6]]] -> [1, 2, 3, 4, 5, 6] - - Args: - seq: The sequence to flatten - - Returns: - The flattened out sequence - """ - - def _flatten(seq, a): - for i in seq: - if isinstance(i, Sequence) and not isinstance(i, PackedSequence): - _flatten(i, a) - else: - a.append(i) - return a - - return _flatten(seq, []) - - -def default_train_fn(model: nn.Module, x: torch.Tensor, *args, **kwargs) -> None: - """ - Example of a default train_fn to be passed to ``compare_gradients``. - - Args: - Recommend to always have *args and **kwargs so you can pass whatever you want to it, - plus anything else that you need (in this case, we directly refer to x so we add it to - the list) - - Returns: - Nothing. But it must call ``loss.backward()`` to fill in the gradients. - """ - model.train() - criterion = nn.MSELoss() - logits = model(x) - y = torch.zeros_like(logits) - loss = criterion(logits, y) - loss.backward() - - -class DPModules_test(unittest.TestCase): - """ - Set of common testing utils. It is meant to be subclassed by your test. - See other tests as an example of how this is done. - - The objective of these tests is to make sure that our DP-friendly reimplementations of - standard nn.Modules such as LSTM are indeed drop-in replacements: we are checking that all - outputs and states are the same between the two implementations. Here, we do NOT test for - grad_samples, which is something we do in the grad_sample tests. 
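    A sketch of how a subclass typically drives these helpers (hypothetical test; the DPLSTM pairing mirrors the real LSTM test later in this diff):

```python
import torch
import torch.nn as nn
from opacus.layers import DPLSTM

class DPLSTMSmoke_test(DPModules_test):  # hypothetical subclass
    def test_lstm_minimal(self):
        lstm = nn.LSTM(8, 4, batch_first=True)
        dp_lstm = DPLSTM(8, 4, batch_first=True)
        dp_lstm.load_state_dict(lstm.state_dict())  # identical weights

        x = torch.randn(3, 5, 8)  # [batch, seq, features]
        self.compare_forward_outputs(
            lstm, dp_lstm, x, output_names=("out", "hn", "cn")
        )

        def train_fn(model, x):
            out, _state = model(x)  # LSTMs return (out, (hn, cn))
            nn.MSELoss()(out, torch.zeros_like(out)).backward()

        self.compare_gradients(lstm, dp_lstm, train_fn, x)
```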
- """ - - batch_first_nn, batch_first_dp = None, None - - def compare_forward_outputs( - self, - nn_module: nn.Module, - dp_module: nn.Module, - *module_args, - output_names: Optional[Tuple[str]] = None, - atol: float = 10e-6, - rtol: float = 10e-5, - **module_kwargs, - ) -> None: - """ - Runs forward through both the standard nn_module and the dp_module and checks that all - outputs are indeed the same. - - Args: - nn_module: The original ``nn.Module`` that will be our reference - dp_module: Our ``dp_module`` reimplementation that we want to test against ``nn_module`` - *module_args: args to pass to the model's forward (ie we'll call - ``module(*module_args, **module_kwargs))``. - output_names: If provided, will make printing more informative (rather than say - output number 3 does not match" we can say "output `h` does not match"). - atol: Absolute tolerance. - rtol: Relative tolerance. - **module_kwargs: kwargs to pass to the model's forward (ie we'll call - ``module(*module_args, **module_kwargs))``. - Returns: - None - - Raises: - AssertionError if either: - - The number of outputs of the forward doesn't match - - The shape of any output doesn't match - - The values for any output ``nn_out`` in ``nn_outs`` differ by more - than `atol + rtol * abs(nn_out)` - """ - torch.use_deterministic_algorithms(True) - torch.manual_seed(0) - np.random.seed(0) - - batch_first_nn, batch_first_dp = ( - getattr(nn_module, "batch_first", None), - getattr(dp_module, "batch_first", None), - ) - - nn_outs = nn_module(*module_args, **module_kwargs) - nn_len = 0 - try: - nn_len = len(nn_outs) - except AttributeError: - nn_outs = [nn_outs] - nn_outs = flatten(nn_outs) - - dp_outs = dp_module(*module_args, **module_kwargs) - dp_len = 0 - try: - dp_len = len(dp_outs) - except AttributeError: - dp_outs = [dp_outs] - dp_outs = flatten(dp_outs) - - self.assertEqual( - dp_len, - nn_len, - f"The number of outputs does not match! Reference nn has {nn_len} outputs, and " - f"DP reimplementation has {dp_len} outputs", - ) - - self._check_shapes(nn_outs, dp_outs, output_names=output_names) - self._check_values( - nn_outs, - dp_outs, - atol, - rtol, - output_names=output_names, - batch_first_nn=batch_first_nn, - batch_first_dp=batch_first_dp, - ) - - def compare_gradients( - self, - nn_module: nn.Module, - dp_module: nn.Module, - train_fn: Callable, - *train_fn_args, - atol: float = 10e-6, - rtol: float = 10e-5, - **train_fn_kwargs, - ) -> None: - """ - Runs forward and backward through both the standard nn_module and the dp_module and - checks that all gradients are indeed the same. - - Args: - nn_module: The original nn.Module that will be our reference - dp_module: Our dp_module reimplementation that we want to test against ``nn_module`` - train_fn: A function that we can run to train the model on a single input batch. - It should run forward and backward and stop there. - Refer to ``default_train_fn`` in this file for an example. - *train_fn_args: args to pass to the train_fn (ie we'll call - ``train_fn(*train_fn_args, **train_fn_kwargs))``. - atol: Absolute tolerance. - rtol: Relative tolerance. - **train_fn_kwargs: kwargs to pass to the train_fn (ie we'll call - ``module(*module_args, **module_kwargs))``. - Returns: - None - - Raises: - AssertionError if either: - - nn_module has a gradient for a parameter that dp_module doesn't have. - - dp_module has a gradient for a parameter that nn_module doesn't have. - - The shape of any parameter gradient doesn't match. 
-              - The values for any parameter gradient ``nn_grad`` differ by
-                more than `atol + rtol * abs(nn_grad)`.
-        """
-
-        train_fn(nn_module, *train_fn_args, **train_fn_kwargs)
-        train_fn(dp_module, *train_fn_args, **train_fn_kwargs)
-
-        nn_params = dict(nn_module.named_parameters())
-        dp_params = dict(dp_module.named_parameters())
-
-        nn_only_grads = [
-            param_name
-            for param_name, param in nn_params.items()
-            if param.grad is not None and param_name not in dp_params
-        ]
-        if nn_only_grads:
-            failed_str = "\n\t".join(
-                f"{i}. {s}" for i, s in enumerate(nn_only_grads, 1)
-            )
-            raise AssertionError(
-                f"A total of {len(nn_only_grads)} gradients are in nn_module "
-                f"but not in dp_module: \n\t{failed_str}"
-            )
-
-        dp_only_grads = [
-            param_name
-            for param_name, param in dp_params.items()
-            if param.grad is not None and param_name not in nn_params
-        ]
-        if dp_only_grads:
-            failed_str = "\n\t".join(
-                f"{i}. {s}" for i, s in enumerate(dp_only_grads, 1)
-            )
-            raise AssertionError(
-                f"A total of {len(dp_only_grads)} gradients are in dp_module "
-                f"but not in nn_module: \n\t{failed_str}"
-            )
-
-        for param_name, nn_param in nn_module.named_parameters():
-            dp_param = dp_params[param_name]
-            # One-element tuples: the check helpers expect sequences of outputs.
-            self._check_shapes((nn_param,), (dp_param,), (param_name,))
-            self._check_values((nn_param,), (dp_param,), atol, rtol, (param_name,))
-
-    def _check_shapes(
-        self,
-        nn_outs: Tuple[Union[torch.Tensor, PackedSequence]],
-        dp_outs: Tuple[Union[torch.Tensor, PackedSequence]],
-        output_names: Optional[Tuple[str]] = None,
-    ) -> None:
-        output_names = output_names or [None] * len(nn_outs)
-        failed = []
-        for i, (out_name, nn_out, dp_out) in enumerate(
-            zip(output_names, nn_outs, dp_outs)
-        ):
-            name = f"'{out_name}'" if out_name else f"#{i}"
-            if not torch.is_tensor(nn_out):
-                continue  # Won't have a shape; value checks between non-tensors are done in self._check_values()
-
-            msg = (
-                f"Output {name}: "
-                f"from our DP module: {dp_out.shape}, "
-                f"from reference nn.Module: {nn_out.shape}. "
-            )
-
-            try:
-                self.assertEqual(
-                    dp_out.shape, nn_out.shape, msg=msg,
-                )
-
-            except AssertionError:
-                failed.append(msg)
-
-        if failed:
-            failed_str = "\n\t".join(f"{i}. {s}" for i, s in enumerate(failed, 1))
-            raise AssertionError(
-                f"A total of {len(failed)} shapes do not match \n\t{failed_str}"
-            )
-
-    def _check_values(
-        self,
-        nn_outs: Tuple[Union[torch.Tensor, PackedSequence]],
-        dp_outs: Tuple[Union[torch.Tensor, PackedSequence]],
-        atol: float,
-        rtol: float,
-        output_names: Optional[Tuple[str]] = None,
-        batch_first_nn: Optional[bool] = None,
-        batch_first_dp: Optional[bool] = None,
-    ) -> None:
-        output_names = output_names or [None] * len(nn_outs)
-        failed = []
-        for i, (out_name, nn_out, dp_out) in enumerate(
-            zip(output_names, nn_outs, dp_outs)
-        ):
-            name = f"'{out_name}'" if out_name else f"#{i}"
-
-            if isinstance(nn_out, PackedSequence):
-                self._check_packed_sequence(
-                    name,
-                    nn_out,
-                    dp_out,
-                    batch_first_nn,
-                    batch_first_dp,
-                    atol,
-                    rtol,
-                    failed,
-                )
-                continue
-
-            msg = (
-                f"Output {name}: DP module L2 norm = {dp_out.norm(2)}, "
-                f"Reference nn.Module L2 norm = {nn_out.norm(2)}, "
-                f"MSE = {F.mse_loss(dp_out, nn_out)}, "
-                f"L1 Loss = {F.l1_loss(dp_out, nn_out)}"
-            )
-            try:
-                assert_allclose(
-                    actual=dp_out, expected=nn_out, atol=atol, rtol=rtol,
-                )
-            except AssertionError:
-                failed.append(msg)
-        if failed:
-            failed_str = "\n\t".join(f"{i}. 
{s}" for i, s in enumerate(failed, 1)) - raise AssertionError( - f"A total of {len(failed)} values do not match:\n\t{failed_str}" - ) - - def _check_packed_sequence( - self, - name: str, - nn_out: PackedSequence, - dp_out: PackedSequence, - batch_first_nn: bool, - batch_first_dp: bool, - atol: float, - rtol: float, - failure_msgs: Optional[Sequence] = None, - ) -> bool: - - try: - padded_seq_nn, seq_lens_nn = pad_packed_sequence(nn_out, batch_first_nn) - except ValueError: - raise ValueError("Incorrect format of the nn.module output PackedSequence") - - try: - padded_seq_dp, seq_lens_dp = pad_packed_sequence(dp_out, batch_first_dp) - except ValueError: - raise ValueError("Incorrect format of the DP module output PackedSequence") - - self._check_shapes( - (padded_seq_nn, seq_lens_nn), - (padded_seq_dp, seq_lens_dp), - ("padded_sequence", "batch_sequence_lengths"), - ) - - msg = ( - f"Output PackedSequence {name}: DP module padded sequence L2 norm = {padded_seq_dp.norm(2)}, ", - f"Reference nn.Module padded sequence L2 norm = {padded_seq_nn.norm(2)}, ", - f"MSE = {F.mse_loss(padded_seq_dp, padded_seq_nn)}, ", - f"L1 Loss = {F.l1_loss(padded_seq_dp, padded_seq_nn)}", - f"Manhattan distance (L1) between batch sequence lengths = {(seq_lens_nn - seq_lens_dp).abs().sum()}", # F.l1_loss is for floats, so we are computing this manually. - ) - - try: - assert_allclose( - actual=padded_seq_dp, expected=padded_seq_nn, atol=atol, rtol=rtol - ) - assert_allclose( - actual=seq_lens_dp, expected=seq_lens_nn, atol=atol, rtol=rtol - ) - except AssertionError: - if failure_msgs is not None: - failure_msgs.append(msg) - return False - - return True diff --git a/opacus/tests/dp_layers/dp_lstm_test.py b/opacus/tests/dp_layers/dp_lstm_test.py deleted file mode 100644 index a99ed20..0000000 --- a/opacus/tests/dp_layers/dp_lstm_test.py +++ /dev/null @@ -1,126 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -from typing import Optional, Tuple, Union - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings -from opacus.layers import DPLSTM -from opacus.utils.packed_sequences import _gen_packed_data -from torch.nn.utils.rnn import PackedSequence - -from .common import DPModules_test - - -def lstm_train_fn( - model: nn.Module, - x: Union[torch.Tensor, PackedSequence], - state_init: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, -): - model.train() - criterion = nn.MSELoss() - logits, (hn, cn) = model(x, state_init) - if isinstance(logits, PackedSequence): - y = torch.zeros_like(logits[0]) - loss = criterion(logits[0], y) - else: - y = torch.zeros_like(logits) - loss = criterion(logits, y) - loss.backward() - - -class DPLSTM_test(DPModules_test): - @given( - batch_size=st.integers(1, 5), - seq_len=st.integers(1, 6), - emb_size=st.integers(5, 10), - hidden_size=st.integers(3, 7), - num_layers=st.integers(1, 3), - bidirectional=st.booleans(), - bias=st.booleans(), - batch_first=st.booleans(), - zero_init=st.booleans(), - packed_input_flag=st.integers( - 0, 2 - ), # 0 indicates no packed sequence input, 1 indicates packed sequence input in sorted order, 2 indicates packed sequence input in unsorted order - ) - @settings(deadline=20000) - def test_lstm( - self, - batch_size: int, - seq_len: int, - emb_size: int, - hidden_size: int, - num_layers: int, - bidirectional: bool, - bias: bool, - batch_first: bool, - zero_init: bool, - packed_input_flag: int, - ): - lstm = nn.LSTM( - emb_size, - hidden_size, - num_layers=num_layers, - batch_first=batch_first, - bidirectional=bidirectional, - bias=bias, - ) - dp_lstm = DPLSTM( - emb_size, - hidden_size, - num_layers=num_layers, - batch_first=batch_first, - bidirectional=bidirectional, - bias=bias, - ) - - dp_lstm.load_state_dict(lstm.state_dict()) - - if packed_input_flag == 0: - x = ( - torch.randn([batch_size, seq_len, emb_size]) - if batch_first - else torch.randn([seq_len, batch_size, emb_size]) - ) - elif packed_input_flag == 1: - x = _gen_packed_data( - batch_size, seq_len, emb_size, batch_first, sorted_=True - ) - elif packed_input_flag == 2: - x = _gen_packed_data( - batch_size, seq_len, emb_size, batch_first, sorted_=False - ) - - if zero_init: - self.compare_forward_outputs( - lstm, - dp_lstm, - x, - output_names=("out", "hn", "cn"), - atol=1e-5, - rtol=1e-3, - ) - - self.compare_gradients( - lstm, dp_lstm, lstm_train_fn, x, atol=1e-5, rtol=1e-3, - ) - - else: - num_directions = 2 if bidirectional else 1 - h0 = torch.randn([num_layers * num_directions, batch_size, hidden_size]) - c0 = torch.randn([num_layers * num_directions, batch_size, hidden_size]) - self.compare_forward_outputs( - lstm, - dp_lstm, - x, - (h0, c0), - output_names=("out", "hn", "cn"), - atol=1e-5, - rtol=1e-3, - ) - self.compare_gradients( - lstm, dp_lstm, lstm_train_fn, x, (h0, c0), atol=1e-5, rtol=1e-3, - ) diff --git a/opacus/tests/dp_layers/dp_multihead_attention_test.py b/opacus/tests/dp_layers/dp_multihead_attention_test.py deleted file mode 100644 index f8ea2ae..0000000 --- a/opacus/tests/dp_layers/dp_multihead_attention_test.py +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -from typing import Optional - -import hypothesis.strategies as st -import pytest -import torch -import torch.nn as nn -from hypothesis import given, settings -from opacus.layers import DPMultiheadAttention - -from .common import DPModules_test - - -def attn_train_fn( - model: nn.Module, *args, **kwargs, -): - model.train() - criterion = nn.MSELoss() - logits, attn_weights = model(*args, **kwargs) - y = torch.zeros_like(logits) - loss = criterion(logits, y) - loss.backward() - - -class DPMultiheadAttention_test(DPModules_test): - @given( - batch_size=st.integers(1, 5), - src_seq_len=st.integers(1, 6), - tgt_seq_len=st.integers(1, 6), - num_heads=st.integers(1, 3), - bias=st.booleans(), - add_bias_kv=st.booleans(), - add_zero_attn=st.booleans(), - kdim=st.integers(2, 8) | st.none(), - vdim=st.integers(2, 8) | st.none(), - ) - @settings(deadline=10000) - @pytest.mark.skip( - "Failing due to a known problem. Should be enabled after issue #123 is fixed" - ) - def test_attn( - self, - batch_size: int, - src_seq_len: int, - tgt_seq_len: int, - num_heads: int, - bias: bool, - add_bias_kv: bool, - add_zero_attn: bool, - kdim: Optional[int], - vdim: Optional[int], - ): - embed_dim = 4 * num_heads # embed_dim must be divisible by num_heads - - attn = nn.MultiheadAttention( - embed_dim, - num_heads, - dropout=0.0, # Untestable between two different implementations - bias=bias, - add_bias_kv=add_bias_kv, - add_zero_attn=add_zero_attn, - kdim=kdim, - vdim=vdim, - ) - dp_attn = DPMultiheadAttention( - embed_dim, - num_heads, - dropout=0.0, # Untestable between two different implementations - bias=bias, - add_bias_kv=add_bias_kv, - add_zero_attn=add_zero_attn, - kdim=kdim, - vdim=vdim, - ) - - dp_attn.load_state_dict(attn.state_dict()) - - q = torch.randn(tgt_seq_len, batch_size, embed_dim) - k = torch.randn( - src_seq_len, batch_size, kdim if kdim is not None else embed_dim - ) - v = torch.randn( - src_seq_len, batch_size, vdim if vdim is not None else embed_dim - ) - - self.compare_forward_outputs( - attn, - dp_attn, - q, - k, - v, - output_names=("attn_out", "attn_out_weights"), - atol=1e-5, - rtol=1e-3, - key_padding_mask=None, - need_weights=True, - attn_mask=None, - ) - - self.compare_gradients( - attn, - dp_attn, - attn_train_fn, - q, - k, - v, - atol=1e-5, - rtol=1e-3, - key_padding_mask=None, - need_weights=True, - attn_mask=None, - ) diff --git a/opacus/tests/dp_model_inspector_test.py b/opacus/tests/dp_model_inspector_test.py deleted file mode 100644 index 5af59da..0000000 --- a/opacus/tests/dp_model_inspector_test.py +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - - -import unittest - -import torch -import torch.nn as nn -from opacus import dp_model_inspector as dp_inspector -from opacus.utils.module_modification import convert_batchnorm_modules -from torchvision import models - - -class dp_model_inspector_test(unittest.TestCase): - def test_raises_exception(self): - inspector = dp_inspector.DPModelInspector() - model = models.resnet50() - with self.assertRaises(dp_inspector.IncompatibleModuleException): - inspector.validate(model) - - def test_returns_False(self): - inspector = dp_inspector.DPModelInspector(should_throw=False) - model = models.resnet50() - self.assertFalse(inspector.validate(model)) - - def test_raises_for_eval_mode(self): - inspector = dp_inspector.DPModelInspector() - model = models.resnet50() - model = model.eval() - with self.assertRaises(dp_inspector.IncompatibleModuleException): - inspector.validate(model) - - def test_convert_batchnorm(self): - inspector = dp_inspector.DPModelInspector() - model = convert_batchnorm_modules(models.resnet50()) - self.assertTrue(inspector.validate(model)) - - def test_running_stats(self): - inspector = dp_inspector.DPModelInspector(should_throw=False) - - self.assertTrue(inspector.validate(nn.InstanceNorm1d(16))) - self.assertTrue(inspector.validate(nn.InstanceNorm1d(16, affine=True))) - self.assertTrue( - inspector.validate(nn.InstanceNorm1d(16, track_running_stats=True)) - ) - self.assertFalse( - inspector.validate( - nn.InstanceNorm1d(16, affine=True, track_running_stats=True) - ) - ) - - def test_extra_param(self): - inspector = dp_inspector.DPModelInspector(should_throw=False) - - class SampleNetWithExtraParam(nn.Module): - def __init__(self): - super().__init__() - - self.fc = nn.Linear(8, 16) - self.extra_param = nn.Parameter(torch.Tensor(16, 2)) - - def forward(self, x): - x = self.fc(x) - x = x.matmul(self.extra_param) - return x - - model = SampleNetWithExtraParam() - self.assertFalse(inspector.validate(model)) - - model.extra_param.requires_grad = False - self.assertTrue(inspector.validate(model)) - - def test_unsupported_layer(self): - class SampleNetWithTransformer(nn.Module): - def __init__(self): - super().__init__() - - self.fc = nn.Linear(8, 16) - self.encoder = nn.Transformer() - - def forward(self, x): - x = self.fc(x) - x = self.encoder(x) - return x - - model = SampleNetWithTransformer() - inspector = dp_inspector.DPModelInspector(should_throw=False) - self.assertFalse(inspector.validate(model)) - - def test_conv2d(self): - inspector = dp_inspector.DPModelInspector(should_throw=False) - - self.assertTrue( - inspector.validate( - nn.Conv2d(in_channels=3, out_channels=6, kernel_size=1, groups=1) - ) - ) - self.assertTrue( - inspector.validate( - nn.Conv2d(in_channels=3, out_channels=6, kernel_size=1, groups=3) - ) - ) - self.assertFalse( - inspector.validate( - nn.Conv2d(in_channels=6, out_channels=6, kernel_size=1, groups=2) - ) - ) diff --git a/opacus/tests/grad_sample_module_test.py b/opacus/tests/grad_sample_module_test.py deleted file mode 100644 index bdf7ee5..0000000 --- a/opacus/tests/grad_sample_module_test.py +++ /dev/null @@ -1,159 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import unittest - -import torch -import torch.nn as nn -import torch.nn.functional as F -from opacus.grad_sample import GradSampleModule -from torch.testing import assert_allclose -from torch.utils.data import DataLoader -from torchvision import transforms -from torchvision.datasets import FakeData -from torchvision.models import mobilenet_v3_small - - -class GradSampleModule_test(unittest.TestCase): - def setUp(self): - self.original_model = mobilenet_v3_small() - copy_of_original_model = mobilenet_v3_small() - copy_of_original_model.load_state_dict( - self.original_model.state_dict(), strict=True - ) - - self.grad_sample_module = GradSampleModule( - copy_of_original_model, batch_first=True, loss_reduction="mean" - ) - self.DATA_SIZE = 8 - self.setUp_data() - self.criterion = nn.L1Loss() - - def setUp_data(self): - self.ds = FakeData( - size=self.DATA_SIZE, - image_size=(3, 28, 28), - num_classes=10, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] - ), - ] - ), - ) - self.dl = DataLoader(self.ds, batch_size=self.DATA_SIZE) - - def test_outputs_unaltered(self): - """ - Test that boxing with GradSampleModule won't alter any outputs. - Gradients are tested in the various `grad_samples` tests. - """ - x, _ = next(iter(self.dl)) - print(f"SHAPE: {x.shape}") - self.original_model = self.original_model.eval() - self.grad_sample_module = self.grad_sample_module.eval() - with torch.no_grad(): - normal_out = self.original_model(x) - gs_out = self.grad_sample_module(x) - msg = ( - f"GradSample L2 norm = : {gs_out.norm(2)}, ", - f"Original L2 norm = : {normal_out.norm(2)}, ", - f"MSE = {F.mse_loss(gs_out, normal_out)}, ", - f"L1 Loss = {F.l1_loss(gs_out, normal_out)}", - ) - assert_allclose(gs_out, normal_out, atol=1e-7, rtol=1e-5, msg=msg) - - def test_zero_grad(self): - x, _ = next(iter(self.dl)) - print(f"SHAPE: {x.shape}") - self.original_model = self.original_model.train() - self.grad_sample_module = self.grad_sample_module.train() - gs_out = self.grad_sample_module(x) - loss = self.criterion(gs_out, torch.zeros_like(gs_out)) - loss.backward() - - self.grad_sample_module.zero_grad() - params_with_gs = [ - n - for n, p in self.grad_sample_module.named_parameters() - if hasattr(p, "grad_sample") - ] - msg = ( - "After calling .zero_grad() on the GradSampleModule, the following parameters still " - f"have a grad_sample: {params_with_gs}" - ) - assert len(params_with_gs) == 0, msg - - def test_to_standard_module(self): - copy_of_original_model = mobilenet_v3_small() - copy_of_original_model.load_state_dict( - self.original_model.state_dict(), strict=True, - ) - new_grad_sample_module = GradSampleModule( - copy_of_original_model, batch_first=True, loss_reduction="mean" - ) - - new_grad_sample_module = new_grad_sample_module.to_standard_module() - - assert isinstance(new_grad_sample_module, type(self.original_model)) - - original_state_dict = self.original_model.state_dict() - gs_state_dict = new_grad_sample_module.state_dict() - - missing_keys = gs_state_dict.keys() - original_state_dict.keys() - assert not missing_keys, f"The following keys are missing: {missing_keys}" - - extra_keys = original_state_dict.keys() - gs_state_dict.keys() - assert not extra_keys, f"The following keys are extra: {extra_keys}" - - for key in original_state_dict: - original_tensor = original_state_dict[key].float() - gs_tensor = gs_state_dict[key].float() - msg = ( - f"Param {key}: GradSample L2 norm = : 
{gs_tensor.norm(2)}, ", - f"Original L2 norm = : {original_tensor.norm(2)}, ", - f"MSE = {F.mse_loss(gs_tensor, original_tensor)}, ", - f"L1 Loss = {F.l1_loss(gs_tensor, original_tensor)}", - ) - - assert_allclose(gs_tensor, original_tensor, atol=1e-6, rtol=1e-4, msg=msg) - - def test_remove_hooks(self): - """ - Test that after calling .remove_hooks() no hooks are left - """ - copy_of_original_model = mobilenet_v3_small() - copy_of_original_model.load_state_dict( - self.original_model.state_dict(), strict=True, - ) - new_grad_sample_module = GradSampleModule( - copy_of_original_model, batch_first=True, loss_reduction="mean" - ) - new_grad_sample_module.remove_hooks() - - remaining_forward_hooks = { - module: module._forward_hooks - for module in new_grad_sample_module.modules() - if module._forward_hooks - } - assert ( - not remaining_forward_hooks - ), f"Some forward hooks remain after .remove_hooks(): {remaining_forward_hooks}" - - remaining_backward_hooks = { - module: module._backward_hooks - for module in new_grad_sample_module.modules() - if module._backward_hooks - } - assert ( - not remaining_backward_hooks - ), f"Some backward hooks remain after .remove_hooks(): {remaining_backward_hooks}" - - def test_enable_hooks(self): - self.grad_sample_module.enable_hooks() - assert self.grad_sample_module.hooks_enabled - - def test_disable_hooks(self): - self.grad_sample_module.disable_hooks() - assert not self.grad_sample_module.hooks_enabled diff --git a/opacus/tests/grad_samples/__init__.py b/opacus/tests/grad_samples/__init__.py deleted file mode 100644 index 4e3dc62..0000000 --- a/opacus/tests/grad_samples/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved diff --git a/opacus/tests/grad_samples/common.py b/opacus/tests/grad_samples/common.py deleted file mode 100644 index c5cf80e..0000000 --- a/opacus/tests/grad_samples/common.py +++ /dev/null @@ -1,394 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import io -import unittest -from typing import Dict, List, Union - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -from opacus.grad_sample import GradSampleModule -from torch.nn.utils.rnn import PackedSequence, pad_packed_sequence -from torch.testing import assert_allclose - - -def expander(x, factor: int = 2): - return x * factor - - -def shrinker(x, factor: int = 2): - return max(1, x // factor) # if avoid returning 0 for x == 1 - - -class ModelWithLoss(nn.Module): - """ - To test the gradients of a module, we need to have a loss. - This module makes it easy to get a loss from any nn.Module, and automatically generates - a target y vector for it in the forward (of all zeros of the correct size). - This reduces boilerplate while testing. - """ - - supported_reductions = ["mean", "sum"] - - def __init__(self, module: nn.Module, loss_reduction: str = "mean"): - """ - Instantiates this module. - - Args: - module: The nn.Module you want to test. - loss_reduction: What reduction to apply to the loss. Defaults to "mean". - - Raises: - ValueError: If ``loss_reduction`` is not among those supported. - """ - super().__init__() - self.wrapped_module = module - - if loss_reduction not in self.supported_reductions: - raise ValueError( - f"Passed loss_reduction={loss_reduction}. Only {self.supported_reductions} supported." 
- ) - self.criterion = nn.L1Loss(reduction=loss_reduction) - - def forward(self, x): - x = self.wrapped_module(x) - if type(x) is PackedSequence: - loss = _compute_loss_packedsequences(self.criterion, x) - else: - y = torch.zeros_like(x) - loss = self.criterion(x, y) - return loss - - -def clone_module(module: nn.Module) -> nn.Module: - """ - Handy utility to clone an nn.Module. PyTorch doesn't always support copy.deepcopy(), so it is - just easier to serialize the model to a BytesIO and read it from there. - - Args: - module: The module to clone - - Returns: - The clone of ``module`` - """ - with io.BytesIO() as bytesio: - torch.save(module, bytesio) - bytesio.seek(0) - module_copy = torch.load(bytesio) - return module_copy - - -class GradSampleHooks_test(unittest.TestCase): - """ - Set of common testing utils. It is meant to be subclassed by your test. - See other tests as an example of how this is done. - """ - - def compute_microbatch_grad_sample( - self, - x: Union[torch.Tensor, List[torch.Tensor]], - module: nn.Module, - batch_first=True, - loss_reduction="mean", - ) -> Dict[str, torch.tensor]: - """ - Computes per-sample gradients with the microbatch method, ie by computing normal gradients - with batch_size set to 1, and manually accumulating them. This is our reference for testing - as this method is obviously correct, but slow. - - Args: - x: The tensor in input to the ``module`` - module: The ``ModelWithLoss`` that wraps the nn.Module you want to test. - batch_first: Whether batch size is the first dimension (as opposed to the second). - Defaults to True. - - Returns: - Dictionary mapping parameter_name -> per-sample-gradient for that parameter - """ - torch.use_deterministic_algorithms(True) - torch.manual_seed(0) - np.random.seed(0) - - module = ModelWithLoss(clone_module(module), loss_reduction) - - for p in module.parameters(): - p.microbatch_grad_sample = [] - - if not batch_first and type(x) is not list: - # This allows us to iterate with x_i - x = x.transpose(0, 1) - - # Invariant: x is [B, T, ...] - - for x_i in x: - # x_i is [T, ...] - x_i = x_i.unsqueeze( - 0 if batch_first else 1 - ) # x_i of size [1, T, ...] if batch_first, else [T, 1, ...] - module.zero_grad() - loss_i = module(x_i) - loss_i.backward() - for p in module.parameters(): - p.microbatch_grad_sample.append(p.grad.detach().clone()) - - for p in module.parameters(): - if batch_first: - p.microbatch_grad_sample = torch.stack( - p.microbatch_grad_sample, dim=0 # [B, T, ...] - ) - else: - p.microbatch_grad_sample = torch.stack( - p.microbatch_grad_sample, dim=1 # [T, B, ...] - ).transpose( - 0, 1 - ) # Opacus's semantics is that grad_samples are ALWAYS batch_first: [B, T, ...] - - microbatch_grad_samples = { - name: p.microbatch_grad_sample - for name, p in module.wrapped_module.named_parameters() - } - return microbatch_grad_samples - - def compute_opacus_grad_sample( - self, - x: Union[torch.Tensor, PackedSequence], - module: nn.Module, - batch_first=True, - loss_reduction="mean", - ) -> Dict[str, torch.tensor]: - """ - Runs Opacus to compute per-sample gradients and return them for testing purposes. - - Args: - x: The tensor in input to the ``module`` - module: The ``ModelWithLoss`` that wraps the nn.Module you want to test. - batch_first: Whether batch size is the first dimension (as opposed to the second). - Defaults to True. - loss_reduction: What reduction to apply to the loss. Defaults to "mean". 
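    The microbatch reference method defined just above reduces to a short loop; a condensed, self-contained sketch with an assumed toy model and shapes:

```python
import torch
import torch.nn as nn

model = nn.Linear(4, 2)
criterion = nn.L1Loss(reduction="mean")
x = torch.randn(8, 4)  # batch of 8 samples

per_sample_grads = []
for x_i in x:                      # one sample at a time
    model.zero_grad()
    out = model(x_i.unsqueeze(0))  # batch of exactly one
    criterion(out, torch.zeros_like(out)).backward()
    per_sample_grads.append(model.weight.grad.detach().clone())

# Stack along a new batch dimension: [B, *weight.shape], batch_first layout.
grad_sample = torch.stack(per_sample_grads, dim=0)
```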
- - Returns: - Dictionary mapping parameter_name -> per-sample-gradient for that parameter - """ - torch.use_deterministic_algorithms(True) - torch.manual_seed(0) - np.random.seed(0) - - gs_module = GradSampleModule( - clone_module(module), batch_first=batch_first, loss_reduction=loss_reduction - ) - grad_sample_module = ModelWithLoss(gs_module, loss_reduction) - - grad_sample_module.zero_grad() - loss = grad_sample_module(x) - loss.backward() - - opacus_grad_samples = { - name: p.grad_sample - for name, p in grad_sample_module.wrapped_module._module.named_parameters() - } - - return opacus_grad_samples - - def run_test( - self, - x: Union[torch.Tensor, PackedSequence], - module: nn.Module, - batch_first=True, - atol=10e-6, - rtol=10e-5, - ): - self.run_test_with_reduction( - x, - module, - batch_first=batch_first, - loss_reduction="mean", - atol=atol, - rtol=rtol, - ) - self.run_test_with_reduction( - x, - module, - batch_first=batch_first, - loss_reduction="sum", - atol=atol, - rtol=rtol, - ) - - def run_test_with_reduction( - self, - x: Union[torch.Tensor, PackedSequence], - module: nn.Module, - batch_first=True, - loss_reduction="mean", - atol=10e-6, - rtol=10e-5, - ): - if type(x) is PackedSequence: - x_unpacked = _unpack_packedsequences(x) - microbatch_grad_samples = self.compute_microbatch_grad_sample( - x_unpacked, - module, - batch_first=batch_first, - loss_reduction=loss_reduction, - ) - else: - microbatch_grad_samples = self.compute_microbatch_grad_sample( - x, module, batch_first=batch_first, loss_reduction=loss_reduction - ) - - opacus_grad_samples = self.compute_opacus_grad_sample( - x, module, batch_first=batch_first, loss_reduction=loss_reduction - ) - - if microbatch_grad_samples.keys() != opacus_grad_samples.keys(): - raise ValueError( - "Keys not matching! " - f"Keys only in microbatch: {microbatch_grad_samples.keys() - opacus_grad_samples.keys()}; " - f"Keys only in Opacus: {opacus_grad_samples.keys() - microbatch_grad_samples.keys()}" - ) - - self.check_shapes(microbatch_grad_samples, opacus_grad_samples, loss_reduction) - self.check_values( - microbatch_grad_samples, opacus_grad_samples, loss_reduction, atol, rtol - ) - - def check_shapes( - self, microbatch_grad_samples, opacus_grad_samples, loss_reduction, - ) -> None: - failed = [] - for name, opacus_grad_sample in opacus_grad_samples.items(): - microbatch_grad_sample = microbatch_grad_samples[name] - msg = ( - f"Param '{name}': " - f"from Opacus: {opacus_grad_sample.shape}, " - f"from Microbatch: {microbatch_grad_sample.shape}. " - ) - try: - self.assertEqual( - opacus_grad_sample.shape, microbatch_grad_sample.shape, msg=msg, - ) - - except AssertionError: - failed.append(msg) - - if failed: - failed_str = "\n\t".join(f"{i}. 
{s}" for i, s in enumerate(failed, 1)) - raise AssertionError( - f"A total of {len(failed)} shapes do not match " - f"for loss_reduction={loss_reduction}: \n\t{failed_str}" - ) - - def check_values( - self, microbatch_grad_samples, opacus_grad_samples, loss_reduction, atol, rtol, - ) -> None: - failed = [] - for name, opacus_grad_sample in opacus_grad_samples.items(): - microbatch_grad_sample = microbatch_grad_samples[name] - msg = ( - f"Param {name}: Opacus L2 norm = : {opacus_grad_sample.norm(2)}, ", - f"Microbatch L2 norm = : {microbatch_grad_sample.norm(2)}, ", - f"MSE = {F.mse_loss(opacus_grad_sample, microbatch_grad_sample)}, ", - f"L1 Loss = {F.l1_loss(opacus_grad_sample, microbatch_grad_sample)}", - ) - try: - assert_allclose( - actual=microbatch_grad_sample, - expected=opacus_grad_sample, - atol=atol, - rtol=rtol, - ) - except AssertionError: - failed.append(msg) - if failed: - failed_str = "\n\t".join(f"{i}. {s}" for i, s in enumerate(failed, 1)) - raise AssertionError( - f"A total of {len(failed)} values do not match " - f"for loss_reduction={loss_reduction}: \n\t{failed_str}" - ) - - -def _unpack_packedsequences(X: PackedSequence) -> List[torch.Tensor]: - r""" - Produces a list of tensors from X (PackedSequence) such that this list was used to create X with batch_first=True - - Args: - X: A PackedSequence from which the output list of tensors will be produced. - - Returns: - unpacked_data: The list of tensors produced from X. - """ - - X_padded = pad_packed_sequence(X) - X_padded = X_padded[0].permute((1, 0, 2)) - - if X.sorted_indices is not None: - X_padded = X_padded[X.sorted_indices] - - seq_lens = _compute_seq_lengths(X.batch_sizes) - unpacked_data = [0] * len(seq_lens) - for idx, length in enumerate(seq_lens): - unpacked_data[idx] = X_padded[idx][:length, :] - - return unpacked_data - - -def _compute_seq_lengths(batch_sizes: torch.Tensor) -> List[int]: - r""" - Computes the sequence lengths (the length parameter used in the packed_padded_sequence function to create a PackedSequence). - - Args: - batch_sizes: Contains the batch sizes as stored in a PackedSequence - - Returns: - running_seq_lengths: the length parameter used in the torch.nn.utils.rnn.packed_padded_sequence function to create a PackedSequence. - It's a list of the same length as batch_sizes. - """ - - max_batch_size = batch_sizes[0] - if len(batch_sizes) == 1: - return [1] * max_batch_size - - running_seq = 0 - running_seq_lengths = [] - for i in range(1, len(batch_sizes)): - delta = batch_sizes[i - 1].item() - batch_sizes[i].item() - running_seq += 1 - running_seq_lengths += delta * [running_seq] - - running_seq += 1 - running_seq_lengths += batch_sizes[-1].item() * [running_seq] - running_seq_lengths.reverse() - return running_seq_lengths - - -def _compute_loss_packedsequences( - criterion: nn.L1Loss, x: PackedSequence -) -> torch.Tensor: - r""" - This function computes the loss in a different way for 'mean' reduced L1 loss while for 'sum' reduced L1 loss, - it computes the same way as with non-packed data. For 'mean' reduced L1 loss, it transforms x (PackedSequence) - into a list of tensors such that this list of tensors was used to create this PackedSequence in the first - place using batch_first=True and then takes the mean of the loss values produced from applying criterion on - each sequence sample. - - Args: - criterion: An L1 loss function with reduction either set to 'sum' or 'mean'. - x: Data in the form of a PackedSequence. 
- - Returns: - A loss variable, reduced either using summation or averaging from L1 errors. - """ - - if criterion.reduction == "sum": - y = torch.zeros_like(x[0]) - return criterion(x[0], y) - elif criterion.reduction == "mean": - x = _unpack_packedsequences(x) - loss_sum = 0 - for x_i in x: - y_i = torch.zeros_like(x_i) - loss_sum += criterion(x_i, y_i) - loss_mean = loss_sum / len(x) - return loss_mean diff --git a/opacus/tests/grad_samples/conv1d_test.py b/opacus/tests/grad_samples/conv1d_test.py deleted file mode 100644 index 8976f54..0000000 --- a/opacus/tests/grad_samples/conv1d_test.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from typing import Callable - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test, expander, shrinker - - -class Conv1d_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), - C=st.sampled_from([1, 3, 32]), - W=st.integers(6, 10), - out_channels_mapper=st.sampled_from([expander, shrinker]), - kernel_size=st.integers(2, 3), - stride=st.integers(1, 2), - padding=st.integers(0, 2), - dilation=st.integers(1, 2), - groups=st.integers(1, 12), - ) - @settings(deadline=10000) - def test_conv1d( - self, - N: int, - C: int, - W: int, - out_channels_mapper: Callable[[int], int], - kernel_size: int, - stride: int, - padding: int, - dilation: int, - groups: int, - ): - - out_channels = out_channels_mapper(C) - if ( - C % groups != 0 or out_channels % groups != 0 - ): # since in_channels and out_channels must be divisible by groups - return - - x = torch.randn([N, C, W]) - conv = nn.Conv1d( - in_channels=C, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - ) - self.run_test(x, conv, batch_first=True, atol=10e-5, rtol=10e-4) diff --git a/opacus/tests/grad_samples/conv2d_test.py b/opacus/tests/grad_samples/conv2d_test.py deleted file mode 100644 index ed93054..0000000 --- a/opacus/tests/grad_samples/conv2d_test.py +++ /dev/null @@ -1,58 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -from typing import Callable - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test, expander, shrinker - - -class Conv2d_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), - C=st.sampled_from([1, 3, 32]), - H=st.integers(6, 10), - W=st.integers(6, 10), - out_channels_mapper=st.sampled_from([expander, shrinker]), - kernel_size=st.integers(2, 3), - stride=st.integers(1, 2), - padding=st.sampled_from([0, 2]), - dilation=st.integers(1, 2), - groups=st.integers(1, 16), - ) - @settings(deadline=10000) - def test_conv2d( - self, - N: int, - C: int, - H: int, - W: int, - out_channels_mapper: Callable[[int], int], - kernel_size: int, - stride: int, - padding: int, - dilation: int, - groups: int, - ): - - out_channels = out_channels_mapper(C) - if ( - C % groups != 0 or out_channels % groups != 0 - ): # since in_channels and out_channels must be divisible by groups - return - - x = torch.randn([N, C, H, W]) - conv = nn.Conv2d( - in_channels=C, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - ) - self.run_test(x, conv, batch_first=True, atol=10e-5, rtol=10e-4) diff --git a/opacus/tests/grad_samples/conv3d_test.py b/opacus/tests/grad_samples/conv3d_test.py deleted file mode 100644 index 9fb59e0..0000000 --- a/opacus/tests/grad_samples/conv3d_test.py +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from typing import Tuple, Union - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test, expander, shrinker - - -class Conv3d_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), - C=st.sampled_from([1, 3, 32]), - D=st.integers(3, 6), - H=st.integers(6, 10), - W=st.integers(6, 10), - out_channels_mapper=st.sampled_from([expander, shrinker]), - kernel_size=st.sampled_from([2, 3, (1, 2, 3)]), - stride=st.sampled_from([1, 2, (1, 2, 3)]), - padding=st.sampled_from([0, 2, (1, 2, 3)]), - dilation=st.just(1), - groups=st.integers(1, 16), - ) - @settings(deadline=10000) - def test_conv3d( - self, - N: int, - C: int, - D: int, - H: int, - W: int, - out_channels_mapper: int, - kernel_size: Union[int, Tuple[int]], - stride: Union[int, Tuple[int]], - padding: Union[int, Tuple[int]], - dilation: int, - groups: int, - ): - - out_channels = out_channels_mapper(C) - if ( - C % groups != 0 or out_channels % groups != 0 - ): # since in_channels and out_channels must be divisible by groups - return - x = torch.randn([N, C, D, H, W]) - conv = nn.Conv3d( - in_channels=C, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups, - ) - self.run_test(x, conv, batch_first=True, atol=10e-5, rtol=10e-3) diff --git a/opacus/tests/grad_samples/dp_lstm_test.py b/opacus/tests/grad_samples/dp_lstm_test.py deleted file mode 100644 index 15b3ff8..0000000 --- a/opacus/tests/grad_samples/dp_lstm_test.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings -from opacus.layers import DPLSTM -from opacus.utils.packed_sequences import _gen_packed_data - -from .common import GradSampleHooks_test - - -class DPSLTMAdapter(nn.Module): - """ - Adapter for DPLSTM. - LSTM returns a tuple, but our testing tools need the model to return a single tensor in output. - We do this adaption here. - """ - - def __init__(self, *args, **kwargs): - super().__init__() - self.dplstm = DPLSTM(*args, **kwargs) - - def forward(self, x): - out, _rest = self.dplstm(x) - return out - - -class LSTM_test(GradSampleHooks_test): - @given( - N=st.integers(1, 3), - T=st.integers(1, 3), - D=st.integers(4, 5), - H=st.integers(8, 10), - num_layers=st.sampled_from([1, 2]), - bias=st.booleans(), - batch_first=st.booleans(), - bidirectional=st.booleans(), - using_packed_sequences=st.booleans(), - packed_sequences_sorted=st.booleans(), - ) - @settings(deadline=30000) - def test_lstm( - self, - N: int, - T: int, - D: int, - H: int, - num_layers: int, - bias: bool, - batch_first: bool, - bidirectional: bool, - using_packed_sequences: bool, - packed_sequences_sorted: bool, - ): - lstm = DPSLTMAdapter( - D, - H, - num_layers=num_layers, - batch_first=batch_first, - bias=bias, - bidirectional=bidirectional, - ) - if using_packed_sequences: - x = _gen_packed_data(N, T, D, batch_first, packed_sequences_sorted) - else: - if batch_first: - x = torch.randn([N, T, D]) - else: - x = torch.randn([T, N, D]) - self.run_test(x, lstm, batch_first=batch_first) diff --git a/opacus/tests/grad_samples/dp_multihead_attention_test.py b/opacus/tests/grad_samples/dp_multihead_attention_test.py deleted file mode 100644 index 25d26e0..0000000 --- a/opacus/tests/grad_samples/dp_multihead_attention_test.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings -from opacus.layers import DPMultiheadAttention - -from .common import GradSampleHooks_test - - -class DPMultiheadAttentionAdapter(nn.Module): - """ - Adapter for DPMultiHeadAttention. - This module takes three inputs, but our testing tools need that the model is given a single - tensor, and returns a single tensor in output. - - To adapt for this, we stack the three input tensors required (q, k, v) over the LAST dimension, - because our testing tools need to handle the `batch_first` argument which will manipulate x - over the first (and potentially second) dimension. 
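    The stacking convention reads like this in isolation (illustrative shapes only):

```python
import torch

q = torch.randn(20, 4, 16)          # [T, N, D]
k = torch.randn(20, 4, 16)
v = torch.randn(20, 4, 16)
x = torch.stack((q, k, v), dim=-1)  # one [T, N, D, 3] tensor for the adapter
q2, k2, v2 = x.unbind(-1)           # what the adapter's forward() undoes
assert torch.equal(q, q2)
```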
- """ - - def __init__(self, *args, **kwargs): - super().__init__() - self.attn = DPMultiheadAttention(*args, **kwargs) - - def forward(self, x): - q, k, v = x.unbind(-1) - out, _attn_weights = self.attn(q, k, v) - return out - - -class MultiHeadAttention_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), - T=st.integers(16, 20), - D=st.sampled_from([4]), - P=st.sampled_from([1, 2]), - bias=st.booleans(), - add_bias_kv=st.booleans(), - add_zero_attn=st.booleans(), - kv_dim=st.booleans(), - ) - @settings(deadline=10000) - def test_multihead_attention( - self, - N: int, - T: int, - D: int, - P: int, - bias: bool, - add_bias_kv: bool, - add_zero_attn: bool, - kv_dim: bool, - ): - - if kv_dim: - kdim, vdim = D, D - else: - kdim, vdim = None, None - attn = DPMultiheadAttentionAdapter( - D, - P, - bias=bias, - add_bias_kv=add_bias_kv, - add_zero_attn=add_zero_attn, - dropout=0.0, - kdim=kdim, - vdim=vdim, - ) - q = torch.randn([T, N, D]) - k = torch.randn([T, N, D]) - v = torch.randn([T, N, D]) - x = torch.stack((q, k, v), dim=-1) - - self.run_test(x, attn, batch_first=False) diff --git a/opacus/tests/grad_samples/embedding_test.py b/opacus/tests/grad_samples/embedding_test.py deleted file mode 100644 index 76362d1..0000000 --- a/opacus/tests/grad_samples/embedding_test.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test - - -class Embedding_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), - T=st.integers(1, 5), - Q=st.integers(1, 4), - R=st.integers(1, 2), - V=st.integers(2, 32), - D=st.integers(10, 17), - dim=st.integers(1, 4), - ) - @settings(deadline=10000) - def test_input_across_dims( - self, N: int, T: int, Q: int, R: int, V: int, D: int, dim: int, - ): - - if dim == 1: - size = [T] - elif dim == 2: - size = [N, T] - elif dim == 3: - size = [N, T, Q] - elif dim == 4: - size = [N, T, Q, R] - - emb = nn.Embedding(V, D) - x = torch.randint(low=0, high=V - 1, size=size) - self.run_test(x, emb, batch_first=True) diff --git a/opacus/tests/grad_samples/group_norm_test.py b/opacus/tests/grad_samples/group_norm_test.py deleted file mode 100644 index 80d9f75..0000000 --- a/opacus/tests/grad_samples/group_norm_test.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -from typing import Union - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test - - -class GroupNorm_test(GradSampleHooks_test): - """ - We only test the case with ``affine=True`` here, because it is the only case that will actually - compute a gradient. There is no grad_sample from this module otherwise. 
- """ - - @given( - N=st.integers(1, 4), - C=st.integers(1, 8), - H=st.integers(5, 10), - W=st.integers(4, 8), - num_groups=st.sampled_from([1, 4, "C"]), - ) - @settings(deadline=10000) - def test_3d_input_groups( - self, N: int, C: int, H: int, W: int, num_groups: Union[int, str], - ): - - if num_groups == "C": - num_groups = C - - if C % num_groups != 0: - return - - x = torch.randn([N, C, H, W]) - norm = nn.GroupNorm(num_groups=num_groups, num_channels=C, affine=True) - self.run_test(x, norm, batch_first=True) diff --git a/opacus/tests/grad_samples/instance_norm1d_test.py b/opacus/tests/grad_samples/instance_norm1d_test.py deleted file mode 100644 index da410c3..0000000 --- a/opacus/tests/grad_samples/instance_norm1d_test.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test - - -class InstanceNorm1d_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), C=st.integers(1, 3), W=st.integers(5, 10), - ) - @settings(deadline=10000) - def test_3d_input( - self, N: int, C: int, W: int, - ): - - x = torch.randn([N, C, W]) - norm = nn.InstanceNorm1d(num_features=C, affine=True, track_running_stats=False) - self.run_test(x, norm, batch_first=True) diff --git a/opacus/tests/grad_samples/instance_norm2d_test.py b/opacus/tests/grad_samples/instance_norm2d_test.py deleted file mode 100644 index 3c6aeb6..0000000 --- a/opacus/tests/grad_samples/instance_norm2d_test.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test - - -class InstanceNorm2d_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), - C=st.integers(1, 3), - W=st.integers(5, 10), - H=st.integers(4, 8), - ) - @settings(deadline=10000) - def test_4d_input( - self, N: int, C: int, W: int, H: int, - ): - - x = torch.randn([N, C, H, W]) - norm = nn.InstanceNorm2d(num_features=C, affine=True, track_running_stats=False) - self.run_test(x, norm, batch_first=True) diff --git a/opacus/tests/grad_samples/instance_norm3d_test.py b/opacus/tests/grad_samples/instance_norm3d_test.py deleted file mode 100644 index f5c5c90..0000000 --- a/opacus/tests/grad_samples/instance_norm3d_test.py +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test - - -class InstanceNorm3d_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), - C=st.integers(1, 3), - W=st.integers(5, 10), - H=st.integers(4, 8), - Z=st.integers(1, 4), - ) - @settings(deadline=10000) - def test_5d_input( - self, N: int, C: int, W: int, H: int, Z: int, - ): - x = torch.randn([N, C, Z, H, W]) - norm = nn.InstanceNorm3d(num_features=C, affine=True, track_running_stats=False) - self.run_test(x, norm, batch_first=True) diff --git a/opacus/tests/grad_samples/layer_norm_test.py b/opacus/tests/grad_samples/layer_norm_test.py deleted file mode 100644 index 3da5acf..0000000 --- a/opacus/tests/grad_samples/layer_norm_test.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test - - -class LayerNorm_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), - Z=st.integers(1, 4), - H=st.integers(1, 3), - W=st.integers(5, 10), - input_dim=st.integers(2, 4), - norm_dim=st.integers(1, 3), - ) - @settings(deadline=10000) - def test_input_norm( - self, N: int, Z: int, W: int, H: int, input_dim: int, norm_dim: int, - ): - - if norm_dim >= input_dim: - return - if norm_dim == 1: - normalized_shape = W - if input_dim == 2: - x_shape = [N, W] - if input_dim == 3: - x_shape = [N, Z, W] - if input_dim == 4: - x_shape = [N, Z, H, W] - elif norm_dim == 2: - if input_dim == 3: - normalized_shape = [Z, W] - x_shape = [N, Z, W] - if input_dim == 4: - normalized_shape = [H, W] - x_shape = [N, Z, H, W] - elif norm_dim == 3: - normalized_shape = [Z, H, W] - x_shape = [N, Z, H, W] - - norm = nn.LayerNorm(normalized_shape, elementwise_affine=True) - x = torch.randn(x_shape) - self.run_test(x, norm, batch_first=True) diff --git a/opacus/tests/grad_samples/linear_test.py b/opacus/tests/grad_samples/linear_test.py deleted file mode 100644 index c182a41..0000000 --- a/opacus/tests/grad_samples/linear_test.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import hypothesis.strategies as st -import torch -import torch.nn as nn -from hypothesis import given, settings - -from .common import GradSampleHooks_test - - -class Linear_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), - Z=st.integers(1, 4), - H=st.integers(1, 3), - W=st.integers(10, 17), - input_dim=st.integers(2, 4), - bias=st.booleans(), - ) - @settings(deadline=10000) - def test_input_bias( - self, N: int, Z: int, W: int, H: int, input_dim: int, bias: bool, - ): - - if input_dim == 2: - x_shape = [N, W] - if input_dim == 3: - x_shape = [N, Z, W] - if input_dim == 4: - x_shape = [N, Z, H, W] - - linear = nn.Linear(W, W + 2, bias=bias) - x = torch.randn(x_shape) - self.run_test(x, linear, batch_first=True) diff --git a/opacus/tests/grad_samples/sequence_bias_test.py b/opacus/tests/grad_samples/sequence_bias_test.py deleted file mode 100644 index 532434d..0000000 --- a/opacus/tests/grad_samples/sequence_bias_test.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import hypothesis.strategies as st -import torch -from hypothesis import given, settings -from opacus.layers import SequenceBias - -from .common import GradSampleHooks_test - - -class SequenceBias_test(GradSampleHooks_test): - @given( - N=st.integers(1, 4), T=st.integers(10, 20), D=st.integers(4, 8), - ) - @settings(deadline=10000) - def test_batch_second( - self, N: int, T: int, D: int, - ): - - seqbias = SequenceBias(D) - x = torch.randn([T, N, D]) - self.run_test(x, seqbias, batch_first=False) diff --git a/opacus/tests/multigpu_gradcheck.py b/opacus/tests/multigpu_gradcheck.py deleted file mode 100644 index 32a8e90..0000000 --- a/opacus/tests/multigpu_gradcheck.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import os -import sys -import unittest - -import torch -import torch.distributed as dist -import torch.multiprocessing as mp -import torch.nn as nn -import torch.optim as optim -from opacus import PrivacyEngine -from opacus.layers import DifferentiallyPrivateDistributedDataParallel as DPDDP -from torch.nn.parallel import DistributedDataParallel as DDP - - -PRIVACY_ALPHAS = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)) - - -def setup(rank, world_size): - if sys.platform == "win32": - # Distributed package only covers collective communications with Gloo - # backend and FileStore on Windows platform. Set init_method parameter - # in init_process_group to a local file. - # Example init_method="file:///f:/libtmp/some_file" - init_method = "file:///{your local file path}" - - # initialize the process group - dist.init_process_group( - "gloo", init_method=init_method, rank=rank, world_size=world_size - ) - else: - os.environ["MASTER_ADDR"] = "localhost" - os.environ["MASTER_PORT"] = "12355" - - # initialize the process group - # dist.init_process_group("gloo", rank=rank, world_size=world_size) - - os.environ["RANK"] = str(rank) - os.environ["WORLD_SIZE"] = str(world_size) - torch.distributed.init_process_group( - init_method="env://", backend="nccl", - ) - - -def cleanup(): - dist.destroy_process_group() - - -class ToyModel(nn.Module): - def __init__(self): - super(ToyModel, self).__init__() - self.net1 = nn.Linear(10, 10) - self.relu = nn.ReLU() - self.net2 = nn.Linear(10, 5) - - def forward(self, x): - return self.net2(self.relu(self.net1(x))) - - -def demo_basic(rank, weight, world_size, dp): - torch.manual_seed(world_size) - batch_size = 32 - withdp = "with" + ("out " if not dp else "") - print(f"Running basic DDP {withdp} differential privacy example on rank {rank}.") - setup(rank, world_size) - - # create model and move it to GPU with id rank - model = ToyModel().to(rank) - if dp: - ddp_model = DPDDP(model) - engine = PrivacyEngine( - ddp_model, - batch_size=batch_size, - sample_size=10 * batch_size, - alphas=PRIVACY_ALPHAS, - noise_multiplier=0, - max_grad_norm=1e8, - ) - else: - ddp_model = DDP(model, device_ids=[rank]) - - loss_fn = nn.MSELoss() - optimizer = optim.SGD(ddp_model.parameters(), lr=1) - if dp: - engine.attach(optimizer) - - # if rank == 0: - # print(model.net1.weight) - optimizer.zero_grad() - labels = torch.randn(batch_size, 5).to(rank) - outputs = ddp_model(torch.randn(batch_size, 10).to(rank)) - loss_fn(outputs, labels).backward() - optimizer.step() - # if rank == 0: - # print(model.net1.weight) - - weight.copy_(model.net1.weight.data.cpu()) - - cleanup() - - -def run_demo(demo_fn, weight, world_size, dp): - mp.spawn(demo_fn, args=(weight, 
world_size, dp), nprocs=world_size, join=True) - - -class GradientComputationTest(unittest.TestCase): - def test_gradient_correct(self): - # Tests that gradients match between a DPDDP run (privacy engine attached, zero noise) and a plain DDP run - n_gpus = torch.cuda.device_count() - self.assertTrue( - n_gpus >= 2, f"Need at least 2 GPUs but only {n_gpus} provided." - ) - weight_dp, weight_nodp = torch.zeros(10, 10), torch.zeros(10, 10) - run_demo(demo_basic, weight_dp, 2, dp=True) - run_demo(demo_basic, weight_nodp, 2, dp=False) - - self.assertTrue(torch.norm(weight_dp - weight_nodp) < 1e-7) diff --git a/opacus/tests/per_sample_gradient_clip_test.py b/opacus/tests/per_sample_gradient_clip_test.py deleted file mode 100644 index 83030fc..0000000 --- a/opacus/tests/per_sample_gradient_clip_test.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import unittest - -import torch -import torch.nn as nn -import torch.nn.functional as F -from opacus import PerSampleGradientClipper -from opacus.grad_sample import GradSampleModule -from opacus.utils.clipping import ConstantFlatClipper, ConstantPerLayerClipper -from torch.utils.data import DataLoader -from torchvision import transforms -from torchvision.datasets import FakeData - - -class SampleConvNet(nn.Module): - def __init__(self): - super().__init__() - self.conv1 = nn.Conv2d(1, 16, 8, 3) - self.conv2 = nn.Conv1d(16, 32, 3, 1) - self.convf = nn.Conv1d(32, 32, 1, 1) - for p in self.convf.parameters(): - p.requires_grad = False - self.fc1 = nn.Linear(23, 17) - self.fc2 = nn.Linear(32 * 17, 10) - - def forward(self, x): - # x of shape [B, 1, 28, 28] - x = F.relu(self.conv1(x)) # -> [B, 16, 10, 10] - x = F.max_pool2d(x, 2, 2) # -> [B, 16, 5, 5] - x = x.view(x.shape[0], x.shape[1], x.shape[2] * x.shape[3]) # -> [B, 16, 25] - x = F.relu(self.conv2(x)) # -> [B, 32, 23] - x = self.convf(x) # -> [B, 32, 23] - x = self.fc1(x) # -> [B, 32, 17] - x = x.view(-1, x.shape[-2] * x.shape[-1]) # -> [B, 32 * 17] - x = self.fc2(x) # -> [B, 10] - return x - - def name(self): - return "SampleConvNet" - - -class PerSampleGradientClipper_test(unittest.TestCase): - def setUp(self): - self.DATA_SIZE = 64 - self.criterion = nn.CrossEntropyLoss() - - self.setUp_data() - self.setUp_original_model() - self.setUp_clipped_model(clip_value=0.003, run_clipper_step=True) - - def setUp_data(self): - self.ds = FakeData( - size=self.DATA_SIZE, - image_size=(1, 35, 35), - num_classes=10, - transform=transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] - ), - ) - self.dl = DataLoader(self.ds, batch_size=self.DATA_SIZE) - - def setUp_original_model(self): - self.original_model = SampleConvNet() - for x, y in self.dl: - logits = self.original_model(x) - loss = self.criterion(logits, y) - loss.backward() # puts grad in self.original_model.parameters() - self.original_grads_norms = torch.stack( - [ - p.grad.norm() - for p in self.original_model.parameters() - if p.requires_grad - ], - dim=-1, - ) - - def setUp_clipped_model(self, clip_value=0.003, run_clipper_step=True): - # Deep copy - self.clipped_model = SampleConvNet() # create the structure - self.clipped_model.load_state_dict(self.original_model.state_dict()) # fill it - - self.clipped_model = GradSampleModule( - self.clipped_model - ) # TODO change this as we refactor clipper - - # Intentionally clipping to a very small value - norm_clipper = ( - ConstantFlatClipper(clip_value) - if not isinstance(clip_value, list) - else ConstantPerLayerClipper(clip_value) - ) - 
self.clipper = PerSampleGradientClipper(self.clipped_model, norm_clipper) - - for x, y in self.dl: - logits = self.clipped_model(x) - loss = self.criterion(logits, y) - loss.backward() # puts grad in self.clipped_model.parameters() - if run_clipper_step: - self.clipper.clip_and_accumulate() - self.clipper.pre_step() - self.clipped_grads_norms = torch.stack( - [p.grad.norm() for p in self.clipped_model.parameters() if p.requires_grad], - dim=-1, - ) - - def test_clipped_grad_norm_is_smaller(self): - """ - Test that grads are clipped and their values change - """ - for original_layer_norm, clipped_layer_norm in zip( - self.original_grads_norms, self.clipped_grads_norms - ): - self.assertLess(float(clipped_layer_norm), float(original_layer_norm)) - - def test_clipped_grad_norm_is_smaller_perlayer(self): - """ - Test that grads are clipped and their values change - """ - # there are 8 parameter sets [bias, weight] * [conv1, conv2, fc1, fc2] - self.setUp_clipped_model(clip_value=[0.001] * 8) - for original_layer_norm, clipped_layer_norm in zip( - self.original_grads_norms, self.clipped_grads_norms - ): - self.assertLess(float(clipped_layer_norm), float(original_layer_norm)) - - def test_clipped_grad_norms_not_zero(self): - """ - Test that grads aren't killed by clipping - """ - allzeros = torch.zeros_like(self.clipped_grads_norms) - self.assertFalse(torch.allclose(self.clipped_grads_norms, allzeros)) - - def test_clipped_grad_norms_not_zero_per_layer(self): - """ - Test that grads aren't killed by clipping - """ - # there are 8 parameter sets [bias, weight] * [conv1, conv2, fc1, fc2] - self.setUp_clipped_model(clip_value=[0.001] * 8) - allzeros = torch.zeros_like(self.clipped_grads_norms) - self.assertFalse(torch.allclose(self.clipped_grads_norms, allzeros)) - - def test_clipping_to_high_value_does_nothing(self): - self.setUp_clipped_model( - clip_value=9999, run_clipper_step=True - ) # should be a no-op - self.assertTrue( - torch.allclose(self.original_grads_norms, self.clipped_grads_norms) - ) - - def test_clipping_to_high_value_does_nothing_per_layer(self): - self.setUp_clipped_model( - clip_value=[9999] * 8, run_clipper_step=True - ) # should be a no-op - self.assertTrue( - torch.allclose(self.original_grads_norms, self.clipped_grads_norms) - ) - - def test_grad_norms_untouched_without_clip_step(self): - """ - Test that grads are not clipped until the clipper step (clip_and_accumulate + pre_step) is run - """ - self.setUp_clipped_model(clip_value=0.003, run_clipper_step=False) - self.assertTrue( - torch.allclose(self.original_grads_norms, self.clipped_grads_norms) - ) diff --git a/opacus/tests/poisson_test.py b/opacus/tests/poisson_test.py deleted file mode 100644 index 5f634df..0000000 --- a/opacus/tests/poisson_test.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -import unittest - -import torch -from opacus import PrivacyEngine -from opacus.utils.uniform_sampler import UniformWithReplacementSampler -from torch import nn, optim - - -class PoissonSamplingTest(unittest.TestCase): - def test_poisson_sampling(self): - B = 1 - N = 10 - d = 10 - dataset = [(i, torch.randn(d), torch.randn(d)) for i in range(N)] - - model = nn.Linear(d, d) - optimizer = optim.SGD(model.parameters(), lr=0.1) - engine = PrivacyEngine( - model, - sample_rate=B / N, - target_epsilon=1.0, - epochs=10, - poisson=True, - max_grad_norm=1, - sample_size=N, - ) - engine.attach(optimizer) - - generator = torch.Generator() - generator.manual_seed(7) - sampler = UniformWithReplacementSampler( - num_samples=N, sample_rate=B / N, generator=generator - ) - dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler) - - # Sampler with seed=7 should generate [], [7], [], [], [9], [0], [], [], [1], [4] - for (_, x, y) in dataloader: - prediction = model(x) - loss = torch.mean((prediction - y) ** 2) - - optimizer.zero_grad() - loss.backward() - optimizer.step() diff --git a/opacus/tests/privacy_engine_test.py b/opacus/tests/privacy_engine_test.py deleted file mode 100644 index d229e4d..0000000 --- a/opacus/tests/privacy_engine_test.py +++ /dev/null @@ -1,463 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import unittest - -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -from opacus import PrivacyEngine -from opacus.dp_model_inspector import IncompatibleModuleException -from opacus.utils.module_inspection import get_layer_type, requires_grad -from torch.utils.data import DataLoader -from torchvision import models, transforms -from torchvision.datasets import FakeData - - -def get_grad_sample_aggregated(tensor: torch.Tensor, loss_type: str = "mean"): - if tensor.grad_sample is None: - raise ValueError( - f"The input tensor {tensor} has grad computed, but grad_sample is missing. " - f"Please attach a PrivacyEngine" - ) - - if loss_type not in ("sum", "mean"): - raise ValueError(f"loss_type = {loss_type}. 
Only 'sum' and 'mean' supported") - - grad_sample_aggregated = torch.einsum("i...->...", tensor.grad_sample) - if loss_type == "mean": - b_sz = tensor.grad_sample.shape[0] - grad_sample_aggregated /= b_sz - - return grad_sample_aggregated - - -class SampleConvNet(nn.Module): - def __init__(self): - super().__init__() - self.conv1 = nn.Conv2d(1, 16, 8, 3) - self.gnorm1 = nn.GroupNorm(4, 16) - self.conv2 = nn.Conv1d(16, 32, 3, 1) - self.lnorm1 = nn.LayerNorm((32, 23)) - self.conv3 = nn.Conv1d(32, 32, 3, 1) - self.instnorm1 = nn.InstanceNorm1d(32, affine=True) - self.convf = nn.Conv1d(32, 32, 1, 1) - for p in self.convf.parameters(): - p.requires_grad = False - self.fc1 = nn.Linear(21, 17) - self.lnorm2 = nn.LayerNorm(17) - self.fc2 = nn.Linear(32 * 17, 10) - - for layer in (self.gnorm1, self.lnorm1, self.lnorm2, self.instnorm1): - nn.init.uniform_(layer.weight) - nn.init.uniform_(layer.bias) - - def forward(self, x): - # x of shape [B, 1, 28, 28] - x = self.conv1(x) # -> [B, 16, 10, 10] - x = self.gnorm1(x) # -> [B, 16, 10, 10] - x = F.relu(x) # -> [B, 16, 10, 10] - x = F.max_pool2d(x, 2, 2) # -> [B, 16, 5, 5] - x = x.view(x.shape[0], x.shape[1], x.shape[2] * x.shape[3]) # -> [B, 16, 25] - x = self.conv2(x) # -> [B, 32, 23] - x = self.lnorm1(x) # -> [B, 32, 23] - x = F.relu(x) # -> [B, 32, 23] - x = self.conv3(x) # -> [B, 32, 21] - x = self.instnorm1(x) # -> [B, 32, 21] - x = self.convf(x) # -> [B, 32, 21] - x = self.fc1(x) # -> [B, 32, 17] - x = self.lnorm2(x) # -> [B, 32, 17] - x = x.view(-1, x.shape[-2] * x.shape[-1]) # -> [B, 32 * 17] - x = self.fc2(x) # -> [B, 10] - return x - - def name(self): - return "SampleConvNet" - - -class PrivacyEngine_test(unittest.TestCase): - def setUp(self): - self.DATA_SIZE = 64 - self.BATCH_SIZE = 64 - self.SAMPLE_RATE = self.BATCH_SIZE / self.DATA_SIZE - self.LR = 0.5 - self.ALPHAS = [1 + x / 10.0 for x in range(1, 100, 10)] - self.criterion = nn.CrossEntropyLoss() - - self.setUp_data() - self.original_model, self.original_optimizer = self.setUp_init_model() - self.private_model, self.private_optimizer = self.setUp_init_model( - private=True, - state_dict=self.original_model.state_dict(), - noise_multiplier=1.3, - max_grad_norm=1.0, - ) - - self.original_grads_norms = self.setUp_model_step( - self.original_model, self.original_optimizer - ) - self.private_grads_norms = self.setUp_model_step( - self.private_model, self.private_optimizer - ) - self.privacy_default_params = { - "noise_multiplier": 1.0, - "max_grad_norm": 1, - "secure_rng": False, - } - - def setUp_data(self): - self.ds = FakeData( - size=self.DATA_SIZE, - image_size=(1, 35, 35), - num_classes=10, - transform=transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] - ), - ) - self.dl = DataLoader(self.ds, batch_size=self.BATCH_SIZE) - - def setUp_init_model( - self, private=False, state_dict=None, model=None, **privacy_engine_kwargs - ): - model = model or SampleConvNet() - optimizer = torch.optim.SGD(model.parameters(), lr=self.LR, momentum=0) - if state_dict: - model.load_state_dict(state_dict) - - if private: - if len(privacy_engine_kwargs) == 0: - privacy_engine_kwargs = self.privacy_default_params - privacy_engine = PrivacyEngine( - model, - sample_rate=self.SAMPLE_RATE, - alphas=self.ALPHAS, - **privacy_engine_kwargs, - ) - privacy_engine.attach(optimizer) - - return model, optimizer - - def setUp_model_step(self, model: nn.Module, optimizer: torch.optim.Optimizer): - - for x, y in self.dl: - optimizer.zero_grad() - logits = model(x) - loss = 
self.criterion(logits, y) - loss.backward() - optimizer.step() - - return torch.stack( - [p.grad.norm() for p in model.parameters() if p.requires_grad], dim=-1 - ) - - def test_throws_on_bad_per_layer_maxnorm_size(self): - model, optimizer = self.setUp_init_model( - private=True, noise_multiplier=0.1, max_grad_norm=[999] * 10 - ) - # there are a total of 18 parameter sets, [bias, weight] * 9 layers - # the provided max_grad_norm is neither a scalar nor a list of size 18 - with self.assertRaises(ValueError): - self.setUp_model_step(model, optimizer) - - def test_throws_double_attach(self): - model, optimizer = self.setUp_init_model(private=True) - self.setUp_model_step(model, optimizer) - with self.assertRaises(ValueError): - model, optimizer = self.setUp_init_model(private=True, model=model) - self.setUp_model_step(model, optimizer) - - def test_attach_detach_attach(self): - model, optimizer = self.setUp_init_model(private=True) - self.setUp_model_step(model, optimizer) - optimizer.privacy_engine.detach() - optimizer.step() - model, optimizer = self.setUp_init_model(private=True, model=model) - self.setUp_model_step(model, optimizer) - - def test_privacy_analysis_alpha_in_alphas(self): - target_delta = 1e-5 - eps, alpha = self.private_optimizer.privacy_engine.get_privacy_spent( - target_delta - ) - self.assertTrue(alpha in self.ALPHAS) - - def test_privacy_analysis_epsilon(self): - target_delta = 1e-5 - eps, alpha = self.private_optimizer.privacy_engine.get_privacy_spent( - target_delta - ) - self.assertTrue(eps > 0) - - def test_gradients_change(self): - """ - Test that gradients are different after one step of SGD - """ - for layer_grad, private_layer_grad in zip( - [p.grad for p in self.original_model.parameters() if p.requires_grad], - [p.grad for p in self.private_model.parameters() if p.requires_grad], - ): - self.assertFalse(torch.allclose(layer_grad, private_layer_grad)) - - def test_model_weights_change(self): - """ - Test that the updated models are different after one step of SGD - """ - for layer, private_layer in zip( - [p for p in self.original_model.parameters() if p.requires_grad], - [p for p in self.private_model.parameters() if p.requires_grad], - ): - self.assertFalse(torch.allclose(layer, private_layer)) - - def test_grad_consistency(self): - model, optimizer = self.setUp_init_model( - private=True, - state_dict=self.original_model.state_dict(), - noise_multiplier=0, - max_grad_norm=999, - ) - - grad_sample_aggregated = {} - - for x, y in self.dl: - optimizer.zero_grad() - logits = model(x) - loss = self.criterion(logits, y) - loss.backward() - - # collect all per-sample gradients before we take the step - for _, layer in model.named_modules(): - if get_layer_type(layer) == "SampleConvNet": - continue - - grad_sample_aggregated[layer] = {} - for p in layer.parameters(): - if p.requires_grad: - grad_sample_aggregated[layer][p] = get_grad_sample_aggregated(p) - - optimizer.step() - - for layer_name, layer in model.named_modules(): - if get_layer_type(layer) == "SampleConvNet": - continue - - for p in layer.parameters(): - if p.requires_grad: - self.assertTrue( - torch.allclose( - p.grad, - grad_sample_aggregated[layer][p], - atol=10e-5, - rtol=10e-2, - ), - f"grad_sample doesn't match grad. 
" - f"Layer: {layer_name}, Tensor: {p.shape}", - ) - - def test_grad_matches_original(self): - original_model, orignial_optimizer = self.setUp_init_model() - private_model, private_optimizer = self.setUp_init_model( - private=True, - state_dict=original_model.state_dict(), - noise_multiplier=0, - max_grad_norm=999, - ) - - for _ in range(3): - self.setUp_model_step(original_model, orignial_optimizer) - self.setUp_model_step(private_model, private_optimizer) - - for layer_name, private_layer in private_model.named_children(): - if not requires_grad(private_layer): - continue - - original_layer = getattr(original_model, layer_name) - - for layer, private_layer in zip( - [p.grad for p in original_layer.parameters() if p.requires_grad], - [p.grad for p in private_layer.parameters() if p.requires_grad], - ): - self.assertTrue( - torch.allclose(layer, private_layer, atol=10e-4, rtol=10e-2), - f"Layer: {layer_name}. Private gradients with noise 0 doesn't match original", - ) - - def test_grad_matches_original_per_layer_clipping(self): - original_model, orignial_optimizer = self.setUp_init_model() - private_model, private_optimizer = self.setUp_init_model( - private=True, - state_dict=original_model.state_dict(), - noise_multiplier=0, - max_grad_norm=[999] * 18, - clip_per_layer=True, - ) - - for _ in range(3): - self.setUp_model_step(original_model, orignial_optimizer) - self.setUp_model_step(private_model, private_optimizer) - - for layer_name, private_layer in private_model.named_children(): - if not requires_grad(private_layer): - continue - - original_layer = getattr(original_model, layer_name) - - for layer, private_layer in zip( - [p.grad for p in original_layer.parameters() if p.requires_grad], - [p.grad for p in private_layer.parameters() if p.requires_grad], - ): - self.assertTrue( - torch.allclose(layer, private_layer, atol=10e-4, rtol=10e-2), - f"Layer: {layer_name}. Private gradients with noise 0 doesn't match original", - ) - - def test_noise_changes_every_time(self): - """ - Test that adding noise results in ever different model params. - We disable clipping in this test by setting it to a very high threshold. - """ - model, optimizer = self.setUp_init_model( - private=True, - state_dict=self.original_model.state_dict(), - noise_multiplier=1.3, - max_grad_norm=999, - ) - self.setUp_model_step(model, optimizer) - first_run_params = (p for p in model.parameters() if p.requires_grad) - - model, optimizer = self.setUp_init_model( - private=True, - state_dict=self.original_model.state_dict(), - noise_multiplier=1.3, - max_grad_norm=999, - ) - self.setUp_model_step(model, optimizer) - second_run_params = (p for p in model.parameters() if p.requires_grad) - for p0, p1 in zip(first_run_params, second_run_params): - self.assertFalse(torch.allclose(p0, p1)) - - def test_model_validator(self): - """ - Test that the privacy engine throws on attach - if there are unsupported modules - """ - resnet = models.resnet18() - optimizer = torch.optim.SGD(resnet.parameters(), lr=1.0) - privacy_engine = PrivacyEngine( - resnet, - sample_rate=self.SAMPLE_RATE, - alphas=self.ALPHAS, - noise_multiplier=1.3, - max_grad_norm=1, - ) - with self.assertRaises(IncompatibleModuleException): - privacy_engine.attach(optimizer) - - def test_deterministic_run(self): - """ - Tests that for 2 different models, secure seed can be fixed - to produce same (deterministic) runs. 
- """ - model1, optimizer1 = self.setUp_init_model(private=True) - model2, optimizer2 = self.setUp_init_model( - private=True, state_dict=model1.state_dict() - ) - # assert the models are identical initially - first_model_params = [p for p in model1.parameters() if p.requires_grad] - second_model_params = [p for p in model2.parameters() if p.requires_grad] - for p0, p1 in zip(first_model_params, second_model_params): - self.assertTrue(torch.allclose(p0, p1)) - - optimizer1.privacy_engine._set_seed(10) - self.setUp_model_step(model1, optimizer1) - - optimizer2.privacy_engine._set_seed(10) - self.setUp_model_step(model2, optimizer2) - # assert the models are identical after we did one step - first_model_params = (p for p in model1.parameters() if p.requires_grad) - second_model_params = (p for p in model2.parameters() if p.requires_grad) - for p0, p1 in zip(first_model_params, second_model_params): - self.assertTrue(torch.allclose(p0, p1)) - - def test_deterministic_noise_generation(self): - """ - Tests that when a seed is set for a model, the sequence - of the generated noise is the same. - It performs the following test: - 1- Initiate a model, do one step, set the seed, and save the noise sequence - 2- Do 3 more steps, set the seed, and save the noise sequnece - The two noise sequences should be the same, because the seed has been set - prior to calling the noise generation each time - """ - max_norm = 5 - model, optimizer = self.setUp_init_model(private=True) - self.setUp_model_step(model, optimizer) # do one step so we have gradients - model_params = [p for p in model.parameters() if p.requires_grad] - - optimizer.privacy_engine._set_seed(20) - noise_generated_before = [ - optimizer.privacy_engine._generate_noise(max_norm, p).detach().numpy() - for p in model_params - ] - - for _ in range(3): - self.setUp_model_step(model, optimizer) - - optimizer.privacy_engine._set_seed(20) - noise_generated_after = [ - optimizer.privacy_engine._generate_noise(max_norm, p).detach().numpy() - for p in model_params - ] - - np.testing.assert_equal(noise_generated_before, noise_generated_after) - - def test_raises_seed_set_on_secure_rng(self): - """ - Tests that when a seed is set on a secure PrivacyEngine, we raise a ValueError - """ - model, optimizer = self.setUp_init_model( - private=True, secure_rng=True, noise_multiplier=1.3, max_grad_norm=1.0 - ) - with self.assertRaises(ValueError): - optimizer.privacy_engine._set_seed(20) - - def test_noise_changes_every_time_secure_rng(self): - """ - Test that adding noise results in ever different model params. - We disable clipping in this test by setting it to a very high threshold. 
- """ - model, optimizer = self.setUp_init_model( - private=True, - state_dict=self.original_model.state_dict(), - noise_multiplier=1.3, - max_grad_norm=999, - secure_rng=True, - ) - self.setUp_model_step(model, optimizer) - first_run_params = (p for p in model.parameters() if p.requires_grad) - - model, optimizer = self.setUp_init_model( - private=True, - state_dict=self.original_model.state_dict(), - noise_multiplier=1.3, - max_grad_norm=999, - secure_rng=True, - ) - self.setUp_model_step(model, optimizer) - second_run_params = (p for p in model.parameters() if p.requires_grad) - for p0, p1 in zip(first_run_params, second_run_params): - self.assertFalse(torch.allclose(p0, p1)) - - def test_sampling_rate_less_than_one(self): - """ - Tests that when the sampling rate in the privacy engine is more than 1.0 - we raise a ValueError - """ - self.SAMPLE_RATE = 1.5 - with self.assertRaises(ValueError): - PrivacyEngine( - SampleConvNet(), - sample_rate=self.SAMPLE_RATE, - alphas=self.ALPHAS, - noise_multiplier=1.0, - max_grad_norm=1.0, - ) diff --git a/opacus/tests/virtual_step_test.py b/opacus/tests/virtual_step_test.py deleted file mode 100644 index 045dff9..0000000 --- a/opacus/tests/virtual_step_test.py +++ /dev/null @@ -1,264 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import unittest - -import torch -import torch.nn as nn -import torch.nn.functional as F -from opacus import PrivacyEngine -from torch.utils.data import DataLoader -from torchvision import transforms -from torchvision.datasets import FakeData - - -class SampleConvNet(nn.Module): - def __init__(self): - super().__init__() - self.conv1 = nn.Conv2d(1, 16, 8, 3) - self.conv2 = nn.Conv1d(16, 32, 3, 1) - self.convf = nn.Conv1d(32, 32, 1, 1) - for p in self.convf.parameters(): - p.requires_grad = False - self.fc1 = nn.Linear(23, 17) - self.fc2 = nn.Linear(32 * 17, 10) - - def forward(self, x): - # x of shape [B, 1, 28, 28] - x = F.relu(self.conv1(x)) # -> [B, 16, 10, 10] - x = F.max_pool2d(x, 2, 2) # -> [B, 16, 5, 5] - x = x.view(x.shape[0], x.shape[1], x.shape[2] * x.shape[3]) # -> [B, 16, 25] - x = F.relu(self.conv2(x)) # -> [B, 32, 23] - x = self.convf(x) # -> [B, 32, 23] - x = self.fc1(x) # -> [B, 32, 17] - x = x.view(-1, x.shape[-2] * x.shape[-1]) # -> [B, 32 * 17] - x = self.fc2(x) # -> [B, 10] - return x - - def name(self): - return "SampleConvNet" - - -class GradientAccumulation_test(unittest.TestCase): - def setUp(self): - self.DATA_SIZE = 64 - self.BATCH_SIZE = 16 - self.SAMPLE_RATE = self.BATCH_SIZE / self.DATA_SIZE - self.LR = 0 # we want to call optimizer.step() without modifying the model - self.ALPHAS = [1 + x / 10.0 for x in range(1, 100, 10)] - self.criterion = nn.CrossEntropyLoss() - - self.setUp_data() - self.setUp_model_and_optimizer() - - def setUp_data(self): - self.ds = FakeData( - size=self.DATA_SIZE, - image_size=(1, 35, 35), - num_classes=10, - transform=transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] - ), - ) - self.dl = DataLoader(self.ds, batch_size=self.BATCH_SIZE) - - def setUp_model_and_optimizer(self): - self.model = SampleConvNet() - self.optimizer = torch.optim.SGD( - self.model.parameters(), lr=self.LR, momentum=0 - ) - - self.optimizer.zero_grad() - - # accumulate .grad over the entire dataset - for x, y in self.dl: - logits = self.model(x) - loss = self.criterion(logits, y) - loss.backward() - - self.effective_batch_grad = torch.cat( - [p.grad.reshape(-1) for p in self.model.parameters() if 
p.requires_grad] - ) * (self.BATCH_SIZE / self.DATA_SIZE) - - self.optimizer.zero_grad() - - def setUp_privacy_engine(self, batch_size): - self.privacy_engine = PrivacyEngine( - self.model, - sample_rate=batch_size / self.DATA_SIZE, - alphas=self.ALPHAS, - noise_multiplier=0, - max_grad_norm=999, - ) - self.privacy_engine.attach(self.optimizer) - - def calc_per_sample_grads(self, data_iter, num_steps=1): - for x, y in data_iter: - num_steps -= 1 - logits = self.model(x) - loss = self.criterion(logits, y) - loss.backward() - if num_steps == 0: - break - - def test_grad_sample_accumulation(self): - """ - Calling loss.backward() multiple times should sum up the gradients in .grad - and accumulate all the individual gradients in .grad_sample - """ - self.setUp_privacy_engine(self.DATA_SIZE) - data_iter = iter(self.dl) # 4 batches of size 16 each - self.calc_per_sample_grads(data_iter, num_steps=4) - # should accumulate grads in .grad and .grad_sample - - # the accumulated per-sample gradients - per_sample_grads = torch.cat( - [ - p.grad_sample.reshape(self.DATA_SIZE, -1) - for p in self.model.parameters() - if p.requires_grad - ], - dim=-1, - ) - # average up all the per-sample gradients - accumulated_grad = torch.mean(per_sample_grads, dim=0) - - # the full data gradient accumulated in .grad - grad = torch.cat( - [p.grad.reshape(-1) for p in self.model.parameters() if p.requires_grad] - ) * (self.BATCH_SIZE / self.DATA_SIZE) - - self.optimizer.step() - - # the accumulated gradients in .grad without any hooks - orig_grad = self.effective_batch_grad - - self.assertTrue( - torch.allclose(accumulated_grad, orig_grad, atol=10e-5, rtol=10e-3) - ) - self.assertTrue(torch.allclose(grad, orig_grad, atol=10e-5, rtol=10e-3)) - - def test_clipper_accumulation(self): - """ - Calling optimizer.virtual_step() should accumulate clipped gradients to form - one large batch. - """ - self.setUp_privacy_engine(self.DATA_SIZE) - data = iter(self.dl) # 4 batches of size 16 each - - for _ in range(3): # take 3 virtual steps - self.calc_per_sample_grads(data, num_steps=1) - self.optimizer.virtual_step() - - # accumulate on the last step - self.calc_per_sample_grads(data, num_steps=1) - self.optimizer.step() - - # .grad should contain the average gradient over the entire dataset - accumulated_grad = torch.cat( - [p.grad.reshape(-1) for p in self.model.parameters() if p.requires_grad] - ) - - # the accumulated gradients in .grad without any hooks - orig_grad = self.effective_batch_grad - - self.assertTrue( - torch.allclose(accumulated_grad, orig_grad, atol=10e-5, rtol=10e-3), - f"Values are {accumulated_grad} vs {orig_grad}. " - f"MAD is {(orig_grad - accumulated_grad).abs().mean()}", - ) - - def test_mixed_accumulation(self): - """ - Calling loss.backward() multiple times aggregates all per-sample gradients in - .grad_sample. Then, calling optimizer.virtual_step() should clip all gradients - and aggregate them into one large batch. 
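The accumulation semantics these tests rely on is plain autograd behaviour: backward() adds into .grad, so two half-batches with a sum loss equal one full batch. A quick self-contained check of that property, with nothing Opacus-specific in it:

import torch
import torch.nn as nn

torch.manual_seed(0)
lin = nn.Linear(4, 2)
x = torch.randn(8, 4)

lin(x[:4]).sum().backward()   # first half-batch
lin(x[4:]).sum().backward()   # second half-batch accumulates into .grad
accumulated = lin.weight.grad.clone()

lin.zero_grad()
lin(x).sum().backward()       # single full-batch pass
assert torch.allclose(accumulated, lin.weight.grad, atol=1e-6)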
- """ - self.setUp_privacy_engine(self.DATA_SIZE) - data = iter(self.dl) # 4 batches of size 4 each - - # accumulate per-sample grads for two mini batches - self.calc_per_sample_grads(data, num_steps=2) - # take a virtual step - self.optimizer.virtual_step() - # accumulate another two mini batches - self.calc_per_sample_grads(data, num_steps=2) - # take a step - self.optimizer.step() - - # .grad should contain the average gradient over the entire dataset - accumulated_grad = torch.cat( - [p.grad.reshape(-1) for p in self.model.parameters() if p.requires_grad] - ) - - # the accumulated gradients in .grad without any hooks - orig_grad = self.effective_batch_grad - - self.assertTrue( - torch.allclose(accumulated_grad, orig_grad, atol=10e-5, rtol=10e-3) - ) - - def test_grad_sample_erased(self): - """ - Calling optimizer.step() should erase any accumulated per-sample gradients. - """ - self.setUp_privacy_engine(2 * self.BATCH_SIZE) - data = iter(self.dl) # 4 batches of size 4 each - - for _ in range(2): - # accumulate per-sample gradients for two mini-batches to form an - # effective batch of size `2*BATCH_SIZE`. Once an effective batch - # has been accumulated, we call `optimizer.step()` to clip and - # average the per-sample gradients. This should erase the - # `grad_sample` fields for each parameter - self.calc_per_sample_grads(data, num_steps=2) - self.optimizer.step() - - for param_name, param in self.model.named_parameters(): - if param.requires_grad: - self.assertFalse( - hasattr(param, "grad_sample"), - f"Per-sample gradients haven't been erased " - f"for {param_name}", - ) - - def test_summed_grad_erased(self): - """ - Calling optimizer.step() should erase any accumulated clipped gradients. - """ - - self.setUp_privacy_engine(2 * self.BATCH_SIZE) - data = iter(self.dl) # 4 batches of size 4 each - - for idx in range(4): - self.calc_per_sample_grads(data, num_steps=1) - - if idx % 2 == 0: - # perform a virtual step for each mini-batch - # this will accumulate clipped gradients in each parameter's - # `summed_grads` field. - self.optimizer.virtual_step() - for param_name, param in self.model.named_parameters(): - if param.requires_grad: - self.assertTrue( - hasattr(param, "summed_grad"), - f"Clipped gradients aren't accumulated " - f"for {param_name}", - ) - else: - # accumulate gradients for two mini-batches to form an - # effective batch of size `2*BATCH_SIZE`. Once an effective batch - # has been accumulated, we call `optimizer.step()` to compute the - # average gradient for the entire batch. This should erase the - # `summed_grads` fields for each parameter. - # take a step. The clipper will compute the mean gradient - # for the entire effective batch and populate each parameter's - # `.grad` field. - self.optimizer.step() - - for param_name, param in self.model.named_parameters(): - if param.requires_grad: - self.assertFalse( - hasattr(param, "summed_grad"), - f"Accumulated clipped gradients haven't been erased " - f"¨for {param_name}", - ) diff --git a/opacus/utils/clipping.py b/opacus/utils/clipping.py deleted file mode 100644 index 067f907..0000000 --- a/opacus/utils/clipping.py +++ /dev/null @@ -1,447 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved - -import math -from abc import ABC -from enum import IntEnum -from itertools import cycle -from typing import Iterator, List, Union - -import torch - - -try: - from skimage.filters import threshold_otsu as otsu -except ImportError: - - def otsu(*args, **kwargs) -> float: - raise NotImplementedError("Install skimage!") - - -def _mean_plus_r_var(data: torch.Tensor, ratio: float = 0, **kwargs) -> float: - """ - Calculates mean + ratio x standard deviation of the provided tensor - and returns the larger of this value and the smallest element in - the tensor (the latter can happen when ratio is negative). - - Args: - data: Pytorch tensor containing the data on which the mean and std. dev. - are evaluated. - ratio: Value of the scaling factor in the value calculated by the - function. - - Returns: - The result of the function. - - """ - return max(data.min().item(), data.mean().item() + ratio * data.std().item() + 1e-8) - - -def _pvalue(data: torch.Tensor, ratio: float = 0.25, **kwargs) -> float: - """ - Finds the pth largest value in the tensor, where p = ratio x len(data). - - Args: - data: Pytorch tensor against which the function is evaluated. - ratio: Value of the scaling factor in the value calculated by - the function. - - Returns: - The computed threshold value as a float. - """ - cut = max(1, int(data.numel() * (1 - ratio))) - return torch.kthvalue(data, cut)[0].item() - - -def _static(data: torch.Tensor, current_thresh: float, **kwargs) -> float: - """ - Passes through the specified input ``current_thresh``. - - Args: - data: Pytorch tensor containing the data. - current_thresh: The threshold value. - - Returns: - The threshold value. - """ - return current_thresh - - -def _otsu(data: torch.Tensor, **kwargs) -> float: - """ - Returns an intensity threshold for an image that separates it - into background and foreground pixels. - - The implementation uses Otsu's method, which assumes a GMM with - 2 components but uses some heuristic to maximize the variance - differences. The input data is shaped into a 2D image for the - purpose of evaluating the threshold value. - - Args: - data: Pytorch tensor containing the data. - - Returns: - Threshold value determined via Otsu's method. - """ - h = 2 ** int(1 + math.log2(data.shape[0]) / 2) - fake_img = data.view(h, -1).cpu().numpy() - return otsu(fake_img, h) - - -class ClippingMethod(IntEnum): - STATIC = 0 - PVALUE = 1 - MEAN = 2 - GMM = 3 - OTSU = 4 - - -_thresh_ = { - ClippingMethod.STATIC: _static, - ClippingMethod.PVALUE: _pvalue, - ClippingMethod.MEAN: _mean_plus_r_var, - ClippingMethod.OTSU: _otsu, -} - - -def _calculate_thresh_value( - data: torch.Tensor, - current_thresh: float, - clipping_method: ClippingMethod = ClippingMethod.STATIC, - clipping_ratio: float = -1, -) -> float: - """ - Calculates a clipping threshold by looking at the layer norms - of each example. - - Four methods are supported: a static threshold, a threshold based on - the mean and standard deviation of the norms, a threshold based on a - percentile of the norms, and a threshold determined by Otsu's method. - - Args: - data: Pytorch tensor containing the data - current_thresh: Value of the current threshold. - clipping_method: Enum value defining the clipping strategy. Current - options are STATIC, PVALUE, MEAN, and OTSU. - clipping_ratio: Value that has a different meaning for different strategies: - it is the percentile parameter for PVALUE, and a multiplier for the - standard deviation for MEAN. It has no significance for OTSU and - STATIC. 
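A quick numeric check of the percentile-style strategy implemented by _pvalue above: with ratio=0.25 and five norms, cut = int(5 * 0.75) = 3, so the 3rd-smallest value becomes the threshold. This is plain torch, independent of the deleted module:

import torch

data = torch.tensor([1.0, 5.0, 2.0, 9.0, 3.0])
ratio = 0.25
cut = max(1, int(data.numel() * (1 - ratio)))  # 3
print(torch.kthvalue(data, cut)[0].item())     # 3.0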
- - Returns: - Clipping threshold value - """ - return _thresh_[clipping_method]( - data, ratio=clipping_ratio, current_thresh=current_thresh - ) - - -class NormClipper(ABC): - """ - An abstract class to calculate the clipping factor - """ - - def calc_clipping_factors( - self, norms: List[torch.Tensor] - ) -> Union[List[torch.Tensor], Iterator[torch.Tensor]]: - """ - Calculates the clipping factor(s) based on the given - parameters. A concrete subclass must implement this. - - Returns: - The clipping factors - """ - pass - - @property - def thresholds(self) -> torch.Tensor: - """ - Depending on the type of clipper, returns threshold values. - - Returns: - The threshold values - """ - pass - - @property - def is_per_layer(self) -> bool: - """ - Depending on type of clipper, returns indicator as to whether - different clipping is applied to each layer in the model. - - Returns: - Flag indicator as to whether different clipping is applied - to each layer in the model. - """ - pass - - -class ConstantFlatClipper(NormClipper): - """ - A clipper that clips all gradients in such a way that their norm is - at most equal to a specified value. This value is shared for all - layers in a model. Note that the process of clipping really involves - multiplying all gradients by a scaling factor. If this scaling factor - is > 1.0, it is instead capped at 1.0. The net effect is that the final - norm of the scaled gradients will be less than the specified value in - such a case. Thus it is better to think of the specified value as an - upper bound on the norm of final clipped gradients. - """ - - def __init__(self, flat_value: float): - """ - Args: - flat_value: Constant value that is used to normalize gradients - such that their norm equals this value before clipping. - This threshold value is used for all layers. - """ - self.flat_value = float(flat_value) - - def calc_clipping_factors( - self, norms: List[torch.Tensor] - ) -> Iterator[torch.Tensor]: - """ - Calculates the clipping factor based on the given - norm of gradients for all layers, so that the new - norm of clipped gradients is at most equal to - ``self.flat_value``. - - Args: - norms: List containing a single tensor with the per-sample - norms of the aggregated gradients. - - Returns: - Iterator that repeatedly yields the single per-sample clipping - factor tensor, to be used for all layers. - """ - # Expects a list of size one. - if len(norms) != 1: - raise ValueError( - "Warning: flat norm selected but " - f"received norm for {len(norms)} layers" - ) - per_sample_clip_factor = self.flat_value / (norms[0] + 1e-6) - # We are *clipping* the gradient, so if the factor is ever >1 we set it to 1 - per_sample_clip_factor = per_sample_clip_factor.clamp(max=1.0) - # return this clipping factor for all layers - return cycle([per_sample_clip_factor]) - - @property - def thresholds(self) -> torch.Tensor: - """ - Returns singleton tensor of dimension (1,) containing - the common threshold value used for clipping all - layers in the model. - - Returns: - Threshold values - """ - return torch.tensor([self.flat_value]) - - @property - def is_per_layer(self) -> bool: - """ - Returns indicator as to whether different clipping is applied - to each layer in the model. For this clipper, it is False. - - Returns: - Flag with value False - """ - return False - - -class ConstantPerLayerClipper(NormClipper): - """ - A clipper that clips all gradients in such a way that their norm is - at most equal to a specified value. This value is specified for each - layer in a model. 
Note that the process of clipping really involves - multiplying all gradients by a scaling factor. If this scaling factor - is > 1.0, it is instead capped at 1.0. The net effect is that the final - norm of the scaled gradients will be less than the specified value in - such a case. Thus it is better to think of the specified value as an - upper bound on the norm of final clipped gradients. - """ - - def __init__(self, flat_values: List[float]): - """ - Args: - flat_values: List of values used to normalize gradients - for each layer such that the norm equals the corresponding - value before clipping. - """ - self.flat_values = [float(fv) for fv in flat_values] - - def calc_clipping_factors(self, norms: List[torch.Tensor]) -> List[torch.Tensor]: - """ - Calculates separate clipping factors for each layer based on - its corresponding norm of gradients, such that its new norm is - at most equal to the flat value specified for that layer when - instantiating the object of - :class:`~opacus.utils.clipping.ConstantPerLayerClipper`. - - Args: - norms: List containing the desired norm of gradients for each layer. - - Returns: - List of tensors, each containing a single value specifying the - clipping factor per layer. - """ - if len(norms) != len(self.flat_values) and len(self.flat_values) != 1: - raise ValueError( - f"{len(norms)} layers have provided norms but the " - f"number of clipping thresholds is {len(self.flat_values)}" - ) - - self.flat_values = self.flat_values * ( - len(norms) if len(self.flat_values) == 1 else 1 - ) - - clipping_factor = [] - for norm, threshold in zip(norms, self.flat_values): - per_sample_clip_factor = threshold / (norm + 1e-6) - clipping_factor.append(per_sample_clip_factor.clamp(max=1.0)) - return clipping_factor - - @property - def thresholds(self) -> torch.Tensor: - """ - Returns a tensor of values that are used to normalize gradients for - each layer such that the norm at most equals the corresponding - value before clipping. - - Returns: - Tensor of thresholds - """ - return torch.tensor(self.flat_values) - - @property - def is_per_layer(self) -> bool: - """ - Returns indicator as to whether different clipping is applied - to each layer in the model. For this clipper, it is True. - - Returns: - Flag with value True - """ - return True - - -class _Dynamic_Clipper_(NormClipper): - """ - This is a generic clipper that is in an experimental phase. - The clipper uses different stats to find a clipping threshold - based on the given per sample norms. - - Notes: - This clipper breaks DP guarantees [use only for experimentation] - """ - - def __init__( - self, - flat_values: List[float], - clip_per_layer: bool = False, - clipping_method: ClippingMethod = ClippingMethod.STATIC, - clipping_ratio: float = 0.0, - clipping_momentum: float = 0.9, - ): - """ - Args: - flat_values: List of float values that is used to normalize gradients - for each layer such that the norm equals the corresponding - value before clipping. - clip_per_layer: Flag indicating if a separate desired norm value is - specified per layer or if a single value is shared for all. - clipping_method: Value in the enum ClippingMethod that specifies one - of the currently supported clipping types. - clipping_ratio: Value that can be used to evaluate the clipping threshold - for certain clipping types. - clipping_momentum: Value defining the decay factor of an unbiased estimator - for the exponential averaging of clipping thresholds, i.e. the weight used to - combine the threshold from the current batch and the previous one. - """ - self.flat_values = [float(float_value) for float_value in flat_values] - self.clip_per_layer = clip_per_layer - if clipping_method != ClippingMethod.STATIC: - print( - "Warning! Current implementations of dynamic clipping " - "are not privacy safe; Calculated privacy loss is not " - "indicative of a proper bound." - ) - self.clipping_method = clipping_method - self.clipping_ratio = clipping_ratio - self.clipping_momentum = clipping_momentum - self.thresh = [] - - def calc_clipping_factors( - self, norms: List[torch.Tensor] - ) -> Union[List[torch.Tensor], Iterator[torch.Tensor]]: - """ - Calculates separate clipping factors for each layer based on - stats such as a threshold determined by Otsu's method, combinations - of mean and std. deviation, the kth largest value, etc. - - This is experimental and does not guarantee privacy and is not recommended - for production use. - - Args: - norms: List containing the desired norm of gradients for each layer. - - Returns: - Singleton list specifying a common clipping factor for all layers, - or an iterator of tensors specifying a clipping factor per layer - """ - - if len(self.thresh) == 0: - current_threshs = self.flat_values - if len(self.flat_values) == 1 and self.clip_per_layer: - # a constant clipping factor applied to all non-frozen layers; - # we need to replicate it by the number of those layers - # (= number of norms). - current_threshs *= len(norms) - else: - current_threshs = self.thresh - - clipping_factor = [] - self.thresh = [] - - if len(norms) != len(current_threshs): - raise ValueError( - f"The number of provided clipping thresholds ({len(current_threshs)})" - f" does not match the number of layers ({len(norms)})" - ) - - for norm, current_thresh in zip(norms, current_threshs): - thresh = _calculate_thresh_value( - norm, current_thresh, self.clipping_method, self.clipping_ratio - ) - thresh = float( - (1 - self.clipping_momentum) * thresh - + self.clipping_momentum * current_thresh - ) - self.thresh.append(thresh) - per_sample_clip_factor = thresh / (norm + 1e-6) - clipping_factor.append(per_sample_clip_factor.clamp(max=1.0)) - return clipping_factor if self.is_per_layer else cycle(clipping_factor) - - @property - def thresholds(self) -> torch.Tensor: - """ - Returns a tensor of values that are used to normalize gradients - for each layer such that the norm at most equals the corresponding - value before clipping. - - Returns: - Tensor of thresholds - """ - return torch.tensor(self.thresh) - - @property - def is_per_layer(self) -> bool: - """ - Returns indicator as to whether different clipping is applied - to each layer in the model. - - Returns: - Value of the flag - """ - return self.clip_per_layer diff --git a/opacus/utils/module_inspection.py b/opacus/utils/module_inspection.py deleted file mode 100644 index 4450467..0000000 --- a/opacus/utils/module_inspection.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -r""" -This module includes utils for inspecting model layers using specified -predicates to check for conditions, getting layer type etc. -""" -from typing import Callable, Optional - -from torch import nn - - -class ModelInspector: - """ - An inspector of models given a specific predicate. If a module - has children the predicate is checked on all children recursively. 
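Looking back at the _Dynamic_Clipper_ hunk above: the threshold smoothing it applies reduces to a one-line blend per batch. A hedged sketch of that update (blend is a hypothetical name; the constants just illustrate the drift):

def blend(new_thresh: float, prev_thresh: float, momentum: float = 0.9) -> float:
    # (1 - momentum) * fresh estimate + momentum * running value
    return (1 - momentum) * new_thresh + momentum * prev_thresh

t = 10.0
for estimate in (4.0, 6.0, 5.0):
    t = blend(estimate, t)
print(round(t, 3))  # 8.654: the threshold drifts slowly toward the estimates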
- - Example: - >>> inspector = ModelInspector('simple', lambda x: isinstance(x, nn.Conv2d)) - >>> print(inspector.validate(nn.Conv2d(1, 1, 1))) - True - """ - - def __init__( - self, - name: str, - predicate: Callable[[nn.Module], bool], - check_leaf_nodes_only: bool = True, - message: Optional[str] = None, - ): - """ - Args: - name: String to represent the predicate. - predicate: Callable boolean function which tests a hypothesis on a module. - check_leaf_nodes_only: Flag to check only leaf nodes of a module. Here - leaf nodes are the ones that have parameters of their own. - message: Optional value to hold a message about violating this predicate. - - Notes: - The predicates will not be applied on non-leaf modules unless - ``check_leaf_nodes_only`` is set to False. E.g. a predicate like: - - ``lambda model: isinstance(model, nn.Sequential)`` - - will always return True unless ``check_leaf_nodes_only`` is set. - """ - self.name = name - if check_leaf_nodes_only: - self.predicate = ( - lambda x: has_no_param(x) or not requires_grad(x) or predicate(x) - ) - else: - self.predicate = predicate - self.message = message - self.violators = [] - # List that contains the module names that have violated the - # predicate. The list does not get automatically emptied if - # the predicate is applied on multiple modules. - - def validate(self, model: nn.Module) -> bool: - """ - Checks if the provided module satisfies the predicate specified - upon creation of the :class:`~opacus.utils.ModelInspector`. - - Args: - model: PyTorch module on which the predicate must be evaluated - and satisfied. - - Returns: - Flag indicating whether the predicate is satisfied. - """ - valid = True - for name, module in model.named_modules(): - if not self.predicate(module): - valid = False - self.violators.append(f"{name} ({get_layer_type(module)})") - return valid - - -def has_no_param(module: nn.Module) -> bool: - """ - Checks if a module does not have any parameters. - - Args: - module: The module on which this function is being evaluated. - - Returns: - Flag indicating if the provided module does not have any - parameters. - """ - has_params = any(p is not None for p in module.parameters(recurse=False)) - return not has_params - - -def requires_grad(module: nn.Module, recurse: bool = False) -> bool: - """ - Checks if any parameters in a specified module require gradients. - - Args: - module: PyTorch module whose parameters are examined - recurse: Flag specifying if the gradient requirement check should - be applied recursively to sub-modules of the specified module - - Returns: - Flag indicating whether any parameters require gradients - """ - requires_grad = any(p.requires_grad for p in module.parameters(recurse)) - return requires_grad - - -def get_layer_type(layer: nn.Module) -> str: - """ - Returns the name of the type of the given layer. - - Args: - layer: The module corresponding to the layer whose type - is being queried. - - Returns: - Name of the class of the layer - """ - return layer.__class__.__name__ diff --git a/opacus/utils/module_modification.py b/opacus/utils/module_modification.py deleted file mode 100644 index 401ea6d..0000000 --- a/opacus/utils/module_modification.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -r""" -This module includes utils for modifying model layers, replacing layers etc. 
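The dotted-path traversal performed by _replace_child in the hunk below can be reproduced in a few lines. A standalone sketch under that reading (replace_by_path is a hypothetical name, not part of the module):

import torch.nn as nn

def replace_by_path(root: nn.Module, path: str, new: nn.Module) -> None:
    # Walk "a.b.c" down to the parent of "c", then swap the child in place.
    parent = root
    *head, last = path.split(".")
    for name in head:
        parent = parent._modules[name]
    parent._modules[last] = new

model = nn.Sequential(nn.Sequential(nn.BatchNorm2d(4)))
replace_by_path(model, "0.0", nn.Identity())
print(model[0][0])  # Identity()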
-""" -from typing import Callable, Type - -from torch import nn - - -def _replace_child( - root: nn.Module, child_name: str, converter: Callable[[nn.Module], nn.Module] -) -> None: - """ - Converts a sub-module to a new module given a helper - function, the root module and a string representing - the name of the submodule to be replaced. - - Args: - root: Root module whose sub module must be replaced. - child_name: Name of submodule that must be replaced. - converter: Function or a lambda that takes a module - (the submodule to be replaced) and returns its - replacement. - """ - # find the immediate parent - parent = root - nameList = child_name.split(".") - for name in nameList[:-1]: - parent = parent._modules[name] - # set to identity - parent._modules[nameList[-1]] = converter(parent._modules[nameList[-1]]) - - -def replace_all_modules( - root: nn.Module, - target_class: Type[nn.Module], - converter: Callable[[nn.Module], nn.Module], -) -> nn.Module: - """ - Converts all the submodules (of root) that have the same - type as target_class, given a converter, a module root, - and a target class type. - - This method is useful for replacing modules that are not - supported by the Privacy Engine. - - Args: - root: Model instance, potentially with sub-modules - target_class: Target class that needs to be replaced. - converter: Function or a lambda that converts an instance - of a given target_class to another nn.Module. - - Returns: - Module with all the target_class types replaced using the - converter. root is modified and is equal to the return value. - - Example: - >>> from torchvision.models import resnet18 - >>> from torch import nn - >>> model = resnet18() - >>> print(model.layer1[0].bn1) - BatchNorm2d(64, eps=1e-05, ... - >>> model = replace_all_modules(model, nn.BatchNorm2d, lambda _: nn.Identity()) - >>> print(model.layer1[0].bn1) - Identity() - """ - # base case - if isinstance(root, target_class): - return converter(root) - - for name, obj in root.named_modules(): - if isinstance(obj, target_class): - _replace_child(root, name, converter) - return root - - -def _batchnorm_to_instancenorm(module: nn.modules.batchnorm._BatchNorm) -> nn.Module: - """ - Converts a BatchNorm module to the corresponding InstanceNorm module - - Args: - module: BatchNorm module to be replaced - - Returns: - InstanceNorm module that can replace the BatchNorm module provided - """ - - def matchDim(): - if isinstance(module, nn.BatchNorm1d): - return nn.InstanceNorm1d - elif isinstance(module, nn.BatchNorm2d): - return nn.InstanceNorm2d - elif isinstance(module, nn.BatchNorm3d): - return nn.InstanceNorm3d - - return matchDim()(module.num_features) - - -def _batchnorm_to_groupnorm(module: nn.modules.batchnorm._BatchNorm) -> nn.Module: - """ - Converts a BatchNorm ``module`` to GroupNorm module. - This is a helper function. - - Args: - module: BatchNorm module to be replaced - - Returns: - GroupNorm module that can replace the BatchNorm module provided - - Notes: - A default value of 32 is chosen for the number of groups based on the - paper *Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour* - https://arxiv.org/pdf/1706.02677.pdf - """ - return nn.GroupNorm(min(32, module.num_features), module.num_features, affine=True) - - -def nullify_batchnorm_modules(root: nn.Module) -> nn.Module: - """ - Replaces all the BatchNorm submodules (e.g. :class:`torch.nn.BatchNorm1d`, - :class:`torch.nn.BatchNorm2d` etc.) in ``root`` with :class:`torch.nn.Identity`. 
-
-    Args:
-        root: Module for which to replace BatchNorm submodules.
-
-    Returns:
-        Module with all the BatchNorm submodules replaced with
-        Identity. ``root`` is modified and is equal to the return value.
-
-    Notes:
-        Most of the time replacing a BatchNorm module with Identity
-        will heavily affect convergence of the model.
-    """
-    return replace_all_modules(
-        root, nn.modules.batchnorm._BatchNorm, lambda _: nn.Identity()
-    )
-
-
-def convert_batchnorm_modules(
-    model: nn.Module,
-    converter: Callable[
-        [nn.modules.batchnorm._BatchNorm], nn.Module
-    ] = _batchnorm_to_groupnorm,
-) -> nn.Module:
-    """
-    Converts all BatchNorm modules to another module
-    (defaults to GroupNorm) that is privacy compliant.
-
-    Args:
-        model: Module instance, potentially with sub-modules
-        converter: Function or a lambda that converts an instance of a
-            BatchNorm to another nn.Module.
-
-    Returns:
-        Model with all the BatchNorm types replaced by another operation
-        by using the provided converter, defaulting to GroupNorm if one
-        isn't provided.
-
-    Example:
-        >>> from torchvision.models import resnet50
-        >>> from torch import nn
-        >>> model = resnet50()
-        >>> print(model.layer1[0].bn1)
-        BatchNorm2d module details
-        >>> model = convert_batchnorm_modules(model)
-        >>> print(model.layer1[0].bn1)
-        GroupNorm module details
-    """
-    return replace_all_modules(model, nn.modules.batchnorm._BatchNorm, converter)
diff --git a/opacus/utils/packed_sequences.py b/opacus/utils/packed_sequences.py
deleted file mode 100644
index 01accf8..0000000
--- a/opacus/utils/packed_sequences.py
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-
-from typing import Optional
-
-import torch
-from torch.nn.utils.rnn import PackedSequence, pack_padded_sequence, pad_sequence
-
-
-def _gen_packed_data(
-    minibatch_size: int,
-    max_seq_length: int,
-    input_dim: int,
-    batch_first: bool,
-    sorted_: Optional[bool] = False,
-) -> PackedSequence:
-    """
-    This is used to generate random PackedSequence data, sampled from a normal distribution, for testing DPLSTM.
-
-    Args:
-        minibatch_size : Total number of sequences to generate
-        max_seq_length : The maximum number of timesteps of a sequence
-        input_dim : The embedding dimension of a sequence at any timestep
-        batch_first : If this is true, data is first generated using a padded sequence of dimension (minibatch_size x max_seq_length x input_dim); otherwise (max_seq_length x minibatch_size x input_dim)
-        sorted_ : If this is true then the original data used to produce the PackedSequence will already be ordered by sequence length, and the 'sorted_indices'
-            and 'unsorted_indices' fields will be None; otherwise a random order is used.
-
-    Return Value:
-        packed_data : A PackedSequence object with its data sampled from a normal distribution.
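-
-    Example:
-        A small sketch of a call (the sizes are arbitrary and serve only the
-        example); the embedding dimension of the packed data matches ``input_dim``:
-
-        >>> packed = _gen_packed_data(
-        ...     minibatch_size=4, max_seq_length=10, input_dim=3, batch_first=True
-        ... )
-        >>> packed.data.shape[-1]
-        3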
-    """
-
-    if batch_first:
-        data = []
-        seq_lengths = []
-        for _ in range(minibatch_size):
-            seq_length = torch.randint(1, max_seq_length + 1, (1,)).item()
-            seq_lengths.append(seq_length)
-            data.append(torch.randn(seq_length, input_dim))
-
-        if sorted_:
-            data = sorted(data, key=lambda x: x.shape[0], reverse=True)
-            seq_lengths = sorted(seq_lengths, reverse=True)
-            packed_data = pack_padded_sequence(
-                pad_sequence(data, batch_first=True),
-                seq_lengths,
-                batch_first=True,
-                enforce_sorted=True,
-            )
-        else:
-            packed_data = pack_padded_sequence(
-                pad_sequence(data, batch_first=True),
-                seq_lengths,
-                batch_first=True,
-                enforce_sorted=False,
-            )
-    else:
-        seq_lengths = [
-            torch.randint(1, max_seq_length + 1, (1,)).item()
-            for _ in range(minibatch_size)
-        ]
-        if sorted_:
-            seq_lengths = sorted(seq_lengths, reverse=True)
-        padded_data = torch.zeros((max_seq_length, minibatch_size, input_dim))
-        for i in range(minibatch_size):
-            padded_data[: seq_lengths[i], i, :] = torch.randn(seq_lengths[i], input_dim)
-
-        if sorted_:
-            packed_data = pack_padded_sequence(
-                padded_data, seq_lengths, batch_first=False, enforce_sorted=True
-            )
-        else:
-            packed_data = pack_padded_sequence(
-                padded_data, seq_lengths, batch_first=False, enforce_sorted=False
-            )
-
-    return packed_data
diff --git a/opacus/utils/stats.py b/opacus/utils/stats.py
deleted file mode 100644
index 0523db5..0000000
--- a/opacus/utils/stats.py
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-
-import warnings
-from copy import deepcopy
-from enum import IntEnum
-from typing import Any, Dict, Optional
-
-
-try:
-    from torch.utils.tensorboard import SummaryWriter
-except ImportError:
-    warnings.warn("Tensorboard library was not found. Using dummy SummaryWriter")
-
-    class SummaryWriter:
-        def add_scalar(self, *args, **kwargs):
-            pass
-
-        def add_histogram(self, *args, **kwargs):
-            # the dummy writer must also accept histogram logging calls
-            pass
-
-
-class StatType(IntEnum):
-    r"""
-    This enum covers all the stat types we currently support.
-
-    1. LOSS: Monitors the training loss.
-    2. GRAD: Monitors stats about the gradients across iterations.
-    3. PRIVACY: Logs epsilon so you can see how it evolves during training.
-    4. TRAIN: This is a TB namespace where you can attach training metrics.
-    5. TEST: Similar to TRAIN, just another TB namespace to log things under.
-    """
-    LOSS = 1
-    GRAD = 2
-    PRIVACY = 3
-    TRAIN = 4
-    TEST = 5
-
-
-class Stat:
-    r"""
-    Wrapper around tensorboard's ``SummaryWriter.add_scalar``, allowing for sampling
-    and an easier interface.
-
-    Use this to gather and visualize statistics to get insight about
-    differential privacy parameters, and to observe how clipping and noising affect the training process
-    (loss, accuracy, etc).
-
-    We have already implemented some common ones inside ``opacus.utils.stat.StatType``.
-
-    Internal privacy metrics (such as ``StatType.PRIVACY`` and ``StatType.GRAD``)
-    are already added to the code and need only be activated by adding the stat
-    as shown in the example. Other stat types need to be added and updated
-    properly using the ``update`` function.
-
-    Examples:
-        To get stats about clipping you can add the following lines
-        to your main file. By default the samples are averaged and the average is
-        reported every ``1 / frequency`` times.
-
-        >>> stat = Stat(StatType.GRAD, 'sample_stats', frequency=0.1)
-        >>> for i in range(20):
-        ...     stat.log({"val": i})
-
-        If an instance of ``tensorboard.SummaryWriter`` exists, it can be used
-        for stat gathering by passing it like this:
-
-        >>> stats.set_global_summary_writer(tensorboard.SummaryWriter())
-
-        To add stats about test accuracy you can do:
-
-        >>> stats.add(Stat(stats.StatType.TEST, 'accuracy', frequency=0.1))
-
-        and then update the stat meter in the proper location using:
-
-        >>> acc1_value = compute_accuracy(x, y)  # you can supply your metrics functions, and Stats later displays them
-        >>> stats.update(stats.StatType.TEST, acc1=acc1_value)  # pass to Stats the result so that the result gets logged
-    """
-    summary_writer: Optional[SummaryWriter] = None
-
-    def __init__(
-        self,
-        stat_type: StatType,
-        name: str,
-        frequency: float = 1.0,
-        reduction: str = "avg",
-    ):
-        r"""
-        Args:
-            stat_type: Type of the statistic from ``StatType``.
-            name: Name of the stat that is used to identify this ``Stat``
-                for update or to view in tensorboard.
-            frequency: The frequency of stat gathering. Its value is in [0, 1],
-                where e.g. 1 means report to tensorboard any time ``log`` is
-                called and 0.1 means report only 1 out of 10 times.
-            reduction: The reduction strategy used for reporting, e.g. if
-                ``frequency = 0.1`` and ``reduction='avg'`` then ``log`` averages
-                10 samples and reports to tensorboard this average once every 10
-                samples. Current valid values are 'avg' and 'sample'.
-        """
-        self.type = stat_type
-        self.name = name
-        self.report = int(1 / frequency)
-        self.reduction = reduction
-        self.writer = Stat.summary_writer if Stat.summary_writer else SummaryWriter()
-        self.named_values = []
-        self.reset()
-
-    def reset(self):
-        """
-        Resets the accumulated metrics.
-        """
-        self.named_value = {}
-        self.iter = 0
-
-    def log(self, named_value: Dict[str, Any], hist: bool = False):
-        r"""
-        Logs metrics to tensorboard.
-
-        Generally not used directly (use ``update`` instead).
-
-        Args:
-            named_value: A dictionary of metrics to log
-            hist: If True, log the values as histograms rather than scalars
-                (not valid together with the 'avg' reduction)
-        """
-        assert not (self.reduction == "avg" and hist)
-        if self.iter % self.report == 0:
-            for k, v in self.named_value.items():
-                self.writer.add_histogram(
-                    f"{self.type.name}:{self.name}/{k}", v, self.iter
-                ) if hist else self.writer.add_scalar(
-                    f"{self.type.name}:{self.name}/{k}", v, self.iter
-                )
-        self._aggregate(named_value)
-
-    def _aggregate(self, named_value: Dict[str, Any]):
-        """
-        Aggregates ``named_value`` using this object's ``reduction`` attribute.
-
-        Args:
-            named_value: The value to aggregate
-        """
-        if self.reduction == "sample":
-            self.named_value = deepcopy(named_value)
-        elif self.reduction == "avg":
-            for k, v in named_value.items():
-                self.named_value[k] = (
-                    self.named_value[k] + float(v) / self.report
-                    if (self.iter % self.report)
-                    else float(v) / self.report
-                )
-        self.iter += 1
-
-
-# global variable keeping the list of all the stats.
-Stats = []
-
-
-def set_global_summary_writer(summary_writer: SummaryWriter):
-    """
-    Sets the TensorBoard SummaryWriter used by all ``Stat`` objects to an
-    externally provided one.
-
-    Useful if you already have one instantiated and you don't want this to
-    create another unnecessarily.
-
-    Args:
-        summary_writer: The externally provided SummaryWriter
-    """
-    Stat.summary_writer = summary_writer
-
-
-def add(*args: Stat):
-    r"""
-    Adds statistics gathering to the process.
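-
-    Example:
-        A minimal sketch (the stat name and frequency are illustrative):
-
-        >>> add(Stat(StatType.TRAIN, "loss", frequency=0.1))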
-
-    Args:
-        *args: An iterable of statistics to add
-    """
-    [Stats.append(stat) for stat in args]
-
-
-def clear():
-    r"""
-    Clears all stats and stops collecting statistics.
-    """
-    Stats.clear()
-
-
-def remove(name: str):
-    r"""
-    Removes the Stat of name ``name`` from the global statistics gathering.
-
-    Args:
-        name: The name of stats to remove
-    """
-    global Stats
-    Stats = [stat for stat in Stats if stat.name != name]
-
-
-def reset(stat_type: Optional[StatType] = None, name: Optional[str] = None):
-    r"""
-    Resets the stat with the given ``name`` and ``stat_type``
-
-    Args:
-        stat_type: The stat_type to reset
-        name: The name of stats to reset
-    """
-    [
-        stat.reset()
-        for stat in Stats
-        if (stat_type is None or stat.type == stat_type)
-        and (name is None or stat.name == name)
-    ]
-
-
-def update(
-    stat_type: Optional[StatType] = None,
-    name: Optional[str] = None,
-    hist: bool = False,
-    **named_values: str,
-):
-    r"""
-    Updates the stat(s) with the given ``name`` and ``stat_type``
-
-    Args:
-        stat_type: The type of the stat from ``StatType``. Could be
-            ``None`` if ``name`` is unique.
-        name: The name of the stat. Could be ``None`` if there is only
-            one stat for the ``stat_type``
-        **named_values: A set of values with their names
-    """
-    [
-        stat.log(named_values, hist)
-        for stat in Stats
-        if (stat_type is None or stat.type == stat_type)
-        and (name is None or stat.name == name)
-    ]
diff --git a/opacus/utils/tensor_utils.py b/opacus/utils/tensor_utils.py
deleted file mode 100644
index 790ff7c..0000000
--- a/opacus/utils/tensor_utils.py
+++ /dev/null
@@ -1,147 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-"""
-Utils for generating stats from torch tensors.
-"""
-from typing import Iterator, List, Tuple, Union
-
-import numpy as np
-import torch
-from torch.functional import F
-
-
-def calc_sample_norms(
-    named_params: Iterator[Tuple[str, torch.Tensor]], flat: bool = True
-) -> List[torch.Tensor]:
-    r"""
-    Calculates the norm of the given tensors for each sample.
-
-    This function calculates the overall norm of the given tensors for each sample,
-    assuming that the 0th dimension is the batch dimension.
-
-    Args:
-        named_params: An iterator of tuples with name being a
-            string and param being a tensor of shape ``[B, ...]`` where ``B``
-            is the size of the batch and is the 0th dimension.
-        flat: A flag, when set to `True` returns a flat norm over all
-            layers' norms
-
-    Example:
-        >>> t1 = torch.rand((2, 5))
-        >>> t2 = torch.rand((2, 5))
-        >>> calc_sample_norms([("1", t1), ("2", t2)])
-        [tensor([1.5117, 1.0618])]
-
-    Returns:
-        A list of tensor norms where length of the list is the number of layers
-    """
-    norms = [param.view(len(param), -1).norm(2, dim=-1) for name, param in named_params]
-    # calc norm over all layer norms if flat = True
-    if flat:
-        norms = [torch.stack(norms, dim=0).norm(2, dim=0)]
-    return norms
-
-
-def sum_over_all_but_batch_and_last_n(
-    tensor: torch.Tensor, n_dims: int
-) -> torch.Tensor:
-    r"""
-    Calculates the sum over all dimensions, except the first
-    (batch dimension), and excluding the last n_dims.
-
-    This function will ignore the first dimension and it will
-    not aggregate over the last n_dims dimensions.
-
-    Args:
-        tensor: An input tensor of shape ``(B, ..., X[n_dims-1])``.
-        n_dims: Number of dimensions to keep.
-
-    Example:
-        >>> tensor = torch.ones(1, 2, 3, 4, 5)
-        >>> sum_over_all_but_batch_and_last_n(tensor, n_dims=2).shape
-        torch.Size([1, 4, 5])
-
-    Returns:
-        A tensor of shape ``(B, ..., X[n_dims-1])``
-    """
-    if tensor.dim() == n_dims + 1:
-        return tensor
-    else:
-        dims = list(range(1, tensor.dim() - n_dims))
-        return tensor.sum(dim=dims)
-
-
-def unfold3d(
-    tensor: torch.Tensor,
-    kernel_size: Union[int, Tuple[int, int, int]],
-    padding: Union[int, Tuple[int, int, int]] = 0,
-    stride: Union[int, Tuple[int, int, int]] = 1,
-    dilation: Union[int, Tuple[int, int, int]] = 1,
-):
-    r"""
-    Extracts sliding local blocks from a batched input tensor.
-
-    :class:`torch.nn.Unfold` only supports 4D inputs (batched image-like tensors).
-    This method implements the same action for 5D inputs.
-
-    Args:
-        tensor: An input tensor of shape ``(B, C, D, H, W)``.
-        kernel_size: the size of the sliding blocks
-        padding: implicit zero padding to be added on both sides of input
-        stride: the stride of the sliding blocks in the input spatial dimensions
-        dilation: the spacing between the kernel points.
-
-    Example:
-        >>> B, C, D, H, W = 3, 4, 5, 6, 7
-        >>> tensor = torch.arange(1,B*C*D*H*W+1.).view(B,C,D,H,W)
-        >>> unfold3d(tensor, kernel_size=2, padding=0, stride=1).shape
-        torch.Size([3, 32, 120])
-
-    Returns:
-        A tensor of shape ``(B, C * np.product(kernel_size), L)``, where ``L``
-        is the product of the output spatial dimensions (i.e. the number of
-        extracted blocks).
-        See :class:`torch.nn.Unfold` for more details
-    """
-
-    if len(tensor.shape) != 5:
-        raise ValueError(
-            f"Input tensor must be of the shape [B, C, D, H, W]. Got {tensor.shape}"
-        )
-
-    if isinstance(kernel_size, int):
-        kernel_size = (kernel_size, kernel_size, kernel_size)
-
-    if isinstance(padding, int):
-        padding = (padding, padding, padding)
-
-    if isinstance(stride, int):
-        stride = (stride, stride, stride)
-
-    if isinstance(dilation, int):
-        dilation = (dilation, dilation, dilation)
-
-    if dilation != (1, 1, 1):
-        raise NotImplementedError(f"dilation={dilation} not supported. We'd love a PR!")
-
-    batch_size, channels, _, _, _ = tensor.shape
-
-    # Input shape: (B, C, D, H, W)
-    tensor = F.pad(
-        tensor, (padding[2], padding[2], padding[1], padding[1], padding[0], padding[0])
-    )
-    # Output shape: (B, C, D+2*padding[0], H+2*padding[1], W+2*padding[2])
-
-    tensor = tensor.unfold(dimension=2, size=kernel_size[0], step=stride[0])
-    tensor = tensor.unfold(dimension=3, size=kernel_size[1], step=stride[1])
-    tensor = tensor.unfold(dimension=4, size=kernel_size[2], step=stride[2])
-    # Output shape: (B, C, D_out, H_out, W_out, kernel_size[0], kernel_size[1], kernel_size[2])
-    # For D_out, H_out, W_out definitions see :class:`torch.nn.Unfold`
-
-    tensor = tensor.permute(0, 2, 3, 4, 1, 5, 6, 7)
-    # Output shape: (B, D_out, H_out, W_out, C, kernel_size[0], kernel_size[1], kernel_size[2])
-
-    tensor = tensor.reshape(batch_size, -1, channels * np.prod(kernel_size)).transpose(
-        1, 2
-    )
-    # Output shape: (B, C * kernel_size[0] * kernel_size[1] * kernel_size[2], D_out * H_out * W_out)
-
-    return tensor
diff --git a/opacus/utils/tests/module_inspection_test.py b/opacus/utils/tests/module_inspection_test.py
deleted file mode 100644
index a19f624..0000000
--- a/opacus/utils/tests/module_inspection_test.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-
-
-import unittest
-
-import torch.nn as nn
-from opacus.utils import module_inspection as mi
-from torchvision import models
-
-
-class utils_ModelInspector_test(unittest.TestCase):
-    def setUp(self):
-        def pred_supported(module):
-            return isinstance(module, (nn.Conv2d, nn.Linear))
-
-        def pred_not_unsupported(module):
-            return not isinstance(module, (nn.BatchNorm2d, nn.BatchNorm3d))
-
-        def pred_requires_grad(module):
-            return all(p.requires_grad for p in module.parameters(recurse=False))
-
-        self.pred_supported = pred_supported
-        self.pred_not_unsupported = pred_not_unsupported
-        self.pred_mix = lambda m: (not pred_requires_grad(m)) or pred_not_unsupported(m)
-
-    def test_validate_basic(self):
-        inspector = mi.ModelInspector(
-            "pred", lambda model: isinstance(model, nn.Linear)
-        )
-        model = nn.Conv1d(1, 1, 1)
-        valid = inspector.validate(model)
-        self.assertFalse(valid, inspector.violators)
-
-    def test_validate_positive_predicate_valid(self):
-        # test when a positive predicate (e.g. supported) returns true
-        inspector = mi.ModelInspector("pred", self.pred_supported)
-        model = nn.Conv2d(1, 1, 1)
-        valid = inspector.validate(model)
-        self.assertTrue(valid)
-        list_len = len(inspector.violators)
-        self.assertEqual(list_len, 0, f"violators = {inspector.violators}")
-
-    def test_validate_positive_predicate_invalid(self):
-        # test when a positive predicate (e.g. supported) returns false
-        inspector = mi.ModelInspector("pred", self.pred_supported)
-        model = nn.Conv1d(1, 1, 1)
-        valid = inspector.validate(model)
-        self.assertFalse(valid)
-        list_len = len(inspector.violators)
-        self.assertEqual(list_len, 1, f"violators = {inspector.violators}")
-
-    def test_validate_negative_predicate_true(self):
-        # test when a negative predicate (e.g. not unsupported) returns true
-        inspector = mi.ModelInspector("pred1", self.pred_not_unsupported)
-        model = nn.Sequential(nn.Conv2d(1, 1, 1), nn.Linear(1, 1))
-        valid = inspector.validate(model)
-        self.assertTrue(valid)
-        list_len = len(inspector.violators)
-        self.assertEqual(list_len, 0)
-
-    def test_validate_negative_predicate_false(self):
-        # test when a negative predicate (e.g. not unsupported) returns false
-        inspector = mi.ModelInspector("pred", self.pred_not_unsupported)
-        model = nn.Sequential(nn.Conv2d(1, 1, 1), nn.BatchNorm2d(1))
-        valid = inspector.validate(model)
-        self.assertFalse(valid)
-        list_len = len(inspector.violators)
-        self.assertEqual(list_len, 1, f"violators = {inspector.violators}")
-
-    def test_validate_mix_predicate(self):
-        # check with a mix predicate: not requires grad, or is not unsupported
-        inspector = mi.ModelInspector("pred1", self.pred_mix)
-        model = nn.Sequential(nn.Conv2d(1, 1, 1), nn.BatchNorm2d(1))
-        for p in model[1].parameters():
-            p.requires_grad = False
-        valid = inspector.validate(model)
-        self.assertTrue(valid)
-
-    def test_check_everything_flag(self):
-        # check to see if a model does not contain nn.Sequential
-        inspector = mi.ModelInspector(
-            "pred",
-            lambda model: not isinstance(model, nn.Sequential),
-            check_leaf_nodes_only=False,
-        )
-        model = nn.Sequential(nn.Conv1d(1, 1, 1))
-        valid = inspector.validate(model)
-        self.assertFalse(valid, f"violators = {inspector.violators}")
-
-    def test_complicated_case(self):
-        def good(x):
-            return isinstance(x, (nn.Conv2d, nn.Linear))
-
-        def bad(x):
-            return isinstance(x, nn.modules.batchnorm._BatchNorm)
-
-        inspector1 = mi.ModelInspector("good_or_bad", lambda x: good(x) or bad(x))
-        inspector2 = mi.ModelInspector("not_bad", lambda x: not bad(x))
-        model = models.resnet50()
-        valid = inspector1.validate(model)
-        self.assertTrue(valid, f"violators = {inspector1.violators}")
-        self.assertEqual(
-            len(inspector1.violators), 0, f"violators = {inspector1.violators}"
-        )
-        valid = inspector2.validate(model)
-        self.assertFalse(valid, f"violators = {inspector2.violators}")
-        self.assertEqual(
-            len(inspector2.violators), 53, f"violators = {inspector2.violators}"
-        )
diff --git a/opacus/utils/tests/module_modification_test.py b/opacus/utils/tests/module_modification_test.py
deleted file mode 100644
index 6952f7f..0000000
--- a/opacus/utils/tests/module_modification_test.py
+++ /dev/null
@@ -1,179 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-
-
-import unittest
-from typing import Tuple
-
-import torch
-import torch.nn as nn
-from opacus import PrivacyEngine
-from opacus.dp_model_inspector import IncompatibleModuleException
-from opacus.utils import module_modification as mm
-from torch.utils.data import DataLoader
-from torchvision import models, transforms
-from torchvision.datasets import FakeData
-
-
-class replace_all_modules_test(unittest.TestCase):
-    def checkModulePresent(self, root: nn.Module, targetclass):
-        result = any(isinstance(module, targetclass) for module in root.modules())
-        self.assertTrue(result)
-
-    def checkModuleNotPresent(self, root: nn.Module, targetclass):
-        for module in root.modules():
-            self.assertFalse(
-                isinstance(module, targetclass),
-                msg=f"{module} has the given targetclass type",
-            )
-
-    def test_replace_basic_case(self):
-        model = nn.BatchNorm1d(10)
-        model = mm.replace_all_modules(
-            model, nn.BatchNorm1d, lambda _: nn.BatchNorm2d(10)
-        )
-        self.checkModulePresent(model, nn.BatchNorm2d)
-        self.checkModuleNotPresent(model, nn.BatchNorm1d)
-
-    def test_replace_sequential_case(self):
-        model = nn.Sequential(nn.Conv1d(1, 2, 3), nn.Sequential(nn.Conv2d(4, 5, 6)))
-
-        def conv(m: nn.Conv2d):
-            return nn.Linear(4, 5)
-
-        model = mm.replace_all_modules(model, nn.Conv2d, conv)
-        self.checkModulePresent(model, nn.Linear)
-        self.checkModuleNotPresent(model, nn.Conv2d)
-
-    def test_nullify_resnet18(self):
-        model = models.resnet18()
-        # check that BatchNorm modules are there
-        self.checkModulePresent(model, nn.BatchNorm2d)
-        # nullify the modules (replace with Identity)
-        model = mm.nullify_batchnorm_modules(model)
-        # check that the modules are no longer present
-        self.checkModuleNotPresent(model, nn.BatchNorm2d)
-
-    def test_convert_batchnorm_modules_resnet50(self):
-        model = models.resnet50()
-        # check that BatchNorm modules are there
-        self.checkModulePresent(model, nn.BatchNorm2d)
-        # replace the modules with GroupNorm (the default converter)
-        model = mm.convert_batchnorm_modules(model)
-        # check that BatchNorm is gone and GroupNorm is present
-        self.checkModuleNotPresent(model, nn.BatchNorm2d)
-        self.checkModulePresent(model, nn.GroupNorm)
-
-
-class BasicModel(nn.Module):
-    def __init__(self, imgSize):
-        super().__init__()
-        self.size = imgSize[0] * imgSize[1] * imgSize[2]
-        self.bn = nn.BatchNorm2d(imgSize[0])
-        self.fc = nn.Linear(self.size, 2)
-
-    def forward(self, input):
-        x = self.bn(input)
-        x = x.view(-1, self.size)
-        x = self.fc(x)
-        return x
-
-
-class convert_batchnorm_modules_test(unittest.TestCase):
-    def setUp(self):
-        self.criterion = nn.CrossEntropyLoss()
-
-    def setUpOptimizer(
-        self, model: nn.Module, data_loader: DataLoader, privacy_engine: bool = False
-    ):
-        # sample parameter values
-        optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
-        optimizer.zero_grad()
-        if privacy_engine:
-            pe = PrivacyEngine(
-                model,
-                sample_rate=data_loader.batch_size / len(data_loader.dataset),
-                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
-                noise_multiplier=1.3,
-                max_grad_norm=1,
-            )
-            pe.attach(optimizer)
-        return optimizer
-
-    def genFakeData(
-        self, imgSize: Tuple[int, int, int], batch_size: int = 1, num_batches: int = 1
-    ) -> DataLoader:
-        self.ds = FakeData(
-            size=num_batches,
-            image_size=imgSize,
-            num_classes=2,
-            transform=transforms.Compose([transforms.ToTensor()]),
-        )
-        return DataLoader(self.ds, batch_size=batch_size)
-
-    def runOneBatch(
-        self,
-        model: nn.Module,
-        imgsize: Tuple[int, int, int],
-        privacy_engine: bool = True,
-    ):
-        dl = self.genFakeData(imgsize, 1, 1)
-        optimizer = 
self.setUpOptimizer(model, dl, privacy_engine) - for x, y in dl: - # forward - try: - logits = model(x) - except Exception as err: - self.fail(f"Failed forward step with exception: {err}") - loss = self.criterion(logits, y) - # backward - try: - loss.backward() - except Exception as err: - self.fail(f"Failed backward step with exception: {err}") - # optimizer - try: - optimizer.step() - except Exception as err: - self.fail(f"Failed optimizer step with exception: {err}") - optimizer.zero_grad() - - def test_run_basic_case(self): - imgSize = (3, 4, 5) - # should throw because privacy engine does not work with batch norm - # remove the next two lines when we support batch norm - with self.assertRaises(IncompatibleModuleException): - self.runOneBatch(BasicModel(imgSize), imgSize) - self.runOneBatch(mm.convert_batchnorm_modules(BasicModel(imgSize)), imgSize) - - def test_run_resnet18(self): - imgSize = (3, 224, 224) - # should throw because privacy engine does not work with batch norm - # remove the next two lines when we support batch norm - with self.assertRaises(IncompatibleModuleException): - self.runOneBatch(models.resnet18(), imgSize) - self.runOneBatch(mm.convert_batchnorm_modules(models.resnet18()), imgSize) - - def test_run_resnet34(self): - imgSize = (3, 224, 224) - # should throw because privacy engine does not work with batch norm - # remove the next two lines when we support batch norm - with self.assertRaises(IncompatibleModuleException): - self.runOneBatch(models.resnet34(), imgSize) - self.runOneBatch(mm.convert_batchnorm_modules(models.resnet34()), imgSize) - - def test_run_resnet50(self): - imgSize = (3, 224, 224) - # should throw because privacy engine does not work with batch norm - # remove the next two lines when we support batch norm - with self.assertRaises(IncompatibleModuleException): - self.runOneBatch(models.resnet50(), imgSize) - self.runOneBatch(mm.convert_batchnorm_modules(models.resnet50()), imgSize) - - def test_run_resnet101(self): - imgSize = (3, 224, 224) - # should throw because privacy engine does not work with batch norm - # remove the next two lines when we support batch norm - with self.assertRaises(IncompatibleModuleException): - self.runOneBatch(models.resnet101(), imgSize) - self.runOneBatch(mm.convert_batchnorm_modules(models.resnet101()), imgSize) diff --git a/opacus/utils/uniform_sampler.py b/opacus/utils/uniform_sampler.py deleted file mode 100644 index 84387e8..0000000 --- a/opacus/utils/uniform_sampler.py +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved - -import torch -from torch.utils.data import Sampler - - -class UniformWithReplacementSampler(Sampler): - r""" - This sampler samples elements according to the Sampled Gaussian Mechanism. - Each sample is selected with a probability equal to ``sample_rate``. - """ - - def __init__(self, num_samples: int, sample_rate: float, generator=None): - r""" - Args: - num_samples (int): number of samples to draw. - sample_rate (float): probability used in sampling. - generator (Generator): Generator used in sampling. 
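-
-        Example:
-            A minimal sketch of how the sampler is typically wired into a
-            ``DataLoader`` (the sizes and rates are illustrative):
-
-            >>> sampler = UniformWithReplacementSampler(
-            ...     num_samples=1000, sample_rate=0.01
-            ... )
-            >>> len(sampler)  # expected number of batches per epoch
-            100
-            >>> # loader = DataLoader(dataset, batch_sampler=sampler)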
-        """
-        self.num_samples = num_samples
-        self.sample_rate = sample_rate
-        self.generator = generator
-        if self.generator is None:
-            generator = torch.Generator()
-            generator.manual_seed(
-                int(torch.empty((), dtype=torch.int64).random_().item())
-            )
-            # keep the seeded generator so __iter__ actually uses it
-            self.generator = generator
-
-        if self.num_samples <= 0:
-            raise ValueError(
-                "num_samples should be a positive integer "
-                "value, but got num_samples={}".format(self.num_samples)
-            )
-
-    def __len__(self):
-        return int(1 / self.sample_rate)
-
-    def __iter__(self):
-        num_batches = int(1 / self.sample_rate)
-        while num_batches > 0:
-            mask = (
-                torch.rand(self.num_samples, generator=self.generator)
-                < self.sample_rate
-            )
-            indices = mask.nonzero(as_tuple=False).reshape(-1).tolist()
-            if len(indices) != 0:
-                # We only output a non-empty list of indices, otherwise the dataloader is unhappy
-                # This is compensated for by the privacy engine
-                yield indices
-            num_batches -= 1
diff --git a/opacus/version.py b/opacus/version.py
deleted file mode 100644
index 3824efa..0000000
--- a/opacus/version.py
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-
-__version__ = "0.14.0"
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..e2e8680
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,2347 @@
+# This file is automatically @generated by Poetry 1.4.0 and should not be changed by hand.
+
+[[package]]
+name = "brotli"
+version = "1.0.9"
+description = "Python bindings for the Brotli compression library"
+category = "main"
+optional = false
+python-versions = "*"
+files = [
+    {file = "Brotli-1.0.9-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:268fe94547ba25b58ebc724680609c8ee3e5a843202e9a381f6f9c5e8bdb5c70"},
+    {file = "Brotli-1.0.9-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:c2415d9d082152460f2bd4e382a1e85aed233abc92db5a3880da2257dc7daf7b"},
+    {file = "Brotli-1.0.9-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5913a1177fc36e30fcf6dc868ce23b0453952c78c04c266d3149b3d39e1410d6"},
+    {file = "Brotli-1.0.9-cp27-cp27m-win32.whl", hash = "sha256:afde17ae04d90fbe53afb628f7f2d4ca022797aa093e809de5c3cf276f61bbfa"},
+    {file = "Brotli-1.0.9-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:7cb81373984cc0e4682f31bc3d6be9026006d96eecd07ea49aafb06897746452"},
+    {file = "Brotli-1.0.9-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:db844eb158a87ccab83e868a762ea8024ae27337fc7ddcbfcddd157f841fdfe7"},
+    {file = "Brotli-1.0.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9744a863b489c79a73aba014df554b0e7a0fc44ef3f8a0ef2a52919c7d155031"},
+    {file = "Brotli-1.0.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a72661af47119a80d82fa583b554095308d6a4c356b2a554fdc2799bc19f2a43"},
+    {file = "Brotli-1.0.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ee83d3e3a024a9618e5be64648d6d11c37047ac48adff25f12fa4226cf23d1c"},
+    {file = "Brotli-1.0.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:19598ecddd8a212aedb1ffa15763dd52a388518c4550e615aed88dc3753c0f0c"},
+    {file = "Brotli-1.0.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:44bb8ff420c1d19d91d79d8c3574b8954288bdff0273bf788954064d260d7ab0"},
+    {file = "Brotli-1.0.9-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e23281b9a08ec338469268f98f194658abfb13658ee98e2b7f85ee9dd06caa91"},
+    {file = "Brotli-1.0.9-cp310-cp310-musllinux_1_1_i686.whl", hash = 
"sha256:3496fc835370da351d37cada4cf744039616a6db7d13c430035e901443a34daa"}, + {file = "Brotli-1.0.9-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b83bb06a0192cccf1eb8d0a28672a1b79c74c3a8a5f2619625aeb6f28b3a82bb"}, + {file = "Brotli-1.0.9-cp310-cp310-win32.whl", hash = "sha256:26d168aac4aaec9a4394221240e8a5436b5634adc3cd1cdf637f6645cecbf181"}, + {file = "Brotli-1.0.9-cp310-cp310-win_amd64.whl", hash = "sha256:622a231b08899c864eb87e85f81c75e7b9ce05b001e59bbfbf43d4a71f5f32b2"}, + {file = "Brotli-1.0.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cc0283a406774f465fb45ec7efb66857c09ffefbe49ec20b7882eff6d3c86d3a"}, + {file = "Brotli-1.0.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:11d3283d89af7033236fa4e73ec2cbe743d4f6a81d41bd234f24bf63dde979df"}, + {file = "Brotli-1.0.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c1306004d49b84bd0c4f90457c6f57ad109f5cc6067a9664e12b7b79a9948ad"}, + {file = "Brotli-1.0.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1375b5d17d6145c798661b67e4ae9d5496920d9265e2f00f1c2c0b5ae91fbde"}, + {file = "Brotli-1.0.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cab1b5964b39607a66adbba01f1c12df2e55ac36c81ec6ed44f2fca44178bf1a"}, + {file = "Brotli-1.0.9-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8ed6a5b3d23ecc00ea02e1ed8e0ff9a08f4fc87a1f58a2530e71c0f48adf882f"}, + {file = "Brotli-1.0.9-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cb02ed34557afde2d2da68194d12f5719ee96cfb2eacc886352cb73e3808fc5d"}, + {file = "Brotli-1.0.9-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b3523f51818e8f16599613edddb1ff924eeb4b53ab7e7197f85cbc321cdca32f"}, + {file = "Brotli-1.0.9-cp311-cp311-win32.whl", hash = "sha256:ba72d37e2a924717990f4d7482e8ac88e2ef43fb95491eb6e0d124d77d2a150d"}, + {file = "Brotli-1.0.9-cp311-cp311-win_amd64.whl", hash = "sha256:3ffaadcaeafe9d30a7e4e1e97ad727e4f5610b9fa2f7551998471e3736738679"}, + {file = "Brotli-1.0.9-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:c83aa123d56f2e060644427a882a36b3c12db93727ad7a7b9efd7d7f3e9cc2c4"}, + {file = "Brotli-1.0.9-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:6b2ae9f5f67f89aade1fab0f7fd8f2832501311c363a21579d02defa844d9296"}, + {file = "Brotli-1.0.9-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:68715970f16b6e92c574c30747c95cf8cf62804569647386ff032195dc89a430"}, + {file = "Brotli-1.0.9-cp35-cp35m-win32.whl", hash = "sha256:defed7ea5f218a9f2336301e6fd379f55c655bea65ba2476346340a0ce6f74a1"}, + {file = "Brotli-1.0.9-cp35-cp35m-win_amd64.whl", hash = "sha256:88c63a1b55f352b02c6ffd24b15ead9fc0e8bf781dbe070213039324922a2eea"}, + {file = "Brotli-1.0.9-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:503fa6af7da9f4b5780bb7e4cbe0c639b010f12be85d02c99452825dd0feef3f"}, + {file = "Brotli-1.0.9-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:40d15c79f42e0a2c72892bf407979febd9cf91f36f495ffb333d1d04cebb34e4"}, + {file = "Brotli-1.0.9-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:93130612b837103e15ac3f9cbacb4613f9e348b58b3aad53721d92e57f96d46a"}, + {file = "Brotli-1.0.9-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87fdccbb6bb589095f413b1e05734ba492c962b4a45a13ff3408fa44ffe6479b"}, + {file = "Brotli-1.0.9-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:6d847b14f7ea89f6ad3c9e3901d1bc4835f6b390a9c71df999b0162d9bb1e20f"}, + {file = "Brotli-1.0.9-cp36-cp36m-musllinux_1_1_i686.whl", hash = 
"sha256:495ba7e49c2db22b046a53b469bbecea802efce200dffb69b93dd47397edc9b6"}, + {file = "Brotli-1.0.9-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:4688c1e42968ba52e57d8670ad2306fe92e0169c6f3af0089be75bbac0c64a3b"}, + {file = "Brotli-1.0.9-cp36-cp36m-win32.whl", hash = "sha256:61a7ee1f13ab913897dac7da44a73c6d44d48a4adff42a5701e3239791c96e14"}, + {file = "Brotli-1.0.9-cp36-cp36m-win_amd64.whl", hash = "sha256:1c48472a6ba3b113452355b9af0a60da5c2ae60477f8feda8346f8fd48e3e87c"}, + {file = "Brotli-1.0.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3b78a24b5fd13c03ee2b7b86290ed20efdc95da75a3557cc06811764d5ad1126"}, + {file = "Brotli-1.0.9-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:9d12cf2851759b8de8ca5fde36a59c08210a97ffca0eb94c532ce7b17c6a3d1d"}, + {file = "Brotli-1.0.9-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:6c772d6c0a79ac0f414a9f8947cc407e119b8598de7621f39cacadae3cf57d12"}, + {file = "Brotli-1.0.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29d1d350178e5225397e28ea1b7aca3648fcbab546d20e7475805437bfb0a130"}, + {file = "Brotli-1.0.9-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7bbff90b63328013e1e8cb50650ae0b9bac54ffb4be6104378490193cd60f85a"}, + {file = "Brotli-1.0.9-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:ec1947eabbaf8e0531e8e899fc1d9876c179fc518989461f5d24e2223395a9e3"}, + {file = "Brotli-1.0.9-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:12effe280b8ebfd389022aa65114e30407540ccb89b177d3fbc9a4f177c4bd5d"}, + {file = "Brotli-1.0.9-cp37-cp37m-win32.whl", hash = "sha256:f909bbbc433048b499cb9db9e713b5d8d949e8c109a2a548502fb9aa8630f0b1"}, + {file = "Brotli-1.0.9-cp37-cp37m-win_amd64.whl", hash = "sha256:97f715cf371b16ac88b8c19da00029804e20e25f30d80203417255d239f228b5"}, + {file = "Brotli-1.0.9-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e16eb9541f3dd1a3e92b89005e37b1257b157b7256df0e36bd7b33b50be73bcb"}, + {file = "Brotli-1.0.9-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:160c78292e98d21e73a4cc7f76a234390e516afcd982fa17e1422f7c6a9ce9c8"}, + {file = "Brotli-1.0.9-cp38-cp38-manylinux1_i686.whl", hash = "sha256:b663f1e02de5d0573610756398e44c130add0eb9a3fc912a09665332942a2efb"}, + {file = "Brotli-1.0.9-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5b6ef7d9f9c38292df3690fe3e302b5b530999fa90014853dcd0d6902fb59f26"}, + {file = "Brotli-1.0.9-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a674ac10e0a87b683f4fa2b6fa41090edfd686a6524bd8dedbd6138b309175c"}, + {file = "Brotli-1.0.9-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e2d9e1cbc1b25e22000328702b014227737756f4b5bf5c485ac1d8091ada078b"}, + {file = "Brotli-1.0.9-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b336c5e9cf03c7be40c47b5fd694c43c9f1358a80ba384a21969e0b4e66a9b17"}, + {file = "Brotli-1.0.9-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:85f7912459c67eaab2fb854ed2bc1cc25772b300545fe7ed2dc03954da638649"}, + {file = "Brotli-1.0.9-cp38-cp38-win32.whl", hash = "sha256:35a3edbe18e876e596553c4007a087f8bcfd538f19bc116917b3c7522fca0429"}, + {file = "Brotli-1.0.9-cp38-cp38-win_amd64.whl", hash = "sha256:269a5743a393c65db46a7bb982644c67ecba4b8d91b392403ad8a861ba6f495f"}, + {file = "Brotli-1.0.9-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2aad0e0baa04517741c9bb5b07586c642302e5fb3e75319cb62087bd0995ab19"}, + {file = "Brotli-1.0.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5cb1e18167792d7d21e21365d7650b72d5081ed476123ff7b8cac7f45189c0c7"}, + {file = "Brotli-1.0.9-cp39-cp39-manylinux1_i686.whl", hash 
= "sha256:16d528a45c2e1909c2798f27f7bf0a3feec1dc9e50948e738b961618e38b6a7b"}, + {file = "Brotli-1.0.9-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:56d027eace784738457437df7331965473f2c0da2c70e1a1f6fdbae5402e0389"}, + {file = "Brotli-1.0.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9bf919756d25e4114ace16a8ce91eb340eb57a08e2c6950c3cebcbe3dff2a5e7"}, + {file = "Brotli-1.0.9-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e4c4e92c14a57c9bd4cb4be678c25369bf7a092d55fd0866f759e425b9660806"}, + {file = "Brotli-1.0.9-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:e48f4234f2469ed012a98f4b7874e7f7e173c167bed4934912a29e03167cf6b1"}, + {file = "Brotli-1.0.9-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9ed4c92a0665002ff8ea852353aeb60d9141eb04109e88928026d3c8a9e5433c"}, + {file = "Brotli-1.0.9-cp39-cp39-win32.whl", hash = "sha256:cfc391f4429ee0a9370aa93d812a52e1fee0f37a81861f4fdd1f4fb28e8547c3"}, + {file = "Brotli-1.0.9-cp39-cp39-win_amd64.whl", hash = "sha256:854c33dad5ba0fbd6ab69185fec8dab89e13cda6b7d191ba111987df74f38761"}, + {file = "Brotli-1.0.9-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9749a124280a0ada4187a6cfd1ffd35c350fb3af79c706589d98e088c5044267"}, + {file = "Brotli-1.0.9-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:73fd30d4ce0ea48010564ccee1a26bfe39323fde05cb34b5863455629db61dc7"}, + {file = "Brotli-1.0.9-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:02177603aaca36e1fd21b091cb742bb3b305a569e2402f1ca38af471777fb019"}, + {file = "Brotli-1.0.9-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:76ffebb907bec09ff511bb3acc077695e2c32bc2142819491579a695f77ffd4d"}, + {file = "Brotli-1.0.9-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:b43775532a5904bc938f9c15b77c613cb6ad6fb30990f3b0afaea82797a402d8"}, + {file = "Brotli-1.0.9-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:5bf37a08493232fbb0f8229f1824b366c2fc1d02d64e7e918af40acd15f3e337"}, + {file = "Brotli-1.0.9-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:330e3f10cd01da535c70d09c4283ba2df5fb78e915bea0a28becad6e2ac010be"}, + {file = "Brotli-1.0.9-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e1abbeef02962596548382e393f56e4c94acd286bd0c5afba756cffc33670e8a"}, + {file = "Brotli-1.0.9-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:3148362937217b7072cf80a2dcc007f09bb5ecb96dae4617316638194113d5be"}, + {file = "Brotli-1.0.9-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:336b40348269f9b91268378de5ff44dc6fbaa2268194f85177b53463d313842a"}, + {file = "Brotli-1.0.9-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b8b09a16a1950b9ef495a0f8b9d0a87599a9d1f179e2d4ac014b2ec831f87e7"}, + {file = "Brotli-1.0.9-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c8e521a0ce7cf690ca84b8cc2272ddaf9d8a50294fd086da67e517439614c755"}, + {file = "Brotli-1.0.9.zip", hash = "sha256:4d1b810aa0ed773f81dceda2cc7b403d01057458730e309856356d4ef4188438"}, +] + +[[package]] +name = "brotlicffi" +version = "1.0.9.2" +description = "Python CFFI bindings to the Brotli library" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "brotlicffi-1.0.9.2-cp27-cp27m-macosx_10_9_x86_64.whl", hash = 
"sha256:408ec4359f9763280d5c4e0ad29c51d1240b25fdd18719067e972163b4125b98"}, + {file = "brotlicffi-1.0.9.2-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:2e4629f7690ded66c8818715c6d4dd6a7ff6a4f10fad6186fe99850f781ce210"}, + {file = "brotlicffi-1.0.9.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:137c4635edcdf593de5ce9d0daa596bf499591b16b8fca5fd72a490deb54b2ee"}, + {file = "brotlicffi-1.0.9.2-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:af8a1b7bcfccf9c41a3c8654994d6a81821fdfe4caddcfe5045bfda936546ca3"}, + {file = "brotlicffi-1.0.9.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:9078432af4785f35ab3840587eed7fb131e3fc77eb2a739282b649b343c584dd"}, + {file = "brotlicffi-1.0.9.2-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:7bb913d5bf3b4ce2ec59872711dc9faaff5f320c3c3827cada2d8a7b793a7753"}, + {file = "brotlicffi-1.0.9.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:16a0c9392a1059e2e62839fbd037d2e7e03c8ae5da65e9746f582464f7fab1bb"}, + {file = "brotlicffi-1.0.9.2-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:94d2810efc5723f1447b332223b197466190518a3eeca93b9f357efb5b22c6dc"}, + {file = "brotlicffi-1.0.9.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:9e70f3e20f317d70912b10dbec48b29114d3dbd0e9d88475cb328e6c086f0546"}, + {file = "brotlicffi-1.0.9.2-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:586f0ea3c2eed455d5f2330b9ab4a591514c8de0ee53d445645efcfbf053c69f"}, + {file = "brotlicffi-1.0.9.2-cp35-abi3-manylinux1_i686.whl", hash = "sha256:4454c3baedc277fd6e65f983e3eb8e77f4bc15060f69370a0201746e2edeca81"}, + {file = "brotlicffi-1.0.9.2-cp35-abi3-manylinux1_x86_64.whl", hash = "sha256:52c1c12dad6eb1d44213a0a76acf5f18f64653bd801300bef5e2f983405bdde5"}, + {file = "brotlicffi-1.0.9.2-cp35-abi3-manylinux2010_i686.whl", hash = "sha256:21cd400d24b344c218d8e32b394849e31b7c15784667575dbda9f65c46a64b0a"}, + {file = "brotlicffi-1.0.9.2-cp35-abi3-manylinux2010_x86_64.whl", hash = "sha256:71061f8bc86335b652e442260c4367b782a92c6e295cf5a10eff84c7d19d8cf5"}, + {file = "brotlicffi-1.0.9.2-cp35-abi3-manylinux2014_aarch64.whl", hash = "sha256:15e0db52c56056be6310fc116b3d7c6f34185594e261f23790b2fb6489998363"}, + {file = "brotlicffi-1.0.9.2-cp35-abi3-win32.whl", hash = "sha256:551305703d12a2dd1ae43d3dde35dee20b1cb49b5796279d4d34e2c6aec6be4d"}, + {file = "brotlicffi-1.0.9.2-cp35-abi3-win_amd64.whl", hash = "sha256:2be4fb8a7cb482f226af686cd06d2a2cab164ccdf99e460f8e3a5ec9a5337da2"}, + {file = "brotlicffi-1.0.9.2-pp27-pypy_73-macosx_10_9_x86_64.whl", hash = "sha256:8e7221d8a084d32d15c7b58e0ce0573972375c5038423dbe83f217cfe512e680"}, + {file = "brotlicffi-1.0.9.2-pp27-pypy_73-manylinux1_x86_64.whl", hash = "sha256:75a46bc5ed2753e1648cc211dcb2c1ac66116038766822dc104023f67ff4dfd8"}, + {file = "brotlicffi-1.0.9.2-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:1e27c43ef72a278f9739b12b2df80ee72048cd4cbe498f8bbe08aaaa67a5d5c8"}, + {file = "brotlicffi-1.0.9.2-pp27-pypy_73-win32.whl", hash = "sha256:feb942814285bdc5e97efc77a04e48283c17dfab9ea082d79c0a7b9e53ef1eab"}, + {file = "brotlicffi-1.0.9.2-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a6208d82c3172eeeb3be83ed4efd5831552c7cd47576468e50fcf0fb23fcf97f"}, + {file = "brotlicffi-1.0.9.2-pp36-pypy36_pp73-manylinux1_x86_64.whl", hash = "sha256:408c810c599786fb806556ff17e844a903884e6370ca400bcec7fa286149f39c"}, + {file = "brotlicffi-1.0.9.2-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:a73099858ee343e8801710a08be8d194f47715ff21e98d92a19ac461058f52d1"}, + {file = "brotlicffi-1.0.9.2-pp36-pypy36_pp73-win32.whl", hash = 
"sha256:916b790f967a18a595e61f218c252f83718ac91f24157d622cf0fa710cd26ab7"}, + {file = "brotlicffi-1.0.9.2-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ba4a00263af40e875ec3d6c7f623cbf8c795b55705da18c64ec36b6bf0848bc5"}, + {file = "brotlicffi-1.0.9.2-pp37-pypy37_pp73-manylinux1_x86_64.whl", hash = "sha256:df78aa47741122b0d5463f1208b7bb18bc9706dee5152d9f56e0ead4865015cd"}, + {file = "brotlicffi-1.0.9.2-pp37-pypy37_pp73-manylinux2010_x86_64.whl", hash = "sha256:9030cd5099252d16bfa4e22659c84a89c102e94f8e81d30764788b72e2d7cfb7"}, + {file = "brotlicffi-1.0.9.2-pp37-pypy37_pp73-win32.whl", hash = "sha256:7e72978f4090a161885b114f87b784f538dcb77dafc6602592c1cf39ae8d243d"}, + {file = "brotlicffi-1.0.9.2.tar.gz", hash = "sha256:0c248a68129d8fc6a217767406c731e498c3e19a7be05ea0a90c3c86637b7d96"}, +] + +[package.dependencies] +cffi = ">=1.0.0" + +[[package]] +name = "certifi" +version = "2022.12.7" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, + {file = "certifi-2022.12.7.tar.gz", hash = "sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, +] + +[[package]] +name = "cffi" +version = "1.15.1" +description = "Foreign Function Interface for Python calling C code." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, + {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, + {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, + {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, + {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, + {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, + {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, + {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, + {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, + {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, + {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, + {file = 
"cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, + {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, + {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, + {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, + {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, + {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, + {file = 
"cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, + {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, + {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, + {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, + {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, +] + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "charset-normalizer" +version = "3.1.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = 
"sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"}, + {file = 
"charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = 
"sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"}, + {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"}, +] + +[[package]] +name = "cloudpickle" +version = "2.2.1" +description = "Extended pickling support for Python objects" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "cloudpickle-2.2.1-py3-none-any.whl", hash = "sha256:61f594d1f4c295fa5cd9014ceb3a1fc4a70b0de1164b94fbc2d854ccba056f9f"}, + {file = "cloudpickle-2.2.1.tar.gz", hash = "sha256:d89684b8de9e34a2a43b3460fbca07d09d6e25ce858df4d5a44240403b6178f5"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "contourpy" +version = "1.0.7" +description = "Python library for calculating contours of 2D quadrilateral grids" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "contourpy-1.0.7-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:95c3acddf921944f241b6773b767f1cbce71d03307270e2d769fd584d5d1092d"}, + {file = "contourpy-1.0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fc1464c97579da9f3ab16763c32e5c5d5bb5fa1ec7ce509a4ca6108b61b84fab"}, + {file = "contourpy-1.0.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8acf74b5d383414401926c1598ed77825cd530ac7b463ebc2e4f46638f56cce6"}, + {file = "contourpy-1.0.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c71fdd8f1c0f84ffd58fca37d00ca4ebaa9e502fb49825484da075ac0b0b803"}, + {file = "contourpy-1.0.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f99e9486bf1bb979d95d5cffed40689cb595abb2b841f2991fc894b3452290e8"}, + {file = "contourpy-1.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87f4d8941a9564cda3f7fa6a6cd9b32ec575830780677932abdec7bcb61717b0"}, + {file = "contourpy-1.0.7-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9e20e5a1908e18aaa60d9077a6d8753090e3f85ca25da6e25d30dc0a9e84c2c6"}, + {file = "contourpy-1.0.7-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a877ada905f7d69b2a31796c4b66e31a8068b37aa9b78832d41c82fc3e056ddd"}, + {file = "contourpy-1.0.7-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6381fa66866b0ea35e15d197fc06ac3840a9b2643a6475c8fff267db8b9f1e69"}, + {file = "contourpy-1.0.7-cp310-cp310-win32.whl", hash = "sha256:3c184ad2433635f216645fdf0493011a4667e8d46b34082f5a3de702b6ec42e3"}, + {file = "contourpy-1.0.7-cp310-cp310-win_amd64.whl", hash = "sha256:3caea6365b13119626ee996711ab63e0c9d7496f65641f4459c60a009a1f3e80"}, + {file = "contourpy-1.0.7-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ed33433fc3820263a6368e532f19ddb4c5990855e4886088ad84fd7c4e561c71"}, + {file = "contourpy-1.0.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:38e2e577f0f092b8e6774459317c05a69935a1755ecfb621c0a98f0e3c09c9a5"}, + {file = "contourpy-1.0.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ae90d5a8590e5310c32a7630b4b8618cef7563cebf649011da80874d0aa8f414"}, + {file = "contourpy-1.0.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:130230b7e49825c98edf0b428b7aa1125503d91732735ef897786fe5452b1ec2"}, + {file = "contourpy-1.0.7-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58569c491e7f7e874f11519ef46737cea1d6eda1b514e4eb5ac7dab6aa864d02"}, + {file = "contourpy-1.0.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54d43960d809c4c12508a60b66cb936e7ed57d51fb5e30b513934a4a23874fae"}, + {file = "contourpy-1.0.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:152fd8f730c31fd67fe0ffebe1df38ab6a669403da93df218801a893645c6ccc"}, + {file = "contourpy-1.0.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9056c5310eb1daa33fc234ef39ebfb8c8e2533f088bbf0bc7350f70a29bde1ac"}, + {file = 
"contourpy-1.0.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a9d7587d2fdc820cc9177139b56795c39fb8560f540bba9ceea215f1f66e1566"}, + {file = "contourpy-1.0.7-cp311-cp311-win32.whl", hash = "sha256:4ee3ee247f795a69e53cd91d927146fb16c4e803c7ac86c84104940c7d2cabf0"}, + {file = "contourpy-1.0.7-cp311-cp311-win_amd64.whl", hash = "sha256:5caeacc68642e5f19d707471890f037a13007feba8427eb7f2a60811a1fc1350"}, + {file = "contourpy-1.0.7-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fd7dc0e6812b799a34f6d12fcb1000539098c249c8da54f3566c6a6461d0dbad"}, + {file = "contourpy-1.0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0f9d350b639db6c2c233d92c7f213d94d2e444d8e8fc5ca44c9706cf72193772"}, + {file = "contourpy-1.0.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e96a08b62bb8de960d3a6afbc5ed8421bf1a2d9c85cc4ea73f4bc81b4910500f"}, + {file = "contourpy-1.0.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:031154ed61f7328ad7f97662e48660a150ef84ee1bc8876b6472af88bf5a9b98"}, + {file = "contourpy-1.0.7-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e9ebb4425fc1b658e13bace354c48a933b842d53c458f02c86f371cecbedecc"}, + {file = "contourpy-1.0.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efb8f6d08ca7998cf59eaf50c9d60717f29a1a0a09caa46460d33b2924839dbd"}, + {file = "contourpy-1.0.7-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6c180d89a28787e4b73b07e9b0e2dac7741261dbdca95f2b489c4f8f887dd810"}, + {file = "contourpy-1.0.7-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b8d587cc39057d0afd4166083d289bdeff221ac6d3ee5046aef2d480dc4b503c"}, + {file = "contourpy-1.0.7-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:769eef00437edf115e24d87f8926955f00f7704bede656ce605097584f9966dc"}, + {file = "contourpy-1.0.7-cp38-cp38-win32.whl", hash = "sha256:62398c80ef57589bdbe1eb8537127321c1abcfdf8c5f14f479dbbe27d0322e66"}, + {file = "contourpy-1.0.7-cp38-cp38-win_amd64.whl", hash = "sha256:57119b0116e3f408acbdccf9eb6ef19d7fe7baf0d1e9aaa5381489bc1aa56556"}, + {file = "contourpy-1.0.7-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:30676ca45084ee61e9c3da589042c24a57592e375d4b138bd84d8709893a1ba4"}, + {file = "contourpy-1.0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3e927b3868bd1e12acee7cc8f3747d815b4ab3e445a28d2e5373a7f4a6e76ba1"}, + {file = "contourpy-1.0.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:366a0cf0fc079af5204801786ad7a1c007714ee3909e364dbac1729f5b0849e5"}, + {file = "contourpy-1.0.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89ba9bb365446a22411f0673abf6ee1fea3b2cf47b37533b970904880ceb72f3"}, + {file = "contourpy-1.0.7-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:71b0bf0c30d432278793d2141362ac853859e87de0a7dee24a1cea35231f0d50"}, + {file = "contourpy-1.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7281244c99fd7c6f27c1c6bfafba878517b0b62925a09b586d88ce750a016d2"}, + {file = "contourpy-1.0.7-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b6d0f9e1d39dbfb3977f9dd79f156c86eb03e57a7face96f199e02b18e58d32a"}, + {file = "contourpy-1.0.7-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7f6979d20ee5693a1057ab53e043adffa1e7418d734c1532e2d9e915b08d8ec2"}, + {file = "contourpy-1.0.7-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5dd34c1ae752515318224cba7fc62b53130c45ac6a1040c8b7c1a223c46e8967"}, + {file = "contourpy-1.0.7-cp39-cp39-win32.whl", hash = 
"sha256:c5210e5d5117e9aec8c47d9156d1d3835570dd909a899171b9535cb4a3f32693"}, + {file = "contourpy-1.0.7-cp39-cp39-win_amd64.whl", hash = "sha256:60835badb5ed5f4e194a6f21c09283dd6e007664a86101431bf870d9e86266c4"}, + {file = "contourpy-1.0.7-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ce41676b3d0dd16dbcfabcc1dc46090aaf4688fd6e819ef343dbda5a57ef0161"}, + {file = "contourpy-1.0.7-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a011cf354107b47c58ea932d13b04d93c6d1d69b8b6dce885e642531f847566"}, + {file = "contourpy-1.0.7-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31a55dccc8426e71817e3fe09b37d6d48ae40aae4ecbc8c7ad59d6893569c436"}, + {file = "contourpy-1.0.7-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69f8ff4db108815addd900a74df665e135dbbd6547a8a69333a68e1f6e368ac2"}, + {file = "contourpy-1.0.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:efe99298ba37e37787f6a2ea868265465410822f7bea163edcc1bd3903354ea9"}, + {file = "contourpy-1.0.7-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a1e97b86f73715e8670ef45292d7cc033548266f07d54e2183ecb3c87598888f"}, + {file = "contourpy-1.0.7-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc331c13902d0f50845099434cd936d49d7a2ca76cb654b39691974cb1e4812d"}, + {file = "contourpy-1.0.7-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:24847601071f740837aefb730e01bd169fbcaa610209779a78db7ebb6e6a7051"}, + {file = "contourpy-1.0.7-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abf298af1e7ad44eeb93501e40eb5a67abbf93b5d90e468d01fc0c4451971afa"}, + {file = "contourpy-1.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:64757f6460fc55d7e16ed4f1de193f362104285c667c112b50a804d482777edd"}, + {file = "contourpy-1.0.7.tar.gz", hash = "sha256:d8165a088d31798b59e91117d1f5fc3df8168d8b48c4acc10fc0df0d0bdbcc5e"}, +] + +[package.dependencies] +numpy = ">=1.16" + +[package.extras] +bokeh = ["bokeh", "chromedriver", "selenium"] +docs = ["furo", "sphinx-copybutton"] +mypy = ["contourpy[bokeh]", "docutils-stubs", "mypy (==0.991)", "types-Pillow"] +test = ["Pillow", "matplotlib", "pytest"] +test-no-images = ["pytest"] + +[[package]] +name = "copulas" +version = "0.8.0" +description = "Create tabular synthetic data using copulas-based modeling." 
+category = "main" +optional = false +python-versions = ">=3.7,<3.12" +files = [ + {file = "copulas-0.8.0-py2.py3-none-any.whl", hash = "sha256:5127be7c4af2a5098954f718c234496f03525c63b3c6b5f593cab31049de643f"}, + {file = "copulas-0.8.0.tar.gz", hash = "sha256:fa38b4b5f14582a71242f1de6bada4485f9bd4adc50c6f6571f2c121d5a57c12"}, +] + +[package.dependencies] +matplotlib = [ + {version = ">=3.4.0,<4", markers = "python_version >= \"3.7\" and python_version < \"3.10\""}, + {version = ">=3.6.0,<4", markers = "python_version >= \"3.10\""}, +] +numpy = [ + {version = ">=1.20.0,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.23.3,<2", markers = "python_version >= \"3.10\""}, +] +pandas = [ + {version = ">=1.1.3,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.3.4,<2", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, +] +scipy = [ + {version = ">=1.5.4,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.9.2,<2", markers = "python_version >= \"3.10\""}, +] + +[package.extras] +dev = ["Jinja2 (>=2,<3)", "Sphinx (>=1.7.1,<3)", "autoflake (>=1.1,<2)", "autopep8 (>=1.4.3,<1.6)", "boto3 (>=1.7.47,<1.10)", "bumpversion (>=0.5.3,<0.6)", "coverage (>=4.5.1,<6)", "dlint (>=0.11.0,<0.12)", "doc8 (>=0.8.0,<0.9)", "docutils (>=0.10,<0.15)", "flake8 (>=3.7.7,<4)", "flake8-absolute-import (>=1.0,<2)", "flake8-builtins (>=1.5.3,<1.6)", "flake8-comprehensions (>=3.6.1,<3.7)", "flake8-debugger (>=4.0.0,<4.1)", "flake8-docstrings (>=1.5.0,<2)", "flake8-eradicate (>=1.1.0,<1.2)", "flake8-expression-complexity (>=0.0.9,<0.1)", "flake8-fixme (>=1.1.1,<1.2)", "flake8-mock (>=0.3,<0.4)", "flake8-multiline-containers (>=0.0.18,<0.1)", "flake8-mutable (>=1.2.0,<1.3)", "flake8-print (>=4.0.0,<4.1)", "flake8-pytest-style (>=1.5.0,<2)", "flake8-quotes (>=3.3.0,<4)", "flake8-sfs (>=0.0.3,<0.1)", "flake8-variables-names (>=0.0.4,<0.1)", "invoke", "isort (>=4.3.4,<5)", "jupyter (>=1.0.0,<2)", "m2r (>=0.2.0,<0.3)", "markupsafe (<=2.0.1)", "nbsphinx (>=0.5.0,<0.7)", "pandas-vet (>=0.2.2,<0.3)", "pep8-naming (>=0.12.1,<0.13)", "pip (>=9.0.1)", "pydocstyle (>=6.1.1,<6.2)", "pytest (>=6.2.5,<7)", "pytest-cov (>=2.6.0,<3)", "pytest-rerunfailures (>=9.0.0,<10)", "rundoc (>=0.4.3,<0.5)", "scikit-learn (>=0.24,<1.2)", "sphinx-rtd-theme (>=0.2.4,<0.5)", "tabulate (>=0.8.3,<0.9)", "tox (>=2.9.1,<4)", "twine (>=1.10.0,<4)", "urllib3 (>=1.20,<1.26)", "watchdog (>=0.8.3,<0.11)", "wheel (>=0.30.0)"] +test = ["jupyter (>=1.0.0,<2)", "markupsafe (<=2.0.1)", "pytest (>=6.2.5,<7)", "pytest-cov (>=2.6.0,<3)", "pytest-rerunfailures (>=9.0.0,<10)", "rundoc (>=0.4.3,<0.5)", "scikit-learn (>=0.24,<1.2)"] +tutorials = ["jupyter (>=1.0.0,<2)", "markupsafe (<=2.0.1)", "scikit-learn (>=0.24,<1.2)"] + +[[package]] +name = "ctgan" +version = "0.7.1" +description = "Create tabular synthetic data using a conditional GAN" +category = "main" +optional = false +python-versions = ">=3.7,<3.11" +files = [ + {file = "ctgan-0.7.1-py2.py3-none-any.whl", hash = "sha256:047896ae09e98ce32ec9bfc73ec2068228e681d0f8b3c89a2b49e23f324cdb23"}, + {file = "ctgan-0.7.1.tar.gz", hash = "sha256:a32a86bda8cb5b4df41558f64ad5c7623399428ab41e90609a66b90052ccf717"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.20.0,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.23.3,<2", markers = "python_version >= \"3.10\""}, +] +packaging = ">=20,<22" +pandas = [ + {version = ">=1.1.3,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.3.4,<2", markers = "python_version >= \"3.10\""}, +] +rdt = 
">=1.3.0,<2.0" +scikit-learn = {version = ">=1.1.3,<2", markers = "python_version >= \"3.10\""} +torch = [ + {version = ">=1.8.0,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.11.0,<2", markers = "python_version >= \"3.10\""}, +] + +[package.extras] +dev = ["autoflake (>=1.1,<2)", "autopep8 (>=1.4.3,<1.6)", "bumpversion (>=0.5.3,<0.6)", "coverage (>=4.5.1,<6)", "dlint (>=0.11.0,<0.12)", "flake8 (>=3.7.7,<4)", "flake8-absolute-import (>=1.0,<2)", "flake8-builtins (>=1.5.3,<1.6)", "flake8-comprehensions (>=3.6.1,<3.7)", "flake8-debugger (>=4.0.0,<4.1)", "flake8-docstrings (>=1.5.0,<2)", "flake8-eradicate (>=1.1.0,<1.2)", "flake8-expression-complexity (>=0.0.9,<0.1)", "flake8-fixme (>=1.1.1,<1.2)", "flake8-mock (>=0.3,<0.4)", "flake8-multiline-containers (>=0.0.18,<0.1)", "flake8-mutable (>=1.2.0,<1.3)", "flake8-print (>=4.0.0,<4.1)", "flake8-pytest-style (>=1.5.0,<2)", "flake8-quotes (>=3.3.0,<4)", "flake8-sfs (>=0.0.3,<0.1)", "flake8-variables-names (>=0.0.4,<0.1)", "invoke", "isort (>=4.3.4,<5)", "pandas-vet (>=0.2.2,<0.3)", "pip (>=9.0.1)", "pytest (>=3.4.2)", "pytest-cov (>=2.6.0)", "pytest-rerunfailures (>=9.1.1,<10)", "rundoc (>=0.4.3,<0.5)", "tox (>=2.9.1,<4)", "twine (>=1.10.0,<4)", "watchdog (>=0.8.3,<0.11)", "wheel (>=0.30.0)"] +test = ["pytest (>=3.4.2)", "pytest-cov (>=2.6.0)", "pytest-rerunfailures (>=9.1.1,<10)", "rundoc (>=0.4.3,<0.5)"] + +[[package]] +name = "cycler" +version = "0.11.0" +description = "Composable style cycles" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "cycler-0.11.0-py3-none-any.whl", hash = "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3"}, + {file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"}, +] + +[[package]] +name = "deepecho" +version = "0.4.0" +description = "Create sequential synthetic data of mixed types using a GAN." 
+category = "main" +optional = false +python-versions = ">=3.7,<3.11" +files = [ + {file = "deepecho-0.4.0-py2.py3-none-any.whl", hash = "sha256:286b390d94ae0f7487c9e1413a55e03b67696660bbfba453c44eae7cf71c18fb"}, + {file = "deepecho-0.4.0.tar.gz", hash = "sha256:c49baee281691f220b880b17fe095134c797dfb90bdb2b2d8b783da69386b51c"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.20.0,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.23.3,<2", markers = "python_version >= \"3.10\""}, +] +pandas = [ + {version = ">=1.1.3,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.3.4,<2", markers = "python_version >= \"3.10\""}, +] +torch = [ + {version = ">=1.8.0,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.11.0,<2", markers = "python_version >= \"3.10\""}, +] +tqdm = ">=4.15,<5" + +[package.extras] +dev = ["autoflake (>=1.1,<2)", "autopep8 (>=1.4.3,<1.6)", "bumpversion (>=0.5.3,<0.6)", "coverage (>=4.5.1,<6)", "dlint (>=0.11.0,<0.12)", "flake8 (>=3.7.7,<4)", "flake8-absolute-import (>=1.0,<2)", "flake8-builtins (>=1.5.3,<1.6)", "flake8-debugger (>=4.0.0,<4.1)", "flake8-docstrings (>=1.5.0,<2)", "flake8-eradicate (>=1.1.0,<1.2)", "flake8-expression-complexity (>=0.0.9,<0.1)", "flake8-fixme (>=1.1.1,<1.2)", "flake8-mock (>=0.3,<0.4)", "flake8-multiline-containers (>=0.0.18,<0.1)", "flake8-mutable (>=1.2.0,<1.3)", "flake8-print (>=4.0.0,<4.1)", "flake8-quotes (>=3.3.0,<4)", "flake8-sfs (>=0.0.3,<0.1)", "flake8-variables-names (>=0.0.4,<0.1)", "invoke", "isort (>=4.3.4,<5)", "jupyter (>=1.0.0,<2)", "pep8-naming (>=0.12.1,<0.13)", "pip (>=9.0.1)", "pylint (>=2.5.3,<3)", "pytest (>=3.4.2)", "pytest-cov (>=2.6.0)", "pytest-rerunfailures (>=9.0.0,<10)", "rundoc (>=0.4.3,<0.5)", "setuptools (<49.2)", "tox (>=2.9.1,<4)", "twine (>=1.10.0,<4)", "watchdog (>=0.8.3,<0.11)", "wheel (>=0.30.0)"] +test = ["jupyter (>=1.0.0,<2)", "pytest (>=3.4.2)", "pytest-cov (>=2.6.0)", "pytest-rerunfailures (>=9.0.0,<10)", "rundoc (>=0.4.3,<0.5)"] + +[[package]] +name = "faker" +version = "14.2.1" +description = "Faker is a Python package that generates fake data for you." 
+category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "Faker-14.2.1-py3-none-any.whl", hash = "sha256:2e28aaea60456857d4ce95dd12aed767769537ad23d13d51a545cd40a654e9d9"}, + {file = "Faker-14.2.1.tar.gz", hash = "sha256:daad7badb4fd916bd047b28c8459ef4689e4fe6acf61f6dfebee8cc602e4d009"}, +] + +[package.dependencies] +python-dateutil = ">=2.4" + +[[package]] +name = "feather-format" +version = "0.4.1" +description = "Simple wrapper library to the Apache Arrow-based Feather File Format" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "feather-format-0.4.1.tar.gz", hash = "sha256:45f67e3745d394d4f160ca6d636bbfd4f8b68d01199dc1649b6e487d3e878903"}, +] + +[package.dependencies] +pyarrow = ">=0.4.0" + +[[package]] +name = "fonttools" +version = "4.39.0" +description = "Tools to manipulate font files" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.39.0-py3-none-any.whl", hash = "sha256:f5e764e1fd6ad54dfc201ff32af0ba111bcfbe0d05b24540af74c63db4ed6390"}, + {file = "fonttools-4.39.0.zip", hash = "sha256:909c104558835eac27faeb56be5a4c32694192dca123d073bf746ce9254054af"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.0.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "scipy"] +lxml = ["lxml (>=4.0,<5)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.0.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + +[[package]] +name = "functorch" +version = "1.13.1" +description = "JAX-like composable function transforms for PyTorch" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "functorch-1.13.1-py2.py3-none-any.whl", hash = "sha256:a131ad83c514ad71efa1b18beeb88fd379d1c61078b2a1e29723bb85aa793d35"}, +] + +[package.dependencies] +torch = ">=1.13.1,<1.13.2" + +[package.extras] +aot = ["networkx"] + +[[package]] +name = "gower" +version = "0.1.2" +description = "Python implementation of Gowers distance, pairwise between records in two data sets" +category = "main" +optional = false +python-versions = ">=2.7" +files = [ + {file = "gower-0.1.2-py3-none-any.whl", hash = "sha256:cb46e18243e1d88d2fa0a23d20afb71e5469f25db4ee6236db40f897dfea9e6f"}, + {file = "gower-0.1.2.tar.gz", hash = "sha256:34ddb5158f0e8bfba093dca06b9f887bda244998d10af2a3ad8c74a6efa1b5f6"}, +] + +[package.dependencies] +numpy = "*" +scipy = "*" + +[[package]] +name = "graphviz" +version = "0.20.1" +description = "Simple Python interface for Graphviz" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "graphviz-0.20.1-py3-none-any.whl", hash = "sha256:587c58a223b51611c0cf461132da386edd896a029524ca61a1462b880bf97977"}, + {file = "graphviz-0.20.1.zip", hash = "sha256:8c58f14adaa3b947daf26c19bc1e98c4e0702cdc31cf99153e6f06904d492bf8"}, +] + +[package.extras] +dev = ["flake8", "pep8-naming", "tox (>=3)", "twine", "wheel"] +docs = ["sphinx (>=5)", "sphinx-autodoc-typehints", "sphinx-rtd-theme"] +test = ["coverage", "mock (>=4)", "pytest (>=7)", "pytest-cov", "pytest-mock (>=3)"] + +[[package]] +name = "h5py" +version = "3.8.0" +description = "Read and write HDF5 
files from Python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h5py-3.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:533d7dad466ddb7e3b30af274b630eb7c1a6e4ddf01d1c373a0334dc2152110a"}, + {file = "h5py-3.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c873ba9fd4fa875ad62ce0e4891725e257a8fe7f5abdbc17e51a5d54819be55c"}, + {file = "h5py-3.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98a240cd4c1bfd568aaa52ec42d263131a2582dab82d74d3d42a0d954cac12be"}, + {file = "h5py-3.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3389b63222b1c7a158bb7fe69d11ca00066740ec5574596d47a2fe5317f563a"}, + {file = "h5py-3.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:7f3350fc0a8407d668b13247861c2acd23f7f5fe7d060a3ad9b0820f5fcbcae0"}, + {file = "h5py-3.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:db03e3f2c716205fbdabb34d0848459840585225eb97b4f08998c743821ca323"}, + {file = "h5py-3.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36761693efbe53df179627a775476dcbc37727d6e920958277a7efbc18f1fb73"}, + {file = "h5py-3.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a506fc223def428f4329e7e1f9fe1c8c593eab226e7c0942c8d75308ad49950"}, + {file = "h5py-3.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33b15aae79e9147aebe1d0e54099cbcde8d65e3e227cd5b59e49b1272aa0e09d"}, + {file = "h5py-3.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:9f6f6ffadd6bfa9b2c5b334805eb4b19ca0a5620433659d8f7fb86692c40a359"}, + {file = "h5py-3.8.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8f55d9c6c84d7d09c79fb85979e97b81ec6071cc776a97eb6b96f8f6ec767323"}, + {file = "h5py-3.8.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b685453e538b2b5934c58a644ac3f3b3d0cec1a01b6fb26d57388e9f9b674ad0"}, + {file = "h5py-3.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:377865821fe80ad984d003723d6f8890bd54ceeb5981b43c0313b9df95411b30"}, + {file = "h5py-3.8.0-cp37-cp37m-win_amd64.whl", hash = "sha256:0fef76e10b9216657fa37e7edff6d8be0709b25bd5066474c229b56cf0098df9"}, + {file = "h5py-3.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:26ffc344ec9984d2cd3ca0265007299a8bac8d85c1ad48f4639d8d3aed2af171"}, + {file = "h5py-3.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bacaa1c16810dd2b3e4417f8e730971b7c4d53d234de61fe4a918db78e80e1e4"}, + {file = "h5py-3.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bae730580ae928de409d63cbe4fdca4c82c3ad2bed30511d19d34e995d63c77e"}, + {file = "h5py-3.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f47f757d1b76f0ecb8aa0508ec8d1b390df67a8b67ee2515dc1b046f3a1596ea"}, + {file = "h5py-3.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:f891b17e3a3e974e93f9e34e7cca9f530806543571ce078998676a555837d91d"}, + {file = "h5py-3.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:290e00fa2de74a10688d1bac98d5a9cdd43f14f58e562c580b5b3dfbd358ecae"}, + {file = "h5py-3.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:03890b1c123d024fb0239a3279737d5432498c1901c354f8b10d8221d1d16235"}, + {file = "h5py-3.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7865de06779b14d98068da387333ad9bf2756b5b579cc887fac169bc08f87c3"}, + {file = "h5py-3.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49bc857635f935fa30e92e61ac1e87496df8f260a6945a3235e43a9890426866"}, + 
{file = "h5py-3.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:5fd2252d1fc364ba0e93dd0b7089f4906b66805cb4e6aca7fa8874ac08649647"}, + {file = "h5py-3.8.0.tar.gz", hash = "sha256:6fead82f0c4000cf38d53f9c030780d81bfa0220218aee13b90b7701c937d95f"}, +] + +[package.dependencies] +numpy = ">=1.14.5" + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "importlib-metadata" +version = "6.0.0" +description = "Read metadata from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "importlib_metadata-6.0.0-py3-none-any.whl", hash = "sha256:7efb448ec9a5e313a57655d35aa54cd3e01b7e1fbcf72dce1bf06119420f5bad"}, + {file = "importlib_metadata-6.0.0.tar.gz", hash = "sha256:e354bedeb60efa6affdcc8ae121b73544a7aa74156d047311948f6d711cd378d"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] + +[[package]] +name = "importlib-resources" +version = "5.12.0" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "importlib_resources-5.12.0-py3-none-any.whl", hash = "sha256:7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a"}, + {file = "importlib_resources-5.12.0.tar.gz", hash = "sha256:4be82589bf5c1d7999aedf2a45159d10cb3ca4f19b2271f8792bc8e6da7b22f6"}, +] + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[[package]] +name = "inflate64" +version = "0.3.1" +description = "deflate64 compression/decompression library" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "inflate64-0.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d4e2a337c6c03b0e96ccd79940cbb04fe2063974d56fff6d78f8d57839546c57"}, + {file = "inflate64-0.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c142fbbbfbe0877fe821ff8bc4cc10f96d344b7400721579b3d17deeae28f59"}, + {file = "inflate64-0.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3a17e1dd1a5a872edfc02bc4a048868ada4865a3f4ee3ad5d224b192f2e53df7"}, + {file = "inflate64-0.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cf41f82dd4e90e8684c7be4583d7232bd800a561f3ed0241c84e39148861887"}, + {file = "inflate64-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6059eaba5044739ad6424588e845bd856f89a1a18f1addc31b97c49f02f68728"}, + {file = 
"inflate64-0.3.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5c5b2eb7e89d550d287774dea7d429ee24ce44ca34499a6cef113a14f108e700"}, + {file = "inflate64-0.3.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1d861fed6b2098d1862b64db9df650b9bd41fc41caa9fcaeee399079342aa4a8"}, + {file = "inflate64-0.3.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e32a78c81afba5699569c3493066ecb38fb45ccdf4c35b3c2232c9c2585b5257"}, + {file = "inflate64-0.3.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:42a6ef375b3e7059bd52993a0938f2bf97725cb5dc380f0c4dbaa9fc3780e025"}, + {file = "inflate64-0.3.1-cp310-cp310-win32.whl", hash = "sha256:664929528047b6b472852a4c0d12b4b9cf6e663059ba64ebd10f08aa56365755"}, + {file = "inflate64-0.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:08c3b03514d4b849901762a32a45eeba7fd5d784fec698eca6975f41cca33672"}, + {file = "inflate64-0.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c71821f93c931ae379cf9c9bbdd7099738fa00802ccf2a5271e2b68bc67a6ab8"}, + {file = "inflate64-0.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3bacbe9d4b7c185011b59268223a010ed777a28ed8cf40efc74fab1b7262e904"}, + {file = "inflate64-0.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:130dfdca4bd38e588ea4f878bf62635e36f83ddf7f2842d1055d1c16a11890cf"}, + {file = "inflate64-0.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80a125dd5cb7b7985c05a78b0bfd7751249d0d84fc330901dbd9faa693e1f53f"}, + {file = "inflate64-0.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67efdfd21d7b99f30a43560b22264c1e580ff08ae9831e78c99445575962dbc7"}, + {file = "inflate64-0.3.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ad84ac611eae17a961124c5fbe754b6982291a3945ab2b9c334a08e2e56f9ccc"}, + {file = "inflate64-0.3.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a1b481343f12641b1ae7a19135a70c44ecf020dccb670e02522c2b02db920851"}, + {file = "inflate64-0.3.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ad4cae5097bdff7e0bb1ab676d86ad08716597baa3b616e5b710a724f5d5cbc4"}, + {file = "inflate64-0.3.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:906a4b57df32f903e847766ca685e44ed3e7ee3a960fa94264d5e68b836d446d"}, + {file = "inflate64-0.3.1-cp311-cp311-win32.whl", hash = "sha256:0b0c8aa2fcdb1052d3bc6c5b5b1191b9c708d30e47af98ba0a8117ae1f6c9efc"}, + {file = "inflate64-0.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:473e0081c268ffa4b18683586b55170eb96d8b4fc684dd3ed9599c17c512d2e4"}, + {file = "inflate64-0.3.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9f6737a575c6e7e818963d95a998be4c91484374961734cee97265f3c4c3b979"}, + {file = "inflate64-0.3.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c913b679f023f5907a54bfa9a6e438407ed4e40eee23ed19b4118128bdd091c"}, + {file = "inflate64-0.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29946840e6970d68e7739207ca21140c59ffebe7e02d28c7e86348166ce32418"}, + {file = "inflate64-0.3.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ba954600441eafe8f6f54eadffeac4d1ab2416d5d1a6b0ab403e50284ba457b"}, + {file = "inflate64-0.3.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f2a4dac4ebc4ad58a4ac911e39cf97cd74906c0c82c16333887aa9f287e98d5b"}, + {file = "inflate64-0.3.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = 
"sha256:7b7966193f1bf23e050af72b4c4720dffa5f33471de7afea37ba0d0f0195adef"}, + {file = "inflate64-0.3.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7f8346e644de449a4a90dcb22971dea456398b6cc788102013675b11256ae47e"}, + {file = "inflate64-0.3.1-cp37-cp37m-win32.whl", hash = "sha256:f39b57974db0e85897fff40518da420f4c4012b73515ca6f415a472228fea288"}, + {file = "inflate64-0.3.1-cp37-cp37m-win_amd64.whl", hash = "sha256:74ceb9d172ce06572632bc8070d54b963455421e216013575383f991e722bb7d"}, + {file = "inflate64-0.3.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c28cb635ccb9aae399fbc8e82c85b89ea0a7bb2219e7d582bbc007a29fb6e149"}, + {file = "inflate64-0.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9297115bf144c585e9d6a746e851c64c81d8f1ce8b62da4885babe66c36f7d29"}, + {file = "inflate64-0.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a075b174bace5174828906c7c87019a2af3cc5707025f01ee0395fb4b88fd98e"}, + {file = "inflate64-0.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa7476129e7f81e67a9253470c3085a9fd75ec77e6fae3de61f7795138ce725e"}, + {file = "inflate64-0.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35e24ffd8d6225fbfe26c524b45ace1bb8956811bd79e9f3d523a721d51b0d4e"}, + {file = "inflate64-0.3.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:553cd992f02af574d2116c74ca48d7cf10894c6b9ba8159f488f3bfac3c201ae"}, + {file = "inflate64-0.3.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:82393e46b8ba2f8613d030f38c7c492b0896ff8803f7ff870677f25d3e5e7113"}, + {file = "inflate64-0.3.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:67e37d96ea2ee8257b12cde83a09e4f0276950268a7a2f777aee7de60db5ec72"}, + {file = "inflate64-0.3.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:09dd0f8d6dee0da467c264dbd9bca8b33f9c915860fc3385f2a633640a65bd10"}, + {file = "inflate64-0.3.1-cp38-cp38-win32.whl", hash = "sha256:26e8319fd032c520203e2c001f1693c1c03774d85915900427e884011718f41d"}, + {file = "inflate64-0.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:ab8f9e14ba6495f440101751ba8aa371e4a52941b5e343c6f3e8c61021e2df5e"}, + {file = "inflate64-0.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:094ef56a87c7b7398d93af7bfe7f24f830f24b6e55b77426f6516cef43e05460"}, + {file = "inflate64-0.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:48fd2527a462374dc19be06301d6aa30a03190532f2f8bddfbc39b7158561750"}, + {file = "inflate64-0.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fde3f85864c84badb26f42d95639360e627fd09c529a76c46a06dbd7a5735c51"}, + {file = "inflate64-0.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5efd55c21b794601fd44b99b8e2f17498744f573116ce27a745bc5e08f0457e1"}, + {file = "inflate64-0.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d71af8b23ac23bc9e9f776451c125be6320ad4589a7d5bcb5ab5e1fc61b4e58f"}, + {file = "inflate64-0.3.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ced0af509a31dcba0cd98ecdd06cb7c9ce66ebde78e0d99ba3515d4e991e34d0"}, + {file = "inflate64-0.3.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:853f3442eceda8035072686533694ab833c4293d10c9d0685147200f0e964356"}, + {file = "inflate64-0.3.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a6bec3d2f30f6f2656e1c5a4147181e401c8d7026cd598d86ad5647c616fc618"}, + {file = "inflate64-0.3.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:84287d1d09fd879353d3ccadd43f3d8adea75e830476ddfd46d8849d36d25afe"}, + {file = "inflate64-0.3.1-cp39-cp39-win32.whl", hash = "sha256:a2f4aaa02f9a5ada944960428b6528a0a9d773925efc73485882f34bf42654be"}, + {file = "inflate64-0.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:6ff89f94823b2466bae45759fc324bd25bd20c490607a7d8407237cf64ccafa9"}, + {file = "inflate64-0.3.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c1faf43890dbfff31195f5d59e37e49824f5ff4be77d67f7144a6b953bbde51c"}, + {file = "inflate64-0.3.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1749da3a02b53035cde1cf95f885e78e0c2c49b201e97d368b3ba97e0f3d42c3"}, + {file = "inflate64-0.3.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17aaac096f40bd80dd72481831607a0846271d401ba3cd863386b8c244c7ebc1"}, + {file = "inflate64-0.3.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4d807cfa9ddad940401ef04502eb367a77f569850f59c2e71670347d558a3830"}, + {file = "inflate64-0.3.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:b7aa123c740f2f9798f72873e50d7c6d43664d12cad7a1405296079987bdb04a"}, + {file = "inflate64-0.3.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:91233b5300bbb7562804c3d07617e9ce2983e8434218991db98ef175491e417f"}, + {file = "inflate64-0.3.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:525bc309d8533ef9917e006284996ee7a9a71ac6dd19fb57c0f741ad0c805d4f"}, + {file = "inflate64-0.3.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90f95b92d0f672d11151cb964964d1723e2e3ce3a19d32d24aece1acdec1e287"}, + {file = "inflate64-0.3.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:41504988023042452d2d84e4110c9ef4ff8ebd33cb90ba83e44b92c9a6753c43"}, + {file = "inflate64-0.3.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3c270d373ca3717dbeb9b171eea53cbf2c9d7471b9b5de1e57f165e60cf58037"}, + {file = "inflate64-0.3.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ac60868745f7bfbcd615329fbdc35997fa36043ce358a1c64d229ef448ebecf0"}, + {file = "inflate64-0.3.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d881b605b7448be451f02c59128dc5fac262dbd0dcff4638e702dc8c7bbb8ef0"}, + {file = "inflate64-0.3.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd04764d0bb830414788cae897d082bf6ad92324e571a5511bd7e1de4a0cdc67"}, + {file = "inflate64-0.3.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1987bbc482aa3e2e7fb72c70b22483cfaed3dbebc5ba6f9ac6f75240794709b"}, + {file = "inflate64-0.3.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4e7b0a598adaa11366ffbbb7b3d3110db29edd4b732d9336570891363b22b002"}, + {file = "inflate64-0.3.1.tar.gz", hash = "sha256:b52dd8fefd2ba179e5dfa18d6eca7e2fc822584616271c039d5ef1f9ca90c71c"}, +] + +[package.extras] +check = ["check-manifest", "flake8", "flake8-black", "flake8-deprecated", "isort (>=5.0.3)", "mypy (>=0.940)", "mypy-extensions (>=0.4.1)", "pygments", "readme-renderer", "twine"] +docs = ["docutils", "sphinx (>=5.0)"] +test = ["pyannotate", "pytest"] + +[[package]] +name = "joblib" +version = "1.2.0" +description = "Lightweight pipelining with Python functions" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "joblib-1.2.0-py3-none-any.whl", hash = 
"sha256:091138ed78f800342968c523bdde947e7a305b8594b910a0fea2ab83c3c6d385"}, + {file = "joblib-1.2.0.tar.gz", hash = "sha256:e1cee4a79e4af22881164f218d4311f60074197fb707e082e803b61f6d137018"}, +] + +[[package]] +name = "kiwisolver" +version = "1.4.4" +description = "A fast implementation of the Cassowary constraint solver" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6"}, + {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c"}, + {file = "kiwisolver-1.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32"}, + {file = "kiwisolver-1.4.4-cp310-cp310-win32.whl", hash = "sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408"}, + {file = "kiwisolver-1.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004"}, + {file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6"}, + {file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2"}, + {file = "kiwisolver-1.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4"}, + {file = "kiwisolver-1.4.4-cp311-cp311-win32.whl", hash = "sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e"}, + {file = "kiwisolver-1.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-win32.whl", hash = "sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-win_amd64.whl", hash = "sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166"}, + {file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454"}, + {file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0"}, + {file = "kiwisolver-1.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c"}, + {file = "kiwisolver-1.4.4-cp38-cp38-win32.whl", hash = "sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191"}, + {file = "kiwisolver-1.4.4-cp38-cp38-win_amd64.whl", hash = 
"sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766"}, + {file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8"}, + {file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897"}, + {file = "kiwisolver-1.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9"}, + {file = "kiwisolver-1.4.4-cp39-cp39-win32.whl", hash = "sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea"}, + {file = "kiwisolver-1.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767"}, + {file = 
"kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b"}, + {file = "kiwisolver-1.4.4.tar.gz", hash = "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955"}, +] + +[[package]] +name = "llvmlite" +version = "0.39.1" +description = "lightweight wrapper around basic LLVM functionality" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "llvmlite-0.39.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6717c7a6e93c9d2c3d07c07113ec80ae24af45cde536b34363d4bcd9188091d9"}, + {file = "llvmlite-0.39.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ddab526c5a2c4ccb8c9ec4821fcea7606933dc53f510e2a6eebb45a418d3488a"}, + {file = "llvmlite-0.39.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3f331a323d0f0ada6b10d60182ef06c20a2f01be21699999d204c5750ffd0b4"}, + {file = "llvmlite-0.39.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2c00ff204afa721b0bb9835b5bf1ba7fba210eefcec5552a9e05a63219ba0dc"}, + {file = "llvmlite-0.39.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16f56eb1eec3cda3a5c526bc3f63594fc24e0c8d219375afeb336f289764c6c7"}, + {file = "llvmlite-0.39.1-cp310-cp310-win32.whl", hash = "sha256:d0bfd18c324549c0fec2c5dc610fd024689de6f27c6cc67e4e24a07541d6e49b"}, + {file = "llvmlite-0.39.1-cp310-cp310-win_amd64.whl", hash = "sha256:7ebf1eb9badc2a397d4f6a6c8717447c81ac011db00064a00408bc83c923c0e4"}, + {file = "llvmlite-0.39.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6546bed4e02a1c3d53a22a0bced254b3b6894693318b16c16c8e43e29d6befb6"}, + {file = "llvmlite-0.39.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1578f5000fdce513712e99543c50e93758a954297575610f48cb1fd71b27c08a"}, + {file = "llvmlite-0.39.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3803f11ad5f6f6c3d2b545a303d68d9fabb1d50e06a8d6418e6fcd2d0df00959"}, + {file = "llvmlite-0.39.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50aea09a2b933dab7c9df92361b1844ad3145bfb8dd2deb9cd8b8917d59306fb"}, + {file = "llvmlite-0.39.1-cp37-cp37m-win32.whl", hash = "sha256:b1a0bbdb274fb683f993198775b957d29a6f07b45d184c571ef2a721ce4388cf"}, + {file = "llvmlite-0.39.1-cp37-cp37m-win_amd64.whl", hash = "sha256:e172c73fccf7d6db4bd6f7de963dedded900d1a5c6778733241d878ba613980e"}, + {file = "llvmlite-0.39.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e31f4b799d530255aaf0566e3da2df5bfc35d3cd9d6d5a3dcc251663656c27b1"}, + {file = "llvmlite-0.39.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:62c0ea22e0b9dffb020601bb65cb11dd967a095a488be73f07d8867f4e327ca5"}, + {file = "llvmlite-0.39.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ffc84ade195abd4abcf0bd3b827b9140ae9ef90999429b9ea84d5df69c9058c"}, + {file = "llvmlite-0.39.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c0f158e4708dda6367d21cf15afc58de4ebce979c7a1aa2f6b977aae737e2a54"}, + {file = "llvmlite-0.39.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:22d36591cd5d02038912321d9ab8e4668e53ae2211da5523f454e992b5e13c36"}, + {file = "llvmlite-0.39.1-cp38-cp38-win32.whl", hash = "sha256:4c6ebace910410daf0bebda09c1859504fc2f33d122e9a971c4c349c89cca630"}, + {file = "llvmlite-0.39.1-cp38-cp38-win_amd64.whl", hash = "sha256:fb62fc7016b592435d3e3a8f680e3ea8897c3c9e62e6e6cc58011e7a4801439e"}, + {file = "llvmlite-0.39.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa9b26939ae553bf30a9f5c4c754db0fb2d2677327f2511e674aa2f5df941789"}, + {file = "llvmlite-0.39.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e4f212c018db951da3e1dc25c2651abc688221934739721f2dad5ff1dd5f90e7"}, + {file = "llvmlite-0.39.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39dc2160aed36e989610fc403487f11b8764b6650017ff367e45384dff88ffbf"}, + {file = "llvmlite-0.39.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ec3d70b3e507515936e475d9811305f52d049281eaa6c8273448a61c9b5b7e2"}, + {file = "llvmlite-0.39.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60f8dd1e76f47b3dbdee4b38d9189f3e020d22a173c00f930b52131001d801f9"}, + {file = "llvmlite-0.39.1-cp39-cp39-win32.whl", hash = "sha256:03aee0ccd81735696474dc4f8b6be60774892a2929d6c05d093d17392c237f32"}, + {file = "llvmlite-0.39.1-cp39-cp39-win_amd64.whl", hash = "sha256:3fc14e757bc07a919221f0cbaacb512704ce5774d7fcada793f1996d6bc75f2a"}, + {file = "llvmlite-0.39.1.tar.gz", hash = "sha256:b43abd7c82e805261c425d50335be9a6c4f84264e34d6d6e475207300005d572"}, +] + +[[package]] +name = "matplotlib" +version = "3.7.1" +description = "Python plotting package" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "matplotlib-3.7.1-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:95cbc13c1fc6844ab8812a525bbc237fa1470863ff3dace7352e910519e194b1"}, + {file = "matplotlib-3.7.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:08308bae9e91aca1ec6fd6dda66237eef9f6294ddb17f0d0b3c863169bf82353"}, + {file = "matplotlib-3.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:544764ba51900da4639c0f983b323d288f94f65f4024dc40ecb1542d74dc0500"}, + {file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56d94989191de3fcc4e002f93f7f1be5da476385dde410ddafbb70686acf00ea"}, + {file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e99bc9e65901bb9a7ce5e7bb24af03675cbd7c70b30ac670aa263240635999a4"}, + {file = "matplotlib-3.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb7d248c34a341cd4c31a06fd34d64306624c8cd8d0def7abb08792a5abfd556"}, + {file = "matplotlib-3.7.1-cp310-cp310-win32.whl", hash = "sha256:ce463ce590f3825b52e9fe5c19a3c6a69fd7675a39d589e8b5fbe772272b3a24"}, + {file = "matplotlib-3.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:3d7bc90727351fb841e4d8ae620d2d86d8ed92b50473cd2b42ce9186104ecbba"}, + {file = "matplotlib-3.7.1-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:770a205966d641627fd5cf9d3cb4b6280a716522cd36b8b284a8eb1581310f61"}, + {file = "matplotlib-3.7.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f67bfdb83a8232cb7a92b869f9355d677bce24485c460b19d01970b64b2ed476"}, + {file = "matplotlib-3.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2bf092f9210e105f414a043b92af583c98f50050559616930d884387d0772aba"}, + {file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:89768d84187f31717349c6bfadc0e0d8c321e8eb34522acec8a67b1236a66332"}, + {file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83111e6388dec67822e2534e13b243cc644c7494a4bb60584edbff91585a83c6"}, + {file = "matplotlib-3.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a867bf73a7eb808ef2afbca03bcdb785dae09595fbe550e1bab0cd023eba3de0"}, + {file = "matplotlib-3.7.1-cp311-cp311-win32.whl", hash = "sha256:fbdeeb58c0cf0595efe89c05c224e0a502d1aa6a8696e68a73c3efc6bc354304"}, + {file = "matplotlib-3.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:c0bd19c72ae53e6ab979f0ac6a3fafceb02d2ecafa023c5cca47acd934d10be7"}, + {file = "matplotlib-3.7.1-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:6eb88d87cb2c49af00d3bbc33a003f89fd9f78d318848da029383bfc08ecfbfb"}, + {file = "matplotlib-3.7.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:cf0e4f727534b7b1457898c4f4ae838af1ef87c359b76dcd5330fa31893a3ac7"}, + {file = "matplotlib-3.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:46a561d23b91f30bccfd25429c3c706afe7d73a5cc64ef2dfaf2b2ac47c1a5dc"}, + {file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8704726d33e9aa8a6d5215044b8d00804561971163563e6e6591f9dcf64340cc"}, + {file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4cf327e98ecf08fcbb82685acaf1939d3338548620ab8dfa02828706402c34de"}, + {file = "matplotlib-3.7.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:617f14ae9d53292ece33f45cba8503494ee199a75b44de7717964f70637a36aa"}, + {file = "matplotlib-3.7.1-cp38-cp38-win32.whl", hash = "sha256:7c9a4b2da6fac77bcc41b1ea95fadb314e92508bf5493ceff058e727e7ecf5b0"}, + {file = "matplotlib-3.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:14645aad967684e92fc349493fa10c08a6da514b3d03a5931a1bac26e6792bd1"}, + {file = "matplotlib-3.7.1-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:81a6b377ea444336538638d31fdb39af6be1a043ca5e343fe18d0f17e098770b"}, + {file = "matplotlib-3.7.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:28506a03bd7f3fe59cd3cd4ceb2a8d8a2b1db41afede01f66c42561b9be7b4b7"}, + {file = "matplotlib-3.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8c587963b85ce41e0a8af53b9b2de8dddbf5ece4c34553f7bd9d066148dc719c"}, + {file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8bf26ade3ff0f27668989d98c8435ce9327d24cffb7f07d24ef609e33d582439"}, + {file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:def58098f96a05f90af7e92fd127d21a287068202aa43b2a93476170ebd99e87"}, + {file = "matplotlib-3.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f883a22a56a84dba3b588696a2b8a1ab0d2c3d41be53264115c71b0a942d8fdb"}, + {file = "matplotlib-3.7.1-cp39-cp39-win32.whl", hash = "sha256:4f99e1b234c30c1e9714610eb0c6d2f11809c9c78c984a613ae539ea2ad2eb4b"}, + {file = "matplotlib-3.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:3ba2af245e36990facf67fde840a760128ddd71210b2ab6406e640188d69d136"}, + {file = "matplotlib-3.7.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3032884084f541163f295db8a6536e0abb0db464008fadca6c98aaf84ccf4717"}, + {file = "matplotlib-3.7.1-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a2cb34336110e0ed8bb4f650e817eed61fa064acbefeb3591f1b33e3a84fd96"}, + {file = "matplotlib-3.7.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:b867e2f952ed592237a1828f027d332d8ee219ad722345b79a001f49df0936eb"}, + {file = "matplotlib-3.7.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:57bfb8c8ea253be947ccb2bc2d1bb3862c2bccc662ad1b4626e1f5e004557042"}, + {file = "matplotlib-3.7.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:438196cdf5dc8d39b50a45cb6e3f6274edbcf2254f85fa9b895bf85851c3a613"}, + {file = "matplotlib-3.7.1-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:21e9cff1a58d42e74d01153360de92b326708fb205250150018a52c70f43c290"}, + {file = "matplotlib-3.7.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75d4725d70b7c03e082bbb8a34639ede17f333d7247f56caceb3801cb6ff703d"}, + {file = "matplotlib-3.7.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:97cc368a7268141afb5690760921765ed34867ffb9655dd325ed207af85c7529"}, + {file = "matplotlib-3.7.1.tar.gz", hash = "sha256:7b73305f25eab4541bd7ee0b96d87e53ae9c9f1823be5659b806cd85786fe882"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +importlib-resources = {version = ">=3.2.0", markers = "python_version < \"3.10\""} +kiwisolver = ">=1.0.1" +numpy = ">=1.20" +packaging = ">=20.0" +pillow = ">=6.2.0" +pyparsing = ">=2.3.1" +python-dateutil = ">=2.7" + +[[package]] +name = "multivolumefile" +version = "0.2.3" +description = "multi volume file wrapper library" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "multivolumefile-0.2.3-py3-none-any.whl", hash = "sha256:237f4353b60af1703087cf7725755a1f6fcaeeea48421e1896940cd1c920d678"}, + {file = "multivolumefile-0.2.3.tar.gz", hash = "sha256:a0648d0aafbc96e59198d5c17e9acad7eb531abea51035d08ce8060dcad709d6"}, +] + +[package.extras] +check = ["check-manifest", "flake8", "flake8-black", "isort (>=5.0.3)", "pygments", "readme-renderer", "twine"] +test = ["coverage[toml] (>=5.2)", "coveralls (>=2.1.1)", "hypothesis", "pyannotate", "pytest", "pytest-cov"] +type = ["mypy", "mypy-extensions"] + +[[package]] +name = "numba" +version = "0.56.4" +description = "compiling Python code using LLVM" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "numba-0.56.4-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9f62672145f8669ec08762895fe85f4cf0ead08ce3164667f2b94b2f62ab23c3"}, + {file = "numba-0.56.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c602d015478b7958408d788ba00a50272649c5186ea8baa6cf71d4a1c761bba1"}, + {file = "numba-0.56.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:85dbaed7a05ff96492b69a8900c5ba605551afb9b27774f7f10511095451137c"}, + {file = "numba-0.56.4-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f4cfc3a19d1e26448032049c79fc60331b104f694cf570a9e94f4e2c9d0932bb"}, + {file = "numba-0.56.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e08e203b163ace08bad500b0c16f6092b1eb34fd1fce4feaf31a67a3a5ecf3b"}, + {file = "numba-0.56.4-cp310-cp310-win32.whl", hash = "sha256:0611e6d3eebe4cb903f1a836ffdb2bda8d18482bcd0a0dcc56e79e2aa3fefef5"}, + {file = "numba-0.56.4-cp310-cp310-win_amd64.whl", hash = "sha256:fbfb45e7b297749029cb28694abf437a78695a100e7c2033983d69f0ba2698d4"}, + {file = "numba-0.56.4-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:3cb1a07a082a61df80a468f232e452d818f5ae254b40c26390054e4e868556e0"}, + {file = "numba-0.56.4-cp37-cp37m-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:d69ad934e13c15684e7887100a8f5f0f61d7a8e57e0fd29d9993210089a5b531"}, + {file = "numba-0.56.4-cp37-cp37m-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:dbcc847bac2d225265d054993a7f910fda66e73d6662fe7156452cac0325b073"}, + {file = "numba-0.56.4-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8a95ca9cc77ea4571081f6594e08bd272b66060634b8324e99cd1843020364f9"}, + {file = "numba-0.56.4-cp37-cp37m-win32.whl", hash = "sha256:fcdf84ba3ed8124eb7234adfbb8792f311991cbf8aed1cad4b1b1a7ee08380c1"}, + {file = "numba-0.56.4-cp37-cp37m-win_amd64.whl", hash = "sha256:42f9e1be942b215df7e6cc9948cf9c15bb8170acc8286c063a9e57994ef82fd1"}, + {file = "numba-0.56.4-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:553da2ce74e8862e18a72a209ed3b6d2924403bdd0fb341fa891c6455545ba7c"}, + {file = "numba-0.56.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4373da9757049db7c90591e9ec55a2e97b2b36ba7ae3bf9c956a513374077470"}, + {file = "numba-0.56.4-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3a993349b90569518739009d8f4b523dfedd7e0049e6838c0e17435c3e70dcc4"}, + {file = "numba-0.56.4-cp38-cp38-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:720886b852a2d62619ae3900fe71f1852c62db4f287d0c275a60219e1643fc04"}, + {file = "numba-0.56.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e64d338b504c9394a4a34942df4627e1e6cb07396ee3b49fe7b8d6420aa5104f"}, + {file = "numba-0.56.4-cp38-cp38-win32.whl", hash = "sha256:03fe94cd31e96185cce2fae005334a8cc712fc2ba7756e52dff8c9400718173f"}, + {file = "numba-0.56.4-cp38-cp38-win_amd64.whl", hash = "sha256:91f021145a8081f881996818474ef737800bcc613ffb1e618a655725a0f9e246"}, + {file = "numba-0.56.4-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:d0ae9270a7a5cc0ede63cd234b4ff1ce166c7a749b91dbbf45e0000c56d3eade"}, + {file = "numba-0.56.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c75e8a5f810ce80a0cfad6e74ee94f9fde9b40c81312949bf356b7304ef20740"}, + {file = "numba-0.56.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a12ef323c0f2101529d455cfde7f4135eaa147bad17afe10b48634f796d96abd"}, + {file = "numba-0.56.4-cp39-cp39-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:03634579d10a6129181129de293dd6b5eaabee86881369d24d63f8fe352dd6cb"}, + {file = "numba-0.56.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0240f9026b015e336069329839208ebd70ec34ae5bfbf402e4fcc8e06197528e"}, + {file = "numba-0.56.4-cp39-cp39-win32.whl", hash = "sha256:14dbbabf6ffcd96ee2ac827389afa59a70ffa9f089576500434c34abf9b054a4"}, + {file = "numba-0.56.4-cp39-cp39-win_amd64.whl", hash = "sha256:0da583c532cd72feefd8e551435747e0e0fbb3c0530357e6845fcc11e38d6aea"}, + {file = "numba-0.56.4.tar.gz", hash = "sha256:32d9fef412c81483d7efe0ceb6cf4d3310fde8b624a9cecca00f790573ac96ee"}, +] + +[package.dependencies] +importlib-metadata = {version = "*", markers = "python_version < \"3.9\""} +llvmlite = ">=0.39.0dev0,<0.40" +numpy = ">=1.18,<1.24" +setuptools = "*" + +[[package]] +name = "numpy" +version = "1.23.5" +description = "NumPy is the fundamental package for array computing with Python." 
+category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "numpy-1.23.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9c88793f78fca17da0145455f0d7826bcb9f37da4764af27ac945488116efe63"}, + {file = "numpy-1.23.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e9f4c4e51567b616be64e05d517c79a8a22f3606499941d97bb76f2ca59f982d"}, + {file = "numpy-1.23.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7903ba8ab592b82014713c491f6c5d3a1cde5b4a3bf116404e08f5b52f6daf43"}, + {file = "numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e05b1c973a9f858c74367553e236f287e749465f773328c8ef31abe18f691e1"}, + {file = "numpy-1.23.5-cp310-cp310-win32.whl", hash = "sha256:522e26bbf6377e4d76403826ed689c295b0b238f46c28a7251ab94716da0b280"}, + {file = "numpy-1.23.5-cp310-cp310-win_amd64.whl", hash = "sha256:dbee87b469018961d1ad79b1a5d50c0ae850000b639bcb1b694e9981083243b6"}, + {file = "numpy-1.23.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ce571367b6dfe60af04e04a1834ca2dc5f46004ac1cc756fb95319f64c095a96"}, + {file = "numpy-1.23.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56e454c7833e94ec9769fa0f86e6ff8e42ee38ce0ce1fa4cbb747ea7e06d56aa"}, + {file = "numpy-1.23.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5039f55555e1eab31124a5768898c9e22c25a65c1e0037f4d7c495a45778c9f2"}, + {file = "numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58f545efd1108e647604a1b5aa809591ccd2540f468a880bedb97247e72db387"}, + {file = "numpy-1.23.5-cp311-cp311-win32.whl", hash = "sha256:b2a9ab7c279c91974f756c84c365a669a887efa287365a8e2c418f8b3ba73fb0"}, + {file = "numpy-1.23.5-cp311-cp311-win_amd64.whl", hash = "sha256:0cbe9848fad08baf71de1a39e12d1b6310f1d5b2d0ea4de051058e6e1076852d"}, + {file = "numpy-1.23.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f063b69b090c9d918f9df0a12116029e274daf0181df392839661c4c7ec9018a"}, + {file = "numpy-1.23.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0aaee12d8883552fadfc41e96b4c82ee7d794949e2a7c3b3a7201e968c7ecab9"}, + {file = "numpy-1.23.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92c8c1e89a1f5028a4c6d9e3ccbe311b6ba53694811269b992c0b224269e2398"}, + {file = "numpy-1.23.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d208a0f8729f3fb790ed18a003f3a57895b989b40ea4dce4717e9cf4af62c6bb"}, + {file = "numpy-1.23.5-cp38-cp38-win32.whl", hash = "sha256:06005a2ef6014e9956c09ba07654f9837d9e26696a0470e42beedadb78c11b07"}, + {file = "numpy-1.23.5-cp38-cp38-win_amd64.whl", hash = "sha256:ca51fcfcc5f9354c45f400059e88bc09215fb71a48d3768fb80e357f3b457e1e"}, + {file = "numpy-1.23.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8969bfd28e85c81f3f94eb4a66bc2cf1dbdc5c18efc320af34bffc54d6b1e38f"}, + {file = "numpy-1.23.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a7ac231a08bb37f852849bbb387a20a57574a97cfc7b6cabb488a4fc8be176de"}, + {file = "numpy-1.23.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf837dc63ba5c06dc8797c398db1e223a466c7ece27a1f7b5232ba3466aafe3d"}, + {file = "numpy-1.23.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33161613d2269025873025b33e879825ec7b1d831317e68f4f2f0f84ed14c719"}, + {file = "numpy-1.23.5-cp39-cp39-win32.whl", hash = "sha256:af1da88f6bc3d2338ebbf0e22fe487821ea4d8e89053e25fa59d1d79786e7481"}, + {file = "numpy-1.23.5-cp39-cp39-win_amd64.whl", hash = 
"sha256:09b7847f7e83ca37c6e627682f145856de331049013853f344f37b0c9690e3df"}, + {file = "numpy-1.23.5-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:abdde9f795cf292fb9651ed48185503a2ff29be87770c3b8e2a14b0cd7aa16f8"}, + {file = "numpy-1.23.5-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9a909a8bae284d46bbfdefbdd4a262ba19d3bc9921b1e76126b1d21c3c34135"}, + {file = "numpy-1.23.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:01dd17cbb340bf0fc23981e52e1d18a9d4050792e8fb8363cecbf066a84b827d"}, + {file = "numpy-1.23.5.tar.gz", hash = "sha256:1b1766d6f397c18153d40015ddfc79ddb715cabadc04d2d228d4e5a8bc4ded1a"}, +] + +[[package]] +name = "nvidia-cublas-cu11" +version = "11.10.3.66" +description = "CUBLAS native runtime libraries" +category = "main" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-manylinux1_x86_64.whl", hash = "sha256:d32e4d75f94ddfb93ea0a5dda08389bcc65d8916a25cb9f37ac89edaeed3bded"}, + {file = "nvidia_cublas_cu11-11.10.3.66-py3-none-win_amd64.whl", hash = "sha256:8ac17ba6ade3ed56ab898a036f9ae0756f1e81052a317bf98f8c6d18dc3ae49e"}, +] + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "nvidia-cuda-nvrtc-cu11" +version = "11.7.99" +description = "NVRTC native runtime libraries" +category = "main" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:9f1562822ea264b7e34ed5930567e89242d266448e936b85bc97a3370feabb03"}, + {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = "sha256:f7d9610d9b7c331fa0da2d1b2858a4a8315e6d49765091d28711c8946e7425e7"}, + {file = "nvidia_cuda_nvrtc_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:f2effeb1309bdd1b3854fc9b17eaf997808f8b25968ce0c7070945c4265d64a3"}, +] + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "nvidia-cuda-runtime-cu11" +version = "11.7.99" +description = "CUDA Runtime native Libraries" +category = "main" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl", hash = "sha256:cc768314ae58d2641f07eac350f40f99dcb35719c4faff4bc458a7cd2b119e31"}, + {file = "nvidia_cuda_runtime_cu11-11.7.99-py3-none-win_amd64.whl", hash = "sha256:bc77fa59a7679310df9d5c70ab13c4e34c64ae2124dd1efd7e5474b71be125c7"}, +] + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "nvidia-cudnn-cu11" +version = "8.5.0.96" +description = "cuDNN runtime libraries" +category = "main" +optional = false +python-versions = ">=3" +files = [ + {file = "nvidia_cudnn_cu11-8.5.0.96-2-py3-none-manylinux1_x86_64.whl", hash = "sha256:402f40adfc6f418f9dae9ab402e773cfed9beae52333f6d86ae3107a1b9527e7"}, + {file = "nvidia_cudnn_cu11-8.5.0.96-py3-none-manylinux1_x86_64.whl", hash = "sha256:71f8111eb830879ff2836db3cccf03bbd735df9b0d17cd93761732ac50a8a108"}, +] + +[package.dependencies] +setuptools = "*" +wheel = "*" + +[[package]] +name = "opacus" +version = "1.3.0" +description = "Train PyTorch models with Differential Privacy" +category = "main" +optional = false +python-versions = ">=3.7.5" +files = [ + {file = "opacus-1.3.0-py3-none-any.whl", hash = "sha256:ef5d6f2aab56901d714ee56fd177a3627d14d820b2ec49cb8bc8a6f52c326507"}, + {file = "opacus-1.3.0.tar.gz", hash = "sha256:9bb0cc02f2508a86d8ce3ae67b32f8ee94edae0c432e426f54dc86a337959266"}, +] + +[package.dependencies] +functorch = "*" +numpy = ">=1.15" +opt-einsum = 
">=3.3.0" +scipy = ">=1.2" +torch = ">=1.8" + +[package.extras] +dev = ["black", "coverage", "datasets", "flake8", "hypothesis", "isort", "jsonargparse[signatures] (>=3.19.3)", "lightning-bolts", "mypy (>=0.760)", "pytest", "pytorch-lightning", "requests (>=2.25.1)", "scikit-learn", "sphinx", "sphinx-autodoc-typehints", "tensorboard", "torch", "torchvision (>=0.9.1)", "tqdm (>=4.40)", "transformers"] + +[[package]] +name = "opt-einsum" +version = "3.3.0" +description = "Optimizing numpys einsum function" +category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "opt_einsum-3.3.0-py3-none-any.whl", hash = "sha256:2455e59e3947d3c275477df7f5205b30635e266fe6dc300e3d9f9646bfcea147"}, + {file = "opt_einsum-3.3.0.tar.gz", hash = "sha256:59f6475f77bbc37dcf7cd748519c0ec60722e91e63ca114e68821c0c54a46549"}, +] + +[package.dependencies] +numpy = ">=1.7" + +[package.extras] +docs = ["numpydoc", "sphinx (==1.2.3)", "sphinx-rtd-theme", "sphinxcontrib-napoleon"] +tests = ["pytest", "pytest-cov", "pytest-pep8"] + +[[package]] +name = "packaging" +version = "21.3" +description = "Core utilities for Python packages" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, + {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, +] + +[package.dependencies] +pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" + +[[package]] +name = "pandas" +version = "1.5.3" +description = "Powerful data structures for data analysis, time series, and statistics" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3749077d86e3a2f0ed51367f30bf5b82e131cc0f14260c4d3e499186fccc4406"}, + {file = "pandas-1.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:972d8a45395f2a2d26733eb8d0f629b2f90bebe8e8eddbb8829b180c09639572"}, + {file = "pandas-1.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50869a35cbb0f2e0cd5ec04b191e7b12ed688874bd05dd777c19b28cbea90996"}, + {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3ac844a0fe00bfaeb2c9b51ab1424e5c8744f89860b138434a363b1f620f354"}, + {file = "pandas-1.5.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0a56cef15fd1586726dace5616db75ebcfec9179a3a55e78f72c5639fa2a23"}, + {file = "pandas-1.5.3-cp310-cp310-win_amd64.whl", hash = "sha256:478ff646ca42b20376e4ed3fa2e8d7341e8a63105586efe54fa2508ee087f328"}, + {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6973549c01ca91ec96199e940495219c887ea815b2083722821f1d7abfa2b4dc"}, + {file = "pandas-1.5.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c39a8da13cede5adcd3be1182883aea1c925476f4e84b2807a46e2775306305d"}, + {file = "pandas-1.5.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f76d097d12c82a535fda9dfe5e8dd4127952b45fea9b0276cb30cca5ea313fbc"}, + {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e474390e60ed609cec869b0da796ad94f420bb057d86784191eefc62b65819ae"}, + {file = "pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f2b952406a1588ad4cad5b3f55f520e82e902388a6d5a4a91baa8d38d23c7f6"}, + {file = "pandas-1.5.3-cp311-cp311-win_amd64.whl", hash = 
"sha256:bc4c368f42b551bf72fac35c5128963a171b40dce866fb066540eeaf46faa003"}, + {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:14e45300521902689a81f3f41386dc86f19b8ba8dd5ac5a3c7010ef8d2932813"}, + {file = "pandas-1.5.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9842b6f4b8479e41968eced654487258ed81df7d1c9b7b870ceea24ed9459b31"}, + {file = "pandas-1.5.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:26d9c71772c7afb9d5046e6e9cf42d83dd147b5cf5bcb9d97252077118543792"}, + {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fbcb19d6fceb9e946b3e23258757c7b225ba450990d9ed63ccceeb8cae609f7"}, + {file = "pandas-1.5.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:565fa34a5434d38e9d250af3c12ff931abaf88050551d9fbcdfafca50d62babf"}, + {file = "pandas-1.5.3-cp38-cp38-win32.whl", hash = "sha256:87bd9c03da1ac870a6d2c8902a0e1fd4267ca00f13bc494c9e5a9020920e1d51"}, + {file = "pandas-1.5.3-cp38-cp38-win_amd64.whl", hash = "sha256:41179ce559943d83a9b4bbacb736b04c928b095b5f25dd2b7389eda08f46f373"}, + {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c74a62747864ed568f5a82a49a23a8d7fe171d0c69038b38cedf0976831296fa"}, + {file = "pandas-1.5.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c4c00e0b0597c8e4f59e8d461f797e5d70b4d025880516a8261b2817c47759ee"}, + {file = "pandas-1.5.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a50d9a4336a9621cab7b8eb3fb11adb82de58f9b91d84c2cd526576b881a0c5a"}, + {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd05f7783b3274aa206a1af06f0ceed3f9b412cf665b7247eacd83be41cf7bf0"}, + {file = "pandas-1.5.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f69c4029613de47816b1bb30ff5ac778686688751a5e9c99ad8c7031f6508e5"}, + {file = "pandas-1.5.3-cp39-cp39-win32.whl", hash = "sha256:7cec0bee9f294e5de5bbfc14d0573f65526071029d036b753ee6507d2a21480a"}, + {file = "pandas-1.5.3-cp39-cp39-win_amd64.whl", hash = "sha256:dfd681c5dc216037e0b0a2c821f5ed99ba9f03ebcf119c7dac0e9a7b960b9ec9"}, + {file = "pandas-1.5.3.tar.gz", hash = "sha256:74a3fd7e5a7ec052f183273dc7b0acd3a863edf7520f5d3a1765c04ffdb3b0b1"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, +] +python-dateutil = ">=2.8.1" +pytz = ">=2020.1" + +[package.extras] +test = ["hypothesis (>=5.5.3)", "pytest (>=6.0)", "pytest-xdist (>=1.31)"] + +[[package]] +name = "pillow" +version = "9.4.0" +description = "Python Imaging Library (Fork)" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Pillow-9.4.0-1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b4b4e9dda4f4e4c4e6896f93e84a8f0bcca3b059de9ddf67dac3c334b1195e1"}, + {file = "Pillow-9.4.0-1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:fb5c1ad6bad98c57482236a21bf985ab0ef42bd51f7ad4e4538e89a997624e12"}, + {file = "Pillow-9.4.0-1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:f0caf4a5dcf610d96c3bd32932bfac8aee61c96e60481c2a0ea58da435e25acd"}, + {file = "Pillow-9.4.0-1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:3f4cc516e0b264c8d4ccd6b6cbc69a07c6d582d8337df79be1e15a5056b258c9"}, + {file = "Pillow-9.4.0-1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:b8c2f6eb0df979ee99433d8b3f6d193d9590f735cf12274c108bd954e30ca858"}, + {file = "Pillow-9.4.0-1-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = 
"sha256:b70756ec9417c34e097f987b4d8c510975216ad26ba6e57ccb53bc758f490dab"}, + {file = "Pillow-9.4.0-1-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:43521ce2c4b865d385e78579a082b6ad1166ebed2b1a2293c3be1d68dd7ca3b9"}, + {file = "Pillow-9.4.0-2-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:9d9a62576b68cd90f7075876f4e8444487db5eeea0e4df3ba298ee38a8d067b0"}, + {file = "Pillow-9.4.0-2-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:87708d78a14d56a990fbf4f9cb350b7d89ee8988705e58e39bdf4d82c149210f"}, + {file = "Pillow-9.4.0-2-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:8a2b5874d17e72dfb80d917213abd55d7e1ed2479f38f001f264f7ce7bae757c"}, + {file = "Pillow-9.4.0-2-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:83125753a60cfc8c412de5896d10a0a405e0bd88d0470ad82e0869ddf0cb3848"}, + {file = "Pillow-9.4.0-2-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:9e5f94742033898bfe84c93c831a6f552bb629448d4072dd312306bab3bd96f1"}, + {file = "Pillow-9.4.0-2-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:013016af6b3a12a2f40b704677f8b51f72cb007dac785a9933d5c86a72a7fe33"}, + {file = "Pillow-9.4.0-2-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:99d92d148dd03fd19d16175b6d355cc1b01faf80dae93c6c3eb4163709edc0a9"}, + {file = "Pillow-9.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:2968c58feca624bb6c8502f9564dd187d0e1389964898f5e9e1fbc8533169157"}, + {file = "Pillow-9.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c5c1362c14aee73f50143d74389b2c158707b4abce2cb055b7ad37ce60738d47"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd752c5ff1b4a870b7661234694f24b1d2b9076b8bf337321a814c612665f343"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9a3049a10261d7f2b6514d35bbb7a4dfc3ece4c4de14ef5876c4b7a23a0e566d"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16a8df99701f9095bea8a6c4b3197da105df6f74e6176c5b410bc2df2fd29a57"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:94cdff45173b1919350601f82d61365e792895e3c3a3443cf99819e6fbf717a5"}, + {file = "Pillow-9.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ed3e4b4e1e6de75fdc16d3259098de7c6571b1a6cc863b1a49e7d3d53e036070"}, + {file = "Pillow-9.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5b2f8a31bd43e0f18172d8ac82347c8f37ef3e0b414431157718aa234991b28"}, + {file = "Pillow-9.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:09b89ddc95c248ee788328528e6a2996e09eaccddeeb82a5356e92645733be35"}, + {file = "Pillow-9.4.0-cp310-cp310-win32.whl", hash = "sha256:f09598b416ba39a8f489c124447b007fe865f786a89dbfa48bb5cf395693132a"}, + {file = "Pillow-9.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:f6e78171be3fb7941f9910ea15b4b14ec27725865a73c15277bc39f5ca4f8391"}, + {file = "Pillow-9.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:3fa1284762aacca6dc97474ee9c16f83990b8eeb6697f2ba17140d54b453e133"}, + {file = "Pillow-9.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eaef5d2de3c7e9b21f1e762f289d17b726c2239a42b11e25446abf82b26ac132"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4dfdae195335abb4e89cc9762b2edc524f3c6e80d647a9a81bf81e17e3fb6f0"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6abfb51a82e919e3933eb137e17c4ae9c0475a25508ea88993bb59faf82f3b35"}, + {file = 
"Pillow-9.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:451f10ef963918e65b8869e17d67db5e2f4ab40e716ee6ce7129b0cde2876eab"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6663977496d616b618b6cfa43ec86e479ee62b942e1da76a2c3daa1c75933ef4"}, + {file = "Pillow-9.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:60e7da3a3ad1812c128750fc1bc14a7ceeb8d29f77e0a2356a8fb2aa8925287d"}, + {file = "Pillow-9.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:19005a8e58b7c1796bc0167862b1f54a64d3b44ee5d48152b06bb861458bc0f8"}, + {file = "Pillow-9.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f715c32e774a60a337b2bb8ad9839b4abf75b267a0f18806f6f4f5f1688c4b5a"}, + {file = "Pillow-9.4.0-cp311-cp311-win32.whl", hash = "sha256:b222090c455d6d1a64e6b7bb5f4035c4dff479e22455c9eaa1bdd4c75b52c80c"}, + {file = "Pillow-9.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:ba6612b6548220ff5e9df85261bddc811a057b0b465a1226b39bfb8550616aee"}, + {file = "Pillow-9.4.0-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:5f532a2ad4d174eb73494e7397988e22bf427f91acc8e6ebf5bb10597b49c493"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dd5a9c3091a0f414a963d427f920368e2b6a4c2f7527fdd82cde8ef0bc7a327"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef21af928e807f10bf4141cad4746eee692a0dd3ff56cfb25fce076ec3cc8abe"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:847b114580c5cc9ebaf216dd8c8dbc6b00a3b7ab0131e173d7120e6deade1f57"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:653d7fb2df65efefbcbf81ef5fe5e5be931f1ee4332c2893ca638c9b11a409c4"}, + {file = "Pillow-9.4.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:46f39cab8bbf4a384ba7cb0bc8bae7b7062b6a11cfac1ca4bc144dea90d4a9f5"}, + {file = "Pillow-9.4.0-cp37-cp37m-win32.whl", hash = "sha256:7ac7594397698f77bce84382929747130765f66406dc2cd8b4ab4da68ade4c6e"}, + {file = "Pillow-9.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:46c259e87199041583658457372a183636ae8cd56dbf3f0755e0f376a7f9d0e6"}, + {file = "Pillow-9.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:0e51f608da093e5d9038c592b5b575cadc12fd748af1479b5e858045fff955a9"}, + {file = "Pillow-9.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:765cb54c0b8724a7c12c55146ae4647e0274a839fb6de7bcba841e04298e1011"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:519e14e2c49fcf7616d6d2cfc5c70adae95682ae20f0395e9280db85e8d6c4df"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d197df5489004db87d90b918033edbeee0bd6df3848a204bca3ff0a903bef837"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0845adc64fe9886db00f5ab68c4a8cd933ab749a87747555cec1c95acea64b0b"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:e1339790c083c5a4de48f688b4841f18df839eb3c9584a770cbd818b33e26d5d"}, + {file = "Pillow-9.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:a96e6e23f2b79433390273eaf8cc94fec9c6370842e577ab10dabdcc7ea0a66b"}, + {file = "Pillow-9.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7cfc287da09f9d2a7ec146ee4d72d6ea1342e770d975e49a8621bf54eaa8f30f"}, + {file = "Pillow-9.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:d7081c084ceb58278dd3cf81f836bc818978c0ccc770cbbb202125ddabec6628"}, + {file = "Pillow-9.4.0-cp38-cp38-win32.whl", hash = "sha256:df41112ccce5d47770a0c13651479fbcd8793f34232a2dd9faeccb75eb5d0d0d"}, + {file = "Pillow-9.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:7a21222644ab69ddd9967cfe6f2bb420b460dae4289c9d40ff9a4896e7c35c9a"}, + {file = "Pillow-9.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0f3269304c1a7ce82f1759c12ce731ef9b6e95b6df829dccd9fe42912cc48569"}, + {file = "Pillow-9.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cb362e3b0976dc994857391b776ddaa8c13c28a16f80ac6522c23d5257156bed"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2e0f87144fcbbe54297cae708c5e7f9da21a4646523456b00cc956bd4c65815"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:28676836c7796805914b76b1837a40f76827ee0d5398f72f7dcc634bae7c6264"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0884ba7b515163a1a05440a138adeb722b8a6ae2c2b33aea93ea3118dd3a899e"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:53dcb50fbdc3fb2c55431a9b30caeb2f7027fcd2aeb501459464f0214200a503"}, + {file = "Pillow-9.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:e8c5cf126889a4de385c02a2c3d3aba4b00f70234bfddae82a5eaa3ee6d5e3e6"}, + {file = "Pillow-9.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6c6b1389ed66cdd174d040105123a5a1bc91d0aa7059c7261d20e583b6d8cbd2"}, + {file = "Pillow-9.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0dd4c681b82214b36273c18ca7ee87065a50e013112eea7d78c7a1b89a739153"}, + {file = "Pillow-9.4.0-cp39-cp39-win32.whl", hash = "sha256:6d9dfb9959a3b0039ee06c1a1a90dc23bac3b430842dcb97908ddde05870601c"}, + {file = "Pillow-9.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:54614444887e0d3043557d9dbc697dbb16cfb5a35d672b7a0fcc1ed0cf1c600b"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b9b752ab91e78234941e44abdecc07f1f0d8f51fb62941d32995b8161f68cfe5"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3b56206244dc8711f7e8b7d6cad4663917cd5b2d950799425076681e8766286"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aabdab8ec1e7ca7f1434d042bf8b1e92056245fb179790dc97ed040361f16bfd"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:db74f5562c09953b2c5f8ec4b7dfd3f5421f31811e97d1dbc0a7c93d6e3a24df"}, + {file = "Pillow-9.4.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e9d7747847c53a16a729b6ee5e737cf170f7a16611c143d95aa60a109a59c336"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b52ff4f4e002f828ea6483faf4c4e8deea8d743cf801b74910243c58acc6eda3"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:575d8912dca808edd9acd6f7795199332696d3469665ef26163cd090fa1f8bfa"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3c4ed2ff6760e98d262e0cc9c9a7f7b8a9f61aa4d47c58835cdaf7b0b8811bb"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e621b0246192d3b9cb1dc62c78cfa4c6f6d2ddc0ec207d43c0dedecb914f152a"}, + {file = "Pillow-9.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8f127e7b028900421cad64f51f75c051b628db17fb00e099eb148761eed598c9"}, + {file = 
"Pillow-9.4.0.tar.gz", hash = "sha256:a1c2d7780448eb93fbcc3789bf3916aa5720d942e37945f4056680317f1cd23e"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinxext-opengraph"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "plotly" +version = "5.13.1" +description = "An open-source, interactive data visualization library for Python" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "plotly-5.13.1-py2.py3-none-any.whl", hash = "sha256:f776a5c664908450c6c1727f61e8e2e22798d9c6c69d37a9057735365084a2fa"}, + {file = "plotly-5.13.1.tar.gz", hash = "sha256:90ee9a1fee0dda30e2830e129855081ea17bd1b06a553a62b62de15caff1a219"}, +] + +[package.dependencies] +tenacity = ">=6.2.0" + +[[package]] +name = "psutil" +version = "5.9.4" +description = "Cross-platform lib for process and system monitoring in Python." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "psutil-5.9.4-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8"}, + {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:68908971daf802203f3d37e78d3f8831b6d1014864d7a85937941bb35f09aefe"}, + {file = "psutil-5.9.4-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:3ff89f9b835100a825b14c2808a106b6fdcc4b15483141482a12c725e7f78549"}, + {file = "psutil-5.9.4-cp27-cp27m-win32.whl", hash = "sha256:852dd5d9f8a47169fe62fd4a971aa07859476c2ba22c2254d4a1baa4e10b95ad"}, + {file = "psutil-5.9.4-cp27-cp27m-win_amd64.whl", hash = "sha256:9120cd39dca5c5e1c54b59a41d205023d436799b1c8c4d3ff71af18535728e94"}, + {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:6b92c532979bafc2df23ddc785ed116fced1f492ad90a6830cf24f4d1ea27d24"}, + {file = "psutil-5.9.4-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:efeae04f9516907be44904cc7ce08defb6b665128992a56957abc9b61dca94b7"}, + {file = "psutil-5.9.4-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:54d5b184728298f2ca8567bf83c422b706200bcbbfafdc06718264f9393cfeb7"}, + {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16653106f3b59386ffe10e0bad3bb6299e169d5327d3f187614b1cb8f24cf2e1"}, + {file = "psutil-5.9.4-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54c0d3d8e0078b7666984e11b12b88af2db11d11249a8ac8920dd5ef68a66e08"}, + {file = "psutil-5.9.4-cp36-abi3-win32.whl", hash = "sha256:149555f59a69b33f056ba1c4eb22bb7bf24332ce631c44a319cec09f876aaeff"}, + {file = "psutil-5.9.4-cp36-abi3-win_amd64.whl", hash = "sha256:fd8522436a6ada7b4aad6638662966de0d61d241cb821239b2ae7013d41a43d4"}, + {file = "psutil-5.9.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:6001c809253a29599bc0dfd5179d9f8a5779f9dffea1da0f13c53ee568115e1e"}, + {file = "psutil-5.9.4.tar.gz", hash = "sha256:3d7f9739eb435d4b1338944abe23f49584bde5395f27487d2ee25ad9a8774a62"}, +] + +[package.extras] +test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] + +[[package]] +name = "py7zr" +version = "0.20.4" +description = "Pure python 7-zip library" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "py7zr-0.20.4-py3-none-any.whl", hash = 
"sha256:94d0c24217f6582741813ee94490a4ca82bd5f9bf35e4f8610cb588cf7445764"}, + {file = "py7zr-0.20.4.tar.gz", hash = "sha256:1d01f98ea1e1f5c49940358691b2076f9a5848056426541e783de33834f59e21"}, +] + +[package.dependencies] +brotli = {version = ">=1.0.9", markers = "platform_python_implementation == \"CPython\""} +brotlicffi = {version = ">=1.0.9.2", markers = "platform_python_implementation == \"PyPy\""} +inflate64 = {version = ">=0.3.1", markers = "python_version > \"3.6\""} +multivolumefile = ">=0.2.3" +psutil = {version = "*", markers = "sys_platform != \"cygwin\""} +pybcj = ">=0.6.0" +pycryptodomex = ">=3.6.6" +pyppmd = ">=0.18.1,<1.1.0" +pyzstd = ">=0.14.4" +texttable = "*" + +[package.extras] +check = ["check-manifest", "flake8 (<5)", "flake8-black", "flake8-deprecated", "flake8-isort", "isort (>=5.0.3)", "mypy (>=0.940)", "mypy-extensions (>=0.4.1)", "pygments", "readme-renderer", "twine"] +debug = ["pytest", "pytest-leaks", "pytest-profiling"] +docs = ["docutils", "sphinx (>=5.0)", "sphinx-a4doc", "sphinx-py3doc-enhanced-theme"] +test = ["coverage[toml] (>=5.2)", "coveralls (>=2.1.1)", "py-cpuinfo", "pyannotate", "pytest", "pytest-benchmark", "pytest-cov", "pytest-remotedata", "pytest-timeout"] +test-compat = ["libarchive-c"] + +[[package]] +name = "pyarrow" +version = "11.0.0" +description = "Python library for Apache Arrow" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyarrow-11.0.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:40bb42afa1053c35c749befbe72f6429b7b5f45710e85059cdd534553ebcf4f2"}, + {file = "pyarrow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7c28b5f248e08dea3b3e0c828b91945f431f4202f1a9fe84d1012a761324e1ba"}, + {file = "pyarrow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a37bc81f6c9435da3c9c1e767324ac3064ffbe110c4e460660c43e144be4ed85"}, + {file = "pyarrow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad7c53def8dbbc810282ad308cc46a523ec81e653e60a91c609c2233ae407689"}, + {file = "pyarrow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:25aa11c443b934078bfd60ed63e4e2d42461682b5ac10f67275ea21e60e6042c"}, + {file = "pyarrow-11.0.0-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:e217d001e6389b20a6759392a5ec49d670757af80101ee6b5f2c8ff0172e02ca"}, + {file = "pyarrow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ad42bb24fc44c48f74f0d8c72a9af16ba9a01a2ccda5739a517aa860fa7e3d56"}, + {file = "pyarrow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d942c690ff24a08b07cb3df818f542a90e4d359381fbff71b8f2aea5bf58841"}, + {file = "pyarrow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f010ce497ca1b0f17a8243df3048055c0d18dcadbcc70895d5baf8921f753de5"}, + {file = "pyarrow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:2f51dc7ca940fdf17893227edb46b6784d37522ce08d21afc56466898cb213b2"}, + {file = "pyarrow-11.0.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:1cbcfcbb0e74b4d94f0b7dde447b835a01bc1d16510edb8bb7d6224b9bf5bafc"}, + {file = "pyarrow-11.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaee8f79d2a120bf3e032d6d64ad20b3af6f56241b0ffc38d201aebfee879d00"}, + {file = "pyarrow-11.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:410624da0708c37e6a27eba321a72f29d277091c8f8d23f72c92bada4092eb5e"}, + {file = "pyarrow-11.0.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:2d53ba72917fdb71e3584ffc23ee4fcc487218f8ff29dd6df3a34c5c48fe8c06"}, + {file = "pyarrow-11.0.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:f12932e5a6feb5c58192209af1d2607d488cb1d404fbc038ac12ada60327fa34"}, + {file = "pyarrow-11.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:41a1451dd895c0b2964b83d91019e46f15b5564c7ecd5dcb812dadd3f05acc97"}, + {file = "pyarrow-11.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:becc2344be80e5dce4e1b80b7c650d2fc2061b9eb339045035a1baa34d5b8f1c"}, + {file = "pyarrow-11.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f40be0d7381112a398b93c45a7e69f60261e7b0269cc324e9f739ce272f4f70"}, + {file = "pyarrow-11.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:362a7c881b32dc6b0eccf83411a97acba2774c10edcec715ccaab5ebf3bb0835"}, + {file = "pyarrow-11.0.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:ccbf29a0dadfcdd97632b4f7cca20a966bb552853ba254e874c66934931b9841"}, + {file = "pyarrow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e99be85973592051e46412accea31828da324531a060bd4585046a74ba45854"}, + {file = "pyarrow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69309be84dcc36422574d19c7d3a30a7ea43804f12552356d1ab2a82a713c418"}, + {file = "pyarrow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da93340fbf6f4e2a62815064383605b7ffa3e9eeb320ec839995b1660d69f89b"}, + {file = "pyarrow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:caad867121f182d0d3e1a0d36f197df604655d0b466f1bc9bafa903aa95083e4"}, + {file = "pyarrow-11.0.0.tar.gz", hash = "sha256:5461c57dbdb211a632a48facb9b39bbeb8a7905ec95d768078525283caef5f6d"}, +] + +[package.dependencies] +numpy = ">=1.16.6" + +[[package]] +name = "pybcj" +version = "1.0.1" +description = "bcj filter library" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pybcj-1.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:20fc0d8f67e2d9747e0c31082d5f64b112258ae602a85aa5c7e6bf5a7cad287b"}, + {file = "pybcj-1.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:43e8bc75773ca06ee7a64602b799613171e4edf4d9d8fd38fa5c49f1cdbb4407"}, + {file = "pybcj-1.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a81f14f213a75597f9be44feb97740a51adda558465fb159114472dc2ab39ef8"}, + {file = "pybcj-1.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:225a0addf4b3d580bf4eae583b5168dac0125a703c53ded8b3f120882e1e0312"}, + {file = "pybcj-1.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc79ed4773cd35328377a8fedbbdcafb3a9d242ee63b96863c0692c81faefab8"}, + {file = "pybcj-1.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0eaa90639992b6096afb1485380fae7f084483db6b92867847a3bfdf22cc4efc"}, + {file = "pybcj-1.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:570a3cf4e016dcb0fc561991833e5170a2a0bc6ee88fe5667591f356bd7b7895"}, + {file = "pybcj-1.0.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:46b82fe50eb8171ee2205e935f3fd5900e31beb5e54e10c88f23a5420902467d"}, + {file = "pybcj-1.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2d6b34ec233fcf5a83ccfbf422fef22256947eaa7077aaa012e5961d15aa302c"}, + {file = "pybcj-1.0.1-cp310-cp310-win32.whl", hash = "sha256:fa787b414c4dc6b6cd75338fac18a7dbb53a09443dd863020a2d2bda76940ca6"}, + {file = "pybcj-1.0.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:970dc23ca1c64611d35a3abe76a059cf551da53d62faefd84c5bf3e0af1602d1"}, + {file = "pybcj-1.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c854a206d8c3a5a959b803405760f3627bb4878450e2f36b5d35af09c89152fc"}, + {file = "pybcj-1.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:21098001200273c3c9fd90e7bf909fb905a8e1c102c80b604cb7c6a3103ef7e0"}, + {file = "pybcj-1.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:39dd836134e261ec769cd5aa9ae7a3a330a7dac81efb66eb5504643abd8235df"}, + {file = "pybcj-1.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:acfc4a02ddf22f6df7184441b39f38c31e95aa8af41de4d2f825821ab1fb85c6"}, + {file = "pybcj-1.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f4428b6808d781f4b605a27f53fc10a3ca343d1cd901c691b9ba2e4ed85a5fc7"}, + {file = "pybcj-1.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74d34340323996b70dbd73e9530cca71c05ff7c97e30fe4d32aeea2f877836ca"}, + {file = "pybcj-1.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bf87f2a7f827656bc6e1d9888d47931aa0ae35cdc4ff33b1cec70d8d462590b3"}, + {file = "pybcj-1.0.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e6a74cb618da93ac1322d6a548a4508e76eb4c388ed1c80560bc25d8764cf272"}, + {file = "pybcj-1.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f472da992a6ba58381c0314b994c01d20e522ff8836417ef1c0975bdae142406"}, + {file = "pybcj-1.0.1-cp311-cp311-win32.whl", hash = "sha256:f58e489e43c9a1688c7d5ceb7455b44952d87f183b7b9c915b301478a2b3bfbe"}, + {file = "pybcj-1.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:a74e70bf3fd50a413fdce4264e037b8e8f34cb8d9207ac364167b6eb076c14ec"}, + {file = "pybcj-1.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:8e846a8272bf02202794fe22beaf389ed27c2d8ebf59aafb43af4935feac0389"}, + {file = "pybcj-1.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:421f211fb15aeb836b4ba61174cb409fc82222ab3b2486deb4953ae863e6507b"}, + {file = "pybcj-1.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdabbe7fd66886943393ecf98318d7801dd40183af80314acd4464bccdd44d53"}, + {file = "pybcj-1.0.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:731800acfc6112132aa2b7d08f9d6fe49a0c0071b30985809d084e238af98dac"}, + {file = "pybcj-1.0.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:09872b32edad4e3653d5b357b244d267ca58fe52d4e1dd3cdff816d3bb9d9f7c"}, + {file = "pybcj-1.0.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:5de90f8b6c7fc1d28dbe74c29b1d5053a7a8703cbc2c6f4f112907ffd7529f8e"}, + {file = "pybcj-1.0.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:10961ea10ae930b9348132707b9dd3cf3e71a41ef1df7656fbc4f14a71f10747"}, + {file = "pybcj-1.0.1-cp36-cp36m-win32.whl", hash = "sha256:6f589af70286ec6565e3415145a03abc3c14a23ed7ed198ac741de81af332f26"}, + {file = "pybcj-1.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b901f12380e988da07f21bb6b75da7f91fd9feffb43fcf70fad698e40a2ef3a7"}, + {file = "pybcj-1.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2e1859d36c073231737956fbeb5bbcfa8dba880e1b66bfbd001466718d6d89dc"}, + {file = "pybcj-1.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:421ed75e54ebecd79c80178c1df5bdbe1e0e3e10e7efef5f011b5f0be6a9a12f"}, + {file = "pybcj-1.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:872697d8bff2572e4225ed8cbce17be338faac28ec1ab3c00419aaef2f56dd3c"}, + {file = "pybcj-1.0.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc23f2ac2c1ded250f1aa66fbd1a3d823f76de549978b61eed4fb34affc11338"}, + {file = "pybcj-1.0.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8efed581f2ee74f1e0ec04a10e97881b93abc258d13b15ef966aee71732ac152"}, + {file = "pybcj-1.0.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:bb378b0f133e19d437eca4327bac7c3f38e30950c5c604092c72b18cba839bc2"}, + {file = "pybcj-1.0.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:534b8b253dbdb746c06bab28383db31d7e2b42aa9b33ed4e7836319622dcd75b"}, + {file = "pybcj-1.0.1-cp37-cp37m-win32.whl", hash = "sha256:15edd1786617127ecfda4274bbb04f09ae299c474ada86e369bcf050d5cb88dd"}, + {file = "pybcj-1.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:393d95f83e47976d137bcec7b66986f51282dcb2091933f88983dd7eb89e59c4"}, + {file = "pybcj-1.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e89a814f1727be7d543ac6910f0d94131f43a337e811ab684606d42dbc22b701"}, + {file = "pybcj-1.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b3861996b06b8238f799b4f1bd9542d1a8ae8e4765adbdde25ed011c3bda11df"}, + {file = "pybcj-1.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7801ee9a9fcd47b92d4d90ff9a28cfdc23195cad72bd8032938ab3c794942b43"}, + {file = "pybcj-1.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10182725b0e6aa944d13a10a4a9cb5208bafe0016b4326253340948153de4bc0"}, + {file = "pybcj-1.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fc313b1a5547c5416982853f2de1454980704f3ab3dbcad18dacdc565a2eafc"}, + {file = "pybcj-1.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b3773a77ae3b18778c9bf22c7ba6478a0e5416f84b7d2ac6d764001f6d0d985"}, + {file = "pybcj-1.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c72ff262613c9a6f20e80bcf1e8bbc000b78b95a7fa301164ab3e3bd23bd936c"}, + {file = "pybcj-1.0.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:262f53e27bca6096e3424c63e5e59948b10985eee4b03a5d70c3f3f6161a79e7"}, + {file = "pybcj-1.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:efe75e3b8768c4f9d454d3c1b2b2a67e757f2b00d638146d3a4cddb38460fc3a"}, + {file = "pybcj-1.0.1-cp38-cp38-win32.whl", hash = "sha256:a77796b4c5370cedd4fad2264b6d7a78cb40229c7fa3cbcab24df3adea768962"}, + {file = "pybcj-1.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:4d10dd75fad48555e9530c5565c7ccf13754adad2fe331feefb263055cdca7b3"}, + {file = "pybcj-1.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1c0e1657c233f9f4070ab578951e03d569f1b645042ce661341091f50e41b541"}, + {file = "pybcj-1.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:603daa737579cf69efb368fab716cdce18d0b2615af77bb623f5f42aa546b3d8"}, + {file = "pybcj-1.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:df75707f466ab6fa086f164bff2df75fd16543c8d43ca43a268f938c1144e792"}, + {file = "pybcj-1.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fda423836d7d69cba6a6f99e7a34c2e5fe3621e5e945cd25ea9ba60a96223254"}, + {file = "pybcj-1.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3847387b43af47d9677952b8a22d9c2d8a544c2175b6d5304c200669c05d39e1"}, + {file = "pybcj-1.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:b99f4291e59dcbe548be5a1e8c6a1a19a860184526c2d14fc374ec687b98ad7d"}, + {file = "pybcj-1.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:05fad9a905772774aacc96cb174571ac1f5afa80b9f54c6ec414d369865d305c"}, + {file = "pybcj-1.0.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d5c4ca6faff0af4b5f3e7d88d13ec76f8cac36c9bcc814b8c84d9f3f951b2cf9"}, + {file = "pybcj-1.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4bc8720f3a224c27bd413a930b9bec5f225fda050641258967b1ebb252a053fb"}, + {file = "pybcj-1.0.1-cp39-cp39-win32.whl", hash = "sha256:d61f287f820787d3acf60d113c5ce6e506870d9d3103bc37a74373e72ce9d7a6"}, + {file = "pybcj-1.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:bbb49772fc3896850a704215160df8316db89e5e8876b2d8af6c6c15b4e0f6ea"}, + {file = "pybcj-1.0.1-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:c1e02170f8d358a8ddc716606760c73d55eea6bdb0cca2d97b86447e9524708b"}, + {file = "pybcj-1.0.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc1684b9f7ec92d2ae94a137ec311bd2227f684429521061af7ceed4952c7f72"}, + {file = "pybcj-1.0.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e6434a46f852cd3e6929633b43537887bd381bc614dbf5c4a128fdde4966b3a"}, + {file = "pybcj-1.0.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:795dff9229dc024e54bd0f618f5a3adb269ee0cccd7ac9a0bef29df388beed23"}, + {file = "pybcj-1.0.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:358dba3dc39a07cded6897b9f99bb5b951a0ad95d567eda535b44861caa02f5b"}, + {file = "pybcj-1.0.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6df9eccc99a0d7bc091b58cff2f507b89f076d657253975fa2ca9eb42dbb4733"}, + {file = "pybcj-1.0.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f46ba61c942ee64198444c9562c5cf089eaf97f17b413e15fa1c0614df304734"}, + {file = "pybcj-1.0.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2f950ca403ffaa808a017e40e3371115bcb0b4b1061772b03e7d842555132ac"}, + {file = "pybcj-1.0.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6485c6b091504c0e6431a9495309271626eaa9ecb23276903486824f94f4c551"}, + {file = "pybcj-1.0.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:9b56eeff51efa556ecc186260ac486a4ddd79ad37bc88d669e96c45190f3c0da"}, + {file = "pybcj-1.0.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d5b327df02761c42399c878cd6c37f885bf0639befbd4d1ab763cd44ba1e0552"}, + {file = "pybcj-1.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:023082fd677f67ebd36fe96322a4a45ac33a2b340d49010d88e1867c76744c50"}, + {file = "pybcj-1.0.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8576a1dcf445ef064bf8c3b2cdc1d6353e41cb4b366329946883e285dcbcec0"}, + {file = "pybcj-1.0.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a5365edcaa82dc47e7757ba2efb48f96b9b352e3811a2aaa90084802479ddbe"}, + {file = "pybcj-1.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:6ca6ddae1302477879099d4c4efc65790f4610d71ceff7fbe8f8b60f6ac6dcff"}, + {file = "pybcj-1.0.1.tar.gz", hash = "sha256:8b682ed08caabfb7c042d4be083e28ddc692afb1deff5567111f8855071b75c3"}, +] + +[package.extras] +check = ["check-manifest", "flake8 (<5)", "flake8-black", "flake8-colors", "flake8-isort", "flake8-pyi", "flake8-typing-imports", "mypy 
(>=0.812)", "mypy-extensions (>=0.4.3)", "pygments", "readme-renderer"] +test = ["coverage[toml] (>=5.2)", "hypothesis", "pytest (>=6.0)", "pytest-cov"] + +[[package]] +name = "pycox" +version = "0.2.3" +description = "Survival analysis with PyTorch" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pycox-0.2.3-py3-none-any.whl", hash = "sha256:9ea3c64a4a650ccf6c96cf512712de330f2d75de32122d86995c7cd37ff105d1"}, + {file = "pycox-0.2.3.tar.gz", hash = "sha256:c209c6b24d9262db7b8edb9a886b1a3bb73c9d6db9fb4559b5fb994e30743d6f"}, +] + +[package.dependencies] +feather-format = ">=0.4.0" +h5py = ">=2.9.0" +numba = ">=0.44" +py7zr = ">=0.11.3" +requests = ">=2.22.0" +scikit-learn = ">=0.21.2" +torchtuples = ">=0.2.0" + +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + +[[package]] +name = "pycryptodomex" +version = "3.17" +description = "Cryptographic library for Python" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "pycryptodomex-3.17-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:12056c38e49d972f9c553a3d598425f8a1c1d35b2e4330f89d5ff1ffb70de041"}, + {file = "pycryptodomex-3.17-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ab33c2d9f275e05e235dbca1063753b5346af4a5cac34a51fa0da0d4edfb21d7"}, + {file = "pycryptodomex-3.17-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:caa937ff29d07a665dfcfd7a84f0d4207b2ebf483362fa9054041d67fdfacc20"}, + {file = "pycryptodomex-3.17-cp27-cp27m-manylinux2014_aarch64.whl", hash = "sha256:db23d7341e21b273d2440ec6faf6c8b1ca95c8894da612e165be0b89a8688340"}, + {file = "pycryptodomex-3.17-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:f854c8476512cebe6a8681cc4789e4fcff6019c17baa0fd72b459155dc605ab4"}, + {file = "pycryptodomex-3.17-cp27-cp27m-win32.whl", hash = "sha256:a57e3257bacd719769110f1f70dd901c5b6955e9596ad403af11a3e6e7e3311c"}, + {file = "pycryptodomex-3.17-cp27-cp27m-win_amd64.whl", hash = "sha256:d38ab9e53b1c09608ba2d9b8b888f1e75d6f66e2787e437adb1fecbffec6b112"}, + {file = "pycryptodomex-3.17-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:3c2516b42437ae6c7a29ef3ddc73c8d4714e7b6df995b76be4695bbe4b3b5cd2"}, + {file = "pycryptodomex-3.17-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:5c23482860302d0d9883404eaaa54b0615eefa5274f70529703e2c43cc571827"}, + {file = "pycryptodomex-3.17-cp27-cp27mu-manylinux2014_aarch64.whl", hash = "sha256:7a8dc3ee7a99aae202a4db52de5a08aa4d01831eb403c4d21da04ec2f79810db"}, + {file = "pycryptodomex-3.17-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:7cc28dd33f1f3662d6da28ead4f9891035f63f49d30267d3b41194c8778997c8"}, + {file = "pycryptodomex-3.17-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:2d4d395f109faba34067a08de36304e846c791808524614c731431ee048fe70a"}, + {file = "pycryptodomex-3.17-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:55eed98b4150a744920597c81b3965b632038781bab8a08a12ea1d004213c600"}, + {file = "pycryptodomex-3.17-cp35-abi3-manylinux2014_aarch64.whl", hash = "sha256:7fa0b52df90343fafe319257b31d909be1d2e8852277fb0376ba89d26d2921db"}, + {file = 
"pycryptodomex-3.17-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78f0ddd4adc64baa39b416f3637aaf99f45acb0bcdc16706f0cc7ebfc6f10109"}, + {file = "pycryptodomex-3.17-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4fa037078e92c7cc49f6789a8bac3de06856740bb2038d05f2d9a2e4b165d59"}, + {file = "pycryptodomex-3.17-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:88b0d5bb87eaf2a31e8a759302b89cf30c97f2f8ca7d83b8c9208abe8acb447a"}, + {file = "pycryptodomex-3.17-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:6feedf4b0e36b395329b4186a805f60f900129cdf0170e120ecabbfcb763995d"}, + {file = "pycryptodomex-3.17-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:7a6651a07f67c28b6e978d63aa3a3fccea0feefed9a8453af3f7421a758461b7"}, + {file = "pycryptodomex-3.17-cp35-abi3-win32.whl", hash = "sha256:32e764322e902bbfac49ca1446604d2839381bbbdd5a57920c9daaf2e0b778df"}, + {file = "pycryptodomex-3.17-cp35-abi3-win_amd64.whl", hash = "sha256:4b51e826f0a04d832eda0790bbd0665d9bfe73e5a4d8ea93b6a9b38beeebe935"}, + {file = "pycryptodomex-3.17-pp27-pypy_73-macosx_10_9_x86_64.whl", hash = "sha256:d4cf0128da167562c49b0e034f09e9cedd733997354f2314837c2fa461c87bb1"}, + {file = "pycryptodomex-3.17-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:c92537b596bd5bffb82f8964cabb9fef1bca8a28a9e0a69ffd3ec92a4a7ad41b"}, + {file = "pycryptodomex-3.17-pp27-pypy_73-win32.whl", hash = "sha256:599bb4ae4bbd614ca05f49bd4e672b7a250b80b13ae1238f05fd0f09d87ed80a"}, + {file = "pycryptodomex-3.17-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4c4674f4b040321055c596aac926d12f7f6859dfe98cd12f4d9453b43ab6adc8"}, + {file = "pycryptodomex-3.17-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67a3648025e4ddb72d43addab764336ba2e670c8377dba5dd752e42285440d31"}, + {file = "pycryptodomex-3.17-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40e8a11f578bd0851b02719c862d55d3ee18d906c8b68a9c09f8c564d6bb5b92"}, + {file = "pycryptodomex-3.17-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:23d83b610bd97704f0cd3acc48d99b76a15c8c1540d8665c94d514a49905bad7"}, + {file = "pycryptodomex-3.17-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:fd29d35ac80755e5c0a99d96b44fb9abbd7e871849581ea6a4cb826d24267537"}, + {file = "pycryptodomex-3.17-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64b876d57cb894b31056ad8dd6a6ae1099b117ae07a3d39707221133490e5715"}, + {file = "pycryptodomex-3.17-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee8bf4fdcad7d66beb744957db8717afc12d176e3fd9c5d106835133881a049b"}, + {file = "pycryptodomex-3.17-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c84689c73358dfc23f9fdcff2cb9e7856e65e2ce3b5ed8ff630d4c9bdeb1867b"}, + {file = "pycryptodomex-3.17.tar.gz", hash = "sha256:0af93aad8d62e810247beedef0261c148790c52f3cd33643791cc6396dd217c1"}, +] + +[[package]] +name = "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +category = "main" +optional = false +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] + +[package.extras] +diagrams = ["jinja2", 
"railroad-diagrams"] + +[[package]] +name = "pyppmd" +version = "1.0.0" +description = "PPMd compression/decompression library" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pyppmd-1.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8049c19af4b78b400b2347bff4514763257b55516c359144e9d8091991ed12e8"}, + {file = "pyppmd-1.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1c0fd06aaf782e65b7b5bbc47f8a9dbe050c1ba18474ccbe0a2b37f57a8d8c72"}, + {file = "pyppmd-1.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e17b08a5c283faf48b4ee888f8fa53f919cd8afd0930eae4d59f719f6be519fb"}, + {file = "pyppmd-1.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71f994f281439705cb04c497adc2863551fa5813606af6fb26c673a44a36c4e3"}, + {file = "pyppmd-1.0.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:31a09fd1b10518342ff442b57dd8c890b9bfea6bbdbb785c729f0d139092e42e"}, + {file = "pyppmd-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca6a926d229a6dbf2ccdb0d4e692d81ff927459b59a1cec14ef522522df6d757"}, + {file = "pyppmd-1.0.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6279f1c4b6aefacb95df49db2f2e232530592d1849c37b73478a4f26eb405d12"}, + {file = "pyppmd-1.0.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:f7a1b08612627d5280ef2dad1fadb0b1a10c70df0c484f9091eff5fab5e4c84e"}, + {file = "pyppmd-1.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3ecb83e0cc92960f959111518ea208b51a58e8cc303ff959e9cd2cc56dd36a63"}, + {file = "pyppmd-1.0.0-cp310-cp310-win32.whl", hash = "sha256:703c4fbc9b5e1454f403fb1d6b4a6c4c729f72eef14690146deecd2166429d6d"}, + {file = "pyppmd-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:6e2f5ff5071e4e43c92065f383753d4ad59778816485a01ee7b29e2a1ff48140"}, + {file = "pyppmd-1.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7e8d3c309061ae7fb40e4a26d30f8982b367abc562b9b8621cb79932cb3b94d9"}, + {file = "pyppmd-1.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5805c73590fb8f0ceb3e6cb115774b66a6f4700ae84b31d962ad69667e05dfbd"}, + {file = "pyppmd-1.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6a0c524be57698fe61fff893d485a9af21e6bc0aa2d385b71a63ff951921d4b6"}, + {file = "pyppmd-1.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18d7cf4d0a9ced96ff1fa44de9ee3d65f5b06278c8f9a61c3edeb660f12f146b"}, + {file = "pyppmd-1.0.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:61acfeee5ed59796037499119edd3159bf6b8c5fcaef17e295a2ca4103112d60"}, + {file = "pyppmd-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8dbe3076fe20c4d65cb1d1b51eeb17a1c177402b83100017a55daad888e198e"}, + {file = "pyppmd-1.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3bc75ed4e969b09fd1a766dd79cb3d5efe56edc11c86ac0b357b5648c7181ce2"}, + {file = "pyppmd-1.0.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:024f714ebb8ddf59dae164adc3c220c24555d470f4adb5bd022abc50298cfff3"}, + {file = "pyppmd-1.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ae419f71afa88784d53dd2449882af982bbd0328fa22a7e6a339221f3143918"}, + {file = "pyppmd-1.0.0-cp311-cp311-win32.whl", hash = "sha256:8680008b1b1e9e77f3337a1a53c1b32541cac9f93f79ae12d34de050585999ac"}, + {file = "pyppmd-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a5fbec6f39a307818593508d8623d9328baf494137d191fc98e11f47e058ceee"}, + {file = "pyppmd-1.0.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = 
"sha256:a7240c00083527cf0b1bbdc92f6967e522efb9ad6968c953be174c390b091b3e"}, + {file = "pyppmd-1.0.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfb716a4a07ccbef84ed9fc31d012cef3b38404a6510e24d307cf64025999b21"}, + {file = "pyppmd-1.0.0-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12be01e919a34c6944568592b35451acf7c98ed18e005bb4b1c046ed520aff7f"}, + {file = "pyppmd-1.0.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d90d87377d83d909eafbf23301057fe16e6662c98ffea738159a234d9000a68"}, + {file = "pyppmd-1.0.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:12a783a0e3c76484a1bc93783867a36ab9a60de5b5298d57c9fe7348e848346e"}, + {file = "pyppmd-1.0.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:b6b6c01e46fcf785ad6c272be400ebcbcb434a1d91150614e10de8cc569b8bff"}, + {file = "pyppmd-1.0.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:385a0b1341ebdfd7cb101c43eea130546830073c01bdb5036bca45c033ee633e"}, + {file = "pyppmd-1.0.0-cp36-cp36m-win32.whl", hash = "sha256:b8eee08c615ae9edd7bf1f214a377cac3d27417f22112685e581d4bab43029b0"}, + {file = "pyppmd-1.0.0-cp36-cp36m-win_amd64.whl", hash = "sha256:0e9c001719527dbafdd7fd8709b98bd63c173451c2eddbaa77abf62486a13da0"}, + {file = "pyppmd-1.0.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5847c47127ff9ea323f5910c62b9f136c3fab181a5144bfe72be13f051047357"}, + {file = "pyppmd-1.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63ddd5a81d6aaed9373cd9fc4de9529f10fa052aaf064ab283dc6218418cc5b5"}, + {file = "pyppmd-1.0.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:706d33cec3601d894f8a4a158bc652b7a3f01cd9e92c2da5d8711efeb9755835"}, + {file = "pyppmd-1.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07e067e114f05918c8a4ab1fa6a070e2c7a9e497aa73fbf6d87a90e7a6e62a57"}, + {file = "pyppmd-1.0.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:cecf0859b461bcf04439f32bcfb6e081016fa6204c92b5950d19d248fd1aad6b"}, + {file = "pyppmd-1.0.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:47ea218f7dfa94d15286c25d60db3091db1082ba958fa0a32ccaaaeaca7fc712"}, + {file = "pyppmd-1.0.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f751882dd529328ca43af8018f79cdd02ed707fcda30a2fa9acb1ee5c48261a6"}, + {file = "pyppmd-1.0.0-cp37-cp37m-win32.whl", hash = "sha256:18f863d58c4451e00765137be731c2b2150aff829468f59de4169e052429e1fd"}, + {file = "pyppmd-1.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:05950d8a39fd9bf6c64572d69a6dd0a1af3fadf8d4a2a0bb62f5b04c0a618300"}, + {file = "pyppmd-1.0.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5331a7780d3444d7029e15e68385c94d6a26f688c1e87a5a9ee2e836ea6e4559"}, + {file = "pyppmd-1.0.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:62f970173baf80aad9472c7c6edca4a021ae7965174b1c5d6f400c9571e92efc"}, + {file = "pyppmd-1.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ccdfc8b2a1d73b2186850b9a5bd96106d5fd4419a620d344b0ab8bf630680cf8"}, + {file = "pyppmd-1.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63adeeb9dc4afd6d377ac1c9801f9539f9a81430e9c96d332023bf2ad6c04a1"}, + {file = "pyppmd-1.0.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca8a842b4ff671642b63ed4edd4e1ff7dc0ba0a7af4135758233f056ab992fca"}, + {file = "pyppmd-1.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1aeba466617cf975cd6719070ca9721bcd83a1a84bd8cf74c3a2808724481e"}, + {file = 
"pyppmd-1.0.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b0a87399ade5820f787758449976e758604c7739eb5f79ed9e594b5fa3a6a1bc"}, + {file = "pyppmd-1.0.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:74bd56b165283bb5586ff9ac7a896b217b3c94effe144b768279807840142bb4"}, + {file = "pyppmd-1.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ea4b1a326afe2055304740a03a233f7389f615179b9f6377264b306f619bfb11"}, + {file = "pyppmd-1.0.0-cp38-cp38-win32.whl", hash = "sha256:d2c3c16f644afb1b3caa4f6c717682030f7c3f54a12af8b1416b21877f0b5226"}, + {file = "pyppmd-1.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:a7f83970a057157c88d4a53a40431d07d8d3f38029ad2eae621422f955bd243b"}, + {file = "pyppmd-1.0.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:93d0d6ed97046ce25d64427ec493e06c23f32838972258bf11d603c9c998d6b3"}, + {file = "pyppmd-1.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1658714d012a5f9a8a3e67f3a9ede3519a2558064ccbd3163c39aca0cfd2412b"}, + {file = "pyppmd-1.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab4e29f774e064af09baf8478acd967684524e566b78fcc4f6f669757f0a2ab5"}, + {file = "pyppmd-1.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd227b8c292ac43d3297a91055fab51c27894dba39d04ccc774a72d9e6f85752"}, + {file = "pyppmd-1.0.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68184b7246ea73a92a764e16cc18b74ccf3c8d6bfc438bbace57aeb1914118a7"}, + {file = "pyppmd-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8aafe6fc436a782e6d424a0ac00de08a1559b6d6ddd08031adbe791ff4e54c90"}, + {file = "pyppmd-1.0.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bae08176e0d3ed0a5cbd838ff1ac557dfa088a652af633ab1905ab35bb9d7bc4"}, + {file = "pyppmd-1.0.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c60031de93834e5cd262db4b27272101d04a9a18c4cc49f81d483221211a97c8"}, + {file = "pyppmd-1.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:aee9c52a6f232f3f7c683b87213aa3a9eacd281ab31187e784290ba1c05024fe"}, + {file = "pyppmd-1.0.0-cp39-cp39-win32.whl", hash = "sha256:2858471a291b51fab49242d78bd67c2b7719368618a02e4aa995de8c855da73c"}, + {file = "pyppmd-1.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:1ec00b07c6b68feb402d6596f3575a7892ad69e4f455deee7b5301df703e60dd"}, + {file = "pyppmd-1.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ac19ec1b6e3a0aadc1537466f537017189373593e23fe254df050fdd01f4a722"}, + {file = "pyppmd-1.0.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10c8a41093952cde52b6d89488dc601ee7b10f6c95c430488f68987393777b46"}, + {file = "pyppmd-1.0.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ea6a0d26db17027805a804d013cf761d732df5bce9d6b314cd1c727fe347277"}, + {file = "pyppmd-1.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c31e5b331923f3b3b2cfbc66a60ecfd73db1a19a646bd1faf25bfde709a80d0"}, + {file = "pyppmd-1.0.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:f488164e8876d213b0627a7a6cb798081eaf84fd9ec6dde5a1668296b15e1a6c"}, + {file = "pyppmd-1.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6059ea0c9acc3b52b2961412ac75d1da72656f8b69bb8fc3d92eec6776176011"}, + {file = "pyppmd-1.0.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aadc63d0ac83f8c5744eb34ea47a70ff7bfab519b293482d7ccb09946c374dc7"}, + {file = "pyppmd-1.0.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:09668aa43e4f02b8725e6233dfc66e532c72f0e69fa1b34dd814a9f7200e0496"}, + {file = "pyppmd-1.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f79ebcd7312b541d3520e1a0d4c362731e24403e2f9f6761679b2ad819d5c706"}, + {file = "pyppmd-1.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:556b6a3af3fca2b41ca25f51c481e5df8df4da842fc5a567da7bb099cfa52423"}, + {file = "pyppmd-1.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f9a3782f5accab4186d68c86defc61fcc7d0146e9cdc5b54e18656852c71db16"}, + {file = "pyppmd-1.0.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c6c40f15b9fdea10bf966e5b07ee0a0ebcb8cf188ed9a466029c894816b303"}, + {file = "pyppmd-1.0.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a90b98f9d501eaedaca4d0e82f9e771bd2d780d71effcdeacc9fc6180a00e07"}, + {file = "pyppmd-1.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f8a3b9192714b3e4773fc49c100ca13defa2502cb38e56205eb5a131ccf555d"}, + {file = "pyppmd-1.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f1e7a1747518b5822eb755f3715d88bd1459e24de828aed86b7c1aa35e3ed76"}, + {file = "pyppmd-1.0.0.tar.gz", hash = "sha256:075c9bd297e3b0a87dd7aeabca7fee668218acbe69ecc1c6511064558de8840f"}, +] + +[package.extras] +check = ["check-manifest", "flake8", "flake8-black", "isort (>=5.0.3)", "mypy (>=0.812)", "mypy-extensions (>=0.4.3)", "pygments", "readme-renderer"] +docs = ["sphinx (>=2.3)", "sphinx-rtd-theme"] +fuzzer = ["atheris", "hypothesis"] +test = ["coverage[toml] (>=5.2)", "hypothesis", "pytest (>=6.0)", "pytest-benchmark", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2022.7.1" +description = "World timezone definitions, modern and historical" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2022.7.1-py2.py3-none-any.whl", hash = "sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a"}, + {file = "pytz-2022.7.1.tar.gz", hash = "sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0"}, +] + +[[package]] +name = "pyyaml" +version = "6.0" +description = "YAML parser and emitter for Python" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, + {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, + {file = 
"PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, + {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, + {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, + {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, + {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, + {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, + {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, + {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, + {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, + {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, + {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, + {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, + {file = 
"PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, + {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, + {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, + {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, + {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, + {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, +] + +[[package]] +name = "pyzstd" +version = "0.15.4" +description = "Python bindings to Zstandard (zstd) compression library, the API style is similar to Python's bz2/lzma/zlib modules." 
+category = "main" +optional = false +python-versions = ">=3.5" +files = [ + {file = "pyzstd-0.15.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7aea5a1474cacdd0285f16bc5392271645147806986e17be8b100ef750520548"}, + {file = "pyzstd-0.15.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1e53d12e026605b254569a384191850b6e9f2fcd0fba8b3c80e09b9683fc86af"}, + {file = "pyzstd-0.15.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6250aa075408753cde58d79a80923c29c1791f32c4b3e58f25701ae5dbf1678"}, + {file = "pyzstd-0.15.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca8f520618a3a1c66ee0f6bc3a6b4fc6f9d55b7b1ce80a8f8a422c82a2f79418"}, + {file = "pyzstd-0.15.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bca4d7285363b73966e8c6d8af8da515d23bcaf8eb91293e866d5d4615ea5227"}, + {file = "pyzstd-0.15.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9aed2fda29875d42a7f96b741582e6e9adb48f3903760439f60b0f8fb8f822f1"}, + {file = "pyzstd-0.15.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:448d4412462d00bbb7c4857f9a4bedc879f9834f01e2723c23f9de4edde8bde3"}, + {file = "pyzstd-0.15.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:8a7f8be2493356bf55a1261b7a9bb476324738ced6210c3f876f452df8288785"}, + {file = "pyzstd-0.15.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:15b84a04edb257cc7d75c208be43dc4c66e8bd2a44ad9e2af523a829386f0e4c"}, + {file = "pyzstd-0.15.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e1e7e43714b0646fb1d61b6b9bbf6e6ede6c77c89667f2addb086f75196ce389"}, + {file = "pyzstd-0.15.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:29f692a74f3d9778c71ad6899a1c7578fd44dfb5b6187fe37abdfc7cc034e9d3"}, + {file = "pyzstd-0.15.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:c2e35d5a169ef9107760bd276974331e0be8fafe2e26ec824020c4999013dc40"}, + {file = "pyzstd-0.15.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ab88c8123e2c8ad298d1ac79f5b5aeab68ef27a0afe968e5c5be1a2f24577e2e"}, + {file = "pyzstd-0.15.4-cp310-cp310-win32.whl", hash = "sha256:2e25838c7410245201a455ff000372c65654c781a7a168b2249432dae4ce260f"}, + {file = "pyzstd-0.15.4-cp310-cp310-win_amd64.whl", hash = "sha256:b28e6f095fd56ac373844ee76ab74b011dbd128e4113a033911675421742ce91"}, + {file = "pyzstd-0.15.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2988e5282c807634911d9336ff4797de84e308d94a1f57bfab4223d927e992c5"}, + {file = "pyzstd-0.15.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:00c9e9aa800b7ded58bdddd90930f949205475c17d1e5923ab936321b805ede0"}, + {file = "pyzstd-0.15.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c681938be80e4ceedc2f43b5d7c08e8e3719492a8250cc742382633c7176919"}, + {file = "pyzstd-0.15.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65dc2ac46389823214dd21141a05009cc2fc8d139d8a0aae5f7eb6830ffe5ded"}, + {file = "pyzstd-0.15.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91cbd6d8a2fa0b18f9b50076a46e837b90e9f1974578483fcc4b094b43cbd89b"}, + {file = "pyzstd-0.15.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e95ec6b499571eca1683c078d2f6cfe3815a5103bf4f488a4edbb1607be652"}, + {file = "pyzstd-0.15.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d1da1bcbac16a075675609865e2705d9ec90a2312450d4d99b65a003fdef84f7"}, + {file = "pyzstd-0.15.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:55dd0cf038e9053147e58fd4d0509fc1b735f63ce276b88151560bae271a25af"}, + {file = "pyzstd-0.15.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7740d39f74f9154a7f42d15c1b16f87ba062ac16439d62525f2c85165ea9ebc2"}, + {file = "pyzstd-0.15.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c03e1bfc6c391a05ea04dfff485f4884b07805a16f198c1d5b4e24f4df1e1723"}, + {file = "pyzstd-0.15.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:e7361f4e5a354715d00c43ad3472188cb9eab83d2e75a2ab7ca75734779579bf"}, + {file = "pyzstd-0.15.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:56cb56e24e9d33ef571c23b0157593c44c052ffe5810599a76bc6c60ec77f67a"}, + {file = "pyzstd-0.15.4-cp311-cp311-win32.whl", hash = "sha256:ab55575f1c6c12ca65918dba8196bfb281df86038a9f9e376fb6ac5a501ac724"}, + {file = "pyzstd-0.15.4-cp311-cp311-win_amd64.whl", hash = "sha256:b73b18d8ebf5caba863bfccb6ad422b9322c7a806b50a555bb44f51f16b7af5e"}, + {file = "pyzstd-0.15.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:df80324759cb74a10818f5ea4dfb0332b6064a31305246f66b043c4f73b37918"}, + {file = "pyzstd-0.15.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4833842d5e6660c1b6cd316f102ffddb95566f12ec1f89c5630d4b511e7e084"}, + {file = "pyzstd-0.15.4-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d9069da3bb94cac8af3fd2d7b622c7a78a248d3295a478587d880553287d56e6"}, + {file = "pyzstd-0.15.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9db032965b8fabf0bac13b75706031444ebfa7214794afe80be41799d8a349fb"}, + {file = "pyzstd-0.15.4-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:16eb2b2b206489bf8a7b690cf3b06b24352a16ee25717cb38c4381aefb3a94ce"}, + {file = "pyzstd-0.15.4-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:821555ce9bc7f50130bf52dc7b7d342913e00435c68a5cac564c5f2e57f33cbf"}, + {file = "pyzstd-0.15.4-cp36-cp36m-win32.whl", hash = "sha256:4f4a96bdffa773f8820e065ec840c5d3f252b0611951b73567121757388f171c"}, + {file = "pyzstd-0.15.4-cp36-cp36m-win_amd64.whl", hash = "sha256:3b0d384a48592683edf812c1a3d75a0cf5af253b1629596567ce3a2991d6ccbc"}, + {file = "pyzstd-0.15.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e3b4ee242e00029c68fc5d8cddc3e01f6cdf7ac466d844a1440ebde4ae1f9e79"}, + {file = "pyzstd-0.15.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d62f3e5ca97150ddd32e83275170bcb7ea462f271df03d39de434e8e02889b09"}, + {file = "pyzstd-0.15.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01e723cafb6e681dccba134524f9f012c5f04078d6cb05e03b2f5d871716e0db"}, + {file = "pyzstd-0.15.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7da76175bbbec45ecd35e611eaa50e40c0f29a05e86a8cd50b1b20b5a52cd218"}, + {file = "pyzstd-0.15.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6f2bbd863e674875882de8dd6f54ba98916cfeb89d9224393e9846c9e9aea1d"}, + {file = "pyzstd-0.15.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23c07257fa5df97ebde0a01a54e7c4ae295dc684908f8b1c4ac2f407b881d91b"}, + {file = "pyzstd-0.15.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = 
"sha256:b5f921cf4ba10cf2f1dbaaba3b9e7afe876a0962cf43f16455c4e5052c1dc0bd"}, + {file = "pyzstd-0.15.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c2c6f976e45e63a4f6cbaf20240bb0842ed2ed65703718d54a6fad8642d3c230"}, + {file = "pyzstd-0.15.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:60079134dd0967fc4c81ea50a7deedd99c52bb263a46750326cba9f53b03f93c"}, + {file = "pyzstd-0.15.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:55a368a2195bf9cabf8f20ea2841c6a7463c21d8345c8c7d50b809635fec262e"}, + {file = "pyzstd-0.15.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:78a5e30f90f499f6961e56263e8f029e228b92550aa4e8a5649ed220f8f19230"}, + {file = "pyzstd-0.15.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab21cc20922de335feef32245a9329bc3cb443346dcda243a711aab15bc130cc"}, + {file = "pyzstd-0.15.4-cp37-cp37m-win32.whl", hash = "sha256:0d742edd7340e4b8ba4cbe240e873bbc88c197c0b3838212eb5c4885094f191b"}, + {file = "pyzstd-0.15.4-cp37-cp37m-win_amd64.whl", hash = "sha256:39fa99cac5abe0cdb34afda54f54735593646d8c57de1b734d1fe3c9761e0575"}, + {file = "pyzstd-0.15.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2146d6ab27f3b5dd02b093d748151897e3c929ec009768f20b7a5f3627de7d9c"}, + {file = "pyzstd-0.15.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7422e119a9dd10695dee636707881fa6e1af872df5a4f5f1ae6feec34e3cdab6"}, + {file = "pyzstd-0.15.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9074eff37261effb5c7312911e11f0a4c26475ab55bae803d3b095fa291e631"}, + {file = "pyzstd-0.15.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa9ab4ca7e32c219ad0d420786897cdb3c09f1ffc3fd1a2785e7652ace5684e2"}, + {file = "pyzstd-0.15.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:452e1e774fd7e693f2f5065de61f8cf42eb449ea5614e4cd15ae458e70977d98"}, + {file = "pyzstd-0.15.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e1b4a3a6fdab664f1d415c3bbc63700a85f8ec46ad2bf7287e6a86bbea0581"}, + {file = "pyzstd-0.15.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:21b7f72717d58b39487c903854556fc363be3329b94aa696e9111291a13d714c"}, + {file = "pyzstd-0.15.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eb300366e9bf62776ef8c5c5fca57d5b18fdf6a62d055244e9f9f71ea263597e"}, + {file = "pyzstd-0.15.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1e138c585fc2d07fb20e61f6cbac77fdc77497299199a610a1fca28933128948"}, + {file = "pyzstd-0.15.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:242467cc26e2e4b344028887c934fce3e48354e5eddbc4bb4e695f2c011d3755"}, + {file = "pyzstd-0.15.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:929f894d0b1439826f45a248d7569f8c9ceb43ff3d5739e7ac9ff264acdcb070"}, + {file = "pyzstd-0.15.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e96731ecfff34aeefb5e0f93222c9831880e8a94ea700f4432d4ee45995badc1"}, + {file = "pyzstd-0.15.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f437e316cd24f21ea13ad635d7e24550458217af75182554963f461bae0bc189"}, + {file = "pyzstd-0.15.4-cp38-cp38-win32.whl", hash = "sha256:912345726aa45dba8fb4930e172149f558caf606bf22c219cde9deb984a1523a"}, + {file = "pyzstd-0.15.4-cp38-cp38-win_amd64.whl", hash = "sha256:2589c484c59a0ef2d52d1fd7ab3efd648ef38a396ef22b892c79c2cb878487e3"}, + {file = "pyzstd-0.15.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:33d42c56fde20a2a98bd200d91d09e44957ce782ffeef5041fc4c544df7640d1"}, + {file = "pyzstd-0.15.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:72bf7435dad00fe8a24815f67fcd9fddf44265aa8155a507ac79472cd13c97c1"}, + {file = "pyzstd-0.15.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1bcfb77bf00ab5e134321ddf9123845c80b1f49500d499998a17881aa644e28"}, + {file = "pyzstd-0.15.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4e18f986325eedf0b5d86feb87fad70a0430e3da0bff1a1c33ddce8c70fc371e"}, + {file = "pyzstd-0.15.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9860aeb47f88e61f8bd615f4f3e8372990c8e680c52d8a2f893157ebdb15e8a2"}, + {file = "pyzstd-0.15.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6be764774531007ce83c32d24379c32474329655bcc177f1b154e2d2e7e4dba1"}, + {file = "pyzstd-0.15.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b41ca76a847286cd54ebabbfd37fa732a68dc2daf5178aede34ee377598cfa5"}, + {file = "pyzstd-0.15.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1659da1d388a85c8297083772fdd16543a43305ec0213f1d6f5a6b65702c8402"}, + {file = "pyzstd-0.15.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:94302b3a36c562a31f791a5e0112bcc129e0245c6f462d86627a4cefda0f5764"}, + {file = "pyzstd-0.15.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0820d84c272865e996696fc84c8c63cf6898d66bfda8ac4109880e24272ac13a"}, + {file = "pyzstd-0.15.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e111d203153837fca01a3c6ed48167af4b590f148998a2d8edefd98338615006"}, + {file = "pyzstd-0.15.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:67ee34825be593151ba0da78d6914914afce9a7ddc77683319d9406bf422c578"}, + {file = "pyzstd-0.15.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7048a34197205cb4459492a1d397059ec35a758e29cee226ffa8e593148300af"}, + {file = "pyzstd-0.15.4-cp39-cp39-win32.whl", hash = "sha256:cb03d3fca1fab2b33b4d0de9b80ab0efc9bb872eb28575eeb73af0d9aac08322"}, + {file = "pyzstd-0.15.4-cp39-cp39-win_amd64.whl", hash = "sha256:b06eabacbefaf440785ee1f499582c79083822fb3d2640fb74dd5d3391bbc2ae"}, + {file = "pyzstd-0.15.4-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:649f23ab5c203dade1594f593c6b65247e848f99ba5b11ef26b74541e1f0b605"}, + {file = "pyzstd-0.15.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c173226b2fab75bfcf9c0315d551e9b484e50f4d84869676bcf33d080f5b43f8"}, + {file = "pyzstd-0.15.4-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d774cf6406c33c9af6e1fde10a1b0aab7e1f573b2510c491ed24c6f4dbad677"}, + {file = "pyzstd-0.15.4-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4d47ea4a9a3a43a556da63ee59bf941aecfb529bb85957960e91339c6d6cf3d"}, + {file = "pyzstd-0.15.4-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:be71183e8e7a03b298d4e547f58f68ee1664889bef4e7460464fdf1e70349b66"}, + {file = "pyzstd-0.15.4-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:74750bee4fd2b584bf502cdc03861c9decb0875a7febcb2d629cf72a0470188d"}, + {file = "pyzstd-0.15.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4bf88010b15470f00810e8fc3cb46608af7dceb0d8ca696552d635be652bdfd7"}, + {file = 
"pyzstd-0.15.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5ec07186d5150a6129327e906c0c1f15fd505d4f5538957b2481913a959121ee"}, + {file = "pyzstd-0.15.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3e11b0534f958668dbcac6d3d943af93d1f5c4dfa7857147589398b405a9ee6"}, + {file = "pyzstd-0.15.4-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bf63ad569ebc08fee48bc1080c1eb98c9620af4d52ecc56b2a54d6f01797d7be"}, + {file = "pyzstd-0.15.4-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:88e9f435a68ab9d04a4a90d5b11c587080295ab4057a403730990944b98d0b4e"}, + {file = "pyzstd-0.15.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:9b433e0495b0231c20733b4649d33c5ab51f3670db4b47e0964b0b4dd240e03f"}, + {file = "pyzstd-0.15.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e884faae04b2f52cc6aa5aa18de2631ab75786440fd8a708f6b886d75aade937"}, + {file = "pyzstd-0.15.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:229626e1ee592fd8a922935b997e9b91b8b8b874a72565fe1839041cf3ae1a54"}, + {file = "pyzstd-0.15.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8576440b3212d648fbff49d1127e2bffe0f74aa8be40c3ec8b783f276135cb30"}, + {file = "pyzstd-0.15.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddfbaad47edf70932a6e0fba37348ca240a34408e674c056ff16631fa07ea5fd"}, + {file = "pyzstd-0.15.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:be4e4bc65de7093e4781d98016b15224311f52eacb220f67b1cbffcae3ac5497"}, + {file = "pyzstd-0.15.4.tar.gz", hash = "sha256:de07ac54f57642f186732075cdce2be3d4a30228c3b17a6d8c6053765dc6eec8"}, +] + +[[package]] +name = "rdt" +version = "1.3.0" +description = "Reversible Data Transforms" +category = "main" +optional = false +python-versions = ">=3.7,<3.12" +files = [ + {file = "rdt-1.3.0-py2.py3-none-any.whl", hash = "sha256:e32b4af1c1836f87f3e40795042e18f58f73350466f97bc76e7ce1ba99daa982"}, + {file = "rdt-1.3.0.tar.gz", hash = "sha256:3e008acb734d894d7e7c21cdb77277ab70cabbbbd3342c082ec06ed6e8666f3c"}, +] + +[package.dependencies] +Faker = ">=10" +numpy = [ + {version = ">=1.20.0,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.23.3,<2", markers = "python_version >= \"3.10\""}, +] +pandas = [ + {version = ">=1.1.3,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.3.4,<2", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, +] +psutil = ">=5.7,<6" +scikit-learn = [ + {version = ">=0.24,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.1.3,<2", markers = "python_version >= \"3.10\""}, +] +scipy = [ + {version = ">=1.5.4,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.9.2,<2", markers = "python_version >= \"3.10\""}, +] + +[package.extras] +copulas = ["copulas (>=0.8.0,<0.9)"] +dev = ["autoflake (>=1.1,<2)", "autopep8 (>=1.4.3,<1.6)", "bumpversion (>=0.5.3,<0.6)", "copulas (>=0.8.0,<0.9)", "coverage (>=4.5.1,<6)", "dlint (>=0.11.0,<0.12)", "flake8 (>=3.7.7,<4)", "flake8-absolute-import (>=1.0,<2)", "flake8-builtins (>=1.5.3,<1.6)", "flake8-comprehensions (>=3.6.1,<3.7)", "flake8-debugger (>=4.0.0,<4.1)", "flake8-docstrings (>=1.5.0,<2)", "flake8-eradicate (>=1.1.0,<1.2)", "flake8-expression-complexity (>=0.0.9,<0.1)", "flake8-fixme (>=1.1.1,<1.2)", "flake8-mock (>=0.3,<0.4)", 
"flake8-multiline-containers (>=0.0.18,<0.1)", "flake8-mutable (>=1.2.0,<1.3)", "flake8-print (>=4.0.0,<4.1)", "flake8-pytest-style (>=1.5.0,<2)", "flake8-quotes (>=3.3.0,<4)", "flake8-sfs (>=0.0.3,<0.1)", "flake8-variables-names (>=0.0.4,<0.1)", "invoke", "isort (>=4.3.4,<5)", "jupyter (>=1.0.0,<2)", "pandas-vet (>=0.2.3,<0.3)", "pep8-naming (>=0.12.1,<0.13)", "pip (>=9.0.1)", "pycodestyle (>=2.7.0,<2.8.0)", "pydocstyle (>=6.1.1,<6.2)", "pyflakes (>=2.3.0,<2.4.0)", "pylint (>=2.5.3,<3)", "pytest (>=3.4.2)", "pytest-cov (>=2.6.0)", "pytest-subtests (>=0.5,<1.0)", "rundoc (>=0.4.3,<0.5)", "tabulate (>=0.8.9,<1)", "tox (>=2.9.1,<4)", "twine (>=1.10.0,<4)", "watchdog (>=0.8.3,<0.11)", "wheel (>=0.30.0)"] +test = ["copulas (>=0.8.0,<0.9)", "jupyter (>=1.0.0,<2)", "pytest (>=3.4.2)", "pytest-cov (>=2.6.0)", "pytest-subtests (>=0.5,<1.0)", "rundoc (>=0.4.3,<0.5)"] + +[[package]] +name = "requests" +version = "2.28.2" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7, <4" +files = [ + {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, + {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "scikit-learn" +version = "1.2.2" +description = "A set of python modules for machine learning and data mining" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scikit-learn-1.2.2.tar.gz", hash = "sha256:8429aea30ec24e7a8c7ed8a3fa6213adf3814a6efbea09e16e0a0c71e1a1a3d7"}, + {file = "scikit_learn-1.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99cc01184e347de485bf253d19fcb3b1a3fb0ee4cea5ee3c43ec0cc429b6d29f"}, + {file = "scikit_learn-1.2.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:e6e574db9914afcb4e11ade84fab084536a895ca60aadea3041e85b8ac963edb"}, + {file = "scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6fe83b676f407f00afa388dd1fdd49e5c6612e551ed84f3b1b182858f09e987d"}, + {file = "scikit_learn-1.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e2642baa0ad1e8f8188917423dd73994bf25429f8893ddbe115be3ca3183584"}, + {file = "scikit_learn-1.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ad66c3848c0a1ec13464b2a95d0a484fd5b02ce74268eaa7e0c697b904f31d6c"}, + {file = "scikit_learn-1.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dfeaf8be72117eb61a164ea6fc8afb6dfe08c6f90365bde2dc16456e4bc8e45f"}, + {file = "scikit_learn-1.2.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:fe0aa1a7029ed3e1dcbf4a5bc675aa3b1bc468d9012ecf6c6f081251ca47f590"}, + {file = "scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:065e9673e24e0dc5113e2dd2b4ca30c9d8aa2fa90f4c0597241c93b63130d233"}, + {file = "scikit_learn-1.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf036ea7ef66115e0d49655f16febfa547886deba20149555a41d28f56fd6d3c"}, + {file = "scikit_learn-1.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:8b0670d4224a3c2d596fd572fb4fa673b2a0ccfb07152688ebd2ea0b8c61025c"}, + {file = "scikit_learn-1.2.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:9c710ff9f9936ba8a3b74a455ccf0dcf59b230caa1e9ba0223773c490cab1e51"}, + {file = "scikit_learn-1.2.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:2dd3ffd3950e3d6c0c0ef9033a9b9b32d910c61bd06cb8206303fb4514b88a49"}, + {file = "scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44b47a305190c28dd8dd73fc9445f802b6ea716669cfc22ab1eb97b335d238b1"}, + {file = "scikit_learn-1.2.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:953236889928d104c2ef14027539f5f2609a47ebf716b8cbe4437e85dce42744"}, + {file = "scikit_learn-1.2.2-cp38-cp38-win_amd64.whl", hash = "sha256:7f69313884e8eb311460cc2f28676d5e400bd929841a2c8eb8742ae78ebf7c20"}, + {file = "scikit_learn-1.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8156db41e1c39c69aa2d8599ab7577af53e9e5e7a57b0504e116cc73c39138dd"}, + {file = "scikit_learn-1.2.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:fe175ee1dab589d2e1033657c5b6bec92a8a3b69103e3dd361b58014729975c3"}, + {file = "scikit_learn-1.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d5312d9674bed14f73773d2acf15a3272639b981e60b72c9b190a0cffed5bad"}, + {file = "scikit_learn-1.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea061bf0283bf9a9f36ea3c5d3231ba2176221bbd430abd2603b1c3b2ed85c89"}, + {file = "scikit_learn-1.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:6477eed40dbce190f9f9e9d0d37e020815825b300121307942ec2110302b66a3"}, +] + +[package.dependencies] +joblib = ">=1.1.1" +numpy = ">=1.17.3" +scipy = ">=1.3.2" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.10.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=4.0.1)", "sphinx-gallery (>=0.7.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.10.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=22.3.0)", "flake8 (>=3.8.2)", "matplotlib (>=3.1.3)", "mypy (>=0.961)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=5.3.1)", "pytest-cov (>=2.9.0)", "scikit-image (>=0.16.2)"] + +[[package]] +name = "scipy" +version = "1.10.1" +description = "Fundamental algorithms for scientific computing in Python" +category = "main" +optional = false +python-versions = "<3.12,>=3.8" +files = [ + {file = "scipy-1.10.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7354fd7527a4b0377ce55f286805b34e8c54b91be865bac273f527e1b839019"}, + {file = "scipy-1.10.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4b3f429188c66603a1a5c549fb414e4d3bdc2a24792e061ffbd607d3d75fd84e"}, + {file = "scipy-1.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1553b5dcddd64ba9a0d95355e63fe6c3fc303a8fd77c7bc91e77d61363f7433f"}, + {file = "scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c0ff64b06b10e35215abce517252b375e580a6125fd5fdf6421b98efbefb2d2"}, + {file = "scipy-1.10.1-cp310-cp310-win_amd64.whl", hash = "sha256:fae8a7b898c42dffe3f7361c40d5952b6bf32d10c4569098d276b4c547905ee1"}, + {file = "scipy-1.10.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0f1564ea217e82c1bbe75ddf7285ba0709ecd503f048cb1236ae9995f64217bd"}, + {file = 
"scipy-1.10.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:d925fa1c81b772882aa55bcc10bf88324dadb66ff85d548c71515f6689c6dac5"}, + {file = "scipy-1.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaea0a6be54462ec027de54fca511540980d1e9eea68b2d5c1dbfe084797be35"}, + {file = "scipy-1.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15a35c4242ec5f292c3dd364a7c71a61be87a3d4ddcc693372813c0b73c9af1d"}, + {file = "scipy-1.10.1-cp311-cp311-win_amd64.whl", hash = "sha256:43b8e0bcb877faf0abfb613d51026cd5cc78918e9530e375727bf0625c82788f"}, + {file = "scipy-1.10.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5678f88c68ea866ed9ebe3a989091088553ba12c6090244fdae3e467b1139c35"}, + {file = "scipy-1.10.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:39becb03541f9e58243f4197584286e339029e8908c46f7221abeea4b749fa88"}, + {file = "scipy-1.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bce5869c8d68cf383ce240e44c1d9ae7c06078a9396df68ce88a1230f93a30c1"}, + {file = "scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07c3457ce0b3ad5124f98a86533106b643dd811dd61b548e78cf4c8786652f6f"}, + {file = "scipy-1.10.1-cp38-cp38-win_amd64.whl", hash = "sha256:049a8bbf0ad95277ffba9b3b7d23e5369cc39e66406d60422c8cfef40ccc8415"}, + {file = "scipy-1.10.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cd9f1027ff30d90618914a64ca9b1a77a431159df0e2a195d8a9e8a04c78abf9"}, + {file = "scipy-1.10.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:79c8e5a6c6ffaf3a2262ef1be1e108a035cf4f05c14df56057b64acc5bebffb6"}, + {file = "scipy-1.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51af417a000d2dbe1ec6c372dfe688e041a7084da4fdd350aeb139bd3fb55353"}, + {file = "scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b4735d6c28aad3cdcf52117e0e91d6b39acd4272f3f5cd9907c24ee931ad601"}, + {file = "scipy-1.10.1-cp39-cp39-win_amd64.whl", hash = "sha256:7ff7f37b1bf4417baca958d254e8e2875d0cc23aaadbe65b3d5b3077b0eb23ea"}, + {file = "scipy-1.10.1.tar.gz", hash = "sha256:2cf9dfb80a7b4589ba4c40ce7588986d6d5cebc5457cad2c2880f6bc2d42f3a5"}, +] + +[package.dependencies] +numpy = ">=1.19.5,<1.27.0" + +[package.extras] +dev = ["click", "doit (>=0.36.0)", "flake8", "mypy", "pycodestyle", "pydevtool", "rich-click", "typing_extensions"] +doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-design (>=0.2.0)"] +test = ["asv", "gmpy2", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "sdmetrics" +version = "0.9.2" +description = "Metrics for Synthetic Data Generation Projects" +category = "main" +optional = false +python-versions = ">=3.7,<3.11" +files = [ + {file = "sdmetrics-0.9.2-py2.py3-none-any.whl", hash = "sha256:bab8c887b69e0865b7665704642dd6fe2bb82d858ba81ac87edbfc54c15809d8"}, + {file = "sdmetrics-0.9.2.tar.gz", hash = "sha256:b3cae08e280e8bca6df4d37af8482b5600b6623997f225f652e2076638c92b29"}, +] + +[package.dependencies] +copulas = ">=0.8.0,<0.9" +numpy = [ + {version = ">=1.20.0,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.23.3,<2", markers = "python_version >= \"3.10\""}, +] +pandas = [ + {version = ">=1.1.3,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.5.0,<2", markers = "python_version >= \"3.10\""}, +] +plotly = ">=5.10.0,<6" +scikit-learn = ">=0.24,<2" +scipy = [ + {version = 
">=1.5.4,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.9.2,<2", markers = "python_version >= \"3.10\""}, +] +tqdm = ">=4.15,<5" + +[package.extras] +dev = ["autoflake (>=1.1,<2)", "autopep8 (>=1.4.3,<1.6)", "bumpversion (>=0.5.3,<0.6)", "coverage (>=4.5.1,<6)", "dlint (>=0.11.0,<0.12)", "flake8 (>=3.7.7,<4)", "flake8-absolute-import (>=1.0,<2)", "flake8-builtins (>=1.5.3,<1.6)", "flake8-comprehensions (>=3.6.1,<3.7)", "flake8-debugger (>=4.0.0,<4.1)", "flake8-docstrings (>=1.5.0,<2)", "flake8-eradicate (>=1.1.0,<1.2)", "flake8-expression-complexity (>=0.0.9,<0.1)", "flake8-fixme (>=1.1.1,<1.2)", "flake8-mock (>=0.3,<0.4)", "flake8-multiline-containers (>=0.0.18,<0.1)", "flake8-mutable (>=1.2.0,<1.3)", "flake8-print (>=4.0.0,<4.1)", "flake8-pytest-style (>=1.5.0,<2)", "flake8-quotes (>=3.3.0,<4)", "flake8-sfs (>=0.0.3,<0.1)", "flake8-variables-names (>=0.0.4,<0.1)", "invoke", "isort (>=4.3.4,<5)", "jupyter (>=1.0.0,<2)", "packaging (>=20,<21)", "pandas-vet (>=0.2.2,<0.3)", "pep8-naming (>=0.12.1,<0.13)", "pip (>=9.0.1)", "pomegranate (>=0.14.1,<0.14.7)", "pydocstyle (>=6.1.1,<6.2)", "pytest (>=6.2.5,<7)", "pytest-cov (>=2.6.0,<3)", "pytest-rerunfailures (>=10)", "rundoc (>=0.4.3,<0.5)", "torch (>=1.11.0,<2)", "torch (>=1.8.0,<2)", "tox (>=2.9.1,<4)", "twine (>=1.10.0,<4)", "watchdog (>=0.8.3,<0.11)", "wheel (>=0.30.0)"] +pomegranate = ["pomegranate (>=0.14.1,<0.14.7)"] +test = ["invoke", "jupyter (>=1.0.0,<2)", "pomegranate (>=0.14.1,<0.14.7)", "pytest (>=6.2.5,<7)", "pytest-cov (>=2.6.0,<3)", "pytest-rerunfailures (>=10)", "rundoc (>=0.4.3,<0.5)", "torch (>=1.11.0,<2)", "torch (>=1.8.0,<2)"] +torch = ["torch (>=1.11.0,<2)", "torch (>=1.8.0,<2)"] + +[[package]] +name = "sdv" +version = "0.18.0" +description = "Generate synthetic data for single table, multi table and sequential data" +category = "main" +optional = false +python-versions = ">=3.7,<3.11" +files = [ + {file = "sdv-0.18.0-py2.py3-none-any.whl", hash = "sha256:ace3fb7855a5ee4062ac05b0d5d93af1021b9edbe62cd64767ad77574b70a8d0"}, + {file = "sdv-0.18.0.tar.gz", hash = "sha256:e7316fe53e914abab965527ef82ddc1e5ce2a29e903d89cefb4be0cfd6f0bc40"}, +] + +[package.dependencies] +cloudpickle = ">=2.1.0,<3.0" +copulas = ">=0.8.0,<0.9" +ctgan = ">=0.7.0,<0.8" +deepecho = ">=0.4.0,<0.5" +Faker = ">=10,<15" +graphviz = ">=0.13.2,<1" +numpy = [ + {version = ">=1.20.0,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.23.3,<2", markers = "python_version >= \"3.10\""}, +] +pandas = [ + {version = ">=1.1.3,<2", markers = "python_version < \"3.10\""}, + {version = ">=1.5.0,<2", markers = "python_version >= \"3.10\""}, +] +rdt = ">=1.3.0,<2" +sdmetrics = ">=0.9.0,<0.10" +tqdm = ">=4.15,<5" + +[package.extras] +dev = ["Jinja2 (>=2,<3)", "Sphinx (>=3,<3.3)", "autoflake (>=1.1,<2)", "autopep8 (>=1.4.3,<1.6)", "bumpversion (>=0.5.3,<0.6)", "coverage (>=4.5.1,<6)", "docutils (>=0.12,<0.18)", "flake8 (>=3.7.7,<4)", "flake8-absolute-import (>=1.0,<2)", "flake8-docstrings (>=1.5.0,<2)", "flake8-sfs (>=0.0.3,<0.1)", "invoke", "isort (>=4.3.4,<5)", "jupyter (>=1.0.0,<2)", "m2r2 (>=0.2.5,<0.3)", "markupsafe (<2.1.0)", "nbsphinx (>=0.5.0,<0.7)", "pip (>=9.0.1)", "pydata-sphinx-theme (<0.5)", "pytest (>=3.4.2)", "pytest-cov (>=2.6.0)", "pytest-rerunfailures (>10)", "rundoc (>=0.4.3,<0.5)", "sphinx-toolbox (>=2.5,<3)", "tox (>=2.9.1,<4)", "twine (>=1.10.0,<4)", "watchdog (>=0.8.3,<0.9)", "wheel (>=0.30.0)"] +pomegranate = ["pomegranate (>=0.14.3,<0.15)"] +test = ["jupyter (>=1.0.0,<2)", "pytest (>=3.4.2)", "pytest-cov (>=2.6.0)", 
"pytest-rerunfailures (>10)", "rundoc (>=0.4.3,<0.5)"] + +[[package]] +name = "setuptools" +version = "67.6.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "setuptools-67.6.0-py3-none-any.whl", hash = "sha256:b78aaa36f6b90a074c1fa651168723acbf45d14cb1196b6f02c0fd07f17623b2"}, + {file = "setuptools-67.6.0.tar.gz", hash = "sha256:2ee892cd5f29f3373097f5a814697e397cf3ce313616df0af11231e2ad118077"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "tenacity" +version = "8.2.2" +description = "Retry code until it succeeds" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "tenacity-8.2.2-py3-none-any.whl", hash = "sha256:2f277afb21b851637e8f52e6a613ff08734c347dc19ade928e519d7d2d8569b0"}, + {file = "tenacity-8.2.2.tar.gz", hash = "sha256:43af037822bd0029025877f3b2d97cc4d7bb0c2991000a3d59d71517c5c969e0"}, +] + +[package.extras] +doc = ["reno", "sphinx", "tornado (>=4.5)"] + +[[package]] +name = "texttable" +version = "1.6.7" +description = "module to create simple ASCII tables" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "texttable-1.6.7-py2.py3-none-any.whl", hash = "sha256:b7b68139aa8a6339d2c320ca8b1dc42d13a7831a346b446cb9eb385f0c76310c"}, + {file = "texttable-1.6.7.tar.gz", hash = "sha256:290348fb67f7746931bcdfd55ac7584ecd4e5b0846ab164333f0794b121760f2"}, +] + +[[package]] +name = "threadpoolctl" +version = "3.1.0" +description = "threadpoolctl" +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "threadpoolctl-3.1.0-py3-none-any.whl", hash = "sha256:8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b"}, + {file = "threadpoolctl-3.1.0.tar.gz", hash = "sha256:a335baacfaa4400ae1f0d8e3a58d6674d2f8828e3716bb2802c44955ad391380"}, +] + +[[package]] +name = "torch" +version = "1.13.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = 
"torch-1.13.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:fd12043868a34a8da7d490bf6db66991108b00ffbeecb034228bfcbbd4197143"}, + {file = "torch-1.13.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:d9fe785d375f2e26a5d5eba5de91f89e6a3be5d11efb497e76705fdf93fa3c2e"}, + {file = "torch-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:98124598cdff4c287dbf50f53fb455f0c1e3a88022b39648102957f3445e9b76"}, + {file = "torch-1.13.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:393a6273c832e047581063fb74335ff50b4c566217019cc6ace318cd79eb0566"}, + {file = "torch-1.13.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:0122806b111b949d21fa1a5f9764d1fd2fcc4a47cb7f8ff914204fd4fc752ed5"}, + {file = "torch-1.13.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:22128502fd8f5b25ac1cd849ecb64a418382ae81dd4ce2b5cebaa09ab15b0d9b"}, + {file = "torch-1.13.1-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:76024be052b659ac1304ab8475ab03ea0a12124c3e7626282c9c86798ac7bc11"}, + {file = "torch-1.13.1-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:ea8dda84d796094eb8709df0fcd6b56dc20b58fdd6bc4e8d7109930dafc8e419"}, + {file = "torch-1.13.1-cp37-cp37m-win_amd64.whl", hash = "sha256:2ee7b81e9c457252bddd7d3da66fb1f619a5d12c24d7074de91c4ddafb832c93"}, + {file = "torch-1.13.1-cp37-none-macosx_10_9_x86_64.whl", hash = "sha256:0d9b8061048cfb78e675b9d2ea8503bfe30db43d583599ae8626b1263a0c1380"}, + {file = "torch-1.13.1-cp37-none-macosx_11_0_arm64.whl", hash = "sha256:f402ca80b66e9fbd661ed4287d7553f7f3899d9ab54bf5c67faada1555abde28"}, + {file = "torch-1.13.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:727dbf00e2cf858052364c0e2a496684b9cb5aa01dc8a8bc8bbb7c54502bdcdd"}, + {file = "torch-1.13.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:df8434b0695e9ceb8cc70650afc1310d8ba949e6db2a0525ddd9c3b2b181e5fe"}, + {file = "torch-1.13.1-cp38-cp38-win_amd64.whl", hash = "sha256:5e1e722a41f52a3f26f0c4fcec227e02c6c42f7c094f32e49d4beef7d1e213ea"}, + {file = "torch-1.13.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:33e67eea526e0bbb9151263e65417a9ef2d8fa53cbe628e87310060c9dcfa312"}, + {file = "torch-1.13.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:eeeb204d30fd40af6a2d80879b46a7efbe3cf43cdbeb8838dd4f3d126cc90b2b"}, + {file = "torch-1.13.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:50ff5e76d70074f6653d191fe4f6a42fdbe0cf942fbe2a3af0b75eaa414ac038"}, + {file = "torch-1.13.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:2c3581a3fd81eb1f0f22997cddffea569fea53bafa372b2c0471db373b26aafc"}, + {file = "torch-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:0aa46f0ac95050c604bcf9ef71da9f1172e5037fdf2ebe051962d47b123848e7"}, + {file = "torch-1.13.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:6930791efa8757cb6974af73d4996b6b50c592882a324b8fb0589c6a9ba2ddaf"}, + {file = "torch-1.13.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:e0df902a7c7dd6c795698532ee5970ce898672625635d885eade9976e5a04949"}, +] + +[package.dependencies] +nvidia-cublas-cu11 = {version = "11.10.3.66", markers = "platform_system == \"Linux\""} +nvidia-cuda-nvrtc-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} +nvidia-cuda-runtime-cu11 = {version = "11.7.99", markers = "platform_system == \"Linux\""} +nvidia-cudnn-cu11 = {version = "8.5.0.96", markers = "platform_system == \"Linux\""} +typing-extensions = "*" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] + +[[package]] +name = "torchtuples" +version = "0.2.2" +description = "Training neural networks in PyTorch" +category = "main" 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "torchtuples-0.2.2-py3-none-any.whl", hash = "sha256:186625230a149cc09f64116d51b203ffefe78160f5a0445adad195893663f55b"}, + {file = "torchtuples-0.2.2.tar.gz", hash = "sha256:e0c7fe69977eb594fd9e81015821bb62c849be94d378bd072bdcb08cd68fffc3"}, +] + +[package.dependencies] +matplotlib = ">=3.0.3" +numpy = ">=1.15.4" +pandas = ">=0.24.2" + +[[package]] +name = "tqdm" +version = "4.65.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"}, + {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "typing-extensions" +version = "4.5.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, +] + +[[package]] +name = "urllib3" +version = "1.26.14" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, + {file = "urllib3-1.26.14.tar.gz", hash = "sha256:076907bf8fd355cde77728471316625a4d2f7e713c125f51953bb5b3eecf4f72"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "wheel" +version = "0.38.4" +description = "A built-package format for Python" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "wheel-0.38.4-py3-none-any.whl", hash = "sha256:b60533f3f5d530e971d6737ca6d58681ee434818fab630c83a734bb10c083ce8"}, + {file = "wheel-0.38.4.tar.gz", hash = "sha256:965f5259b566725405b05e7cf774052044b1ed30119b5d586b2703aafe8719ac"}, +] + +[package.extras] +test = ["pytest (>=3.0.0)"] + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs 
(>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.8,<3.11" +content-hash = "202cb736299b3bdd672ac0adfca0529bb69a0df779aa1404f72bb304db36e869" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a8a8ca7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,30 @@ +[tool.poetry] +name = "nhssynth" +version = "0.1.0" +description = "Synthetic data generation pipeline leveraging a Differentially Private Variational Auto Encoder assessed using a variety of metrics" +authors = ["HarrisonWilde "] +license = "MIT" +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.8,<3.11" +torch = "^1.13.1" +tqdm = "^4.65.0" +scikit-learn = "^1.2.1" +rdt = "^1.3.0" +opacus = "^1.3.0" +sdv = "^0.18.0" +pandas = "^1.5.3" +gower = "^0.1.2" +matplotlib = "^3.7.1" +llvmlite = "^0.39.1" +numba = "^0.56.4" +pycox = "^0.2.3" +pyyaml = "^6.0" + +[tool.poetry.scripts] +cli = 'nhssynth.cli:run' + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/rdt/__init__.py b/rdt/__init__.py deleted file mode 100644 index e482003..0000000 --- a/rdt/__init__.py +++ /dev/null @@ -1,55 +0,0 @@ -# -*- coding: utf-8 -*- - -"""Top-level package for RDT.""" - - -__author__ = """MIT Data To AI Lab""" -__email__ = "dailabmit@gmail.com" -__version__ = "0.6.5.dev0" - -import numpy as np -import pandas as pd - -from rdt import transformers -from rdt.hyper_transformer import HyperTransformer - -__all__ = ["HyperTransformer", "transformers"] - - -def get_demo(dtypes=("int", "float", "str", "datetime"), nans=0.2, size=10): - """Generate random demo data with multiple data types. - - Args: - dtypes (tuple or list): - Data types to include in the generated demo data. Defaults to all. - nans (float): - Proportion of null values to generate. Defaults to 0.2. - size (int): - Number of data rows to generate. - - Returns: - pd.DataFrame - """ - if np.isscalar(nans): - nans = [nans] * len(dtypes) - - columns = {} - for count, (dtype, nan) in enumerate(zip(dtypes, nans)): - if dtype == "int": - column = np.random.randint(100, size=size) - elif dtype == "float": - column = np.random.random(size) * 100 - elif dtype == "str": - column = np.random.choice(["a", "b", "c", "d"], size=size) - elif dtype == "datetime": - deltas = np.random.randint(1000000, size=10) - datetimes = np.array([np.datetime64("2019-10-13T18:34")] * size) - column = datetimes + deltas - - column = pd.Series(column) - nan_index = np.random.choice(range(size), size=int(size * nan), replace=False) - column.iloc[nan_index] = np.nan - - columns[f"{count}_{dtype}"] = column - - return pd.DataFrame(columns) diff --git a/rdt/errors.py b/rdt/errors.py deleted file mode 100644 index ea19ebd..0000000 --- a/rdt/errors.py +++ /dev/null @@ -1,5 +0,0 @@ -"""RDT Exceptions.""" - - -class NotFittedError(Exception): - """Error to raise when ``transform`` or ``reverse_transform`` are used before fitting.""" diff --git a/rdt/hyper_transformer.py b/rdt/hyper_transformer.py deleted file mode 100644 index 5721f63..0000000 --- a/rdt/hyper_transformer.py +++ /dev/null @@ -1,487 +0,0 @@ -"""Hyper transformer module.""" - -import warnings -from collections import defaultdict -from copy import deepcopy - -import yaml - -from rdt.errors import NotFittedError -from rdt.transformers import get_default_transformer, get_transformer_instance - - -class HyperTransformer: - """HyperTransformer class. 
- - The ``HyperTransformer`` class contains a collection of ``transformers`` that can be - used to transform and reverse transform one or more columns at once. - - Args: - field_transformers (dict or None): - Dict used to overwrite the transformer used for a field. If no transformer is - specified for a field, a default transformer is selected. The keys are fields - which can be defined as a string of the column name or a tuple of multiple column - names. Keys can also specify transformers for fields derived by other transformers. - This can be done by concatenating the name of the original field to the output name - using ``.`` as a separator (e.g. {field_name}.{transformer_output_name}). - field_data_types (dict or None): - Dict mapping field names to their data types. If not provided, the data type is - inferred using the column's Pandas ``dtype``. - default_data_type_transformers (dict or None): - Dict used to overwrite the default transformer for a data type. The keys are - data types and the values are Transformers or Transformer instances. - copy (bool): - Whether to make a copy of the input data or not. Defaults to ``True``. - transform_output_types (list or None): - List of acceptable data types for the output of the ``transform`` method. - If ``None``, only ``numerical`` types will be considered acceptable. - - - Example: - Create a simple ``HyperTransformer`` instance that will decide which transformers - to use based on the fit data ``dtypes``. - - >>> ht = HyperTransformer() - - Create a ``HyperTransformer`` passing a dict mapping fields to data types. - - >>> field_data_types = { - ... 'a': 'categorical', - ... 'b': 'numerical' - ... } - >>> ht = HyperTransformer(field_data_types=field_data_types) - - Create a ``HyperTransformer`` passing a ``field_transformers`` dict. - (Note: The transformers used in this example may not exist and are just used - to illustrate the different ways that a transformer can be defined for a field). - - >>> field_transformers = { - ... 'email': EmailTransformer(), - ... 'email.domain': EmailDomainTransformer(), - ... ('year', 'month', 'day'): DateTimeTransformer() - ... } - >>> ht = HyperTransformer(field_transformers=field_transformers) - - Create a ``HyperTransformer`` passing a dict mapping data types to transformers. - >>> default_data_type_transformers = { - ... 'categorical': LabelEncodingTransformer(), - ... 'numerical': NumericalTransformer() - ... 
} - >>> ht = HyperTransformer(default_data_type_transformers=default_data_type_transformers) - """ - - # pylint: disable=too-many-instance-attributes - - _DTYPES_TO_DATA_TYPES = { - "i": "integer", - "f": "float", - "O": "categorical", - "b": "boolean", - "M": "datetime", - } - _DEFAULT_OUTPUT_TYPES = ["numerical", "float", "integer"] - - @staticmethod - def _add_field_to_set(field, field_set): - if isinstance(field, tuple): - field_set.update(field) - else: - field_set.add(field) # noqa -> set can't use operator - - @staticmethod - def _field_in_set(field, field_set): - if isinstance(field, tuple): - return all(column in field_set for column in field) - - return field in field_set - - @staticmethod - def _subset(input_list, other_list, not_in=False): - return [element for element in input_list if (element in other_list) ^ not_in] - - def _create_multi_column_fields(self): - multi_column_fields = {} - for field in list(self.field_data_types) + list(self.field_transformers): - if isinstance(field, tuple): - for column in field: - multi_column_fields[column] = field - return multi_column_fields - - def _validate_field_transformers(self): - for field in self.field_transformers: - if self._field_in_set(field, self._specified_fields): - raise ValueError( - f"Multiple transformers specified for the field {field}. " - "Each field can have at most one transformer defined in " - "field_transformers." - ) - - self._add_field_to_set(field, self._specified_fields) - - def __init__( - self, - copy=True, - field_data_types=None, - default_data_type_transformers=None, - field_transformers=None, - transform_output_types=None, - ): - self.copy = copy - self.field_data_types = field_data_types or {} - self.default_data_type_transformers = default_data_type_transformers or {} - self.field_transformers = field_transformers or {} - self._specified_fields = set() - self._validate_field_transformers() - self.transform_output_types = ( - transform_output_types or self._DEFAULT_OUTPUT_TYPES - ) - self._multi_column_fields = self._create_multi_column_fields() - self._transformers_sequence = [] - self._output_columns = [] - self._input_columns = [] - self._fitted_fields = set() - self._fitted = False - self._transformers_tree = defaultdict(dict) - - @staticmethod - def _field_in_data(field, data): - all_columns_in_data = isinstance(field, tuple) and all( - col in data for col in field - ) - return field in data or all_columns_in_data - - def _populate_field_data_types(self, data): - # get set of provided fields including multi-column fields - provided_fields = set() - for field in self.field_data_types.keys(): - self._add_field_to_set(field, provided_fields) - - for field in data: - if field not in provided_fields: - clean_data = data[field].dropna() - kind = clean_data.infer_objects().dtype.kind - self.field_data_types[field] = self._DTYPES_TO_DATA_TYPES[kind] - - def _unfit(self): - self._transformers_sequence = [] - self._fitted_fields.clear() - self._fitted = False - - def get_field_data_types(self): - """Get the ``field_data_types`` dict. - - Returns: - dict: - Mapping of fields to their data types. Fields can be defined as a string - representing a column name or a tuple of multiple column names. - """ - return self.field_data_types - - def update_field_data_types(self, field_data_types): - """Update the ``field_data_types`` dict. - - Args: - field_data_types (dict): - Mapping of fields to their data types. Fields can be defined as a string - representing a column name or a tuple of multiple column names. 
It will - update the existing ``field_data_types`` values. Calling this method will - require ``fit`` to be run again. - """ - self.field_data_types.update(field_data_types) - self._unfit() - - def get_default_data_type_transformers(self): - """Get the ``default_data_type_transformers`` dict. - - Returns: - dict: - The ``default_data_type_transformers`` dictionary. The keys are - data types and the values are Transformers or Transformer instances. - """ - return self.default_data_type_transformers - - def update_default_data_type_transformers(self, new_data_type_transformers): - """Update the ``default_data_type_transformers`` dict. - - Args: - new_data_type_transformers (dict): - Dict mapping data types to the default transformer class or instance to use for - them. This dict does not need to contain an entry for every data type. It will be - used to overwrite the existing defaults. Calling this method will require ``fit`` - to be run again. - """ - self.default_data_type_transformers.update(new_data_type_transformers) - self._unfit() - - def set_first_transformers_for_fields(self, field_transformers): - """Set the first transformer to use for certain fields. - - Args: - field_transformers (dict): - Dict mapping fields to a transformer class name or instance. This transformer will - be the first used on that field when the ``HyperTransformer`` calls ``transform``. - The fields or keys can be defined as strings representing a single column name, or - tuples of strings representing multiple column names. Calling this method will - require ``fit`` to be run again. - """ - self.field_transformers.update(field_transformers) - self._unfit() - - def get_transformer(self, field): - """Get the transformer instance used for a field. - - Args: - field (str or tuple): - String representing a column name or a tuple of multiple column names. - - Returns: - Transformer: - Transformer instance used on the specified field during ``transform``. - """ - if not self._fitted: - raise NotFittedError - - return self._transformers_tree[field].get("transformer", None) - - def get_output_transformers(self, field): - """Return dict mapping output columns of field to transformers used on them. - - Args: - field (str or tuple): - String representing a column name or a tuple of multiple column names. - - Returns: - dict: - Dictionary mapping the output names of the columns created after transforming the - specified field, to the transformer instances used on them. - """ - if not self._fitted: - raise NotFittedError - - next_transformers = {} - for output in self._transformers_tree[field].get("outputs", []): - next_transformers[output] = self._transformers_tree[output].get( - "transformer", None - ) - - return next_transformers - - def get_final_output_columns(self, field): - """Return list of all final output columns related to a field. - - The ``HyperTransformer`` will figure out which transformers to use on a field during - ``transform``. If the outputs are not of an acceptable data type, they will also go - through transformations. This method finds all the output columns that are of an - acceptable final data type that originated from the specified field. - - Args: - field (str or tuple): - String representing a column name or a tuple of multiple column names. - - Returns: - list: - List of output column names that were created as a by-product of the specified - field. 
- """ - if not self._fitted: - raise NotFittedError - - final_outputs = [] - outputs = self._transformers_tree[field].get("outputs", []).copy() - while len(outputs) > 0: - output = outputs.pop() - if output in self._transformers_tree: - outputs.extend(self._transformers_tree[output].get("outputs", [])) - else: - final_outputs.append(output) - - return sorted(final_outputs, reverse=True) - - def get_transformer_tree_yaml(self): - """Return yaml representation of transformers tree. - - After running ``fit``, a sequence of transformers is created to run each original column - through. The sequence can be thought of as a tree, where each node is a field and the - transformer used on it, and each neighbor is an output from that transformer. This method - returns a YAML representation of this tree. - - Returns: - string: - YAML object representing the tree of transformers created during ``fit``. It has - the following form: - - field1: - transformer: ExampleTransformer instance - outputs: [field1.out1, field1.out2] - field1.out1: - transformer: CategoricalTransformer instance - outputs: [field1.out1.value] - field1.out2: - transformer: CategoricalTransformer instance - outputs: [field1.out2.value] - """ - modified_tree = deepcopy(self._transformers_tree) - for field in modified_tree: - class_name = modified_tree[field]["transformer"].__class__.__name__ - modified_tree[field]["transformer"] = class_name - - return yaml.safe_dump(dict(modified_tree)) - - def _get_next_transformer(self, output_field, output_type, next_transformers): - next_transformer = None - if output_field in self.field_transformers: - next_transformer = self.field_transformers[output_field] - - elif output_type not in self.transform_output_types: - if next_transformers is not None and output_field in next_transformers: - next_transformer = next_transformers[output_field] - else: - next_transformer = get_default_transformer(output_type) - - return next_transformer - - def _fit_field_transformer(self, data, field, transformer): - """Fit a transformer to its corresponding field. - - This method fits a transformer to the specified field which can be a column - name or tuple of column names. If the transformer outputs fields that aren't - ML ready, then this method recursively fits transformers to their outputs until - they are. This method keeps track of which fields are temporarily created by - transformers as well as which fields will be part of the final output from ``transform``. - - Args: - data (pandas.DataFrame): - Data to fit the transformer to. - field (str or tuple): - Name of column or tuple of columns in data that will be transformed - by the transformer. - transformer (Transformer): - Instance of transformer class that will fit the data. 
- """ - transformer = get_transformer_instance(transformer) - transformer.fit(data, field) - self._add_field_to_set(field, self._fitted_fields) - self._transformers_sequence.append(transformer) - data = transformer.transform(data) - - output_types = transformer.get_output_types() - next_transformers = transformer.get_next_transformers() - self._transformers_tree[field]["transformer"] = transformer - self._transformers_tree[field]["outputs"] = list(output_types) - for (output_name, output_type) in output_types.items(): - output_field = self._multi_column_fields.get(output_name, output_name) - next_transformer = self._get_next_transformer( - output_field, output_type, next_transformers - ) - - if next_transformer: - if self._field_in_data(output_field, data): - self._fit_field_transformer(data, output_field, next_transformer) - - return data - - def _validate_all_fields_fitted(self): - non_fitted_fields = self._specified_fields.difference(self._fitted_fields) - if non_fitted_fields: - warnings.warn( - "The following fields were specified in the input arguments but not" - + f"found in the data: {non_fitted_fields}" - ) - - def _sort_output_columns(self): - """Sort ``_output_columns`` to follow the same order as the ``_input_columns``.""" - for input_column in self._input_columns: - output_columns = self.get_final_output_columns(input_column) - self._output_columns.extend(output_columns) - - def fit(self, data): - """Fit the transformers to the data. - - Args: - data (pandas.DataFrame): - Data to fit the transformers to. - """ - self._input_columns = list(data.columns) - self._populate_field_data_types(data) - - # Loop through field_transformers that are first level - for field in self.field_transformers: - if self._field_in_data(field, data): - data = self._fit_field_transformer( - data, field, self.field_transformers[field] - ) - - for (field, data_type) in self.field_data_types.items(): - if not self._field_in_set(field, self._fitted_fields): - if data_type in self.default_data_type_transformers: - transformer = self.default_data_type_transformers[data_type] - else: - transformer = get_default_transformer(data_type) - - data = self._fit_field_transformer(data, field, transformer) - - self._validate_all_fields_fitted() - self._fitted = True - self._sort_output_columns() - - def transform(self, data): - """Transform the data. - - If ``self.copy`` is ``True`` make a copy of the input data to avoid modifying it. - - Args: - data (pandas.DataFrame): - Data to transform. - - Returns: - pandas.DataFrame: - Transformed data. - """ - if not self._fitted: - raise NotFittedError - - unknown_columns = self._subset(data.columns, self._input_columns, not_in=True) - if self.copy: - data = data.copy() - - for transformer in self._transformers_sequence: - data = transformer.transform(data, drop=False) - - transformed_columns = self._subset(self._output_columns, data.columns) - return data.reindex(columns=unknown_columns + transformed_columns) - - def fit_transform(self, data): - """Fit the transformers to the data and then transform it. - - Args: - data (pandas.DataFrame): - Data to transform. - - Returns: - pandas.DataFrame: - Transformed data. - """ - self.fit(data) - return self.transform(data) - - def reverse_transform(self, data): - """Revert the transformations back to the original values. - - Args: - data (pandas.DataFrame): - Data to revert. - - Returns: - pandas.DataFrame: - reversed data. 
- """ - if not self._fitted: - raise NotFittedError - - unknown_columns = self._subset(data.columns, self._output_columns, not_in=True) - for transformer in reversed(self._transformers_sequence): - data = transformer.reverse_transform(data, drop=False) - - reversed_columns = self._subset(self._input_columns, data.columns) - - return data.reindex(columns=unknown_columns + reversed_columns) diff --git a/rdt/performance/__init__.py b/rdt/performance/__init__.py deleted file mode 100644 index 6d45199..0000000 --- a/rdt/performance/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""Functions to evaluate and test the performance of the RDT Transformers.""" - -from rdt.performance import profiling -from rdt.performance.performance import evaluate_transformer_performance - -__all__ = [ - "evaluate_transformer_performance", - "profiling", -] diff --git a/rdt/performance/datasets/__init__.py b/rdt/performance/datasets/__init__.py deleted file mode 100644 index 998ab4d..0000000 --- a/rdt/performance/datasets/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Dataset Generators to test the RDT Transformers.""" - -from collections import defaultdict - -from rdt.performance.datasets import boolean, categorical, datetime, numerical -from rdt.performance.datasets.base import BaseDatasetGenerator - -__all__ = [ - "boolean", - "categorical", - "datetime", - "numerical", - "BaseDatasetGenerator", -] - - -def get_dataset_generators_by_type(): - """Build a ``dict`` mapping data types to dataset generators. - - Returns: - dict: - Mapping of data type to a list of dataset generators that produce - data of that data type. - """ - dataset_generators = defaultdict(list) - for dataset_generator in BaseDatasetGenerator.get_subclasses(): - dataset_generators[dataset_generator.DATA_TYPE].append(dataset_generator) - - return dataset_generators diff --git a/rdt/performance/datasets/base.py b/rdt/performance/datasets/base.py deleted file mode 100644 index 6937c84..0000000 --- a/rdt/performance/datasets/base.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Base class for all the Dataset Generators.""" - -from abc import ABC, abstractmethod - - -class BaseDatasetGenerator(ABC): - """Parent class for all the Dataset Generators.""" - - DATA_TYPE = None - - @staticmethod - @abstractmethod - def generate(num_rows): - """Return array of data. This method serves as a template for dataset generators. - - Args: - num_rows (int): - Number of rows to generate. - - Returns: - numpy.ndarray of size ``num_rows`` - """ - raise NotImplementedError() - - @classmethod - def get_subclasses(cls): - """Recursively find subclasses of this Baseline. - - Returns: - list: - List of all subclasses of this class. 
- """ - subclasses = [] - for subclass in cls.__subclasses__(): - if ABC not in subclass.__bases__: - subclasses.append(subclass) - - subclasses += subclass.get_subclasses() - - return subclasses - - @staticmethod - @abstractmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - raise NotImplementedError() diff --git a/rdt/performance/datasets/boolean.py b/rdt/performance/datasets/boolean.py deleted file mode 100644 index 02aab0a..0000000 --- a/rdt/performance/datasets/boolean.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Dataset Generators for boolean transformers.""" - -from abc import ABC - -import numpy as np - -from rdt.performance.datasets.base import BaseDatasetGenerator - -MAX_PERCENT_NULL = 50 # cap the percentage of null values at 50% -MIN_PERCENT = 20 # the minimum percentage of true or false is 20% - - -class BooleanGenerator(BaseDatasetGenerator, ABC): - """Base class for generators that generate boolean data.""" - - DATA_TYPE = "boolean" - - -class RandomBooleanGenerator(BooleanGenerator): - """Generator that creates dataset of random booleans.""" - - @staticmethod - def generate(num_rows): - """Generate an array of random booleans. - - Args: - num_rows (int): - Number of rows of booleans to generate. - - Returns: - numpy.ndarray of size ``num_rows`` containing random booleans. - """ - return np.random.choice(a=[True, False], size=num_rows) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-6, "memory": 200.0}, - "transform": {"time": 1e-6, "memory": 200.0}, - "reverse_transform": {"time": 5e-6, "memory": 500.0,}, - } - - -class RandomBooleanNaNsGenerator(BooleanGenerator): - """Generator that creates an array of random booleans with nulls.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - percent_null = np.random.randint(MIN_PERCENT, MAX_PERCENT_NULL) - percent_true = (100 - percent_null) / 2 - percent_false = 100 - percent_true - percent_null - - return np.random.choice( - a=[True, False, None], - size=num_rows, - p=[percent_true / 100, percent_false / 100, percent_null / 100], - ) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-6, "memory": 200.0}, - "transform": {"time": 1e-5, "memory": 1000.0}, - "reverse_transform": {"time": 5e-5, "memory": 1000.0,}, - } - - -class RandomSkewedBooleanGenerator(BooleanGenerator): - """Generator that creates dataset of random booleans.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - percent_true = np.random.randint(MIN_PERCENT, 100 - MIN_PERCENT) - - return np.random.choice( - a=[True, False], - size=num_rows, - p=[percent_true / 100, (100 - percent_true) / 100], - ) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-6, "memory": 200.0}, - "transform": {"time": 1e-6, "memory": 200.0}, - "reverse_transform": {"time": 5e-6, "memory": 500.0,}, - } - - -class RandomSkewedBooleanNaNsGenerator(BooleanGenerator): - """Generator that creates an array of random booleans with nulls.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - percent_null = np.random.randint(MIN_PERCENT, MAX_PERCENT_NULL) - percent_true = np.random.randint(MIN_PERCENT, 100 - percent_null - MIN_PERCENT) - percent_false = 100 - percent_null - percent_true - - return np.random.choice( - 
a=[True, False, None], - size=num_rows, - p=[percent_true / 100, percent_false / 100, percent_null / 100], - ) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-6, "memory": 200.0}, - "transform": {"time": 1e-5, "memory": 1000.0}, - "reverse_transform": {"time": 5e-5, "memory": 1000.0,}, - } - - -class ConstantBooleanGenerator(BooleanGenerator): - """Generator that creates a constant array with either True or False.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - constant = np.random.choice([True, False]) - return np.full(num_rows, constant) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-6, "memory": 200.0}, - "transform": {"time": 1e-6, "memory": 200.0}, - "reverse_transform": {"time": 5e-6, "memory": 500.0,}, - } - - -class ConstantBooleanNaNsGenerator(BooleanGenerator): - """Generator that creates a constant array with either True or False with some nulls.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - constant = np.random.choice([True, False]) - percent_null = np.random.randint(MIN_PERCENT, MAX_PERCENT_NULL) - - return np.random.choice( - a=[constant, None], - size=num_rows, - p=[(100 - percent_null) / 100, percent_null / 100], - ) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-6, "memory": 200.0}, - "transform": {"time": 1e-5, "memory": 1000.0}, - "reverse_transform": {"time": 5e-5, "memory": 1000.0,}, - } diff --git a/rdt/performance/datasets/categorical.py b/rdt/performance/datasets/categorical.py deleted file mode 100644 index 59ff3a0..0000000 --- a/rdt/performance/datasets/categorical.py +++ /dev/null @@ -1,299 +0,0 @@ -"""Dataset Generators for categorical transformers.""" - -from abc import ABC - -import numpy as np - -from rdt.performance.datasets.base import BaseDatasetGenerator -from rdt.performance.datasets.datetime import RandomGapDatetimeGenerator -from rdt.performance.datasets.utils import add_nans - - -class CategoricalGenerator(BaseDatasetGenerator, ABC): - """Base class for generators that generate categorical data.""" - - DATA_TYPE = "categorical" - - -class RandomIntegerGenerator(CategoricalGenerator): - """Generator that creates an array of random integers.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - categories = [1, 2, 3, 4, 5] - return np.random.choice(a=categories, size=num_rows) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-05, "memory": 400.0}, - "transform": {"time": 3e-06, "memory": 400.0}, - "reverse_transform": {"time": 1e-05, "memory": 1000.0,}, - } - - -class RandomIntegerNaNsGenerator(CategoricalGenerator): - """Generator that creates an array of random integers with nans.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return add_nans(RandomIntegerGenerator.generate(num_rows).astype(float)) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-05, "memory": 400.0}, - "transform": {"time": 5e-06, "memory": 1000.0}, - "reverse_transform": {"time": 1e-05, "memory": 1000.0,}, - } - - -class RandomStringGenerator(CategoricalGenerator): - """Generator that creates an array of 
random strings.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - categories = ["Alice", "Bob", "Charlie", "Dave", "Eve"] - return np.random.choice(a=categories, size=num_rows) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-05, "memory": 500.0}, - "transform": {"time": 1e-05, "memory": 500.0}, - "reverse_transform": {"time": 1e-05, "memory": 1000.0,}, - } - - -class RandomStringNaNsGenerator(CategoricalGenerator): - """Generator that creates an array of random strings with nans.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return add_nans(RandomStringGenerator.generate(num_rows).astype("O")) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-05, "memory": 400.0}, - "transform": {"time": 1e-05, "memory": 1000.0}, - "reverse_transform": {"time": 1e-05, "memory": 1000.0,}, - } - - -class RandomMixedGenerator(CategoricalGenerator): - """Generator that creates an array of random mixed types. - - Mixed types include: int, float, bool, string, datetime. - """ - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - cat_size = 5 - categories = np.hstack( - [ - cat.astype("O") - for cat in [ - RandomGapDatetimeGenerator.generate(cat_size), - np.random.randint(0, 100, cat_size), - np.random.uniform(0, 100, cat_size), - np.arange(cat_size).astype(str), - np.array([True, False]), - ] - ] - ) - - return np.random.choice(a=categories, size=num_rows) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-05, "memory": 400.0}, - "transform": {"time": 1e-05, "memory": 1000.0}, - "reverse_transform": {"time": 1e-05, "memory": 2000.0,}, - } - - -class RandomMixedNaNsGenerator(CategoricalGenerator): - """Generator that creates an array of random mixed types with nans. - - Mixed types include: int, float, bool, string, datetime. 
- """ - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - array = RandomMixedGenerator.generate(num_rows) - - length = len(array) - num_nulls = np.random.randint(1, length) - nulls_idx = np.random.choice(range(length), num_nulls) - nulls = np.random.choice([np.nan, float("nan"), None], num_nulls) - array[nulls_idx] = nulls - - return array - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-05, "memory": 400.0}, - "transform": {"time": 1e-05, "memory": 2000.0}, - "reverse_transform": {"time": 1e-05, "memory": 2000.0,}, - } - - -class SingleIntegerGenerator(CategoricalGenerator): - """Generator that creates an array with a single integer.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - constant = np.random.randint(0, 100) - return np.full(num_rows, constant) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-05, "memory": 400.0}, - "transform": {"time": 3e-06, "memory": 200.0}, - "reverse_transform": {"time": 1e-05, "memory": 400.0,}, - } - - -class SingleIntegerNaNsGenerator(CategoricalGenerator): - """Generator that creates an array with a single integer with some nans.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return add_nans(SingleIntegerGenerator.generate(num_rows).astype(float)) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-05, "memory": 400.0}, - "transform": {"time": 3e-06, "memory": 200.0}, - "reverse_transform": {"time": 1e-05, "memory": 500.0,}, - } - - -class SingleStringGenerator(CategoricalGenerator): - """Generator that creates an array of a single string.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - constant = "A" - return np.full(num_rows, constant) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-05, "memory": 400.0}, - "transform": {"time": 4e-06, "memory": 200.0}, - "reverse_transform": {"time": 1e-05, "memory": 400.0,}, - } - - -class SingleStringNaNsGenerator(CategoricalGenerator): - """Generator that creates an array of a single string with nans.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return add_nans(SingleStringGenerator.generate(num_rows).astype("O")) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-05, "memory": 400.0}, - "transform": {"time": 3e-06, "memory": 200.0}, - "reverse_transform": {"time": 1e-05, "memory": 500.0,}, - } - - -class UniqueIntegerGenerator(CategoricalGenerator): - """Generator that creates an array of unique integers.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return np.arange(num_rows) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 2e-05, "memory": 2000.0}, - "transform": {"time": 0.0002, "memory": 500000.0}, - "reverse_transform": {"time": 0.0003, "memory": 1000000.0,}, - } - - -class UniqueIntegerNaNsGenerator(CategoricalGenerator): - """Generator that creates an array of unique integers with nans.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number 
of rows.""" - return add_nans(UniqueIntegerGenerator.generate(num_rows)) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-05, "memory": 1000.0}, - "transform": {"time": 0.0002, "memory": 1000000.0}, - "reverse_transform": {"time": 0.0002, "memory": 1000000.0,}, - } - - -class UniqueStringGenerator(CategoricalGenerator): - """Generator that creates an array of unique strings.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return np.arange(num_rows).astype(str) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 2e-05, "memory": 2000.0}, - "transform": {"time": 0.0002, "memory": 500000.0}, - "reverse_transform": {"time": 0.0003, "memory": 1000000.0,}, - } - - -class UniqueStringNaNsGenerator(CategoricalGenerator): - """Generator that creates an array of unique strings with nans.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return add_nans(UniqueStringGenerator.generate(num_rows).astype("O")) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 2e-05, "memory": 1000.0}, - "transform": {"time": 0.0005, "memory": 1000000.0}, - "reverse_transform": {"time": 0.0002, "memory": 1000000.0,}, - } diff --git a/rdt/performance/datasets/datetime.py b/rdt/performance/datasets/datetime.py deleted file mode 100644 index 2bdd8b1..0000000 --- a/rdt/performance/datasets/datetime.py +++ /dev/null @@ -1,146 +0,0 @@ -"""Dataset Generators for datetime transformers.""" - -import datetime -from abc import ABC - -import numpy as np -import pandas as pd - -from rdt.performance.datasets.base import BaseDatasetGenerator -from rdt.performance.datasets.utils import add_nans - - -class DatetimeGenerator(BaseDatasetGenerator, ABC): - """Base class for generators that generate datatime data.""" - - DATA_TYPE = "datetime" - - -class RandomGapDatetimeGenerator(DatetimeGenerator): - """Generator that creates dates with random gaps between them.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - today = datetime.datetime.today() - delta = datetime.timedelta(days=1) - dates = [(np.random.random() * delta + today) for i in range(num_rows)] - return np.array(dates, dtype="datetime64") - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 5e-06, "memory": 500.0}, - "transform": {"time": 5e-06, "memory": 300.0}, - "reverse_transform": {"time": 5e-06, "memory": 1000.0,}, - } - - -class RandomGapSecondsDatetimeGenerator(DatetimeGenerator): - """Generator that creates dates with random gaps of seconds between them.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - today = datetime.datetime.today() - delta = datetime.timedelta(seconds=1) - dates = [(np.random.random() * delta + today) for i in range(num_rows)] - return np.array(dates, dtype="datetime64") - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 5e-06, "memory": 500.0}, - "transform": {"time": 5e-06, "memory": 300.0}, - "reverse_transform": {"time": 5e-06, "memory": 1000.0,}, - } - - -class RandomGapDatetimeNaNsGenerator(DatetimeGenerator): - """Generator that creates dates with random gaps and NaNs.""" - - @staticmethod - def 
generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - dates = RandomGapDatetimeGenerator.generate(num_rows) - return add_nans(dates.astype("O")) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 5e-06, "memory": 500.0}, - "transform": {"time": 5e-06, "memory": 1000.0}, - "reverse_transform": {"time": 5e-06, "memory": 1000.0,}, - } - - -class EqualGapHoursDatetimeGenerator(DatetimeGenerator): - """Generator that creates dates with hour gaps between them.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - today = datetime.datetime.today() - delta = datetime.timedelta - dates = [delta(hours=i) + today for i in range(num_rows)] - return np.array(dates, dtype="datetime64") - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 5e-06, "memory": 500.0}, - "transform": {"time": 5e-06, "memory": 300.0}, - "reverse_transform": {"time": 5e-06, "memory": 1000.0,}, - } - - -class EqualGapDaysDatetimeGenerator(DatetimeGenerator): - """Generator that creates dates with 1 day gaps between them.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - today = datetime.datetime.today() - delta = datetime.timedelta - - today = min(datetime.datetime.today(), pd.Timestamp.max - delta(num_rows)) - dates = [delta(i) + today for i in range(num_rows)] - - return np.array(dates, dtype="datetime64") - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 5e-06, "memory": 500.0}, - "transform": {"time": 5e-06, "memory": 300.0}, - "reverse_transform": {"time": 5e-06, "memory": 1000.0,}, - } - - -class EqualGapWeeksDatetimeGenerator(DatetimeGenerator): - """Generator that creates dates with 1 week gaps between them.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - today = datetime.datetime.today() - delta = datetime.timedelta - - today = datetime.datetime.today() - dates = [min(delta(weeks=i) + today, pd.Timestamp.max) for i in range(num_rows)] - - return np.array(dates, dtype="datetime64") - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 5e-06, "memory": 500.0}, - "transform": {"time": 5e-06, "memory": 300.0}, - "reverse_transform": {"time": 5e-06, "memory": 1000.0,}, - } diff --git a/rdt/performance/datasets/numerical.py b/rdt/performance/datasets/numerical.py deleted file mode 100644 index 371e903..0000000 --- a/rdt/performance/datasets/numerical.py +++ /dev/null @@ -1,207 +0,0 @@ -"""Dataset Generators for numerical transformers.""" - -from abc import ABC - -import numpy as np - -from rdt.performance.datasets.base import BaseDatasetGenerator -from rdt.performance.datasets.utils import add_nans - - -class NumericalGenerator(BaseDatasetGenerator, ABC): - """Base class for generators that create numerical data.""" - - DATA_TYPE = "numerical" - - -class RandomIntegerGenerator(NumericalGenerator): - """Generator that creates an array of random integers.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - ii32 = np.iinfo(np.int32) - return np.random.randint(ii32.min, ii32.max, num_rows) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-03, "memory": 2500.0}, - "transform": 
{"time": 5e-06, "memory": 200.0}, - "reverse_transform": {"time": 5e-06, "memory": 200.0,}, - } - - -class RandomIntegerNaNsGenerator(NumericalGenerator): - """Generator that creates an array of random integers with nans.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return add_nans(RandomIntegerGenerator.generate(num_rows).astype(float)) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-03, "memory": 2500.0}, - "transform": {"time": 4e-06, "memory": 400.0}, - "reverse_transform": {"time": 2e-06, "memory": 300.0,}, - } - - -class ConstantIntegerGenerator(NumericalGenerator): - """Generator that creates a constant array with a random integer.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - ii32 = np.iinfo(np.int32) - constant = np.random.randint(ii32.min, ii32.max) - return np.full(num_rows, constant) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-03, "memory": 400.0}, - "transform": {"time": 1e-05, "memory": 200.0}, - "reverse_transform": {"time": 5e-06, "memory": 200.0,}, - } - - -class ConstantIntegerNaNsGenerator(NumericalGenerator): - """Generator that creates a constant array with a random integer with some nans.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return add_nans(ConstantIntegerGenerator.generate(num_rows).astype(float)) - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-03, "memory": 600.0}, - "transform": {"time": 3e-06, "memory": 400.0}, - "reverse_transform": {"time": 2e-06, "memory": 300.0,}, - } - - -class AlmostConstantIntegerGenerator(NumericalGenerator): - """Generator that creates an array with 2 only values, one of them repeated.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - ii32 = np.iinfo(np.int32) - values = np.random.randint(ii32.min, ii32.max, size=2) - additional_values = np.full(num_rows - 2, values[1]) - array = np.concatenate([values, additional_values]) - np.random.shuffle(array) - return array - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-03, "memory": 2500.0}, - "transform": {"time": 1e-05, "memory": 2000.0}, - "reverse_transform": {"time": 5e-06, "memory": 2000.0,}, - } - - -class AlmostConstantIntegerNaNsGenerator(NumericalGenerator): - """Generator that creates an array with 2 only values, one of them repeated, and NaNs.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - ii32 = np.iinfo(np.int32) - values = np.random.randint(ii32.min, ii32.max, size=2) - additional_values = np.full(num_rows - 2, values[1]).astype(float) - array = np.concatenate([values, add_nans(additional_values)]) - np.random.shuffle(array) - return array - - @staticmethod - def get_performance_thresholds(): - """Return the expected threseholds.""" - return { - "fit": {"time": 1e-03, "memory": 2500.0}, - "transform": {"time": 3e-06, "memory": 1000.0}, - "reverse_transform": {"time": 2e-06, "memory": 1000.0,}, - } - - -class NormalGenerator(NumericalGenerator): - """Generator that creates an array of normally distributed float values.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - 
return np.random.normal(size=num_rows) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-03, "memory": 2500.0}, - "transform": {"time": 1e-05, "memory": 200.0}, - "reverse_transform": {"time": 1e-05, "memory": 200.0}, - } - - -class NormalNaNsGenerator(NumericalGenerator): - """Generator that creates an array of normally distributed float values, with NaNs.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return add_nans(NormalGenerator.generate(num_rows)) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-03, "memory": 2500.0}, - "transform": {"time": 4e-06, "memory": 400.0}, - "reverse_transform": {"time": 5e-06, "memory": 300.0}, - } - - -class BigNormalGenerator(NumericalGenerator): - """Generator that creates an array of big normally distributed float values.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return np.random.normal(scale=1e10, size=num_rows) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-03, "memory": 2500.0}, - "transform": {"time": 5e-06, "memory": 200.0}, - "reverse_transform": {"time": 5e-06, "memory": 200.0}, - } - - -class BigNormalNaNsGenerator(NumericalGenerator): - """Generator that creates an array of big normally distributed float values, with NaNs.""" - - @staticmethod - def generate(num_rows): - """Generate a ``num_rows`` number of rows.""" - return add_nans(BigNormalGenerator.generate(num_rows)) - - @staticmethod - def get_performance_thresholds(): - """Return the expected thresholds.""" - return { - "fit": {"time": 1e-03, "memory": 2500.0}, - "transform": {"time": 3e-06, "memory": 400.0}, - "reverse_transform": {"time": 2e-06, "memory": 300.0}, - } diff --git a/rdt/performance/datasets/utils.py b/rdt/performance/datasets/utils.py deleted file mode 100644 index 71f4814..0000000 --- a/rdt/performance/datasets/utils.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Utils for the dataset generators.""" - -import numpy as np - - -def add_nans(array): - """Add a random amount of NaN values to the given array. - - Args: - array (np.array): - 1 dimensional numpy array. - - Returns: - np.array: - The same array with some values replaced by NaNs. - """ - if array.dtype.kind == "i": - array = array.astype(float) - - length = len(array) - num_nulls = np.random.randint(1, length) - nulls = np.random.choice(range(length), num_nulls) - array[nulls] = np.nan - return array diff --git a/rdt/performance/performance.py b/rdt/performance/performance.py deleted file mode 100644 index 6f28d7b..0000000 --- a/rdt/performance/performance.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Functions for evaluating transformer performance.""" - -import numpy as np -import pandas as pd - -from rdt.performance.profiling import profile_transformer - -DATASET_SIZES = [1000, 10000, 100000] - - -def _get_dataset_sizes(data_type): - """Get a list of (fit_size, transform_size) for each dataset generator. - - Based on the data type of the dataset generator, return the list of - sizes to run performance tests on. Each element in this list is a tuple - of (fit_size, transform_size). - - Args: - data_type (str): - The type of data that the generator returns. - - Returns: - sizes (list[tuple]): - A list of (fit_size, transform_size) configs to run tests on.
- """ - sizes = [(s, s) for s in DATASET_SIZES] - - if data_type == "categorical": - sizes = [(s, max(s, 1000)) for s in DATASET_SIZES if s <= 10000] - - return sizes - - -def evaluate_transformer_performance(transformer, dataset_generator, verbose=False): - """Evaluate the given transformer's performance against the given dataset generator. - - Args: - transformer (rdt.transformers.BaseTransformer): - The transformer to evaluate. - dataset_generator (rdt.tests.datasets.BaseDatasetGenerator): - The dataset generator to performance test against. - verbose (bool): - Whether or not to add extra columns about the dataset and transformer, - and return data for all dataset sizes. If false, it will only return - the max performance values of all the dataset sizes used. - - Returns: - pandas.DataFrame: - The performance test results. - """ - transformer_instance = transformer() - - sizes = _get_dataset_sizes(dataset_generator.DATA_TYPE) - - out = [] - for fit_size, transform_size in sizes: - performance = profile_transformer( - transformer=transformer_instance, - dataset_generator=dataset_generator, - fit_size=fit_size, - transform_size=transform_size, - ) - size = np.array([fit_size, transform_size, transform_size] * 2) - performance = performance / size - if verbose: - performance = performance.rename( - lambda x: x + " (s)" if "Time" in x else x + " (B)" - ) - performance["Number of fit rows"] = fit_size - performance["Number of transform rows"] = transform_size - performance["Dataset"] = dataset_generator.__name__ - performance[ - "Transformer" - ] = f"{transformer.__module__ }.{transformer.__name__}" - - out.append(performance) - - summary = pd.DataFrame(out) - if verbose: - return summary - - return summary.max(axis=0) diff --git a/rdt/performance/profiling.py b/rdt/performance/profiling.py deleted file mode 100644 index 0236699..0000000 --- a/rdt/performance/profiling.py +++ /dev/null @@ -1,112 +0,0 @@ -"""Functions to profile performance of RDT Transformers.""" - -# pylint: disable=W0212 - -import multiprocessing as mp -import timeit -import tracemalloc -from copy import deepcopy - -import pandas as pd - - -def _profile_time(transformer, method_name, dataset, iterations=10, copy=False): - total_time = 0 - for _ in range(iterations): - if copy: - transformer_copy = deepcopy(transformer) - method = getattr(transformer_copy, method_name) - - else: - method = getattr(transformer, method_name) - - start_time = timeit.default_timer() - method(dataset) - total_time += timeit.default_timer() - start_time - - return total_time / iterations - - -def _set_memory_for_method(method, dataset, peak_memory): - tracemalloc.start() - method(dataset) - peak_memory.value = tracemalloc.get_traced_memory()[1] - tracemalloc.stop() - tracemalloc.clear_traces() - - -def _profile_memory(method, dataset): - ctx = mp.get_context("spawn") - peak_memory = ctx.Value("i", 0) - profiling_process = ctx.Process( - target=_set_memory_for_method, args=(method, dataset, peak_memory) - ) - profiling_process.start() - profiling_process.join() - return peak_memory.value - - -def profile_transformer(transformer, dataset_generator, transform_size, fit_size=None): - """Profile a Transformer on a dataset. - - This function will get the total time and peak memory - for the ``fit``, ``transform`` and ``reverse_transform`` - methods of the provided transformer against the provided - dataset. - - Args: - transformer (Transformer): - Transformer instance. - dataset_generator (DatasetGenerator): - DatasetGenerator instance. 
- transform_size (int): - Number of rows to generate for ``transform`` and ``reverse_transform``. - fit_size (int or None): - Number of rows to generate for ``fit``. If None, use ``transform_size``. - - Returns: - pandas.Series: - Series containing the time and memory taken by ``fit``, ``transform``, - and ``reverse_transform`` for the transformer. - """ - fit_size = fit_size or transform_size - fit_dataset = pd.Series(dataset_generator.generate(fit_size)) - replace = transform_size > fit_size - transform_dataset = fit_dataset.sample(transform_size, replace=replace) - - try: - fit_time = _profile_time(transformer, "fit", fit_dataset, copy=True) - fit_memory = _profile_memory(transformer.fit, fit_dataset) - transformer.fit(fit_dataset) - - transform_time = _profile_time(transformer, "transform", transform_dataset) - transform_memory = _profile_memory(transformer.transform, transform_dataset) - - reverse_dataset = transformer.transform(transform_dataset) - reverse_time = _profile_time(transformer, "reverse_transform", reverse_dataset) - reverse_memory = _profile_memory(transformer.reverse_transform, reverse_dataset) - except TypeError: - # temporarily support both old and new style transformers - fit_time = _profile_time(transformer, "_fit", fit_dataset, copy=True) - fit_memory = _profile_memory(transformer._fit, fit_dataset) - transformer._fit(fit_dataset) - - transform_time = _profile_time(transformer, "_transform", transform_dataset) - transform_memory = _profile_memory(transformer._transform, transform_dataset) - - reverse_dataset = transformer._transform(transform_dataset) - reverse_time = _profile_time(transformer, "_reverse_transform", reverse_dataset) - reverse_memory = _profile_memory( - transformer._reverse_transform, reverse_dataset - ) - - return pd.Series( - { - "Fit Time": fit_time, - "Fit Memory": fit_memory, - "Transform Time": transform_time, - "Transform Memory": transform_memory, - "Reverse Transform Time": reverse_time, - "Reverse Transform Memory": reverse_memory, - } - ) diff --git a/rdt/transformers/__init__.py b/rdt/transformers/__init__.py deleted file mode 100644 index 83c6bde..0000000 --- a/rdt/transformers/__init__.py +++ /dev/null @@ -1,149 +0,0 @@ -"""Transformers module.""" - -import importlib -import json -import sys -from collections import defaultdict -from copy import deepcopy -from functools import lru_cache -from pathlib import Path - -import numpy as np - -from rdt.transformers.base import BaseTransformer -from rdt.transformers.boolean import BooleanTransformer -from rdt.transformers.categorical import CategoricalTransformer -from rdt.transformers.datetime import DatetimeTransformer -from rdt.transformers.null import NullTransformer -from rdt.transformers.numerical import NumericalTransformer - -__all__ = [ - "BaseTransformer", - "NullTransformer", - "get_transformer_class", - "get_transformer_instance", - "get_transformers_by_type", - "get_default_transformers", - "get_default_transformer", -] - - -def _import_addons(): - """Import all the addon modules.""" - addons_path = Path(__file__).parent / "addons" - for addon_json_path in addons_path.glob("*/*.json"): - with open(addon_json_path, "r", encoding="utf-8") as addon_json_file: - transformers = json.load(addon_json_file).get("transformers", []) - for transformer in transformers: - module = transformer.rsplit(".", 1)[0] - if module not in sys.modules: - importlib.import_module(module) - - -_import_addons() - -TRANSFORMERS = { - transformer.__name__: transformer - for transformer in 
BaseTransformer.get_subclasses() -} - -globals().update(TRANSFORMERS) -__all__.extend(TRANSFORMERS.keys()) - -DEFAULT_TRANSFORMERS = { - "numerical": NumericalTransformer, - "integer": NumericalTransformer(dtype=np.int64), - "float": NumericalTransformer(dtype=np.float64), - "categorical": CategoricalTransformer(fuzzy=True), - "boolean": BooleanTransformer, - "datetime": DatetimeTransformer, -} - - -def get_transformer_class(transformer): - """Return a ``transformer`` class from a ``str``. - - Args: - transforemr (str): - Python path or transformer's name. - - Returns: - BaseTransformer: - BaseTransformer subclass class object. - """ - if len(transformer.split(".")) == 1: - return TRANSFORMERS[transformer] - - package, name = transformer.rsplit(".", 1) - return TRANSFORMERS.get(name, getattr(importlib.import_module(package), name)) - - -def get_transformer_instance(transformer): - """Load a new instance of a ``Transformer``. - - The ``transformer`` is expected to be a ``string`` containing the transformer ``class`` - name, a transformer instance or a transformer type. - - Args: - transformer (dict or BaseTransformer): - ``dict`` with the transformer specification or instance of a BaseTransformer - subclass. - - Returns: - BaseTransformer: - BaseTransformer subclass instance. - """ - if isinstance(transformer, BaseTransformer): - return deepcopy(transformer) - - if isinstance(transformer, str): - transformer = TRANSFORMERS[transformer] - - return transformer() - - -@lru_cache() -def get_transformers_by_type(): - """Build a ``dict`` mapping data types to valid existing transformers for that type. - - Returns: - dict: - Mapping of data types to a list of existing transformers that take that - type as an input. - """ - data_type_transformers = defaultdict(list) - transformer_classes = BaseTransformer.get_subclasses() - for transformer in transformer_classes: - input_type = transformer.get_input_type() - data_type_transformers[input_type].append(transformer) - - return data_type_transformers - - -@lru_cache() -def get_default_transformers(): - """Build a ``dict`` mapping data types to a default transformer for that type. - - Returns: - dict: - Mapping of data types to a transformer. - """ - transformers_by_type = get_transformers_by_type() - defaults = deepcopy(DEFAULT_TRANSFORMERS) - for (data_type, transformers) in transformers_by_type.items(): - if data_type not in defaults: - defaults[data_type] = transformers[0] - - return defaults - - -@lru_cache() -def get_default_transformer(data_type): - """Get default transformer for a data type. - - Returns: - Transformer: - Default transformer for data type. - """ - default_transformers = get_default_transformers() - return default_transformers[data_type] diff --git a/rdt/transformers/addons/README.md b/rdt/transformers/addons/README.md deleted file mode 100644 index 330a6fb..0000000 --- a/rdt/transformers/addons/README.md +++ /dev/null @@ -1,31 +0,0 @@ -
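Since the module above wires every BaseTransformer subclass into the TRANSFORMERS registry, the lookup helpers can be driven either by class name or by Python path. A brief usage sketch, limited to the functions defined above:

    from rdt.transformers import (
        get_default_transformer,
        get_transformer_class,
        get_transformer_instance,
    )

    get_transformer_class("BooleanTransformer")                   # lookup by class name
    get_transformer_class("rdt.transformers.BooleanTransformer")  # or by python path
    instance = get_transformer_instance("BooleanTransformer")     # fresh, independent instance
    default = get_default_transformer("categorical")              # CategoricalTransformer(fuzzy=True)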

-[Banner image: DAI-Lab, An Open Source Project from the Data to AI Lab, at MIT]

- -[![Development Status](https://img.shields.io/badge/Development%20Status-2%20--%20Pre--Alpha-yellow)](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha) -[![PyPi Shield](https://img.shields.io/pypi/v/RDT.svg)](https://pypi.python.org/pypi/RDT) -[![Unit Tests](https://github.com/sdv-dev/RDT/actions/workflows/unit.yml/badge.svg)](https://github.com/sdv-dev/RDT/actions/workflows/unit.yml) -[![Downloads](https://pepy.tech/badge/rdt)](https://pepy.tech/project/rdt) -[![Coverage Status](https://codecov.io/gh/sdv-dev/RDT/branch/master/graph/badge.svg)](https://codecov.io/gh/sdv-dev/RDT) - - - -* Website: https://sdv.dev -* Documentation: https://sdv.dev/SDV -* Repository: https://github.com/sdv-dev/RDT -* License: [MIT](https://github.com/sdv-dev/RDT/blob/master/LICENSE) -* Development Status: [Pre-Alpha](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha) - -# Overview - -**RDT** is a Python library used to transform data for data science libraries and preserve -the transformations in order to revert them as needed. - -# Addons - -**RDT** addons are optional families of transformers that can be installed on top of -the main package to provide a wider range of transformers. - diff --git a/rdt/transformers/addons/addons_setup.py b/rdt/transformers/addons/addons_setup.py deleted file mode 100644 index 93aabcd..0000000 --- a/rdt/transformers/addons/addons_setup.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""The setup script for the addons packages.""" - -import json -import os -import shutil -import sys -from copy import deepcopy -from glob import glob -from tempfile import TemporaryDirectory - -from setuptools import find_namespace_packages, setup - -import rdt - -with open("README.md", encoding="utf-8") as readme_file: - README = readme_file.read() - -RDT_VERSION = rdt.__version__ -ADDONS_PATH = os.path.dirname(os.path.realpath(__file__)) - - -def _build_setup(addon_json): - - with open(addon_json, "r", encoding="utf-8") as f: - addon = json.load(f) - - addon_name = addon.get("name") - addon_module = addon.get("transformers")[0].split(".") - addon_module = ".".join(addon_module[:-2]) - - install_requires = [f"rdt>={RDT_VERSION}"] - install_requires.extend(addon.get("requirements", [])) - - # Build the addon package with setuptools, pinned to the current RDT version - setup( - author="MIT Data To AI Lab", - author_email="dailabmit@gmail.com", - classifiers=[ - "Development Status :: 2 - Pre-Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - ], - description="Reversible Data Transforms", - include_package_data=False, - install_requires=install_requires, - keywords=["rdt", addon_name], - license="MIT license", - long_description=README, - long_description_content_type="text/markdown", - name=addon_name, - packages=find_namespace_packages(include=[addon_module]), - python_requires=">=3.6,<3.10", - url="https://github.com/sdv-dev/RDT", - version=RDT_VERSION, - zip_safe=False, - ) - - -def _run(): - path = sys.argv[0] - base_path = os.path.realpath(path).replace(path, "") - - # clear build if exists - build_path = os.path.join(base_path, "build") - if os.path.exists(build_path): - shutil.rmtree(build_path) - - families = deepcopy(sys.argv[1:]) - all_families = [family for
family in os.listdir(".") if os.path.isdir(family)] - - families = list(set(families).intersection(set(all_families))) - for addon in glob(f"{ADDONS_PATH}/*/*.json"): - with TemporaryDirectory() as temp_dir: - build_command = [ - path, - "bdist_wheel", - "--keep-temp", - "--dist-dir", - "dist", - "--bdist-dir", - temp_dir, - "sdist", - "--keep-temp", - "--dist-dir", - "dist", - "egg_info", - "--egg-base", - temp_dir, - ] - - base_name = os.path.basename(os.path.dirname(addon)) - sys.argv = deepcopy(build_command) - - if not families: - _build_setup(addon) - else: - if os.path.basename(os.path.dirname(addon)) in families: - _build_setup(addon) - - remove_addon_build = os.path.join( - base_path, "build", "lib", "rdt", "transformers", "addons", base_name - ) - - # delete only the processed addon folder - shutil.rmtree(remove_addon_build) - - -_run() diff --git a/rdt/transformers/addons/identity/__init__.py b/rdt/transformers/addons/identity/__init__.py deleted file mode 100644 index 59bd0f1..0000000 --- a/rdt/transformers/addons/identity/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Identity addons module.""" - -from rdt.transformers.addons.identity.identity import IdentityTransformer - -__all__ = ["IdentityTransformer"] diff --git a/rdt/transformers/addons/identity/config.json b/rdt/transformers/addons/identity/config.json deleted file mode 100644 index f3ad1e2..0000000 --- a/rdt/transformers/addons/identity/config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "name": "rdt_identity", - "transformers": [ - "rdt.transformers.addons.identity.identity.IdentityTransformer" - ] -} diff --git a/rdt/transformers/addons/identity/identity.py b/rdt/transformers/addons/identity/identity.py deleted file mode 100644 index ce1523b..0000000 --- a/rdt/transformers/addons/identity/identity.py +++ /dev/null @@ -1,44 +0,0 @@ -"""IdentityTransformer module.""" - -from rdt.transformers.base import BaseTransformer - - -class IdentityTransformer(BaseTransformer): - """Identity transformer that produces the same data. - - This transformer is intended for testing purposes only. The transform and reverse transform - of this data is equal to the input. - """ - - def _fit(self, data): - """Fit the transformer to the data. - - Args: - data (pandas.Series or numpy.ndarray): - Data to fit the transformer to. - """ - self.OUTPUT_TYPES = {column: None for column in self.columns} - - def _transform(self, data): - """Return the same input data. - - Args: - data (pandas.Series or numpy.ndarray): - Data to transform. - - Returns: - pandas.DataFrame or pandas.Series - """ - return data - - def _reverse_transform(self, data): - """Return the same input data. - - Args: - data (pandas.Series or numpy.ndarray): - Data to revert. - - Returns: - pandas.DataFrame or pandas.Series - """ - return data diff --git a/rdt/transformers/base.py b/rdt/transformers/base.py deleted file mode 100644 index 6cc4d03..0000000 --- a/rdt/transformers/base.py +++ /dev/null @@ -1,291 +0,0 @@ -"""BaseTransformer module.""" -import abc - -import pandas as pd - - -class BaseTransformer: - """Base class for all transformers. - - The ``BaseTransformer`` class contains methods that must be implemented - in order to create a new transformer. The ``_fit`` method is optional, - and ``fit_transform`` method is already implemented. 
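As its docstring says, the deleted IdentityTransformer exists only for testing: transform and reverse_transform hand back their input. A minimal round trip under the base-class API, with an illustrative DataFrame and column name:

    import pandas as pd
    from rdt.transformers.addons.identity import IdentityTransformer

    df = pd.DataFrame({"a": [1, 2, 3]})
    transformer = IdentityTransformer()
    transformed = transformer.fit_transform(df, columns=["a"])  # same values, prefixed column name
    restored = transformer.reverse_transform(transformed)       # original column restored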
- """ - - INPUT_TYPE = None - OUTPUT_TYPES = None - DETERMINISTIC_TRANSFORM = None - DETERMINISTIC_REVERSE = None - COMPOSITION_IS_IDENTITY = None - NEXT_TRANSFORMERS = None - - columns = None - column_prefix = None - output_columns = None - - @classmethod - def get_subclasses(cls): - """Recursively find subclasses of this Baseline. - - Returns: - list: - List of all subclasses of this class. - """ - subclasses = [] - for subclass in cls.__subclasses__(): - if abc.ABC not in subclass.__bases__: - subclasses.append(subclass) - - subclasses += subclass.get_subclasses() - - return subclasses - - @classmethod - def get_input_type(cls): - """Return the input type supported by the transformer. - - Returns: - string: - Accepted input type of the transformer. - """ - return cls.INPUT_TYPE - - def _add_prefix(self, dictionary): - if not dictionary: - return {} - - output = {} - for output_columns, output_type in dictionary.items(): - output[f"{self.column_prefix}.{output_columns}"] = output_type - - return output - - def get_output_types(self): - """Return the output types produced by this transformer. - - Returns: - dict: - Mapping from the transformed column names to the produced data types. - """ - return self._add_prefix(self.OUTPUT_TYPES) - - def is_transform_deterministic(self): - """Return whether the transform is deterministic. - - Returns: - bool: - Whether or not the transform is deterministic. - """ - return self.DETERMINISTIC_TRANSFORM - - def is_reverse_deterministic(self): - """Return whether the reverse transform is deterministic. - - Returns: - bool: - Whether or not the reverse transform is deterministic. - """ - return self.DETERMINISTIC_REVERSE - - def is_composition_identity(self): - """Return whether composition of transform and reverse transform produces the input data. - - Returns: - bool: - Whether or not transforming and then reverse transforming returns the input data. - """ - return self.COMPOSITION_IS_IDENTITY - - def get_next_transformers(self): - """Return the suggested next transformer to be used for each column. - - Returns: - dict: - Mapping from transformed column names to the transformers to apply to each column. - """ - return self._add_prefix(self.NEXT_TRANSFORMERS) - - def get_input_columns(self): - """Return list of input column names for transformer. - - Returns: - list: - Input column names. - """ - return self.columns - - def get_output_columns(self): - """Return list of column names created in ``transform``. - - Returns: - list: - Names of columns created during ``transform``. 
- """ - return list(self.get_output_types()) - - def _store_columns(self, columns, data): - if isinstance(columns, tuple) and columns not in data: - columns = list(columns) - elif not isinstance(columns, list): - columns = [columns] - - missing = set(columns) - set(data.columns) - if missing: - raise KeyError(f"Columns {missing} were not present in the data.") - - self.columns = columns - - @staticmethod - def _get_columns_data(data, columns): - if len(columns) == 1: - columns = columns[0] - - return data[columns] - - @staticmethod - def _set_columns_data(data, columns_data, columns): - if columns_data is None: - return - - if isinstance(columns_data, (pd.DataFrame, pd.Series)): - columns_data.index = data.index - - if len(columns_data.shape) == 1: - data[columns[0]] = columns_data - else: - data[columns] = columns_data - - def _build_output_columns(self, data): - self.column_prefix = "#".join(self.columns) - self.output_columns = list(self.get_output_types().keys()) - - # make sure none of the generated `output_columns` exists in the data - data_columns = set(data.columns) - while data_columns & set(self.output_columns): - self.column_prefix += "#" - self.output_columns = list(self.get_output_types().keys()) - - def _fit(self, columns_data): - """Fit the transformer to the data. - - Args: - columns_data (pandas.DataFrame or pandas.Series): - Data to transform. - """ - raise NotImplementedError() - - def fit(self, data, columns): - """Fit the transformer to the `columns` of the `data`. - - Args: - data (pandas.DataFrame): - The entire table. - columns (list): - Column names. Must be present in the data. - """ - self._store_columns(columns, data) - - columns_data = self._get_columns_data(data, self.columns) - self._fit(columns_data) - - self._build_output_columns(data) - - def _transform(self, columns_data): - """Transform the data. - - Args: - columns_data (pandas.DataFrame or pandas.Series): - Data to transform. - - Returns: - pandas.DataFrame or pandas.Series: - Transformed data. - """ - raise NotImplementedError() - - def transform(self, data, drop=True): - """Transform the `self.columns` of the `data`. - - Args: - data (pandas.DataFrame): - The entire table. - drop (bool): - Whether or not to drop original columns. - - Returns: - pd.DataFrame: - The entire table, containing the transformed data. - """ - # if `data` doesn't have the columns that were fitted on, don't transform - if any(column not in data.columns for column in self.columns): - return data - - data = data.copy() - - columns_data = self._get_columns_data(data, self.columns) - transformed_data = self._transform(columns_data) - - self._set_columns_data(data, transformed_data, self.output_columns) - if drop: - data = data.drop(self.columns, axis=1) - - return data - - def fit_transform(self, data, columns): - """Fit the transformer to the `columns` of the `data` and then transform them. - - Args: - data (pandas.DataFrame): - The entire table. - columns (list or tuple or str): - List or tuple of column names from the data to transform. - If only one column is provided, it can be passed as a string instead. - If none are passed, fits on the entire dataset. - - Returns: - pd.DataFrame: - The entire table, containing the transformed data. - """ - self.fit(data, columns) - return self.transform(data) - - def _reverse_transform(self, columns_data): - """Revert the transformations to the original values. - - Args: - columns_data (pandas.DataFrame or pandas.Series): - Data to revert. 
- - Returns: - pandas.DataFrame or pandas.Series: - Reverted data. - """ - raise NotImplementedError() - - def reverse_transform(self, data, drop=True): - """Revert the transformations to the original values. - - Args: - data (pandas.DataFrame): - The entire table. - drop (bool): - Whether or not to drop derived columns. - - Returns: - pandas.DataFrame: - The entire table, containing the reverted data. - """ - # if `data` doesn't have the columns that were transformed, don't reverse_transform - if any(column not in data.columns for column in self.output_columns): - return data - - data = data.copy() - - columns_data = self._get_columns_data(data, self.output_columns) - reversed_data = self._reverse_transform(columns_data) - - self._set_columns_data(data, reversed_data, self.columns) - if drop: - data = data.drop(self.output_columns, axis=1) - - return data diff --git a/rdt/transformers/boolean.py b/rdt/transformers/boolean.py deleted file mode 100644 index 1f99db4..0000000 --- a/rdt/transformers/boolean.py +++ /dev/null @@ -1,104 +0,0 @@ -"""Transformer for boolean data.""" - -import numpy as np -import pandas as pd - -from rdt.transformers.base import BaseTransformer -from rdt.transformers.null import NullTransformer - - -class BooleanTransformer(BaseTransformer): - """Transformer for boolean data. - - This transformer replaces boolean values with their integer representation - transformed to float. - - Null values are replaced using a ``NullTransformer``. - - Args: - nan (int or None): - Replace null values with the given value. If ``None``, do not replace them. - Defaults to ``-1``. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the fit data contains null values. - If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - """ - - INPUT_TYPE = "boolean" - DETERMINISTIC_TRANSFORM = True - DETERMINISTIC_REVERSE = True - - null_transformer = None - - def __init__(self, nan=-1, null_column=None): - self.nan = nan - self.null_column = null_column - - def get_output_types(self): - """Return the output types returned by this transformer. - - Returns: - dict: - Mapping from the transformed column names to the produced data types. - """ - output_types = { - "value": "float", - } - if self.null_transformer and self.null_transformer.creates_null_column(): - output_types["is_null"] = "float" - - return self._add_prefix(output_types) - - def _fit(self, data): - """Fit the transformer to the data. - - Args: - data (pandas.Series): - Data to fit to. - """ - self.null_transformer = NullTransformer(self.nan, self.null_column, copy=True) - self.null_transformer.fit(data) - - def _transform(self, data): - """Transform boolean to float. - - The boolean values will be replaced by the corresponding integer - representations as float values. - - Args: - data (pandas.Series): - Data to transform. - - Returns - pandas.DataFrame or pandas.Series - """ - data = pd.to_numeric(data, errors="coerce") - return self.null_transformer.transform(data).astype(float) - - def _reverse_transform(self, data): - """Transform float values back to the original boolean values. - - Args: - data (pandas.DataFrame or pandas.Series): - Data to revert. - - Returns: - pandas.Series: - Reverted data. 
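Per the BooleanTransformer docstring above, booleans become 0.0/1.0 floats and nulls are delegated to a NullTransformer, optionally with an is_null indicator column. A small round-trip sketch; the column name is illustrative and the output names follow the base class's prefixing scheme:

    import pandas as pd
    from rdt.transformers import BooleanTransformer

    data = pd.DataFrame({"paid": [True, False, None, True]})
    transformer = BooleanTransformer(nan=-1, null_column=True)
    transformed = transformer.fit_transform(data, columns=["paid"])
    # The value column holds 1.0 / 0.0 / -1.0; the is_null column flags the missing entry.
    restored = transformer.reverse_transform(transformed)  # nulls restored via the indicator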
- """ - if not isinstance(data, np.ndarray): - data = data.to_numpy() - - if self.nan is not None: - data = self.null_transformer.reverse_transform(data) - - if isinstance(data, np.ndarray): - if data.ndim == 2: - data = data[:, 0] - - data = pd.Series(data) - - return np.round(data).clip(0, 1).astype("boolean").astype("object") diff --git a/rdt/transformers/categorical.py b/rdt/transformers/categorical.py deleted file mode 100644 index 9d0cbeb..0000000 --- a/rdt/transformers/categorical.py +++ /dev/null @@ -1,509 +0,0 @@ -"""Transformers for categorical data.""" - -import numpy as np -import pandas as pd -import psutil -from scipy.stats import norm - -from rdt.transformers.base import BaseTransformer - - -class CategoricalTransformer(BaseTransformer): - """Transformer for categorical data. - - This transformer computes a float representative for each one of the categories - found in the fit data, and then replaces the instances of these categories with - the corresponding representative. - - The representatives are decided by sorting the categorical values by their relative - frequency, then dividing the ``[0, 1]`` interval by these relative frequencies, and - finally assigning the middle point of each interval to the corresponding category. - - When the transformation is reverted, each value is assigned the category that - corresponds to the interval it falls in. - - Null values are considered just another category. - - Args: - fuzzy (bool): - Whether to generate gaussian noise around the class representative of each interval - or just use the mean for all the replaced values. Defaults to ``False``. - clip (bool): - If ``True``, clip the values to [0, 1]. Otherwise normalize them using modulo 1. - Defaults to ``False``. - """ - - INPUT_TYPE = "categorical" - OUTPUT_TYPES = {"value": "float"} - DETERMINISTIC_REVERSE = True - COMPOSITION_IS_IDENTITY = True - - mapping = None - intervals = None - starts = None - means = None - dtype = None - _get_category_from_index = None - - def __setstate__(self, state): - """Replace any ``null`` key by the actual ``np.nan`` instance.""" - intervals = state.get("intervals") - if intervals: - for key in list(intervals): - if pd.isna(key): - intervals[np.nan] = intervals.pop(key) - - self.__dict__ = state - - def __init__(self, fuzzy=False, clip=False): - self.fuzzy = fuzzy - self.clip = clip - - def is_transform_deterministic(self): - """Return whether the transform is deterministic. - - Returns: - bool: - Whether or not the transform is deterministic. - """ - return not self.fuzzy - - def is_composition_identity(self): - """Return whether composition of transform and reverse transform produces the input data. - - Returns: - bool: - Whether or not transforming and then reverse transforming returns the input data. - """ - return self.COMPOSITION_IS_IDENTITY and not self.fuzzy - - @staticmethod - def _get_intervals(data): - """Compute intervals for each categorical value. - - Args: - data (pandas.Series): - Data to analyze. - - Returns: - dict: - intervals for each categorical value (start, end). 
- """ - data = data.fillna(np.nan) - frequencies = data.value_counts(dropna=False) - - start = 0 - end = 0 - elements = len(data) - - intervals = {} - means = [] - starts = [] - for value, frequency in frequencies.items(): - prob = frequency / elements - end = start + prob - mean = start + prob / 2 - std = prob / 6 - if pd.isna(value): - value = np.nan - - intervals[value] = (start, end, mean, std) - means.append(mean) - starts.append((value, start)) - start = end - - means = pd.Series(means, index=list(frequencies.keys())) - starts = pd.DataFrame(starts, columns=["category", "start"]).set_index("start") - - return intervals, means, starts - - def _fit(self, data): - """Fit the transformer to the data. - - Create the mapping dict to save the label encoding. - Finally, compute the intervals for each categorical value. - - Args: - data (pandas.Series): - Data to fit the transformer to. - """ - self.mapping = {} - self.dtype = data.dtype - - self.intervals, self.means, self.starts = self._get_intervals(data) - self._get_category_from_index = list(self.means.index).__getitem__ - - def _transform_by_category(self, data): - """Transform the data by iterating over the different categories.""" - result = np.empty(shape=(len(data),), dtype=float) - - # loop over categories - for category, values in self.intervals.items(): - mean, std = values[2:] - if category is np.nan: - mask = data.isna() - else: - mask = data.to_numpy() == category - - if self.fuzzy: - result[mask] = norm.rvs(mean, std, size=mask.sum()) - else: - result[mask] = mean - - return result - - def _get_value(self, category): - """Get the value that represents this category.""" - if pd.isna(category): - category = np.nan - - mean, std = self.intervals[category][2:] - - if self.fuzzy: - return norm.rvs(mean, std) - - return mean - - def _transform_by_row(self, data): - """Transform the data row by row.""" - return data.fillna(np.nan).apply(self._get_value).to_numpy() - - def _transform(self, data): - """Transform categorical values to float values. - - Replace the categories with their float representative value. - - Args: - data (pandas.Series): - Data to transform. - - Returns: - numpy.ndarray: - """ - if len(self.means) < len(data): - return self._transform_by_category(data) - - return self._transform_by_row(data) - - def _normalize(self, data): - """Normalize data to the range [0, 1]. - - This is done by either clipping or computing the values modulo 1. 
- """ - if self.clip: - return data.clip(0, 1) - - return data % 1 - - def _reverse_transform_by_matrix(self, data): - """Reverse transform the data with matrix operations.""" - num_rows = len(data) - num_categories = len(self.means) - - data = np.broadcast_to(data, (num_categories, num_rows)).T - means = np.broadcast_to(self.means, (num_rows, num_categories)) - diffs = np.abs(data - means) - indexes = np.argmin(diffs, axis=1) - - self._get_category_from_index = list(self.means.index).__getitem__ - return ( - pd.Series(indexes).apply(self._get_category_from_index).astype(self.dtype) - ) - - def _reverse_transform_by_category(self, data): - """Reverse transform the data by iterating over all the categories.""" - result = np.empty(shape=(len(data),), dtype=self.dtype) - - # loop over categories - for category, values in self.intervals.items(): - start = values[0] - mask = start <= data.to_numpy() - result[mask] = category - - return pd.Series(result, index=data.index, dtype=self.dtype) - - def _get_category_from_start(self, value): - lower = self.starts.loc[:value] - return lower.iloc[-1].category - - def _reverse_transform_by_row(self, data): - """Reverse transform the data by iterating over each row.""" - return data.apply(self._get_category_from_start).astype(self.dtype) - - def _reverse_transform(self, data): - """Convert float values back to the original categorical values. - - Args: - data (pd.Series): - Data to revert. - - Returns: - pandas.Series - """ - data = self._normalize(data) - - num_rows = len(data) - num_categories = len(self.means) - - # total shape * float size * number of matrices needed - needed_memory = num_rows * num_categories * 8 * 3 - available_memory = psutil.virtual_memory().available - if available_memory > needed_memory: - return self._reverse_transform_by_matrix(data) - - if num_rows > num_categories: - return self._reverse_transform_by_category(data) - - # loop over rows - return self._reverse_transform_by_row(data) - - -class CategoricalFuzzyTransformer(CategoricalTransformer): - """Transformer for categorical data. - - This transformer computes a float representative for each one of the categories - found in the fit data. Then, when transforming, it replaces the instances of these - categories with the corresponding representatives plus some added gaussian noise. - - The representatives are decided by sorting the categorical values by their relative - frequency, then dividing the ``[0, 1]`` interval by these relative frequencies, and - finally assigning the middle point of each interval to the corresponding category. - - When the transformation is reverted, each value is assigned the category that - corresponds to the interval it falls in. - - Null values are considered just another category. - - This class behaves exactly as the ``CategoricalTransformer`` with ``fuzzy=True``. - - Args: - clip (bool): - If ``True``, clip the values to [0, 1]. Otherwise normalize them using modulo 1. - Defaults to ``False``. - """ - - def __init__(self, clip=False): - super().__init__(fuzzy=True, clip=clip) - - -class OneHotEncodingTransformer(BaseTransformer): - """OneHotEncoding for categorical data. - - This transformer replaces a single vector with N unique categories in it - with N vectors which have 1s on the rows where the corresponding category - is found and 0s on the rest. - - Null values are considered just another category. 
- - Args: - error_on_unknown (bool): - If a value that was not seen during the fit stage is passed to - transform, then an error will be raised if this is True. - Defaults to ``True``. - """ - - INPUT_TYPE = "categorical" - DETERMINISTIC_TRANSFORM = True - DETERMINISTIC_REVERSE = True - - dummies = None - _dummy_na = None - _num_dummies = None - _dummy_encoded = False - _indexer = None - _uniques = None - - def __init__(self, error_on_unknown=True): - self.error_on_unknown = error_on_unknown - - @staticmethod - def _prepare_data(data): - """Transform data to appropriate format. - - If data is a valid list or a list of lists, transforms it into an np.array, - otherwise returns it. - - Args: - data (pandas.Series or pandas.DataFrame): - Data to prepare. - - Returns: - pandas.Series or numpy.ndarray - """ - if isinstance(data, list): - data = np.array(data) - - if len(data.shape) > 2: - raise ValueError("Unexpected format.") - if len(data.shape) == 2: - if data.shape[1] != 1: - raise ValueError("Unexpected format.") - - data = data[:, 0] - - return data - - def get_output_types(self): - """Return the output types produced by this transformer. - - Returns: - dict: - Mapping from the transformed column names to the produced data types. - """ - output_types = {f"value{i}": "float" for i in range(len(self.dummies))} - - return self._add_prefix(output_types) - - def _fit(self, data): - """Fit the transformer to the data. - - Get the pandas `dummies` which will be used later on for OneHotEncoding. - - Args: - data (pandas.Series or pandas.DataFrame): - Data to fit the transformer to. - """ - data = self._prepare_data(data) - - null = pd.isna(data) - self._uniques = list(pd.unique(data[~null])) - self._dummy_na = null.any() - self._num_dummies = len(self._uniques) - self._indexer = list(range(self._num_dummies)) - self.dummies = self._uniques.copy() - - if not np.issubdtype(data.dtype, np.number): - self._dummy_encoded = True - - if self._dummy_na: - self.dummies.append(np.nan) - - def _transform_helper(self, data): - if self._dummy_encoded: - coder = self._indexer - codes = pd.Categorical(data, categories=self._uniques).codes - else: - coder = self._uniques - codes = data - - rows = len(data) - dummies = np.broadcast_to(coder, (rows, self._num_dummies)) - coded = np.broadcast_to(codes, (self._num_dummies, rows)).T - array = (coded == dummies).astype(int) - - if self._dummy_na: - null = np.zeros((rows, 1), dtype=int) - null[pd.isna(data)] = 1 - array = np.append(array, null, axis=1) - - return array - - def _transform(self, data): - """Replace each category with the OneHot vectors. - - Args: - data (pandas.Series, list or list of lists): - Data to transform. - - Returns: - numpy.ndarray: - """ - data = self._prepare_data(data) - array = self._transform_helper(data) - - if self.error_on_unknown: - unknown = array.sum(axis=1) == 0 - if unknown.any(): - raise ValueError( - f"Attempted to transform {list(data[unknown])} ", - "that were not seen during fit stage.", - ) - - return array - - def _reverse_transform(self, data): - """Convert float values back to the original categorical values. - - Args: - data (pd.Series or numpy.ndarray): - Data to revert. - - Returns: - pandas.Series - """ - if not isinstance(data, np.ndarray): - data = data.to_numpy() - - if data.ndim == 1: - data = data.reshape(-1, 1) - - indices = np.argmax(data, axis=1) - - return pd.Series(indices).map(self.dummies.__getitem__) - - -class LabelEncodingTransformer(BaseTransformer): - """LabelEncoding for categorical data. 
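The broadcast trick in _transform_helper above builds the one-hot matrix in a single vectorized comparison: tile the known category codes across rows, tile each row's code across categories, and compare. A standalone sketch with made-up codes:

    import numpy as np

    coder = np.array([0, 1, 2])     # one code per known category
    codes = np.array([2, 0, 0, 1])  # codes of the incoming rows

    rows = len(codes)
    dummies = np.broadcast_to(coder, (rows, len(coder)))
    coded = np.broadcast_to(codes, (len(coder), rows)).T
    one_hot = (coded == dummies).astype(int)
    # [[0 0 1], [1 0 0], [1 0 0], [0 1 0]]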
- - This transformer generates a unique integer representation for each category - and simply replaces each category with its integer value. - - Null values are considered just another category. - - Attributes: - values_to_categories (dict): - Dictionary that maps each integer value for its category. - categories_to_values (dict): - Dictionary that maps each category with the corresponding - integer value. - """ - - INPUT_TYPE = "categorical" - OUTPUT_TYPES = {"value": "integer"} - DETERMINISTIC_TRANSFORM = True - DETERMINISTIC_REVERSE = True - COMPOSITION_IS_IDENTITY = True - - values_to_categories = None - categories_to_values = None - - def _fit(self, data): - """Fit the transformer to the data. - - Generate a unique integer representation for each category and - store them in the `categories_to_values` dict and its reverse - `values_to_categories`. - - Args: - data (pandas.Series): - Data to fit the transformer to. - """ - self.values_to_categories = dict(enumerate(pd.unique(data))) - self.categories_to_values = { - category: value for value, category in self.values_to_categories.items() - } - - def _transform(self, data): - """Replace each category with its corresponding integer value. - - Args: - data (pandas.Series): - Data to transform. - - Returns: - numpy.ndarray: - """ - return pd.Series(data).map(self.categories_to_values) - - def _reverse_transform(self, data): - """Convert float values back to the original categorical values. - - Args: - data (pd.Series or numpy.ndarray): - Data to revert. - - Returns: - pandas.Series - """ - data = data.clip(min(self.values_to_categories), max(self.values_to_categories)) - return data.round().map(self.values_to_categories) diff --git a/rdt/transformers/datetime.py b/rdt/transformers/datetime.py deleted file mode 100644 index 51c66a5..0000000 --- a/rdt/transformers/datetime.py +++ /dev/null @@ -1,203 +0,0 @@ -"""Transformer for datetime data.""" -import numpy as np -import pandas as pd - -from rdt.transformers.base import BaseTransformer -from rdt.transformers.null import NullTransformer - - -class DatetimeTransformer(BaseTransformer): - """Transformer for datetime data. - - This transformer replaces datetime values with an integer timestamp - transformed to float. - - Null values are replaced using a ``NullTransformer``. - - Args: - nan (int, str or None): - Indicate what to do with the null values. If an integer is given, replace them - with the given value. If the strings ``'mean'`` or ``'mode'`` are given, replace - them with the corresponding aggregation. If ``None`` is given, do not replace them. - Defaults to ``'mean'``. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the data contains null values. - If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - strip_constant (bool): - Whether to optimize the output values by finding the smallest time unit that - is not zero on the training datetimes and dividing the generated numerical - values by the value of the next smallest time unit. This, a part from reducing the - orders of magnitued of the transformed values, ensures that reverted values always - are zero on the lower time units. - format (str): - The strftime to use for parsing time. For more information, see - https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior. 
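LabelEncodingTransformer's reverse path, shown above, clips to the known integer range and rounds before mapping back, so out-of-range synthetic values still decode to a valid category. A round-trip sketch with an illustrative column:

    import pandas as pd
    from rdt.transformers import LabelEncodingTransformer

    data = pd.DataFrame({"color": ["red", "blue", "red", "green"]})
    transformer = LabelEncodingTransformer()
    transformed = transformer.fit_transform(data, columns=["color"])  # integers 0..2
    restored = transformer.reverse_transform(transformed)             # original categories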
- """ - - INPUT_TYPE = "datetime" - DETERMINISTIC_TRANSFORM = True - DETERMINISTIC_REVERSE = True - COMPOSITION_IS_IDENTITY = True - - null_transformer = None - divider = None - - def __init__( - self, nan="mean", null_column=None, strip_constant=False, datetime_format=None - ): - self.nan = nan - self.null_column = null_column - self.strip_constant = strip_constant - self.datetime_format = datetime_format - - def is_composition_identity(self): - """Return whether composition of transform and reverse transform produces the input data. - - Returns: - bool: - Whether or not transforming and then reverse transforming returns the input data. - """ - if self.null_transformer and not self.null_transformer.creates_null_column(): - return False - - return self.COMPOSITION_IS_IDENTITY - - def get_output_types(self): - """Return the output types supported by the transformer. - - Returns: - dict: - Mapping from the transformed column names to supported data types. - """ - output_types = { - "value": "float", - } - if self.null_transformer and self.null_transformer.creates_null_column(): - output_types["is_null"] = "float" - - return self._add_prefix(output_types) - - def _find_divider(self, transformed): - self.divider = 1 - multipliers = [10] * 9 + [60, 60, 24] - for multiplier in multipliers: - candidate = self.divider * multiplier - if (transformed % candidate).any(): - break - - self.divider = candidate - - def _convert_to_datetime(self, data): - if data.dtype == "object": - try: - data = pd.to_datetime(data, format=self.datetime_format) - - except ValueError as error: - if "Unknown string format:" in str(error): - message = "Data must be of dtype datetime, or castable to datetime." - raise TypeError(message) from None - - raise ValueError( - "Data does not match specified datetime format." - ) from None - - return data - - def _transform_helper(self, datetimes): - """Transform datetime values to integer.""" - datetimes = self._convert_to_datetime(datetimes) - nulls = datetimes.isna() - integers = ( - pd.to_numeric(datetimes, errors="coerce").to_numpy().astype(np.float64) - ) - integers[nulls] = np.nan - transformed = pd.Series(integers) - - if self.strip_constant: - self._find_divider(transformed) - transformed = transformed // self.divider - - return transformed - - def _fit(self, data): - """Fit the transformer to the data. - - Args: - data (pandas.Series): - Data to fit the transformer to. - """ - transformed = self._transform_helper(data) - self.null_transformer = NullTransformer(self.nan, self.null_column, copy=True) - self.null_transformer.fit(transformed) - - def _transform(self, data): - """Transform datetime values to float values. - - Args: - data (pandas.Series): - Data to transform. - - Returns: - numpy.ndarray - """ - data = self._transform_helper(data) - return self.null_transformer.transform(data) - - def _reverse_transform(self, data): - """Convert float values back to datetimes. - - Args: - data (pandas.Series or numpy.ndarray): - Data to transform. - - Returns: - pandas.Series - """ - if not isinstance(data, np.ndarray): - data = data.to_numpy() - - if self.nan is not None: - data = self.null_transformer.reverse_transform(data) - - if isinstance(data, np.ndarray) and (data.ndim == 2): - data = data[:, 0] - - data = np.round(data.astype(np.float64)) - if self.strip_constant: - data = data * self.divider - - return pd.to_datetime(data) - - -class DatetimeRoundedTransformer(DatetimeTransformer): - """Transformer for datetime data. 
- - This transformer replaces datetime values with an integer timestamp transformed to float. - It optimizes the output values by finding the smallest time unit that is not zero on - the training datetimes and dividing the generated numerical values by the value of the next - smallest time unit. This, apart from reducing the orders of magnitued of the transformed - values, ensures that reverted values always are zero on the lower time units. - - Null values are replaced using a ``NullTransformer``. - - This class behaves exactly as the ``DatetimeTransformer`` with ``strip_constant=True``. - - Args: - nan (int, str or None): - Indicate what to do with the null values. If an integer is given, replace them - with the given value. If the strings ``'mean'`` or ``'mode'`` are given, replace - them with the corresponding aggregation. If ``None`` is given, do not replace them. - Defaults to ``'mean'``. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the data contains null values. - If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - """ - - def __init__(self, nan="mean", null_column=None): - super().__init__(nan=nan, null_column=null_column, strip_constant=True) diff --git a/rdt/transformers/null.py b/rdt/transformers/null.py deleted file mode 100644 index 467f19d..0000000 --- a/rdt/transformers/null.py +++ /dev/null @@ -1,160 +0,0 @@ -"""Transformer for data that contains Null values.""" - -import warnings - -import numpy as np -import pandas as pd - -IRREVERSIBLE_WARNING = ( - "Replacing nulls with existing value without `null_column`, which is not reversible. " - "Use `null_column=True` to ensure that the transformation is reversible." -) - - -class NullTransformer: - """Transformer for data that contains Null values. - - Args: - fill_value (object or None): - Value to replace nulls, or strategy to compute the value, which can - be ``mean`` or ``mode``. If ``None`` is given, the ``mean`` or ``mode`` - strategy will be applied depending on whether the input data is numerical - or not. Defaults to `None`. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the data contains null values. - If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - copy (bool): - Whether to create a copy of the input data or modify it destructively. - """ - - nulls = None - _null_column = None - _fill_value = None - - def __init__(self, fill_value=None, null_column=None, copy=False): - self.fill_value = fill_value - self.null_column = null_column - self.copy = copy - - def creates_null_column(self): - """Indicate whether this transformer creates a null column on transform. - - Returns: - bool: - Whether a null column is created on transform. - """ - return bool(self._null_column) - - def _get_fill_value(self, data, null_values): - """Get the fill value to use for the given data. - - Args: - data (pd.Series): - The data that is being transformed. - null_values (np.array): - Array of boolean values that indicate which values in the - input data are nulls. - - Return: - object: - The fill value that needs to be used. 
- """ - fill_value = self.fill_value - - if fill_value in (None, "mean", "mode") and null_values.all(): - return 0 - - if fill_value is None: - if pd.api.types.is_numeric_dtype(data): - fill_value = "mean" - else: - fill_value = "mode" - - if fill_value == "mean": - return data.mean() - - if fill_value == "mode": - return data.mode(dropna=True)[0] - - return fill_value - - def fit(self, data): - """Fit the transformer to the data. - - Evaluate if the transformer has to create the null column or not. - - Args: - data (pandas.Series): - Data to transform. - """ - null_values = data.isna().to_numpy() - self.nulls = null_values.any() - - self._fill_value = self._get_fill_value(data, null_values) - - if self.null_column is None: - self._null_column = self.nulls - else: - self._null_column = self.null_column - - def transform(self, data): - """Replace null values with the indicated fill_value. - - If required, create the null indicator column. - - Args: - data (pandas.Series or numpy.ndarray): - Data to transform. - - Returns: - numpy.ndarray - """ - isna = data.isna() - if isna.any(): - if not self._null_column and self._fill_value in data.to_numpy(): - warnings.warn(IRREVERSIBLE_WARNING) - - if not self.copy: - data[isna] = self._fill_value - else: - data = data.fillna(self._fill_value) - - if self._null_column: - return pd.concat([data, isna.astype(np.float64)], axis=1).to_numpy() - - return data.to_numpy() - - def reverse_transform(self, data): - """Restore null values to the data. - - If a null indicator column was created during fit, use it as a reference. - Otherwise, replace all instances of ``fill_value`` that can be found in - data. - - Args: - data (numpy.ndarray): - Data to transform. - - Returns: - pandas.Series - """ - if self._null_column: - if self.nulls: - isna = data[:, 1] > 0.5 - - data = data[:, 0] - if self.copy: - data = data.copy() - - elif self.nulls: - isna = self._fill_value == data - - data = pd.Series(data) - - if self.nulls and isna.any(): - data.loc[isna] = np.nan - - return data diff --git a/rdt/transformers/numerical.py b/rdt/transformers/numerical.py deleted file mode 100644 index 1452a1b..0000000 --- a/rdt/transformers/numerical.py +++ /dev/null @@ -1,718 +0,0 @@ -"""Transformers for numerical data.""" -import copy -import sys - -import numpy as np -import pandas as pd -import scipy -from sklearn.mixture import BayesianGaussianMixture - -from rdt.transformers.base import BaseTransformer -from rdt.transformers.null import NullTransformer - -EPSILON = np.finfo(np.float32).eps -MAX_DECIMALS = sys.float_info.dig - 1 - - -class NumericalTransformer(BaseTransformer): - """Transformer for numerical data. - - This transformer replaces integer values with their float equivalent. - Non null float values are not modified. - - Null values are replaced using a ``NullTransformer``. - - Args: - dtype (data type): - Data type of the data to transform. It will be used when reversing the - transformation. If not provided, the dtype of the fit data will be used. - Defaults to ``None``. - nan (int, str or None): - Indicate what to do with the null values. If an integer is given, replace them - with the given value. If the strings ``'mean'`` or ``'mode'`` are given, replace - them with the corresponding aggregation. If ``None`` is given, do not replace them. - Defaults to ``'mean'``. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the data contains null values. 
- If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - rounding (int, str or None): - Define rounding scheme for data. If set to an int, values will be rounded - to that number of decimal places. If ``None``, values will not be rounded. - If set to ``'auto'``, the transformer will round to the maximum number of - decimal places detected in the fitted data. - min_value (int, str or None): - Indicate whether or not to set a minimum value for the data. If an integer is given, - reverse transformed data will be greater than or equal to it. If the string ``'auto'`` - is given, the minimum will be the minimum value seen in the fitted data. If ``None`` - is given, there won't be a minimum. - max_value (int, str or None): - Indicate whether or not to set a maximum value for the data. If an integer is given, - reverse transformed data will be less than or equal to it. If the string ``'auto'`` - is given, the maximum will be the maximum value seen in the fitted data. If ``None`` - is given, there won't be a maximum. - """ - - INPUT_TYPE = "numerical" - DETERMINISTIC_TRANSFORM = True - DETERMINISTIC_REVERSE = True - COMPOSITION_IS_IDENTITY = True - - null_transformer = None - nan = None - _dtype = None - _rounding_digits = None - _min_value = None - _max_value = None - - def __init__( - self, - dtype=None, - nan="mean", - null_column=None, - rounding=None, - min_value=None, - max_value=None, - ): - self.nan = nan - self.null_column = null_column - self.dtype = dtype - self.rounding = rounding - self.min_value = min_value - self.max_value = max_value - - def get_output_types(self): - """Return the output types supported by the transformer. - - Returns: - dict: - Mapping from the transformed column names to supported data types. - """ - output_types = { - "value": "float", - } - if self.null_transformer and self.null_transformer.creates_null_column(): - output_types["is_null"] = "float" - - return self._add_prefix(output_types) - - def is_composition_identity(self): - """Return whether composition of transform and reverse transform produces the input data. - - Returns: - bool: - Whether or not transforming and then reverse transforming returns the input data. - """ - if self.null_transformer and not self.null_transformer.creates_null_column(): - return False - - return self.COMPOSITION_IS_IDENTITY - - @staticmethod - def _learn_rounding_digits(data): - # check if data has any decimals - data = np.array(data) - roundable_data = data[~(np.isinf(data) | pd.isna(data))] - if ((roundable_data % 1) != 0).any(): - if not (roundable_data == roundable_data.round(MAX_DECIMALS)).all(): - return None - - for decimal in range(MAX_DECIMALS + 1): - if (roundable_data == roundable_data.round(decimal)).all(): - return decimal - - elif len(roundable_data) > 0: - maximum = max(abs(roundable_data)) - start = int(np.log10(maximum)) if maximum != 0 else 0 - for decimal in range(-start, 1): - if (roundable_data == roundable_data.round(decimal)).all(): - return decimal - - return None - - def _fit(self, data): - """Fit the transformer to the data. - - Args: - data (pandas.DataFrame or pandas.Series): - Data to fit. 
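The 'auto' rounding logic above infers digits from the fitted data: for decimal data it finds the smallest number of decimal places that reproduces every value, and for round integers it walks the negative powers of ten. Two worked cases, calling the private static helper defined above purely for illustration:

    from rdt.transformers import NumericalTransformer

    # Decimal data: every value survives rounding to 2 digits, so 2 is learned.
    NumericalTransformer._learn_rounding_digits([1.12, 2.34, 5.50])  # -> 2

    # Round integers: all values are multiples of 100, so -2 is learned.
    NumericalTransformer._learn_rounding_digits([100, 200, 300])     # -> -2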
- """ - self._dtype = self.dtype or data.dtype - self._min_value = data.min() if self.min_value == "auto" else self.min_value - self._max_value = data.max() if self.max_value == "auto" else self.max_value - - if self.rounding == "auto": - self._rounding_digits = self._learn_rounding_digits(data) - elif isinstance(self.rounding, int): - self._rounding_digits = self.rounding - - self.null_transformer = NullTransformer(self.nan, self.null_column, copy=True) - self.null_transformer.fit(data) - - def _transform(self, data): - """Transform numerical data. - - Integer values are replaced by their float equivalent. Non null float values - are left unmodified. - - Args: - data (pandas.Series): - Data to transform. - - Returns: - numpy.ndarray - """ - return self.null_transformer.transform(data) - - def _reverse_transform(self, data): - """Convert data back into the original format. - - Args: - data (pd.Series or numpy.ndarray): - Data to transform. - - Returns: - numpy.ndarray - """ - if not isinstance(data, np.ndarray): - data = data.to_numpy() - - if self._min_value is not None or self._max_value is not None: - if len(data.shape) > 1: - data[:, 0] = data[:, 0].clip(self._min_value, self._max_value) - else: - data = data.clip(self._min_value, self._max_value) - - if self.nan is not None: - data = self.null_transformer.reverse_transform(data) - - is_integer = np.dtype(self._dtype).kind == "i" - if self._rounding_digits is not None or is_integer: - data = data.round(self._rounding_digits or 0) - - if pd.isna(data).any() and is_integer: - return data - - return data.astype(self._dtype) - - -class NumericalRoundedBoundedTransformer(NumericalTransformer): - """Transformer for numerical data. - - This transformer replaces integer values with their float equivalent, bounded by the fitted - data (the minimum and maximum values seen while fitting). It will also round all values to - the maximum number of decimal places detected in the fitted data. - - Non null float values are not modified. - - This class behaves exactly as the ``NumericalTransformer`` with ``min_value='auto'``, - ``max_value='auto'`` and ``rounding='auto'``. - - Args: - dtype (data type): - Data type of the data to transform. It will be used when reversing the - transformation. If not provided, the dtype of the fit data will be used. - Defaults to ``None``. - nan (int, str or None): - Indicate what to do with the null values. If an integer is given, replace them - with the given value. If the strings ``'mean'`` or ``'mode'`` are given, replace - them with the corresponding aggregation. If ``None`` is given, do not replace them. - Defaults to ``'mean'``. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the data contains null values. - If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - """ - - def __init__(self, dtype=None, nan="mean", null_column=None): - super().__init__( - dtype=dtype, - nan=nan, - null_column=null_column, - min_value="auto", - max_value="auto", - rounding="auto", - ) - - -class NumericalBoundedTransformer(NumericalTransformer): - """Transformer for numerical data. - - This transformer replaces integer values with their float equivalent, bounded by the fitted - data (the minimum and maximum values seen while fitting). - - Non null float values are not modified. 
- - This class behaves exactly as the ``NumericalTransformer`` with ``min_value='auto'``, - ``max_value='auto'`` and ``rounding=None``. - - Args: - dtype (data type): - Data type of the data to transform. It will be used when reversing the - transformation. If not provided, the dtype of the fit data will be used. - Defaults to ``None``. - nan (int, str or None): - Indicate what to do with the null values. If an integer is given, replace them - with the given value. If the strings ``'mean'`` or ``'mode'`` are given, replace - them with the corresponding aggregation. If ``None`` is given, do not replace them. - Defaults to ``'mean'``. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the data contains null values. - If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - """ - - def __init__(self, dtype=None, nan="mean", null_column=None): - super().__init__( - dtype=dtype, - nan=nan, - null_column=null_column, - min_value="auto", - max_value="auto", - rounding=None, - ) - - -class NumericalRoundedTransformer(NumericalTransformer): - """Transformer for numerical data. - - This transformer replaces integer values with their float equivalent, rounding all values to - the maximum number of decimal places detected in the fitted data. - - Non null float values are not modified. - - This class behaves exactly as the ``NumericalTransformer`` with ``min_value=None``, - ``max_value=None`` and ``rounding='auto'``. - - Args: - dtype (data type): - Data type of the data to transform. It will be used when reversing the - transformation. If not provided, the dtype of the fit data will be used. - Defaults to ``None``. - nan (int, str or None): - Indicate what to do with the null values. If an integer is given, replace them - with the given value. If the strings ``'mean'`` or ``'mode'`` are given, replace - them with the corresponding aggregation. If ``None`` is given, do not replace them. - Defaults to ``'mean'``. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the data contains null values. - If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - """ - - def __init__(self, dtype=None, nan="mean", null_column=None): - super().__init__( - dtype=dtype, - nan=nan, - null_column=null_column, - min_value=None, - max_value=None, - rounding="auto", - ) - - -class GaussianCopulaTransformer(NumericalTransformer): - r"""Transformer for numerical data based on copulas transformation. - - Transformation consists on bringing the input data to a standard normal space - by using a combination of *cdf* and *inverse cdf* transformations: - - Given a variable :math:`x`: - - - Find the best possible marginal or use user specified one, :math:`P(x)`. - - do :math:`u = \phi (x)` where :math:`\phi` is cumulative density function, - given :math:`P(x)`. - - do :math:`z = \phi_{N(0,1)}^{-1}(u)`, where :math:`\phi_{N(0,1)}^{-1}` is - the *inverse cdf* of a *standard normal* distribution. - - The reverse transform will do the inverse of the steps above and go from :math:`z` - to :math:`u` and then to :math:`x`. - - Args: - dtype (data type): - Data type of the data to transform. It will be used when reversing the - transformation. 
If not provided, the dtype of the fit data will be used. - Defaults to ``None``. - nan (int, str or None): - Indicate what to do with the null values. If an integer is given, replace them - with the given value. If the strings ``'mean'`` or ``'mode'`` are given, replace - them with the corresponding aggregation. If ``None`` is given, do not replace them. - Defaults to ``'mean'``. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the data contains null values. - If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - distribution (copulas.univariate.Univariate or str): - Copulas univariate distribution to use. Defaults to ``parametric``. To choose from: - - * ``univariate``: Let ``copulas`` select the optimal univariate distribution. - This may result in non-parametric models being used. - * ``parametric``: Let ``copulas`` select the optimal univariate distribution, - but restrict the selection to parametric distributions only. - * ``bounded``: Let ``copulas`` select the optimal univariate distribution, - but restrict the selection to bounded distributions only. - This may result in non-parametric models being used. - * ``semi_bounded``: Let ``copulas`` select the optimal univariate distribution, - but restrict the selection to semi-bounded distributions only. - This may result in non-parametric models being used. - * ``parametric_bounded``: Let ``copulas`` select the optimal univariate - distribution, but restrict the selection to parametric and bounded distributions - only. - * ``parametric_semi_bounded``: Let ``copulas`` select the optimal univariate - distribution, but restrict the selection to parametric and semi-bounded - distributions only. - * ``gaussian``: Use a Gaussian distribution. - * ``gamma``: Use a Gamma distribution. - * ``beta``: Use a Beta distribution. - * ``student_t``: Use a Student T distribution. - * ``gussian_kde``: Use a GaussianKDE distribution. This model is non-parametric, - so using this will make ``get_parameters`` unusable. - * ``truncated_gaussian``: Use a Truncated Gaussian distribution. 
- """ - - _univariate = None - COMPOSITION_IS_IDENTITY = False - - def __init__( - self, dtype=None, nan="mean", null_column=None, distribution="parametric" - ): - super().__init__(dtype=dtype, nan=nan, null_column=null_column) - self._distributions = self._get_distributions() - - if isinstance(distribution, str): - distribution = self._distributions[distribution] - - self._distribution = distribution - - @staticmethod - def _get_distributions(): - try: - from copulas import univariate # pylint: disable=import-outside-toplevel - except ImportError as error: - error.msg += ( - "\n\nIt seems like `copulas` is not installed.\n" - "Please install it using:\n\n pip install rdt[copulas]" - ) - raise - - return { - "univariate": univariate.Univariate, - "parametric": ( - univariate.Univariate, - {"parametric": univariate.ParametricType.PARAMETRIC,}, - ), - "bounded": ( - univariate.Univariate, - {"bounded": univariate.BoundedType.BOUNDED,}, - ), - "semi_bounded": ( - univariate.Univariate, - {"bounded": univariate.BoundedType.SEMI_BOUNDED,}, - ), - "parametric_bounded": ( - univariate.Univariate, - { - "parametric": univariate.ParametricType.PARAMETRIC, - "bounded": univariate.BoundedType.BOUNDED, - }, - ), - "parametric_semi_bounded": ( - univariate.Univariate, - { - "parametric": univariate.ParametricType.PARAMETRIC, - "bounded": univariate.BoundedType.SEMI_BOUNDED, - }, - ), - "gaussian": univariate.GaussianUnivariate, - "gamma": univariate.GammaUnivariate, - "beta": univariate.BetaUnivariate, - "student_t": univariate.StudentTUnivariate, - "gaussian_kde": univariate.GaussianKDE, - "truncated_gaussian": univariate.TruncatedGaussian, - } - - def _get_univariate(self): - distribution = self._distribution - if isinstance(distribution, self._distributions["univariate"]): - return copy.deepcopy(distribution) - if isinstance(distribution, tuple): - return distribution[0](**distribution[1]) - if isinstance(distribution, type) and issubclass( - distribution, self._distributions["univariate"] - ): - return distribution() - - raise TypeError(f"Invalid distribution: {distribution}") - - def _fit(self, data): - """Fit the transformer to the data. - - Args: - data (pandas.Series): - Data to fit to. - """ - self._univariate = self._get_univariate() - - super()._fit(data) - data = super()._transform(data) - if data.ndim > 1: - data = data[:, 0] - - self._univariate.fit(data) - - def _copula_transform(self, data): - cdf = self._univariate.cdf(data) - return scipy.stats.norm.ppf(cdf.clip(0 + EPSILON, 1 - EPSILON)) - - def _transform(self, data): - """Transform numerical data. - - Args: - data (pandas.Series): - Data to transform. - - Returns: - numpy.ndarray - """ - transformed = super()._transform(data) - if transformed.ndim > 1: - transformed[:, 0] = self._copula_transform(transformed[:, 0]) - else: - transformed = self._copula_transform(transformed) - - return transformed - - def _reverse_transform(self, data): - """Convert data back into the original format. - - Args: - data (pd.Series or numpy.ndarray): - Data to transform. - - Returns: - pandas.Series - """ - if not isinstance(data, np.ndarray): - data = data.to_numpy() - - if data.ndim > 1: - data[:, 0] = self._univariate.ppf(scipy.stats.norm.cdf(data[:, 0])) - else: - data = self._univariate.ppf(scipy.stats.norm.cdf(data)) - - return super()._reverse_transform(data) - - -class BayesGMMTransformer(NumericalTransformer): - """Transformer for numerical data using a Bayesian Gaussian Mixture Model. 
- - This transformation takes a numerical value and transforms it using a Bayesian GMM - model. It generates two outputs, a discrete value which indicates the selected - 'component' of the GMM and a continuous value which represents the normalized value - based on the mean and std of the selected component. - - Args: - dtype (data type): - Data type of the data to transform. It will be used when reversing the - transformation. If not provided, the dtype of the fit data will be used. - Defaults to ``None``. - nan (int, str or None): - Indicate what to do with the null values. If an integer is given, replace them - with the given value. If the strings ``'mean'`` or ``'mode'`` are given, replace - them with the corresponding aggregation. If ``None`` is given, do not replace them. - Defaults to ``'mean'``. - null_column (bool): - Whether to create a new column to indicate which values were null or not. - If ``None``, only create a new column when the data contains null values. - If ``True``, always create the new column whether there are null values or not. - If ``False``, do not create the new column. - Defaults to ``None``. - rounding (int, str or None): - Define rounding scheme for data. If set to an int, values will be rounded - to that number of decimal places. If ``None``, values will not be rounded. - If set to ``'auto'``, the transformer will round to the maximum number of - decimal places detected in the fitted data. - min_value (int, str or None): - Indicate whether or not to set a minimum value for the data. If an integer is given, - reverse transformed data will be greater than or equal to it. If the string ``'auto'`` - is given, the minimum will be the minimum value seen in the fitted data. If ``None`` - is given, there won't be a minimum. - max_value (int, str or None): - Indicate whether or not to set a maximum value for the data. If an integer is given, - reverse transformed data will be less than or equal to it. If the string ``'auto'`` - is given, the maximum will be the maximum value seen in the fitted data. If ``None`` - is given, there won't be a maximum. - max_clusters (int): - The maximum number of mixture components. Depending on the data, the model may select - fewer components (based on the ``weight_threshold``). - Defaults to 10. - weight_threshold (int, float): - The minimum value a component weight can take to be considered a valid component. - ``weights_`` under this value will be ignored. - Defaults to 0.005. - random_state(int): - Sets the random state for the bayesgmm model from sklearn - allows for reproducible results - - Attributes: - _bgm_transformer: - An instance of sklearn`s ``BayesianGaussianMixture`` class. - valid_component_indicator: - An array indicating the valid components. If the weight of a component is greater - than the ``weight_threshold``, it's indicated with True, otherwise it's set to False. 
- """ - - STD_MULTIPLIER = 4 - DETERMINISTIC_TRANSFORM = False - DETERMINISTIC_REVERSE = True - COMPOSITION_IS_IDENTITY = False - - _bgm_transformer = None - valid_component_indicator = None - - def __init__( - self, - dtype=None, - nan="mean", - null_column=None, - rounding=None, - min_value=None, - max_value=None, - random_state=None, - max_clusters=10, - weight_threshold=0.005, - ): - super().__init__( - dtype=dtype, - nan=nan, - null_column=null_column, - rounding=rounding, - min_value=min_value, - max_value=max_value, - ) - self._max_clusters = max_clusters - self._weight_threshold = weight_threshold - self.random_state = random_state - - def get_output_types(self): - """Return the output types supported by the transformer. - - Returns: - dict: - Mapping from the transformed column names to supported data types. - """ - output_types = {"normalized": "float", "component": "categorical"} - if self.null_transformer and self.null_transformer.creates_null_column(): - output_types["is_null"] = "float" - - return self._add_prefix(output_types) - - def _fit(self, data): - """Fit the transformer to the data. - - Args: - data (pandas.Series): - Data to fit to. - """ - self._bgm_transformer = BayesianGaussianMixture( - n_components=self._max_clusters, - weight_concentration_prior_type="dirichlet_process", - weight_concentration_prior=0.001, - n_init=1, - random_state=self.random_state, - ) - - super()._fit(data) - data = super()._transform(data) - if data.ndim > 1: - data = data[:, 0] - - self._bgm_transformer.fit(data.reshape(-1, 1)) - self.valid_component_indicator = ( - self._bgm_transformer.weights_ > self._weight_threshold - ) - - def _transform(self, data): - """Transform the numerical data. - - Args: - data (pandas.Series): - Data to transform. - - Returns: - numpy.ndarray. 
- """ - data = super()._transform(data) - if data.ndim > 1: - data, null_column = data[:, 0], data[:, 1] - - data = data.reshape((len(data), 1)) - means = self._bgm_transformer.means_.reshape((1, self._max_clusters)) - - stds = np.sqrt(self._bgm_transformer.covariances_).reshape( - (1, self._max_clusters) - ) - normalized_values = (data - means) / (self.STD_MULTIPLIER * stds) - normalized_values = normalized_values[:, self.valid_component_indicator] - component_probs = self._bgm_transformer.predict_proba(data) - component_probs = component_probs[:, self.valid_component_indicator] - - selected_component = np.zeros(len(data), dtype="int") - for i in range(len(data)): - component_prob_t = component_probs[i] + 1e-6 - component_prob_t = component_prob_t / component_prob_t.sum() - selected_component[i] = np.random.choice( - np.arange(self.valid_component_indicator.sum()), p=component_prob_t - ) - - aranged = np.arange(len(data)) - normalized = normalized_values[aranged, selected_component].reshape([-1, 1]) - normalized = np.clip(normalized, -0.99, 0.99) - normalized = normalized[:, 0] - rows = [normalized, selected_component] - if self.null_transformer and self.null_transformer.creates_null_column(): - rows.append(null_column) - - return np.stack(rows, axis=1) # noqa: PD013 - - def _reverse_transform_helper(self, data): - normalized = np.clip(data[:, 0], -1, 1) - means = self._bgm_transformer.means_.reshape([-1]) - stds = np.sqrt(self._bgm_transformer.covariances_).reshape([-1]) - selected_component = data[:, 1].astype(int) - - std_t = stds[self.valid_component_indicator][selected_component] - mean_t = means[self.valid_component_indicator][selected_component] - reversed_data = normalized * self.STD_MULTIPLIER * std_t + mean_t - - return reversed_data - - def _reverse_transform(self, data): - """Convert data back into the original format. - - Args: - data (pd.DataFrame or numpy.ndarray): - Data to transform. - - Returns: - pandas.Series. 
- """ - if not isinstance(data, np.ndarray): - data = data.to_numpy() - - recovered_data = self._reverse_transform_helper(data) - if self.null_transformer and self.null_transformer.creates_null_column(): - data = np.stack([recovered_data, data[:, -1]], axis=1) # noqa: PD013 - else: - data = recovered_data - - return super()._reverse_transform(data) diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 405df98..0000000 --- a/requirements.txt +++ /dev/null @@ -1,72 +0,0 @@ -appdirs==1.4.4 -bcj-cffi==0.5.1 -boto3==1.18.34 -botocore==1.21.34 -Brotli==1.0.9 -cached-property==1.5.2 -certifi==2021.10.8 -cffi==1.15.0 -charset-normalizer==2.0.4 -compress-pickle==1.2.0 -copulas==0.6.1 -ctgan==0.5.1 -cycler==0.10.0 -deepecho==0.3.0.post1 -Faker==4.14.2 -feather-format==0.4.1 -gower==0.0.5 -graphviz==0.17 -h5py==3.3.0 -humanfriendly==8.2 -idna==3.2 -importlib-metadata==4.8.2 -jmespath==0.10.0 -joblib==1.2.0 -kiwisolver==1.3.1 -llvmlite==0.36.0 -matplotlib==3.4.2 -multivolumefile==0.2.3 -networkx==2.6.2 -numba==0.53.1 -numpy==1.21.1 -optuna==2.10.0 -packaging==21.3 -pandas==1.1.4 -patsy==0.5.1 -Pillow==9.0.1 -pomegranate==0.14.1 -psutil==5.8.0 -py7zr==0.16.1 -pyarrow==5.0.0 -pycox==0.2.2 -pycparser==2.21 -pycryptodomex==3.10.1 -pyparsing==3.0.4 -pyppmd==0.15.2 -python-dateutil==2.8.2 -pytz==2021.1 -PyYAML==5.4.1 -pyzstd==0.14.4 -requests==2.26.0 -s3transfer==0.5.0 -scikit-learn==0.24.2 -scipy==1.6.3 -sdgym==0.4.1 -sdmetrics==0.4.1 -sdv==0.13.1 -six==1.16.0 -sklearn-pandas==2.2.0 -sktime==0.5.3 -statsmodels==0.12.2 -tabulate==0.8.9 -text-unidecode==1.3 -texttable==1.6.4 -threadpoolctl==2.2.0 -torch==1.9.0 -torchtuples==0.2.0 -torchvision==0.10.0 -tqdm==4.62.0 -typing-extensions==3.10.0.2 -urllib3==1.26.6 -XlsxWriter==1.2.9 -zipp==3.7.0 diff --git a/run.py b/run.py new file mode 100644 index 0000000..0637b0a --- /dev/null +++ b/run.py @@ -0,0 +1,4 @@ +from src.nhssynth import cli + +if __name__ == "__main__": + cli.run() diff --git a/scratch_vae_expts.py b/scratch_vae_expts.py index 2899a3f..0a15de2 100644 --- a/scratch_vae_expts.py +++ b/scratch_vae_expts.py @@ -161,8 +161,7 @@ "SVCDetection", "GMLogLikelihood", "CSTest", - "KSTest", - "KSTestExtended", + "KSComplement", "ContinuousKLDivergence", "DiscreteKLDivergence", ] @@ -172,7 +171,6 @@ gmm_all_seeds = [] cs_all_seeds = [] ks_all_seeds = [] -kses_all_seeds = [] contkls_all_seeds = [] disckls_all_seeds = [] @@ -262,8 +260,7 @@ synthetic_supp=synthetic_supp, categorical_columns=original_categorical_columns, continuous_columns=original_continuous_columns, - saving_filepath="", - pre_proc_method=pre_proc_method, + pre_proc_method=pre_proc_method ) list_metrics = [metrics[i] for i in metrics.columns] @@ -273,11 +270,10 @@ gmm_all_seeds.append(np.array(list_metrics[1])) cs_all_seeds.append(np.array(list_metrics[2])) ks_all_seeds.append(np.array(list_metrics[3])) - kses_all_seeds.append(np.array(list_metrics[4])) - contkls_all_seeds.append(np.array(list_metrics[5])) - disckls_all_seeds.append(np.array(list_metrics[6])) + contkls_all_seeds.append(np.array(list_metrics[4])) + disckls_all_seeds.append(np.array(list_metrics[5])) if args.gower: - gowers_all_seeds.append(np.array(list_metrics[7])) + gowers_all_seeds.append(np.array(list_metrics[6])) gowers = np.array(gowers_all_seeds) print(f"Gowers : {np.mean(gowers)} +/- {np.std(gowers)}") @@ -285,7 +281,6 @@ gmm = np.array(gmm_all_seeds) cs = np.array(cs_all_seeds) ks = np.array(ks_all_seeds) - kses = np.array(kses_all_seeds) contkls = np.array(contkls_all_seeds) disckls = 
np.array(disckls_all_seeds)

@@ -293,7 +288,6 @@
     print(f"GMM: {np.mean(gmm)} +/- {np.std(gmm)}")
     print(f"CS: {np.mean(cs)} +/- {np.std(cs)}")
     print(f"KS: {np.mean(ks)} +/- {np.std(ks)}")
-    print(f"KSE: {np.mean(kses)} +/- {np.std(kses)}")
     print(f"ContKL: {np.mean(contkls)} +/- {np.std(contkls)}")
     print(f"DiscKL: {np.mean(disckls)} +/- {np.std(disckls)}")

@@ -304,13 +298,12 @@
 if args.gower:

     metrics = pd.DataFrame(
-        data=[[svc_all_seeds, gmm_all_seeds, cs_all_seeds, ks_all_seeds, kses_all_seeds, contkls_all_seeds, disckls_all_seeds, gowers_all_seeds]],
+        data=[[svc_all_seeds, gmm_all_seeds, cs_all_seeds, ks_all_seeds, contkls_all_seeds, disckls_all_seeds, gowers_all_seeds]],
         columns=[
             "SVCDetection",
             "GMLogLikelihood",
             "CSTest",
-            "KSTest",
-            "KSTestExtended",
+            "KSComplement",
             "ContinuousKLDivergence",
             "DiscreteKLDivergence",
             "Gower",
@@ -323,8 +316,7 @@
             "SVCDetection": svc_all_seeds,
             "GMLogLikelihood": gmm_all_seeds,
             "CSTest": cs_all_seeds,
-            "KSTest": ks_all_seeds,
-            "KSTestExtended": kses_all_seeds,
+            "KSComplement": ks_all_seeds,
             "ContinuousKLDivergence": contkls_all_seeds,
             "DiscreteKLDivergence": disckls_all_seeds,
         }
diff --git a/sdv_baselines.py b/sdv_baselines.py
index 539c545..fcdae45 100644
--- a/sdv_baselines.py
+++ b/sdv_baselines.py
@@ -101,8 +101,7 @@
     "SVCDetection",
     "GMLogLikelihood",
     "CSTest",
-    "KSTest",
-    "KSTestExtended",
+    "KSComplement",
     "ContinuousKLDivergence",
     "DiscreteKLDivergence",
 ]
@@ -113,7 +112,6 @@
 gmm = []
 cs = []
 ks = []
-kses = []
 contkls = []
 disckls = []

@@ -156,8 +154,7 @@
         synthetic_supp=synthetic_supp,
         categorical_columns=original_categorical_columns,
         continuous_columns=original_continuous_columns,
-        saving_filepath=None,
-        pre_proc_method=pre_proc_method,
+        pre_proc_method=pre_proc_method
     )

     list_metrics = [metrics[i] for i in metrics.columns]

@@ -167,17 +164,15 @@
     gmm.append(np.array(list_metrics[1]))
     cs.append(np.array(list_metrics[2]))
     ks.append(np.array(list_metrics[3]))
-    kses.append(np.array(list_metrics[4]))
-    contkls.append(np.array(list_metrics[5]))
-    disckls.append(np.array(list_metrics[6]))
+    contkls.append(np.array(list_metrics[4]))
+    disckls.append(np.array(list_metrics[5]))

     if args.gower:
-        gowers.append(np.array(list_metrics[7]))
+        gowers.append(np.array(list_metrics[6]))

 svc = np.array(svc)
 gmm = np.array(gmm)
 cs = np.array(cs)
 ks = np.array(ks)
-kses = np.array(kses)
 contkls = np.array(contkls)
 disckls = np.array(disckls)

@@ -190,7 +185,6 @@
 print(f"GMM: {np.mean(gmm)} +/- {np.std(gmm)}")
 print(f"CS: {np.mean(cs)} +/- {np.std(cs)}")
 print(f"KS: {np.mean(ks)} +/- {np.std(ks)}")
-print(f"KSE: {np.mean(kses)} +/- {np.std(kses)}")
 print(f"ContKL: {np.mean(contkls)} +/- {np.std(contkls)}")
 print(f"DiscKL: {np.mean(disckls)} +/- {np.std(disckls)}")

@@ -202,8 +196,7 @@
             "SVCDetection": svc[:, 0],
             "GMLogLikelihood": gmm[:, 0],
             "CSTest": cs[:, 0],
-            "KSTest": ks[:, 0],
-            "KSTestExtended": kses[:, 0],
+            "KSComplement": ks[:, 0],
             "ContinuousKLDivergence": contkls[:, 0],
             "DiscreteKLDivergence": disckls[:, 0],
             "Gower": gowers[:, 0],
@@ -216,7 +209,6 @@
             "GMLogLikelihood": gmm[:, 0],
             "CSTest": cs[:, 0],
-            "KSTest": ks[:, 0],
-            "KSTestExtended": kses[:, 0],
+            "KSComplement": ks[:, 0],
             "ContinuousKLDivergence": contkls[:, 0],
             "DiscreteKLDivergence": disckls[:, 0],
         }
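The `KSTest` / `KSTestExtended` pair is collapsed into SDMetrics' `KSComplement` throughout these scripts. A minimal sketch of computing the renamed metric directly, assuming an `sdmetrics` release (>= 0.5) in which `KSComplement` lives under `sdmetrics.single_column`; the column values are illustrative:

```python
import pandas as pd
from sdmetrics.single_column import KSComplement

# Illustrative real and synthetic marginals for a single continuous column
real = pd.Series([52.1, 48.9, 60.3, 55.7, 49.2])
synthetic = pd.Series([51.8, 50.2, 58.9, 54.1, 47.6])

# KSComplement is 1 - (KS statistic): 1.0 means the synthetic marginal
# matches the real one exactly, 0.0 means maximal divergence
score = KSComplement.compute(real_data=real, synthetic_data=synthetic)
print(f"KSComplement: {score:.3f}")
```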
diff --git a/src/nhssynth/README.md b/src/nhssynth/README.md
new file mode 100644
index 0000000..4f0a62e
--- /dev/null
+++ b/src/nhssynth/README.md
@@ -0,0 +1,62 @@
+# Modules
+
+This folder contains all of the modules in this package. They can be used together or independently, either by importing them into your existing codebase or by using the `cli` module and `run.py` to select which (or all) modules to run.
+
+## Importing a module from this package
+
+After installing the package, you can simply do:
+```python
+from nhssynth.modules import dataloader
+```
+and you will be able to use it in your code!
+
+## Creating a new module and folding it into the CLI
+
+The following instructions specify how to extend this package with a new module:
+
+1. Create a folder for your module within the package, i.e. `src/nhssynth/mymodule`
+2. Include within it a main executor that accepts arguments from the `cli` module, e.g. `def myexecutor(args): ...` in `mymodule/executor.py`, and export it by adding `from .executor import myexecutor` to `mymodule/__init__.py`.
+3. In the `cli` module folder, add the following code blocks to `run.py` (the second is optional, depending on whether this module should be executed as part of a full pipeline run):
+    ```python
+    from nhssynth.modules import ..., mymodule, ...
+
+    ...
+
+    def run():
+        ...
+        parser_mymodule = subparsers.add_parser(
+            name="mymodule",
+            description=...,
+            help=...,
+        )
+        add_mymodule_args(parser_mymodule)
+        parser_mymodule.set_defaults(func=mymodule.myexecutor)
+        ...
+    ```
+    ```python
+    def run_pipeline(args):
+        ...
+        mymodule.myexecutor(args)
+        ...
+    ```
+4. Similarly, add the following code blocks to `arguments.py` (again, the second block is optional):
+    ```python
+    def add_mymodule_args(parser: argparse.ArgumentParser, override=False):
+        ...
+    ```
+    ```python
+    def add_all_module_args(parser: argparse.ArgumentParser):
+        ...
+        mymodule_group = parser.add_argument_group(title="mymodule")
+        add_mymodule_args(mymodule_group)
+        ...
+
+    ...
+
+    def add_config_args(parser: argparse.ArgumentParser):
+        ...
+        add_mymodule_args(overrides_group, override=True)
+        ...
+    ```
+5. After populating these functions in a similar fashion to the existing modules, your module will work as part of the CLI! A minimal, hypothetical end-to-end sketch follows below.
+
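For concreteness, here is a minimal sketch of the module described above; `mymodule`, `myexecutor`, and the printed message are illustrative names only, not part of this changeset:

```python
# src/nhssynth/mymodule/executor.py (hypothetical)
def myexecutor(args) -> None:
    """Main executor: receives the argparse namespace assembled by the `cli` module."""
    print(f"Running mymodule on {args.input_file}")
```

`mymodule/__init__.py` would then contain the single line `from .executor import myexecutor`, mirroring the `from .run import run` pattern used by the existing modules.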
diff --git a/opacus/utils/__init__.py b/src/nhssynth/__init__.py
similarity index 100%
rename from opacus/utils/__init__.py
rename to src/nhssynth/__init__.py
diff --git a/src/nhssynth/cli/__init__.py b/src/nhssynth/cli/__init__.py
new file mode 100644
index 0000000..ba18589
--- /dev/null
+++ b/src/nhssynth/cli/__init__.py
@@ -0,0 +1 @@
+from .run import run
\ No newline at end of file
diff --git a/src/nhssynth/cli/arguments.py b/src/nhssynth/cli/arguments.py
new file mode 100644
index 0000000..c9efa42
--- /dev/null
+++ b/src/nhssynth/cli/arguments.py
@@ -0,0 +1,76 @@
+import argparse
+
+
+def add_all_module_args(parser: argparse.ArgumentParser):
+    dataloader_group = parser.add_argument_group(title="dataloader")
+    add_dataloader_args(dataloader_group)
+    structure_group = parser.add_argument_group(title="structure")
+    add_structure_args(structure_group)
+    model_group = parser.add_argument_group(title="model")
+    add_model_args(model_group)
+    evaluation_group = parser.add_argument_group(title="evaluation")
+    add_evaluation_args(evaluation_group)
+    plotting_group = parser.add_argument_group(title="plotting")
+    add_plotting_args(plotting_group)
+
+
+def add_config_args(parser: argparse.ArgumentParser):
+    parser.add_argument(
+        "--input-config",
+        "-c",
+        required=True,
+        help="Specify the config file to unpack.",
+    )
+    overrides_group = parser.add_argument_group(title="overrides")
+    # TODO is there a way to do this using `add_all_module_args`, i.e. can we nest groups? Doesn't seem to work
+    add_dataloader_args(overrides_group, override=True)
+    add_structure_args(overrides_group, override=True)
+    add_model_args(overrides_group, override=True)
+    add_evaluation_args(overrides_group, override=True)
+    add_plotting_args(overrides_group, override=True)
+
+
+def add_dataloader_args(parser: argparse.ArgumentParser, override=False):
+    parser.add_argument(
+        "--input-file",
+        "-i",
+        required=(not override),
+        help="Specify the name of the `.csv` file to prepare.",
+    )
+    parser.add_argument(
+        "--output-file",
+        "-o",
+        default="_prepared",
+        help="Specify where to write the prepared data, defaults to `{args.dir}/{args.input_file}_prepared.csv`.",
+    )
+    parser.add_argument(
+        "--dir",
+        "-d",
+        default="data",
+        help="Specify the directory to read and write data from and to, defaults to `./data`.",
+    )
+    parser.add_argument(
+        "--imputation-strategy",
+        "--impute-strategy",
+        "--impute",
+        "-is",
+        default="mean",
+        choices=["mean", "median", "cull"],
+        help="Specify the imputation strategy for missing values, defaults to inserting the mean of the relevant column.",
+    )
+
+
+def add_structure_args(parser: argparse.ArgumentParser, override=False):
+    pass
+
+
+def add_model_args(parser: argparse.ArgumentParser, override=False):
+    pass
+
+
+def add_evaluation_args(parser: argparse.ArgumentParser, override=False):
+    pass
+
+
+def add_plotting_args(parser: argparse.ArgumentParser, override=False):
+    pass
diff --git a/src/nhssynth/cli/config.py b/src/nhssynth/cli/config.py
new file mode 100644
index 0000000..3cf6ebd
--- /dev/null
+++ b/src/nhssynth/cli/config.py
@@ -0,0 +1,6 @@
+import argparse
+import json
+
+
+def read_config(args):
+    pass
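`read_config` is deliberately left as a stub here. One possible shape for it, sketched under the assumption that the config file is flat JSON mapping argument names to values; the key names in the comment are illustrative, not part of this changeset:

```python
import json


def read_config(args):
    # e.g. {"input_file": "mydata.csv", "imputation_strategy": "median"}
    with open(args.input_config) as f:
        config = json.load(f)

    # CLI flags take precedence over the file: only fill in values the user
    # did not supply on the command line
    for key, value in config.items():
        if getattr(args, key, None) is None:
            setattr(args, key, value)

    return args
```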
diff --git a/src/nhssynth/cli/run.py b/src/nhssynth/cli/run.py
new file mode 100644
index 0000000..e29b0c3
--- /dev/null
+++ b/src/nhssynth/cli/run.py
@@ -0,0 +1,99 @@
+import argparse
+
+import yaml
+from nhssynth.modules import dataloader, evaluation, model, plotting, structure
+
+from .arguments import *
+from .config import *
+
+
+def run_pipeline(args):
+    print("Running full pipeline...")
+    dataloader.run(args)
+    structure.run(args)
+    model.run(args)
+    evaluation.run(args)
+    plotting.run(args)
+
+
+def run():
+
+    parser = argparse.ArgumentParser(
+        prog="SynthVAE", description="CLI for preparing, training and evaluating a synthetic data generator."
+    )
+
+    # Below we instantiate one subparser for each module + one for running with a config file and one for doing a full run with CLI-specified config
+    subparsers = parser.add_subparsers()
+
+    parser_full = subparsers.add_parser(
+        name="pipeline",
+        description="Run the full pipeline of modules end-to-end, with configuration specified via the usual CLI flags.",
+        help="Run full pipeline",
+    )
+    add_all_module_args(parser_full)
+    parser_full.set_defaults(func=run_pipeline)
+
+    parser_config = subparsers.add_parser(
+        name="config",
+        description="Run module(s) according to configuration specified by a tape file in `tapes/`. Note that you can override parts of the configuration on the fly by using the usual CLI flags.",
+        help="Run module(s) using configuration tape file",
+    )
+    add_config_args(parser_config)
+    parser_config.set_defaults(func=read_config)
+
+    parser_dataloader = subparsers.add_parser(
+        name="prepare",
+        description="Run the Data Loader module, to prepare data for use in other modules.",
+        help="Prepare input data",
+    )
+    add_dataloader_args(parser_dataloader)
+    parser_dataloader.set_defaults(func=dataloader.run)
+
+    parser_structure = subparsers.add_parser(
+        name="structure",
+        description="Run the Structural Discovery module, to learn a structural model for use in training and evaluation.",
+        help="Discover structure",
+    )
+    add_structure_args(parser_structure)
+    parser_structure.set_defaults(func=structure.run)
+
+    parser_model = subparsers.add_parser(
+        name="train",
+        description="Run the Architecture module, to train a model.",
+        help="Train a model",
+    )
+    add_model_args(parser_model)
+    parser_model.set_defaults(func=model.run)
+
+    parser_evaluation = subparsers.add_parser(
+        name="evaluate",
+        description="Run the Evaluation module, to evaluate a model.",
+        help="Evaluate a model",
+    )
+    add_evaluation_args(parser_evaluation)
+    parser_evaluation.set_defaults(func=evaluation.run)
+
+    parser_plotting = subparsers.add_parser(
+        name="plot",
+        description="Run the Plotting module, to generate plots for a given model and / or evaluation.",
+        help="Generate plots",
+    )
+    add_plotting_args(parser_plotting)
+    parser_plotting.set_defaults(func=plotting.run)
+
+    args = parser.parse_args()
+
+    # Run the appropriate function depending on the positional option selected;
+    # if no subcommand was given, no `func` default is set, so fall back to the help text
+    if hasattr(args, "func"):
+        args.func(args)
+    else:
+        parser.parse_args(["--help"])
+
+    print(yaml.dump(vars(args)))
+
+    print("Complete!")
+
+
+if __name__ == "__main__":
+    run()
diff --git a/src/nhssynth/modules/__init__.py b/src/nhssynth/modules/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/nhssynth/modules/dataloader/__init__.py b/src/nhssynth/modules/dataloader/__init__.py
new file mode 100644
index 0000000..ba18589
--- /dev/null
+++ b/src/nhssynth/modules/dataloader/__init__.py
@@ -0,0 +1 @@
+from .run import run
\ No newline at end of file
diff --git a/src/nhssynth/modules/dataloader/run.py b/src/nhssynth/modules/dataloader/run.py
new file mode 100644
index 0000000..84267fa
--- /dev/null
+++ b/src/nhssynth/modules/dataloader/run.py
@@ -0,0 +1,8 @@
+from .utils import *
+
+
+def run(args) -> None:
+    print("Preparing data")
+    input_path, output_path, experiment_path = format_io(args.input_file, args.output_file, args.dir)
+    print(input_path, output_path, experiment_path)
+    print(args)
diff --git a/src/nhssynth/modules/dataloader/utils.py b/src/nhssynth/modules/dataloader/utils.py
new file mode 100644
index 0000000..f9a1e89
--- /dev/null
+++ b/src/nhssynth/modules/dataloader/utils.py
@@ -0,0 +1,40 @@
+import time
+import warnings
+from pathlib import Path
+
+
+def check_ending(fn: str, ending=".csv") -> str:
+    return fn if fn.endswith(ending) else fn + ending
+
+
+def format_io(
+    fn_in: str,
+    fn_out: str,
+    dir_data: str,
+    dir_exp="experiments",
+) -> tuple[Path, Path, Path]:
+    # ensure .csv ending consistency
+    fn_in, fn_out = check_ending(fn_in), check_ending(fn_out)
+
+    dir_data = Path(dir_data)
+
+    if "/" in fn_in:
+        fn_in = Path(fn_in).name
+        warnings.warn(
+            f"Paths are not supported via `--input-file`, using the name part appended to `--dir` instead, i.e. reading from {dir_data / fn_in}"
+        )
+
+    # if `fn_out` is given as a suffix (starts with an underscore), append it to `fn_in`; otherwise assume it is a name in its own right
+    if fn_out[0] == "_":
+        fn_out = check_ending(fn_in[:-4] + fn_out)
+
+    # generate timestamped experiment folder
+    dir_exp = Path(dir_exp) / time.strftime("%Y_%m_%d___%H_%M_%S")
+
+    if "/" in fn_out:
+        fn_out = Path(fn_out).name
+        warnings.warn(
+            f"Paths are not supported via `--output-file`, using the name part instead, i.e. writing to {dir_exp / fn_out}"
+        )
+
+    return dir_data / fn_in, dir_exp / fn_out, dir_exp
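A usage sketch for `format_io` as defined above; the timestamp component depends on when it is run, so the values shown in the comments are illustrative:

```python
from nhssynth.modules.dataloader.utils import format_io

input_path, output_path, experiment_path = format_io("mydata", "_prepared", "data")
print(input_path)       # data/mydata.csv
print(output_path)      # experiments/2023_01_01___12_00_00/mydata_prepared.csv
print(experiment_path)  # experiments/2023_01_01___12_00_00
```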
diff --git a/src/nhssynth/modules/evaluation/__init__.py b/src/nhssynth/modules/evaluation/__init__.py
new file mode 100644
index 0000000..3ce5f2b
--- /dev/null
+++ b/src/nhssynth/modules/evaluation/__init__.py
@@ -0,0 +1 @@
+from .run import run
diff --git a/metrics.py b/src/nhssynth/modules/evaluation/metrics.py
similarity index 99%
rename from metrics.py
rename to src/nhssynth/modules/evaluation/metrics.py
index be1104f..e8e0757 100644
--- a/metrics.py
+++ b/src/nhssynth/modules/evaluation/metrics.py
@@ -14,7 +14,7 @@ def distribution_metrics(
     synthetic_supp,
     categorical_columns,
     continuous_columns,
-    saving_filepath=None,
+    saving_filepath="metrics/",
     pre_proc_method="GMM",
 ):
diff --git a/src/nhssynth/modules/evaluation/run.py b/src/nhssynth/modules/evaluation/run.py
new file mode 100644
index 0000000..12eead8
--- /dev/null
+++ b/src/nhssynth/modules/evaluation/run.py
@@ -0,0 +1,2 @@
+def run(args):
+    print("Running evaluation module... (NOT IMPLEMENTED)")
\ No newline at end of file
diff --git a/VAE.py b/src/nhssynth/modules/model/DPVAE.py
similarity index 100%
rename from VAE.py
rename to src/nhssynth/modules/model/DPVAE.py
diff --git a/src/nhssynth/modules/model/__init__.py b/src/nhssynth/modules/model/__init__.py
new file mode 100644
index 0000000..3ce5f2b
--- /dev/null
+++ b/src/nhssynth/modules/model/__init__.py
@@ -0,0 +1 @@
+from .run import run
diff --git a/src/nhssynth/modules/model/run.py b/src/nhssynth/modules/model/run.py
new file mode 100644
index 0000000..3b74aa6
--- /dev/null
+++ b/src/nhssynth/modules/model/run.py
@@ -0,0 +1,2 @@
+def run(args):
+    print("Running model architecture module... 
(NOT IMPLEMENTED)") \ No newline at end of file diff --git a/src/nhssynth/modules/plotting/__init__.py b/src/nhssynth/modules/plotting/__init__.py new file mode 100644 index 0000000..3ce5f2b --- /dev/null +++ b/src/nhssynth/modules/plotting/__init__.py @@ -0,0 +1 @@ +from .run import run diff --git a/plot.py b/src/nhssynth/modules/plotting/plot.py similarity index 97% rename from plot.py rename to src/nhssynth/modules/plotting/plot.py index 51e2ccb..1f1d37e 100644 --- a/plot.py +++ b/src/nhssynth/modules/plotting/plot.py @@ -68,11 +68,13 @@ num_continuous, ) = support_pre_proc(data_supp=data_supp, pre_proc_method=pre_proc_method) + ############################################################################### # Load saved model - ensure parameters are equivalent to the saved model -latent_dim = 2 -encoder = Encoder(x_train.shape[1], latent_dim) +latent_dim = 256 +hidden_dim = 256 +encoder = Encoder(x_train.shape[1], latent_dim, hidden_dim=hidden_dim) decoder = Decoder(latent_dim, num_continuous, num_categories=num_categories) vae = VAE(encoder, decoder) vae.load(args.save_file) diff --git a/src/nhssynth/modules/plotting/run.py b/src/nhssynth/modules/plotting/run.py new file mode 100644 index 0000000..247eda5 --- /dev/null +++ b/src/nhssynth/modules/plotting/run.py @@ -0,0 +1,2 @@ +def run(args): + print("Running plotting module... (NOT IMPLEMENTED)") \ No newline at end of file diff --git a/src/nhssynth/modules/structure/__init__.py b/src/nhssynth/modules/structure/__init__.py new file mode 100644 index 0000000..3ce5f2b --- /dev/null +++ b/src/nhssynth/modules/structure/__init__.py @@ -0,0 +1 @@ +from .run import run diff --git a/src/nhssynth/modules/structure/run.py b/src/nhssynth/modules/structure/run.py new file mode 100644 index 0000000..7933570 --- /dev/null +++ b/src/nhssynth/modules/structure/run.py @@ -0,0 +1,2 @@ +def run(args): + print("Running structural discovery module... 
(NOT IMPLEMENTED)")
diff --git a/utils.py b/src/nhssynth/utils/utils.py
similarity index 95%
rename from utils.py
rename to src/nhssynth/utils/utils.py
index f5cbf5c..ac06638 100644
--- a/utils.py
+++ b/src/nhssynth/utils/utils.py
@@ -1,8 +1,6 @@
-from datetime import datetime
-from lzma import CHECK_CRC32
 import numpy as np
 import torch
-from rdt.transformers import numerical, categorical, DatetimeTransformer
+from rdt.transformers import numerical, categorical, datetime
 import pandas as pd

 # Graph Visualisation
@@ -57,10 +55,8 @@ def support_pre_proc(data_supp, pre_proc_method="GMM"):
     if pre_proc_method == "GMM":
         for index, column in enumerate(continuous_columns):
             # Fit GMM
-            temp_continuous = numerical.BayesGMMTransformer(
-                random_state=gmm_seed
-            )
-            temp_continuous.fit(transformed_dataset, columns=column)
+            temp_continuous = numerical.ClusterBasedNormalizer()
+            temp_continuous.fit(transformed_dataset, column=column)
             continuous_transformers[
                 "continuous_{}".format(column)
             ] = temp_continuous
@@ -110,8 +106,8 @@ def support_pre_proc(data_supp, pre_proc_method="GMM"):
     for index, column in enumerate(categorical_columns):
-        temp_categorical = categorical.OneHotEncodingTransformer()
-        temp_categorical.fit(transformed_dataset, columns=column)
+        temp_categorical = categorical.OneHotEncoder()
+        temp_categorical.fit(transformed_dataset, column=column)
         categorical_transformers[
             "categorical_{}".format(index)
         ] = temp_categorical
@@ -210,8 +206,8 @@ def mimic_pre_proc(data_supp, pre_proc_method="GMM"):
     for index, column in enumerate(original_datetime_columns):
         # Fit datetime transformer - converts to seconds
-        temp_datetime = DatetimeTransformer()
-        temp_datetime.fit(transformed_dataset, columns=column)
+        temp_datetime = datetime.OptimizedTimestampEncoder()
+        temp_datetime.fit(transformed_dataset, column=column)
         datetime_transformers["datetime_{}".format(column)] = temp_datetime
         transformed_dataset = temp_datetime.transform(transformed_dataset)
@@ -229,10 +225,8 @@ def mimic_pre_proc(data_supp, pre_proc_method="GMM"):
     for index, column in enumerate(continuous_columns):
         # Fit GMM
-        temp_continuous = numerical.BayesGMMTransformer(
-            random_state=gmm_seed
-        )
-        temp_continuous.fit(transformed_dataset, columns=column)
+        temp_continuous = numerical.ClusterBasedNormalizer()
+        temp_continuous.fit(transformed_dataset, column=column)
         continuous_transformers[
             "continuous_{}".format(column)
         ] = temp_continuous
@@ -293,8 +287,8 @@ def mimic_pre_proc(data_supp, pre_proc_method="GMM"):
     for index, column in enumerate(categorical_columns):
-        temp_categorical = categorical.OneHotEncodingTransformer()
-        temp_categorical.fit(transformed_dataset, columns=column)
+        temp_categorical = categorical.OneHotEncoder()
+        temp_categorical.fit(transformed_dataset, column=column)
         categorical_transformers[
             "categorical_{}".format(index)
         ] = temp_categorical
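For reference, a minimal sketch of the RDT 1.x API that this migration targets, assuming `rdt>=1.0` (where transformers are fit on a single named `column` and the GMM-based transformer is called `ClusterBasedNormalizer`); the data frame is illustrative:

```python
import pandas as pd
from rdt.transformers import categorical, numerical

df = pd.DataFrame({"age": [34.0, 51.0, 28.0, 60.0], "GENDER": ["F", "M", "F", "M"]})

# GMM-based normalisation (formerly BayesGMMTransformer)
gmm = numerical.ClusterBasedNormalizer()
gmm.fit(df, column="age")
df = gmm.transform(df)  # replaces `age` with normalised-value and component columns

# One-hot encoding (formerly OneHotEncodingTransformer)
ohe = categorical.OneHotEncoder()
ohe.fit(df, column="GENDER")
df = ohe.transform(df)

# Each transformer can invert its own step
df = ohe.reverse_transform(df)
df = gmm.reverse_transform(df)
```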