From ea4601e4eed2d668ce98e0d35f38560442c33930 Mon Sep 17 00:00:00 2001 From: Florian Wilhelm Date: Thu, 12 Mar 2020 17:18:51 +0100 Subject: [PATCH] Remove dependencies on causalml, rpy2 and tensorflow --- CHANGELOG.rst | 7 +- docs/requirements.txt | 1 - environment.yaml | 11 +- notebooks/template.ipynb | 4 +- setup.cfg | 2 +- src/justcause/contrib/__init__.py | 7 - src/justcause/contrib/dragonnet/__init__.py | 0 .../contrib/dragonnet/dragon_utils.py | 325 ------------- src/justcause/contrib/dragonnet/dragonnet.py | 460 ------------------ src/justcause/contrib/ganite/__init__.py | 0 .../contrib/ganite/ganite_builder.py | 442 ----------------- src/justcause/contrib/ganite/ganite_model.py | 327 ------------- src/justcause/learners/__init__.py | 9 - src/justcause/learners/meta/rlearner.py | 118 ----- src/justcause/learners/meta/xlearner.py | 147 ------ src/justcause/learners/nn/__init__.py | 0 src/justcause/learners/nn/dragonnet.py | 107 ---- src/justcause/learners/tree/__init__.py | 0 src/justcause/learners/tree/causal_forest.py | 123 ----- src/justcause/learners/utils.py | 23 +- tests/conftest.py | 29 -- tests/test_learners.py | 85 +--- tests/test_utils.py | 9 +- 23 files changed, 13 insertions(+), 2223 deletions(-) delete mode 100644 src/justcause/contrib/__init__.py delete mode 100644 src/justcause/contrib/dragonnet/__init__.py delete mode 100644 src/justcause/contrib/dragonnet/dragon_utils.py delete mode 100644 src/justcause/contrib/dragonnet/dragonnet.py delete mode 100644 src/justcause/contrib/ganite/__init__.py delete mode 100644 src/justcause/contrib/ganite/ganite_builder.py delete mode 100644 src/justcause/contrib/ganite/ganite_model.py delete mode 100644 src/justcause/learners/meta/rlearner.py delete mode 100644 src/justcause/learners/meta/xlearner.py delete mode 100644 src/justcause/learners/nn/__init__.py delete mode 100644 src/justcause/learners/nn/dragonnet.py delete mode 100644 src/justcause/learners/tree/__init__.py delete mode 100644 src/justcause/learners/tree/causal_forest.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d3b8fea..08e5408 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,9 +2,12 @@ Changelog ========= -Version 0.3.2 -============= +Version 0.4 +=========== +- removed the dependency on ``causalml``; thus the R- and X-learners were removed +- removed the dependency on ``rpy2``; thus the CausalForest method was removed +- removed the dependency on ``tensorflow``; thus GANITE and DragonNet were removed - added missing ``requests`` library in dependencies Version 0.3.2 diff --git a/docs/requirements.txt b/docs/requirements.txt index f33f475..b4a3abd 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,5 +5,4 @@ pandas matplotlib==2.2.4 pyarrow>=0.15 # Mac problems with 0.13 scikit-learn>=0.22 -causalml sphinx_rtd_theme diff --git a/environment.yaml b/environment.yaml index 499a848..30ffd61 100644 --- a/environment.yaml +++ b/environment.yaml @@ -11,16 +11,10 @@ dependencies: - scipy - requests - pandas - - r-base - - rpy2 - seaborn - - tensorflow=1.13.1 # required for dragonnet project code - - keras - - cython # needed by CausalML - - arrow-cpp # check if needed - pyarrow>=0.15 # Mac problems with 0.13 - - scikit-learn==0.20.3 # Currently required to downgrade scikit-learn in order for CausalML to compile - - matplotlib==2.2.4 # Required for CausalML to work instantly + - scikit-learn + - matplotlib - seaborn # for development below here - jupyterlab @@ -32,5 +26,4 @@ dependencies: - flake8 - pip: - pre-commit - - causalml - pygam diff --git 
a/notebooks/template.ipynb b/notebooks/template.ipynb index fc44152..caebae1 100644 --- a/notebooks/template.ipynb +++ b/notebooks/template.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ diff --git a/setup.cfg b/setup.cfg index 4055ac2..7a83d00 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ package_dir = # DON'T CHANGE THE FOLLOWING LINE! IT WILL BE UPDATED BY PYSCAFFOLD! setup_requires = pyscaffold>=3.2a0,<3.3a0 # Add here dependencies of your project (semicolon/line-separated), e.g. -install_requires = numpy; pyarrow; pandas>=1.0; scikit-learn; requests; causalml; rpy2; pygam +install_requires = numpy; pyarrow; pandas>=1.0; scikit-learn; requests; pygam python_requires = >=3.6 [options.packages.find] diff --git a/src/justcause/contrib/__init__.py b/src/justcause/contrib/__init__.py deleted file mode 100644 index e884986..0000000 --- a/src/justcause/contrib/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -""" -Contribution packages used by JustCause - -All packages inside ``contrib`` are external packages that come with their -own licences and are not part of the JustCause source code itself. -The copyright remains with the original authors or copyright holders. -""" diff --git a/src/justcause/contrib/dragonnet/__init__.py b/src/justcause/contrib/dragonnet/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/justcause/contrib/dragonnet/dragon_utils.py b/src/justcause/contrib/dragonnet/dragon_utils.py deleted file mode 100644 index 9431ba8..0000000 --- a/src/justcause/contrib/dragonnet/dragon_utils.py +++ /dev/null @@ -1,325 +0,0 @@ -import keras.backend as K -import tensorflow as tf -from keras import regularizers -from keras.engine.topology import Layer -from keras.layers import Concatenate, Dense, Input -from keras.metrics import binary_accuracy -from keras.models import Model - - -def binary_classification_loss(concat_true, concat_pred): - t_true = concat_true[:, 1] - t_pred = concat_pred[:, 2] - t_pred = (t_pred + 0.001) / 1.002 - losst = tf.reduce_sum(K.binary_crossentropy(t_true, t_pred)) - - return losst - - -def regression_loss(concat_true, concat_pred): - y_true = concat_true[:, 0] - t_true = concat_true[:, 1] - - y0_pred = concat_pred[:, 0] - y1_pred = concat_pred[:, 1] - - loss0 = tf.reduce_sum((1.0 - t_true) * tf.square(y_true - y0_pred)) - loss1 = tf.reduce_sum(t_true * tf.square(y_true - y1_pred)) - - return loss0 + loss1 - - -def ned_loss(concat_true, concat_pred): - t_true = concat_true[:, 1] - - t_pred = concat_pred[:, 1] - return tf.reduce_sum(K.binary_crossentropy(t_true, t_pred)) - - -def dead_loss(concat_true, concat_pred): - return regression_loss(concat_true, concat_pred) - - -def dragonnet_loss_binarycross(concat_true, concat_pred): - return regression_loss(concat_true, concat_pred) + binary_classification_loss( - concat_true, concat_pred - ) - - -def treatment_accuracy(concat_true, concat_pred): - t_true = concat_true[:, 1] - t_pred = concat_pred[:, 2] - return binary_accuracy(t_true, t_pred) - - -def track_epsilon(concat_true, concat_pred): - epsilons = concat_pred[:, 3] - return tf.abs(tf.reduce_mean(epsilons)) - - -class EpsilonLayer(Layer): - def __init__(self): - super(EpsilonLayer, self).__init__() - - def build(self, input_shape): - # Create a trainable weight variable for this layer. 
- self.epsilon = self.add_weight( - name="epsilon", - shape=[1, 1], - initializer="RandomNormal", - # initializer='ones', - trainable=True, - ) - super(EpsilonLayer, self).build(input_shape) # Be sure to call this at the end - - def call(self, inputs, **kwargs): - # import ipdb; ipdb.set_trace() - return self.epsilon * tf.ones_like(inputs)[:, 0:1] - - -def make_tarreg_loss(ratio=1.0, dragonnet_loss=dragonnet_loss_binarycross): - def tarreg_ATE_unbounded_domain_loss(concat_true, concat_pred): - vanilla_loss = dragonnet_loss(concat_true, concat_pred) - - y_true = concat_true[:, 0] - t_true = concat_true[:, 1] - - y0_pred = concat_pred[:, 0] - y1_pred = concat_pred[:, 1] - t_pred = concat_pred[:, 2] - - epsilons = concat_pred[:, 3] - t_pred = (t_pred + 0.01) / 1.02 - # t_pred = tf.clip_by_value(t_pred,0.01, 0.99,name='t_pred') - - y_pred = t_true * y1_pred + (1 - t_true) * y0_pred - - h = t_true / t_pred - (1 - t_true) / (1 - t_pred) - - y_pert = y_pred + epsilons * h - targeted_regularization = tf.reduce_sum(tf.square(y_true - y_pert)) - - # final - loss = vanilla_loss + ratio * targeted_regularization - return loss - - return tarreg_ATE_unbounded_domain_loss - - -def make_dragonnet(input_dim, reg_l2): - """ - Neural net predictive model. The dragon has three heads. - :param input_dim: - :param reg: - :return: - """ - - K.clear_session() # - - t_l1 = 0.0 # noqa: F841 - t_l2 = reg_l2 # noqa: F841 - inputs = Input(shape=(input_dim,), name="input") - - # representation - x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(inputs) - x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(x) - x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(x) - - t_predictions = Dense(units=1, activation="sigmoid")(x) - - # HYPOTHESIS - y0_hidden = Dense( - units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2) - )(x) - y1_hidden = Dense( - units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2) - )(x) - - # second layer - y0_hidden = Dense( - units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2) - )(y0_hidden) - y1_hidden = Dense( - units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2) - )(y1_hidden) - - # third - y0_predictions = Dense( - units=1, - activation=None, - kernel_regularizer=regularizers.l2(reg_l2), - name="y0_predictions", - )(y0_hidden) - y1_predictions = Dense( - units=1, - activation=None, - kernel_regularizer=regularizers.l2(reg_l2), - name="y1_predictions", - )(y1_hidden) - - dl = EpsilonLayer() - epsilons = dl(t_predictions, name="epsilon") - # logging.info(epsilons) - concat_pred = Concatenate(1)( - [y0_predictions, y1_predictions, t_predictions, epsilons] - ) - model = Model(inputs=inputs, outputs=concat_pred) - - return model - - -def make_tarnet(input_dim, reg_l2): - """ - Neural net predictive model. The dragon has three heads. 
- :param input_dim: - :param reg: - :return: - """ - - inputs = Input(shape=(input_dim,), name="input") - - # representation - x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(inputs) - x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(x) - x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(x) - - t_predictions = Dense(units=1, activation="sigmoid")(inputs) - - # HYPOTHESIS - y0_hidden = Dense( - units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2) - )(x) - y1_hidden = Dense( - units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2) - )(x) - - # second layer - y0_hidden = Dense( - units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2) - )(y0_hidden) - y1_hidden = Dense( - units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2) - )(y1_hidden) - - # third - y0_predictions = Dense( - units=1, - activation=None, - kernel_regularizer=regularizers.l2(reg_l2), - name="y0_predictions", - )(y0_hidden) - y1_predictions = Dense( - units=1, - activation=None, - kernel_regularizer=regularizers.l2(reg_l2), - name="y1_predictions", - )(y1_hidden) - - dl = EpsilonLayer() - epsilons = dl(t_predictions, name="epsilon") - # logging.info(epsilons) - concat_pred = Concatenate(1)( - [y0_predictions, y1_predictions, t_predictions, epsilons] - ) - model = Model(inputs=inputs, outputs=concat_pred) - - return model - - -def make_ned(input_dim, reg_l2=0.01): - """ - Neural net predictive model. The dragon has three heads. - :param input_dim: - :param reg: - :return: - """ - - inputs = Input(shape=(input_dim,), name="input") - - # representation - x = Dense( - units=200, - activation="elu", - kernel_initializer="RandomNormal", - name="ned_hidden1", - )(inputs) - x = Dense( - units=200, - activation="elu", - kernel_initializer="RandomNormal", - name="ned_hidden2", - )(x) - x = Dense( - units=200, - activation="elu", - kernel_initializer="RandomNormal", - name="ned_hidden3", - )(x) - - t_predictions = Dense(units=1, activation="sigmoid", name="ned_t_activation")(x) - y_predictions = Dense(units=1, activation=None, name="ned_y_prediction")(x) - - concat_pred = Concatenate(1)([y_predictions, t_predictions]) - - model = Model(inputs=inputs, outputs=concat_pred) - return model - - -def post_cut(nednet, input_dim, reg_l2=0.01): - for layer in nednet.layers: - layer.trainable = False - nednet.layers.pop() - nednet.layers.pop() - nednet.layers.pop() - - frozen = nednet - - x = frozen.layers[-1].output - frozen.layers[-1].outbound_nodes = [] - input = frozen.input - - y0_hidden = Dense( - units=100, - activation="elu", - kernel_regularizer=regularizers.l2(reg_l2), - name="post_cut_y0_1", - )(x) - y1_hidden = Dense( - units=100, - activation="elu", - kernel_regularizer=regularizers.l2(reg_l2), - name="post_cut_y1_1", - )(x) - - # second layer - y0_hidden = Dense( - units=100, - activation="elu", - kernel_regularizer=regularizers.l2(reg_l2), - name="post_cut_y0_2", - )(y0_hidden) - y1_hidden = Dense( - units=100, - activation="elu", - kernel_regularizer=regularizers.l2(reg_l2), - name="post_cut_y1_2", - )(y1_hidden) - - # third - y0_predictions = Dense( - units=1, - activation=None, - kernel_regularizer=regularizers.l2(reg_l2), - name="y0_predictions", - )(y0_hidden) - y1_predictions = Dense( - units=1, - activation=None, - kernel_regularizer=regularizers.l2(reg_l2), - name="y1_predictions", - )(y1_hidden) - - concat_pred = Concatenate(1)([y0_predictions, y1_predictions]) - - model = 
Model(inputs=input, outputs=concat_pred) - return model diff --git a/src/justcause/contrib/dragonnet/dragonnet.py b/src/justcause/contrib/dragonnet/dragonnet.py deleted file mode 100644 index c6b5916..0000000 --- a/src/justcause/contrib/dragonnet/dragonnet.py +++ /dev/null @@ -1,460 +0,0 @@ -import time - -import keras.backend as K -import numpy as np -import tensorflow as tf -from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau, TerminateOnNaN -from keras.optimizers import SGD, Adam -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler - -from .dragon_utils import ( - binary_classification_loss, - dead_loss, - dragonnet_loss_binarycross, - make_dragonnet, - make_ned, - make_tarnet, - make_tarreg_loss, - ned_loss, - post_cut, - regression_loss, - track_epsilon, - treatment_accuracy, -) - - -def _split_output(yt_hat, t, y, y_scaler, x, index): - q_t0 = y_scaler.inverse_transform(yt_hat[:, 0].copy()) - q_t1 = y_scaler.inverse_transform(yt_hat[:, 1].copy()) - g = yt_hat[:, 2].copy() - - if yt_hat.shape[1] == 4: - eps = yt_hat[:, 3][0] - else: - eps = np.zeros_like(yt_hat[:, 2]) - - y = y_scaler.inverse_transform(y.copy()) - var = "average propensity for treated: {} and untreated: {}".format( - g[t.squeeze() == 1.0].mean(), g[t.squeeze() == 0.0].mean() - ) - print(var) - - return { - "q_t0": q_t0, - "q_t1": q_t1, - "g": g, - "t": t, - "y": y, - "x": x, - "index": index, - "eps": eps, - } - - -def train_dragon( - t, - y_unscaled, - x, - targeted_regularization=True, - knob_loss=dragonnet_loss_binarycross, - ratio=1.0, - val_split=0.1, - batch_size=512, - num_epochs=100, - learning_rate=0.001, - verbose=1, -): - """Build and Train the DragonNet Model on given data - - :param t: treatment treatment to train on - :param y_unscaled: outcomes to train on - :param x: covariates to train on - :param targeted_regularization: use targeted regularization? 
- :param knob_loss: base loss to use for tar_reg - :param ratio: - :param val_split: - :param batch_size: batch size to use - :param num_epochs: number of epochs to run - :return: - """ - - # Build Model structure - dragonnet = make_dragonnet(x.shape[1], 0.01) - - # Only report regression loss for now - metrics = [regression_loss] - - if targeted_regularization: - loss = make_tarreg_loss(ratio=ratio, dragonnet_loss=knob_loss) - else: - loss = knob_loss - - yt = np.c_[y_unscaled, t] - - start_time = time.time() - - dragonnet.compile(optimizer=Adam(lr=learning_rate), loss=loss, metrics=metrics) - - class EarlyStoppingByLossVal(Callback): - def __init__(self, monitor="regression_loss", value=400, verbose=0): - super(Callback, self).__init__() - self.monitor = monitor - self.value = value - self.verbose = verbose - - def on_epoch_end(self, epoch, logs={}): - current = logs.get(self.monitor) - if current < self.value: - if self.verbose > 0: - print("Epoch %05d: early stopping THR" % epoch) - self.model.stop_training = True - - adam_callbacks = [ - TerminateOnNaN(), - EarlyStopping(monitor="regression_loss", patience=10, min_delta=0.0), - ReduceLROnPlateau( - monitor="loss", - factor=0.5, - patience=5, - verbose=verbose, - mode="auto", - min_delta=1e-8, - cooldown=0, - min_lr=0, - ) - ] - - dragonnet.fit( - x, - yt, - callbacks=adam_callbacks, - validation_split=val_split, - epochs=num_epochs, - batch_size=batch_size, - verbose=verbose, - ) - - elapsed_time = time.time() - start_time - if verbose: - print("***************************** elapsed_time is: ", elapsed_time) - - return dragonnet - - -def train_and_predict_dragons( - t, - y_unscaled, - x, - targeted_regularization=True, - output_dir="", - knob_loss=dragonnet_loss_binarycross, - ratio=1.0, - dragon=1, - val_split=0.1, - batch_size=512, -): - - verbose = 1 - y_scaler = StandardScaler().fit(y_unscaled) - y = y_scaler.transform(y_unscaled) - train_outputs = [] - test_outputs = [] - runs = 1 - for i in range(runs): - if dragon == 0: - - dragonnet = make_tarnet(x.shape[1], 0.01) - elif dragon == 1: - dragonnet = make_dragonnet(x.shape[1], 0.01) - - metrics = [ - regression_loss, - binary_classification_loss, - treatment_accuracy, - track_epsilon, - ] - - if targeted_regularization: - loss = make_tarreg_loss(ratio=ratio, dragonnet_loss=knob_loss) - else: - loss = knob_loss - - tf.random.set_random_seed(i) - np.random.seed(i) - train_index, test_index = train_test_split( - np.arange(x.shape[0]), test_size=0, random_state=1 - ) - test_index = train_index - - x_train, x_test = x[train_index], x[test_index] - y_train, y_test = y[train_index], y[test_index] - t_train, t_test = t[train_index], t[test_index] - yt_train = np.concatenate([y_train, t_train], 1) - - start_time = time.time() - - dragonnet.compile(optimizer=Adam(lr=1e-3), loss=loss, metrics=metrics) - - adam_callbacks = [ - TerminateOnNaN(), - EarlyStopping(monitor="val_loss", patience=2, min_delta=0.0), - ReduceLROnPlateau( - monitor="loss", - factor=0.5, - patience=5, - verbose=verbose, - mode="auto", - min_delta=1e-8, - cooldown=0, - min_lr=0, - ), - ] - - dragonnet.fit( - x_train, - yt_train, - callbacks=adam_callbacks, - validation_split=val_split, - epochs=100, - batch_size=batch_size, - verbose=verbose, - ) - - sgd_callbacks = [ - TerminateOnNaN(), - EarlyStopping(monitor="val_loss", patience=40, min_delta=0.0), - ReduceLROnPlateau( - monitor="loss", - factor=0.5, - patience=5, - verbose=verbose, - mode="auto", - min_delta=0.0, - cooldown=0, - min_lr=0, - ), - ] - - # should pick 
something better! - sgd_lr = 1e-5 - momentum = 0.9 - dragonnet.compile( - optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True), - loss=loss, - metrics=metrics, - ) - dragonnet.fit( - x_train, - yt_train, - callbacks=sgd_callbacks, - validation_split=val_split, - epochs=300, - batch_size=batch_size, - verbose=verbose, - ) - - elapsed_time = time.time() - start_time - print("***************************** elapsed_time is: ", elapsed_time) - - yt_hat_test = dragonnet.predict(x_test) - yt_hat_train = dragonnet.predict(x_train) - - test_outputs += [ - _split_output(yt_hat_test, t_test, y_test, y_scaler, x_test, test_index) - ] - train_outputs += [ - _split_output( - yt_hat_train, t_train, y_train, y_scaler, x_train, train_index - ) - ] - K.clear_session() - - return test_outputs, train_outputs - - -def train_and_predict_ned( - t, - y_unscaled, - x, - targeted_regularization=True, - output_dir="", - knob_loss=dragonnet_loss_binarycross, - ratio=1.0, - dragon=1, - val_split=0.1, - batch_size=512, -): - - verbose = 0 - y_scaler = StandardScaler().fit(y_unscaled) - y = y_scaler.transform(y_unscaled) - - train_outputs = [] - test_outputs = [] - runs = 25 - for i in range(runs): - - nednet = make_ned(x.shape[1], 0.01) - - metrics_ned = [ned_loss] - metrics_cut = [regression_loss] - - tf.random.set_random_seed(i) - np.random.seed(i) - train_index, test_index = train_test_split(np.arange(x.shape[0]), test_size=0.0) - test_index = train_index - x_train, x_test = x[train_index], x[test_index] - y_train, y_test = y[train_index], y[test_index] - t_train, t_test = t[train_index], t[test_index] - yt_train = np.concatenate([y_train, t_train], 1) - - start_time = time.time() - - nednet.compile(optimizer=Adam(lr=1e-3), loss=ned_loss, metrics=metrics_ned) - - adam_callbacks = [ - TerminateOnNaN(), - EarlyStopping(monitor="val_loss", patience=2, min_delta=0.0), - ReduceLROnPlateau( - monitor="loss", - factor=0.5, - patience=5, - verbose=verbose, - mode="auto", - min_delta=1e-8, - cooldown=0, - min_lr=0, - ), - ] - - nednet.fit( - x_train, - yt_train, - callbacks=adam_callbacks, - validation_split=val_split, - epochs=100, - batch_size=batch_size, - verbose=verbose, - ) - - sgd_callbacks = [ - TerminateOnNaN(), - EarlyStopping(monitor="val_loss", patience=40, min_delta=0.0), - ReduceLROnPlateau( - monitor="loss", - factor=0.5, - patience=5, - verbose=verbose, - mode="auto", - min_delta=0.0, - cooldown=0, - min_lr=0, - ), - ] - - sgd_lr = 1e-5 - momentum = 0.9 - nednet.compile( - optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True), - loss=ned_loss, - metrics=metrics_ned, - ) - print(nednet.summary()) - nednet.fit( - x_train, - yt_train, - callbacks=sgd_callbacks, - validation_split=val_split, - epochs=300, - batch_size=batch_size, - verbose=verbose, - ) - - t_hat_test = nednet.predict(x_test)[:, 1] - t_hat_train = nednet.predict(x_train)[:, 1] - - # cutting the activation layer - cut_net = post_cut(nednet, x.shape[1], 0.01) - - cut_net.compile(optimizer=Adam(lr=1e-3), loss=dead_loss, metrics=metrics_cut) - - adam_callbacks = [ - TerminateOnNaN(), - EarlyStopping(monitor="val_loss", patience=2, min_delta=0.0), - ReduceLROnPlateau( - monitor="loss", - factor=0.5, - patience=5, - verbose=verbose, - mode="auto", - min_delta=1e-8, - cooldown=0, - min_lr=0, - ), - ] - print(cut_net.summary()) - - cut_net.fit( - x_train, - yt_train, - callbacks=adam_callbacks, - validation_split=val_split, - epochs=100, - batch_size=batch_size, - verbose=verbose, - ) - - elapsed_time = time.time() - start_time - 
print("***************************** elapsed_time is: ", elapsed_time) - - sgd_callbacks = [ - TerminateOnNaN(), - EarlyStopping(monitor="val_loss", patience=40, min_delta=0.0), - ReduceLROnPlateau( - monitor="loss", - factor=0.5, - patience=5, - verbose=verbose, - mode="auto", - min_delta=0.0, - cooldown=0, - min_lr=0, - ), - ] - - sgd_lr = 1e-5 - momentum = 0.9 - cut_net.compile( - optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True), - loss=dead_loss, - metrics=metrics_cut, - ) - - cut_net.fit( - x_train, - yt_train, - callbacks=sgd_callbacks, - validation_split=val_split, - epochs=300, - batch_size=batch_size, - verbose=verbose, - ) - - y_hat_test = cut_net.predict(x_test) - y_hat_train = cut_net.predict(x_train) - - yt_hat_test = np.concatenate([y_hat_test, t_hat_test.reshape(-1, 1)], 1) - yt_hat_train = np.concatenate([y_hat_train, t_hat_train.reshape(-1, 1)], 1) - - test_outputs += [ - _split_output(yt_hat_test, t_test, y_test, y_scaler, x_test, test_index) - ] - train_outputs += [ - _split_output( - yt_hat_train, t_train, y_train, y_scaler, x_train, train_index - ) - ] - K.clear_session() - - return test_outputs, train_outputs diff --git a/src/justcause/contrib/ganite/__init__.py b/src/justcause/contrib/ganite/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/justcause/contrib/ganite/ganite_builder.py b/src/justcause/contrib/ganite/ganite_builder.py deleted file mode 100644 index 11cdfcb..0000000 --- a/src/justcause/contrib/ganite/ganite_builder.py +++ /dev/null @@ -1,442 +0,0 @@ -""" -Copyright (C) 2018 Patrick Schwab, ETH Zurich - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions - of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
-""" -import numpy as np -import tensorflow as tf - - -def get_nonlinearity_by_name(name): - if name.lower() == "elu": - return tf.nn.elu - else: - return tf.nn.relu - - -def build_mlp( - x, - num_layers=1, - num_units=16, - dropout=0.0, - nonlinearity=tf.nn.elu, - weight_initialisation_std=0.1, -): - input_dim = int(x.shape[-1]) - h_in, weights_in, biases_in = [x], [], [] - for i in range(0, num_layers): - if i == 0: - """ If using variable selection, first layer is just rescaling""" - weights_in.append( - tf.Variable( - tf.random_normal( - [input_dim, num_units], - stddev=weight_initialisation_std / np.sqrt(input_dim), - ) - ) - ) - else: - weights_in.append( - tf.Variable( - tf.random_normal( - [num_units, num_units], - stddev=weight_initialisation_std / np.sqrt(num_units), - ) - ) - ) - - biases_in.append(tf.Variable(tf.zeros([1, num_units]))) - z = tf.matmul(h_in[i], weights_in[i]) + biases_in[i] - - h_in.append(nonlinearity(z)) - h_in[i + 1] = tf.nn.dropout(h_in[i + 1], 1.0 - dropout) - - h_rep = h_in[len(h_in) - 1] - return h_rep, weights_in, biases_in - - -class GANITEBuilder(object): - @staticmethod - def build( - input_dim, - output_dim, - num_units=128, - dropout=0.0, - l2_weight=0.0, - learning_rate=0.0001, - num_layers=2, - num_treatments=2, - with_bn=False, - nonlinearity="elu", - initializer=tf.variance_scaling_initializer(), - alpha=1.0, - beta=1.0, - ): - x = tf.placeholder("float", shape=[None, input_dim], name="x") - t = tf.placeholder("float", shape=[None, 1], name="t") - y_f = tf.placeholder("float", shape=[None, 1], name="y_f") - y_full = tf.placeholder("float", shape=[None, num_treatments], name="y_full") - - y_pred_cf, propensity_scores, z_g = GANITEBuilder.build_counterfactual_block( - input_dim, - x, - t, - y_f, - num_units, - dropout, - l2_weight, - learning_rate, - num_layers, - num_treatments, - with_bn, - nonlinearity, - initializer, - ) - - y_pred_ite, d_ite_pred, d_ite_true, z_i = GANITEBuilder.build_ite_block( - input_dim, - x, - t, - y_f, - y_full, - num_units, - dropout, - l2_weight, - learning_rate, - num_layers, - num_treatments, - with_bn, - nonlinearity, - initializer, - ) - - # Build losses and optimizers. 
- t_one_hot = tf.one_hot(tf.cast(t, "int32"), num_treatments) - - propensity_loss_cf = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits( - logits=propensity_scores, labels=t_one_hot - ) - ) - - batch_size = tf.shape(y_pred_cf)[0] - indices = tf.stack([tf.range(batch_size), tf.cast(t, "int32")[:, 0]], axis=-1) - y_f_pred = tf.gather_nd(y_pred_cf, indices) - - y_f_i = y_f # tf.Print(y_f, [y_f[:, 0]], message="y_f=", summarize=8) - y_f_pred_i = ( - y_f_pred # tf.Print(y_f_pred, [y_f_pred], message="y_f_pred=", summarize=8) - ) - - supervised_loss_cf = tf.sqrt( - tf.reduce_mean(tf.squared_difference(y_f_i[:, 0], y_f_pred_i)) - ) - - cf_discriminator_loss = propensity_loss_cf - cf_generator_loss = -propensity_loss_cf + alpha * supervised_loss_cf - - # D_ITE goal: 0 when True, 1 when Pred - ite_loss = tf.reduce_mean(tf.log(d_ite_true)) + tf.reduce_mean( - tf.log(1 - d_ite_pred) - ) - - # tf.Print(y_full, [y_full], message="y_full=", summarize=8) - y_full_i = y_full - # tf.Print(y_pred_ite, [y_pred_ite], message="y_pred_ite=", summarize=8) - y_pred_ite_i = y_pred_ite - supervised_loss_ite = tf.sqrt( - tf.reduce_mean(tf.squared_difference(y_full_i, y_pred_ite_i)) - ) - - ite_discriminator_loss = -ite_loss - ite_generator_loss = ite_loss + beta * supervised_loss_ite - return ( - cf_generator_loss, - cf_discriminator_loss, - ite_generator_loss, - ite_discriminator_loss, - x, - t, - y_f, - y_full, - y_pred_cf, - y_pred_ite, - z_g, - z_i, - ) - - @staticmethod - def build_tarnet( - mlp_input, - t, - input_dim, - num_layers, - num_units, - dropout, - num_treatments, - nonlinearity, - ): - initializer = tf.variance_scaling_initializer() - x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity) - - all_indices, outputs = [], [] - for i in range(num_treatments): - indices = tf.reshape( - tf.to_int32(tf.where(tf.equal(tf.reshape(t, (-1,)), i))), (-1,) - ) - current_last_layer_h = tf.gather(x, indices) - - last_layer = build_mlp( - current_last_layer_h, num_layers, num_units, dropout, nonlinearity - ) - - output = tf.layers.dense( - last_layer[0], - units=num_treatments, - use_bias=True, - bias_initializer=initializer, - ) - - all_indices.append(indices) - outputs.append(output) - return tf.concat(outputs, axis=-1), all_indices - - @staticmethod - def build_counterfactual_block( - input_dim, - x, - t, - y_f, - num_units=128, - dropout=0.0, - l2_weight=0.0, - learning_rate=0.0001, - num_layers=2, - num_treatments=2, - with_bn=False, - nonlinearity="elu", - initializer=tf.variance_scaling_initializer(), - ): - - y_pred, z_g = GANITEBuilder.build_counterfactual_generator( - input_dim, - x, - t, - y_f, - num_units, - dropout, - l2_weight, - learning_rate, - num_layers, - num_treatments, - with_bn, - nonlinearity, - initializer, - ) - - propensity_scores = GANITEBuilder.build_counterfactual_discriminator( - input_dim, - x, - t, - y_pred, - num_units, - dropout, - l2_weight, - learning_rate, - num_layers, - num_treatments, - with_bn, - nonlinearity, - initializer, - ) - return y_pred, propensity_scores, z_g - - @staticmethod - def build_counterfactual_generator( - input_dim, - x, - t, - y_f, - num_units=128, - dropout=0.0, - l2_weight=0.0, - learning_rate=0.0001, - num_layers=2, - num_treatments=2, - with_bn=False, - nonlinearity="elu", - initializer=tf.variance_scaling_initializer(), - ): - nonlinearity = get_nonlinearity_by_name(nonlinearity) - with tf.variable_scope("g_cf", initializer=initializer): - z_g = tf.placeholder("float", shape=[None, num_treatments - 1], name="z_g") - - 
mlp_input = tf.concat([x, y_f, t, z_g], axis=-1) - x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity) - y = tf.layers.dense( - x[0], units=num_treatments, use_bias=True, bias_initializer=initializer - ) - return y, z_g - - @staticmethod - def build_counterfactual_discriminator( - input_dim, - x, - t, - y_pred, - num_units=128, - dropout=0.0, - l2_weight=0.0, - learning_rate=0.0001, - num_layers=2, - num_treatments=2, - with_bn=False, - nonlinearity="elu", - initializer=tf.variance_scaling_initializer(), - reuse=False, - ): - nonlinearity = get_nonlinearity_by_name(nonlinearity) - with tf.variable_scope("d_cf", reuse=reuse, initializer=initializer): - mlp_input = tf.concat([x, y_pred], axis=-1) - x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity) - propensity_scores = tf.layers.dense( - x[0], units=num_treatments, use_bias=True, bias_initializer=initializer - ) - return propensity_scores - - @staticmethod - def build_ite_block( - input_dim, - x, - t, - y_f, - y_full, - num_units=128, - dropout=0.0, - l2_weight=0.0, - learning_rate=0.0001, - num_layers=2, - num_treatments=2, - with_bn=False, - nonlinearity="elu", - initializer=tf.variance_scaling_initializer(), - ): - y_pred_ite, z_i = GANITEBuilder.build_ite_generator( - input_dim, - x, - t, - y_f, - num_units, - dropout, - l2_weight, - learning_rate, - num_layers, - num_treatments, - with_bn, - nonlinearity, - initializer, - ) - - d_ite_pred = GANITEBuilder.build_ite_discriminator( - input_dim, - x, - t, - y_pred_ite, - num_units, - dropout, - l2_weight, - learning_rate, - num_layers, - num_treatments, - with_bn, - nonlinearity, - initializer, - reuse=False, - ) - - d_ite_true = GANITEBuilder.build_ite_discriminator( - input_dim, - x, - t, - y_full, - num_units, - dropout, - l2_weight, - learning_rate, - num_layers, - num_treatments, - with_bn, - nonlinearity, - initializer, - reuse=True, - ) - - return y_pred_ite, d_ite_pred, d_ite_true, z_i - - @staticmethod - def build_ite_generator( - input_dim, - x, - t, - y_f, - num_units=128, - dropout=0.0, - l2_weight=0.0, - learning_rate=0.0001, - num_layers=2, - num_treatments=2, - with_bn=False, - nonlinearity="elu", - initializer=tf.variance_scaling_initializer(), - ): - nonlinearity = get_nonlinearity_by_name(nonlinearity) - with tf.variable_scope("g_ite", initializer=initializer): - z_i = tf.placeholder("float", shape=[None, num_treatments], name="z_i") - mlp_input = tf.concat([x, z_i], axis=-1) - x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity) - y_pred = tf.layers.dense( - x[0], units=num_treatments, use_bias=True, bias_initializer=initializer - ) - return y_pred, z_i - - @staticmethod - def build_ite_discriminator( - input_dim, - x, - t, - y_pred, - num_units=128, - dropout=0.0, - l2_weight=0.0, - learning_rate=0.0001, - num_layers=2, - num_treatments=2, - with_bn=False, - nonlinearity="elu", - initializer=tf.variance_scaling_initializer(), - reuse=False, - ): - nonlinearity = get_nonlinearity_by_name(nonlinearity) - with tf.variable_scope("d_ite", reuse=reuse, initializer=initializer): - mlp_input = tf.concat([x, y_pred], axis=-1) - x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity) - y = tf.layers.dense( - x[0], - units=1, - use_bias=True, - bias_initializer=initializer, - activation=tf.nn.sigmoid, - ) - return y diff --git a/src/justcause/contrib/ganite/ganite_model.py b/src/justcause/contrib/ganite/ganite_model.py deleted file mode 100644 index b01098a..0000000 --- 
a/src/justcause/contrib/ganite/ganite_model.py +++ /dev/null @@ -1,327 +0,0 @@ -""" -Copyright (C) 2018 Patrick Schwab, ETH Zurich - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, -and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions - of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF -CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. -""" # noqa: E501 -from __future__ import print_function - -import sys - -import numpy as np -import tensorflow as tf - -from .ganite_builder import GANITEBuilder - - -class GANITEModel(object): - def __init__( - self, - input_dim, - output_dim, - num_units=128, - dropout=0.0, - l2_weight=0.0, - learning_rate=0.0001, - num_layers=2, - num_treatments=2, - with_bn=False, - nonlinearity="elu", - initializer=tf.variance_scaling_initializer(), - alpha=1.0, - beta=1.0, - ): - - tf.reset_default_graph() - - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - self.sess = tf.Session(config=config) - self.num_treatments = num_treatments - - # ToDo: This is crazy, never return that many values - ( - self.cf_generator_loss, - self.cf_discriminator_loss, - self.ite_generator_loss, - self.ite_discriminator_loss, - self.x, - self.t, - self.y_f, - self.y_full, - self.y_pred_cf, - self.y_pred_ite, - self.z_g, - self.z_i, - ) = GANITEBuilder.build( - input_dim, - output_dim, - num_units=num_units, - dropout=dropout, - l2_weight=l2_weight, - learning_rate=learning_rate, - num_layers=num_layers, - num_treatments=num_treatments, - with_bn=with_bn, - nonlinearity=nonlinearity, - initializer=initializer, - alpha=alpha, - beta=beta, - ) - - @staticmethod - def get_scoped_variables(scope_name): - t_vars = tf.trainable_variables() - vars = [var for var in t_vars if scope_name in var.name] - return vars - - @staticmethod - def get_cf_generator_vairables(): - return GANITEModel.get_scoped_variables("g_cf") - - @staticmethod - def get_cf_discriminator_vairables(): - return GANITEModel.get_scoped_variables("d_cf") - - @staticmethod - def get_ite_generator_vairables(): - return GANITEModel.get_scoped_variables("g_ite") - - @staticmethod - def get_ite_discriminator_vairables(): - return GANITEModel.get_scoped_variables("d_ite") - - def load(self, path): - saver = tf.train.Saver() # noqa: F841 - # saver.restore(self.sess, path) - - def train( - self, - train_generator, - train_steps, - val_generator, - val_steps, - num_epochs, - learning_rate, - learning_rate_decay=0.97, - iterations_per_decay=100, - dropout=0.0, - imbalance_loss_weight=0.0, - l2_weight=0.0, - checkpoint_path="", - early_stopping_patience=12, - early_stopping_on_pehe=False, - verbose=False, - ): - - saver = tf.train.Saver(max_to_keep=0) # noqa: F841 - - global_step_1 = tf.Variable(0, 
trainable=False, dtype="int64") - global_step_2 = tf.Variable(0, trainable=False, dtype="int64") - global_step_3 = tf.Variable(0, trainable=False, dtype="int64") - global_step_4 = tf.Variable(0, trainable=False, dtype="int64") - - opt = tf.train.AdamOptimizer(learning_rate) - train_step_g_cf = opt.minimize( - self.cf_generator_loss, - global_step=global_step_1, - var_list=GANITEModel.get_cf_generator_vairables(), - ) - train_step_d_cf = opt.minimize( - self.cf_discriminator_loss, - global_step=global_step_2, - var_list=GANITEModel.get_cf_discriminator_vairables(), - ) - train_step_g_ite = opt.minimize( - self.ite_generator_loss, - global_step=global_step_3, - var_list=GANITEModel.get_ite_generator_vairables(), - ) - train_step_d_ite = opt.minimize( - self.ite_discriminator_loss, - global_step=global_step_4, - var_list=GANITEModel.get_ite_discriminator_vairables(), - ) - - self.sess.run(tf.global_variables_initializer()) - - best_val_loss, num_epochs_without_improvement = np.finfo(float).max, 0 - for epoch_idx in range(num_epochs): - for step_idx in range(train_steps): - train_losses_g = self.run_generator( - train_generator, 1, self.cf_generator_loss, train_step_g_cf - ) - train_losses_d = self.run_generator( - train_generator, 1, self.cf_discriminator_loss, train_step_d_cf - ) - - val_losses_g = self.run_generator( - val_generator, val_steps, self.cf_generator_loss - ) - val_losses_d = self.run_generator( - val_generator, val_steps, self.cf_discriminator_loss - ) - - current_val_loss = val_losses_g[0] - do_save = current_val_loss < best_val_loss - if do_save: - num_epochs_without_improvement = 0 - best_val_loss = current_val_loss - # saver.save(self.sess, checkpoint_path) - else: - num_epochs_without_improvement += 1 - - if verbose: - self.print_losses( - epoch_idx, - num_epochs, - [train_losses_g[0], train_losses_d[0]], - [val_losses_g[0], val_losses_d[0]], - do_save, - ) - - if num_epochs_without_improvement >= early_stopping_patience: - print("EARLY STOPPING due to NO IMPROVEMENT") - break - - best_val_loss, num_epochs_without_improvement = np.finfo(float).max, 0 - for epoch_idx in range(num_epochs): - for step_idx in range(train_steps): - train_losses_g = self.run_generator( - train_generator, - 1, - self.ite_generator_loss, - train_step_g_ite, - include_y_full=True, - ) - train_losses_d = self.run_generator( - train_generator, - 1, - self.ite_discriminator_loss, - train_step_d_ite, - include_y_full=True, - ) - val_losses_g = self.run_generator( - val_generator, val_steps, self.ite_generator_loss, include_y_full=True - ) - val_losses_d = self.run_generator( - val_generator, - val_steps, - self.ite_discriminator_loss, - include_y_full=True, - ) - - current_val_loss = val_losses_g[0] - do_save = current_val_loss < best_val_loss - if do_save: - num_epochs_without_improvement = 0 - best_val_loss = current_val_loss - # saver.save(self.sess, checkpoint_path) - else: - num_epochs_without_improvement += 1 - - if verbose: - self.print_losses( - epoch_idx, - num_epochs, - [train_losses_g[0], train_losses_d[0]], - [val_losses_g[0], val_losses_d[0]], - do_save, - ) - - if num_epochs_without_improvement >= early_stopping_patience: - print("EARLY STOPPING due to NO IMPROVEMENT") - break - - def print_losses( - self, epoch_idx, num_epochs, train_losses, val_losses, did_save=False - ): - print( - "Epoch [{:04d}/{:04d}] {:} TRAIN: G={:.3f} D={:.3f} " - "VAL: G={:.3f} D={:.3f}".format( - epoch_idx, - num_epochs, - "xx" if did_save else "::", - train_losses[0], - train_losses[1], - val_losses[0], - 
val_losses[1], - ), - file=sys.stderr, - ) - - def run_generator( - self, generator, steps, loss, train_step=None, include_y_full=False - ): - losses = [] - for iter_idx in range(steps): - (x_batch, t_batch), y_batch = next(generator) - t_original = t_batch.astype(int) - t_batch = np.expand_dims(t_batch, axis=-1) - y_batch = np.expand_dims(y_batch, axis=-1) - batch_size = len(x_batch) - feed_dict = { - self.x: x_batch, - self.t: t_batch, - self.y_f: y_batch, - self.z_g: np.random.uniform(size=(batch_size, self.num_treatments - 1)), - self.z_i: np.random.uniform(size=(batch_size, self.num_treatments)), - } - if include_y_full: - y_pred = self._predict_g_cf([x_batch, t_batch], y_batch) - y_pred[:, t_original] = y_batch - feed_dict[self.y_full] = y_pred - - if train_step is not None: - self.sess.run(train_step, feed_dict=feed_dict) - - losses.append(self.sess.run([loss], feed_dict=feed_dict)) - return np.mean(losses, axis=0) - - def _predict_g_cf(self, x, y_f): - batch_size = len(x[0]) - y_pred = self.sess.run( - self.y_pred_cf, - feed_dict={ - self.x: x[0], - self.t: x[1], - self.y_f: y_f, - self.z_g: np.random.uniform(size=(batch_size, self.num_treatments - 1)), - }, - ) - return y_pred - - def pred_full(self, x, t, y_f): - batch_size = len(x) - y_pred = self.sess.run( - self.y_pred_cf, - feed_dict={ - self.x: x, - self.t: t.reshape(-1, 1), - self.y_f: y_f.reshape(-1, 1), - self.z_g: np.random.uniform(size=(batch_size, self.num_treatments - 1)), - }, - ) - return y_pred - - def predict(self, x): - batch_size = len(x) - y_pred = self.sess.run( - self.y_pred_ite, - feed_dict={ - self.x: x, - self.z_i: np.random.uniform(size=(batch_size, self.num_treatments)), - }, - ) - return y_pred diff --git a/src/justcause/learners/__init__.py b/src/justcause/learners/__init__.py index f14f6e5..ae6f1f5 100644 --- a/src/justcause/learners/__init__.py +++ b/src/justcause/learners/__init__.py @@ -1,24 +1,15 @@ """Basic learners and justcause-friendly wrappers for more advanced methods""" from .meta.slearner import SLearner # noqa: F401 from .meta.tlearner import TLearner # noqa: F401 -from .meta.rlearner import RLearner # noqa: F401 -from .meta.xlearner import XLearner # noqa: F401 -from .tree.causal_forest import CausalForest # noqa: F401 from .ate.double_robust import DoubleRobustEstimator # noqa: F401 from .ate.propensity_weighting import PSWEstimator # noqa: F401 -from .nn.dragonnet import DragonNet # noqa: F401 - ___all__ = [ "SLearner", "WeightedSLearner", "TLearner", "WeightedTLearner", - "RLearner", - "XLearner", - "CausalForest", "DoubleRobustEstimator", "PSWEstimator", - "DragonNet", ] diff --git a/src/justcause/learners/meta/rlearner.py b/src/justcause/learners/meta/rlearner.py deleted file mode 100644 index 1781494..0000000 --- a/src/justcause/learners/meta/rlearner.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Wrapper of the python RLearner implemented in the ``causalml`` package""" -from typing import Optional, Union - -import numpy as np -from numpy.random import RandomState -from sklearn.linear_model import LinearRegression -from sklearn.utils import check_random_state - -from ..propensity import estimate_propensities - -StateType = Optional[Union[int, RandomState]] - - -class RLearner: - """A wrapper of the BaseRRegressor from ``causalml`` - - Defaults to LassoLars regression as a base learner if not specified otherwise. - Allows to either specify one learner for both tasks or two distinct learners - for the task outcome and effect learning. - - References: - CausalML Framework `on Github '_. 
- - [1] X. Nie and S. Wager, - “Quasi-Oracle Estimation of Heterogeneous Treatment Effects.” - """ - - def __init__( - self, - learner=None, - outcome_learner=None, - effect_learner=None, - random_state: StateType = None, - ): - """Setup an RLearner - - Args: - learner: default learner for both outcome and effect - outcome_learner: specific learner for outcome - effect_learner: specific learner for effect - random_state: RandomState or int to be used for K-fold splitting. NOT used - in the learners, this has to be done by the user. - """ - from causalml.inference.meta import BaseRRegressor - - if learner is None and (outcome_learner is None and effect_learner is None): - learner = LinearRegression() - - self.random_state = check_random_state(random_state) - self.model = BaseRRegressor( - learner, outcome_learner, effect_learner, random_state=random_state - ) - - def __str__(self): - """Simple string representation for logs and outputs""" - return "{}(outcome={}, effect={})".format( - self.__class__.__name__, - self.model.model_mu.__class__.__name__, - self.model.model_tau.__class__.__name__, - ) - - def __repr__(self): - return self.__str__() - - def fit(self, x: np.array, t: np.array, y: np.array, p: np.array = None) -> None: - """Fits the RLearner on given samples. - - Defaults to `justcause.learners.propensities.estimate_propensities` - for ``p`` if not given explicitly, in order to allow a generic call - to the fit() method - - Args: - x: covariate matrix of shape (num_instances, num_features) - t: treatment indicator vector, shape (num_instances) - y: factual outcomes, (num_instances) - p: propensities, shape (num_instances) - - """ - if p is None: - # Propensity is needed by CausalML, so we estimate it, - # if it was not provided - p = estimate_propensities(x, t) - - self.model.fit(x, p, t, y) - - def predict_ite(self, x: np.array, *args) -> np.array: - """Predicts ITE for given samples; ignores the factual outcome and treatment - - Args: - x: covariates used for precition - *args: NOT USED but kept to work with the standard ``fit(x, t, y)`` call - - """ - - # assert t is None and y is None, "The R-Learner does not use factual outcomes" - return self.model.predict(x).flatten() - - def estimate_ate( - self, x: np.array, t: np.array, y: np.array, p: Optional[np.array] = None - ) -> float: - """Estimate the average treatment effect (ATE) by fit and predict on given data - - Estimates the ATE as the mean of ITE predictions on the given data. - - Args: - x: covariates of shape (num_samples, num_covariates) - t: treatment indicator vector, shape (num_instances) - y: factual outcomes, (num_instances) - p: propensities, shape (num_instances) - - Returns: - the average treatment effect estimate - - - """ - self.fit(x, t, y, p) - ite = self.predict_ite(x, t, y) - return float(np.mean(ite)) diff --git a/src/justcause/learners/meta/xlearner.py b/src/justcause/learners/meta/xlearner.py deleted file mode 100644 index 6979cbf..0000000 --- a/src/justcause/learners/meta/xlearner.py +++ /dev/null @@ -1,147 +0,0 @@ -from typing import Optional - -import numpy as np -from sklearn.linear_model import LassoLars - -from justcause.learners.utils import replace_factual_outcomes - -from ..propensity import estimate_propensities - - -class XLearner: - """Wrapper of the BaseXRegressor from causalml - - Defaults to `sklearn.linear_model.LassoLars` as a base learner if not specified - otherwise. 
Allows to either specify one learner for all or four distinct learners - for the tasks - - outcome control - - outcome treated - - effect control - - effect treated - - References: - [1] CausalML Framework `on Github '_. - - [2] S. R. Künzel, J. S. Sekhon, P. J. Bickel, and B. Yu, - “Meta-learners for Estimating Heterogeneous - Treatment Effects using Machine Learning,” 2019. - - """ - - def __init__( - self, - learner=None, - outcome_learner_c=None, - outcome_learner_t=None, - effect_learner_c=None, - effect_learner_t=None, - ): - """Setup a XLearner - - All learners must have ``fit(x, y)`` and ``predict(x)`` methods. - - Args: - learner: default learner for all roles - outcome_learner_c: specific learner for control outcome function - outcome_learner_t: specific learner for treated outcome function - effect_learner_c: specific learner for treated effect - effect_learner_t: specific learner for control effect - """ - from causalml.inference.meta import BaseXRegressor - - if (learner is not None) or ( - (outcome_learner_c is not None) - and (outcome_learner_t is not None) - and (effect_learner_c is not None) - and (effect_learner_t is not None) - ): - self.model = BaseXRegressor( - learner, - outcome_learner_c, - outcome_learner_t, - effect_learner_c, - effect_learner_t, - ) - else: - # Assign default learner - learner = LassoLars() - self.model = BaseXRegressor( - learner, - outcome_learner_c, - outcome_learner_t, - effect_learner_c, - effect_learner_t, - ) - - def __str__(self): - """Simple string representation for logs and outputs""" - return "{}(outcome_c={}, outcome_t={}, effect_c={}, effect_t={})".format( - self.__class__.__name__, - self.model.model_mu_c.__class__.__name__, - self.model.model_mu_t.__class__.__name__, - self.model.model_tau_c.__class__.__name__, - self.model.model_tau_t.__class__.__name__, - ) - - def __repr__(self): - return self.__str__() - - def fit(self, x: np.array, t: np.array, y: np.array) -> None: - """Fits the RLearner on given samples - - Args: - x: covariate matrix of shape (num_instances, num_features) - t: treatment indicator vector, shape (num_instances) - y: factual outcomes, (num_instances) - - """ - self.model.fit(x, t, y) - - def predict_ite( - self, - x: np.array, - t: np.array = None, - y: np.array = None, - p: Optional[np.array] = None, - return_components: bool = False, - replace_factuals: bool = False, - ) -> np.array: - """Predicts ITE for the given population - - If propensities ``p`` are not given, they are estimated using the default - implementation `justcause.learners.propensities.estimate_propensities` - - Args: - x: covariates - t: treatment indicator - y: factual outcomes - p: propensity scores - return_components: whether to return Y(1) and Y(0) for all instances or not - replace_factuals: whether to replace predicted outcomes with the factual - outcomes where applicable - - Returns: - the ITE prediction either with or without components - """ - if p is None: - # Set default propensity, because CausalML currently requires it - p = estimate_propensities(x, t) - - if return_components: - ite, y_0, y_1 = self.model.predict(x, p, t, y, return_components=True) - if t is not None and y is not None and replace_factuals: - y_0, y_1 = replace_factual_outcomes(y_0, y_1, y, t) - return ite.flatten(), y_0.flatten(), y_1.flatten() - else: - return self.model.predict(x, p, t, y, return_components=False).flatten() - - def estimate_ate( - self, - x: np.array, - t: np.array, - y: np.array, - propensities: Optional[np.array] = None, - ) -> float: - 
"""Predicts ATE for given samples as mean of ITE predictions""" - self.fit(x, t, y) - return float(np.mean(self.predict_ite(x, t, y, propensities))) diff --git a/src/justcause/learners/nn/__init__.py b/src/justcause/learners/nn/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/justcause/learners/nn/dragonnet.py b/src/justcause/learners/nn/dragonnet.py deleted file mode 100644 index 4d06bee..0000000 --- a/src/justcause/learners/nn/dragonnet.py +++ /dev/null @@ -1,107 +0,0 @@ -import numpy as np - -from ..utils import replace_factual_outcomes - - -class DragonNet: - """Wrapper of the DragonNet implementation in `justcause.contrib.dragonnet` - - Original code taken with slide adaption for usage within the framework. - Source can be found here: https://github.com/claudiashi57/dragonnet - - References: - [1] C. Shi, D. M. Blei, and V. Veitch, - “Adapting Neural Networks for the Estimation of Treatment Effects.” - """ - - def __init__( - self, learning_rate=0.001, num_epochs=50, batch_size=512, validation_split=0.1 - ): - self.learning_rate = learning_rate - self.num_epochs = num_epochs - self.batch_size = batch_size - self.validation_split = validation_split - self.model = None - - def __repr__(self): - return self.__str__() - - def __str__(self): - return "DragonNet(epochs={}, lr={}, batch={}, val_split={})".format( - self.num_epochs, self.learning_rate, self.batch_size, self.validation_split - ) - - def fit(self, x: np.array, t: np.array, y: np.array) -> None: - """Trains DragonNet with hyper-parameters specified in the constructor - - Args: - x: covariates for all instances, shape (num_instance, num_features) - t: treatment indicator vector, shape (num_instance) - y: factual outcomes, shape (num_instance) - - """ - # Late import to avoid installing all of dragonnet's requirements - from ...contrib.dragonnet import dragonnet - - self.model = dragonnet.train_dragon( - t, - y, - x, - num_epochs=self.num_epochs, - batch_size=self.batch_size, - learning_rate=self.learning_rate, - val_split=self.validation_split, - ) - - def predict_ite( - self, - x: np.array, - t: np.array = None, - y: np.array = None, - return_components: bool = False, - replace_factuals: bool = False, - ): - """Predicts ITE for the given samples - - Args: - x: covariates in shape (num_instances, num_features) - t: treatment indicator, binary in shape (num_instances) - y: factual outcomes in shape (num_instances) - return_components: whether to return Y(0) and Y(1) predictions separately - replace_factuals: whether to replace outcomes with true outcomes - where possible - - Returns: - a vector of ITEs for the inputs; also returns Y(0) and Y(1) for all - inputs if ``return_components`` is ``True`` - """ - assert self.model is not None, "DragonNet must be fit before use" - - res = self.model.predict(x) - y_0, y_1 = res[:, 0], res[:, 1] - - if return_components: - if t is not None and y is not None and replace_factuals: - y_0, y_1 = replace_factual_outcomes(y_0, y_1, y, t) - return y_1 - y_0, y_0, y_1 - else: - return y_1 - y_0 - - def estimate_ate( - self, x: np.array, t: np.array = None, y: np.array = None, - ) -> float: - """Estimates the average treatment effect of the given population - - First, it fits the model on the given population, then predicts ITEs and uses - the mean as an estimate for the ATE - - Args: - x: covariates in shape (num_instances, num_features) - t: treatment indicator, binary in shape (num_instances) - y: factual outcomes in shape (num_instances) - - Returns: ATE estimate as 
the mean of ITEs - """ - self.fit(x, t, y) - ite = self.predict_ite(x, t, y) - return float(np.mean(ite)) diff --git a/src/justcause/learners/tree/__init__.py b/src/justcause/learners/tree/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/justcause/learners/tree/causal_forest.py b/src/justcause/learners/tree/causal_forest.py deleted file mode 100644 index a776f31..0000000 --- a/src/justcause/learners/tree/causal_forest.py +++ /dev/null @@ -1,123 +0,0 @@ -from typing import Optional, Union - -import numpy as np -from numpy.random import RandomState -from sklearn.utils import check_random_state - -from ...utils import int_from_random_state - -#: type alias -StateType = Optional[Union[int, RandomState]] - - -class CausalForest: - """Port/Wrapper for the R implementation of CausalForests using ``rpy2`` - - See reference [2] for a list of parameters to the original implementation. In order - to pass python parameters some naming conventions have to be considered, but - cannot be guaranteed: - - In general, points are converted to underscores. Thus ``num.trees`` becomes - ``num_trees`` when using the Rpy2 API - - - References: - [1] “Generalized random forests” - S. Athey, J. Tibshirani, and S. Wager, - Ann. Stat., vol. 47, no. 2, pp. 1179–1203, 2019. - - [2] The CRAN manual for `grf` - https://cran.r-project.org/web/packages/grf/grf.pdf - - [3] Rpy2 manual on naming: - http://rpy.sourceforge.net/rpy2/doc-2.2/html/robjects_functions.html - """ - - def __init__( - self, num_trees: int = 200, random_state: RandomState = None, **kwargs - ): - """Setup the CausalForest Wrapper - - Checks if the required R package is available and instantiates it. - We don't install the grf package automatically, because it takes to long with - the required compilation. It would interrupt application workflow and is not - the task of our library. 
diff --git a/src/justcause/learners/utils.py b/src/justcause/learners/utils.py
index a74a1bc..5bcd4fa 100644
--- a/src/justcause/learners/utils.py
+++ b/src/justcause/learners/utils.py
@@ -1,5 +1,5 @@
 """Miscellaneous tools used in the `justcause.learners`"""
-from typing import List, Tuple
+from typing import Tuple
 
 import numpy as np
 
@@ -20,24 +20,3 @@ def replace_factual_outcomes(
     y_0 = np.where(t == 0, y, y_0)
     y_1 = np.where(t == 1, y, y_1)
     return y_0, y_1
-
-
-def install_r_packages(package_names: List[str], verbose=False):
-    """Installs the R packages if needed using rpy2 utility functions
-
-    Args:
-        package_names: names of the R packages to install
-        verbose: Whether to print progress information or not
-
-    """
-    import rpy2.robjects.packages as rpackages
-    from rpy2 import robjects
-    from rpy2.robjects import StrVector
-
-    robjects.r.options(download_file_method="curl")
-    utils = rpackages.importr("utils")
-    utils.chooseCRANmirror(ind=1)
-
-    names_to_install = [x for x in package_names if not rpackages.isinstalled(x)]
-    if len(names_to_install) > 0:
-        utils.install_packages(StrVector(names_to_install), verbose=verbose)
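`replace_factual_outcomes` stays in the library; its full ``np.where`` logic is visible in the hunk above. A quick usage example, with values chosen purely for illustration:

    import numpy as np
    from justcause.learners.utils import replace_factual_outcomes

    y_0_pred = np.array([0.5, 0.5, 0.5])  # predicted control outcomes
    y_1_pred = np.array([1.5, 1.5, 1.5])  # predicted treated outcomes
    y = np.array([0.4, 1.6, 0.6])         # observed (factual) outcomes
    t = np.array([0, 1, 0])               # treatment indicators

    y_0, y_1 = replace_factual_outcomes(y_0_pred, y_1_pred, y, t)
    # y_0 == [0.4, 0.5, 0.6]  (factuals inserted where t == 0)
    # y_1 == [1.5, 1.6, 1.5]  (factuals inserted where t == 1)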
diff --git a/tests/conftest.py b/tests/conftest.py
index e119df3..c3c9207 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -13,15 +13,11 @@
 import numpy as np
 import pandas as pd
-import rpy2.robjects.packages as rpackages
-from rpy2 import robjects
-from rpy2.robjects import StrVector
 
 from justcause.data.frames import CausalFrame
 from justcause.data.sets.ibm import load_ibm
 from justcause.data.sets.ihdp import load_ihdp
 from justcause.data.sets.twins import load_twins
-from justcause.learners.utils import install_r_packages
 
 RUNS_ON_CIRRUS = bool(strtobool(os.environ.get("CIRRUS_CI", "false")))
 
@@ -98,28 +94,3 @@ def dummy_covariates_and_treatment():
     T_1 = np.full(80, 0)
     t = np.append(T_0, T_1)
     return X, t
-
-
-@pytest.fixture
-def uninstall_grf():
-    """Ensures the grf package is not installed before the test runs"""
-    if rpackages.isinstalled("grf"):
-        robjects.r.options(download_file_method="curl")
-        utils = rpackages.importr("utils")
-        utils.chooseCRANmirror(ind=0)
-
-        utils.remove_packages(StrVector(["grf"]))
-
-
-@pytest.fixture
-def grf():
-    """Ensures grf is installed before running tests with it
-
-    This is required as the user is usually asked to install the package manually
-
-    """
-    if not rpackages.isinstalled("grf"):
-        install_r_packages(["grf"])
-        return 0
-
-    return 1
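With the grf fixtures gone, tests needing optional dependencies have no helper anymore. One possible pattern, offered as a suggestion rather than as part of this patch, is to skip such tests instead of installing packages on the fly:

    import pytest

    @pytest.fixture
    def optional_grf():
        # Skips the requesting test unless rpy2 is importable; fixture name is hypothetical.
        rpackages = pytest.importorskip("rpy2.robjects.packages")
        if not rpackages.isinstalled("grf"):
            pytest.skip("R package 'grf' is not installed")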
diff --git a/tests/test_learners.py b/tests/test_learners.py
index 6392a88..830ab71 100644
--- a/tests/test_learners.py
+++ b/tests/test_learners.py
@@ -2,17 +2,7 @@
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.linear_model import LinearRegression, LogisticRegression
 
-from justcause.learners import (
-    CausalForest,
-    DoubleRobustEstimator,
-    DragonNet,
-    PSWEstimator,
-    RLearner,
-    SLearner,
-    TLearner,
-    XLearner,
-)
-from justcause.metrics import pehe_score
+from justcause.learners import DoubleRobustEstimator, PSWEstimator, SLearner, TLearner
 
 
 def test_slearner(ihdp_data):
@@ -69,67 +59,6 @@ def test_tlearner(ihdp_data):
     assert len(pred) == len(t)
 
 
-def test_rlearner(ihdp_data):
-    rep = ihdp_data[0]
-    x, t, y = rep.np.X, rep.np.t, rep.np.y
-
-    # with RF explicitly
-    rlearner = RLearner(RandomForestRegressor())
-    rlearner.fit(x, t, y)
-    pred = rlearner.predict_ite(x)
-    assert len(pred) == len(t)
-
-    # With default
-    rlearner = RLearner()
-    rlearner.fit(x, t, y)
-    pred = rlearner.predict_ite(x)
-    assert len(pred) == len(t)
-
-    assert (
-        str(rlearner) == "RLearner(outcome=LinearRegression, effect=LinearRegression)"
-    )
-
-
-def test_xlearner(ihdp_data):
-    rep = ihdp_data[0]
-    x, t, y = rep.np.X, rep.np.t, rep.np.y
-    true_ite = rep["ite"].values
-
-    # With LinearRegression
-    xlearner = XLearner(LinearRegression())
-    xlearner.fit(x, t, y)
-    pred = xlearner.predict_ite(x, t, y)
-    assert len(pred) == len(t)
-
-    # With default
-    xlearner = XLearner(LinearRegression())
-    xlearner.fit(x, t, y)
-    pred = xlearner.predict_ite(x, t, y)
-    assert len(pred) == len(t)
-    assert abs(pehe_score(true_ite, pred) - 0.5) < 0.2
-
-    pred_ate = xlearner.estimate_ate(x, t, y)
-    true_ate = np.mean(rep["ite"].values)
-    assert abs(pred_ate - true_ate) < 0.2
-
-
-def test_causalforest(ihdp_data, grf):
-    rep = ihdp_data[0]
-    x, t, y = rep.np.X, rep.np.t, rep.np.y
-    cf = CausalForest()
-    cf.fit(x, t, y)
-    pred_ate = cf.estimate_ate(x, t, y)
-    true_ate = np.mean(rep["ite"].values)
-    assert abs(pred_ate - true_ate) < 0.2
-
-    # Try passing keyword arguments to the R implementation
-    cf = CausalForest(num_trees=50, alpha=0.1, honesty=False)
-    cf.fit(x, t, y)
-    pred_ate = cf.estimate_ate(x, t, y)
-    true_ate = np.mean(rep["ite"].values)
-    assert abs(pred_ate - true_ate) < 0.2
-
-
 def test_dre(ihdp_data):
     rep = ihdp_data[0]
     x, t, y = rep.np.X, rep.np.t, rep.np.y
@@ -154,15 +83,3 @@ def test_psw(ihdp_data):
     psw = PSWEstimator()
     ate = psw.estimate_ate(x, t, y)
     assert ate > 0
-
-
-def test_dragonnet(ihdp_data):
-
-    rep = ihdp_data[0]
-    x, t, y = rep.np.X, rep.np.t, rep.np.y
-    dragon = DragonNet()
-    dragon.fit(x, t, y)
-    ate = np.mean(dragon.predict_ite(x, t, y))
-
-    true_ate = np.mean(rep["ite"].values)
-    assert abs(ate - true_ate) < 0.5
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 5b7d049..66cc6e1 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,11 +1,10 @@
 import pytest
 
 import numpy as np
-import rpy2.robjects.packages as rpackages
 from numpy.random import RandomState
 
 from justcause.data.utils import to_rep_iter, to_rep_list
-from justcause.learners.utils import install_r_packages, replace_factual_outcomes
+from justcause.learners.utils import replace_factual_outcomes
 from justcause.utils import int_from_random_state
 
 
@@ -36,12 +35,6 @@ def test_replace_factuals():
     assert y_0[0] == y[0]
 
 
-def test_install_r_packages(uninstall_grf):
-    package_names = ["grf"]
-    install_r_packages(package_names, verbose=True)
-    assert rpackages.isinstalled(package_names[0])
-
-
 def test_int_from_random_state():
     rs = RandomState(5)
     rs_int = int_from_random_state(rs)
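`int_from_random_state`, exercised by the remaining test above, is what the deleted CausalForest used to turn a `RandomState` into the integer `seed` that grf expects. Typical usage, assuming only that the helper derives an integer deterministically from the given state:

    from numpy.random import RandomState
    from justcause.utils import int_from_random_state

    rs = RandomState(5)
    seed = int_from_random_state(rs)  # integer seed for backends without RandomState support
    assert seed == int_from_random_state(RandomState(5))  # same state, same seed (by assumption)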