diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d3b8fea..08e5408 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,9 +2,12 @@
Changelog
=========
-Version 0.3.2
-=============
+Version 0.4
+===========
+- removed dependency on ``causalml``; thus the X- and R-learners were removed
+- removed dependency on ``rpy2``; thus the CausalForest method was removed
+- removed dependency on ``tensorflow``; thus the GANITE and DragonNet methods were removed
- added missing ``requests`` library in dependencies
Version 0.3.2
diff --git a/docs/requirements.txt b/docs/requirements.txt
index f33f475..b4a3abd 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -5,5 +5,4 @@ pandas
matplotlib==2.2.4
pyarrow>=0.15 # Mac problems with 0.13
scikit-learn>=0.22
-causalml
sphinx_rtd_theme
diff --git a/environment.yaml b/environment.yaml
index 499a848..30ffd61 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -11,16 +11,10 @@ dependencies:
- scipy
- requests
- pandas
- - r-base
- - rpy2
- seaborn
- - tensorflow=1.13.1 # required for dragonnet project code
- - keras
- - cython # needed by CausalML
- - arrow-cpp # check if needed
- pyarrow>=0.15 # Mac problems with 0.13
- - scikit-learn==0.20.3 # Currently required to downgrade scikit-learn in order for CausalML to compile
- - matplotlib==2.2.4 # Required for CausalML to work instantly
+ - scikit-learn
+ - matplotlib
- seaborn
# for development below here
- jupyterlab
@@ -32,5 +26,4 @@ dependencies:
- flake8
- pip:
- pre-commit
- - causalml
- pygam
diff --git a/setup.cfg b/setup.cfg
index 4055ac2..7a83d00 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -37,7 +37,7 @@ package_dir =
# DON'T CHANGE THE FOLLOWING LINE! IT WILL BE UPDATED BY PYSCAFFOLD!
setup_requires = pyscaffold>=3.2a0,<3.3a0
# Add here dependencies of your project (semicolon/line-separated), e.g.
-install_requires = numpy; pyarrow; pandas>=1.0; scikit-learn; requests; causalml; rpy2; pygam
+install_requires = numpy; pyarrow; pandas>=1.0; scikit-learn; requests; pygam
python_requires = >=3.6
[options.packages.find]
diff --git a/src/justcause/contrib/__init__.py b/src/justcause/contrib/__init__.py
deleted file mode 100644
index e884986..0000000
--- a/src/justcause/contrib/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-"""
-Contribution packages used by JustCause
-
-All packages inside ``contrib`` are external packages that come with their
-own licences and are not part of the JustCause source code itself.
-The copyright remains with the original authors or copyright holders.
-"""
diff --git a/src/justcause/contrib/dragonnet/__init__.py b/src/justcause/contrib/dragonnet/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/justcause/contrib/dragonnet/dragon_utils.py b/src/justcause/contrib/dragonnet/dragon_utils.py
deleted file mode 100644
index 9431ba8..0000000
--- a/src/justcause/contrib/dragonnet/dragon_utils.py
+++ /dev/null
@@ -1,325 +0,0 @@
-import keras.backend as K
-import tensorflow as tf
-from keras import regularizers
-from keras.engine.topology import Layer
-from keras.layers import Concatenate, Dense, Input
-from keras.metrics import binary_accuracy
-from keras.models import Model
-
-
-def binary_classification_loss(concat_true, concat_pred):
- t_true = concat_true[:, 1]
- t_pred = concat_pred[:, 2]
- t_pred = (t_pred + 0.001) / 1.002
- losst = tf.reduce_sum(K.binary_crossentropy(t_true, t_pred))
-
- return losst
-
-
-def regression_loss(concat_true, concat_pred):
- y_true = concat_true[:, 0]
- t_true = concat_true[:, 1]
-
- y0_pred = concat_pred[:, 0]
- y1_pred = concat_pred[:, 1]
-
- loss0 = tf.reduce_sum((1.0 - t_true) * tf.square(y_true - y0_pred))
- loss1 = tf.reduce_sum(t_true * tf.square(y_true - y1_pred))
-
- return loss0 + loss1
-
-
-def ned_loss(concat_true, concat_pred):
- t_true = concat_true[:, 1]
-
- t_pred = concat_pred[:, 1]
- return tf.reduce_sum(K.binary_crossentropy(t_true, t_pred))
-
-
-def dead_loss(concat_true, concat_pred):
- return regression_loss(concat_true, concat_pred)
-
-
-def dragonnet_loss_binarycross(concat_true, concat_pred):
- return regression_loss(concat_true, concat_pred) + binary_classification_loss(
- concat_true, concat_pred
- )
-
-
-def treatment_accuracy(concat_true, concat_pred):
- t_true = concat_true[:, 1]
- t_pred = concat_pred[:, 2]
- return binary_accuracy(t_true, t_pred)
-
-
-def track_epsilon(concat_true, concat_pred):
- epsilons = concat_pred[:, 3]
- return tf.abs(tf.reduce_mean(epsilons))
-
-
-class EpsilonLayer(Layer):
- def __init__(self):
- super(EpsilonLayer, self).__init__()
-
- def build(self, input_shape):
- # Create a trainable weight variable for this layer.
- self.epsilon = self.add_weight(
- name="epsilon",
- shape=[1, 1],
- initializer="RandomNormal",
- # initializer='ones',
- trainable=True,
- )
- super(EpsilonLayer, self).build(input_shape) # Be sure to call this at the end
-
- def call(self, inputs, **kwargs):
- # import ipdb; ipdb.set_trace()
- return self.epsilon * tf.ones_like(inputs)[:, 0:1]
-
-
-def make_tarreg_loss(ratio=1.0, dragonnet_loss=dragonnet_loss_binarycross):
- def tarreg_ATE_unbounded_domain_loss(concat_true, concat_pred):
- vanilla_loss = dragonnet_loss(concat_true, concat_pred)
-
- y_true = concat_true[:, 0]
- t_true = concat_true[:, 1]
-
- y0_pred = concat_pred[:, 0]
- y1_pred = concat_pred[:, 1]
- t_pred = concat_pred[:, 2]
-
- epsilons = concat_pred[:, 3]
- t_pred = (t_pred + 0.01) / 1.02
- # t_pred = tf.clip_by_value(t_pred,0.01, 0.99,name='t_pred')
-
- y_pred = t_true * y1_pred + (1 - t_true) * y0_pred
-
- h = t_true / t_pred - (1 - t_true) / (1 - t_pred)
-
- y_pert = y_pred + epsilons * h
- targeted_regularization = tf.reduce_sum(tf.square(y_true - y_pert))
-
- # final
- loss = vanilla_loss + ratio * targeted_regularization
- return loss
-
- return tarreg_ATE_unbounded_domain_loss
-
-
-def make_dragonnet(input_dim, reg_l2):
- """
- Neural net predictive model. The dragon has three heads.
- :param input_dim:
- :param reg:
- :return:
- """
-
- K.clear_session() #
-
- t_l1 = 0.0 # noqa: F841
- t_l2 = reg_l2 # noqa: F841
- inputs = Input(shape=(input_dim,), name="input")
-
- # representation
- x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(inputs)
- x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(x)
- x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(x)
-
- t_predictions = Dense(units=1, activation="sigmoid")(x)
-
- # HYPOTHESIS
- y0_hidden = Dense(
- units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2)
- )(x)
- y1_hidden = Dense(
- units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2)
- )(x)
-
- # second layer
- y0_hidden = Dense(
- units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2)
- )(y0_hidden)
- y1_hidden = Dense(
- units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2)
- )(y1_hidden)
-
- # third
- y0_predictions = Dense(
- units=1,
- activation=None,
- kernel_regularizer=regularizers.l2(reg_l2),
- name="y0_predictions",
- )(y0_hidden)
- y1_predictions = Dense(
- units=1,
- activation=None,
- kernel_regularizer=regularizers.l2(reg_l2),
- name="y1_predictions",
- )(y1_hidden)
-
- dl = EpsilonLayer()
- epsilons = dl(t_predictions, name="epsilon")
- # logging.info(epsilons)
- concat_pred = Concatenate(1)(
- [y0_predictions, y1_predictions, t_predictions, epsilons]
- )
- model = Model(inputs=inputs, outputs=concat_pred)
-
- return model
-
-
-def make_tarnet(input_dim, reg_l2):
- """
- Neural net predictive model. The dragon has three heads.
- :param input_dim:
- :param reg:
- :return:
- """
-
- inputs = Input(shape=(input_dim,), name="input")
-
- # representation
- x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(inputs)
- x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(x)
- x = Dense(units=200, activation="elu", kernel_initializer="RandomNormal")(x)
-
- t_predictions = Dense(units=1, activation="sigmoid")(inputs)
-
- # HYPOTHESIS
- y0_hidden = Dense(
- units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2)
- )(x)
- y1_hidden = Dense(
- units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2)
- )(x)
-
- # second layer
- y0_hidden = Dense(
- units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2)
- )(y0_hidden)
- y1_hidden = Dense(
- units=100, activation="elu", kernel_regularizer=regularizers.l2(reg_l2)
- )(y1_hidden)
-
- # third
- y0_predictions = Dense(
- units=1,
- activation=None,
- kernel_regularizer=regularizers.l2(reg_l2),
- name="y0_predictions",
- )(y0_hidden)
- y1_predictions = Dense(
- units=1,
- activation=None,
- kernel_regularizer=regularizers.l2(reg_l2),
- name="y1_predictions",
- )(y1_hidden)
-
- dl = EpsilonLayer()
- epsilons = dl(t_predictions, name="epsilon")
- # logging.info(epsilons)
- concat_pred = Concatenate(1)(
- [y0_predictions, y1_predictions, t_predictions, epsilons]
- )
- model = Model(inputs=inputs, outputs=concat_pred)
-
- return model
-
-
-def make_ned(input_dim, reg_l2=0.01):
- """
- Neural net predictive model. The dragon has three heads.
- :param input_dim:
- :param reg:
- :return:
- """
-
- inputs = Input(shape=(input_dim,), name="input")
-
- # representation
- x = Dense(
- units=200,
- activation="elu",
- kernel_initializer="RandomNormal",
- name="ned_hidden1",
- )(inputs)
- x = Dense(
- units=200,
- activation="elu",
- kernel_initializer="RandomNormal",
- name="ned_hidden2",
- )(x)
- x = Dense(
- units=200,
- activation="elu",
- kernel_initializer="RandomNormal",
- name="ned_hidden3",
- )(x)
-
- t_predictions = Dense(units=1, activation="sigmoid", name="ned_t_activation")(x)
- y_predictions = Dense(units=1, activation=None, name="ned_y_prediction")(x)
-
- concat_pred = Concatenate(1)([y_predictions, t_predictions])
-
- model = Model(inputs=inputs, outputs=concat_pred)
- return model
-
-
-def post_cut(nednet, input_dim, reg_l2=0.01):
- for layer in nednet.layers:
- layer.trainable = False
- nednet.layers.pop()
- nednet.layers.pop()
- nednet.layers.pop()
-
- frozen = nednet
-
- x = frozen.layers[-1].output
- frozen.layers[-1].outbound_nodes = []
- input = frozen.input
-
- y0_hidden = Dense(
- units=100,
- activation="elu",
- kernel_regularizer=regularizers.l2(reg_l2),
- name="post_cut_y0_1",
- )(x)
- y1_hidden = Dense(
- units=100,
- activation="elu",
- kernel_regularizer=regularizers.l2(reg_l2),
- name="post_cut_y1_1",
- )(x)
-
- # second layer
- y0_hidden = Dense(
- units=100,
- activation="elu",
- kernel_regularizer=regularizers.l2(reg_l2),
- name="post_cut_y0_2",
- )(y0_hidden)
- y1_hidden = Dense(
- units=100,
- activation="elu",
- kernel_regularizer=regularizers.l2(reg_l2),
- name="post_cut_y1_2",
- )(y1_hidden)
-
- # third
- y0_predictions = Dense(
- units=1,
- activation=None,
- kernel_regularizer=regularizers.l2(reg_l2),
- name="y0_predictions",
- )(y0_hidden)
- y1_predictions = Dense(
- units=1,
- activation=None,
- kernel_regularizer=regularizers.l2(reg_l2),
- name="y1_predictions",
- )(y1_hidden)
-
- concat_pred = Concatenate(1)([y0_predictions, y1_predictions])
-
- model = Model(inputs=input, outputs=concat_pred)
- return model
diff --git a/src/justcause/contrib/dragonnet/dragonnet.py b/src/justcause/contrib/dragonnet/dragonnet.py
deleted file mode 100644
index c6b5916..0000000
--- a/src/justcause/contrib/dragonnet/dragonnet.py
+++ /dev/null
@@ -1,460 +0,0 @@
-import time
-
-import keras.backend as K
-import numpy as np
-import tensorflow as tf
-from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
-from keras.optimizers import SGD, Adam
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import StandardScaler
-
-from .dragon_utils import (
- binary_classification_loss,
- dead_loss,
- dragonnet_loss_binarycross,
- make_dragonnet,
- make_ned,
- make_tarnet,
- make_tarreg_loss,
- ned_loss,
- post_cut,
- regression_loss,
- track_epsilon,
- treatment_accuracy,
-)
-
-
-def _split_output(yt_hat, t, y, y_scaler, x, index):
- q_t0 = y_scaler.inverse_transform(yt_hat[:, 0].copy())
- q_t1 = y_scaler.inverse_transform(yt_hat[:, 1].copy())
- g = yt_hat[:, 2].copy()
-
- if yt_hat.shape[1] == 4:
- eps = yt_hat[:, 3][0]
- else:
- eps = np.zeros_like(yt_hat[:, 2])
-
- y = y_scaler.inverse_transform(y.copy())
- var = "average propensity for treated: {} and untreated: {}".format(
- g[t.squeeze() == 1.0].mean(), g[t.squeeze() == 0.0].mean()
- )
- print(var)
-
- return {
- "q_t0": q_t0,
- "q_t1": q_t1,
- "g": g,
- "t": t,
- "y": y,
- "x": x,
- "index": index,
- "eps": eps,
- }
-
-
-def train_dragon(
- t,
- y_unscaled,
- x,
- targeted_regularization=True,
- knob_loss=dragonnet_loss_binarycross,
- ratio=1.0,
- val_split=0.1,
- batch_size=512,
- num_epochs=100,
- learning_rate=0.001,
- verbose=1,
-):
- """Build and Train the DragonNet Model on given data
-
- :param t: treatment treatment to train on
- :param y_unscaled: outcomes to train on
- :param x: covariates to train on
- :param targeted_regularization: use targeted regularization?
- :param knob_loss: base loss to use for tar_reg
- :param ratio:
- :param val_split:
- :param batch_size: batch size to use
- :param num_epochs: number of epochs to run
- :return:
- """
-
- # Build Model structure
- dragonnet = make_dragonnet(x.shape[1], 0.01)
-
- # Only report regression loss for now
- metrics = [regression_loss]
-
- if targeted_regularization:
- loss = make_tarreg_loss(ratio=ratio, dragonnet_loss=knob_loss)
- else:
- loss = knob_loss
-
- yt = np.c_[y_unscaled, t]
-
- start_time = time.time()
-
- dragonnet.compile(optimizer=Adam(lr=learning_rate), loss=loss, metrics=metrics)
-
- class EarlyStoppingByLossVal(Callback):
- def __init__(self, monitor="regression_loss", value=400, verbose=0):
- super(Callback, self).__init__()
- self.monitor = monitor
- self.value = value
- self.verbose = verbose
-
- def on_epoch_end(self, epoch, logs={}):
- current = logs.get(self.monitor)
- if current < self.value:
- if self.verbose > 0:
- print("Epoch %05d: early stopping THR" % epoch)
- self.model.stop_training = True
-
- adam_callbacks = [
- TerminateOnNaN(),
- EarlyStopping(monitor="regression_loss", patience=10, min_delta=0.0),
- ReduceLROnPlateau(
- monitor="loss",
- factor=0.5,
- patience=5,
- verbose=verbose,
- mode="auto",
- min_delta=1e-8,
- cooldown=0,
- min_lr=0,
- )
- ]
-
- dragonnet.fit(
- x,
- yt,
- callbacks=adam_callbacks,
- validation_split=val_split,
- epochs=num_epochs,
- batch_size=batch_size,
- verbose=verbose,
- )
-
- elapsed_time = time.time() - start_time
- if verbose:
- print("***************************** elapsed_time is: ", elapsed_time)
-
- return dragonnet
-
-
-def train_and_predict_dragons(
- t,
- y_unscaled,
- x,
- targeted_regularization=True,
- output_dir="",
- knob_loss=dragonnet_loss_binarycross,
- ratio=1.0,
- dragon=1,
- val_split=0.1,
- batch_size=512,
-):
-
- verbose = 1
- y_scaler = StandardScaler().fit(y_unscaled)
- y = y_scaler.transform(y_unscaled)
- train_outputs = []
- test_outputs = []
- runs = 1
- for i in range(runs):
- if dragon == 0:
-
- dragonnet = make_tarnet(x.shape[1], 0.01)
- elif dragon == 1:
- dragonnet = make_dragonnet(x.shape[1], 0.01)
-
- metrics = [
- regression_loss,
- binary_classification_loss,
- treatment_accuracy,
- track_epsilon,
- ]
-
- if targeted_regularization:
- loss = make_tarreg_loss(ratio=ratio, dragonnet_loss=knob_loss)
- else:
- loss = knob_loss
-
- tf.random.set_random_seed(i)
- np.random.seed(i)
- train_index, test_index = train_test_split(
- np.arange(x.shape[0]), test_size=0, random_state=1
- )
- test_index = train_index
-
- x_train, x_test = x[train_index], x[test_index]
- y_train, y_test = y[train_index], y[test_index]
- t_train, t_test = t[train_index], t[test_index]
- yt_train = np.concatenate([y_train, t_train], 1)
-
- start_time = time.time()
-
- dragonnet.compile(optimizer=Adam(lr=1e-3), loss=loss, metrics=metrics)
-
- adam_callbacks = [
- TerminateOnNaN(),
- EarlyStopping(monitor="val_loss", patience=2, min_delta=0.0),
- ReduceLROnPlateau(
- monitor="loss",
- factor=0.5,
- patience=5,
- verbose=verbose,
- mode="auto",
- min_delta=1e-8,
- cooldown=0,
- min_lr=0,
- ),
- ]
-
- dragonnet.fit(
- x_train,
- yt_train,
- callbacks=adam_callbacks,
- validation_split=val_split,
- epochs=100,
- batch_size=batch_size,
- verbose=verbose,
- )
-
- sgd_callbacks = [
- TerminateOnNaN(),
- EarlyStopping(monitor="val_loss", patience=40, min_delta=0.0),
- ReduceLROnPlateau(
- monitor="loss",
- factor=0.5,
- patience=5,
- verbose=verbose,
- mode="auto",
- min_delta=0.0,
- cooldown=0,
- min_lr=0,
- ),
- ]
-
- # should pick something better!
- sgd_lr = 1e-5
- momentum = 0.9
- dragonnet.compile(
- optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True),
- loss=loss,
- metrics=metrics,
- )
- dragonnet.fit(
- x_train,
- yt_train,
- callbacks=sgd_callbacks,
- validation_split=val_split,
- epochs=300,
- batch_size=batch_size,
- verbose=verbose,
- )
-
- elapsed_time = time.time() - start_time
- print("***************************** elapsed_time is: ", elapsed_time)
-
- yt_hat_test = dragonnet.predict(x_test)
- yt_hat_train = dragonnet.predict(x_train)
-
- test_outputs += [
- _split_output(yt_hat_test, t_test, y_test, y_scaler, x_test, test_index)
- ]
- train_outputs += [
- _split_output(
- yt_hat_train, t_train, y_train, y_scaler, x_train, train_index
- )
- ]
- K.clear_session()
-
- return test_outputs, train_outputs
-
-
-def train_and_predict_ned(
- t,
- y_unscaled,
- x,
- targeted_regularization=True,
- output_dir="",
- knob_loss=dragonnet_loss_binarycross,
- ratio=1.0,
- dragon=1,
- val_split=0.1,
- batch_size=512,
-):
-
- verbose = 0
- y_scaler = StandardScaler().fit(y_unscaled)
- y = y_scaler.transform(y_unscaled)
-
- train_outputs = []
- test_outputs = []
- runs = 25
- for i in range(runs):
-
- nednet = make_ned(x.shape[1], 0.01)
-
- metrics_ned = [ned_loss]
- metrics_cut = [regression_loss]
-
- tf.random.set_random_seed(i)
- np.random.seed(i)
- train_index, test_index = train_test_split(np.arange(x.shape[0]), test_size=0.0)
- test_index = train_index
- x_train, x_test = x[train_index], x[test_index]
- y_train, y_test = y[train_index], y[test_index]
- t_train, t_test = t[train_index], t[test_index]
- yt_train = np.concatenate([y_train, t_train], 1)
-
- start_time = time.time()
-
- nednet.compile(optimizer=Adam(lr=1e-3), loss=ned_loss, metrics=metrics_ned)
-
- adam_callbacks = [
- TerminateOnNaN(),
- EarlyStopping(monitor="val_loss", patience=2, min_delta=0.0),
- ReduceLROnPlateau(
- monitor="loss",
- factor=0.5,
- patience=5,
- verbose=verbose,
- mode="auto",
- min_delta=1e-8,
- cooldown=0,
- min_lr=0,
- ),
- ]
-
- nednet.fit(
- x_train,
- yt_train,
- callbacks=adam_callbacks,
- validation_split=val_split,
- epochs=100,
- batch_size=batch_size,
- verbose=verbose,
- )
-
- sgd_callbacks = [
- TerminateOnNaN(),
- EarlyStopping(monitor="val_loss", patience=40, min_delta=0.0),
- ReduceLROnPlateau(
- monitor="loss",
- factor=0.5,
- patience=5,
- verbose=verbose,
- mode="auto",
- min_delta=0.0,
- cooldown=0,
- min_lr=0,
- ),
- ]
-
- sgd_lr = 1e-5
- momentum = 0.9
- nednet.compile(
- optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True),
- loss=ned_loss,
- metrics=metrics_ned,
- )
- print(nednet.summary())
- nednet.fit(
- x_train,
- yt_train,
- callbacks=sgd_callbacks,
- validation_split=val_split,
- epochs=300,
- batch_size=batch_size,
- verbose=verbose,
- )
-
- t_hat_test = nednet.predict(x_test)[:, 1]
- t_hat_train = nednet.predict(x_train)[:, 1]
-
- # cutting the activation layer
- cut_net = post_cut(nednet, x.shape[1], 0.01)
-
- cut_net.compile(optimizer=Adam(lr=1e-3), loss=dead_loss, metrics=metrics_cut)
-
- adam_callbacks = [
- TerminateOnNaN(),
- EarlyStopping(monitor="val_loss", patience=2, min_delta=0.0),
- ReduceLROnPlateau(
- monitor="loss",
- factor=0.5,
- patience=5,
- verbose=verbose,
- mode="auto",
- min_delta=1e-8,
- cooldown=0,
- min_lr=0,
- ),
- ]
- print(cut_net.summary())
-
- cut_net.fit(
- x_train,
- yt_train,
- callbacks=adam_callbacks,
- validation_split=val_split,
- epochs=100,
- batch_size=batch_size,
- verbose=verbose,
- )
-
- elapsed_time = time.time() - start_time
- print("***************************** elapsed_time is: ", elapsed_time)
-
- sgd_callbacks = [
- TerminateOnNaN(),
- EarlyStopping(monitor="val_loss", patience=40, min_delta=0.0),
- ReduceLROnPlateau(
- monitor="loss",
- factor=0.5,
- patience=5,
- verbose=verbose,
- mode="auto",
- min_delta=0.0,
- cooldown=0,
- min_lr=0,
- ),
- ]
-
- sgd_lr = 1e-5
- momentum = 0.9
- cut_net.compile(
- optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True),
- loss=dead_loss,
- metrics=metrics_cut,
- )
-
- cut_net.fit(
- x_train,
- yt_train,
- callbacks=sgd_callbacks,
- validation_split=val_split,
- epochs=300,
- batch_size=batch_size,
- verbose=verbose,
- )
-
- y_hat_test = cut_net.predict(x_test)
- y_hat_train = cut_net.predict(x_train)
-
- yt_hat_test = np.concatenate([y_hat_test, t_hat_test.reshape(-1, 1)], 1)
- yt_hat_train = np.concatenate([y_hat_train, t_hat_train.reshape(-1, 1)], 1)
-
- test_outputs += [
- _split_output(yt_hat_test, t_test, y_test, y_scaler, x_test, test_index)
- ]
- train_outputs += [
- _split_output(
- yt_hat_train, t_train, y_train, y_scaler, x_train, train_index
- )
- ]
- K.clear_session()
-
- return test_outputs, train_outputs
diff --git a/src/justcause/contrib/ganite/__init__.py b/src/justcause/contrib/ganite/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/justcause/contrib/ganite/ganite_builder.py b/src/justcause/contrib/ganite/ganite_builder.py
deleted file mode 100644
index 11cdfcb..0000000
--- a/src/justcause/contrib/ganite/ganite_builder.py
+++ /dev/null
@@ -1,442 +0,0 @@
-"""
-Copyright (C) 2018 Patrick Schwab, ETH Zurich
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-documentation files (the "Software"), to deal in the Software without restriction, including without limitation
-the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
-and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all copies or substantial portions
- of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-"""
-import numpy as np
-import tensorflow as tf
-
-
-def get_nonlinearity_by_name(name):
- if name.lower() == "elu":
- return tf.nn.elu
- else:
- return tf.nn.relu
-
-
-def build_mlp(
- x,
- num_layers=1,
- num_units=16,
- dropout=0.0,
- nonlinearity=tf.nn.elu,
- weight_initialisation_std=0.1,
-):
- input_dim = int(x.shape[-1])
- h_in, weights_in, biases_in = [x], [], []
- for i in range(0, num_layers):
- if i == 0:
- """ If using variable selection, first layer is just rescaling"""
- weights_in.append(
- tf.Variable(
- tf.random_normal(
- [input_dim, num_units],
- stddev=weight_initialisation_std / np.sqrt(input_dim),
- )
- )
- )
- else:
- weights_in.append(
- tf.Variable(
- tf.random_normal(
- [num_units, num_units],
- stddev=weight_initialisation_std / np.sqrt(num_units),
- )
- )
- )
-
- biases_in.append(tf.Variable(tf.zeros([1, num_units])))
- z = tf.matmul(h_in[i], weights_in[i]) + biases_in[i]
-
- h_in.append(nonlinearity(z))
- h_in[i + 1] = tf.nn.dropout(h_in[i + 1], 1.0 - dropout)
-
- h_rep = h_in[len(h_in) - 1]
- return h_rep, weights_in, biases_in
-
-
-class GANITEBuilder(object):
- @staticmethod
- def build(
- input_dim,
- output_dim,
- num_units=128,
- dropout=0.0,
- l2_weight=0.0,
- learning_rate=0.0001,
- num_layers=2,
- num_treatments=2,
- with_bn=False,
- nonlinearity="elu",
- initializer=tf.variance_scaling_initializer(),
- alpha=1.0,
- beta=1.0,
- ):
- x = tf.placeholder("float", shape=[None, input_dim], name="x")
- t = tf.placeholder("float", shape=[None, 1], name="t")
- y_f = tf.placeholder("float", shape=[None, 1], name="y_f")
- y_full = tf.placeholder("float", shape=[None, num_treatments], name="y_full")
-
- y_pred_cf, propensity_scores, z_g = GANITEBuilder.build_counterfactual_block(
- input_dim,
- x,
- t,
- y_f,
- num_units,
- dropout,
- l2_weight,
- learning_rate,
- num_layers,
- num_treatments,
- with_bn,
- nonlinearity,
- initializer,
- )
-
- y_pred_ite, d_ite_pred, d_ite_true, z_i = GANITEBuilder.build_ite_block(
- input_dim,
- x,
- t,
- y_f,
- y_full,
- num_units,
- dropout,
- l2_weight,
- learning_rate,
- num_layers,
- num_treatments,
- with_bn,
- nonlinearity,
- initializer,
- )
-
- # Build losses and optimizers.
- t_one_hot = tf.one_hot(tf.cast(t, "int32"), num_treatments)
-
- propensity_loss_cf = tf.reduce_mean(
- tf.nn.softmax_cross_entropy_with_logits(
- logits=propensity_scores, labels=t_one_hot
- )
- )
-
- batch_size = tf.shape(y_pred_cf)[0]
- indices = tf.stack([tf.range(batch_size), tf.cast(t, "int32")[:, 0]], axis=-1)
- y_f_pred = tf.gather_nd(y_pred_cf, indices)
-
- y_f_i = y_f # tf.Print(y_f, [y_f[:, 0]], message="y_f=", summarize=8)
- y_f_pred_i = (
- y_f_pred # tf.Print(y_f_pred, [y_f_pred], message="y_f_pred=", summarize=8)
- )
-
- supervised_loss_cf = tf.sqrt(
- tf.reduce_mean(tf.squared_difference(y_f_i[:, 0], y_f_pred_i))
- )
-
- cf_discriminator_loss = propensity_loss_cf
- cf_generator_loss = -propensity_loss_cf + alpha * supervised_loss_cf
-
- # D_ITE goal: 0 when True, 1 when Pred
- ite_loss = tf.reduce_mean(tf.log(d_ite_true)) + tf.reduce_mean(
- tf.log(1 - d_ite_pred)
- )
-
- # tf.Print(y_full, [y_full], message="y_full=", summarize=8)
- y_full_i = y_full
- # tf.Print(y_pred_ite, [y_pred_ite], message="y_pred_ite=", summarize=8)
- y_pred_ite_i = y_pred_ite
- supervised_loss_ite = tf.sqrt(
- tf.reduce_mean(tf.squared_difference(y_full_i, y_pred_ite_i))
- )
-
- ite_discriminator_loss = -ite_loss
- ite_generator_loss = ite_loss + beta * supervised_loss_ite
- return (
- cf_generator_loss,
- cf_discriminator_loss,
- ite_generator_loss,
- ite_discriminator_loss,
- x,
- t,
- y_f,
- y_full,
- y_pred_cf,
- y_pred_ite,
- z_g,
- z_i,
- )
-
- @staticmethod
- def build_tarnet(
- mlp_input,
- t,
- input_dim,
- num_layers,
- num_units,
- dropout,
- num_treatments,
- nonlinearity,
- ):
- initializer = tf.variance_scaling_initializer()
- x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)
-
- all_indices, outputs = [], []
- for i in range(num_treatments):
- indices = tf.reshape(
- tf.to_int32(tf.where(tf.equal(tf.reshape(t, (-1,)), i))), (-1,)
- )
- current_last_layer_h = tf.gather(x, indices)
-
- last_layer = build_mlp(
- current_last_layer_h, num_layers, num_units, dropout, nonlinearity
- )
-
- output = tf.layers.dense(
- last_layer[0],
- units=num_treatments,
- use_bias=True,
- bias_initializer=initializer,
- )
-
- all_indices.append(indices)
- outputs.append(output)
- return tf.concat(outputs, axis=-1), all_indices
-
- @staticmethod
- def build_counterfactual_block(
- input_dim,
- x,
- t,
- y_f,
- num_units=128,
- dropout=0.0,
- l2_weight=0.0,
- learning_rate=0.0001,
- num_layers=2,
- num_treatments=2,
- with_bn=False,
- nonlinearity="elu",
- initializer=tf.variance_scaling_initializer(),
- ):
-
- y_pred, z_g = GANITEBuilder.build_counterfactual_generator(
- input_dim,
- x,
- t,
- y_f,
- num_units,
- dropout,
- l2_weight,
- learning_rate,
- num_layers,
- num_treatments,
- with_bn,
- nonlinearity,
- initializer,
- )
-
- propensity_scores = GANITEBuilder.build_counterfactual_discriminator(
- input_dim,
- x,
- t,
- y_pred,
- num_units,
- dropout,
- l2_weight,
- learning_rate,
- num_layers,
- num_treatments,
- with_bn,
- nonlinearity,
- initializer,
- )
- return y_pred, propensity_scores, z_g
-
- @staticmethod
- def build_counterfactual_generator(
- input_dim,
- x,
- t,
- y_f,
- num_units=128,
- dropout=0.0,
- l2_weight=0.0,
- learning_rate=0.0001,
- num_layers=2,
- num_treatments=2,
- with_bn=False,
- nonlinearity="elu",
- initializer=tf.variance_scaling_initializer(),
- ):
- nonlinearity = get_nonlinearity_by_name(nonlinearity)
- with tf.variable_scope("g_cf", initializer=initializer):
- z_g = tf.placeholder("float", shape=[None, num_treatments - 1], name="z_g")
-
- mlp_input = tf.concat([x, y_f, t, z_g], axis=-1)
- x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)
- y = tf.layers.dense(
- x[0], units=num_treatments, use_bias=True, bias_initializer=initializer
- )
- return y, z_g
-
- @staticmethod
- def build_counterfactual_discriminator(
- input_dim,
- x,
- t,
- y_pred,
- num_units=128,
- dropout=0.0,
- l2_weight=0.0,
- learning_rate=0.0001,
- num_layers=2,
- num_treatments=2,
- with_bn=False,
- nonlinearity="elu",
- initializer=tf.variance_scaling_initializer(),
- reuse=False,
- ):
- nonlinearity = get_nonlinearity_by_name(nonlinearity)
- with tf.variable_scope("d_cf", reuse=reuse, initializer=initializer):
- mlp_input = tf.concat([x, y_pred], axis=-1)
- x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)
- propensity_scores = tf.layers.dense(
- x[0], units=num_treatments, use_bias=True, bias_initializer=initializer
- )
- return propensity_scores
-
- @staticmethod
- def build_ite_block(
- input_dim,
- x,
- t,
- y_f,
- y_full,
- num_units=128,
- dropout=0.0,
- l2_weight=0.0,
- learning_rate=0.0001,
- num_layers=2,
- num_treatments=2,
- with_bn=False,
- nonlinearity="elu",
- initializer=tf.variance_scaling_initializer(),
- ):
- y_pred_ite, z_i = GANITEBuilder.build_ite_generator(
- input_dim,
- x,
- t,
- y_f,
- num_units,
- dropout,
- l2_weight,
- learning_rate,
- num_layers,
- num_treatments,
- with_bn,
- nonlinearity,
- initializer,
- )
-
- d_ite_pred = GANITEBuilder.build_ite_discriminator(
- input_dim,
- x,
- t,
- y_pred_ite,
- num_units,
- dropout,
- l2_weight,
- learning_rate,
- num_layers,
- num_treatments,
- with_bn,
- nonlinearity,
- initializer,
- reuse=False,
- )
-
- d_ite_true = GANITEBuilder.build_ite_discriminator(
- input_dim,
- x,
- t,
- y_full,
- num_units,
- dropout,
- l2_weight,
- learning_rate,
- num_layers,
- num_treatments,
- with_bn,
- nonlinearity,
- initializer,
- reuse=True,
- )
-
- return y_pred_ite, d_ite_pred, d_ite_true, z_i
-
- @staticmethod
- def build_ite_generator(
- input_dim,
- x,
- t,
- y_f,
- num_units=128,
- dropout=0.0,
- l2_weight=0.0,
- learning_rate=0.0001,
- num_layers=2,
- num_treatments=2,
- with_bn=False,
- nonlinearity="elu",
- initializer=tf.variance_scaling_initializer(),
- ):
- nonlinearity = get_nonlinearity_by_name(nonlinearity)
- with tf.variable_scope("g_ite", initializer=initializer):
- z_i = tf.placeholder("float", shape=[None, num_treatments], name="z_i")
- mlp_input = tf.concat([x, z_i], axis=-1)
- x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)
- y_pred = tf.layers.dense(
- x[0], units=num_treatments, use_bias=True, bias_initializer=initializer
- )
- return y_pred, z_i
-
- @staticmethod
- def build_ite_discriminator(
- input_dim,
- x,
- t,
- y_pred,
- num_units=128,
- dropout=0.0,
- l2_weight=0.0,
- learning_rate=0.0001,
- num_layers=2,
- num_treatments=2,
- with_bn=False,
- nonlinearity="elu",
- initializer=tf.variance_scaling_initializer(),
- reuse=False,
- ):
- nonlinearity = get_nonlinearity_by_name(nonlinearity)
- with tf.variable_scope("d_ite", reuse=reuse, initializer=initializer):
- mlp_input = tf.concat([x, y_pred], axis=-1)
- x = build_mlp(mlp_input, num_layers, num_units, dropout, nonlinearity)
- y = tf.layers.dense(
- x[0],
- units=1,
- use_bias=True,
- bias_initializer=initializer,
- activation=tf.nn.sigmoid,
- )
- return y
diff --git a/src/justcause/contrib/ganite/ganite_model.py b/src/justcause/contrib/ganite/ganite_model.py
deleted file mode 100644
index b01098a..0000000
--- a/src/justcause/contrib/ganite/ganite_model.py
+++ /dev/null
@@ -1,327 +0,0 @@
-"""
-Copyright (C) 2018 Patrick Schwab, ETH Zurich
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
-documentation files (the "Software"), to deal in the Software without restriction, including without limitation
-the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
-and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all copies or substantial portions
- of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
-TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
-CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-""" # noqa: E501
-from __future__ import print_function
-
-import sys
-
-import numpy as np
-import tensorflow as tf
-
-from .ganite_builder import GANITEBuilder
-
-
-class GANITEModel(object):
- def __init__(
- self,
- input_dim,
- output_dim,
- num_units=128,
- dropout=0.0,
- l2_weight=0.0,
- learning_rate=0.0001,
- num_layers=2,
- num_treatments=2,
- with_bn=False,
- nonlinearity="elu",
- initializer=tf.variance_scaling_initializer(),
- alpha=1.0,
- beta=1.0,
- ):
-
- tf.reset_default_graph()
-
- config = tf.ConfigProto()
- config.gpu_options.allow_growth = True
- self.sess = tf.Session(config=config)
- self.num_treatments = num_treatments
-
- # ToDo: This is crazy, never return that many values
- (
- self.cf_generator_loss,
- self.cf_discriminator_loss,
- self.ite_generator_loss,
- self.ite_discriminator_loss,
- self.x,
- self.t,
- self.y_f,
- self.y_full,
- self.y_pred_cf,
- self.y_pred_ite,
- self.z_g,
- self.z_i,
- ) = GANITEBuilder.build(
- input_dim,
- output_dim,
- num_units=num_units,
- dropout=dropout,
- l2_weight=l2_weight,
- learning_rate=learning_rate,
- num_layers=num_layers,
- num_treatments=num_treatments,
- with_bn=with_bn,
- nonlinearity=nonlinearity,
- initializer=initializer,
- alpha=alpha,
- beta=beta,
- )
-
- @staticmethod
- def get_scoped_variables(scope_name):
- t_vars = tf.trainable_variables()
- vars = [var for var in t_vars if scope_name in var.name]
- return vars
-
- @staticmethod
- def get_cf_generator_vairables():
- return GANITEModel.get_scoped_variables("g_cf")
-
- @staticmethod
- def get_cf_discriminator_vairables():
- return GANITEModel.get_scoped_variables("d_cf")
-
- @staticmethod
- def get_ite_generator_vairables():
- return GANITEModel.get_scoped_variables("g_ite")
-
- @staticmethod
- def get_ite_discriminator_vairables():
- return GANITEModel.get_scoped_variables("d_ite")
-
- def load(self, path):
- saver = tf.train.Saver() # noqa: F841
- # saver.restore(self.sess, path)
-
- def train(
- self,
- train_generator,
- train_steps,
- val_generator,
- val_steps,
- num_epochs,
- learning_rate,
- learning_rate_decay=0.97,
- iterations_per_decay=100,
- dropout=0.0,
- imbalance_loss_weight=0.0,
- l2_weight=0.0,
- checkpoint_path="",
- early_stopping_patience=12,
- early_stopping_on_pehe=False,
- verbose=False,
- ):
-
- saver = tf.train.Saver(max_to_keep=0) # noqa: F841
-
- global_step_1 = tf.Variable(0, trainable=False, dtype="int64")
- global_step_2 = tf.Variable(0, trainable=False, dtype="int64")
- global_step_3 = tf.Variable(0, trainable=False, dtype="int64")
- global_step_4 = tf.Variable(0, trainable=False, dtype="int64")
-
- opt = tf.train.AdamOptimizer(learning_rate)
- train_step_g_cf = opt.minimize(
- self.cf_generator_loss,
- global_step=global_step_1,
- var_list=GANITEModel.get_cf_generator_vairables(),
- )
- train_step_d_cf = opt.minimize(
- self.cf_discriminator_loss,
- global_step=global_step_2,
- var_list=GANITEModel.get_cf_discriminator_vairables(),
- )
- train_step_g_ite = opt.minimize(
- self.ite_generator_loss,
- global_step=global_step_3,
- var_list=GANITEModel.get_ite_generator_vairables(),
- )
- train_step_d_ite = opt.minimize(
- self.ite_discriminator_loss,
- global_step=global_step_4,
- var_list=GANITEModel.get_ite_discriminator_vairables(),
- )
-
- self.sess.run(tf.global_variables_initializer())
-
- best_val_loss, num_epochs_without_improvement = np.finfo(float).max, 0
- for epoch_idx in range(num_epochs):
- for step_idx in range(train_steps):
- train_losses_g = self.run_generator(
- train_generator, 1, self.cf_generator_loss, train_step_g_cf
- )
- train_losses_d = self.run_generator(
- train_generator, 1, self.cf_discriminator_loss, train_step_d_cf
- )
-
- val_losses_g = self.run_generator(
- val_generator, val_steps, self.cf_generator_loss
- )
- val_losses_d = self.run_generator(
- val_generator, val_steps, self.cf_discriminator_loss
- )
-
- current_val_loss = val_losses_g[0]
- do_save = current_val_loss < best_val_loss
- if do_save:
- num_epochs_without_improvement = 0
- best_val_loss = current_val_loss
- # saver.save(self.sess, checkpoint_path)
- else:
- num_epochs_without_improvement += 1
-
- if verbose:
- self.print_losses(
- epoch_idx,
- num_epochs,
- [train_losses_g[0], train_losses_d[0]],
- [val_losses_g[0], val_losses_d[0]],
- do_save,
- )
-
- if num_epochs_without_improvement >= early_stopping_patience:
- print("EARLY STOPPING due to NO IMPROVEMENT")
- break
-
- best_val_loss, num_epochs_without_improvement = np.finfo(float).max, 0
- for epoch_idx in range(num_epochs):
- for step_idx in range(train_steps):
- train_losses_g = self.run_generator(
- train_generator,
- 1,
- self.ite_generator_loss,
- train_step_g_ite,
- include_y_full=True,
- )
- train_losses_d = self.run_generator(
- train_generator,
- 1,
- self.ite_discriminator_loss,
- train_step_d_ite,
- include_y_full=True,
- )
- val_losses_g = self.run_generator(
- val_generator, val_steps, self.ite_generator_loss, include_y_full=True
- )
- val_losses_d = self.run_generator(
- val_generator,
- val_steps,
- self.ite_discriminator_loss,
- include_y_full=True,
- )
-
- current_val_loss = val_losses_g[0]
- do_save = current_val_loss < best_val_loss
- if do_save:
- num_epochs_without_improvement = 0
- best_val_loss = current_val_loss
- # saver.save(self.sess, checkpoint_path)
- else:
- num_epochs_without_improvement += 1
-
- if verbose:
- self.print_losses(
- epoch_idx,
- num_epochs,
- [train_losses_g[0], train_losses_d[0]],
- [val_losses_g[0], val_losses_d[0]],
- do_save,
- )
-
- if num_epochs_without_improvement >= early_stopping_patience:
- print("EARLY STOPPING due to NO IMPROVEMENT")
- break
-
- def print_losses(
- self, epoch_idx, num_epochs, train_losses, val_losses, did_save=False
- ):
- print(
- "Epoch [{:04d}/{:04d}] {:} TRAIN: G={:.3f} D={:.3f} "
- "VAL: G={:.3f} D={:.3f}".format(
- epoch_idx,
- num_epochs,
- "xx" if did_save else "::",
- train_losses[0],
- train_losses[1],
- val_losses[0],
- val_losses[1],
- ),
- file=sys.stderr,
- )
-
- def run_generator(
- self, generator, steps, loss, train_step=None, include_y_full=False
- ):
- losses = []
- for iter_idx in range(steps):
- (x_batch, t_batch), y_batch = next(generator)
- t_original = t_batch.astype(int)
- t_batch = np.expand_dims(t_batch, axis=-1)
- y_batch = np.expand_dims(y_batch, axis=-1)
- batch_size = len(x_batch)
- feed_dict = {
- self.x: x_batch,
- self.t: t_batch,
- self.y_f: y_batch,
- self.z_g: np.random.uniform(size=(batch_size, self.num_treatments - 1)),
- self.z_i: np.random.uniform(size=(batch_size, self.num_treatments)),
- }
- if include_y_full:
- y_pred = self._predict_g_cf([x_batch, t_batch], y_batch)
- y_pred[:, t_original] = y_batch
- feed_dict[self.y_full] = y_pred
-
- if train_step is not None:
- self.sess.run(train_step, feed_dict=feed_dict)
-
- losses.append(self.sess.run([loss], feed_dict=feed_dict))
- return np.mean(losses, axis=0)
-
- def _predict_g_cf(self, x, y_f):
- batch_size = len(x[0])
- y_pred = self.sess.run(
- self.y_pred_cf,
- feed_dict={
- self.x: x[0],
- self.t: x[1],
- self.y_f: y_f,
- self.z_g: np.random.uniform(size=(batch_size, self.num_treatments - 1)),
- },
- )
- return y_pred
-
- def pred_full(self, x, t, y_f):
- batch_size = len(x)
- y_pred = self.sess.run(
- self.y_pred_cf,
- feed_dict={
- self.x: x,
- self.t: t.reshape(-1, 1),
- self.y_f: y_f.reshape(-1, 1),
- self.z_g: np.random.uniform(size=(batch_size, self.num_treatments - 1)),
- },
- )
- return y_pred
-
- def predict(self, x):
- batch_size = len(x)
- y_pred = self.sess.run(
- self.y_pred_ite,
- feed_dict={
- self.x: x,
- self.z_i: np.random.uniform(size=(batch_size, self.num_treatments)),
- },
- )
- return y_pred
diff --git a/src/justcause/learners/__init__.py b/src/justcause/learners/__init__.py
index f14f6e5..ae6f1f5 100644
--- a/src/justcause/learners/__init__.py
+++ b/src/justcause/learners/__init__.py
@@ -1,24 +1,15 @@
"""Basic learners and justcause-friendly wrappers for more advanced methods"""
from .meta.slearner import SLearner # noqa: F401
from .meta.tlearner import TLearner # noqa: F401
-from .meta.rlearner import RLearner # noqa: F401
-from .meta.xlearner import XLearner # noqa: F401
-from .tree.causal_forest import CausalForest # noqa: F401
from .ate.double_robust import DoubleRobustEstimator # noqa: F401
from .ate.propensity_weighting import PSWEstimator # noqa: F401
-from .nn.dragonnet import DragonNet # noqa: F401
-
___all__ = [
"SLearner",
"WeightedSLearner",
"TLearner",
"WeightedTLearner",
- "RLearner",
- "XLearner",
- "CausalForest",
"DoubleRobustEstimator",
"PSWEstimator",
- "DragonNet",
]
diff --git a/src/justcause/learners/meta/rlearner.py b/src/justcause/learners/meta/rlearner.py
deleted file mode 100644
index 1781494..0000000
--- a/src/justcause/learners/meta/rlearner.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""Wrapper of the python RLearner implemented in the ``causalml`` package"""
-from typing import Optional, Union
-
-import numpy as np
-from numpy.random import RandomState
-from sklearn.linear_model import LinearRegression
-from sklearn.utils import check_random_state
-
-from ..propensity import estimate_propensities
-
-StateType = Optional[Union[int, RandomState]]
-
-
-class RLearner:
- """A wrapper of the BaseRRegressor from ``causalml``
-
- Defaults to LassoLars regression as a base learner if not specified otherwise.
- Allows to either specify one learner for both tasks or two distinct learners
- for the task outcome and effect learning.
-
- References:
- CausalML Framework `on Github '_.
-
- [1] X. Nie and S. Wager,
- “Quasi-Oracle Estimation of Heterogeneous Treatment Effects.”
- """
-
- def __init__(
- self,
- learner=None,
- outcome_learner=None,
- effect_learner=None,
- random_state: StateType = None,
- ):
- """Setup an RLearner
-
- Args:
- learner: default learner for both outcome and effect
- outcome_learner: specific learner for outcome
- effect_learner: specific learner for effect
- random_state: RandomState or int to be used for K-fold splitting. NOT used
- in the learners, this has to be done by the user.
- """
- from causalml.inference.meta import BaseRRegressor
-
- if learner is None and (outcome_learner is None and effect_learner is None):
- learner = LinearRegression()
-
- self.random_state = check_random_state(random_state)
- self.model = BaseRRegressor(
- learner, outcome_learner, effect_learner, random_state=random_state
- )
-
- def __str__(self):
- """Simple string representation for logs and outputs"""
- return "{}(outcome={}, effect={})".format(
- self.__class__.__name__,
- self.model.model_mu.__class__.__name__,
- self.model.model_tau.__class__.__name__,
- )
-
- def __repr__(self):
- return self.__str__()
-
- def fit(self, x: np.array, t: np.array, y: np.array, p: np.array = None) -> None:
- """Fits the RLearner on given samples.
-
- Defaults to `justcause.learners.propensities.estimate_propensities`
- for ``p`` if not given explicitly, in order to allow a generic call
- to the fit() method
-
- Args:
- x: covariate matrix of shape (num_instances, num_features)
- t: treatment indicator vector, shape (num_instances)
- y: factual outcomes, (num_instances)
- p: propensities, shape (num_instances)
-
- """
- if p is None:
- # Propensity is needed by CausalML, so we estimate it,
- # if it was not provided
- p = estimate_propensities(x, t)
-
- self.model.fit(x, p, t, y)
-
- def predict_ite(self, x: np.array, *args) -> np.array:
- """Predicts ITE for given samples; ignores the factual outcome and treatment
-
- Args:
- x: covariates used for precition
- *args: NOT USED but kept to work with the standard ``fit(x, t, y)`` call
-
- """
-
- # assert t is None and y is None, "The R-Learner does not use factual outcomes"
- return self.model.predict(x).flatten()
-
- def estimate_ate(
- self, x: np.array, t: np.array, y: np.array, p: Optional[np.array] = None
- ) -> float:
- """Estimate the average treatment effect (ATE) by fit and predict on given data
-
- Estimates the ATE as the mean of ITE predictions on the given data.
-
- Args:
- x: covariates of shape (num_samples, num_covariates)
- t: treatment indicator vector, shape (num_instances)
- y: factual outcomes, (num_instances)
- p: propensities, shape (num_instances)
-
- Returns:
- the average treatment effect estimate
-
-
- """
- self.fit(x, t, y, p)
- ite = self.predict_ite(x, t, y)
- return float(np.mean(ite))
diff --git a/src/justcause/learners/meta/xlearner.py b/src/justcause/learners/meta/xlearner.py
deleted file mode 100644
index 6979cbf..0000000
--- a/src/justcause/learners/meta/xlearner.py
+++ /dev/null
@@ -1,147 +0,0 @@
-from typing import Optional
-
-import numpy as np
-from sklearn.linear_model import LassoLars
-
-from justcause.learners.utils import replace_factual_outcomes
-
-from ..propensity import estimate_propensities
-
-
-class XLearner:
- """Wrapper of the BaseXRegressor from causalml
-
- Defaults to `sklearn.linear_model.LassoLars` as a base learner if not specified
- otherwise. Allows to either specify one learner for all or four distinct learners
- for the tasks
- - outcome control
- - outcome treated
- - effect control
- - effect treated
-
- References:
- [1] CausalML Framework `on Github '_.
-
- [2] S. R. Künzel, J. S. Sekhon, P. J. Bickel, and B. Yu,
- “Meta-learners for Estimating Heterogeneous
- Treatment Effects using Machine Learning,” 2019.
-
- """
-
- def __init__(
- self,
- learner=None,
- outcome_learner_c=None,
- outcome_learner_t=None,
- effect_learner_c=None,
- effect_learner_t=None,
- ):
- """Setup a XLearner
-
- All learners must have ``fit(x, y)`` and ``predict(x)`` methods.
-
- Args:
- learner: default learner for all roles
- outcome_learner_c: specific learner for control outcome function
- outcome_learner_t: specific learner for treated outcome function
- effect_learner_c: specific learner for treated effect
- effect_learner_t: specific learner for control effect
- """
- from causalml.inference.meta import BaseXRegressor
-
- if (learner is not None) or (
- (outcome_learner_c is not None)
- and (outcome_learner_t is not None)
- and (effect_learner_c is not None)
- and (effect_learner_t is not None)
- ):
- self.model = BaseXRegressor(
- learner,
- outcome_learner_c,
- outcome_learner_t,
- effect_learner_c,
- effect_learner_t,
- )
- else:
- # Assign default learner
- learner = LassoLars()
- self.model = BaseXRegressor(
- learner,
- outcome_learner_c,
- outcome_learner_t,
- effect_learner_c,
- effect_learner_t,
- )
-
- def __str__(self):
- """Simple string representation for logs and outputs"""
- return "{}(outcome_c={}, outcome_t={}, effect_c={}, effect_t={})".format(
- self.__class__.__name__,
- self.model.model_mu_c.__class__.__name__,
- self.model.model_mu_t.__class__.__name__,
- self.model.model_tau_c.__class__.__name__,
- self.model.model_tau_t.__class__.__name__,
- )
-
- def __repr__(self):
- return self.__str__()
-
- def fit(self, x: np.array, t: np.array, y: np.array) -> None:
- """Fits the RLearner on given samples
-
- Args:
- x: covariate matrix of shape (num_instances, num_features)
- t: treatment indicator vector, shape (num_instances)
- y: factual outcomes, (num_instances)
-
- """
- self.model.fit(x, t, y)
-
- def predict_ite(
- self,
- x: np.array,
- t: np.array = None,
- y: np.array = None,
- p: Optional[np.array] = None,
- return_components: bool = False,
- replace_factuals: bool = False,
- ) -> np.array:
- """Predicts ITE for the given population
-
- If propensities ``p`` are not given, they are estimated using the default
- implementation `justcause.learners.propensities.estimate_propensities`
-
- Args:
- x: covariates
- t: treatment indicator
- y: factual outcomes
- p: propensity scores
- return_components: whether to return Y(1) and Y(0) for all instances or not
- replace_factuals: whether to replace predicted outcomes with the factual
- outcomes where applicable
-
- Returns:
- the ITE prediction either with or without components
- """
- if p is None:
- # Set default propensity, because CausalML currently requires it
- p = estimate_propensities(x, t)
-
- if return_components:
- ite, y_0, y_1 = self.model.predict(x, p, t, y, return_components=True)
- if t is not None and y is not None and replace_factuals:
- y_0, y_1 = replace_factual_outcomes(y_0, y_1, y, t)
- return ite.flatten(), y_0.flatten(), y_1.flatten()
- else:
- return self.model.predict(x, p, t, y, return_components=False).flatten()
-
- def estimate_ate(
- self,
- x: np.array,
- t: np.array,
- y: np.array,
- propensities: Optional[np.array] = None,
- ) -> float:
- """Predicts ATE for given samples as mean of ITE predictions"""
- self.fit(x, t, y)
- return float(np.mean(self.predict_ite(x, t, y, propensities)))
diff --git a/src/justcause/learners/nn/__init__.py b/src/justcause/learners/nn/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/justcause/learners/nn/dragonnet.py b/src/justcause/learners/nn/dragonnet.py
deleted file mode 100644
index 4d06bee..0000000
--- a/src/justcause/learners/nn/dragonnet.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import numpy as np
-
-from ..utils import replace_factual_outcomes
-
-
-class DragonNet:
- """Wrapper of the DragonNet implementation in `justcause.contrib.dragonnet`
-
- Original code taken with slide adaption for usage within the framework.
- Source can be found here: https://github.com/claudiashi57/dragonnet
-
- References:
- [1] C. Shi, D. M. Blei, and V. Veitch,
- “Adapting Neural Networks for the Estimation of Treatment Effects.”
- """
-
- def __init__(
- self, learning_rate=0.001, num_epochs=50, batch_size=512, validation_split=0.1
- ):
- self.learning_rate = learning_rate
- self.num_epochs = num_epochs
- self.batch_size = batch_size
- self.validation_split = validation_split
- self.model = None
-
- def __repr__(self):
- return self.__str__()
-
- def __str__(self):
- return "DragonNet(epochs={}, lr={}, batch={}, val_split={})".format(
- self.num_epochs, self.learning_rate, self.batch_size, self.validation_split
- )
-
- def fit(self, x: np.array, t: np.array, y: np.array) -> None:
- """Trains DragonNet with hyper-parameters specified in the constructor
-
- Args:
- x: covariates for all instances, shape (num_instance, num_features)
- t: treatment indicator vector, shape (num_instance)
- y: factual outcomes, shape (num_instance)
-
- """
- # Late import to avoid installing all of dragonnet's requirements
- from ...contrib.dragonnet import dragonnet
-
- self.model = dragonnet.train_dragon(
- t,
- y,
- x,
- num_epochs=self.num_epochs,
- batch_size=self.batch_size,
- learning_rate=self.learning_rate,
- val_split=self.validation_split,
- )
-
- def predict_ite(
- self,
- x: np.array,
- t: np.array = None,
- y: np.array = None,
- return_components: bool = False,
- replace_factuals: bool = False,
- ):
- """Predicts ITE for the given samples
-
- Args:
- x: covariates in shape (num_instances, num_features)
- t: treatment indicator, binary in shape (num_instances)
- y: factual outcomes in shape (num_instances)
- return_components: whether to return Y(0) and Y(1) predictions separately
- replace_factuals: whether to replace outcomes with true outcomes
- where possible
-
- Returns:
- a vector of ITEs for the inputs; also returns Y(0) and Y(1) for all
- inputs if ``return_components`` is ``True``
- """
- assert self.model is not None, "DragonNet must be fit before use"
-
- res = self.model.predict(x)
- y_0, y_1 = res[:, 0], res[:, 1]
-
- if return_components:
- if t is not None and y is not None and replace_factuals:
- y_0, y_1 = replace_factual_outcomes(y_0, y_1, y, t)
- return y_1 - y_0, y_0, y_1
- else:
- return y_1 - y_0
-
- def estimate_ate(
- self, x: np.array, t: np.array = None, y: np.array = None,
- ) -> float:
- """Estimates the average treatment effect of the given population
-
- First, it fits the model on the given population, then predicts ITEs and uses
- the mean as an estimate for the ATE
-
- Args:
- x: covariates in shape (num_instances, num_features)
- t: treatment indicator, binary in shape (num_instances)
- y: factual outcomes in shape (num_instances)
-
- Returns: ATE estimate as the mean of ITEs
- """
- self.fit(x, t, y)
- ite = self.predict_ite(x, t, y)
- return float(np.mean(ite))
diff --git a/src/justcause/learners/tree/__init__.py b/src/justcause/learners/tree/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/justcause/learners/tree/causal_forest.py b/src/justcause/learners/tree/causal_forest.py
deleted file mode 100644
index a776f31..0000000
--- a/src/justcause/learners/tree/causal_forest.py
+++ /dev/null
@@ -1,123 +0,0 @@
-from typing import Optional, Union
-
-import numpy as np
-from numpy.random import RandomState
-from sklearn.utils import check_random_state
-
-from ...utils import int_from_random_state
-
-#: type alias
-StateType = Optional[Union[int, RandomState]]
-
-
-class CausalForest:
- """Port/Wrapper for the R implementation of CausalForests using ``rpy2``
-
- See reference [2] for a list of parameters to the original implementation. In order
- to pass python parameters some naming conventions have to be considered, but
- cannot be guaranteed:
- - In general, points are converted to underscores. Thus ``num.trees`` becomes
- ``num_trees`` when using the Rpy2 API
-
-
- References:
- [1] “Generalized random forests”
- S. Athey, J. Tibshirani, and S. Wager,
- Ann. Stat., vol. 47, no. 2, pp. 1179–1203, 2019.
-
- [2] The CRAN manual for `grf`
- https://cran.r-project.org/web/packages/grf/grf.pdf
-
- [3] Rpy2 manual on naming:
- http://rpy.sourceforge.net/rpy2/doc-2.2/html/robjects_functions.html
- """
-
- def __init__(
- self, num_trees: int = 200, random_state: RandomState = None, **kwargs
- ):
- """Setup the CausalForest Wrapper
-
- Checks if the required R package is available and instantiates it.
- We don't install the grf package automatically, because it takes to long with
- the required compilation. It would interrupt application workflow and is not
- the task of our library.
- The needed method for installation can be found in `justcause.learners.utils`
-
- Args:
- random_state: random seed that is passed to the R implementation
- num_trees: the number of trees in the forest
- kwargs: named parameters that are passed to the R implementation,
- see https://cran.r-project.org/web/packages/grf/grf.pdf for a reference
- of possible parameters
- """
- from rpy2.robjects import numpy2ri
- from rpy2.robjects.packages import importr
- from rpy2.rinterface import RRuntimeError
-
- numpy2ri.activate()
-
- try:
- self.grf = importr("grf")
- except RRuntimeError:
- raise ImportError(
- "R package 'grf' is not installed yet, "
- "install it with justcause.learners.utils.install_r_packages(['grf'])"
- )
-
- """Holds the rpy2 object for the trained model"""
- self.forest = None
- self.random_state = check_random_state(random_state)
-
- self.num_trees = num_trees
- self.kwargs = kwargs
-
- def __str__(self):
- return "CausalForest"
-
- def fit(self, x: np.array, t: np.array, y: np.array) -> None:
- """Fits the forest using factual data"""
- from rpy2.robjects.vectors import FloatVector, IntVector
-
- integer_random_state = int_from_random_state(self.random_state)
-
- self.forest = self.grf.causal_forest(
- x,
- FloatVector(y),
- IntVector(t),
- seed=integer_random_state,
- num_trees=self.num_trees,
- **self.kwargs
- )
-
- def predict_ite(self, x: np.array, *args) -> np.array:
- """Predicts ITE vor given samples without using facutals
-
- Args:
- x: covariates in shape (num_instances, num_covariates)
- *args: NOT USED - kept for coherent API
-
- Returns:
- ITE predictions for all instances
-
- """
- import rpy2.robjects as robjects
-
- pred = robjects.r.predict(self.forest, x, estimate_variance=False)[0]
- return np.array(pred).flatten()
-
- def estimate_ate(self, x: np.array, t: np.array, y: np.array) -> float:
- """Estimates ATE of the given population
-
- Fits the CausalForest and predicts the ITE. The mean of all ITEs is
- returned as the ATE.
-
- Args:
- x: covariates
- t: treatment indicator
- y: factual outcome
-
- Returns: average treatment effect of the population
- """
- self.fit(x, t, y)
- ite = self.predict_ite(x)
- return float(np.mean(ite))
diff --git a/src/justcause/learners/utils.py b/src/justcause/learners/utils.py
index a74a1bc..5bcd4fa 100644
--- a/src/justcause/learners/utils.py
+++ b/src/justcause/learners/utils.py
@@ -1,5 +1,5 @@
"""Miscellaneous tools used in the `justcause.learners`"""
-from typing import List, Tuple
+from typing import Tuple
import numpy as np
@@ -20,24 +20,3 @@ def replace_factual_outcomes(
y_0 = np.where(t == 0, y, y_0)
y_1 = np.where(t == 1, y, y_1)
return y_0, y_1
-
-
-def install_r_packages(package_names: List[str], verbose=False):
- """Installs the R packages if needed using rpy2 utility functions
-
- Args:
- package_names: names of the R packages to install
- verbose: Whether to print progress information or not
-
- """
- import rpy2.robjects.packages as rpackages
- from rpy2 import robjects
- from rpy2.robjects import StrVector
-
- robjects.r.options(download_file_method="curl")
- utils = rpackages.importr("utils")
- utils.chooseCRANmirror(ind=1)
-
- names_to_install = [x for x in package_names if not rpackages.isinstalled(x)]
- if len(names_to_install) > 0:
- utils.install_packages(StrVector(names_to_install), verbose=verbose)
diff --git a/tests/conftest.py b/tests/conftest.py
index e119df3..c3c9207 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -13,15 +13,11 @@
import numpy as np
import pandas as pd
-import rpy2.robjects.packages as rpackages
-from rpy2 import robjects
-from rpy2.robjects import StrVector
from justcause.data.frames import CausalFrame
from justcause.data.sets.ibm import load_ibm
from justcause.data.sets.ihdp import load_ihdp
from justcause.data.sets.twins import load_twins
-from justcause.learners.utils import install_r_packages
RUNS_ON_CIRRUS = bool(strtobool(os.environ.get("CIRRUS_CI", "false")))
@@ -98,28 +94,3 @@ def dummy_covariates_and_treatment():
T_1 = np.full(80, 0)
t = np.append(T_0, T_1)
return X, t
-
-
-@pytest.fixture
-def uninstall_grf():
- """ Ensures the grf packages is not installed before the test runs"""
- if rpackages.isinstalled("grf"):
- robjects.r.options(download_file_method="curl")
- utils = rpackages.importr("utils")
- utils.chooseCRANmirror(ind=0)
-
- utils.remove_packages(StrVector(["grf"]))
-
-
-@pytest.fixture
-def grf():
- """ Ensures grf is installed before running tests with it
-
- This is required as usually the user is requested to install the package manually
-
- """
- if not rpackages.isinstalled("grf"):
- install_r_packages(["grf"])
- return 0
-
- return 1
diff --git a/tests/test_learners.py b/tests/test_learners.py
index 6392a88..830ab71 100644
--- a/tests/test_learners.py
+++ b/tests/test_learners.py
@@ -2,17 +2,7 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
-from justcause.learners import (
- CausalForest,
- DoubleRobustEstimator,
- DragonNet,
- PSWEstimator,
- RLearner,
- SLearner,
- TLearner,
- XLearner,
-)
-from justcause.metrics import pehe_score
+from justcause.learners import DoubleRobustEstimator, PSWEstimator, SLearner, TLearner
def test_slearner(ihdp_data):
@@ -69,67 +59,6 @@ def test_tlearner(ihdp_data):
assert len(pred) == len(t)
-def test_rlearner(ihdp_data):
- rep = ihdp_data[0]
- x, t, y = rep.np.X, rep.np.t, rep.np.y
-
- # with RF explicitly
- rlearner = RLearner(RandomForestRegressor())
- rlearner.fit(x, t, y)
- pred = rlearner.predict_ite(x)
- assert len(pred) == len(t)
-
- # With default
- rlearner = RLearner()
- rlearner.fit(x, t, y)
- pred = rlearner.predict_ite(x)
- assert len(pred) == len(t)
-
- assert (
- str(rlearner) == "RLearner(outcome=LinearRegression, effect=LinearRegression)"
- )
-
-
-def test_xlearner(ihdp_data):
- rep = ihdp_data[0]
- x, t, y = rep.np.X, rep.np.t, rep.np.y
- true_ite = rep["ite"].values
-
- # With LinearRegression
- xlearner = XLearner(LinearRegression())
- xlearner.fit(x, t, y)
- pred = xlearner.predict_ite(x, t, y)
- assert len(pred) == len(t)
-
- # With default
- xlearner = XLearner(LinearRegression())
- xlearner.fit(x, t, y)
- pred = xlearner.predict_ite(x, t, y)
- assert len(pred) == len(t)
- assert abs(pehe_score(true_ite, pred) - 0.5) < 0.2
-
- pred_ate = xlearner.estimate_ate(x, t, y)
- true_ate = np.mean(rep["ite"].values)
- assert abs(pred_ate - true_ate) < 0.2
-
-
-def test_causalforest(ihdp_data, grf):
- rep = ihdp_data[0]
- x, t, y = rep.np.X, rep.np.t, rep.np.y
- cf = CausalForest()
- cf.fit(x, t, y)
- pred_ate = cf.estimate_ate(x, t, y)
- true_ate = np.mean(rep["ite"].values)
- assert abs(pred_ate - true_ate) < 0.2
-
- # Try passing keyword arguments to the R implenetation
- cf = CausalForest(num_trees=50, alpha=0.1, honesty=False)
- cf.fit(x, t, y)
- pred_ate = cf.estimate_ate(x, t, y)
- true_ate = np.mean(rep["ite"].values)
- assert abs(pred_ate - true_ate) < 0.2
-
-
def test_dre(ihdp_data):
rep = ihdp_data[0]
x, t, y = rep.np.X, rep.np.t, rep.np.y
@@ -154,15 +83,3 @@ def test_psw(ihdp_data):
psw = PSWEstimator()
ate = psw.estimate_ate(x, t, y)
assert ate > 0
-
-
-def test_dragonnet(ihdp_data):
-
- rep = ihdp_data[0]
- x, t, y = rep.np.X, rep.np.t, rep.np.y
- dragon = DragonNet()
- dragon.fit(x, t, y)
- ate = np.mean(dragon.predict_ite(x, t, y))
-
- true_ate = np.mean(rep["ite"].values)
- assert abs(ate - true_ate) < 0.5
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 5b7d049..66cc6e1 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,11 +1,10 @@
import pytest
import numpy as np
-import rpy2.robjects.packages as rpackages
from numpy.random import RandomState
from justcause.data.utils import to_rep_iter, to_rep_list
-from justcause.learners.utils import install_r_packages, replace_factual_outcomes
+from justcause.learners.utils import replace_factual_outcomes
from justcause.utils import int_from_random_state
@@ -36,12 +35,6 @@ def test_replace_factuals():
assert y_0[0] == y[0]
-def test_install_r_packages(uninstall_grf):
- package_names = ["grf"]
- install_r_packages(package_names, verbose=True)
- assert rpackages.isinstalled(package_names[0])
-
-
def test_int_from_random_state():
rs = RandomState(5)
rs_int = int_from_random_state(rs)