Wp2 (#88)

* updates * README * README * README
autonlab · Jun 28, 2022 · fc9087e · fc9087e
1 parent 3ed86ce
commit fc9087e
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 50 deletions.
diff --git a/README.md b/README.md
@@ -166,7 +166,8 @@ Phenotyping and Knowledge Discovery
 
 `auton_survival.phenotyping` allows extraction of latent clusters or subgroups
 of patients that demonstrate similar outcomes. In the context of this package,
-we refer to this task as **phenotyping**. `auton_survival.phenotyping` allows:
+we refer to this task as **phenotyping**. `auton_survival.phenotyping` provides
+the following phenotyping utilities:
 
 - **Intersectional Phenotyping**: Recovers groups, or phenotypes, of individuals 
 over exhaustive combinations of user-specified categorical and numerical features. 
@@ -226,6 +227,8 @@ response to a specific intervention. Relies on the specially designed
 `auton_survival.models.cmhe.DeepCoxMixturesHeterogenousEffects` latent variable model.
 
 ```python
+from auton_survival.models.cmhe import DeepCoxMixturesHeterogenousEffects
+
 # Instantiate the CMHE model
 model = DeepCoxMixturesHeterogenousEffects(random_seed=random_seed, k=k, g=g, layers=layers)
 
@@ -248,6 +251,13 @@ model = SurvivalVirtualTwins(horizon=365)
 phenotypes = model.fit_predict(features, outcomes.time, outcomes.event, interventions)
 ```
 
+DAG representations of the unsupervised, supervised, and counterfactual probabilistic
+phenotypers in auton-survival are shown in the figure below. *X* represents the
+covariates, *T* the time-to-event, and *Z* the phenotype to be inferred.
+
+<p align="center"><img src="https://ndownloader.figshare.com/files/36056648" width=60%></p>
+
+
 <a id="evaluation"></a>
 
 Evaluation and Reporting
@@ -277,9 +287,14 @@ score = survival_regression_metric(metric='brs', outcomes_train,
 ```
 
 - **Treatment Effect**: Used to compare treatment arms by computing the difference in the following metrics for treatment and control groups:
- - **Time at Risk** (TaR)
- - **Risk at Time**
- - **Restricted Mean Survival Time** (RMST)
+ - **Time at Risk (TaR)** (left)
+ - **Risk at Time** (center)
+ - **Restricted Mean Survival Time (RMST)** (right)
+
+<p align="center">
+<img src="https://ndownloader.figshare.com/files/36056507" width=30%>
+<img src="https://ndownloader.figshare.com/files/36056534" width=30%>
+<img src="https://ndownloader.figshare.com/files/36056546" width=30%></p>
 
 ```python
 from auton_survival.metrics import survival_diff_metric

diff --git a/auton_survival/experiments.py b/auton_survival/experiments.py
@@ -39,11 +39,11 @@
 
 class SurvivalRegressionCV:
  """Universal interface to train Survival Analysis models in a cross-
- validation or nested cross-validation fashion.
+ validation fashion.
 
- Each of the model is trained in a CV fashion over the user specified
- hyperparameter grid. The best model(s) in terms of user-specified metric
- is selected.
+ The model is trained in a CV fashion over the user-specified
+ hyperparameter grid. Model hyperparameters are selected based on the
+ user-specified metric.
 
  Parameters
  -----------
@@ -65,9 +65,6 @@ class SurvivalRegressionCV:
  num_folds : int, default=5
  The number of folds.
  Ignored if folds is specified.
- num_nested_folds : int, default=None
- The number of folds to use for nested cross-validation.
- If None, then regular (unnested) CV is performed.
  random_seed : int, default=0
  Controls reproducibility of results.
  hyperparam_grid : dict
@@ -92,12 +89,11 @@ class SurvivalRegressionCV:
  """
 
  def __init__(self, model='dcph', folds=None, num_folds=5,
- num_nested_folds=None, random_seed=0, hyperparam_grid={}):
+ random_seed=0, hyperparam_grid={}):
 
  self.model = model
  self.folds = folds
  self.num_folds = num_folds
- self.num_nested_folds = num_nested_folds
  self.random_seed = random_seed
  self.hyperparam_grid = list(ParameterGrid(hyperparam_grid))
 
@@ -116,7 +112,7 @@ def fit(self, features, outcomes, horizons, metric='ibs'):
  outcomes : pd.DataFrame
  A pandas dataframe with columns 'time' and 'event' that contain the
  survival time and censoring status \( \delta_i = 1 \), respectively.
- horizon : int or float or list
+ horizons : int or float or list
  Event-horizons at which to evaluate model performance.
  metric : str, default='ibs'
  Metric used to evaluate model performance and tune hyperparameters.
@@ -125,12 +121,12 @@ def fit(self, features, outcomes, horizons, metric='ibs'):
  - 'brs' : Brier Score
  - 'ibs' : Integrated Brier Score
  - 'ctd' : Concordance Index
+
  Returns
  -----------
  Trained survival regression model(s).
 
  """
-
 
  assert horizons is not None, "Horizons must be specified."
  if isinstance(horizons, (int, float)):
@@ -156,10 +152,6 @@ def fit(self, features, outcomes, horizons, metric='ibs'):
  assert max(horizons) < time_max, "Horizons exceeds max time range."
  assert min(horizons) > time_min, "Horizons exceeds min time range."
 
- # if self.horizon is None:
- # assert (self.metric == 'ibs'), "Horizon must be specified for the selected metric"
- # self.horizon = time_max
-
  hyper_param_scores = []
  for i, hyper_param in enumerate(self.hyperparam_grid):
  print("At hyper-param", hyper_param)
@@ -189,7 +181,6 @@ def fit(self, features, outcomes, horizons, metric='ibs'):
  **best_hyper_param).fit(features, outcomes)
  return model
 
-
  def _get_stratified_folds(self, dataset, event_label, n_folds, random_seed):
 
  """Get cross-validation fold value for each sample.
@@ -288,7 +279,6 @@ class CounterfactualSurvivalRegressionCV:
  model : str
  A string that determines the choice of the survival analysis model.
  Survival model choices include:
-
  - 'dsm' : Deep Survival Machines [3] model
  - 'dcph' : Deep Cox Proportional Hazards [2] model
  - 'dcm' : Deep Cox Mixtures [4] model
@@ -341,10 +331,10 @@ def __init__(self, model, cv_folds=5, random_seed=0, hyperparam_grid={}):
  random_seed=random_seed,
  hyperparam_grid=hyperparam_grid)
 
- def fit(self, features, outcomes, interventions, metric):
+ def fit(self, features, outcomes, interventions, horizons, metric):
 
- r"""Fits the Survival Regression Model to the data in a Cross
- Validation fashion.
+ r"""Fits the Survival Regression Model to the data in a cross-
+ validation fashion.
 
  Parameters
  -----------
@@ -359,6 +349,15 @@ def fit(self, features, outcomes, interventions, metric):
  interventions: pandas.Series
  A pandas series containing the treatment status of each subject.
  \( a_i = 1 \) if the subject is `treated`, else is considered control.
+ horizons : int or float or list
+ Event-horizons at which to evaluate model performance.
+ metric : str
+ Metric used to evaluate model performance and tune hyperparameters.
+ Options include:
+ - 'auc': Dynamic area under the ROC curve
+ - 'brs' : Brier Score
+ - 'ibs' : Integrated Brier Score
+ - 'ctd' : Concordance Index
 
  Returns
  -----------
@@ -369,9 +368,11 @@ def fit(self, features, outcomes, interventions, metric):
 
  treated_model = self.treated_experiment.fit(features.loc[interventions==1],
  outcomes.loc[interventions==1],
+ horizons=horizons,
  metric=metric)
  control_model = self.control_experiment.fit(features.loc[interventions!=1],
  outcomes.loc[interventions!=1],
+ horizons=horizons,
  metric=metric)
 
  return CounterfactualSurvivalModel(treated_model, control_model)
diff --git a/auton_survival/phenotyping.py b/auton_survival/phenotyping.py
@@ -478,8 +478,7 @@ def __init__(self,
 
  self.random_seed = random_seed
 
- def fit(self, features, outcomes, interventions, metric, 
- horizon):
+ def fit(self, features, outcomes, interventions, horizons, metric):
 
  """Fit a counterfactual model and regress the difference of the estimated
  counterfactual Restricted Mean Survival Time using a Random Forest regressor.
@@ -495,16 +494,15 @@ def fit(self, features, outcomes, interventions, metric,
  interventions : np.array
  Boolean numpy array of treatment indicators. True means individual
  was assigned a specific treatment.
+ horizons : int or float or list
+ Event-horizons at which to evaluate model performance.
  metric : str
  Metric used to evaluate model performance and tune hyperparameters.
  Options include:
  - 'auc': Dynamic area under the ROC curve
  - 'brs' : Brier Score
  - 'ibs' : Integrated Brier Score
  - 'ctd' : Concordance Index
- horizon : np.float
- The event horizon at which to compute the counterfacutal RMST for
- regression. 
 
  Returns
  -----------
@@ -515,12 +513,13 @@ def fit(self, features, outcomes, interventions, metric,
  cf_model = CounterfactualSurvivalRegressionCV(model=self.cf_method,
  hyperparam_grid=self.cf_hyperparams)
 
- self.cf_model = cf_model.fit(features, outcomes, interventions, metric)
+ self.cf_model = cf_model.fit(features, outcomes, interventions,
+ horizons, metric)
 
  times = np.unique(outcomes.time.values)
  cf_predictions = self.cf_model.predict_counterfactual_survival(features,
  times.tolist())
-
+ horizon = max(horizons)
  ite_estimates = cf_predictions[1] - cf_predictions[0]
  ite_estimates = [estimate[times < horizon] for estimate in ite_estimates]
  times = times[times < horizon]
@@ -558,7 +557,7 @@ def predict_proba(self, features):
 
  """
 
- phenotype_preds=  self.pheno_model.predict(features)
+ phenotype_preds = self.pheno_model.predict(features)
  preds_surv_greater = (phenotype_preds - phenotype_preds.min()) / (phenotype_preds.max() - phenotype_preds.min())
  preds_surv_less = 1 - preds_surv_greater
  preds = np.array([[preds_surv_less[i], preds_surv_greater[i]]

diff --git a/examples/CV Survival Regression on SUPPORT Dataset.ipynb b/examples/CV Survival Regression on SUPPORT Dataset.ipynb
@@ -49,8 +49,7 @@
  "outputs": [],
  "source": [
  "import numpy as np\n",
- "horizons = [0.25, 0.5, 0.75]\n",
- "times = np.quantile(outcomes.time[outcomes.event==1], horizons).tolist()"
+ "times = np.quantile(outcomes.time[outcomes.event==1], [0.25, 0.5, 0.75]).tolist()"
  ]
  },
  {
@@ -67,7 +66,7 @@
  " 'layers' : [[100]]}\n",
  "\n",
  "experiment = SurvivalRegressionCV(model='dsm', num_folds=3, hyperparam_grid=param_grid, random_seed=0)\n",
- "model = experiment.fit(x, outcomes, metric='ctd')"
+ "model = experiment.fit(x, outcomes, times, metric='brs')"
  ]
  },
  {
@@ -80,13 +79,6 @@
  "model"
  ]
  },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
  {
  "cell_type": "code",
  "execution_count": null,
@@ -122,7 +114,7 @@
  "for fold in set(experiment.folds):\n",
  " print(survival_regression_metric('ctd', outcomes[experiment.folds==fold], \n",
  " out_survival[experiment.folds==fold], \n",
- " times=times))\n"
+ " times=times))"
  ]
  },
  {
@@ -136,13 +128,6 @@
  " print(time)"
  ]
  },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
  {
  "cell_type": "code",
  "execution_count": null,