From f99ff95346d8ccb0fe50b09bd4609e890125ca7c Mon Sep 17 00:00:00 2001
From: davidsebfischer
Date: Tue, 12 Oct 2021 13:12:49 +0200
Subject: [PATCH 01/15] skeleton for LR encoding ED model with extension of estimator

added estimators that yield neighborhood tensors of features and avoid using
the full adjacency matrix; this can be used for single-layer embeddings and
breaks the scaling of attention coefficient computation with the number of
nodes in the graph
---
 ncem/estimators/__init__.py                 |   3 +-
 ncem/estimators/base_estimator_neighbors.py | 220 ++++++++++++++++++++
 ncem/estimators/estimator_ed_ncem.py        |  92 +++++++-
 ncem/models/__init__.py                     |   1 +
 ncem/models/layers/__init__.py              |   4 +-
 ncem/models/layers/output_layers.py         |  30 +++
 ncem/models/layers/single_gnn_layers.py     | 171 +++++++++++++++
 ncem/models/model_ed_single_ncem.py         | 152 ++++++++++++++
 ncem/train/train_model.py                   |  12 +-
 9 files changed, 680 insertions(+), 5 deletions(-)
 create mode 100644 ncem/estimators/base_estimator_neighbors.py
 create mode 100644 ncem/models/layers/single_gnn_layers.py
 create mode 100644 ncem/models/model_ed_single_ncem.py

diff --git a/ncem/estimators/__init__.py b/ncem/estimators/__init__.py
index 916e2d05..dc05e957 100644
--- a/ncem/estimators/__init__.py
+++ b/ncem/estimators/__init__.py
@@ -1,9 +1,10 @@
 """Importing estimator classes."""
 from ncem.estimators.base_estimator import (Estimator, EstimatorGraph,
                                             EstimatorNoGraph)
+from ncem.estimators.base_estimator_neighbors import EstimatorNeighborhood
 from ncem.estimators.estimator_cvae import EstimatorCVAE
 from ncem.estimators.estimator_cvae_ncem import EstimatorCVAEncem
 from ncem.estimators.estimator_ed import EstimatorED
-from ncem.estimators.estimator_ed_ncem import EstimatorEDncem
+from ncem.estimators.estimator_ed_ncem import EstimatorEDncem, EstimatorEdNcemNeighborhood
 from ncem.estimators.estimator_interactions import EstimatorInteractions
 from ncem.estimators.estimator_linear import EstimatorLinear
diff --git a/ncem/estimators/base_estimator_neighbors.py b/ncem/estimators/base_estimator_neighbors.py
new file mode 100644
index 00000000..ef3f6586
--- /dev/null
+++ b/ncem/estimators/base_estimator_neighbors.py
@@ -0,0 +1,220 @@
+from typing import Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import tensorflow as tf
+
+from ncem.estimators.base_estimator import Estimator
+
+
+class EstimatorNeighborhood(Estimator):
+    """EstimatorNeighborhood class for spatial models of the neighborhood only (not the full graph)."""
+
+    n_features_in: int
+    _n_neighbors_padded: int
+    h0_in: bool
+    idx_target_features: np.ndarray
+    idx_neighbor_features: np.ndarray
+
+    def set_input_features(self, h0_in=True, target_feature_names=None, neighbor_feature_names=None):
+        """
+        Need to run this before compiling model.
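+
+        A sketch of the intended call, for orientation (feature names here are
+        hypothetical placeholders):
+
+            estimator.set_input_features(
+                h0_in=False,
+                target_feature_names=["receptor_a", "receptor_b"],
+                neighbor_feature_names=["ligand_a", "ligand_b"],
+            )
+
+        With h0_in=True the h_0 feature space (dimension n_features_0) is used
+        for both targets and neighbors; otherwise disjoint target and neighbor
+        feature sets are indexed out of h_1.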
+ + Returns: + """ + self.h0_in = h0_in + if self.h0_in: + assert target_feature_names is None + assert neighbor_feature_names is None + self.n_features_in = self.n_features_0 + else: + self.idx_target_features = None # TODO match names to feature names in h1 here, as np index array + self.idx_neighbor_features = None # TODO match names to feature names in h1 here, as np index array + assert len(self.idx_target_features.tolist()) == len(self.idx_neighbor_features.tolist()) + assert len(set(self.idx_target_features.tolist()).intersection(set(self.idx_neighbor_features.tolist()))) == 0 + self.n_features_in = len(self.idx_target_features) + + @property + def n_neighbors_padded(self): + if self._n_neighbors_padded is None: + self._n_neighbors_padded = np.max(np.asarray([ + np.max(np.asarray(np.sum(a, axis=1)).flatten()) for a in self.a.values() + ])) + return self._n_neighbors_padded + + def _get_output_signature(self, resampled: bool = False): + """Get output signatures. + + Parameters + ---------- + resampled : bool + Whether dataset is resampled or not. + + Returns + ------- + output_signature + """ + h_targets = tf.TensorSpec( + shape=(self.n_eval_nodes_per_graph, self.n_features_in), dtype=tf.float32 + ) # target node features + h_neighbors = tf.TensorSpec( + shape=(self.n_neighbors_padded, self.n_features_in), dtype=tf.float32 + ) # neighbor node features + sf = tf.TensorSpec(shape=(self.n_eval_nodes_per_graph, 1), dtype=tf.float32) # input node size factors + node_covar = tf.TensorSpec( + shape=(self.n_eval_nodes_per_graph, self.n_node_covariates), dtype=tf.float32 + ) # node-level covariates + a = tf.TensorSpec( + shape=(self.n_eval_nodes_per_graph, self.n_neighbors_padded), dtype=tf.float32 + ) # adjacency matrix + domain = tf.TensorSpec(shape=(self.n_domains,), dtype=tf.int32) # domain + reconstruction = tf.TensorSpec( + shape=(self.n_eval_nodes_per_graph, self.n_features_1), dtype=tf.float32 + ) # node features to reconstruct + kl_dummy = tf.TensorSpec(shape=(self.n_eval_nodes_per_graph,), dtype=tf.float32) # dummy for kl loss + + if self.vi_model: + if resampled: + output_signature = ( + (h_targets, h_neighbors, sf, a, node_covar, domain), + (reconstruction, kl_dummy), + (h_targets, h_neighbors, sf, a, node_covar, domain), + (reconstruction, kl_dummy), + ) + else: + output_signature = ((h_targets, h_neighbors, sf, a, node_covar, domain), + (reconstruction, kl_dummy)) + else: + if resampled: + output_signature = ( + (h_targets, h_neighbors, sf, a, node_covar, domain), + reconstruction, + (h_targets, h_neighbors, sf, a, node_covar, domain), + reconstruction, + ) + else: + output_signature = ((h_targets, h_neighbors, sf, a, node_covar, domain), + reconstruction) + return output_signature + + def _get_dataset_base( + self, + image_keys: List[str], + nodes_idx: Dict[str, np.ndarray], + batch_size: int, + shuffle_buffer_size: Optional[int], + train: bool = True, + seed: Optional[int] = None, + prefetch: int = 100, + reinit_n_eval: Optional[int] = None, + ): + """Prepare a dataset. + + Parameters + ---------- + image_keys : np.array + Image keys in partition. + nodes_idx : dict, str + Dictionary of nodes per image in partition. + batch_size : int + Batch size. + shuffle_buffer_size : int, optional + Shuffle buffer size. + train : bool + Whether dataset is used for training or not (influences shuffling of nodes). + seed : int, optional + Random seed. + prefetch: int + Prefetch of dataset. 
+ reinit_n_eval : int, optional + Used if model is reinitialized to different number of nodes per graph. + + Returns + ------- + A tensorflow dataset. + """ + np.random.seed(seed) + if reinit_n_eval is not None and reinit_n_eval != self.n_eval_nodes_per_graph: + print( + "ATTENTION: specifying reinit_n_eval will change class argument n_eval_nodes_per_graph " + "from %i to %i" % (self.n_eval_nodes_per_graph, reinit_n_eval) + ) + self.n_eval_nodes_per_graph = reinit_n_eval + + def generator(): + for key in image_keys: + if nodes_idx[key].size == 0: # needed for images where no nodes are selected + continue + idx_nodes = np.arange(0, self.a[key].shape[0]) + + if train: + index_list = [ + np.asarray( + np.random.choice( + a=nodes_idx[key], + size=self.n_eval_nodes_per_graph, + replace=True, + ), + dtype=np.int32, + ) + ] + else: + # dropping + index_list = [ + np.asarray( + nodes_idx[key][self.n_eval_nodes_per_graph * i: self.n_eval_nodes_per_graph * (i + 1)], + dtype=np.int32, + ) + for i in range(len(nodes_idx[key]) // self.n_eval_nodes_per_graph) + ] + + for indices in index_list: + h_out = self.h_1[key][idx_nodes[indices], :] + if self.h0_in: + h_targets = self.h_0[key][idx_nodes[indices], :] + else: + h_targets = self.h_1[key][idx_nodes[indices], self.idx_target_features] + h_neighbors = [] + a_neighborhood = np.zeros((self.n_eval_nodes_per_graph, self.n_neighbors_padded), "float32") + for i, j in enumerate(idx_nodes[indices]): + idx_neighbors = np.where(np.asarray(self.a[key][j, :].todense()).flatten() == 1.)[0] + if self.h0_in: + h_neighbors_j = self.h_0[key][idx_neighbors, :] + else: + h_neighbors_j = self.h_1[key][idx_neighbors, self.idx_neighbor_features] + h_neighbors_j = np.expand_dims(h_neighbors_j, axis=0) + # Pad neighborhoods: + diff = self.n_neighbors_padded - h_neighbors_j.shape[1] + zeros = np.zeros((1, diff, h_neighbors_j.shape[2]), dtype="float32") + h_neighbors_j = np.concatenate([h_neighbors_j, zeros], axis=1) + h_neighbors.append(h_neighbors_j) + a_neighborhood[i, :len(idx_neighbors)] = 1. 
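+                    # Stack the per-target neighborhoods into one array of shape
+                    # (n_eval_nodes_per_graph, n_neighbors_padded, n_features_in);
+                    # zero-padded positions are masked downstream via a_neighborhood.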
+ h_neighbors = np.concatenate([h_neighbors], axis=0) + if self.log_transform: + h_targets = np.log(h_targets + 1.0) + h_neighbors = np.log(h_neighbors + 1.0) + + node_covar = self.node_covar[key][idx_nodes] + node_covar = node_covar[indices, :] + + sf = np.expand_dims(self.size_factors[key][idx_nodes], axis=1) + sf = sf[indices, :] + + g = np.zeros((self.n_domains,), dtype="int32") + g[self.domains[key]] = 1 + + if self.vi_model: + kl_dummy = np.zeros((self.n_eval_nodes_per_graph,), dtype="float32") + yield (h_targets, h_neighbors, sf, a_neighborhood, node_covar, g), (h_out, kl_dummy) + else: + yield (h_targets, h_neighbors, sf, a_neighborhood, node_covar, g), h_out + + output_signature = self._get_output_signature(resampled=False) + + dataset = tf.data.Dataset.from_generator(generator=generator, output_signature=output_signature) + if train: + if shuffle_buffer_size is not None: + dataset = dataset.shuffle(buffer_size=shuffle_buffer_size, seed=None, reshuffle_each_iteration=True) + dataset = dataset.repeat() + dataset = dataset.batch(batch_size) + dataset = dataset.prefetch(prefetch) + return dataset diff --git a/ncem/estimators/estimator_ed_ncem.py b/ncem/estimators/estimator_ed_ncem.py index f2ce6456..0f1a8336 100644 --- a/ncem/estimators/estimator_ed_ncem.py +++ b/ncem/estimators/estimator_ed_ncem.py @@ -1,7 +1,8 @@ import tensorflow as tf +from typing import Tuple -from ncem.estimators import EstimatorGraph -from ncem.models import ModelEDncem +from ncem.estimators import EstimatorGraph, EstimatorNeighborhood +from ncem.models import ModelEDncem, ModelEd2Ncem class EstimatorEDncem(EstimatorGraph): @@ -161,3 +162,90 @@ def init_model( self.max_beta = max_beta self.pre_warm_up = pre_warm_up self._compile_model(optimizer=optimizer, output_layer=output_layer) + + +class EstimatorEdNcemNeighborhood(EstimatorNeighborhood): + """Estimator class for encoder-decoder NCEM models with single graph layer. Subclass of EstimatorNeighborhood.""" + + def __init__( + self, + cond_type: str, + use_type_cond: bool, + log_transform: bool = False, + ): + """Initialize a EstimatorEDncem object. + + Parameters + ---------- + cond_type : str + Max, ind or gcn, graph layer used in conditional. + use_type_cond : bool + Whether to use the categorical cell type label in conditional. + log_transform : bool + Whether to log transform h_1. + + Raises + ------ + ValueError + If `cond_type` is not recognized. 
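+
+        Notes
+        -----
+        Padding each target cell's neighborhood to the maximal node degree means
+        that attention coefficients are computed on tensors of shape
+        (batch, targets, max degree, features) rather than via a full
+        (nodes, nodes) adjacency matrix, so the per-layer cost scales with the
+        maximal degree instead of the number of nodes in the image graph.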
+ """ + super(EstimatorEdNcemNeighborhood, self).__init__() + self.model_type = "ed_ncem" + if cond_type in ["gat", "lr_gat"]: + self.adj_type = "full" + else: + raise ValueError("cond_type %s not recognized" % cond_type) + self.cond_type = cond_type + self.use_type_cond = use_type_cond + self.log_transform = log_transform + self.metrics = {"np": [], "tf": []} + self.n_eval_nodes_per_graph = None + + def init_model( + self, + optimizer: str, + learning_rate: float, + latent_dim: Tuple[int], + dropout_rate: float, + l2_coef: float, + l1_coef: float, + n_eval_nodes_per_graph: int, + use_domain: bool, + scale_node_size: bool, + output_layer: str, + dec_intermediate_dim: int, + dec_n_hidden: int, + dec_dropout_rate: float, + dec_l1_coef: float, + dec_l2_coef: float, + dec_use_batch_norm: bool, + **kwargs + ): + self.n_eval_nodes_per_graph = n_eval_nodes_per_graph + self.model = ModelEd2Ncem( + input_shapes=( + self.n_features_in, + self.n_features_1, + self.n_eval_nodes_per_graph, + self.n_neighbors_padded, + self.n_node_covariates, + self.n_domains, + ), + latent_dim=latent_dim, + dropout_rate=dropout_rate, + l2_coef=l2_coef, + l1_coef=l1_coef, + use_domain=use_domain, + use_type_cond=self.use_type_cond, + scale_node_size=scale_node_size, + output_layer=output_layer, + dec_intermediate_dim=dec_intermediate_dim, + dec_n_hidden=dec_n_hidden, + dec_dropout_rate=dec_dropout_rate, + dec_l1_coef=dec_l1_coef, + dec_l2_coef=dec_l2_coef, + dec_use_batch_norm=dec_use_batch_norm, + ) + optimizer = tf.keras.optimizers.get(optimizer) + tf.keras.backend.set_value(optimizer.lr, learning_rate) + self._compile_model(optimizer=optimizer, output_layer=output_layer) diff --git a/ncem/models/__init__.py b/ncem/models/__init__.py index f8dbe2a4..45f8c778 100644 --- a/ncem/models/__init__.py +++ b/ncem/models/__init__.py @@ -4,5 +4,6 @@ from ncem.models.model_cvae_ncem import ModelCVAEncem from ncem.models.model_ed import ModelED from ncem.models.model_ed_ncem import ModelEDncem +from ncem.models.model_ed_single_ncem import ModelEd2Ncem from ncem.models.model_interactions import ModelInteractions from ncem.models.model_linear import ModelLinear diff --git a/ncem/models/layers/__init__.py b/ncem/models/layers/__init__.py index c47dbc0f..7ecc010a 100644 --- a/ncem/models/layers/__init__.py +++ b/ncem/models/layers/__init__.py @@ -9,5 +9,7 @@ LinearOutput, NegBinConstDispOutput, NegBinOutput, - NegBinSharedDispOutput) + NegBinSharedDispOutput, + get_out) from ncem.models.layers.preproc_input import DenseInteractions, PreprocInput +from ncem.models.layers.single_gnn_layers import SingleLrGatLayer, SingleGatLayer diff --git a/ncem/models/layers/output_layers.py b/ncem/models/layers/output_layers.py index 2b1057e2..3d50b2d5 100644 --- a/ncem/models/layers/output_layers.py +++ b/ncem/models/layers/output_layers.py @@ -1,6 +1,36 @@ import tensorflow as tf +def get_out(output_layer: str, out_feature_dim, scale_node_size): + if output_layer == "gaussian": + output_decoder_layer = GaussianOutput( + original_dim=out_feature_dim, + use_node_scale=scale_node_size, + name="GaussianOutput_decoder", + ) + elif output_layer == "nb": + output_decoder_layer = NegBinOutput( + original_dim=out_feature_dim, + use_node_scale=scale_node_size, + name="NegBinOutput_decoder", + ) + elif output_layer == "nb_shared_disp": + output_decoder_layer = NegBinSharedDispOutput( + original_dim=out_feature_dim, + use_node_scale=scale_node_size, + name="NegBinSharedDispOutput_decoder", + ) + elif output_layer == "nb_const_disp": + output_decoder_layer = 
NegBinConstDispOutput( + original_dim=out_feature_dim, + use_node_scale=scale_node_size, + name="NegBinConstDispOutput_decoder", + ) + else: + raise ValueError("tried to access a non-supported output layer %s" % output_layer) + return output_decoder_layer + + class LinearOutput(tf.keras.layers.Layer): """Linear output layer.""" diff --git a/ncem/models/layers/single_gnn_layers.py b/ncem/models/layers/single_gnn_layers.py new file mode 100644 index 00000000..e3fa3d96 --- /dev/null +++ b/ncem/models/layers/single_gnn_layers.py @@ -0,0 +1,171 @@ +import tensorflow as tf + + +class SingleMaxLayer(tf.keras.layers.Layer): + + def call(self, inputs, **kwargs): + x = inputs[0] + y = tf.where(x > 0.5, x=tf.divide(x, x), y=tf.multiply(x, tf.constant(0.0, dtype=tf.float32))) + return y + + +class SingleGcnLayer(tf.keras.layers.Layer): + + def __init__(self, input_dim, latent_dim, dropout_rate, activation, l2_reg, use_bias: bool = False, **kwargs): + super().__init__(**kwargs) + self.input_dim = input_dim + self.latent_dim = latent_dim + self.dropout_rate = dropout_rate + self.activation = tf.keras.activations.get(activation) + self.l2_reg = l2_reg + self.use_bias = use_bias + + self.kernel = None + self.bias = None + self.kernel = self.add_weight( + name="kernel", + shape=(self.input_dim, self.latent_dim), + initializer=tf.keras.initializers.glorot_uniform(), + regularizer=tf.keras.regularizers.l2(self.l2_reg), + ) + # Layer bias + if self.use_bias: + self.bias = self.add_weight(name="bias", shape=(self.output_dim,)) + + def call(self, inputs, **kwargs): + targets = inputs[0] # (batch, target nodes, features) + neighbors = inputs[1] # (batch, target nodes, padded neighbor nodes, features) + neighborhood = tf.concat([tf.expand_dims(targets, axis=-2), neighbors], axis=-2) + a = inputs[2] # (batch, target nodes, padded neighbor nodes) + + # (batch, target nodes, padded neighbor nodes, latent) + y = tf.matmul(neighborhood, self.kernel, axes=1) + y = tf.reduce_sum(y, axis=-2) # (batch, target nodes, latent) + # Normalise neighborhood size in embedding: + factor = tf.reduce_sum(a, axis=-1, keepdims=True) # (batch, target nodes, 1) + factor = tf.where(factor > 0.5, x=factor, y=tf.ones_like(factor)) + y = y / factor + y = tf.keras.layers.Dropout(self.dropout_rate)(y) + + if self.use_bias: + y = tf.add(y, self.bias) + if self.activation is not None: + y = self.activation(y) + + return y + + +class SingleLrGatLayer(tf.keras.layers.Layer): + + def __init__(self, lr_dim, out_dim, dropout_rate, l2_reg, **kwargs): + """Initialize GCNLayer. + + Parameters + ---------- + out_dim + Output dimension. + dropout_rate + Dropout rate. + activation + Activation. + l2_reg + l2 regularization coefficient. + kwargs + Arbitrary keyword arguments. 
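A note on `SingleGcnLayer` above: as committed, its `call` cannot execute, since `tf.matmul` has no `axes` argument, and the optional bias references `self.output_dim`, which is never defined on the layer. A minimal corrected sketch of the two affected spots, assuming the intent is to contract the feature axis against the `(input_dim, latent_dim)` kernel:

```python
# In SingleGcnLayer.__init__, the bias presumably matches the latent dimension:
if self.use_bias:
    self.bias = self.add_weight(name="bias", shape=(self.latent_dim,))

# In SingleGcnLayer.call, tf.tensordot contracts the trailing feature axis:
# (batch, targets, neighbors + 1, input_dim) -> (batch, targets, neighbors + 1, latent_dim)
y = tf.tensordot(neighborhood, self.kernel, axes=1)
```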
+ """ + super().__init__(**kwargs) + self.lr_dim = lr_dim + self.out_dim = out_dim + self.dropout_rate = dropout_rate + self.l2_reg = l2_reg + + # Layer kernel + self.kernel_l = self.add_weight( + name="kernel_l", + shape=(1, 1, self.lr_dim), + initializer=tf.keras.initializers.glorot_uniform(), + regularizer=tf.keras.regularizers.l2(self.l2_reg), + ) + self.bias_l = self.add_weight(name="bias_l", shape=(1, 1, self.lr_dim,)) + self.kernel_r = self.add_weight( + name="kernel_r", + shape=(1, 1, self.lr_dim), + initializer=tf.keras.initializers.glorot_uniform(), + regularizer=tf.keras.regularizers.l2(self.l2_reg), + ) + self.bias_r = self.add_weight(name="bias_r", shape=(1, 1, 1, self.lr_dim,)) + + def call(self, inputs, **kwargs): + targets_receptor = inputs[0] # (batch, target nodes, lr) + neighbors_ligand = inputs[1] # (batch, target nodes, padded neighbor nodes, lr) + a = inputs[2] # (batch, target nodes, padded neighbor nodes) + + targets_receptor = targets_receptor * self.kernel_r + self.bias_r # (batch, target nodes, lr) + neighbors_ligand = neighbors_ligand * self.kernel_l + self.bias_l # (batch, target nodes, padded neighbor nodes, lr) + targets_receptor = tf.expand_dims(targets_receptor, axis=-2) # (batch, target nodes, 1, lr) + weights = targets_receptor * neighbors_ligand + # Mask embeddings to neighbors + a = tf.expand_dims(a, axis=-1) # (batch, target nodes, padded neighbor nodes, 1) + weights = weights * a + weights = tf.reduce_sum(weights, axis=2) # (batch, target nodes, lr) + y = tf.math.sigmoid(weights) + + return y + + +class SingleGatLayer(tf.keras.layers.Layer): + + def __init__(self, in_dim, out_dim, dropout_rate, l2_reg, **kwargs): + """Initialize GatLayer. + + Parameters + ---------- + out_dim + Output dimension. + dropout_rate + Dropout rate. + activation + Activation. + l2_reg + l2 regularization coefficient. + kwargs + Arbitrary keyword arguments. 
+ """ + super().__init__(**kwargs) + self.in_dim = in_dim + self.out_dim = out_dim + self.dropout_rate = dropout_rate + self.l2_reg = l2_reg + + # Layer kernel + self.kernel_target = self.add_weight( + name="kernel_l", + shape=(1, 1, self.in_dim), + initializer=tf.keras.initializers.glorot_uniform(), + regularizer=tf.keras.regularizers.l2(self.l2_reg), + ) + self.bias_target = self.add_weight(name="bias_l", shape=(1, 1, self.in_dim,)) + self.kernel_neighbor = self.add_weight( + name="kernel_r", + shape=(1, 1, self.in_dim), + initializer=tf.keras.initializers.glorot_uniform(), + regularizer=tf.keras.regularizers.l2(self.l2_reg), + ) + self.bias_neighbor = self.add_weight(name="bias_r", shape=(1, 1, 1, self.in_dim,)) + + def call(self, inputs, **kwargs): + targets = inputs[0] # (batch, target nodes, features) + neighbors = inputs[1] # (batch, target nodes, padded neighbor nodes, features) + a = inputs[2] # (batch, target nodes, padded neighbor nodes) + + targets = targets * self.kernel_neighbor + self.bias_neighbor # (batch, target nodes, lr) + neighbors = neighbors * self.kernel_target + self.bias_target # (batch, target nodes, padded neighbor nodes, lr) + targets = tf.expand_dims(targets, axis=-2) # (batch, target nodes, 1, lr) + weights = targets * neighbors + # Mask embeddings to neighbors + a = tf.expand_dims(a, axis=-1) # (batch, target nodes, padded neighbor nodes, 1) + weights = weights * a + weights = tf.reduce_sum(weights, axis=2) # (batch, target nodes, lr) + y = tf.math.sigmoid(weights) + + return y diff --git a/ncem/models/model_ed_single_ncem.py b/ncem/models/model_ed_single_ncem.py new file mode 100644 index 00000000..6d264f00 --- /dev/null +++ b/ncem/models/model_ed_single_ncem.py @@ -0,0 +1,152 @@ +from typing import Union + +import tensorflow as tf + +from ncem.models.layers import get_out, Decoder +from ncem.models.layers.single_gnn_layers import SingleMaxLayer, SingleGcnLayer, SingleGatLayer, SingleLrGatLayer + + +class ModelEd2Ncem: + """Model class for NCEM encoder-decoder with graph layer IND (MAX) or GCN.""" + + def __init__( + self, + input_shapes, + latent_dim: tuple, + dropout_rate: float, + l2_coef: float, + l1_coef: float, + cond_type: str, + use_type_cond: bool, + use_domain: bool, + scale_node_size: bool, + output_layer: str, + dec_intermediate_dim: int, + dec_n_hidden: int, + dec_dropout_rate: float, + dec_l1_coef: float, + dec_l2_coef: float, + dec_use_batch_norm: bool, + **kwargs, + ): + super().__init__() + self.args = { + "input_shapes": input_shapes, + "latent_dim": latent_dim, + "dropout_rate": dropout_rate, + "l2_coef": l2_coef, + "l1_coef": l1_coef, + "use_domain": use_domain, + "use_type_cond": use_type_cond, + "scale_node_size": scale_node_size, + "output_layer": output_layer, + "dec_intermediate_dim": "dec_intermediate_dim", + "dec_n_hidden": "dec_n_hidden", + "dec_dropout_rate": "dec_dropout_rate", + "dec_l1_coef": "dec_l1_coef", + "dec_l2_coef": "dec_l2_coef", + "dec_use_batch_norm": "dec_use_batch_norm", + } + in_lr_feature_dim = input_shapes[0] + out_feature_dim = input_shapes[1] + num_targets_dim = input_shapes[2] + neighbors_dim = input_shapes[3] + categ_condition_dim = input_shapes[4] + domain_dim = input_shapes[5] + + # node features - reconstruction: Input Tensor - shape=(None, targets, F-out) + input_x_reconstruct = tf.keras.Input(shape=(num_targets_dim, out_feature_dim), name="node_features_reconstruct") + # node size - reconstruction: Input Tensor - shape=(None, targets, 1) + input_node_size = tf.keras.Input(shape=(num_targets_dim, 1), 
name="node_size_reconstruct") + # node features - node representation of neighbors: Input Tensor - shape=(None, targets, F-in) + input_x_targets = tf.keras.Input(shape=(neighbors_dim, in_lr_feature_dim), name="node_features_targets") + # node features - node representation of neighbors: Input Tensor - shape=(None, neighbors, F-in) + input_x_neighbors = tf.keras.Input(shape=(neighbors_dim, in_lr_feature_dim), name="node_features_neighbors") + # adj_matrices - A: Input Tensor - shape=(None, targets, neighbors) + input_a = tf.keras.Input(shape=(num_targets_dim, neighbors_dim), name="adjacency_matrix") + # Categorical predictors: Input Tensor - shape=(None, targets, P) + input_categ_condition = tf.keras.Input(shape=(num_targets_dim, categ_condition_dim), + name="categorical_predictor") + # domain information of graph - shape=(None, 1) + input_g = tf.keras.layers.Input(shape=(domain_dim,), name="input_da_group", dtype="int32") + + if use_domain: + categ_condition = tf.concat( + [ + input_categ_condition, + tf.tile(tf.expand_dims(tf.cast(input_g, dtype="float32"), axis=-2), [1, num_targets_dim, 1]), + ], + axis=-1, + ) + else: + categ_condition = input_categ_condition + + if cond_type == "lr_gat": + x_encoder = SingleLrGatLayer( + lr_dim=in_lr_feature_dim, + latent_dim=latent_dim, + dropout_rate=dropout_rate, + l2_reg=l2_coef, + name=f"lr_gat_layer", + )([input_x_targets, input_x_neighbors, input_a]) + elif cond_type == "gat": + x_encoder = SingleGatLayer( + lr_dim=in_lr_feature_dim, + latent_dim=latent_dim, + dropout_rate=dropout_rate, + l2_reg=l2_coef, + name=f"lr_gat_layer", + )([input_x_targets, input_x_neighbors, input_a]) + elif cond_type == "max": + x_encoder = SingleMaxLayer( + name=f"max_layer" + )([input_x_neighbors,]) + elif cond_type == "gcn": + x_encoder = SingleGcnLayer( + name=f"max_layer" + )([input_x_targets, input_x_neighbors, input_a]) + else: + raise ValueError("tried to access a non-supported conditional layer %s" % cond_type) + + # Decoder on neighborhood embeddings: + if dec_n_hidden > 0: + x = Decoder( + intermediate_dim=dec_intermediate_dim, + n_hidden=dec_n_hidden, + dropout_rate=dec_dropout_rate, + l1_coef=dec_l1_coef, + l2_coef=dec_l2_coef, + use_type_cond=use_type_cond, + use_batch_norm=dec_use_batch_norm, + )((x_encoder, categ_condition)) + else: + x = tf.concat([x_encoder, categ_condition], axis=-1) + + output_decoder = get_out(output_layer=output_layer, out_feature_dim=out_feature_dim, + scale_node_size=scale_node_size)((x, input_node_size)) + output_decoder_concat = tf.keras.layers.Concatenate(axis=2, name="reconstruction")(output_decoder) + + self.encoder = tf.keras.Model( + inputs=[ + input_x_reconstruct, + input_x_targets, + input_x_neighbors, + input_a, + input_categ_condition, + input_g, + ], + outputs=x_encoder, + name="encoder_ncem", + ) + self.training_model = tf.keras.Model( + inputs=[ + input_x_reconstruct, + input_x_targets, + input_x_neighbors, + input_a, + input_categ_condition, + input_g, + ], + outputs=output_decoder_concat, + name="ed_ncem", + ) diff --git a/ncem/train/train_model.py b/ncem/train/train_model.py index 16a195b1..1ab3f530 100644 --- a/ncem/train/train_model.py +++ b/ncem/train/train_model.py @@ -3,7 +3,7 @@ from typing import Union from ncem.estimators import (EstimatorCVAE, EstimatorCVAEncem, EstimatorED, - EstimatorEDncem, EstimatorInteractions, + EstimatorEDncem, EstimatorEdNcemNeighborhood, EstimatorInteractions, EstimatorLinear) @@ -137,6 +137,16 @@ def _save_specific(self, fn): self._save_evaluation_per_node_type(fn=fn) +class 
TrainModelEdSingleNcem(TrainModel): + estimator: EstimatorEdNcemNeighborhood + + def init_estim(self, **kwargs): + self.estimator = EstimatorEdNcemNeighborhood(**kwargs) + + def _save_specific(self, fn): + self._save_evaluation_per_node_type(fn=fn) + + class TrainModelCVAEBase(TrainModel): estimator: Union[EstimatorCVAE, EstimatorCVAEncem] From f84f8a73d1647bdd67a71ed2be0bec9b83709f4d Mon Sep 17 00:00:00 2001 From: davidsebfischer Date: Tue, 12 Oct 2021 13:35:53 +0200 Subject: [PATCH 02/15] enabled extraction of scaled adj matrix --- ncem/estimators/base_estimator_neighbors.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ncem/estimators/base_estimator_neighbors.py b/ncem/estimators/base_estimator_neighbors.py index ef3f6586..06357d3c 100644 --- a/ncem/estimators/base_estimator_neighbors.py +++ b/ncem/estimators/base_estimator_neighbors.py @@ -176,7 +176,8 @@ def generator(): h_neighbors = [] a_neighborhood = np.zeros((self.n_eval_nodes_per_graph, self.n_neighbors_padded), "float32") for i, j in enumerate(idx_nodes[indices]): - idx_neighbors = np.where(np.asarray(self.a[key][j, :].todense()).flatten() == 1.)[0] + a_j = np.asarray(self.a[key][j, :].todense()).flatten() + idx_neighbors = np.where(a_j > 0.)[0] if self.h0_in: h_neighbors_j = self.h_0[key][idx_neighbors, :] else: @@ -187,7 +188,7 @@ def generator(): zeros = np.zeros((1, diff, h_neighbors_j.shape[2]), dtype="float32") h_neighbors_j = np.concatenate([h_neighbors_j, zeros], axis=1) h_neighbors.append(h_neighbors_j) - a_neighborhood[i, :len(idx_neighbors)] = 1. + a_neighborhood[i, :len(idx_neighbors)] = a_j[idx_neighbors] h_neighbors = np.concatenate([h_neighbors], axis=0) if self.log_transform: h_targets = np.log(h_targets + 1.0) From 162ba4e6f254c0bd6a81a6e35c35591f23113667 Mon Sep 17 00:00:00 2001 From: davidsebfischer Date: Wed, 13 Oct 2021 14:23:25 +0200 Subject: [PATCH 03/15] debugged unit tests --- ncem/api/train/__init__.py | 2 +- ncem/data.py | 31 +++--- ncem/estimators/base_estimator_neighbors.py | 62 ++++++------ ncem/estimators/estimator_ed_ncem.py | 6 +- ncem/models/layers/single_gnn_layers.py | 7 +- ncem/models/model_ed_single_ncem.py | 18 ++-- ncem/unit_test/directories.py | 14 +++ ncem/unit_test/test_dataloader_local.py | 83 ++++++---------- ncem/unit_test/test_estimator_local.py | 103 +++++++++++++++----- 9 files changed, 186 insertions(+), 140 deletions(-) create mode 100644 ncem/unit_test/directories.py diff --git a/ncem/api/train/__init__.py b/ncem/api/train/__init__.py index e28b4ced..9d9d506d 100644 --- a/ncem/api/train/__init__.py +++ b/ncem/api/train/__init__.py @@ -1,6 +1,6 @@ """Initializes a train object in api.""" from ncem.estimators import (Estimator, EstimatorCVAE, EstimatorCVAEncem, - EstimatorED, EstimatorEDncem, EstimatorGraph, + EstimatorED, EstimatorEDncem, EstimatorEdNcemNeighborhood, EstimatorGraph, EstimatorInteractions, EstimatorLinear, EstimatorNoGraph) from ncem.models import BetaScheduler diff --git a/ncem/data.py b/ncem/data.py index 8d54f210..fbaec427 100644 --- a/ncem/data.py +++ b/ncem/data.py @@ -1,6 +1,7 @@ import abc import warnings from collections import OrderedDict +import os from typing import Dict, List, Optional, Sequence, Tuple, Union import matplotlib.colors as colors @@ -1726,6 +1727,10 @@ def size_factors(self): global_mean_per_node = self.celldata.X.sum(axis=1).mean(axis=0) return {i: global_mean_per_node / np.sum(adata.X, axis=1) for i, adata in self.img_celldata.items()} + @property + def var_names(self): + return self.celldata.var_names + class 
DataLoaderZhang(DataLoader): """DataLoaderZhang class. Inherits all functions from DataLoader.""" @@ -1770,7 +1775,7 @@ def _register_celldata(self): "patient_col": "mouse", } - celldata = read_h5ad(self.data_path + metadata["fn"]).copy() + celldata = read_h5ad(os.path.join(self.data_path, metadata["fn"])).copy() celldata.uns["metadata"] = metadata celldata.uns["img_keys"] = list(np.unique(celldata.obs[metadata["image_col"]])) @@ -1875,7 +1880,7 @@ def _register_celldata(self): "patient_col": None, } - celldata = read_h5ad(self.data_path + metadata["fn"]) + celldata = read_h5ad(os.path.join(self.data_path, metadata["fn"])) celldata = celldata[celldata.obs[metadata["image_col"]] != "Dirt"].copy() celldata.uns["metadata"] = metadata img_keys = list(np.unique(celldata.obs[metadata["image_col"]])) @@ -1973,7 +1978,7 @@ def _register_celldata(self): "cluster_col_preprocessed": "Cluster_preprocessed", "patient_col": "donor", } - celldata_df = read_csv(self.data_path + metadata["fn"][0]) + celldata_df = read_csv(os.path.join(self.data_path, metadata["fn"][0])) celldata_df["point"] = [f"scMEP_point_{str(x)}" for x in celldata_df["point"]] celldata_df = celldata_df.fillna(0) # celldata_df = celldata_df.dropna(inplace=False).reset_index() @@ -2097,7 +2102,7 @@ def _register_graph_features(self, label_selection): label_cols_toread = list(label_selection.intersection(set(list(label_cols.keys())))) usecols = label_cols_toread + [patient_col] - tissue_meta_data = read_excel(self.data_path + "scMEP_sample_description.xlsx", usecols=usecols) + tissue_meta_data = read_excel(os.path.join(self.data_path, "scMEP_sample_description.xlsx"), usecols=usecols) # BUILD LABEL VECTORS FROM LABEL COLUMNS # The columns contain unprocessed numeric and categorical entries that are now processed to prediction-ready # numeric tensors. Here we first generate a dictionary of tensors for each label (label_tensors). 
We then @@ -2212,8 +2217,8 @@ def _register_celldata(self): "cluster_col_preprocessed": "cell_class_preprocessed", "patient_col": None, } - nuclei_df = read_excel(self.data_path + metadata["fn"][0]) - membranes_df = read_excel(self.data_path + metadata["fn"][1]) + nuclei_df = read_excel(os.path.join(self.data_path, metadata["fn"][0])) + membranes_df = read_excel(os.path.join(self.data_path, metadata["fn"][1])) celldata_df = nuclei_df.join(membranes_df.set_index("ObjectNumber"), on="ObjectNumber") @@ -2385,7 +2390,7 @@ def _register_celldata(self): "cluster_col_preprocessed": "ClusterName_preprocessed", "patient_col": "patients", } - celldata_df = read_csv(self.data_path + metadata["fn"]) + celldata_df = read_csv(os.path.join(self.data_path, metadata["fn"])) feature_cols = [ "CD44 - stroma:Cyc_2_ch_2", @@ -2544,7 +2549,7 @@ def _register_graph_features(self, label_selection): usecols = label_cols_toread_csv + [patient_col] tissue_meta_data = read_csv( - self.data_path + "CRC_TMAs_patient_annotations.csv", + os.path.join(self.data_path, "CRC_TMAs_patient_annotations.csv"), # sep='\t', usecols=usecols, )[usecols] @@ -2737,7 +2742,7 @@ def _register_celldata(self): "patient_col": "embryo", } - celldata = read_h5ad(self.data_path + metadata["fn"]).copy() + celldata = read_h5ad(os.path.join(self.data_path, metadata["fn"])).copy() celldata.uns["metadata"] = metadata celldata.uns["img_keys"] = list(np.unique(celldata.obs[metadata["image_col"]])) @@ -2835,7 +2840,7 @@ def _register_celldata(self): "cluster_col": "CellTypeID_new", "cluster_col_preprocessed": "CellTypeID_new_preprocessed", } - celldata_df = read_csv(self.data_path + metadata["fn"]) + celldata_df = read_csv(os.path.join(self.data_path, metadata["fn"])) feature_cols = [ "Abcb4", @@ -2994,10 +2999,10 @@ def _register_celldata(self): # add clean cluster column which removes regular expression from cluster_col celldata.obs[metadata["cluster_col_preprocessed"]] = list( - pd.Series(list(celldata.obs[metadata["cluster_col"]]), dtype="category").map(self.cell_type_merge_dict) + pd.Series(list(celldata.obs[metadata["cluster_col"]]), dtype="str").map(self.cell_type_merge_dict) ) celldata.obs[metadata["cluster_col_preprocessed"]] = celldata.obs[metadata["cluster_col_preprocessed"]].astype( - "category" + "str" ) # register node type names node_type_names = list(np.unique(celldata.obs[metadata["cluster_col_preprocessed"]])) @@ -3076,7 +3081,7 @@ def _register_celldata(self): "cluster_col": "CellTypeID_new", "cluster_col_preprocessed": "CellTypeID_new_preprocessed", } - celldata_df = read_csv(self.data_path + metadata["fn"]) + celldata_df = read_csv(os.path.join(self.data_path, metadata["fn"])) feature_cols = [ "Abcb4", diff --git a/ncem/estimators/base_estimator_neighbors.py b/ncem/estimators/base_estimator_neighbors.py index 06357d3c..893e806e 100644 --- a/ncem/estimators/base_estimator_neighbors.py +++ b/ncem/estimators/base_estimator_neighbors.py @@ -10,11 +10,15 @@ class EstimatorNeighborhood(Estimator): """EstimatorGraph class for spatial models of the nieghborhood only (not full graph).""" n_features_in: int - _n_neighbors_padded: int + _n_neighbors_padded: Union[int, None] h0_in: bool idx_target_features: np.ndarray idx_neighbor_features: np.ndarray + def __init__(self): + super(EstimatorNeighborhood, self).__init__() + self._n_neighbors_padded = None + def set_input_features(self, h0_in=True, target_feature_names=None, neighbor_feature_names=None): """ Need to run this before compiling model. 
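The hunk below fills in the TODO placeholders: target and neighbor feature indices are now resolved against `data.var_names`. In isolation, the matching logic behaves as follows (feature names are hypothetical):

```python
import numpy as np

features = ["ligand_a", "ligand_b", "receptor_a", "receptor_b"]  # data.var_names
idx_target = np.array([features.index(x) for x in ["receptor_a", "receptor_b"]])  # [2, 3]
idx_neighbor = np.array([features.index(x) for x in ["ligand_a", "ligand_b"]])    # [0, 1]
# The estimator asserts equal length and an empty intersection of the two sets.
assert len(idx_target) == len(idx_neighbor)
assert len(set(idx_target.tolist()) & set(idx_neighbor.tolist())) == 0
```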
@@ -27,18 +31,19 @@ def set_input_features(self, h0_in=True, target_feature_names=None, neighbor_fea assert neighbor_feature_names is None self.n_features_in = self.n_features_0 else: - self.idx_target_features = None # TODO match names to feature names in h1 here, as np index array - self.idx_neighbor_features = None # TODO match names to feature names in h1 here, as np index array - assert len(self.idx_target_features.tolist()) == len(self.idx_neighbor_features.tolist()) + features = self.data.var_names.tolist() + self.idx_target_features = np.array([features.index(x) for x in target_feature_names]) + self.idx_neighbor_features = np.array([features.index(x) for x in neighbor_feature_names]) + assert len(self.idx_target_features) == len(self.idx_neighbor_features) assert len(set(self.idx_target_features.tolist()).intersection(set(self.idx_neighbor_features.tolist()))) == 0 self.n_features_in = len(self.idx_target_features) @property def n_neighbors_padded(self): if self._n_neighbors_padded is None: - self._n_neighbors_padded = np.max(np.asarray([ + self._n_neighbors_padded = int(np.max(np.asarray([ np.max(np.asarray(np.sum(a, axis=1)).flatten()) for a in self.a.values() - ])) + ]))) return self._n_neighbors_padded def _get_output_signature(self, resampled: bool = False): @@ -53,24 +58,25 @@ def _get_output_signature(self, resampled: bool = False): ------- output_signature """ + # target node features h_targets = tf.TensorSpec( shape=(self.n_eval_nodes_per_graph, self.n_features_in), dtype=tf.float32 - ) # target node features + ) + # neighbor node features h_neighbors = tf.TensorSpec( - shape=(self.n_neighbors_padded, self.n_features_in), dtype=tf.float32 - ) # neighbor node features + shape=(self.n_eval_nodes_per_graph, self.n_neighbors_padded, self.n_features_in), dtype=tf.float32 + ) sf = tf.TensorSpec(shape=(self.n_eval_nodes_per_graph, 1), dtype=tf.float32) # input node size factors - node_covar = tf.TensorSpec( - shape=(self.n_eval_nodes_per_graph, self.n_node_covariates), dtype=tf.float32 - ) # node-level covariates - a = tf.TensorSpec( - shape=(self.n_eval_nodes_per_graph, self.n_neighbors_padded), dtype=tf.float32 - ) # adjacency matrix - domain = tf.TensorSpec(shape=(self.n_domains,), dtype=tf.int32) # domain - reconstruction = tf.TensorSpec( - shape=(self.n_eval_nodes_per_graph, self.n_features_1), dtype=tf.float32 - ) # node features to reconstruct - kl_dummy = tf.TensorSpec(shape=(self.n_eval_nodes_per_graph,), dtype=tf.float32) # dummy for kl loss + # node-level covariates + node_covar = tf.TensorSpec(shape=(self.n_eval_nodes_per_graph, self.n_node_covariates), dtype=tf.float32) + # adjacency matrix + a = tf.TensorSpec(shape=(self.n_eval_nodes_per_graph, self.n_neighbors_padded), dtype=tf.float32) + # domain + domain = tf.TensorSpec(shape=(self.n_domains,), dtype=tf.int32) + # node features to reconstruct + reconstruction = tf.TensorSpec(shape=(self.n_eval_nodes_per_graph, self.n_features_1), dtype=tf.float32) + # dummy for kl loss + kl_dummy = tf.TensorSpec(shape=(self.n_eval_nodes_per_graph,), dtype=tf.float32) if self.vi_model: if resampled: @@ -94,9 +100,10 @@ def _get_output_signature(self, resampled: bool = False): else: output_signature = ((h_targets, h_neighbors, sf, a, node_covar, domain), reconstruction) + print(output_signature) return output_signature - def _get_dataset_base( + def _get_dataset( self, image_keys: List[str], nodes_idx: Dict[str, np.ndarray], @@ -172,7 +179,7 @@ def generator(): if self.h0_in: h_targets = self.h_0[key][idx_nodes[indices], :] else: 
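The replacement below trades NumPy's pairwise fancy indexing for chained indexing: given two integer index arrays, `h[rows, cols]` pairs the indices elementwise (and fails when the arrays differ in length), while `h[rows][:, cols]` first selects the rows and then the columns:

```python
import numpy as np

h = np.arange(12).reshape(3, 4)
rows = np.array([0, 2])
cols = np.array([1, 3])

h[rows, cols]     # pairwise selection: array([ 1, 11])
h[rows][:, cols]  # rows-by-cols grid:  array([[ 1,  3], [ 9, 11]])
```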
- h_targets = self.h_1[key][idx_nodes[indices], self.idx_target_features] + h_targets = self.h_1[key][idx_nodes[indices], :][:, self.idx_target_features] h_neighbors = [] a_neighborhood = np.zeros((self.n_eval_nodes_per_graph, self.n_neighbors_padded), "float32") for i, j in enumerate(idx_nodes[indices]): @@ -181,7 +188,7 @@ def generator(): if self.h0_in: h_neighbors_j = self.h_0[key][idx_neighbors, :] else: - h_neighbors_j = self.h_1[key][idx_neighbors, self.idx_neighbor_features] + h_neighbors_j = self.h_1[key][idx_neighbors, :][:, self.idx_neighbor_features] h_neighbors_j = np.expand_dims(h_neighbors_j, axis=0) # Pad neighborhoods: diff = self.n_neighbors_padded - h_neighbors_j.shape[1] @@ -189,16 +196,13 @@ def generator(): h_neighbors_j = np.concatenate([h_neighbors_j, zeros], axis=1) h_neighbors.append(h_neighbors_j) a_neighborhood[i, :len(idx_neighbors)] = a_j[idx_neighbors] - h_neighbors = np.concatenate([h_neighbors], axis=0) + h_neighbors = np.concatenate(h_neighbors, axis=0) if self.log_transform: h_targets = np.log(h_targets + 1.0) h_neighbors = np.log(h_neighbors + 1.0) - node_covar = self.node_covar[key][idx_nodes] - node_covar = node_covar[indices, :] - - sf = np.expand_dims(self.size_factors[key][idx_nodes], axis=1) - sf = sf[indices, :] + node_covar = self.node_covar[key][idx_nodes][indices, :] + sf = np.expand_dims(self.size_factors[key][idx_nodes][indices], axis=1) g = np.zeros((self.n_domains,), dtype="int32") g[self.domains[key]] = 1 diff --git a/ncem/estimators/estimator_ed_ncem.py b/ncem/estimators/estimator_ed_ncem.py index 0f1a8336..634d84f3 100644 --- a/ncem/estimators/estimator_ed_ncem.py +++ b/ncem/estimators/estimator_ed_ncem.py @@ -170,7 +170,7 @@ class EstimatorEdNcemNeighborhood(EstimatorNeighborhood): def __init__( self, cond_type: str, - use_type_cond: bool, + use_type_cond: bool = True, log_transform: bool = False, ): """Initialize a EstimatorEDncem object. @@ -191,7 +191,7 @@ def __init__( """ super(EstimatorEdNcemNeighborhood, self).__init__() self.model_type = "ed_ncem" - if cond_type in ["gat", "lr_gat"]: + if cond_type in ["gat", "lr_gat", "max", "gcn"]: self.adj_type = "full" else: raise ValueError("cond_type %s not recognized" % cond_type) @@ -209,6 +209,7 @@ def init_model( dropout_rate: float, l2_coef: float, l1_coef: float, + cond_type: str, n_eval_nodes_per_graph: int, use_domain: bool, scale_node_size: bool, @@ -239,6 +240,7 @@ def init_model( use_type_cond=self.use_type_cond, scale_node_size=scale_node_size, output_layer=output_layer, + cond_type=cond_type, dec_intermediate_dim=dec_intermediate_dim, dec_n_hidden=dec_n_hidden, dec_dropout_rate=dec_dropout_rate, diff --git a/ncem/models/layers/single_gnn_layers.py b/ncem/models/layers/single_gnn_layers.py index e3fa3d96..bf38c64a 100644 --- a/ncem/models/layers/single_gnn_layers.py +++ b/ncem/models/layers/single_gnn_layers.py @@ -57,13 +57,11 @@ def call(self, inputs, **kwargs): class SingleLrGatLayer(tf.keras.layers.Layer): - def __init__(self, lr_dim, out_dim, dropout_rate, l2_reg, **kwargs): + def __init__(self, lr_dim, dropout_rate, l2_reg, **kwargs): """Initialize GCNLayer. Parameters ---------- - out_dim - Output dimension. dropout_rate Dropout rate. 
activation @@ -75,7 +73,6 @@ def __init__(self, lr_dim, out_dim, dropout_rate, l2_reg, **kwargs): """ super().__init__(**kwargs) self.lr_dim = lr_dim - self.out_dim = out_dim self.dropout_rate = dropout_rate self.l2_reg = l2_reg @@ -93,7 +90,7 @@ def __init__(self, lr_dim, out_dim, dropout_rate, l2_reg, **kwargs): initializer=tf.keras.initializers.glorot_uniform(), regularizer=tf.keras.regularizers.l2(self.l2_reg), ) - self.bias_r = self.add_weight(name="bias_r", shape=(1, 1, 1, self.lr_dim,)) + self.bias_r = self.add_weight(name="bias_r", shape=(1, 1, self.lr_dim,)) def call(self, inputs, **kwargs): targets_receptor = inputs[0] # (batch, target nodes, lr) diff --git a/ncem/models/model_ed_single_ncem.py b/ncem/models/model_ed_single_ncem.py index 6d264f00..eb8434e9 100644 --- a/ncem/models/model_ed_single_ncem.py +++ b/ncem/models/model_ed_single_ncem.py @@ -54,14 +54,13 @@ def __init__( categ_condition_dim = input_shapes[4] domain_dim = input_shapes[5] - # node features - reconstruction: Input Tensor - shape=(None, targets, F-out) - input_x_reconstruct = tf.keras.Input(shape=(num_targets_dim, out_feature_dim), name="node_features_reconstruct") - # node size - reconstruction: Input Tensor - shape=(None, targets, 1) - input_node_size = tf.keras.Input(shape=(num_targets_dim, 1), name="node_size_reconstruct") # node features - node representation of neighbors: Input Tensor - shape=(None, targets, F-in) - input_x_targets = tf.keras.Input(shape=(neighbors_dim, in_lr_feature_dim), name="node_features_targets") + input_x_targets = tf.keras.Input(shape=(num_targets_dim, in_lr_feature_dim), name="node_features_targets") # node features - node representation of neighbors: Input Tensor - shape=(None, neighbors, F-in) - input_x_neighbors = tf.keras.Input(shape=(neighbors_dim, in_lr_feature_dim), name="node_features_neighbors") + input_x_neighbors = tf.keras.Input(shape=(num_targets_dim, neighbors_dim, in_lr_feature_dim), + name="node_features_neighbors") + # node size - reconstruction: Input Tensor - shape=(None, targets, 1) + input_node_size = tf.keras.Input(shape=(num_targets_dim, 1), name="node_size_reconstruct") # adj_matrices - A: Input Tensor - shape=(None, targets, neighbors) input_a = tf.keras.Input(shape=(num_targets_dim, neighbors_dim), name="adjacency_matrix") # Categorical predictors: Input Tensor - shape=(None, targets, P) @@ -84,7 +83,6 @@ def __init__( if cond_type == "lr_gat": x_encoder = SingleLrGatLayer( lr_dim=in_lr_feature_dim, - latent_dim=latent_dim, dropout_rate=dropout_rate, l2_reg=l2_coef, name=f"lr_gat_layer", @@ -100,7 +98,7 @@ def __init__( elif cond_type == "max": x_encoder = SingleMaxLayer( name=f"max_layer" - )([input_x_neighbors,]) + )([input_x_neighbors, ]) elif cond_type == "gcn": x_encoder = SingleGcnLayer( name=f"max_layer" @@ -128,9 +126,9 @@ def __init__( self.encoder = tf.keras.Model( inputs=[ - input_x_reconstruct, input_x_targets, input_x_neighbors, + input_node_size, input_a, input_categ_condition, input_g, @@ -140,9 +138,9 @@ def __init__( ) self.training_model = tf.keras.Model( inputs=[ - input_x_reconstruct, input_x_targets, input_x_neighbors, + input_node_size, input_a, input_categ_condition, input_g, diff --git a/ncem/unit_test/directories.py b/ncem/unit_test/directories.py new file mode 100644 index 00000000..adfdc29e --- /dev/null +++ b/ncem/unit_test/directories.py @@ -0,0 +1,14 @@ +""" +Paths to cache directories used throughout the tests. 
+""" + +import os + +DIR_TEMP = os.path.join(os.path.dirname(__file__), "temp") +DIR_TEMP_DATA = os.path.join(DIR_TEMP, "data") + +DATA_PATH_ZHANG = os.path.join(DIR_TEMP_DATA, "zhang") +DATA_PATH_JAROSCH = os.path.join(DIR_TEMP_DATA, "busch") +DATA_PATH_HARTMANN = os.path.join(DIR_TEMP_DATA, "hartmann") +DATA_PATH_SCHUERCH = os.path.join(DIR_TEMP_DATA, "schuerch") +DATA_PATH_LU = os.path.join(DIR_TEMP_DATA, "lu") diff --git a/ncem/unit_test/test_dataloader_local.py b/ncem/unit_test/test_dataloader_local.py index abc997d2..f1e95f51 100644 --- a/ncem/unit_test/test_dataloader_local.py +++ b/ncem/unit_test/test_dataloader_local.py @@ -1,84 +1,57 @@ -import unittest +import pytest +from ncem.unit_test.directories import DATA_PATH_ZHANG, DATA_PATH_JAROSCH, DATA_PATH_HARTMANN, DATA_PATH_SCHUERCH, \ + DATA_PATH_LU -class TestDataLoader(unittest.TestCase): + +class HelperTestDataLoader: data_origin: str - data_path: str - def get_dataloader(self): - if self.data_origin == "zhang": + def get_dataloader(self, data_origin: str): + if data_origin == "zhang": from ncem.data import DataLoaderZhang as DataLoader radius = 100 label_selection = [] - elif self.data_origin == "jarosch": + data_path = DATA_PATH_ZHANG + elif data_origin == "jarosch": from ncem.data import DataLoaderJarosch as DataLoader radius = 100 label_selection = [] - elif self.data_origin == "hartmann": + data_path = DATA_PATH_JAROSCH + elif data_origin == "hartmann": from ncem.data import DataLoaderHartmann as DataLoader radius = 100 label_selection = ["Diagnosis", "Age", "Sex"] - elif self.data_origin == "pascualreguant": + data_path = DATA_PATH_HARTMANN + elif data_origin == "pascualreguant": from ncem.data import DataLoaderPascualReguant as DataLoader radius = 100 label_selection = [] - elif self.data_origin == "schuerch": + data_path = None # TODO + elif data_origin == "schuerch": from ncem.data import DataLoaderSchuerch as DataLoader radius = 100 label_selection = ["Group"] - else: - from ncem.data import DataLoaderZhang as DataLoader + data_path = DATA_PATH_SCHUERCH + elif data_origin == "luwt": + from ncem.data import DataLoaderSchuerch as DataLoader radius = 100 - label_selection = [] - - self.data = DataLoader(data_path=self.data_path, radius=radius, label_selection=label_selection) - - -class TestDataLoaderZang(TestDataLoader, unittest.TestCase): - data_path = "/Users/anna.schaar/phd/datasets/zhang/" - data_origin = "zhang" - - def test_data_types(self): - self.get_dataloader() - - -class TestDataLoaderJarosch(TestDataLoader, unittest.TestCase): - data_path = "/Users/anna.schaar/phd/datasets/busch/" - data_origin = "jarosch" - - def test_data_types(self): - self.get_dataloader() - - -class TestDataLoaderHartmann(TestDataLoader, unittest.TestCase): - data_path = "/Users/anna.schaar/phd/datasets/hartmann/" - data_origin = "hartmann" - - def test_data_types(self): - self.get_dataloader() - - -class TestDataLoaderPascualReguant(TestDataLoader, unittest.TestCase): - data_path = "/Users/anna.schaar/phd/datasets/pascualreguant/" - data_origin = "pascualreguant" - - def test_data_types(self): - self.get_dataloader() - - -class TestDataLoaderSchuerch(TestDataLoader, unittest.TestCase): - data_path = "/Users/anna.schaar/phd/datasets/schuerch/" - data_origin = "schuerch" + label_selection = ["Group"] + data_path = DATA_PATH_LU + else: + assert False - def test_data_types(self): - self.get_dataloader() + self.data = DataLoader(data_path=data_path, radius=radius, label_selection=label_selection) + self.data_origin = data_origin -if 
__name__ == "__main__": - unittest.main() +@pytest.mark.parametrize("dataset", ["luwt", "zhang", "jarosch", "hartmann", "lu"]) +def test_data_types(dataset): + loader = HelperTestDataLoader + loader.get_dataloader(data_origin=dataset) diff --git a/ncem/unit_test/test_estimator_local.py b/ncem/unit_test/test_estimator_local.py index 95c73bc9..b1f387a9 100644 --- a/ncem/unit_test/test_estimator_local.py +++ b/ncem/unit_test/test_estimator_local.py @@ -1,14 +1,14 @@ -import unittest +import pytest import ncem.api as ncem +from ncem.estimators import Estimator +from ncem.unit_test.directories import DATA_PATH_ZHANG, DATA_PATH_JAROSCH, DATA_PATH_HARTMANN, DATA_PATH_SCHUERCH, \ + DATA_PATH_LU -class TestEstimator(unittest.TestCase): - base_path = "/Users/anna.schaar/phd/datasets/" - data_path_zhang = base_path + "zhang/" - data_path_jarosch = base_path + "busch/" - data_path_hartmann = base_path + "hartmann/" - data_path_schuerch = base_path + "buffer/" + +class HelperTestEstimator: + est: Estimator def get_estimator( self, @@ -27,6 +27,14 @@ def get_estimator( self.est = ncem.train.EstimatorEDncem(cond_type="max") elif model == "ed_ncem_gcn": self.est = ncem.train.EstimatorEDncem(cond_type="gcn") + elif model == "ed_ncem2_lr_gat": + self.est = ncem.train.EstimatorEdNcemNeighborhood(cond_type="lr_gat") + elif model == "ed_ncem2_gat": + self.est = ncem.train.EstimatorEdNcemNeighborhood(cond_type="gat") + elif model == "ed_ncem2_gcn": + self.est = ncem.train.EstimatorEdNcemNeighborhood(cond_type="gcn") + elif model == "ed_ncem2_max": + self.est = ncem.train.EstimatorEdNcemNeighborhood(cond_type="max") elif model == "cvae": self.est = ncem.train.EstimatorCVAE() elif model == "cvae_ncem_max": @@ -38,10 +46,13 @@ def get_estimator( if data_origin == "zhang": radius = 100 - data_path = self.data_path_zhang + data_path = DATA_PATH_ZHANG elif data_origin == "hartmann": radius = 100 - data_path = self.data_path_hartmann + data_path = DATA_PATH_HARTMANN + elif data_origin.startswith("lu"): + radius = 100 + data_path = DATA_PATH_LU else: assert False @@ -53,9 +64,20 @@ def get_estimator( node_feature_space_id=node_feature_space_id, ) - def _test_train(self, model: str, data_origin: str = "zhang"): + def test_train(self, model: str, data_origin: str = "zhang"): self.get_estimator(model=model, data_origin=data_origin) + kwargs_shared = { + 'optimizer': "adam", + 'latent_dim': 4, + 'dropout_rate': 0., + 'l2_coef': 0., + 'l1_coef': 0., + 'n_eval_nodes_per_graph': 4, + 'scale_node_size': False, + 'output_layer': "gaussian" + } + if model == "linear": kwargs = {"use_source_type": True, "use_domain": True, "learning_rate": 1e-2} train_kwargs = {} @@ -110,6 +132,25 @@ def _test_train(self, model: str, data_origin: str = "zhang"): "beta": 0.1, } train_kwargs = {} + elif model.startswith("ed_ncem2"): + kwargs = kwargs_shared + kwargs.update({ + "use_domain": True, + "use_bias": True, + "learning_rate": 1e-2, + "cond_type": "lr_gat", + "dec_intermediate_dim": 0, + "dec_n_hidden": 0, + "dec_dropout_rate": float, + "dec_l1_coef": 0., + "dec_l2_coef": 0., + "dec_use_batch_norm": False, + }) + train_kwargs = {} + self.est.set_input_features( + h0_in=False, + target_feature_names=["Abcb4", "Abcc3"], + neighbor_feature_names=["Adgre1", "Ammecr1"]) else: assert False self._model_kwargs = kwargs @@ -139,18 +180,30 @@ def _test_train(self, model: str, data_origin: str = "zhang"): ) self.est.model.training_model.summary() - def test_linear(self): - # self._test_train(model='linear_baseline', data_origin='hartmann') - # 
self._test_train(model="linear", data_origin="hartmann") - # self._test_train(model='interactions_baseline', data_origin='hartmann') - self._test_train(model="interactions", data_origin="hartmann") - - def test_ed(self): - # self._test_train(model="ed", data_origin="hartmann") - self._test_train(model="ed_ncem_max", data_origin="hartmann") - # self._test_train(model="ed_ncem_gcn", data_origin="hartmann") - - def test_cvae(self): - # self._test_train(model="cvae", data_origin="hartmann") - self._test_train(model="cvae_ncem_max", data_origin="hartmann") - # self._test_train(model="cvae_ncem_gcn", data_origin="hartmann") + +@pytest.mark.parametrize("dataset", ["luwt"]) +@pytest.mark.parametrize("model", ["interactions"]) +def test_linear(dataset: str, model: str): + estim = HelperTestEstimator() + estim.test_train(model=model, data_origin=dataset) + + +@pytest.mark.parametrize("dataset", ["luwt"]) +@pytest.mark.parametrize("model", ["ed_ncem_max"]) +def test_ed(dataset: str, model: str): + estim = HelperTestEstimator() + estim.test_train(model=model, data_origin=dataset) + + +@pytest.mark.parametrize("dataset", ["luwt"]) +@pytest.mark.parametrize("model", ["cvae_ncem_max"]) +def test_cvae(dataset: str, model: str): + estim = HelperTestEstimator() + estim.test_train(model=model, data_origin=dataset) + + +@pytest.mark.parametrize("dataset", ["luwt"]) +@pytest.mark.parametrize("model", ["ed_ncem2_max", "ed_ncem2_gcn", "ed_ncem2_lr_gat", "ed_ncem2_gat"]) +def test_ed2(dataset: str, model: str): + estim = HelperTestEstimator() + estim.test_train(model=model, data_origin=dataset) From 37ec98302474deb49fa3d4b0713097ce8b08c7ca Mon Sep 17 00:00:00 2001 From: davidsebfischer Date: Wed, 13 Oct 2021 14:30:12 +0200 Subject: [PATCH 04/15] added disclaimer --- ncem/models/layers/single_gnn_layers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ncem/models/layers/single_gnn_layers.py b/ncem/models/layers/single_gnn_layers.py index bf38c64a..397af2ed 100644 --- a/ncem/models/layers/single_gnn_layers.py +++ b/ncem/models/layers/single_gnn_layers.py @@ -112,6 +112,10 @@ def call(self, inputs, **kwargs): class SingleGatLayer(tf.keras.layers.Layer): + """ + TODO GAT implementation here is not complete yet. + """ + def __init__(self, in_dim, out_dim, dropout_rate, l2_reg, **kwargs): """Initialize GatLayer. 
From 3dc14a31f07da513a09f7f1fbbd067e7a6ce365e Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Thu, 14 Oct 2021 11:04:45 +0200 Subject: [PATCH 05/15] simplify output layers with get_out in remaining models --- ncem/models/layers/output_layers.py | 10 +-- ncem/models/model_cvae.py | 98 ++++------------------------ ncem/models/model_cvae_ncem.py | 99 ++++------------------------- ncem/models/model_ed.py | 57 +++-------------- ncem/models/model_ed_ncem.py | 57 +++-------------- ncem/models/model_ed_single_ncem.py | 2 - 6 files changed, 45 insertions(+), 278 deletions(-) diff --git a/ncem/models/layers/output_layers.py b/ncem/models/layers/output_layers.py index 3d50b2d5..e365ccbe 100644 --- a/ncem/models/layers/output_layers.py +++ b/ncem/models/layers/output_layers.py @@ -1,30 +1,30 @@ import tensorflow as tf -def get_out(output_layer: str, out_feature_dim, scale_node_size): +def get_out(output_layer: str, out_feature_dim, scale_node_size, name: str = 'decoder'): if output_layer == "gaussian": output_decoder_layer = GaussianOutput( original_dim=out_feature_dim, use_node_scale=scale_node_size, - name="GaussianOutput_decoder", + name=f"GaussianOutput_{name}", ) elif output_layer == "nb": output_decoder_layer = NegBinOutput( original_dim=out_feature_dim, use_node_scale=scale_node_size, - name="NegBinOutput_decoder", + name=f"NegBinOutput_{name}", ) elif output_layer == "nb_shared_disp": output_decoder_layer = NegBinSharedDispOutput( original_dim=out_feature_dim, use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_decoder", + name=f"NegBinSharedDispOutput_{name}", ) elif output_layer == "nb_const_disp": output_decoder_layer = NegBinConstDispOutput( original_dim=out_feature_dim, use_node_scale=scale_node_size, - name="NegBinConstDispOutput_decoder", + name=f"NegBinConstDispOutput_{name}", ) else: raise ValueError("tried to access a non-supported output layer %s" % output_layer) diff --git a/ncem/models/model_cvae.py b/ncem/models/model_cvae.py index 88a35e77..0e28313e 100644 --- a/ncem/models/model_cvae.py +++ b/ncem/models/model_cvae.py @@ -1,10 +1,7 @@ import numpy as np import tensorflow as tf -from ncem.models.layers import (Decoder, Encoder, GaussianConstDispOutput, - GaussianOutput, NegBinConstDispOutput, - NegBinOutput, NegBinSharedDispOutput, - PreprocInput, SamplingPrior) +from ncem.models.layers import (Decoder, Encoder, get_out, PreprocInput, SamplingPrior) class ModelCVAE: @@ -160,88 +157,17 @@ def __init__( sampling_decoder1 = self.decoder_model((latent_sampling_reshaped1, categ_condition)) sampling_decoder2 = self.decoder_model((latent_sampling_reshaped2, categ_condition)) - if output_layer == "gaussian": - output_decoder_layer = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_sampling", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_sampling", - )((sampling_decoder2, input_node_size)) - elif output_layer == "gaussian_const_disp": - output_decoder_layer = GaussianConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianConstDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = GaussianConstDispOutput( - 
original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianConstDispOutput_sampling", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = GaussianConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianConstDispOutput_sampling", - )((sampling_decoder2, input_node_size)) - elif output_layer == "nb": - output_decoder_layer = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_sampling", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_sampling", - )((sampling_decoder2, input_node_size)) - elif output_layer == "nb_shared_disp": - output_decoder_layer = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_sampling", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_sampling", - )((sampling_decoder2, input_node_size)) - elif output_layer == "nb_const_disp": - output_decoder_layer = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinConstDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinConstDispOutput_decoder", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinConstDispOutput_decoder", - )((sampling_decoder2, input_node_size)) - else: - raise ValueError("tried to access a non-supported output layer %s" % output_layer) + output_decoder_layer = get_out( + output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size + )((output_decoder, input_node_size)) + output_sampling_decoder1 = get_out( + output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size, + name='sampling1' + )((sampling_decoder1, input_node_size)) + output_sampling_decoder2 = get_out( + output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size, + name='sampling2' + )((sampling_decoder2, input_node_size)) output_decoder_concat = tf.keras.layers.Concatenate(axis=2, name="reconstruction")(output_decoder_layer) output_sampling_concat1 = tf.keras.layers.Concatenate(axis=2, name="reconstruction")(output_sampling_decoder1) diff --git a/ncem/models/model_cvae_ncem.py b/ncem/models/model_cvae_ncem.py index 65111f55..7213198e 100644 --- a/ncem/models/model_cvae_ncem.py +++ b/ncem/models/model_cvae_ncem.py @@ -3,11 +3,7 @@ import numpy as np import tensorflow as tf -from ncem.models.layers import (CondDecoder, CondEncoder, - GaussianConstDispOutput, GaussianOutput, - GCNLayer, MaxLayer, NegBinConstDispOutput, - NegBinOutput, 
NegBinSharedDispOutput, - PreprocInput, SamplingPrior) +from ncem.models.layers import (CondDecoder, CondEncoder, GCNLayer, MaxLayer, get_out, PreprocInput, SamplingPrior) class ModelCVAEncem: @@ -243,88 +239,17 @@ def __init__( sampling_decoder1 = self.decoder_model((latent_sampling_reshaped1, x_neighbour_embedding, categ_condition)) sampling_decoder2 = self.decoder_model((latent_sampling_reshaped2, x_neighbour_embedding, categ_condition)) - if output_layer == "gaussian": - output_decoder_layer = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_sampling", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_sampling", - )((sampling_decoder2, input_node_size)) - elif output_layer == "gaussian_const_disp": - output_decoder_layer = GaussianConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianConstDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = GaussianConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianConstDispOutput_sampling", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = GaussianConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianConstDispOutput_sampling", - )((sampling_decoder2, input_node_size)) - elif output_layer == "nb": - output_decoder_layer = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_sampling", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_sampling", - )((sampling_decoder2, input_node_size)) - elif output_layer == "nb_shared_disp": - output_decoder_layer = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_sampling", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_sampling", - )((sampling_decoder2, input_node_size)) - elif output_layer == "nb_const_disp": - output_decoder_layer = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinConstDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder1 = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinConstDispOutput_decoder", - )((sampling_decoder1, input_node_size)) - output_sampling_decoder2 = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - 
name="NegBinConstDispOutput_decoder", - )((sampling_decoder2, input_node_size)) - else: - raise ValueError("tried to access a non-supported output layer %s" % output_layer) + output_decoder_layer = get_out( + output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size + )((output_decoder, input_node_size)) + output_sampling_decoder1 = get_out( + output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size, + name='sampling1' + )((sampling_decoder1, input_node_size)) + output_sampling_decoder2 = get_out( + output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size, + name='sampling2' + )((sampling_decoder2, input_node_size)) output_decoder_concat = tf.keras.layers.Concatenate(axis=2, name="reconstruction")(output_decoder_layer) output_sampling_concat1 = tf.keras.layers.Concatenate(axis=2, name="reconstruction")(output_sampling_decoder1) diff --git a/ncem/models/model_ed.py b/ncem/models/model_ed.py index 66d95b31..419adbab 100644 --- a/ncem/models/model_ed.py +++ b/ncem/models/model_ed.py @@ -1,8 +1,6 @@ import tensorflow as tf -from ncem.models.layers import (Decoder, Encoder, GaussianOutput, - NegBinConstDispOutput, NegBinOutput, - NegBinSharedDispOutput) +from ncem.models.layers import (Decoder, Encoder, get_out) class ModelED: @@ -133,52 +131,13 @@ def __init__( output_decoder = self.decoder_model((z, categ_condition)) sampling_decoder = self.decoder_model((latent_sampling_reshaped, categ_condition)) - if output_layer == "gaussian": - output_decoder_layer = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_sampling", - )((sampling_decoder, input_node_size)) - elif output_layer == "nb": - output_decoder_layer = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_sampling", - )((sampling_decoder, input_node_size)) - elif output_layer == "nb_shared_disp": - output_decoder_layer = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_sampling", - )((sampling_decoder, input_node_size)) - elif output_layer == "nb_const_disp": - output_decoder_layer = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinConstDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinConstDispOutput_sampling", - )((sampling_decoder, input_node_size)) - else: - raise ValueError("tried to access a non-supported output layer %s" % output_layer) + output_decoder_layer = get_out( + output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size + )((output_decoder, input_node_size)) + output_sampling_decoder = get_out( + 
output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size, + name='sampling' + )((sampling_decoder, input_node_size)) output_decoder_concat = tf.keras.layers.Concatenate(axis=2, name="reconstruction")(output_decoder_layer) output_sampling_concat = tf.keras.layers.Concatenate(axis=2, name="reconstruction")(output_sampling_decoder) diff --git a/ncem/models/model_ed_ncem.py b/ncem/models/model_ed_ncem.py index 8ee5313f..12081f95 100644 --- a/ncem/models/model_ed_ncem.py +++ b/ncem/models/model_ed_ncem.py @@ -2,9 +2,7 @@ import tensorflow as tf -from ncem.models.layers import (Decoder, Encoder, GaussianOutput, GCNLayer, - MaxLayer, NegBinConstDispOutput, NegBinOutput, - NegBinSharedDispOutput) +from ncem.models.layers import (Decoder, Encoder, GCNLayer, MaxLayer, get_out) class ModelEDncem: @@ -213,52 +211,13 @@ def __init__( output_decoder = self.decoder_model((z, categ_condition)) sampling_decoder = self.decoder_model((latent_sampling_reshaped, categ_condition)) - if output_layer == "gaussian": - output_decoder_layer = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder = GaussianOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="GaussianOutput_sampling", - )((sampling_decoder, input_node_size)) - elif output_layer == "nb": - output_decoder_layer = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder = NegBinOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinOutput_sampling", - )((sampling_decoder, input_node_size)) - elif output_layer == "nb_shared_disp": - output_decoder_layer = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder = NegBinSharedDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinSharedDispOutput_sampling", - )((sampling_decoder, input_node_size)) - elif output_layer == "nb_const_disp": - output_decoder_layer = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinConstDispOutput_decoder", - )((output_decoder, input_node_size)) - output_sampling_decoder = NegBinConstDispOutput( - original_dim=out_node_feature_dim, - use_node_scale=scale_node_size, - name="NegBinConstDispOutput_sampling", - )((sampling_decoder, input_node_size)) - else: - raise ValueError("tried to access a non-supported output layer %s" % output_layer) + output_decoder_layer = get_out( + output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size + )((output_decoder, input_node_size)) + output_sampling_decoder = get_out( + output_layer=output_layer, out_feature_dim=out_node_feature_dim, scale_node_size=scale_node_size, + name='sampling' + )((sampling_decoder, input_node_size)) output_decoder_concat = tf.keras.layers.Concatenate(axis=2, name="reconstruction")(output_decoder_layer) output_sampling_concat = tf.keras.layers.Concatenate(axis=2, name="reconstruction")(output_sampling_decoder) diff --git a/ncem/models/model_ed_single_ncem.py b/ncem/models/model_ed_single_ncem.py index eb8434e9..6c4532bf 100644 --- a/ncem/models/model_ed_single_ncem.py +++ 
b/ncem/models/model_ed_single_ncem.py @@ -1,5 +1,3 @@ -from typing import Union - import tensorflow as tf from ncem.models.layers import get_out, Decoder From 427ffb595635acd104260e06967f57071f4dd675 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Thu, 14 Oct 2021 11:15:07 +0200 Subject: [PATCH 06/15] simplify output layers with get_out in remaining models --- ncem/data.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/ncem/data.py b/ncem/data.py index f7dcf444..0038e0e5 100644 --- a/ncem/data.py +++ b/ncem/data.py @@ -2820,15 +2820,15 @@ class DataLoaderLuWT(DataLoader): """DataLoaderLuWT class. Inherits all functions from DataLoader.""" cell_type_merge_dict = { - 1: "AEC", - 2: "SEC", - 3: "MK", - 4: "Hepatocyte", - 5: "Macrophage", - 6: "Myeloid", - 7: "Erythroid progenitor", - 8: "Erythroid cell", - 9: "Unknown", + "1": "AEC", + "2": "SEC", + "3": "MK", + "4": "Hepatocyte", + "5": "Macrophage", + "6": "Myeloid", + "7": "Erythroid progenitor", + "8": "Erythroid cell", + "9": "Unknown", } def _register_celldata(self): @@ -3090,15 +3090,15 @@ class DataLoaderLuTET2(DataLoader): """DataLoaderLuTET2 class. Inherits all functions from DataLoader.""" cell_type_merge_dict = { - 1: "AEC", - 2: "SEC", - 3: "MK", - 4: "Hepatocyte", - 5: "Macrophage", - 6: "Myeloid", - 7: "Erythroid progenitor", - 8: "Erythroid cell", - 9: "Unknown", + "1": "AEC", + "2": "SEC", + "3": "MK", + "4": "Hepatocyte", + "5": "Macrophage", + "6": "Myeloid", + "7": "Erythroid progenitor", + "8": "Erythroid cell", + "9": "Unknown", } def _register_celldata(self): From 833ec071fc2e158d415e3e67c885299552e44447 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Thu, 14 Oct 2021 14:24:05 +0200 Subject: [PATCH 07/15] add disclaimer to cond layers --- ncem/models/layers/single_gnn_layers.py | 18 +++++++++++++++--- ncem/models/model_ed_single_ncem.py | 9 +++++++-- ncem/unit_test/test_estimator_local.py | 8 +++++--- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/ncem/models/layers/single_gnn_layers.py b/ncem/models/layers/single_gnn_layers.py index 397af2ed..7c05f633 100644 --- a/ncem/models/layers/single_gnn_layers.py +++ b/ncem/models/layers/single_gnn_layers.py @@ -2,15 +2,25 @@ class SingleMaxLayer(tf.keras.layers.Layer): - + """ + TODO MAX implementation here is not complete yet. + """ def call(self, inputs, **kwargs): x = inputs[0] - y = tf.where(x > 0.5, x=tf.divide(x, x), y=tf.multiply(x, tf.constant(0.0, dtype=tf.float32))) + a = inputs[1] + + a = tf.expand_dims(a, axis=-1) # (batch, target nodes, padded neighbor nodes, 1) + t = x * a + t = tf.reduce_sum(t, axis=2) + + y = tf.where(t > 0.5, x=tf.divide(t, t), y=tf.multiply(t, tf.constant(0.0, dtype=tf.float32))) return y class SingleGcnLayer(tf.keras.layers.Layer): - + """ + TODO GCN implementation here is not complete yet. 
+ """ def __init__(self, input_dim, latent_dim, dropout_rate, activation, l2_reg, use_bias: bool = False, **kwargs): super().__init__(**kwargs) self.input_dim = input_dim @@ -94,6 +104,7 @@ def __init__(self, lr_dim, dropout_rate, l2_reg, **kwargs): def call(self, inputs, **kwargs): targets_receptor = inputs[0] # (batch, target nodes, lr) + print(targets_receptor.shape) neighbors_ligand = inputs[1] # (batch, target nodes, padded neighbor nodes, lr) a = inputs[2] # (batch, target nodes, padded neighbor nodes) @@ -101,6 +112,7 @@ def call(self, inputs, **kwargs): neighbors_ligand = neighbors_ligand * self.kernel_l + self.bias_l # (batch, target nodes, padded neighbor nodes, lr) targets_receptor = tf.expand_dims(targets_receptor, axis=-2) # (batch, target nodes, 1, lr) weights = targets_receptor * neighbors_ligand + print(weights.shape) # Mask embeddings to neighbors a = tf.expand_dims(a, axis=-1) # (batch, target nodes, padded neighbor nodes, 1) weights = weights * a diff --git a/ncem/models/model_ed_single_ncem.py b/ncem/models/model_ed_single_ncem.py index 6c4532bf..2fbb7924 100644 --- a/ncem/models/model_ed_single_ncem.py +++ b/ncem/models/model_ed_single_ncem.py @@ -79,6 +79,10 @@ def __init__( categ_condition = input_categ_condition if cond_type == "lr_gat": + print("LRGAT") + #x_encoder = SingleMaxLayer( + # name=f"max_layer" + #)([input_x_neighbors, ]) x_encoder = SingleLrGatLayer( lr_dim=in_lr_feature_dim, dropout_rate=dropout_rate, @@ -94,12 +98,13 @@ def __init__( name=f"lr_gat_layer", )([input_x_targets, input_x_neighbors, input_a]) elif cond_type == "max": + print("MAX") x_encoder = SingleMaxLayer( name=f"max_layer" - )([input_x_neighbors, ]) + )([input_x_neighbors, input_a]) elif cond_type == "gcn": x_encoder = SingleGcnLayer( - name=f"max_layer" + name=f"gcn_layer" )([input_x_targets, input_x_neighbors, input_a]) else: raise ValueError("tried to access a non-supported conditional layer %s" % cond_type) diff --git a/ncem/unit_test/test_estimator_local.py b/ncem/unit_test/test_estimator_local.py index b1f387a9..d59938c1 100644 --- a/ncem/unit_test/test_estimator_local.py +++ b/ncem/unit_test/test_estimator_local.py @@ -27,14 +27,14 @@ def get_estimator( self.est = ncem.train.EstimatorEDncem(cond_type="max") elif model == "ed_ncem_gcn": self.est = ncem.train.EstimatorEDncem(cond_type="gcn") + elif model == "ed_ncem2_max": + self.est = ncem.train.EstimatorEdNcemNeighborhood(cond_type="max") elif model == "ed_ncem2_lr_gat": self.est = ncem.train.EstimatorEdNcemNeighborhood(cond_type="lr_gat") elif model == "ed_ncem2_gat": self.est = ncem.train.EstimatorEdNcemNeighborhood(cond_type="gat") elif model == "ed_ncem2_gcn": self.est = ncem.train.EstimatorEdNcemNeighborhood(cond_type="gcn") - elif model == "ed_ncem2_max": - self.est = ncem.train.EstimatorEdNcemNeighborhood(cond_type="max") elif model == "cvae": self.est = ncem.train.EstimatorCVAE() elif model == "cvae_ncem_max": @@ -56,6 +56,7 @@ def get_estimator( else: assert False + print('COND TYPE', self.est.cond_type) self.est.get_data( data_origin=data_origin, data_path=data_path, @@ -138,7 +139,7 @@ def test_train(self, model: str, data_origin: str = "zhang"): "use_domain": True, "use_bias": True, "learning_rate": 1e-2, - "cond_type": "lr_gat", + "cond_type": self.est.cond_type, "dec_intermediate_dim": 0, "dec_n_hidden": 0, "dec_dropout_rate": float, @@ -204,6 +205,7 @@ def test_cvae(dataset: str, model: str): @pytest.mark.parametrize("dataset", ["luwt"]) @pytest.mark.parametrize("model", ["ed_ncem2_max", "ed_ncem2_gcn", 
"ed_ncem2_lr_gat", "ed_ncem2_gat"]) +#@pytest.mark.parametrize("model", ["ed_ncem2_max"]) def test_ed2(dataset: str, model: str): estim = HelperTestEstimator() estim.test_train(model=model, data_origin=dataset) From 21f54c99e6153c2c9d436a3e5e8f99fc9955cc17 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Tue, 19 Oct 2021 14:41:02 +0200 Subject: [PATCH 08/15] fix max and gcn layer for single gnn --- ncem/models/layers/single_gnn_layers.py | 13 ++++++++----- ncem/models/model_ed_single_ncem.py | 5 +++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ncem/models/layers/single_gnn_layers.py b/ncem/models/layers/single_gnn_layers.py index 7c05f633..ae77593d 100644 --- a/ncem/models/layers/single_gnn_layers.py +++ b/ncem/models/layers/single_gnn_layers.py @@ -21,9 +21,8 @@ class SingleGcnLayer(tf.keras.layers.Layer): """ TODO GCN implementation here is not complete yet. """ - def __init__(self, input_dim, latent_dim, dropout_rate, activation, l2_reg, use_bias: bool = False, **kwargs): + def __init__(self, latent_dim, dropout_rate, activation, l2_reg, use_bias: bool = False, **kwargs): super().__init__(**kwargs) - self.input_dim = input_dim self.latent_dim = latent_dim self.dropout_rate = dropout_rate self.activation = tf.keras.activations.get(activation) @@ -32,15 +31,19 @@ def __init__(self, input_dim, latent_dim, dropout_rate, activation, l2_reg, use_ self.kernel = None self.bias = None + + def build(self, input_shapes): + input_shape = input_shapes[0] + # Layer kernel self.kernel = self.add_weight( name="kernel", - shape=(self.input_dim, self.latent_dim), + shape=(int(input_shape[2]), self.latent_dim), initializer=tf.keras.initializers.glorot_uniform(), regularizer=tf.keras.regularizers.l2(self.l2_reg), ) # Layer bias if self.use_bias: - self.bias = self.add_weight(name="bias", shape=(self.output_dim,)) + self.bias = self.add_weight(name="bias", shape=(self.latent_dim,)) def call(self, inputs, **kwargs): targets = inputs[0] # (batch, target nodes, features) @@ -49,7 +52,7 @@ def call(self, inputs, **kwargs): a = inputs[2] # (batch, target nodes, padded neighbor nodes) # (batch, target nodes, padded neighbor nodes, latent) - y = tf.matmul(neighborhood, self.kernel, axes=1) + y = tf.matmul(neighborhood, self.kernel) y = tf.reduce_sum(y, axis=-2) # (batch, target nodes, latent) # Normalise neighborhood size in embedding: factor = tf.reduce_sum(a, axis=-1, keepdims=True) # (batch, target nodes, 1) diff --git a/ncem/models/model_ed_single_ncem.py b/ncem/models/model_ed_single_ncem.py index 2fbb7924..d295ec43 100644 --- a/ncem/models/model_ed_single_ncem.py +++ b/ncem/models/model_ed_single_ncem.py @@ -104,6 +104,11 @@ def __init__( )([input_x_neighbors, input_a]) elif cond_type == "gcn": x_encoder = SingleGcnLayer( + latent_dim=latent_dim, + dropout_rate=dropout_rate, + activation="relu", + l2_reg=l2_coef, + use_bias=True, name=f"gcn_layer" )([input_x_targets, input_x_neighbors, input_a]) else: From 998c8cc29f5c788d660546bf27709a010f7e9256 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Mon, 25 Oct 2021 09:52:33 +0200 Subject: [PATCH 09/15] Bump version from 0.3.2 to 0.4.0 --- .cookietemple.yml | 2 +- .github/release-drafter.yml | 4 ++-- cookietemple.cfg | 2 +- docs/conf.py | 4 ++-- ncem/__init__.py | 2 +- pyproject.toml | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.cookietemple.yml b/.cookietemple.yml index 6385bf99..12e162d5 100644 --- a/.cookietemple.yml +++ b/.cookietemple.yml @@ -15,5 +15,5 @@ full_name: Anna Schaar email: 
anna.schaar@helmholtz-muenchen.de project_name: ncem project_short_description: ncem. Learning cell communication from spatial graphs of cells. -version: 0.3.2 +version: 0.4.0 license: BSD-3-Clause diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index bf959b95..f5635d31 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,5 +1,5 @@ -name-template: "0.3.2 🌈" # <> -tag-template: 0.3.2 # <> +name-template: "0.4.0 🌈" # <> +tag-template: 0.4.0 # <> categories: - title: "🚀 Features" labels: diff --git a/cookietemple.cfg b/cookietemple.cfg index 2a381cf0..8cc7bb7c 100644 --- a/cookietemple.cfg +++ b/cookietemple.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.2 +current_version = 0.4.0 [bumpversion_files_whitelisted] init_file = ncem/__init__.py diff --git a/docs/conf.py b/docs/conf.py index 8a78691e..ac8e1e4c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -73,9 +73,9 @@ # the built documents. # # The short X.Y version. -version = "0.3.2" +version = "0.4.0" # The full version, including alpha/beta/rc tags. -release = "0.3.2" +release = "0.4.0" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/ncem/__init__.py b/ncem/__init__.py index b2d3f6a6..2be35553 100644 --- a/ncem/__init__.py +++ b/ncem/__init__.py @@ -8,4 +8,4 @@ __maintainer__ = ", ".join(["Anna C. Schaar", "David S. Fischer"]) __author__ = ", ".join(["Anna C. Schaar", "David S. Fischer"]) __email__ = ", ".join(["anna.schaar@helmholtz-muenchen.de", "david.fischer@helmholtz-muenchen.de"]) -__version__ = "0.3.2" +__version__ = "0.4.0" diff --git a/pyproject.toml b/pyproject.toml index e0bde5d3..94395d8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ncem" -version = "0.3.2" # <> +version = "0.4.0" # <> description = "ncem. Learning cell communication from spatial graphs of cells." authors = ["David S. Fischer ", "Anna C. Schaar "] license = "BSD" From 01438aaec90208eb9d07e8effd4894d59041ebb2 Mon Sep 17 00:00:00 2001 From: davidsebfischer Date: Thu, 28 Oct 2021 17:46:24 +0200 Subject: [PATCH 10/15] added node embedding and output weight saving in EDncem models - added to trainer - added to estimator - added necessary weight transfer into encoder model --- .gitignore | 3 ++ ncem/estimators/base_estimator.py | 23 ++++++++++++ ncem/estimators/estimator_ed_ncem.py | 49 +++++++++++++++++++++++--- ncem/models/layers/output_layers.py | 10 +++--- ncem/train/train_model.py | 26 ++++++++++++-- ncem/unit_test/test_estimator_local.py | 20 ++++++++--- 6 files changed, 117 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 3b2f137e..ba0287a6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +**.DS_Store +ncem/unit_test/temp + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/ncem/estimators/base_estimator.py b/ncem/estimators/base_estimator.py index 337e8f34..5d8903db 100644 --- a/ncem/estimators/base_estimator.py +++ b/ncem/estimators/base_estimator.py @@ -15,6 +15,29 @@ r_squared_linreg) +def transfer_layers(model1, model2): + """ + Transfer layer weights from model 1 to model 2. + + :param model1: Input model. + :param model2: Output model. 
+ """ + layer_names_model1 = [x.name for x in model1.layers] + layer_names_model2 = [x.name for x in model2.layers] + layers_updated = [] + layer_not_updated = set(layer_names_model2) + for x in layer_names_model1: + w = model1.get_layer(name=x).get_weights() + if x in layer_names_model2: + # Only update layers with parameters: + if len(w) > 0: + model2.get_layer(x).set_weights(w) + layers_updated.append(x) + layer_not_updated = layer_not_updated.difference({x}) + print(f"updated layers: {layers_updated}") + print(f"did not update layers: {layer_not_updated}") + + class Estimator: """Estimator class for models. diff --git a/ncem/estimators/estimator_ed_ncem.py b/ncem/estimators/estimator_ed_ncem.py index 634d84f3..d6addb94 100644 --- a/ncem/estimators/estimator_ed_ncem.py +++ b/ncem/estimators/estimator_ed_ncem.py @@ -1,11 +1,52 @@ +import abc import tensorflow as tf -from typing import Tuple +from typing import Tuple, Union -from ncem.estimators import EstimatorGraph, EstimatorNeighborhood +from ncem.estimators import Estimator, EstimatorGraph, EstimatorNeighborhood +from ncem.estimators.base_estimator import transfer_layers from ncem.models import ModelEDncem, ModelEd2Ncem +from ncem.models.layers.output_layers import IDENTIFIER_OUTPUT_LAYER -class EstimatorEDncem(EstimatorGraph): +class EstimatorEDncemBase(Estimator, abc.ABC): + model: Union[ModelEDncem, ModelEd2Ncem] + + def predict_embedding_any(self, img_keys, node_idx, batch_size: int = 1): + """Predict embedding on any given data set. + + Parameters + ---------- + img_keys + Image keys. + node_idx + Nodes indices. + batch_size : int + Number of samples. If unspecified, it will default to 1. + + Returns + ------- + eval_dict + """ + ds = self._get_dataset( + image_keys=img_keys, + nodes_idx=node_idx, + batch_size=batch_size, + shuffle_buffer_size=1, + train=False, + seed=None, + reinit_n_eval=None, + ) + transfer_layers(model1=self.model.training_model, model2=self.model.encoder) + return self.model.encoder.predict(ds) + + def get_decoding_weights(self): + layer_name_out = [x.name for x in self.model.training_model.layers if IDENTIFIER_OUTPUT_LAYER in x.name] + assert len(layer_name_out) == 1, "there should only be one output layer for this operation" + w = self.model.training_model.get_layer(name=layer_name_out[0]).get_weights() + return w + + +class EstimatorEDncem(EstimatorGraph, EstimatorEDncemBase): """Estimator class for encoder-decoder NCEM models. Subclass of EstimatorGraph.""" def __init__( @@ -164,7 +205,7 @@ def init_model( self._compile_model(optimizer=optimizer, output_layer=output_layer) -class EstimatorEdNcemNeighborhood(EstimatorNeighborhood): +class EstimatorEdNcemNeighborhood(EstimatorNeighborhood, EstimatorEDncemBase): """Estimator class for encoder-decoder NCEM models with single graph layer. 
Subclass of EstimatorNeighborhood.""" def __init__( diff --git a/ncem/models/layers/output_layers.py b/ncem/models/layers/output_layers.py index e365ccbe..f17ef4b9 100644 --- a/ncem/models/layers/output_layers.py +++ b/ncem/models/layers/output_layers.py @@ -1,30 +1,32 @@ import tensorflow as tf +IDENTIFIER_OUTPUT_LAYER = "Output" + def get_out(output_layer: str, out_feature_dim, scale_node_size, name: str = 'decoder'): if output_layer == "gaussian": output_decoder_layer = GaussianOutput( original_dim=out_feature_dim, use_node_scale=scale_node_size, - name=f"GaussianOutput_{name}", + name=f"Gaussian{IDENTIFIER_OUTPUT_LAYER}_{name}", ) elif output_layer == "nb": output_decoder_layer = NegBinOutput( original_dim=out_feature_dim, use_node_scale=scale_node_size, - name=f"NegBinOutput_{name}", + name=f"NegBin{IDENTIFIER_OUTPUT_LAYER}_{name}", ) elif output_layer == "nb_shared_disp": output_decoder_layer = NegBinSharedDispOutput( original_dim=out_feature_dim, use_node_scale=scale_node_size, - name=f"NegBinSharedDispOutput_{name}", + name=f"NegBinSharedDisp{IDENTIFIER_OUTPUT_LAYER}_{name}", ) elif output_layer == "nb_const_disp": output_decoder_layer = NegBinConstDispOutput( original_dim=out_feature_dim, use_node_scale=scale_node_size, - name=f"NegBinConstDispOutput_{name}", + name=f"NegBinConstDisp{IDENTIFIER_OUTPUT_LAYER}_{name}", ) else: raise ValueError("tried to access a non-supported output layer %s" % output_layer) diff --git a/ncem/train/train_model.py b/ncem/train/train_model.py index 1ab3f530..ca54fe29 100644 --- a/ncem/train/train_model.py +++ b/ncem/train/train_model.py @@ -127,7 +127,29 @@ def _save_specific(self, fn): self._save_evaluation_per_node_type(fn=fn) -class TrainModelEDncem(TrainModel): +class TrainModelEDncemBase: + + estimator: Union[EstimatorEDncem, EstimatorEdNcemNeighborhood] + + def _save_embedding(self, fn): + splits = { + "eval": {"img_keys": self.estimator.img_keys_eval, "node_idx": self.estimator.nodes_idx_eval}, + "test": {"img_keys": self.estimator.img_keys_test, "node_idx": self.estimator.nodes_idx_test}, + "train": {"img_keys": self.estimator.img_keys_train, "node_idx": self.estimator.nodes_idx_train}, + } + embeddings = {} + for k, v in splits.items(): + embeddings[k] = self.estimator.predict_embedding_any(**v) + with open(fn + "_embeddings.pickle", "wb") as f: + pickle.dump(obj=embeddings, file=f) + + def _save_output_weights(self, fn): + w = self.estimator.get_decoding_weights() + with open(fn + "_output_weights.pickle", "wb") as f: + pickle.dump(obj=w, file=f) + + +class TrainModelEDncem(TrainModel, TrainModelEDncemBase): estimator: EstimatorEDncem def init_estim(self, **kwargs): @@ -137,7 +159,7 @@ def _save_specific(self, fn): self._save_evaluation_per_node_type(fn=fn) -class TrainModelEdSingleNcem(TrainModel): +class TrainModelEdSingleNcem(TrainModel, TrainModelEDncemBase): estimator: EstimatorEdNcemNeighborhood def init_estim(self, **kwargs): diff --git a/ncem/unit_test/test_estimator_local.py b/ncem/unit_test/test_estimator_local.py index d59938c1..7dbf63e8 100644 --- a/ncem/unit_test/test_estimator_local.py +++ b/ncem/unit_test/test_estimator_local.py @@ -1,10 +1,10 @@ import pytest +from typing import Union import ncem.api as ncem from ncem.estimators import Estimator -from ncem.unit_test.directories import DATA_PATH_ZHANG, DATA_PATH_JAROSCH, DATA_PATH_HARTMANN, DATA_PATH_SCHUERCH, \ - DATA_PATH_LU +from ncem.unit_test.directories import DATA_PATH_ZHANG, DATA_PATH_HARTMANN, DATA_PATH_LU class HelperTestEstimator: @@ -182,6 +182,17 @@ def 
test_train(self, model: str, data_origin: str = "zhang"): self.est.model.training_model.summary() +class HelperTestEstimatorEd(HelperTestEstimator): + + est: Union[ncem.train.EstimatorED, ncem.train.EstimatorEDncem, ncem.train.EstimatorEdNcemNeighborhood] + + def test_embedding(self): + _ = self.est.predict_embedding_any(img_keys=self.est.img_keys_test, node_idx=self.est.nodes_idx_test) + + def test_decoding_weights(self): + _ = self.est.get_decoding_weights() + + @pytest.mark.parametrize("dataset", ["luwt"]) @pytest.mark.parametrize("model", ["interactions"]) def test_linear(dataset: str, model: str): @@ -205,7 +216,8 @@ def test_cvae(dataset: str, model: str): @pytest.mark.parametrize("dataset", ["luwt"]) @pytest.mark.parametrize("model", ["ed_ncem2_max", "ed_ncem2_gcn", "ed_ncem2_lr_gat", "ed_ncem2_gat"]) -#@pytest.mark.parametrize("model", ["ed_ncem2_max"]) def test_ed2(dataset: str, model: str): - estim = HelperTestEstimator() + estim = HelperTestEstimatorEd() estim.test_train(model=model, data_origin=dataset) + estim.test_embedding() + estim.test_decoding_weights() From 0c2839e836f3c6f162ae855850259cfa9096e406 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Wed, 3 Nov 2021 12:44:58 +0100 Subject: [PATCH 11/15] Bump version from 0.4.6 to 0.4.7 --- .cookietemple.yml | 2 +- .github/release-drafter.yml | 8 ++++---- cookietemple.cfg | 2 +- docs/conf.py | 4 ++-- ncem/__init__.py | 2 +- pyproject.toml | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.cookietemple.yml b/.cookietemple.yml index 6268509e..f60afe74 100644 --- a/.cookietemple.yml +++ b/.cookietemple.yml @@ -15,5 +15,5 @@ full_name: Anna Schaar email: anna.schaar@helmholtz-muenchen.de project_name: ncem project_short_description: ncem. Learning cell communication from spatial graphs of cells. -version: 0.4.6 +version: 0.4.7 license: BSD-3-Clause diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index fdd3b158..c606dea0 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,9 +1,9 @@ <<<<<<< HEAD -name-template: "0.4.0 🌈" # <> -tag-template: 0.4.0 # <> +name-template: "0.4.7 🌈" # <> +tag-template: 0.4.7 # <> ======= -name-template: "0.4.6 🌈" # <> -tag-template: 0.4.6 # <> +name-template: "0.4.7 🌈" # <> +tag-template: 0.4.7 # <> >>>>>>> a602092480da26e9dd2e6cdfea66a485b515f277 categories: - title: "🚀 Features" diff --git a/cookietemple.cfg b/cookietemple.cfg index 6a8e5939..3cdf895b 100644 --- a/cookietemple.cfg +++ b/cookietemple.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.6 +current_version = 0.4.7 [bumpversion_files_whitelisted] init_file = ncem/__init__.py diff --git a/docs/conf.py b/docs/conf.py index 7471b66e..a2154a83 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -73,9 +73,9 @@ # the built documents. # # The short X.Y version. -version = "0.4.6" +version = "0.4.7" # The full version, including alpha/beta/rc tags. -release = "0.4.6" +release = "0.4.7" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/ncem/__init__.py b/ncem/__init__.py index 67e82e88..a3c4e1c6 100644 --- a/ncem/__init__.py +++ b/ncem/__init__.py @@ -8,4 +8,4 @@ __maintainer__ = ", ".join(["Anna C. Schaar", "David S. Fischer"]) __author__ = ", ".join(["Anna C. Schaar", "David S. 
Fischer"]) __email__ = ", ".join(["anna.schaar@helmholtz-muenchen.de", "david.fischer@helmholtz-muenchen.de"]) -__version__ = "0.4.6" +__version__ = "0.4.7" diff --git a/pyproject.toml b/pyproject.toml index a3f703a2..6e3d4ce3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ncem" -version = "0.4.6" # <> +version = "0.4.7" # <> description = "ncem. Learning cell communication from spatial graphs of cells." authors = ["Anna C. Schaar "] license = "BSD" From 0e2711f4572376247514c2a383172d61615a9bab Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Thu, 4 Nov 2021 16:54:26 +0100 Subject: [PATCH 12/15] add n_top_genes to dataloader --- ncem/data.py | 44 ++++++++++++++++--------------- ncem/estimators/base_estimator.py | 12 +++++++-- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/ncem/data.py b/ncem/data.py index 4f68145f..dc2dbade 100644 --- a/ncem/data.py +++ b/ncem/data.py @@ -1640,6 +1640,7 @@ def __init__( coord_type: str = 'generic', n_rings: int = 1, label_selection: Optional[List[str]] = None, + n_top_genes: Optional[int] = None ): """Initialize DataLoader. @@ -1655,7 +1656,7 @@ def __init__( self.data_path = data_path print("Loading data from raw files") - self.register_celldata() + self.register_celldata(n_top_genes=n_top_genes) self.register_img_celldata() self.register_graph_features(label_selection=label_selection) self.compute_adjacency_matrices(radius=radius, coord_type=coord_type, n_rings=n_rings) @@ -1683,10 +1684,10 @@ def patients(self): """ return np.unique(np.asarray(list(self.celldata.uns["img_to_patient_dict"].values()))) - def register_celldata(self): + def register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" print("registering celldata") - self._register_celldata() + self._register_celldata(n_top_genes=n_top_genes) assert self.celldata is not None, "celldata was not loaded" def register_img_celldata(self): @@ -1707,7 +1708,7 @@ def register_graph_features(self, label_selection): self._register_graph_features(label_selection=label_selection) @abc.abstractmethod - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" pass @@ -1774,7 +1775,7 @@ class DataLoaderZhang(DataLoader): "other": "other", } - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 0.109, @@ -1880,7 +1881,7 @@ class DataLoaderJarosch(DataLoader): "other Lymphocytes": "other Lymphocytes", } - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 0.5, @@ -1979,7 +1980,7 @@ class DataLoaderHartmann(DataLoader): "Myeloid_CD11c": "CD11c Myeloid", } - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 400 / 1024, @@ -2218,7 +2219,7 @@ class DataLoaderPascualReguant(DataLoader): "other": "other", } - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 0.325, @@ -2391,7 +2392,7 @@ class DataLoaderSchuerch(DataLoader): "vasculature": "vasculature", } - def _register_celldata(self): + def _register_celldata(self, 
n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 0.377442, @@ -2742,7 +2743,7 @@ class DataLoaderLohoff(DataLoader): 'Surface ectoderm': 'Surface ectoderm' } - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 1., @@ -2842,7 +2843,7 @@ class DataLoaderLuWT(DataLoader): 9: "Unknown", } - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 0.1079, @@ -3085,7 +3086,7 @@ class DataLoaderLuWTimputed(DataLoader): 8: "Erythroid cell", } - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 0.1079, @@ -3094,12 +3095,13 @@ def _register_celldata(self): "pos_cols": ["Center_x", "Center_y"], "cluster_col": "CellTypeID_new", "cluster_col_preprocessed": "CellTypeID_new_preprocessed", + "n_top_genes": n_top_genes } celldata = read_h5ad(self.data_path + metadata["fn"]) celldata.uns["metadata"] = metadata - # only loading top 500 genes - sc.pp.highly_variable_genes(celldata, n_top_genes=500) - celldata = celldata[:, celldata.var.highly_variable].copy() + if n_top_genes: + sc.pp.highly_variable_genes(celldata, n_top_genes=n_top_genes) + celldata = celldata[:, celldata.var.highly_variable].copy() self.celldata = celldata def _register_img_celldata(self): @@ -3155,7 +3157,7 @@ class DataLoaderLuTET2(DataLoader): 9: "Unknown", } - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 0.1079, @@ -3403,7 +3405,7 @@ class DataLoader10xVisiumMouseBrain(DataLoader): 'Thalamus_2': 'Thalamus 2' } - def _register_celldata(self): + def _register_celldata(self, n_top_genes: Optional[int] = None): """Load AnnData object of complete dataset.""" metadata = { "lateral_resolution": 1., @@ -3412,13 +3414,13 @@ def _register_celldata(self): "cluster_col": "cluster", "cluster_col_preprocessed": "cluster_preprocessed", "patient_col": "in_tissue", - "n_top_genes": 500 + "n_top_genes": n_top_genes } celldata = read_h5ad(self.data_path + metadata["fn"]).copy() - # only loading top 500 genes - sc.pp.highly_variable_genes(celldata, n_top_genes=500) - celldata = celldata[:, celldata.var.highly_variable].copy() + if n_top_genes: + sc.pp.highly_variable_genes(celldata, n_top_genes=n_top_genes) + celldata = celldata[:, celldata.var.highly_variable].copy() celldata.X = celldata.X.toarray() celldata.uns["metadata"] = metadata diff --git a/ncem/estimators/base_estimator.py b/ncem/estimators/base_estimator.py index ad675af2..dd1d9097 100644 --- a/ncem/estimators/base_estimator.py +++ b/ncem/estimators/base_estimator.py @@ -91,6 +91,7 @@ def _load_data( radius: Optional[int] = None, n_rings: int = 1, label_selection: Optional[List[str]] = None, + n_top_genes: Optional[int] = None ): """Initialize a DataLoader object. @@ -104,6 +105,8 @@ def _load_data( Radius. label_selection : list, optional Label selection. + n_top_genes: int, optional + N top genes for highly variable gene selection. 
Raises ------ @@ -165,7 +168,8 @@ def _load_data( raise ValueError(f"data_origin {data_origin} not recognized") self.data = DataLoader( - data_path, radius=radius, coord_type=coord_type, n_rings=n_rings, label_selection=label_selection + data_path, radius=radius, coord_type=coord_type, n_rings=n_rings, label_selection=label_selection, + n_top_genes=n_top_genes ) def get_data( @@ -182,7 +186,8 @@ def get_data( use_covar_graph_covar: bool = False, domain_type: str = "image", robustness: Optional[float] = None, - robustness_seed: int = 1 + robustness_seed: int = 1, + n_top_genes: Optional[int] = None ): """Get data used in estimator classes. @@ -214,6 +219,8 @@ def get_data( Optional fraction of images for robustness test. robustness_seed: int Seed for robustness analysis + n_top_genes: int, optional + N top genes for highly variable gene selection. Raises ------ ValueError @@ -231,6 +238,7 @@ def get_data( radius=radius, n_rings=n_rings, label_selection=labels_to_load, + n_top_genes=n_top_genes ) if robustness: np.random.seed(robustness_seed) From 2f03c77b74d51c6e1225af37005cca5cdb098ae4 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Thu, 4 Nov 2021 16:54:59 +0100 Subject: [PATCH 13/15] Bump version from 0.4.7 to 0.4.8 --- .cookietemple.yml | 2 +- .github/release-drafter.yml | 8 ++++---- cookietemple.cfg | 2 +- docs/conf.py | 4 ++-- ncem/__init__.py | 2 +- pyproject.toml | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.cookietemple.yml b/.cookietemple.yml index f60afe74..218098e4 100644 --- a/.cookietemple.yml +++ b/.cookietemple.yml @@ -15,5 +15,5 @@ full_name: Anna Schaar email: anna.schaar@helmholtz-muenchen.de project_name: ncem project_short_description: ncem. Learning cell communication from spatial graphs of cells. -version: 0.4.7 +version: 0.4.8 license: BSD-3-Clause diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index c606dea0..3b58fa28 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,9 +1,9 @@ <<<<<<< HEAD -name-template: "0.4.7 🌈" # <> -tag-template: 0.4.7 # <> +name-template: "0.4.8 🌈" # <> +tag-template: 0.4.8 # <> ======= -name-template: "0.4.7 🌈" # <> -tag-template: 0.4.7 # <> +name-template: "0.4.8 🌈" # <> +tag-template: 0.4.8 # <> >>>>>>> a602092480da26e9dd2e6cdfea66a485b515f277 categories: - title: "🚀 Features" diff --git a/cookietemple.cfg b/cookietemple.cfg index 3cdf895b..cf15e347 100644 --- a/cookietemple.cfg +++ b/cookietemple.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.7 +current_version = 0.4.8 [bumpversion_files_whitelisted] init_file = ncem/__init__.py diff --git a/docs/conf.py b/docs/conf.py index a2154a83..403d4b8c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -73,9 +73,9 @@ # the built documents. # # The short X.Y version. -version = "0.4.7" +version = "0.4.8" # The full version, including alpha/beta/rc tags. -release = "0.4.7" +release = "0.4.8" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/ncem/__init__.py b/ncem/__init__.py index a3c4e1c6..2308f073 100644 --- a/ncem/__init__.py +++ b/ncem/__init__.py @@ -8,4 +8,4 @@ __maintainer__ = ", ".join(["Anna C. Schaar", "David S. Fischer"]) __author__ = ", ".join(["Anna C. Schaar", "David S. 
Fischer"]) __email__ = ", ".join(["anna.schaar@helmholtz-muenchen.de", "david.fischer@helmholtz-muenchen.de"]) -__version__ = "0.4.7" +__version__ = "0.4.8" diff --git a/pyproject.toml b/pyproject.toml index 6e3d4ce3..28ed8310 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "ncem" -version = "0.4.7" # <> +version = "0.4.8" # <> description = "ncem. Learning cell communication from spatial graphs of cells." authors = ["Anna C. Schaar "] license = "BSD" From be89c0b9d7f0d0ea02d6436693cb44a9abbf9880 Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Sun, 7 Nov 2021 18:31:38 +0100 Subject: [PATCH 14/15] add hgnc names for schuerch --- ncem/data.py | 65 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/ncem/data.py b/ncem/data.py index dc2dbade..a01e0abf 100644 --- a/ncem/data.py +++ b/ncem/data.py @@ -15,7 +15,7 @@ from matplotlib.ticker import FormatStrFormatter from matplotlib.tri import Triangulation from omnipath.interactions import import_intercell_network -from pandas import read_csv, read_excel +from pandas import read_csv, read_excel, DataFrame from scipy import sparse, stats from tqdm import tqdm @@ -2464,8 +2464,67 @@ def _register_celldata(self, n_top_genes: Optional[int] = None): "CD57 - NK cells:Cyc_17_ch_4", "MMP12 - matrix metalloproteinase:Cyc_21_ch_4", ] - - celldata = AnnData(X=celldata_df[feature_cols], obs=celldata_df[["File Name", "patients", "ClusterName"]]) + feature_cols_hgnc_names = [ + 'CD44', + 'FOXP3', + 'CD8A', + 'TP53', + 'GATA3', + 'PTPRC', + 'TBX21', + 'CTNNB1', + 'HLA-DR', + 'CD274', + 'MKI67', + 'PTPRC', + 'CD4', + 'CR2', + 'MUC1', + 'TNFRSF8', + 'CD2', + 'VIM', + 'MS4A1', + 'LAG3', + 'ATP1A1', + 'CD5', + 'IDO1', + 'KRT1', + 'ITGAM', + 'NCAM1', + 'ACTA1', + 'BCL2', + 'IL2RA', + 'ITGAX', + 'PDCD1', + 'GZMB', + 'EGFR', + 'VISTA', + 'FUT4', + 'ICOS', + 'SYP', + 'GFAP', + 'CD7', + 'CD247', + 'CHGA', + 'CD163', + 'PTPRC', + 'CD68', + 'PECAM1', + 'PDPN', + 'CD34', + 'CD38', + 'SDC1', + 'HOECHST1:Cyc_1_ch_1', ## + 'CDX2', + 'COL6A1', + 'CCR4', + 'MMP9', + 'TFRC', + 'B3GAT1', + 'MMP12' + ] + X = DataFrame(np.array(celldata_df[feature_cols]), columns=feature_cols_hgnc_names) + celldata = AnnData(X=X, obs=celldata_df[["File Name", "patients", "ClusterName"]]) celldata.uns["metadata"] = metadata img_keys = list(np.unique(celldata_df[metadata["image_col"]])) From 8a4bcfaff34e13f6ef36c9f2546598fc34818c7e Mon Sep 17 00:00:00 2001 From: Anna Schaar Date: Sun, 7 Nov 2021 19:07:27 +0100 Subject: [PATCH 15/15] add saving of LR names to model class --- ncem/estimators/base_estimator_neighbors.py | 8 +++++++- ncem/models/layers/single_gnn_layers.py | 4 ++-- ncem/train/train_model.py | 9 +++++++++ ncem/unit_test/test_estimator_local.py | 5 ++--- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/ncem/estimators/base_estimator_neighbors.py b/ncem/estimators/base_estimator_neighbors.py index 893e806e..a8d461f4 100644 --- a/ncem/estimators/base_estimator_neighbors.py +++ b/ncem/estimators/base_estimator_neighbors.py @@ -12,6 +12,9 @@ class EstimatorNeighborhood(Estimator): n_features_in: int _n_neighbors_padded: Union[int, None] h0_in: bool + features: list + target_feature_names: list + neighbor_feature_names: list idx_target_features: np.ndarray idx_neighbor_features: np.ndarray @@ -32,6 +35,9 @@ def set_input_features(self, h0_in=True, target_feature_names=None, neighbor_fea self.n_features_in = self.n_features_0 else: features = self.data.var_names.tolist() + 
self.features = features + self.target_feature_names = target_feature_names + self.neighbor_feature_names = neighbor_feature_names self.idx_target_features = np.array([features.index(x) for x in target_feature_names]) self.idx_neighbor_features = np.array([features.index(x) for x in neighbor_feature_names]) assert len(self.idx_target_features) == len(self.idx_neighbor_features) @@ -100,7 +106,7 @@ def _get_output_signature(self, resampled: bool = False): else: output_signature = ((h_targets, h_neighbors, sf, a, node_covar, domain), reconstruction) - print(output_signature) + # print(output_signature) return output_signature def _get_dataset( diff --git a/ncem/models/layers/single_gnn_layers.py b/ncem/models/layers/single_gnn_layers.py index ae77593d..e17fe164 100644 --- a/ncem/models/layers/single_gnn_layers.py +++ b/ncem/models/layers/single_gnn_layers.py @@ -107,7 +107,7 @@ def __init__(self, lr_dim, dropout_rate, l2_reg, **kwargs): def call(self, inputs, **kwargs): targets_receptor = inputs[0] # (batch, target nodes, lr) - print(targets_receptor.shape) + # print('targets_receptor', targets_receptor.shape) neighbors_ligand = inputs[1] # (batch, target nodes, padded neighbor nodes, lr) a = inputs[2] # (batch, target nodes, padded neighbor nodes) @@ -115,7 +115,7 @@ def call(self, inputs, **kwargs): neighbors_ligand = neighbors_ligand * self.kernel_l + self.bias_l # (batch, target nodes, padded neighbor nodes, lr) targets_receptor = tf.expand_dims(targets_receptor, axis=-2) # (batch, target nodes, 1, lr) weights = targets_receptor * neighbors_ligand - print(weights.shape) + # print('weights', weights.shape) # Mask embeddings to neighbors a = tf.expand_dims(a, axis=-1) # (batch, target nodes, padded neighbor nodes, 1) weights = weights * a diff --git a/ncem/train/train_model.py b/ncem/train/train_model.py index ca54fe29..ff923765 100644 --- a/ncem/train/train_model.py +++ b/ncem/train/train_model.py @@ -165,8 +165,17 @@ class TrainModelEdSingleNcem(TrainModel, TrainModelEDncemBase): def init_estim(self, **kwargs): self.estimator = EstimatorEdNcemNeighborhood(**kwargs) + def _save_lr_feature_names(self, fn): + feature_names = { + "features": self.estimator.features, + "target_feature_names": self.estimator.target_feature_names, + "neighbor_feature_names": self.estimator.neighbor_feature_names, + } + self._try_save(fn + "_lr_features.pickle", feature_names) + def _save_specific(self, fn): self._save_evaluation_per_node_type(fn=fn) + self._save_lr_feature_names(fn=fn) class TrainModelCVAEBase(TrainModel): diff --git a/ncem/unit_test/test_estimator_local.py b/ncem/unit_test/test_estimator_local.py index 33d78337..fdd96fab 100644 --- a/ncem/unit_test/test_estimator_local.py +++ b/ncem/unit_test/test_estimator_local.py @@ -56,14 +56,12 @@ def get_estimator( else: assert False - print('COND TYPE', self.est.cond_type) self.est.get_data( data_origin=data_origin, data_path=data_path, radius=radius, node_label_space_id=node_label_space_id, node_feature_space_id=node_feature_space_id, - robustness=0.5 ) def test_train(self, model: str, data_origin: str = "zhang"): @@ -216,7 +214,8 @@ def test_cvae(dataset: str, model: str): @pytest.mark.parametrize("dataset", ["luwt"]) -@pytest.mark.parametrize("model", ["ed_ncem2_max", "ed_ncem2_gcn", "ed_ncem2_lr_gat", "ed_ncem2_gat"]) +#@pytest.mark.parametrize("model", ["ed_ncem2_max", "ed_ncem2_gcn", "ed_ncem2_lr_gat", "ed_ncem2_gat"]) +@pytest.mark.parametrize("model", ["ed_ncem2_lr_gat"]) def test_ed2(dataset: str, model: str): estim = HelperTestEstimatorEd() 
estim.test_train(model=model, data_origin=dataset)
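With PATCH 10 and PATCH 15 applied, an ed_ncem training run can persist three pickle artifacts alongside its usual outputs: node embeddings per data split, the output-layer weights, and (for the single-layer NCEM) the ligand-receptor feature names. Below is a minimal sketch for reading them back, assuming the same fn prefix that the trainer's _save_specific methods received and that _try_save pickles its payload the way the other savers do.

import pickle

fn = "path/to/run_prefix"  # hypothetical; whatever prefix the trainer saved under

with open(fn + "_embeddings.pickle", "rb") as f:
    embeddings = pickle.load(f)  # dict with "train", "eval", "test" entries
with open(fn + "_output_weights.pickle", "rb") as f:
    output_weights = pickle.load(f)  # weights of the single *Output* layer
with open(fn + "_lr_features.pickle", "rb") as f:
    lr_features = pickle.load(f)  # "features", "target_feature_names", "neighbor_feature_names"

Each embeddings entry comes from predict_embedding_any, which copies the trained weights into the standalone encoder via transfer_layers before calling encoder.predict.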