Simplify video_domain_adapter #292

Status: Open. 31 commits to merge into base branch main; the diff below shows changes from 27 commits.

Commits (31):
7ccd345  update .gitignore (xianyuanliu, Jan 20, 2022)
d955f73  update .gitignore (xianyuanliu, Jan 20, 2022)
1cecdf2  change root dir (xianyuanliu, Jan 22, 2022)
f9d0577  add EPIC100DatasetAccess (xianyuanliu, Jan 22, 2022)
046ef98  change transform_kind to transform (xianyuanliu, Jan 22, 2022)
77f1b0f  add NUM_SEGMENTS (xianyuanliu, Jan 22, 2022)
8a8581b  add INPUT_TYPE (xianyuanliu, Jan 22, 2022)
23b0e8e  add functions in VideoDatasetAccess for feature vector input (xianyuanliu, Jan 22, 2022)
f993f8d  add get_class_type (xianyuanliu, Jan 22, 2022)
60951d4  add CLASS_TYPE (xianyuanliu, Jan 22, 2022)
76f3e72  change num_classes to dict_num_classes (xianyuanliu, Jan 22, 2022)
feaf72a  update ClassNetVideo for dual-class task (xianyuanliu, Jan 22, 2022)
f5bc2b7  update test (xianyuanliu, Jan 22, 2022)
63c5be9  Merge branch 'main' into add_feature_vector_dataloader (xianyuanliu, Jan 22, 2022)
f89d8fc  change output folder to tb_logs (xianyuanliu, Jan 22, 2022)
b845a88  add get_class_type test (xianyuanliu, Jan 22, 2022)
ef74b72  update test_video_access (xianyuanliu, Jan 22, 2022)
b43802c  update config (xianyuanliu, Jan 22, 2022)
ba6f5c5  test bug fixes (xianyuanliu, Jan 23, 2022)
bdf9cbb  add VideoFeatureRecord in Videos.py & improve doc (xianyuanliu, Jan 23, 2022)
3ea4678  add epic100 test & bug fixes (xianyuanliu, Jan 23, 2022)
1540051  test bug fixes (xianyuanliu, Jan 23, 2022)
de0e6cd  test bug fixes (xianyuanliu, Jan 23, 2022)
cf1638b  add BaseAdaptTrainerVideo (xianyuanliu, Jan 23, 2022)
a2b3ce8  bug fixes (xianyuanliu, Jan 23, 2022)
4470413  add CLASS_TYPE (xianyuanliu, Jan 23, 2022)
37aeaac  add conditional function for class type (xianyuanliu, Jan 23, 2022)
a95a185  rename to num_classes (xianyuanliu, Feb 7, 2022)
ab23896  change root dir (xianyuanliu, Feb 7, 2022)
40861fc  Update doc (xianyuanliu, Feb 7, 2022)
dc4b990  Merge branch 'add_feature_vector_dataloader' into simplify_video_doma… (xianyuanliu, Feb 7, 2022)
Files changed:
.gitignore (3 changes: 2 additions, 1 deletion)

@@ -22,8 +22,9 @@ examples/data/
 examples/*/data/
 examples/*/outputs/
 examples/*/lightning_logs/
+examples/*/tb_logs/
 examples/*/demo*/
-examples/action_dann_lightn/configs_xianyuan/
+examples/*/configs_xianyuan/

 # Logs
 log-*.txt
examples/action_dann_lightn/config.py (10 changes: 6 additions, 4 deletions)

@@ -16,15 +16,17 @@
 # Dataset
 # -----------------------------------------------------------------------------
 _C.DATASET = CN()
-_C.DATASET.ROOT = "I:/Datasets/EgoAction/" # "/shared/tale2/Shared"
+_C.DATASET.ROOT = "J:/Datasets/EgoAction/" # "/shared/tale2/Shared"
Review comment (Member): Same problem as in PR #291.
 _C.DATASET.SOURCE = "EPIC" # dataset options=["EPIC", "GTEA", "ADL", "KITCHEN"]
 _C.DATASET.SRC_TRAINLIST = "epic_D1_train.pkl"
 _C.DATASET.SRC_TESTLIST = "epic_D1_test.pkl"
 _C.DATASET.TARGET = "EPIC" # dataset options=["EPIC", "GTEA", "ADL", "KITCHEN"]
 _C.DATASET.TGT_TRAINLIST = "epic_D2_train.pkl"
 _C.DATASET.TGT_TESTLIST = "epic_D2_test.pkl"
-_C.DATASET.IMAGE_MODALITY = "rgb" # mode options=["rgb", "flow", "joint"]
-# _C.DATASET.NUM_CLASSES = 8
+_C.DATASET.IMAGE_MODALITY = "rgb" # options=["rgb", "flow", "joint"]
+_C.DATASET.INPUT_TYPE = "image" # options=["image", "feature"]
+_C.DATASET.CLASS_TYPE = "verb" # options=["verb", "verb+noun"]
+_C.DATASET.NUM_SEGMENTS = 1 # = 1, if image input; = 8, if feature input.
 _C.DATASET.FRAMES_PER_SEGMENT = 16
 _C.DATASET.NUM_REPEAT = 5 # 10
 _C.DATASET.WEIGHT_TYPE = "natural"
@@ -72,7 +74,7 @@
 _C.OUTPUT.VERBOSE = False # To discuss, for HPC jobs
 _C.OUTPUT.FAST_DEV_RUN = False # True for debug
 _C.OUTPUT.PB_FRESH = 0 # 0 # 50 # 0 to disable ; MAYBE make it a command line option
-_C.OUTPUT.TB_DIR = os.path.join("lightning_logs", _C.DATASET.SOURCE + "2" + _C.DATASET.TARGET)
+_C.OUTPUT.TB_DIR = os.path.join("tb_logs", _C.DATASET.SOURCE + "2" + _C.DATASET.TARGET)


 def get_cfg_defaults():
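To make the new dataset options concrete, here is a minimal, hypothetical sketch of how downstream code might consume INPUT_TYPE, CLASS_TYPE, and NUM_SEGMENTS. The option names and comments follow the diff above; the consistency check itself is illustrative and not part of the PR.

    from yacs.config import CfgNode as CN

    _C = CN()
    _C.DATASET = CN()
    _C.DATASET.INPUT_TYPE = "image"  # options=["image", "feature"]
    _C.DATASET.CLASS_TYPE = "verb"  # options=["verb", "verb+noun"]
    _C.DATASET.NUM_SEGMENTS = 1  # 1 for image input; 8 for feature input


    def check_dataset_cfg(cfg):
        """Illustrative consistency check (assumed rules, not the PR's code)."""
        if cfg.DATASET.INPUT_TYPE == "feature" and cfg.DATASET.NUM_SEGMENTS <= 1:
            raise ValueError("Feature-vector input expects NUM_SEGMENTS > 1, e.g. 8.")
        if cfg.DATASET.CLASS_TYPE not in ("verb", "verb+noun"):
            raise ValueError("CLASS_TYPE must be 'verb' or 'verb+noun'.")


    check_dataset_cfg(_C)  # passes for the image-input defaults above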
examples/action_dann_lightn/main.py (5 changes: 3 additions, 2 deletions)

@@ -47,9 +47,10 @@ def main():
     # ---- setup output ----
     format_str = "@%(asctime)s %(name)s [%(levelname)s] - (%(message)s)"
     logging.basicConfig(format=format_str)
+
     # ---- setup dataset ----
     seed = cfg.SOLVER.SEED
-    source, target, num_classes = VideoDataset.get_source_target(
+    source, target, dict_num_classes = VideoDataset.get_source_target(
         VideoDataset(cfg.DATASET.SOURCE.upper()), VideoDataset(cfg.DATASET.TARGET.upper()), seed, cfg
     )
     dataset = VideoMultiDomainDatasets(

@@ -68,7 +69,7 @@
     set_seed(seed) # seed_everything in pytorch_lightning did not set torch.backends.cudnn
     print(f"==> Building model for seed {seed} ......")
     # ---- setup model and logger ----
-    model, train_params = get_model(cfg, dataset, num_classes)
+    model, train_params = get_model(cfg, dataset, dict_num_classes)
     tb_logger = pl_loggers.TensorBoardLogger(cfg.OUTPUT.TB_DIR, name="seed{}".format(seed))
     checkpoint_callback = ModelCheckpoint(
         # dirpath=full_checkpoint_dir,
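The rename from num_classes to dict_num_classes reflects that the loader now reports one class count per task rather than a single integer. A sketch of the expected shape follows; the keys mirror CLASS_TYPE, and the counts are placeholders rather than values taken from the PR.

    # Placeholder examples of the dictionary now returned alongside source/target:
    dict_num_classes_verb = {"verb": 8}                # CLASS_TYPE = "verb"
    dict_num_classes_dual = {"verb": 97, "noun": 300}  # CLASS_TYPE = "verb+noun"

    # Consumers can then size one classification head per task:
    for task, n_class in dict_num_classes_dual.items():
        print(f"{task}: {n_class}-way head")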
examples/action_dann_lightn/model.py (16 changes: 11 additions, 5 deletions)

@@ -48,32 +48,36 @@ def get_config(cfg):
             "target": cfg.DATASET.TARGET,
             "size_type": cfg.DATASET.SIZE_TYPE,
             "weight_type": cfg.DATASET.WEIGHT_TYPE,
+            "class_type": cfg.DATASET.CLASS_TYPE,
         },
     }
     return config_params


 # Based on https://github.com/criteo-research/pytorch-ada/blob/master/adalib/ada/utils/experimentation.py
-def get_model(cfg, dataset, num_classes):
+def get_model(cfg, dataset, dict_num_classes):
     """
     Builds and returns a model and associated hyper parameters according to the config object passed.

     Args:
         cfg: A YACS config object.
         dataset: A multi domain dataset consisting of source and target datasets.
-        num_classes: The class number of specific dataset.
+        dict_num_classes (dict): The dictionary of class number for specific dataset.
Review comment (Member): Would it be better to implement this as a class (e.g., https://github.com/pykale/pykale/blob/main/kale/pipeline/domain_adapter.py#L81)? We can discuss if needed.

Reply (Member Author): A function returning boolean variables is used to control image_modality and class_type. A class may be a better choice, but I am not sure how to design it. We can talk.
"""

# setup feature extractor
feature_network, class_feature_dim, domain_feature_dim = get_video_feat_extractor(
cfg.MODEL.METHOD.upper(), cfg.DATASET.IMAGE_MODALITY, cfg.MODEL.ATTENTION, num_classes
cfg.MODEL.METHOD.upper(), cfg.DATASET.IMAGE_MODALITY, cfg.MODEL.ATTENTION, dict_num_classes
)
# setup classifier
classifier_network = ClassNetVideo(input_size=class_feature_dim, n_class=num_classes)
classifier_network = ClassNetVideo(input_size=class_feature_dim, dict_n_class=dict_num_classes)

config_params = get_config(cfg)
train_params = config_params["train_params"]
train_params_local = deepcopy(train_params)
data_params = config_params["data_params"]
data_params_local = deepcopy(data_params)
class_type = data_params_local["class_type"]
method_params = {}

method = domain_adapter.Method(cfg.DAN.METHOD)
Expand All @@ -85,6 +89,7 @@ def get_model(cfg, dataset, num_classes):
image_modality=cfg.DATASET.IMAGE_MODALITY,
feature_extractor=feature_network,
task_classifier=classifier_network,
class_type=class_type,
**method_params,
**train_params_local,
)
Expand All @@ -95,7 +100,7 @@ def get_model(cfg, dataset, num_classes):
if cfg.DAN.USERANDOM:
critic_input_size = cfg.DAN.RANDOM_DIM
else:
critic_input_size = domain_feature_dim * num_classes
critic_input_size = domain_feature_dim * dict_num_classes["verb"]
critic_network = DomainNetVideo(input_size=critic_input_size)

if cfg.DAN.METHOD == "CDAN":
Expand All @@ -109,6 +114,7 @@ def get_model(cfg, dataset, num_classes):
feature_extractor=feature_network,
task_classifier=classifier_network,
critic=critic_network,
class_type=class_type,
**method_params,
**train_params_local,
)
Expand Down
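The review thread above refers to "a function returning boolean variables" that controls class_type, and the commit log adds a get_class_type helper. A hypothetical sketch of such a helper, mirroring the style of get_image_modality; the actual implementation in the PR may differ.

    def get_class_type(class_type):
        """Sketch of the boolean-flag helper discussed above (assumed behavior)."""
        verb = True  # every supported setting trains the verb head
        noun = class_type.lower() == "verb+noun"
        return verb, noun


    verb, noun = get_class_type("verb+noun")
    if noun:
        # dual-task: add a noun loss alongside the verb loss
        pass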
kale/embed/video_feature_extractor.py (7 changes: 5 additions, 2 deletions)

@@ -15,7 +15,7 @@
 from kale.loaddata.video_access import get_image_modality


-def get_video_feat_extractor(model_name, image_modality, attention, num_classes):
+def get_video_feat_extractor(model_name, image_modality, attention, dict_num_classes):
     """
     Get the feature extractor w/o the pre-trained model and SELayers. The pre-trained models are saved in the path
     ``$XDG_CACHE_HOME/torch/hub/checkpoints/``. For Linux, default path is ``~/.cache/torch/hub/checkpoints/``.

@@ -26,15 +26,18 @@ def get_video_feat_extractor(model_name, image_modality, attention, num_classes)
         model_name (string): The name of the feature extractor. (Choices=["I3D", "R3D_18", "R2PLUS1D_18", "MC3_18"])
         image_modality (string): Image type. (Choices=["rgb", "flow", "joint"])
         attention (string): The attention type. (Choices=["SELayerC", "SELayerT", "SELayerCoC", "SELayerMC", "SELayerCT", "SELayerTC", "SELayerMAC"])
-        num_classes (int): The class number of specific dataset. (Default: No use)
+        dict_num_classes (dict): The class number of specific dataset. (Default: No use)

     Returns:
         feature_network (dictionary): The network to extract features.
         class_feature_dim (int): The dimension of the feature network output for ClassNet.
             It is a convention when the input dimension and the network is fixed.
         domain_feature_dim (int): The dimension of the feature network output for DomainNet.
     """

     rgb, flow = get_image_modality(image_modality)
+    # only use verb class when input is image.
+    num_classes = dict_num_classes["verb"]

     attention_list = ["SELayerC", "SELayerT", "SELayerCoC", "SELayerMC", "SELayerCT", "SELayerTC", "SELayerMAC"]
     model_list = ["I3D", "R3D_18", "MC3_18", "R2PLUS1D_18"]
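For orientation, a hedged usage sketch of the updated signature; the argument values and class count are placeholders, but the call shape and return values follow the docstring above.

    from kale.embed.video_feature_extractor import get_video_feat_extractor

    dict_num_classes = {"verb": 8}  # placeholder class count

    feature_network, class_feature_dim, domain_feature_dim = get_video_feat_extractor(
        model_name="I3D",
        image_modality="rgb",
        attention="SELayerC",
        dict_num_classes=dict_num_classes,
    )
    # feature_network is a dictionary of networks per the docstring (exact keys
    # are not shown in the diff); class_feature_dim sizes ClassNetVideo and
    # domain_feature_dim sizes DomainNetVideo.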