From 900109ca8e23241dac7f54fcc875274f06dc2bf8 Mon Sep 17 00:00:00 2001
From: Rishi Jha <rjha18@live.com>
Date: Tue, 2 Jan 2024 20:25:31 -0800
Subject: [PATCH] Refactoring toml path names to be more descriptive

---
 experiments/example_attack/config.toml        | 26 +++++++++----------
 experiments/example_downstream/config.toml    |  4 +--
 .../example_downstream_soft/config.toml       |  6 ++---
 .../example_flip_selection/config.toml        |  6 ++---
 experiments/example_precomputed/config.toml   |  4 +--
 .../example_precomputed_mix/config.toml       |  4 +--
 modules/base_utils/util.py                    |  2 ++
 modules/generate_labels/run_module.py         | 22 +++++++---------
 modules/select_flips/run_module.py            | 18 ++++++-------
 modules/train_expert/run_module.py            | 13 ++++------
 modules/train_user/run_module.py              |  9 +++----
 schemas/generate_labels.toml                  |  6 ++---
 schemas/select_flips.toml                     |  6 ++---
 schemas/train_expert.toml                     |  2 +-
 schemas/train_user.toml                       |  6 ++---
 15 files changed, 64 insertions(+), 70 deletions(-)

diff --git a/experiments/example_attack/config.toml b/experiments/example_attack/config.toml
index 7ddb3b2..04026fd 100644
--- a/experiments/example_attack/config.toml
+++ b/experiments/example_attack/config.toml
@@ -1,28 +1,28 @@
 # TODO Description
 
 # TODO
-# [train_expert]
-# output = "out/checkpoints/r32p_1xs/0/model.pth"
-# model = "r32p"
-# trainer = "sgd"
-# dataset = "cifar"
-# source_label = 9
-# target_label = 4
-# poisoner = "1xs"
-# epochs = 20
-# checkpoint_iters = 50
+[train_expert]
+output_dir = "out/checkpoints/r32p_1xs/0/"
+model = "r32p"
+trainer = "sgd"
+dataset = "cifar"
+source_label = 9
+target_label = 4
+poisoner = "1xs"
+epochs = 20
+checkpoint_iters = 50
 
 # TODO
 [generate_labels]
-input = "out/checkpoints/r32p_1xs/{}/model_{}_{}.pth"
-opt_input = "out/checkpoints/r32p_1xs/{}/model_{}_{}_opt.pth"
+input_pths = "out/checkpoints/r32p_1xs/{}/model_{}_{}.pth"
+opt_pths = "out/checkpoints/r32p_1xs/{}/model_{}_{}_opt.pth"
 expert_model = "r32p"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_attack/"
+output_dir = "experiments/example_attack/"
 lambda = 0.0
 
 [generate_labels.expert_config]
diff --git a/experiments/example_downstream/config.toml b/experiments/example_downstream/config.toml
index fafe638..46fdd04 100644
--- a/experiments/example_downstream/config.toml
+++ b/experiments/example_downstream/config.toml
@@ -1,11 +1,11 @@
 [train_user]
-input = "experiments/example_attack/labels.npy"
+input_labels = "experiments/example_attack/labels.npy"
 user_model = "r32p"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_downstream/"
+output_dir = "experiments/example_downstream/"
 soft = false
 alpha = 0.0
diff --git a/experiments/example_downstream_soft/config.toml b/experiments/example_downstream_soft/config.toml
index 484981c..2d41fe9 100644
--- a/experiments/example_downstream_soft/config.toml
+++ b/experiments/example_downstream_soft/config.toml
@@ -1,12 +1,12 @@
 [train_user]
-input = "experiments/example_attack/labels.npy"
-true = "experiments/example_attack/true.npy"
+input_labels = "experiments/example_attack/labels.npy"
+true_labels = "experiments/example_attack/true.npy"
 user_model = "r32p"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_downstream/"
+output_dir = "experiments/example_downstream/"
 soft = true
 alpha = 0.2
diff --git a/experiments/example_flip_selection/config.toml b/experiments/example_flip_selection/config.toml
index 9b1182d..ade97d8 100644
--- a/experiments/example_flip_selection/config.toml
+++ b/experiments/example_flip_selection/config.toml
@@ -1,5 +1,5 @@
 [select_flips]
 budgets = [150, 300, 500, 1000, 1500]
-input = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/*/labels.npy"
-true = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/0/true.npy"
-output_path = "out/computed/r18_1xs/"
\ No newline at end of file
+input_label_glob = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/*/labels.npy"
+true_labels = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/0/true.npy"
+output_dir = "out/computed/r18_1xs/"
\ No newline at end of file
diff --git a/experiments/example_precomputed/config.toml b/experiments/example_precomputed/config.toml
index d4e9f5b..0157de8 100644
--- a/experiments/example_precomputed/config.toml
+++ b/experiments/example_precomputed/config.toml
@@ -1,11 +1,11 @@
 [train_user]
-input = "precomputed/cifar/r32p/1xs/1500.npy"
+input_labels = "precomputed/cifar/r32p/1xs/1500.npy"
 user_model = "r32p"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_precomputed/"
+output_dir = "experiments/example_precomputed/"
 soft = false
 alpha = 0.0
\ No newline at end of file
diff --git a/experiments/example_precomputed_mix/config.toml b/experiments/example_precomputed_mix/config.toml
index 863d762..913119c 100644
--- a/experiments/example_precomputed_mix/config.toml
+++ b/experiments/example_precomputed_mix/config.toml
@@ -1,12 +1,12 @@
 [train_user]
-input = "precomputed/cifar/r32p/1xs/1500.npy"
+input_labels = "precomputed/cifar/r32p/1xs/1500.npy"
 user_model = "vit-pretrain"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_precomputed_mix/"
+output_dir = "experiments/example_precomputed_mix/"
 soft = false
 alpha = 0.0
 
diff --git a/modules/base_utils/util.py b/modules/base_utils/util.py
index ef6124f..ed77998 100644
--- a/modules/base_utils/util.py
+++ b/modules/base_utils/util.py
@@ -53,6 +53,8 @@ def generate_full_path(path):
 
 
 def slurmify_path(path, slurm_id):
+    if path is None:  # optional path was not configured; nothing to format
+        return path
     return path if slurm_id is None else path.format(slurm_id)
 
 
diff --git a/modules/generate_labels/run_module.py b/modules/generate_labels/run_module.py
index 5394453..27694b3 100644
--- a/modules/generate_labels/run_module.py
+++ b/modules/generate_labels/run_module.py
@@ -14,7 +14,7 @@
 from modules.base_utils.util import extract_toml, get_module_device,\
                                     get_mtt_attack_info, load_model,\
                                     either_dataloader_dataset_to_both,\
-                                    make_pbar, clf_loss, needs_big_ims, softmax, total_mse_distance
+                                    make_pbar, clf_loss, needs_big_ims, slurmify_path, softmax, total_mse_distance
 from modules.generate_labels.utils import coalesce_attack_config, extract_experts,\
                                      extract_labels, sgd_step
 
@@ -30,8 +30,8 @@ def run(experiment_name, module_name, **kwargs):
 
     args = extract_toml(experiment_name, module_name)
 
-    input_path = args["input"]
-    input_opt_path = args["opt_input"]
+    input_pths = args["input_pths"]
+    opt_pths = args["opt_pths"]
     expert_model_flag = args["expert_model"]
     dataset_flag = args["dataset"]
     poisoner_flag = args["poisoner"]
@@ -44,10 +44,8 @@ def run(experiment_name, module_name, **kwargs):
     expert_config = args.get('expert_config', {})
     config = coalesce_attack_config(args.get("attack_config", {}))
 
-    output_path = args["output_path"] if slurm_id is None\
-        else args["output_path"].format(slurm_id)
-
-    Path(output_path).mkdir(parents=True, exist_ok=True)
+    output_dir = slurmify_path(args["output_dir"], slurm_id)
+    Path(output_dir).mkdir(parents=True, exist_ok=True)
 
     print(f"{expert_model_flag=} {clean_label=} {target_label=} {poisoner_flag=}")
     print("Building datasets...")
@@ -63,9 +61,9 @@ def run(experiment_name, module_name, **kwargs):
     print("Loading expert trajectories...")
     expert_starts, expert_opt_starts = extract_experts(
         expert_config,
-        input_path,
+        input_pths,
         config['iterations'],
-        expert_opt_path=input_opt_path
+        expert_opt_path=opt_pths
     )
 
     print("Training...")
@@ -159,9 +157,9 @@ def run(experiment_name, module_name, **kwargs):
                 pbar.set_postfix(**pbar_postfix)
 
     y_true = torch.stack([mtt_dataset[i][3].detach() for i in range(len(mtt_dataset.distill))])
-    np.save(output_path + "labels.npy", labels_syn.detach().numpy())
-    np.save(output_path + "true.npy", y_true)
-    np.save(output_path + "losses.npy", losses)
+    np.save(output_dir + "labels.npy", labels_syn.detach().numpy())
+    np.save(output_dir + "true.npy", y_true)
+    np.save(output_dir + "losses.npy", losses)
 
 if __name__ == "__main__":
     experiment_name, module_name = sys.argv[1], sys.argv[2]
diff --git a/modules/select_flips/run_module.py b/modules/select_flips/run_module.py
index f7f534a..386773d 100644
--- a/modules/select_flips/run_module.py
+++ b/modules/select_flips/run_module.py
@@ -18,20 +18,21 @@ def run(experiment_name, module_name, **kwargs):
 
     args = extract_toml(experiment_name, module_name)
     budgets = args.get("budgets", [150, 300, 500, 1000, 1500])
-    input_path = slurmify_path(args["input"], slurm_id)
-    true_path = slurmify_path(args["true"], slurm_id)
-    output_path = slurmify_path(args["output_path"], slurm_id)
+    input_label_glob = slurmify_path(args["input_label_glob"], slurm_id)
+    true_labels = slurmify_path(args["true_labels"], slurm_id)
+    output_dir = slurmify_path(args["output_dir"], slurm_id)
 
-    Path(output_path).mkdir(parents=True, exist_ok=True)
+    Path(output_dir).mkdir(parents=True, exist_ok=True)
 
     distances = []
     all_labels = []
-    for f in glob.glob(input_path):
+    true = np.load(true_labels)
+    np.save(f'{output_dir}/true.npy', true)
+
+    for f in glob.glob(input_label_glob):
         labels = np.load(f)
 
-        true = np.load(true_path)
         dists = np.zeros(len(labels))
-
         inds = labels.argmax(axis=1) != true.argmax(axis=1)
         dists[inds] = labels[inds].max(axis=1) -\
             labels[inds][np.arange(inds.sum()), true[inds].argmax(axis=1)]
@@ -43,14 +44,13 @@ def run(experiment_name, module_name, **kwargs):
     distances = np.stack(distances)
     all_labels = np.stack(all_labels).mean(axis=0)
 
-    np.save(f'{output_path}/true.npy', true)
     for n in budgets:
         to_save = true.copy()
         if n != 0:
             idx = np.argsort(distances.min(axis=0))[-n:]
             all_labels[idx] = all_labels[idx] - 50000 * true[idx]
             to_save[idx] = all_labels[idx]
-        np.save(f'{output_path}/{n}.npy', to_save)
+        np.save(f'{output_dir}/{n}.npy', to_save)
 
 if __name__ == "__main__":
     experiment_name, module_name = sys.argv[1], sys.argv[2]
diff --git a/modules/train_expert/run_module.py b/modules/train_expert/run_module.py
index 0adcd71..3767f87 100644
--- a/modules/train_expert/run_module.py
+++ b/modules/train_expert/run_module.py
@@ -38,10 +38,9 @@ def run(experiment_name, module_name, **kwargs):
     epochs = args.get("epochs", None)
     optim_kwargs = args.get("optim_kwargs", {})
     scheduler_kwargs = args.get("scheduler_kwargs", {})
-    output_path = slurmify_path(args["output"], slurm_id)
+    output_dir = slurmify_path(args["output_dir"], slurm_id)
 
-    Path(output_path[:output_path.rfind('/')]).mkdir(parents=True,
-                                                     exist_ok=True)
+    Path(output_dir).mkdir(parents=True, exist_ok=True)
     
     n_classes = get_n_classes(dataset_flag)
     model = load_model(model_flag, n_classes)
@@ -73,12 +72,10 @@ def run(experiment_name, module_name, **kwargs):
 
     def checkpoint_callback(model, opt, epoch, iteration, save_iter):
         if iteration % save_iter == 0 and iteration != 0:
-            index = output_path.rfind('.')
-            checkpoint_path = output_path[:index] + f'_{str(epoch)}_{str(iteration)}' + output_path[index:]
+            checkpoint_path = f'{output_dir}model_{str(epoch)}_{str(iteration)}.pth'
+            opt_path = f'{output_dir}model_{str(epoch)}_{str(iteration)}_opt.pth'
             torch.save(model.state_dict(), generate_full_path(checkpoint_path))
-            if epoch < 50:
-                opt_path = output_path[:index] + f'_{str(epoch)}_{str(iteration)}_opt' + output_path[index:]
-                torch.save(opt.state_dict(), generate_full_path(opt_path))
+            torch.save(opt.state_dict(), generate_full_path(opt_path))
 
     mini_train(
         model=model,
diff --git a/modules/train_user/run_module.py b/modules/train_user/run_module.py
index f3a780a..a007c8c 100644
--- a/modules/train_user/run_module.py
+++ b/modules/train_user/run_module.py
@@ -40,12 +40,9 @@ def run(experiment_name, module_name, **kwargs):
     scheduler_kwargs = args.get("scheduler_kwargs", {})
     alpha = args.get("alpha", None)
 
-    true_path = args.get("true", None)
-    input_path = slurmify_path(args["input"], slurm_id)
-    output_path = slurmify_path(args["output_path"], slurm_id)
-
-    if true_path is not None:
-        true_path = slurmify_path(args["true"], slurm_id)
+    input_path = slurmify_path(args["input_labels"], slurm_id)
+    true_path = slurmify_path(args.get("true_labels", None), slurm_id)
+    output_path = slurmify_path(args["output_dir"], slurm_id)
 
     Path(output_path).mkdir(parents=True, exist_ok=True)
 
diff --git a/schemas/generate_labels.toml b/schemas/generate_labels.toml
index 435e858..82b82f8 100644
--- a/schemas/generate_labels.toml
+++ b/schemas/generate_labels.toml
@@ -6,9 +6,9 @@
 ###
 
 [generate_labels]
-input = "TODO"
-opt_input = "TODO"
-output_path = "string: Path to .pth file."
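+input_pths = "string: Path template (with {} placeholders) for the expert model checkpoint .pth files, e.g. out/checkpoints/r32p_1xs/{}/model_{}_{}.pth"
+opt_pths = "string: Path template (with {} placeholders) for the matching expert optimizer-state _opt.pth files."
+output_dir = "string: Path to output directory (include the trailing slash). labels.npy, true.npy and losses.npy are saved there."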
 expert_model = "string: (r32p, r18, r18_tin, vgg, vgg_pretrain, vit_pretrain). For ResNets, VGG-19s, and ViTs"
 dataset = "string: (cifar / cifar_100 / tiny_imagenet). For CIFAR-10, CIFAR-100 and Tiny Imagenet datasets"
 trainer = "string: (sgd / adam). Specifies optimizer. "
diff --git a/schemas/select_flips.toml b/schemas/select_flips.toml
index 0b72225..744def6 100644
--- a/schemas/select_flips.toml
+++ b/schemas/select_flips.toml
@@ -5,6 +5,6 @@
 
 [select_flips]
 budgets = "TODO"
-input = "TODO"
-true = "TODO"
-output_path = "TODO"
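+input_label_glob = "string: Glob pattern matching the labels .npy files to combine, e.g. out/labels/r18_1xs/*/labels.npy"
+true_labels = "string: Path to the true labels .npy file."
+output_dir = "string: Path to output directory. true.npy and one {budget}.npy file per budget are saved there."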
diff --git a/schemas/train_expert.toml b/schemas/train_expert.toml
index 51899e3..a458642 100644
--- a/schemas/train_expert.toml
+++ b/schemas/train_expert.toml
@@ -5,7 +5,7 @@
 ###
 
 [train_expert]
-output = "string: Path to .pth file."
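+output_dir = "string: Path to output directory (include the trailing slash). Checkpoints are saved there as model_{epoch}_{iteration}.pth with optimizer state in model_{epoch}_{iteration}_opt.pth."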
 model = "string: (r32p, r18, r18_tin, vgg, vgg_pretrain, vit_pretrain). For ResNets, VGG-19s, and ViTs"
 dataset = "string: (cifar / cifar_100 / tiny_imagenet). For CIFAR-10, CIFAR-100 and Tiny Imagenet datasets"
 trainer = "string: (sgd / adam). Specifies optimizer. "
diff --git a/schemas/train_user.toml b/schemas/train_user.toml
index 8ed370f..dac7876 100644
--- a/schemas/train_user.toml
+++ b/schemas/train_user.toml
@@ -6,8 +6,8 @@
 ###
 
 [train_user]
-input = "TODO"
-output_path = "string: Path to .pth file."
+input_labels = "string: Path to the input labels .npy file."
+output_dir = "string: Path to output directory. Created if it does not exist."
 user_model = "string: (r32p, r18, r18_tin, vgg, vgg_pretrain, vit_pretrain). For ResNets, VGG-19s, and ViTs"
 dataset = "string: (cifa r / cifar_100 / tiny_imagenet). For CIFAR-10, CIFAR-100 and Tiny Imagenet datasets"
 trainer = "string: (sgd / adam). Specifies optimizer. "
@@ -18,7 +18,7 @@ poisoner = "string: Form: {{1,2,3,9}xp, {1,2}xs, {1,4}xl}. Integer resembles num
 [OPTIONAL]
 soft = "TODO"
 alpha = "TODO"
-true = "TODO"
+true_labels = "string: Path to the true labels .npy file. Treated as unset if omitted."
 batch_size = "int: {0,1,...,infty}. Specifies batch size. Set to default for trainer if omitted."
 epochs = "int: {0,1,...,infty}. Specifies number of epochs. Set to default for trainer if omitted."
 optim_kwargs = "dict. Optional keywords for Pytorch SGD / Adam optimizer. See sever example."