Refactoring toml path names to be more descriptive

SewoongLab · Jan 3, 2024 · 900109c · 900109c
1 parent 08148ba
commit 900109c
Show file tree

Hide file tree

Showing 15 changed files with 64 additions and 70 deletions.
diff --git a/experiments/example_attack/config.toml b/experiments/example_attack/config.toml
@@ -1,28 +1,28 @@
 # TODO Description
 
 # TODO
-# [train_expert]
-# output = "out/checkpoints/r32p_1xs/0/model.pth"
-# model = "r32p"
-# trainer = "sgd"
-# dataset = "cifar"
-# source_label = 9
-# target_label = 4
-# poisoner = "1xs"
-# epochs = 20
-# checkpoint_iters = 50
+[train_expert]
+output_dir = "out/checkpoints/r32p_1xs/0/"
+model = "r32p"
+trainer = "sgd"
+dataset = "cifar"
+source_label = 9
+target_label = 4
+poisoner = "1xs"
+epochs = 20
+checkpoint_iters = 50
 
 # TODO
 [generate_labels]
-input = "out/checkpoints/r32p_1xs/{}/model_{}_{}.pth"
-opt_input = "out/checkpoints/r32p_1xs/{}/model_{}_{}_opt.pth"
+input_pths = "out/checkpoints/r32p_1xs/{}/model_{}_{}.pth"
+opt_pths = "out/checkpoints/r32p_1xs/{}/model_{}_{}_opt.pth"
 expert_model = "r32p"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_attack/"
+output_dir = "experiments/example_attack/"
 lambda = 0.0
 
 [generate_labels.expert_config]

diff --git a/experiments/example_downstream/config.toml b/experiments/example_downstream/config.toml
@@ -1,11 +1,11 @@
 [train_user]
-input = "experiments/example_attack/labels.npy"
+input_labels = "experiments/example_attack/labels.npy"
 user_model = "r32p"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_downstream/"
+output_dir = "experiments/example_downstream/"
 soft = false
 alpha = 0.0
diff --git a/experiments/example_downstream_soft/config.toml b/experiments/example_downstream_soft/config.toml
@@ -1,12 +1,12 @@
 [train_user]
-input = "experiments/example_attack/labels.npy"
-true = "experiments/example_attack/true.npy"
+input_labels = "experiments/example_attack/labels.npy"
+true_labels = "experiments/example_attack/true.npy"
 user_model = "r32p"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_downstream/"
+output_dir = "experiments/example_downstream/"
 soft = true
 alpha = 0.2
diff --git a/experiments/example_flip_selection/config.toml b/experiments/example_flip_selection/config.toml
@@ -1,5 +1,5 @@
 [select_flips]
 budgets = [150, 300, 500, 1000, 1500]
-input = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/*/labels.npy"
-true = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/0/true.npy"
-output_path = "out/computed/r18_1xs/"
+input_label_glob = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/*/labels.npy"
+true_labels = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/0/true.npy"
+output_dir = "out/computed/r18_1xs/"
diff --git a/experiments/example_precomputed/config.toml b/experiments/example_precomputed/config.toml
@@ -1,11 +1,11 @@
 [train_user]
-input = "precomputed/cifar/r32p/1xs/1500.npy"
+input_labels = "precomputed/cifar/r32p/1xs/1500.npy"
 user_model = "r32p"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_precomputed/"
+output_dir = "experiments/example_precomputed/"
 soft = false
 alpha = 0.0
diff --git a/experiments/example_precomputed_mix/config.toml b/experiments/example_precomputed_mix/config.toml
@@ -1,12 +1,12 @@
 [train_user]
-input = "precomputed/cifar/r32p/1xs/1500.npy"
+input_labels = "precomputed/cifar/r32p/1xs/1500.npy"
 user_model = "vit-pretrain"
 trainer = "sgd"
 dataset = "cifar"
 source_label = 9
 target_label = 4
 poisoner = "1xs"
-output_path = "experiments/example_precomputed_mix/"
+output_dir = "experiments/example_precomputed_mix/"
 soft = false
 alpha = 0.0
 

diff --git a/modules/base_utils/util.py b/modules/base_utils/util.py
@@ -53,6 +53,8 @@ def generate_full_path(path):
 
 
 def slurmify_path(path, slurm_id):
+ if path is None:
+ return path
  return path if slurm_id is None else path.format(slurm_id)
 
 

diff --git a/modules/generate_labels/run_module.py b/modules/generate_labels/run_module.py
@@ -14,7 +14,7 @@
 from modules.base_utils.util import extract_toml, get_module_device,\
  get_mtt_attack_info, load_model,\
  either_dataloader_dataset_to_both,\
- make_pbar, clf_loss, needs_big_ims, softmax, total_mse_distance
+ make_pbar, clf_loss, needs_big_ims, slurmify_path, softmax, total_mse_distance
 from modules.generate_labels.utils import coalesce_attack_config, extract_experts,\
  extract_labels, sgd_step
 
@@ -30,8 +30,8 @@ def run(experiment_name, module_name, **kwargs):
 
  args = extract_toml(experiment_name, module_name)
 
- input_path = args["input"]
- input_opt_path = args["opt_input"]
+ input_pths = args["input_pths"]
+ opt_pths = args["opt_pths"]
  expert_model_flag = args["expert_model"]
  dataset_flag = args["dataset"]
  poisoner_flag = args["poisoner"]
@@ -44,10 +44,8 @@ def run(experiment_name, module_name, **kwargs):
  expert_config = args.get('expert_config', {})
  config = coalesce_attack_config(args.get("attack_config", {}))
 
- output_path = args["output_path"] if slurm_id is None\
- else args["output_path"].format(slurm_id)
-
- Path(output_path).mkdir(parents=True, exist_ok=True)
+ output_dir = slurmify_path(args["output_dir"], slurm_id)
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
 
  print(f"{expert_model_flag=} {clean_label=} {target_label=} {poisoner_flag=}")
  print("Building datasets...")
@@ -63,9 +61,9 @@ def run(experiment_name, module_name, **kwargs):
  print("Loading expert trajectories...")
  expert_starts, expert_opt_starts = extract_experts(
  expert_config,
- input_path,
+ input_pths,
  config['iterations'],
- expert_opt_path=input_opt_path
+ expert_opt_path=opt_pths
  )
 
  print("Training...")
@@ -159,9 +157,9 @@ def run(experiment_name, module_name, **kwargs):
  pbar.set_postfix(**pbar_postfix)
 
  y_true = torch.stack([mtt_dataset[i][3].detach() for i in range(len(mtt_dataset.distill))])
- np.save(output_path + "labels.npy", labels_syn.detach().numpy())
- np.save(output_path + "true.npy", y_true)
- np.save(output_path + "losses.npy", losses)
+ np.save(output_dir + "labels.npy", labels_syn.detach().numpy())
+ np.save(output_dir + "true.npy", y_true)
+ np.save(output_dir + "losses.npy", losses)
 
 if __name__ == "__main__":
  experiment_name, module_name = sys.argv[1], sys.argv[2]

diff --git a/modules/select_flips/run_module.py b/modules/select_flips/run_module.py
@@ -18,20 +18,21 @@ def run(experiment_name, module_name, **kwargs):
 
  args = extract_toml(experiment_name, module_name)
  budgets = args.get("budgets", [150, 300, 500, 1000, 1500])
- input_path = slurmify_path(args["input"], slurm_id)
- true_path = slurmify_path(args["true"], slurm_id)
- output_path = slurmify_path(args["output_path"], slurm_id)
+ input_label_glob = slurmify_path(args["input_label_glob"], slurm_id)
+ true_labels = slurmify_path(args["true_labels"], slurm_id)
+ output_dir = slurmify_path(args["output_dir"], slurm_id)
 
- Path(output_path).mkdir(parents=True, exist_ok=True)
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
 
  distances = []
  all_labels = []
- for f in glob.glob(input_path):
+ true = np.load(true_labels)
+ np.save(f'{output_dir}/true.npy', true)
+
+ for f in glob.glob(input_label_glob):
  labels = np.load(f)
 
- true = np.load(true_path)
  dists = np.zeros(len(labels))
-
  inds = labels.argmax(axis=1) != true.argmax(axis=1)
  dists[inds] = labels[inds].max(axis=1) -\
  labels[inds][np.arange(inds.sum()), true[inds].argmax(axis=1)]
@@ -43,14 +44,13 @@ def run(experiment_name, module_name, **kwargs):
  distances = np.stack(distances)
  all_labels = np.stack(all_labels).mean(axis=0)
 
- np.save(f'{output_path}/true.npy', true)
  for n in budgets:
  to_save = true.copy()
  if n != 0:
  idx = np.argsort(distances.min(axis=0))[-n:]
  all_labels[idx] = all_labels[idx] - 50000 * true[idx]
  to_save[idx] = all_labels[idx]
- np.save(f'{output_path}/{n}.npy', to_save)
+ np.save(f'{output_dir}/{n}.npy', to_save)
 
 if __name__ == "__main__":
  experiment_name, module_name = sys.argv[1], sys.argv[2]

diff --git a/modules/train_expert/run_module.py b/modules/train_expert/run_module.py
@@ -38,10 +38,9 @@ def run(experiment_name, module_name, **kwargs):
  epochs = args.get("epochs", None)
  optim_kwargs = args.get("optim_kwargs", {})
  scheduler_kwargs = args.get("scheduler_kwargs", {})
- output_path = slurmify_path(args["output"], slurm_id)
+ output_dir = slurmify_path(args["output_dir"], slurm_id)
 
- Path(output_path[:output_path.rfind('/')]).mkdir(parents=True,
- exist_ok=True)
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
 
  n_classes = get_n_classes(dataset_flag)
  model = load_model(model_flag, n_classes)
@@ -73,12 +72,10 @@ def run(experiment_name, module_name, **kwargs):
 
  def checkpoint_callback(model, opt, epoch, iteration, save_iter):
  if iteration % save_iter == 0 and iteration != 0:
- index = output_path.rfind('.')
- checkpoint_path = output_path[:index] + f'_{str(epoch)}_{str(iteration)}' + output_path[index:]
+ checkpoint_path = f'{output_dir}model_{str(epoch)}_{str(iteration)}.pth'
+ opt_path = f'{output_dir}model_{str(epoch)}_{str(iteration)}_opt.pth'
  torch.save(model.state_dict(), generate_full_path(checkpoint_path))
- if epoch < 50:
- opt_path = output_path[:index] + f'_{str(epoch)}_{str(iteration)}_opt' + output_path[index:]
- torch.save(opt.state_dict(), generate_full_path(opt_path))
+ torch.save(opt.state_dict(), generate_full_path(opt_path))
 
  mini_train(
  model=model,

diff --git a/modules/train_user/run_module.py b/modules/train_user/run_module.py
@@ -40,12 +40,9 @@ def run(experiment_name, module_name, **kwargs):
  scheduler_kwargs = args.get("scheduler_kwargs", {})
  alpha = args.get("alpha", None)
 
- true_path = args.get("true", None)
- input_path = slurmify_path(args["input"], slurm_id)
- output_path = slurmify_path(args["output_path"], slurm_id)
-
- if true_path is not None:
- true_path = slurmify_path(args["true"], slurm_id)
+ input_path = slurmify_path(args["input_labels"], slurm_id)
+ true_path = slurmify_path(args.get("true_labels", None), slurm_id)
+ output_path = slurmify_path(args["output_dir"], slurm_id)
 
  Path(output_path).mkdir(parents=True, exist_ok=True)
 

diff --git a/schemas/generate_labels.toml b/schemas/generate_labels.toml
@@ -6,9 +6,9 @@
 ###
 
 [generate_labels]
-input = "TODO"
-opt_input = "TODO"
-output_path = "string: Path to .pth file."
+input_pths = "TODO"
+opt_pths = "TODO"
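+output_dir = "string: Path to the output directory, ending in '/'. Created if missing; labels.npy, true.npy and losses.npy are written there."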
 expert_model = "string: (r32p, r18, r18_tin, vgg, vgg_pretrain, vit_pretrain). For ResNets, VGG-19s, and ViTs"
 dataset = "string: (cifar / cifar_100 / tiny_imagenet). For CIFAR-10, CIFAR-100 and Tiny Imagenet datasets"
 trainer = "string: (sgd / adam). Specifies optimizer. "

diff --git a/schemas/select_flips.toml b/schemas/select_flips.toml
@@ -5,6 +5,6 @@
 
 [select_flips]
 budgets = "TODO"
-input = "TODO"
-true = "TODO"
-output_path = "TODO"
+input_label_glob = "TODO"
+true_labels = "TODO"
+output_dir = "TODO"
diff --git a/schemas/train_expert.toml b/schemas/train_expert.toml
@@ -5,7 +5,7 @@
 ###
 
 [train_expert]
-output = "string: Path to .pth file."
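+output_dir = "string: Path to the checkpoint directory, ending in '/'. Created if missing; model_{epoch}_{iteration}.pth and model_{epoch}_{iteration}_opt.pth files are written there."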
 model = "string: (r32p, r18, r18_tin, vgg, vgg_pretrain, vit_pretrain). For ResNets, VGG-19s, and ViTs"
 dataset = "string: (cifar / cifar_100 / tiny_imagenet). For CIFAR-10, CIFAR-100 and Tiny Imagenet datasets"
 trainer = "string: (sgd / adam). Specifies optimizer. "

diff --git a/schemas/train_user.toml b/schemas/train_user.toml
@@ -6,8 +6,8 @@
 ###
 
 [train_user]
-input = "TODO"
-output_path = "string: Path to .pth file."
+input_labels = "TODO"
+output_dir = "string: Path to the output directory. Created if missing."
 user_model = "string: (r32p, r18, r18_tin, vgg, vgg_pretrain, vit_pretrain). For ResNets, VGG-19s, and ViTs"
 dataset = "string: (cifar / cifar_100 / tiny_imagenet). For CIFAR-10, CIFAR-100 and Tiny Imagenet datasets"
 trainer = "string: (sgd / adam). Specifies optimizer. "
@@ -18,7 +18,7 @@ poisoner = "string: Form: {{1,2,3,9}xp, {1,2}xs, {1,4}xl}. Integer resembles num
 [OPTIONAL]
 soft = "TODO"
 alpha = "TODO"
-true = "TODO"
+true_labels = "TODO"
 batch_size = "int: {0,1,...,infty}. Specifies batch size. Set to default for trainer if omitted."
 epochs = "int: {0,1,...,infty}. Specifies number of epochs. Set to default for trainer if omitted."
 optim_kwargs = "dict. Optional keywords for Pytorch SGD / Adam optimizer. See sever example."