From 900109ca8e23241dac7f54fcc875274f06dc2bf8 Mon Sep 17 00:00:00 2001 From: Rishi Jha Date: Tue, 2 Jan 2024 20:25:31 -0800 Subject: [PATCH] Refactoring toml path names to be more descriptive --- experiments/example_attack/config.toml | 26 +++++++++---------- experiments/example_downstream/config.toml | 4 +-- .../example_downstream_soft/config.toml | 6 ++--- .../example_flip_selection/config.toml | 6 ++--- experiments/example_precomputed/config.toml | 4 +-- .../example_precomputed_mix/config.toml | 4 +-- modules/base_utils/util.py | 2 ++ modules/generate_labels/run_module.py | 22 +++++++--------- modules/select_flips/run_module.py | 18 ++++++------- modules/train_expert/run_module.py | 13 ++++------ modules/train_user/run_module.py | 9 +++---- schemas/generate_labels.toml | 6 ++--- schemas/select_flips.toml | 6 ++--- schemas/train_expert.toml | 2 +- schemas/train_user.toml | 6 ++--- 15 files changed, 64 insertions(+), 70 deletions(-) diff --git a/experiments/example_attack/config.toml b/experiments/example_attack/config.toml index 7ddb3b2..04026fd 100644 --- a/experiments/example_attack/config.toml +++ b/experiments/example_attack/config.toml @@ -1,28 +1,28 @@ # TODO Description # TODO -# [train_expert] -# output = "out/checkpoints/r32p_1xs/0/model.pth" -# model = "r32p" -# trainer = "sgd" -# dataset = "cifar" -# source_label = 9 -# target_label = 4 -# poisoner = "1xs" -# epochs = 20 -# checkpoint_iters = 50 +[train_expert] +output_dir = "out/checkpoints/r32p_1xs/0/" +model = "r32p" +trainer = "sgd" +dataset = "cifar" +source_label = 9 +target_label = 4 +poisoner = "1xs" +epochs = 20 +checkpoint_iters = 50 # TODO [generate_labels] -input = "out/checkpoints/r32p_1xs/{}/model_{}_{}.pth" -opt_input = "out/checkpoints/r32p_1xs/{}/model_{}_{}_opt.pth" +input_pths = "out/checkpoints/r32p_1xs/{}/model_{}_{}.pth" +opt_pths = "out/checkpoints/r32p_1xs/{}/model_{}_{}_opt.pth" expert_model = "r32p" trainer = "sgd" dataset = "cifar" source_label = 9 target_label = 4 poisoner = 
"1xs" -output_path = "experiments/example_attack/" +output_dir = "experiments/example_attack/" lambda = 0.0 [generate_labels.expert_config] diff --git a/experiments/example_downstream/config.toml b/experiments/example_downstream/config.toml index fafe638..46fdd04 100644 --- a/experiments/example_downstream/config.toml +++ b/experiments/example_downstream/config.toml @@ -1,11 +1,11 @@ [train_user] -input = "experiments/example_attack/labels.npy" +input_labels = "experiments/example_attack/labels.npy" user_model = "r32p" trainer = "sgd" dataset = "cifar" source_label = 9 target_label = 4 poisoner = "1xs" -output_path = "experiments/example_downstream/" +output_dir = "experiments/example_downstream/" soft = false alpha = 0.0 diff --git a/experiments/example_downstream_soft/config.toml b/experiments/example_downstream_soft/config.toml index 484981c..2d41fe9 100644 --- a/experiments/example_downstream_soft/config.toml +++ b/experiments/example_downstream_soft/config.toml @@ -1,12 +1,12 @@ [train_user] -input = "experiments/example_attack/labels.npy" -true = "experiments/example_attack/true.npy" +input_labels = "experiments/example_attack/labels.npy" +true_labels = "experiments/example_attack/true.npy" user_model = "r32p" trainer = "sgd" dataset = "cifar" source_label = 9 target_label = 4 poisoner = "1xs" -output_path = "experiments/example_downstream/" +output_dir = "experiments/example_downstream/" soft = true alpha = 0.2 diff --git a/experiments/example_flip_selection/config.toml b/experiments/example_flip_selection/config.toml index 9b1182d..ade97d8 100644 --- a/experiments/example_flip_selection/config.toml +++ b/experiments/example_flip_selection/config.toml @@ -1,5 +1,5 @@ [select_flips] budgets = [150, 300, 500, 1000, 1500] -input = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/*/labels.npy" -true = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/0/true.npy" -output_path = "out/computed/r18_1xs/" \ No newline at end of file 
+input_label_glob = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/*/labels.npy" +true_labels = "/gscratch/sewoong/rjha01/code/robust-ml-suite/out/labels/r18_1xs/0/true.npy" +output_dir = "out/computed/r18_1xs/" \ No newline at end of file diff --git a/experiments/example_precomputed/config.toml b/experiments/example_precomputed/config.toml index d4e9f5b..0157de8 100644 --- a/experiments/example_precomputed/config.toml +++ b/experiments/example_precomputed/config.toml @@ -1,11 +1,11 @@ [train_user] -input = "precomputed/cifar/r32p/1xs/1500.npy" +input_labels = "precomputed/cifar/r32p/1xs/1500.npy" user_model = "r32p" trainer = "sgd" dataset = "cifar" source_label = 9 target_label = 4 poisoner = "1xs" -output_path = "experiments/example_precomputed/" +output_dir = "experiments/example_precomputed/" soft = false alpha = 0.0 \ No newline at end of file diff --git a/experiments/example_precomputed_mix/config.toml b/experiments/example_precomputed_mix/config.toml index 863d762..913119c 100644 --- a/experiments/example_precomputed_mix/config.toml +++ b/experiments/example_precomputed_mix/config.toml @@ -1,12 +1,12 @@ [train_user] -input = "precomputed/cifar/r32p/1xs/1500.npy" +input_labels = "precomputed/cifar/r32p/1xs/1500.npy" user_model = "vit-pretrain" trainer = "sgd" dataset = "cifar" source_label = 9 target_label = 4 poisoner = "1xs" -output_path = "experiments/example_precomputed_mix/" +output_dir = "experiments/example_precomputed_mix/" soft = false alpha = 0.0 diff --git a/modules/base_utils/util.py b/modules/base_utils/util.py index ef6124f..ed77998 100644 --- a/modules/base_utils/util.py +++ b/modules/base_utils/util.py @@ -53,6 +53,8 @@ def generate_full_path(path): def slurmify_path(path, slurm_id): + if path is None: + return path return path if slurm_id is None else path.format(slurm_id) diff --git a/modules/generate_labels/run_module.py b/modules/generate_labels/run_module.py index 5394453..27694b3 100644 --- 
a/modules/generate_labels/run_module.py +++ b/modules/generate_labels/run_module.py @@ -14,7 +14,7 @@ from modules.base_utils.util import extract_toml, get_module_device,\ get_mtt_attack_info, load_model,\ either_dataloader_dataset_to_both,\ - make_pbar, clf_loss, needs_big_ims, softmax, total_mse_distance + make_pbar, clf_loss, needs_big_ims, slurmify_path, softmax, total_mse_distance from modules.generate_labels.utils import coalesce_attack_config, extract_experts,\ extract_labels, sgd_step @@ -30,8 +30,8 @@ def run(experiment_name, module_name, **kwargs): args = extract_toml(experiment_name, module_name) - input_path = args["input"] - input_opt_path = args["opt_input"] + input_pths = args["input_pths"] + opt_pths = args["opt_pths"] expert_model_flag = args["expert_model"] dataset_flag = args["dataset"] poisoner_flag = args["poisoner"] @@ -44,10 +44,8 @@ def run(experiment_name, module_name, **kwargs): expert_config = args.get('expert_config', {}) config = coalesce_attack_config(args.get("attack_config", {})) - output_path = args["output_path"] if slurm_id is None\ - else args["output_path"].format(slurm_id) - - Path(output_path).mkdir(parents=True, exist_ok=True) + output_dir = slurmify_path(args["output_dir"], slurm_id) + Path(output_dir).mkdir(parents=True, exist_ok=True) print(f"{expert_model_flag=} {clean_label=} {target_label=} {poisoner_flag=}") print("Building datasets...") @@ -63,9 +61,9 @@ def run(experiment_name, module_name, **kwargs): print("Loading expert trajectories...") expert_starts, expert_opt_starts = extract_experts( expert_config, - input_path, + input_pths, config['iterations'], - expert_opt_path=input_opt_path + expert_opt_path=opt_pths ) print("Training...") @@ -159,9 +157,9 @@ def run(experiment_name, module_name, **kwargs): pbar.set_postfix(**pbar_postfix) y_true = torch.stack([mtt_dataset[i][3].detach() for i in range(len(mtt_dataset.distill))]) - np.save(output_path + "labels.npy", labels_syn.detach().numpy()) - np.save(output_path + 
"true.npy", y_true) - np.save(output_path + "losses.npy", losses) + np.save(output_dir + "labels.npy", labels_syn.detach().numpy()) + np.save(output_dir + "true.npy", y_true) + np.save(output_dir + "losses.npy", losses) if __name__ == "__main__": experiment_name, module_name = sys.argv[1], sys.argv[2] diff --git a/modules/select_flips/run_module.py b/modules/select_flips/run_module.py index f7f534a..386773d 100644 --- a/modules/select_flips/run_module.py +++ b/modules/select_flips/run_module.py @@ -18,20 +18,21 @@ def run(experiment_name, module_name, **kwargs): args = extract_toml(experiment_name, module_name) budgets = args.get("budgets", [150, 300, 500, 1000, 1500]) - input_path = slurmify_path(args["input"], slurm_id) - true_path = slurmify_path(args["true"], slurm_id) - output_path = slurmify_path(args["output_path"], slurm_id) + input_label_glob = slurmify_path(args["input_label_glob"], slurm_id) + true_labels = slurmify_path(args["true_labels"], slurm_id) + output_dir = slurmify_path(args["output_dir"], slurm_id) - Path(output_path).mkdir(parents=True, exist_ok=True) + Path(output_dir).mkdir(parents=True, exist_ok=True) distances = [] all_labels = [] - for f in glob.glob(input_path): + true = np.load(true_labels) + np.save(f'{output_dir}/true.npy', true) + + for f in glob.glob(input_label_glob): labels = np.load(f) - true = np.load(true_path) dists = np.zeros(len(labels)) - inds = labels.argmax(axis=1) != true.argmax(axis=1) dists[inds] = labels[inds].max(axis=1) -\ labels[inds][np.arange(inds.sum()), true[inds].argmax(axis=1)] @@ -43,14 +44,13 @@ def run(experiment_name, module_name, **kwargs): distances = np.stack(distances) all_labels = np.stack(all_labels).mean(axis=0) - np.save(f'{output_path}/true.npy', true) for n in budgets: to_save = true.copy() if n != 0: idx = np.argsort(distances.min(axis=0))[-n:] all_labels[idx] = all_labels[idx] - 50000 * true[idx] to_save[idx] = all_labels[idx] - np.save(f'{output_path}/{n}.npy', to_save) + 
np.save(f'{output_dir}/{n}.npy', to_save) if __name__ == "__main__": experiment_name, module_name = sys.argv[1], sys.argv[2] diff --git a/modules/train_expert/run_module.py b/modules/train_expert/run_module.py index 0adcd71..3767f87 100644 --- a/modules/train_expert/run_module.py +++ b/modules/train_expert/run_module.py @@ -38,10 +38,9 @@ def run(experiment_name, module_name, **kwargs): epochs = args.get("epochs", None) optim_kwargs = args.get("optim_kwargs", {}) scheduler_kwargs = args.get("scheduler_kwargs", {}) - output_path = slurmify_path(args["output"], slurm_id) + output_dir = slurmify_path(args["output_dir"], slurm_id) - Path(output_path[:output_path.rfind('/')]).mkdir(parents=True, - exist_ok=True) + Path(output_dir).mkdir(parents=True, exist_ok=True) n_classes = get_n_classes(dataset_flag) model = load_model(model_flag, n_classes) @@ -73,12 +72,10 @@ def run(experiment_name, module_name, **kwargs): def checkpoint_callback(model, opt, epoch, iteration, save_iter): if iteration % save_iter == 0 and iteration != 0: - index = output_path.rfind('.') - checkpoint_path = output_path[:index] + f'_{str(epoch)}_{str(iteration)}' + output_path[index:] + checkpoint_path = f'{output_dir}model_{str(epoch)}_{str(iteration)}.pth' + opt_path = f'{output_dir}model_{str(epoch)}_{str(iteration)}_opt.pth' torch.save(model.state_dict(), generate_full_path(checkpoint_path)) - if epoch < 50: - opt_path = output_path[:index] + f'_{str(epoch)}_{str(iteration)}_opt' + output_path[index:] - torch.save(opt.state_dict(), generate_full_path(opt_path)) + torch.save(opt.state_dict(), generate_full_path(opt_path)) mini_train( model=model, diff --git a/modules/train_user/run_module.py b/modules/train_user/run_module.py index f3a780a..a007c8c 100644 --- a/modules/train_user/run_module.py +++ b/modules/train_user/run_module.py @@ -40,12 +40,9 @@ def run(experiment_name, module_name, **kwargs): scheduler_kwargs = args.get("scheduler_kwargs", {}) alpha = args.get("alpha", None) - true_path = 
args.get("true", None) - input_path = slurmify_path(args["input"], slurm_id) - output_path = slurmify_path(args["output_path"], slurm_id) - - if true_path is not None: - true_path = slurmify_path(args["true"], slurm_id) + input_path = slurmify_path(args["input_labels"], slurm_id) + true_path = slurmify_path(args.get("true_labels", None), slurm_id) + output_path = slurmify_path(args["output_dir"], slurm_id) Path(output_path).mkdir(parents=True, exist_ok=True) diff --git a/schemas/generate_labels.toml b/schemas/generate_labels.toml index 435e858..82b82f8 100644 --- a/schemas/generate_labels.toml +++ b/schemas/generate_labels.toml @@ -6,9 +6,9 @@ ### [generate_labels] -input = "TODO" -opt_input = "TODO" -output_path = "string: Path to .pth file." +input_pths = "TODO" +opt_pths = "TODO" +output_dir = "string: Path to the output directory (include the trailing slash). labels.npy, true.npy, and losses.npy are written here." expert_model = "string: (r32p, r18, r18_tin, vgg, vgg_pretrain, vit_pretrain). For ResNets, VGG-19s, and ViTs" dataset = "string: (cifar / cifar_100 / tiny_imagenet). For CIFAR-10, CIFAR-100 and Tiny Imagenet datasets" trainer = "string: (sgd / adam). Specifies optimizer. " diff --git a/schemas/select_flips.toml b/schemas/select_flips.toml index 0b72225..744def6 100644 --- a/schemas/select_flips.toml +++ b/schemas/select_flips.toml @@ -5,6 +5,6 @@ [select_flips] budgets = "TODO" -input = "TODO" -true = "TODO" -output_path = "TODO" +input_label_glob = "TODO" +true_labels = "TODO" +output_dir = "TODO" diff --git a/schemas/train_expert.toml b/schemas/train_expert.toml index 51899e3..a458642 100644 --- a/schemas/train_expert.toml +++ b/schemas/train_expert.toml @@ -5,7 +5,7 @@ ### [train_expert] -output = "string: Path to .pth file." +output_dir = "string: Path to the checkpoint output directory (include the trailing slash). Model and optimizer .pth checkpoints are written here." model = "string: (r32p, r18, r18_tin, vgg, vgg_pretrain, vit_pretrain). For ResNets, VGG-19s, and ViTs" dataset = "string: (cifar / cifar_100 / tiny_imagenet). For CIFAR-10, CIFAR-100 and Tiny Imagenet datasets" trainer = "string: (sgd / adam). Specifies optimizer. 
" diff --git a/schemas/train_user.toml b/schemas/train_user.toml index 8ed370f..dac7876 100644 --- a/schemas/train_user.toml +++ b/schemas/train_user.toml @@ -6,8 +6,8 @@ ### [train_user] -input = "TODO" -output_path = "string: Path to .pth file." +input_labels = "TODO" +output_dir = "string: Path to the output directory." user_model = "string: (r32p, r18, r18_tin, vgg, vgg_pretrain, vit_pretrain). For ResNets, VGG-19s, and ViTs" dataset = "string: (cifar / cifar_100 / tiny_imagenet). For CIFAR-10, CIFAR-100 and Tiny Imagenet datasets" trainer = "string: (sgd / adam). Specifies optimizer. " @@ -18,7 +18,7 @@ poisoner = "string: Form: {{1,2,3,9}xp, {1,2}xs, {1,4}xl}. Integer resembles num [OPTIONAL] soft = "TODO" alpha = "TODO" -true = "TODO" +true_labels = "TODO" batch_size = "int: {0,1,...,infty}. Specifies batch size. Set to default for trainer if omitted." epochs = "int: {0,1,...,infty}. Specifies number of epochs. Set to default for trainer if omitted." optim_kwargs = "dict. Optional keywords for Pytorch SGD / Adam optimizer. See sever example."