From ab844374948024d02d8443a52998fde7e6a5536b Mon Sep 17 00:00:00 2001
From: triomino
Date: Sun, 2 Aug 2020 17:12:11 +0800
Subject: [PATCH] fix imagedata inner layer problem

---
Review notes (everything between the "---" line above and the first
"diff --git" line is ignored by git-am and is not part of the commit message):

* models/util.py: forward() now returns a (feature, target) pair on the
  ReLU branches as well, matching the non-ReLU branches.
* train_student.py: get_teacher_name() first tries to match a directory of
  the form "S<sep>NAME_T<sep>..." and returns NAME; otherwise it falls back
  to the old '_'-split parsing.
* train_student.py: the dummy input in main_worker() is sized per dataset.
  Review change: an explicit "else: raise NotImplementedError(opt.dataset)"
  was added, because without it any dataset other than 'cifar100' or
  'imagenet' left `data` unbound and failed later with a NameError.
  Hunk counts and the diffstat below were updated for the three extra lines;
  the post-image blob id on the train_student.py "index" line is therefore
  stale (informational only).
* NOTE(review): F.adaptive_avg_pool2d(t, (self.s_H, self.s_H)) uses s_H for
  both output dimensions; confirm that (s_H, s_W) is not what was intended.
* Blank context lines were restored from the hunk line counts. The exact
  content of whitespace-only removed lines could not be recovered, so
  `git apply --ignore-whitespace` may be required.

 dataset/imagenet.py |  7 +++++--
 models/util.py      | 11 +++++------
 train_student.py    | 20 ++++++++++++++++++--
 vgg13_shufflev2.sh  | 38 +++++++++++++++++++-------------------
 4 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/dataset/imagenet.py b/dataset/imagenet.py
index b488f4b..25b6c20 100644
--- a/dataset/imagenet.py
+++ b/dataset/imagenet.py
@@ -13,7 +13,9 @@ from dataset.folder2lmdb import ImageFolderLMDB
 
 
 imagenet_list = ['imagenet', 'imagenette']
-def get_data_folder(dataset = 'imagenet'):
+
+
+def get_data_folder(dataset='imagenet'):
     """
     return the path to store the data
     """
@@ -179,7 +181,8 @@ def get_dataloader_sample(dataset='imagenet', batch_size=128, num_workers=8, is_
     return train_loader, test_loader, len(train_set), len(train_set.classes)
 
 
-def get_imagenet_dataloader(dataset='imagenet', batch_size=128, num_workers=16, use_lmdb=False, multiprocessing_distributed=False):
+def get_imagenet_dataloader(dataset='imagenet', batch_size=128, num_workers=16, use_lmdb=False,
+                            multiprocessing_distributed=False):
     """
     Data Loader for imagenet
     """
diff --git a/models/util.py b/models/util.py
index caf56aa..be5320b 100644
--- a/models/util.py
+++ b/models/util.py
@@ -20,23 +20,22 @@ def __init__(self, s_shape, t_shape, use_relu=True):
             self.conv = nn.ConvTranspose2d(s_C, t_C, kernel_size=4, stride=2, padding=1)
         elif s_H >= t_H:
             self.conv = nn.Conv2d(s_C, t_C, kernel_size=(1+s_H-t_H, 1+s_W-t_W))
-        else: 
+        else:
             self.conv = nn.Conv2d(s_C, t_C, kernel_size=3, padding=1, stride=1)
         self.bn = nn.BatchNorm2d(t_C)
         self.relu = nn.ReLU(inplace=True)
-    
+
     def forward(self, x, t):
-
-        if self.s_H == 2 * self.t_H or self.s_H *2 == self.t_H or self.s_H >= self.t_H:
+        if self.s_H == 2 * self.t_H or self.s_H * 2 == self.t_H or self.s_H >= self.t_H:
             x = self.conv(x)
             if self.use_relu:
-                return self.relu(self.bn(x))
+                return self.relu(self.bn(x)), t
             else:
                 return self.bn(x), t
         else:
             x = self.conv(x)
             if self.use_relu:
-                return self.relu(self.bn(x))
+                return self.relu(self.bn(x)), F.adaptive_avg_pool2d(t, (self.s_H, self.s_H))
             else:
                 return self.bn(x), F.adaptive_avg_pool2d(t, (self.s_H, self.s_H))
 
diff --git a/train_student.py b/train_student.py
index 49fe95c..22aaa27 100644
--- a/train_student.py
+++ b/train_student.py
@@ -5,6 +5,7 @@ from __future__ import print_function
 
 
 import os
+import re
 import argparse
 import time
 
@@ -33,6 +34,9 @@ from helper.loops import train_distill as train, validate
 
 
 
+split_symbol = '~' if os.name == 'nt' else ':'
+
+
 def parse_option():
 
     parser = argparse.ArgumentParser('argument for training')
@@ -142,7 +146,12 @@ def parse_option():
 
 def get_teacher_name(model_path):
     """parse teacher name"""
-    segments = model_path.split('/')[-2].split('_')
+    directory = model_path.split('/')[-2]
+    pattern = ''.join(['S', split_symbol, '(.+)', '_T', split_symbol])
+    name_match = re.match(pattern, directory)
+    if name_match:
+        return name_match[1]
+    segments = directory.split('_')
     if segments[0] == 'wrn':
         return segments[0] + '_' + segments[1] + '_' + segments[2]
     if segments[0] == 'resnext50':
@@ -228,8 +237,15 @@ def main_worker(gpu, ngpus_per_node, opt):
     model_t = load_teacher(opt.path_t, n_cls, opt.gpu)
     module_args = {'num_classes': n_cls}
     model_s = model_dict[opt.model_s](**module_args)
+
+    # Random dummy batch for the feature pass below; spatial size depends on the dataset.
+    if opt.dataset == 'cifar100':
+        data = torch.randn(2, 3, 32, 32)
+    elif opt.dataset == 'imagenet':
+        data = torch.randn(2, 3, 224, 224)
+    else:
+        raise NotImplementedError(opt.dataset)
 
-    data = torch.randn(2, 3, 32, 32)
     model_t.eval()
     model_s.eval()
     feat_t, _ = model_t(data, is_feat=True)
diff --git a/vgg13_shufflev2.sh b/vgg13_shufflev2.sh
index 8424263..15fd68a 100644
--- a/vgg13_shufflev2.sh
+++ b/vgg13_shufflev2.sh
@@ -5,39 +5,39 @@ EPOCH=90
 LEARNING_RATE=0.1
 DALI=cpu
 
-# aakd
-python train_student.py --path-t ./save/models/vgg13_imagenet_vanilla/vgg13_transformed.pth \
---batch_size $BATCH_SIZE --epochs $EPOCH --dataset imagenet --gpu_id $GPU --dist-url tcp://127.0.0.1:23333 \
---print-freq 100 --num_workers $WORKER --model_s ShuffleV2_Imagenet --trial release \
---multiprocessing-distributed --learning_rate $LEARNING_RATE --lr_decay_epochs 30,60 --weight_decay 1e-4 --dali $DALI \
---distill aakd -r 1 -a 1 -b 400
 # kd
-python train_student.py --path-t ./save/models/vgg13_imagenet_vanilla/vgg13_transformed.pth \
---batch_size $BATCH_SIZE --epochs $EPOCH --dataset imagenet --gpu_id $GPU --dist-url tcp://127.0.0.1:23333 \
---print-freq 100 --num_workers $WORKER --model_s ShuffleV2_Imagenet --trial release \
---multiprocessing-distributed --learning_rate $LEARNING_RATE --lr_decay_epochs 30,60 --weight_decay 1e-4 --dali $DALI \
---distill kd -r 1 -a 1 -b 0
+# python train_student.py --path-t ./save/models/vgg13_imagenet_vanilla/vgg13_transformed.pth \
+# --batch_size $BATCH_SIZE --epochs $EPOCH --dataset imagenet --gpu_id $GPU --dist-url tcp://127.0.0.1:23333 \
+# --print-freq 100 --num_workers $WORKER --model_s ShuffleV2_Imagenet --trial release \
+# --multiprocessing-distributed --learning_rate $LEARNING_RATE --lr_decay_epochs 30,60 --weight_decay 1e-4 --dali $DALI \
+# --distill kd -r 1 -a 1 -b 0
 # fitnet
 python train_student.py --path-t ./save/models/vgg13_imagenet_vanilla/vgg13_transformed.pth \
 --batch_size $BATCH_SIZE --epochs $EPOCH --dataset imagenet --gpu_id $GPU --dist-url tcp://127.0.0.1:23333 \
 --print-freq 100 --num_workers $WORKER --model_s ShuffleV2_Imagenet --trial release \
 --multiprocessing-distributed --learning_rate $LEARNING_RATE --lr_decay_epochs 30,60 --weight_decay 1e-4 --dali $DALI \
---distill hint -r 1 -a 1 -b 100 --hint-layer 1
+--distill hint -r 1 -a 1 -b 100 --hint_layer 1
 # sp
 python train_student.py --path-t ./save/models/vgg13_imagenet_vanilla/vgg13_transformed.pth \
 --batch_size $BATCH_SIZE --epochs $EPOCH --dataset imagenet --gpu_id $GPU --dist-url tcp://127.0.0.1:23333 \
 --print-freq 100 --num_workers $WORKER --model_s ShuffleV2_Imagenet --trial release \
 --multiprocessing-distributed --learning_rate $LEARNING_RATE --lr_decay_epochs 30,60 --weight_decay 1e-4 --dali $DALI \
---distill sp -r 1 -a 1 -b 3000
+--distill similarity -r 1 -a 1 -b 3000
 # vid
+# python train_student.py --path-t ./save/models/vgg13_imagenet_vanilla/vgg13_transformed.pth \
+# --batch_size $BATCH_SIZE --epochs $EPOCH --dataset imagenet --gpu_id $GPU --dist-url tcp://127.0.0.1:23333 \
+# --print-freq 100 --num_workers $WORKER --model_s ShuffleV2_Imagenet --trial release \
+# --multiprocessing-distributed --learning_rate $LEARNING_RATE --lr_decay_epochs 30,60 --weight_decay 1e-4 --dali $DALI \
+# --distill vid -r 1 -a 1 -b 1
+# aakd
 python train_student.py --path-t ./save/models/vgg13_imagenet_vanilla/vgg13_transformed.pth \
 --batch_size $BATCH_SIZE --epochs $EPOCH --dataset imagenet --gpu_id $GPU --dist-url tcp://127.0.0.1:23333 \
 --print-freq 100 --num_workers $WORKER --model_s ShuffleV2_Imagenet --trial release \
 --multiprocessing-distributed --learning_rate $LEARNING_RATE --lr_decay_epochs 30,60 --weight_decay 1e-4 --dali $DALI \
---distill vid -r 1 -a 1 -b 1
+--distill aakd -r 1 -a 1 -b 100
 # irg
-python train_student.py --path-t ./save/models/vgg13_imagenet_vanilla/vgg13_transformed.pth \
---batch_size $BATCH_SIZE --epochs $EPOCH --dataset imagenet --gpu_id $GPU --dist-url tcp://127.0.0.1:23333 \
---print-freq 100 --num_workers $WORKER --model_s ShuffleV2_Imagenet --trial release \
---multiprocessing-distributed --learning_rate $LEARNING_RATE --lr_decay_epochs 30,60 --weight_decay 1e-4 --dali $DALI \
---distill irg -r 1 -a 1 -b 0.05
+# python train_student.py --path-t ./save/models/vgg13_imagenet_vanilla/vgg13_transformed.pth \
+# --batch_size $BATCH_SIZE --epochs $EPOCH --dataset imagenet --gpu_id $GPU --dist-url tcp://127.0.0.1:23333 \
+# --print-freq 100 --num_workers $WORKER --model_s ShuffleV2_Imagenet --trial release \
+# --multiprocessing-distributed --learning_rate $LEARNING_RATE --lr_decay_epochs 30,60 --weight_decay 1e-4 --dali $DALI \
+# --distill irg -r 1 -a 1 -b 0.005
\ No newline at end of file