
Commit

init commit
xvjiarui committed Jul 9, 2020
1 parent 0032f0b commit b2724da
Showing 430 changed files with 20,454 additions and 1 deletion.
125 changes: 125 additions & 0 deletions .dev/clean_models.py
@@ -0,0 +1,125 @@
import argparse
import glob
import json
import os
import os.path as osp

import mmcv

# build schedule look-up table to automatically find the final model
SCHEDULES_LUT = {
    '20ki': 20000,
    '40ki': 40000,
    '60ki': 60000,
    '80ki': 80000,
    '160ki': 160000
}
RESULTS_LUT = ['mIoU', 'mAcc', 'aAcc']


def get_final_iter(config):
    iter_num = SCHEDULES_LUT[config.split('_')[-2]]
    return iter_num


def get_final_results(log_json_path, iter_num):
    result_dict = dict()
    with open(log_json_path, 'r') as f:
        for line in f.readlines():
            log_line = json.loads(line)
            if 'mode' not in log_line.keys():
                continue

            if log_line['mode'] == 'train' and log_line['iter'] == iter_num:
                result_dict['memory'] = log_line['memory']

            if log_line['iter'] == iter_num:
                result_dict.update({
                    key: log_line[key]
                    for key in RESULTS_LUT if key in log_line
                })
    return result_dict


def parse_args():
    parser = argparse.ArgumentParser(description='Gather benchmarked models')
    parser.add_argument(
        'root',
        type=str,
        help='root path of benchmarked models to be gathered')
    parser.add_argument(
        'config',
        type=str,
        help='root path of benchmarked configs to be gathered')

    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    models_root = args.root
    config_name = args.config

    # find all models in the root directory to be gathered
    raw_configs = list(mmcv.scandir(config_name, '.py', recursive=True))

    # filter out configs that were not trained in the experiments dir
    used_configs = []
    for raw_config in raw_configs:
        work_dir = osp.splitext(osp.basename(raw_config))[0]
        if osp.exists(osp.join(models_root, work_dir)):
            used_configs.append(work_dir)
    print(f'Found {len(used_configs)} models to be gathered')

    # find the final checkpoint and log file for each trained config
    # and parse the best performance
    model_infos = []
    for used_config in used_configs:
        exp_dir = osp.join(models_root, used_config)
        # check whether the experiment is finished
        final_iter = get_final_iter(used_config)
        final_model = 'iter_{}.pth'.format(final_iter)
        model_path = osp.join(exp_dir, final_model)

        # skip if the model is still training
        if not osp.exists(model_path):
            print(f'{used_config} not finished yet')
            continue

        # get logs
        log_json_path = glob.glob(osp.join(exp_dir, '*.log.json'))[0]
        log_txt_path = glob.glob(osp.join(exp_dir, '*.log'))[0]
        model_performance = get_final_results(log_json_path, final_iter)

        # get_final_results returns an empty dict when no results were parsed
        if not model_performance:
            print(f'{used_config} does not have performance')
            continue

        model_time = osp.split(log_txt_path)[-1].split('.')[0]
        model_infos.append(
            dict(
                config=used_config,
                results=model_performance,
                iters=final_iter,
                model_time=model_time,
                log_json_path=osp.split(log_json_path)[-1]))

    # remove intermediate checkpoints, keeping only the final and latest ones
    for model in model_infos:
        model_name = osp.split(model['config'])[-1].split('.')[0]
        model_name += '_' + model['model_time']
        for checkpoints in mmcv.scandir(
                osp.join(models_root, model['config']), suffix='.pth'):
            if checkpoints.endswith(f"iter_{model['iters']}.pth"
                                    ) or checkpoints.endswith('latest.pth'):
                continue
            print('removing {}'.format(
                osp.join(models_root, model['config'], checkpoints)))
            os.remove(osp.join(models_root, model['config'], checkpoints))


if __name__ == '__main__':
    main()
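
For context, a minimal sketch (not part of the commit) of the log format that get_final_results above expects: one JSON object per line carrying 'mode' and 'iter' keys. The file name and metric values below are hypothetical.

# Hedged illustration only; file name and numbers are made up.
import json

sample_lines = [
    {'mode': 'train', 'iter': 40000, 'memory': 8123},
    {'mode': 'val', 'iter': 40000, 'mIoU': 0.77, 'mAcc': 0.85, 'aAcc': 0.96},
]
with open('example.log.json', 'w') as f:
    for record in sample_lines:
        f.write(json.dumps(record) + '\n')

# get_final_results('example.log.json', 40000) would then return
# {'memory': 8123, 'mIoU': 0.77, 'mAcc': 0.85, 'aAcc': 0.96}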
197 changes: 197 additions & 0 deletions .dev/gather_models.py
@@ -0,0 +1,197 @@
import argparse
import glob
import json
import os
import os.path as osp
import shutil
import subprocess

import mmcv
import torch

# metric keys to look up in the training log when collecting results
RESULTS_LUT = ['mIoU', 'mAcc', 'aAcc']


def process_checkpoint(in_file, out_file):
    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)
    # rename the file so its name carries the first 8 chars of its sha256
    sha = subprocess.check_output(['sha256sum', out_file]).decode()
    final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])
    subprocess.Popen(['mv', out_file, final_file])
    return final_file


def get_final_iter(config):
    iter_num = config.split('_')[-2]
    assert iter_num.endswith('k')
    return int(iter_num[:-1]) * 1000


def get_final_results(log_json_path, iter_num):
    result_dict = dict()
    with open(log_json_path, 'r') as f:
        for line in f.readlines():
            log_line = json.loads(line)
            if 'mode' not in log_line.keys():
                continue

            if log_line['mode'] == 'train' and log_line['iter'] == iter_num:
                result_dict['memory'] = log_line['memory']

            if log_line['iter'] == iter_num:
                result_dict.update({
                    key: log_line[key]
                    for key in RESULTS_LUT if key in log_line
                })
    return result_dict


def parse_args():
    parser = argparse.ArgumentParser(description='Gather benchmarked models')
    parser.add_argument(
        'root',
        type=str,
        help='root path of benchmarked models to be gathered')
    parser.add_argument(
        'config',
        type=str,
        help='root path of benchmarked configs to be gathered')
    parser.add_argument(
        'out_dir',
        type=str,
        help='output path of gathered models to be stored')
    parser.add_argument('out_file', type=str, help='the output json file name')
    parser.add_argument(
        '--filter', type=str, nargs='+', default=[], help='config filter')
    parser.add_argument(
        '--all', action='store_true', help='whether to include .py and .log')

    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    models_root = args.root
    models_out = args.out_dir
    config_name = args.config
    mmcv.mkdir_or_exist(models_out)

    # find all models in the root directory to be gathered
    raw_configs = list(mmcv.scandir(config_name, '.py', recursive=True))

    # filter out configs that were not trained in the experiments dir
    used_configs = []
    for raw_config in raw_configs:
        work_dir = osp.splitext(osp.basename(raw_config))[0]
        if osp.exists(osp.join(models_root, work_dir)):
            used_configs.append((work_dir, raw_config))
    print(f'Found {len(used_configs)} models to be gathered')

    # find the final checkpoint and log file for each trained config
    # and parse the best performance
    model_infos = []
    for used_config, raw_config in used_configs:
        bypass = True
        for p in args.filter:
            if p in used_config:
                bypass = False
                break
        if bypass:
            continue
        exp_dir = osp.join(models_root, used_config)
        # check whether the experiment is finished
        final_iter = get_final_iter(used_config)
        final_model = 'iter_{}.pth'.format(final_iter)
        model_path = osp.join(exp_dir, final_model)

        # skip if the model is still training
        if not osp.exists(model_path):
            print(f'{used_config} training not finished yet')
            continue

        # pick the log json that actually contains the final results
        log_json_paths = glob.glob(osp.join(exp_dir, '*.log.json'))
        log_json_path = log_json_paths[0]
        model_performance = None
        for _log_json_path in log_json_paths:
            model_performance = get_final_results(_log_json_path, final_iter)
            if model_performance:
                log_json_path = _log_json_path
                break

        if not model_performance:
            print(f'{used_config} does not have performance')
            continue

        model_time = osp.split(log_json_path)[-1].split('.')[0]
        model_infos.append(
            dict(
                config=used_config,
                raw_config=raw_config,
                results=model_performance,
                iters=final_iter,
                model_time=model_time,
                log_json_path=osp.split(log_json_path)[-1]))

    # publish the final checkpoint of each gathered model
    publish_model_infos = []
    for model in model_infos:
        model_publish_dir = osp.join(models_out,
                                     model['raw_config'].rstrip('.py'))
        model_name = osp.split(model['config'])[-1].split('.')[0]

        publish_model_path = osp.join(model_publish_dir,
                                      model_name + '_' + model['model_time'])
        trained_model_path = osp.join(models_root, model['config'],
                                      'iter_{}.pth'.format(model['iters']))
        if osp.exists(model_publish_dir):
            # reuse an already published checkpoint if one exists
            for file in os.listdir(model_publish_dir):
                if file.endswith('.pth'):
                    print(f'model {file} found')
                    model['model_path'] = osp.abspath(
                        osp.join(model_publish_dir, file))
                    break
            if 'model_path' not in model:
                print(f'dir {model_publish_dir} exists, no model found')
        else:
            mmcv.mkdir_or_exist(model_publish_dir)

            # convert model
            final_model_path = process_checkpoint(trained_model_path,
                                                  publish_model_path)
            model['model_path'] = final_model_path

        new_json_path = f'{model_name}-{model["log_json_path"]}'
        # copy log
        shutil.copy(
            osp.join(models_root, model['config'], model['log_json_path']),
            osp.join(model_publish_dir, new_json_path))
        if args.all:
            new_txt_path = new_json_path.rstrip('.json')
            shutil.copy(
                osp.join(models_root, model['config'],
                         model['log_json_path'].rstrip('.json')),
                osp.join(model_publish_dir, new_txt_path))

        if args.all:
            # copy config to guarantee reproducibility
            raw_config = osp.join(config_name, model['raw_config'])
            mmcv.Config.fromfile(raw_config).dump(
                osp.join(model_publish_dir, osp.basename(raw_config)))

        publish_model_infos.append(model)

    models = dict(models=publish_model_infos)
    mmcv.dump(models, osp.join(models_out, args.out_file))


if __name__ == '__main__':
    main()
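
For reference, a hedged sketch (not from any real run) of the shape of one entry in the 'models' list that mmcv.dump writes at the end of main above; every field value below is an illustrative placeholder.

# Illustrative placeholder values only.
example_entry = dict(
    config='fcn_r50-d8_512x1024_40k_cityscapes',  # hypothetical work dir name
    raw_config='fcn/fcn_r50-d8_512x1024_40k_cityscapes.py',
    results=dict(mIoU=0.72, mAcc=0.80, aAcc=0.95, memory=8123),
    iters=40000,
    model_time='20200709_120000',
    log_json_path='20200709_120000.log.json',
    model_path='/out_dir/fcn/fcn_r50-d8_512x1024_40k_cityscapes_'
               '20200709_120000-abcd1234.pth')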