forked from open-mmlab/mmsegmentation
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rebenchmark for Pyslowfast comparison (open-mmlab#28)
* add resolution for K400 * rebenchmark on 32G V100 * add new i3d config and i3d benchmark minor minor Co-authored-by: linjintao <[email protected]>
- Loading branch information
1 parent
3aefca8
commit b96aab9
Showing
3 changed files
with
137 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
125 changes: 125 additions & 0 deletions
125
configs/recognition/i3d/i3d_r50_video_8x8x1_100e_kinetics400_rgb.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
# model settings
# 3D recognizer: a depth-50 ResNet3d backbone followed by an I3D
# classification head over the 400 classes of Kinetics-400.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3d',
        # Initialise from a 2D checkpoint; the checkpoint is addressed with
        # the 'torchvision://' model-zoo prefix (not an http(s) URL).
        pretrained2d=True,
        pretrained='torchvision://resnet50',
        depth=50,
        conv_cfg=dict(type='Conv3d'),
        norm_eval=False,
        # One tuple per residual stage (3, 4, 6 and 3 entries); presumably
        # one inflate flag per block in that stage -- confirm against
        # ResNet3d.
        inflate=((1, 1, 1), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 1, 0)),
        zero_init_residual=False),
    cls_head=dict(
        type='I3DHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        dropout_ratio=0.5,
        init_std=0.01))
# model training and testing settings
# No extra training-time options are configured for the recognizer.
train_cfg = None
# NOTE(review): average_clips=None presumably leaves per-clip scores
# un-averaged at test time -- confirm against the recognizer implementation.
test_cfg = dict(average_clips=None)
# dataset settings
# Samples are read from video files; train and val videos live in separate
# directories, each paired with its own annotation list.
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
# The test list is the same file as the validation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
# Per-channel mean/std on a 0-255 scale (the usual ImageNet RGB statistics);
# frames are kept in RGB order (to_bgr=False).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Training pipeline: decode from video, sample one 8-frame clip with a frame
# interval of 8 (the "8x8x1" in the config name), augment, normalise and pack.
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1),
    dict(type='DecordDecode'),
    # NOTE(review): scale=(-1, 256) presumably keeps the aspect ratio with one
    # side fixed at 256 -- confirm against the Resize transform.
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.8),
        random_crop=False,
        max_wh_scale_gap=0),
    # Bring every crop to a fixed 224x224 input, ignoring aspect ratio.
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation pipeline: one 8-frame clip (frame interval 8) sampled in test
# mode, a single 224 centre crop, and no flipping (flip_ratio=0).
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=1,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    # Only 'imgs' is converted here, unlike training where 'label' is too.
    dict(type='ToTensor', keys=['imgs'])
]
# Test pipeline: 10 clips of 8 frames (frame interval 8) per video, each
# passed through ThreeCrop at size 256 (presumably three spatial crops per
# clip -- confirm against the ThreeCrop transform); no flipping.
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=10,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    # Only 'imgs' is converted here, unlike training where 'label' is too.
    dict(type='ToTensor', keys=['imgs'])
]
# Data loading: 8 videos and 4 worker processes per GPU. Each split pairs an
# annotation list with the directory its video paths are resolved against.
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    # Testing runs on the validation videos; ann_file_test is the same list
    # as ann_file_val.
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
# optimizer
optimizer = dict(
    type='SGD', lr=0.01, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
# Clip the gradient L2 norm (norm_type=2) at 40.
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
# Step schedule with milestones at epochs 40 and 80 of a 100-epoch run.
lr_config = dict(policy='step', step=[40, 80])
total_epochs = 100
# Checkpoint and evaluate every 5 epochs.
checkpoint_config = dict(interval=5)
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'], topk=(1, 5))
# Text log every 20 iterations; uncomment the Tensorboard hook to enable it.
log_config = dict(
    interval=20,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook'),
    ])
# runtime settings
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Output directory named after this config (8x8x1 sampling) so its logs and
# checkpoints do not land in the directory of the 32x2x1 variant.
work_dir = './work_dirs/i3d_r50_video_8x8x1_100e_kinetics400_rgb/'
# Neither an initial checkpoint nor a run to resume is configured.
load_from = None
resume_from = None
# A single 'train' phase with a count of 1; no 'val' phase in the workflow.
workflow = [('train', 1)]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters