Skip to content

Commit

Permalink
Fix #2
Browse files Browse the repository at this point in the history
  • Loading branch information
Valentin Gabeur committed Nov 9, 2020
1 parent e24a84d commit ef81f96
Show file tree
Hide file tree
Showing 5 changed files with 518 additions and 10 deletions.
70 changes: 69 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,83 @@ tar -xvf activity-net.tar.gz
tar -xvf LSMDC.tar.gz
```

Download the checkpoints:
```bash
# Create and move to mmt/data/checkpoints directory
mkdir checkpoints
cd checkpoints
# Download checkpoints
wget http://pascal.inrialpes.fr/data2/vgabeur/mmt/data/checkpoints/HowTo100M_full_train.pth
wget http://pascal.inrialpes.fr/data2/vgabeur/mmt/data/checkpoints/MSRVTT_jsfusion_trainval.pth
wget http://pascal.inrialpes.fr/data2/vgabeur/mmt/data/checkpoints/prtrn_MSRVTT_jsfusion_trainval.pth
```

You can then run the following scripts:

### MSRVTT

Training from scratch
#### Training from scratch

Training + evaluation:
```bash
python -m train --config configs_pub/eccv20/MSRVTT_jsfusion_trainval.json
```

Evaluation from checkpoint:
```bash
python -m train --config configs_pub/eccv20/MSRVTT_jsfusion_trainval.json --only_eval --load_checkpoint data/checkpoints/MSRVTT_jsfusion_trainval.pth
```

Expected results:
```
MSRVTT_jsfusion_test:
t2v_metrics/R1/final_eval: 24.3
t2v_metrics/R5/final_eval: 54.9
t2v_metrics/R10/final_eval: 68.6
t2v_metrics/R50/final_eval: 89.6
t2v_metrics/MedR/final_eval: 5.0
t2v_metrics/MeanR/final_eval: 26.485
t2v_metrics/geometric_mean_R1-R5-R10/final_eval: 45.06446759875623
v2t_metrics/R1/final_eval: 24.5
v2t_metrics/R5/final_eval: 54.5
v2t_metrics/R10/final_eval: 69.1
v2t_metrics/R50/final_eval: 90.6
v2t_metrics/MedR/final_eval: 4.0
v2t_metrics/MeanR/final_eval: 24.06
v2t_metrics/geometric_mean_R1-R5-R10/final_eval: 45.187003696913585
```

#### Finetuning from a HowTo100M pretrained model:

Training + evaluation:
```bash
python -m train --config configs_pub/eccv20/prtrn_MSRVTT_jsfusion_trainval.json --load_checkpoint data/checkpoints/HowTo100M_full_train.pth
```

Evaluation from checkpoint:
```bash
python -m train --config configs_pub/eccv20/prtrn_MSRVTT_jsfusion_trainval.json --only_eval --load_checkpoint data/checkpoints/prtrn_MSRVTT_jsfusion_trainval.pth
```

Expected results:
```
MSRVTT_jsfusion_test:
t2v_metrics/R1/final_eval: 24.7
t2v_metrics/R5/final_eval: 57.1
t2v_metrics/R10/final_eval: 68.6
t2v_metrics/R50/final_eval: 90.6
t2v_metrics/MedR/final_eval: 4.0
t2v_metrics/MeanR/final_eval: 23.044
t2v_metrics/geometric_mean_R1-R5-R10/final_eval: 45.907720169747826
v2t_metrics/R1/final_eval: 27.2
v2t_metrics/R5/final_eval: 55.1
v2t_metrics/R10/final_eval: 68.4
v2t_metrics/R50/final_eval: 90.3
v2t_metrics/MedR/final_eval: 4.0
v2t_metrics/MeanR/final_eval: 19.607
v2t_metrics/geometric_mean_R1-R5-R10/final_eval: 46.80140254398485
```

### ActivityNet

Training from scratch
Expand Down
235 changes: 235 additions & 0 deletions configs_pub/eccv20/HowTo100M_full_train.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
{
"n_gpu": 1,
"seed": 0,
"experts": {
"face_dim": 512,
"modalities": [
"s3d",
"vggish"
]
},
"arch": {
"type": "CENet",
"args": {
"keep_missing_modalities": true,
"test_caption_mode": "indep",
"txt_inp": "bertftn",
"txt_agg": "bertftn",
"txt_wgh": "emb",
"vid_wgh": "none",
"vid_cont": "bert",
"vid_inp": "both",
"pos_enc": "tint",
"out_tok": "mxp",
"l2renorm": false,
"vid_bert_params": {
"vocab_size_or_config_json_file": 10,
"hidden_size": 512,
"num_hidden_layers": 4,
"num_attention_heads": 4,
"intermediate_size": 3072,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"attention_probs_dropout_prob": 0.1,
"max_position_embeddings": 32,
"type_vocab_size": 19,
"initializer_range": 0.02,
"layer_norm_eps": 1e-12
},
"txt_pro": "gbn",
"txt_bert_params": {
"hidden_dropout_prob": 0.1,
"attention_probs_dropout_prob": 0.1
}
}
},
"train_sets": [
{
"type": "ExpertDataLoader",
"args": {
"mix": [
{
"dataset_name": "HowTo100M",
"cut_name": "full",
"split_name": "train",
"data_dir": "data/howto100m/symlinked-feats",
"mix_weight": 1.0,
"query_shuffling": "indiv",
"temporal_encoding_window": 1,
"max_text_words": 30,
"max_expert_tokens": 30,
"caption_length": 30,
"clip_duration": 30
}
],
"batch_size": 64,
"num_workers": 64,
"pin_memory": false
}
}
],
"continuous_eval_sets": [
{
"type": "ExpertDataLoader",
"args": {
"mix": [
{
"dataset_name": "HowTo100M",
"cut_name": "full",
"split_name": "trn",
"data_dir": "data/howto100m/symlinked-feats",
"mix_weight": 1.0,
"query_shuffling": "indiv",
"temporal_encoding_window": 1,
"captions_per_video": 1,
"max_text_words": 30,
"max_expert_tokens": 30,
"caption_length": 30,
"clip_duration": 30
}
],
"batch_size": 64,
"num_workers": 64,
"pin_memory": false
}
},
{
"type": "ExpertDataLoader",
"args": {
"mix": [
{
"dataset_name": "HowTo100M",
"cut_name": "full",
"split_name": "val",
"data_dir": "data/howto100m/symlinked-feats",
"mix_weight": 1.0,
"query_shuffling": "indiv",
"temporal_encoding_window": 1,
"captions_per_video": 1,
"max_text_words": 30,
"max_expert_tokens": 30,
"caption_length": 30,
"clip_duration": 30
}
],
"batch_size": 64,
"num_workers": 64,
"pin_memory": false
}
},
{
"type": "ExpertDataLoader",
"args": {
"mix": [
{
"dataset_name": "HowTo100M",
"cut_name": "full",
"split_name": "test",
"data_dir": "data/howto100m/symlinked-feats",
"mix_weight": 1.0,
"query_shuffling": "indiv",
"temporal_encoding_window": 1,
"captions_per_video": 1,
"max_text_words": 30,
"max_expert_tokens": 30,
"caption_length": 30,
"clip_duration": 30
}
],
"batch_size": 64,
"num_workers": 64,
"pin_memory": false
}
},
{
"type": "ExpertDataLoader",
"args": {
"mix": [
{
"dataset_name": "MSRVTT",
"cut_name": "jsfusion",
"split_name": "test",
"data_dir": "data/MSRVTT/symlinked-feats",
"captions_per_video": 1,
"query_shuffling": "indiv",
"temporal_encoding_window": 1,
"max_text_words": 30,
"max_expert_tokens": 30
}
],
"batch_size": 32,
"num_workers": 0,
"pin_memory": false
}
}
],
"final_eval_sets": [
{
"type": "ExpertDataLoader",
"args": {
"mix": [
{
"dataset_name": "HowTo100M",
"cut_name": "full",
"split_name": "test",
"data_dir": "data/howto100m/symlinked-feats",
"mix_weight": 1.0,
"query_shuffling": "indiv",
"temporal_encoding_window": 1,
"captions_per_video": 1,
"max_text_words": 30,
"max_expert_tokens": 30,
"caption_length": 30,
"clip_duration": 30
}
],
"batch_size": 64,
"num_workers": 64,
"pin_memory": false
}
}
],
"optimizer": {
"type": "Adam",
"args": {
"lr": 5e-05,
"weight_decay": 0
}
},
"loss": {
"type": "MaxMarginRankingLoss",
"args": {
"margin": 0.05,
"fix_norm": true
}
},
"metrics": [
"t2v_metrics",
"v2t_metrics"
],
"visualizer": {
"type": "Visualizer",
"args": {
"vis_vid_freq": 50,
"num_samples": 20
}
},
"lr_scheduler": {
"type": "StepLR",
"args": {
"step_size": 1,
"gamma": 0.98
}
},
"trainer": {
"epochs": 200,
"max_samples_per_epoch": 640000,
"save_period": 1,
"skip_first_n_saves": 0,
"include_optim_in_ckpts": true,
"verbosity": 2,
"tensorboard": true,
"monitor": "max epoch"
}
}
25 changes: 23 additions & 2 deletions configs_pub/eccv20/MSRVTT_jsfusion_trainval.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,27 @@
}
],
"continuous_eval_sets": [
{
"type": "ExpertDataLoader",
"args": {
"mix": [
{
"dataset_name": "MSRVTT",
"cut_name": "jsfusion",
"split_name": "trn",
"data_dir": "data/MSRVTT/symlinked-feats",
"captions_per_video": 1,
"query_shuffling": "indiv",
"temporal_encoding_window": 1,
"max_text_words": 30,
"max_expert_tokens": 30
}
],
"batch_size": 32,
"num_workers": 64,
"pin_memory": false
}
},
{
"type": "ExpertDataLoader",
"args": {
Expand All @@ -81,7 +102,7 @@
"cut_name": "jsfusion",
"split_name": "test",
"data_dir": "data/MSRVTT/symlinked-feats",
"captions_per_video": 20,
"captions_per_video": 1,
"query_shuffling": "indiv",
"temporal_encoding_window": 1,
"max_text_words": 30,
Expand All @@ -104,7 +125,7 @@
"cut_name": "jsfusion",
"split_name": "test",
"data_dir": "data/MSRVTT/symlinked-feats",
"captions_per_video": 20,
"captions_per_video": 1,
"query_shuffling": "indiv",
"temporal_encoding_window": 1,
"max_text_words": 30,
Expand Down
Loading

0 comments on commit ef81f96

Please sign in to comment.