# vith16_ssv2_16x2x3.yaml
# Evaluation config for the `video_classification_frozen` eval (tag: ssv2-16x2x3).
# Values that start with /your_ are placeholders; replace them with real
# paths before running.

# Job sizing. Presumably read by the job launcher -- confirm against the caller.
nodes: 8
tasks_per_node: 8

# The tag appears to encode frames_per_clip x num_segments x
# num_views_per_segment from the `data` section (16 x 2 x 3).
tag: ssv2-16x2x3
eval_name: video_classification_frozen
resume_checkpoint: false

# Dataset index files and clip-sampling settings.
data:
  dataset_train: /your_path_to_ssv2_train_csv_file_index.csv
  dataset_val: /your_path_to_ssv2_val_csv_file_index.csv
  dataset_type: VideoDataset
  num_classes: 174
  frames_per_clip: 16
  num_segments: 2
  num_views_per_segment: 3
  frame_step: 4

# Training schedule and numeric settings for the evaluation run.
optimization:
  attend_across_segments: true
  num_epochs: 20
  resolution: 224
  batch_size: 4
  weight_decay: 0.01
  lr: 0.001
  start_lr: 0.001
  final_lr: 0.0
  warmup: 0.0
  use_bfloat16: true

# Pretrained model to load. `frames_per_clip` also appears under `data`;
# the two keys must stay in their own nested mappings, otherwise they
# collide as a duplicate key in one mapping.
pretrain:
  model_name: vit_huge
  checkpoint_key: target_encoder
  clip_duration: null
  frames_per_clip: 16
  tubelet_size: 2
  uniform_power: true
  use_silu: false
  tight_silu: false
  use_sdpa: true
  patch_size: 16
  folder: /your_absolute_file_path_to_directory_where_pretrained_models_are_contained/
  checkpoint: jepa-latest.pth.tar  # name of pretrained model file inside folder
  write_tag: jepa