train.py
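"""Training entry point.

Loads the run configuration, initializes distributed training, seeds each
rank, builds the task, datasets, and model through the registry, and hands
everything to the Runner for training.
"""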
import argparse
import os
import random

import numpy as np
import torch
import torch.backends.cudnn as cudnn
from omegaconf import OmegaConf

import tasks
import utils.blip_utils as utils
from common.registry import registry

# imports modules for registration
from datasets.builders import *
from models import *
from processors import *
from runners.runner_base import Runner
from tasks import *
from utils.config import Config


def parse_args():
    parser = argparse.ArgumentParser(description="Training")
    parser.add_argument("--cfg-path", required=True, help="path to configuration file.")
    parser.add_argument(
        "--options",
        nargs="+",
        help="override some settings in the used config; key-value pairs "
        "in xxx=yyy format will be merged into the config file (deprecated; "
        "use --cfg-options instead).",
    )

    args = parser.parse_args()
    # if 'LOCAL_RANK' not in os.environ:
    #     os.environ['LOCAL_RANK'] = str(args.local_rank)

    return args


def setup_seeds(config):
    # Offset the base seed by the process rank so each worker draws a
    # different random stream while staying reproducible.
    seed = config.run_cfg.seed + utils.get_rank()

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    cudnn.benchmark = True


def main():
    # Allow automatic downloads on the main process to finish without
    # the NCCL backend timing out.
    os.environ["NCCL_BLOCKING_WAIT"] = "1"

    # Set before init_distributed_mode() so the same job_id is shared
    # across all ranks.
    job_id = utils.now()

    root_dir = os.getcwd()
    default_cfg = OmegaConf.load(os.path.join(root_dir, "configs/default.yaml"))

    # Register filesystem roots so downstream builders can resolve
    # paths through the shared registry.
    registry.register_path("library_root", root_dir)
    registry.register_path("cache_root", default_cfg.env.cache_root)

    cfg = Config(parse_args())

    utils.init_distributed_mode(cfg.run_cfg)

    setup_seeds(cfg)

    # Set after init_distributed_mode() so logging happens only on the master rank.
    utils.setup_logger()

    cfg.pretty_print()

    # The task builds both the datasets and the model from the shared config.
    task = tasks.setup_task(cfg)
    datasets = task.build_datasets(cfg)
    model = task.build_model(cfg)

    runner = Runner(
        cfg=cfg,
        job_id=job_id,
        task=task,
        model=model,
        datasets=datasets,
    )
    runner.train()


if __name__ == "__main__":
    main()
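
# Example launch (a sketch; the config file and override key below are
# illustrative assumptions, not assets shipped with this script). The
# distributed environment variables consumed by init_distributed_mode()
# are expected to be set by a launcher such as torchrun:
#
#   torchrun --nproc_per_node=4 train.py \
#       --cfg-path configs/example.yaml \
#       --options run.seed=42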