Skip to content

Commit

Permalink
Merge branch 'master' of github.com:awni/cinc17
Browse files Browse the repository at this point in the history
  • Loading branch information
awni committed Apr 4, 2017
2 parents 32d247e + 7a53411 commit 3ac9fb9
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 2 deletions.
2 changes: 1 addition & 1 deletion configs/explore.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@
},

"io" : {
"output_save_path" : "/deep/group/sudnya/repro/cnn2-size256_128-num64_64_dp0_5_str7"
"output_save_path" : "/deep/group/sudnya/launch-many-at-once/"
}
}
45 changes: 45 additions & 0 deletions configs/param-adam.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
{
"name" : "optimizer",
"value": [
{
"name": "adam",
"epochs" : 50,
"learning_rate" : 1e-2,
"decay_rate" : 1.0,
"decay_steps" : 2000,
"beta_1": 0.9,
"beta_2": 0.999,
"epsilon": 1e-08
},
{
"name": "adam",
"epochs" : 50,
"learning_rate" : 1e-2,
"decay_rate" : 1.0,
"decay_steps" : 2000,
"beta_1": 0.6,
"beta_2": 0.999,
"epsilon": 1e-08
},
{
"name": "adam",
"epochs" : 50,
"learning_rate" : 1e-2,
"decay_rate" : 1.0,
"decay_steps" : 2000,
"beta_1": 0.3,
"beta_2": 0.999,
"epsilon": 1e-08
},
{
"name": "adam",
"epochs" : 50,
"learning_rate" : 1e-2,
"decay_rate" : 1.0,
"decay_steps" : 2000,
"beta_1": 0.1,
"beta_2": 0.999,
"epsilon": 1e-08
}
]
}
4 changes: 4 additions & 0 deletions configs/param.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"name" : "dropout",
"value": [0.5, 0.6, 0.7]
}
7 changes: 7 additions & 0 deletions dq-launch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
# dq job script: runs train.py with the config file named by $DQ_CFG.
# DQ_CFG must be present in the environment when this script starts.

echo This job is running on $(hostname)

# Prepend the CUDA 8.0 directories instead of replacing the existing values:
# overwriting PATH would make `python` and `date` below unresolvable.
export LD_LIBRARY_PATH=/usr/local/cuda-8.0/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
export PATH=/usr/local/cuda-8.0/bin:$PATH

# Print the start timestamp before training so that it really is the start.
echo "Started training at: ", $(date +%m:%d:%y-%H:%M:%S)
python train.py -c "$DQ_CFG"
149 changes: 149 additions & 0 deletions launch-many.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import argparse
import copy
import json
import logging
import os
import subprocess
from pathlib import Path

logger = logging.getLogger("Launcher")

class Launcher:
    """
    Launcher class for calling dq to launch train.py with each entry in a
    list of hyperparameters.

    All of the work happens in the constructor: for every value listed in
    the parameter config, a copy of the base config is overridden, written
    to its own output directory and submitted with
    ``dq-submit dq-launch.sh``.
    """

    def __init__(self, default_config, param, experiment_name):
        """
        :param default_config: the default config dict; must contain
                               ``io.output_save_path``. It is not modified.
        :param param: the parameter to be overridden in the base config, as
                      a dict with a "name" (the config key to override) and
                      a "value" (list of values, one job is launched per
                      entry)
        :param experiment_name: name of the experiment, used as a
                                sub-directory of the base output path
        :raises ValueError: if the parameter name is not a key in
                            default_config or in any dict nested inside it
        """
        output_dir = default_config.get("io").get("output_save_path")
        output_dir = os.path.join(output_dir, experiment_name)

        p_name = param.get("name")
        logger.debug("Hyper param: " + str(p_name))

        # TODO not tested for more than one hyperparams list
        for ctr, value in enumerate(param.get("value")):
            # Every job gets its own deep copy, so one override never leaks
            # into the next job or into the caller's config.
            override_config = self._build_override(default_config, p_name,
                                                   value)
            logger.debug("Set " + str(p_name) + " : " + str(value))

            o_dir_exp = self.get_cfg_path(output_dir, p_name, ctr)
            o_cfg_path = os.path.join(o_dir_exp, "override.cfg")
            override_config['io']['output_save_path'] = o_dir_exp

            logger.debug("Creating " + str(o_dir_exp))
            os.makedirs(o_dir_exp, exist_ok=True)

            with open(o_cfg_path, 'w') as fp:
                json.dump(override_config, fp)

            self._submit(o_cfg_path)

    @staticmethod
    def _build_override(default_config, name, value):
        """Return a deep copy of default_config with key `name` set to `value`."""
        override_config = copy.deepcopy(default_config)
        if not Launcher._replace_key(override_config, name, value):
            raise ValueError("Parameter '" + str(name) +
                             "' not found in the base config")
        return override_config

    @staticmethod
    def _replace_key(cfg, name, value):
        """
        Set cfg[name] = value in place, searching nested dicts if `name` is
        not a key of cfg itself. Lists are not searched. Returns True when a
        key was replaced, False otherwise.
        """
        if name in cfg:
            cfg[name] = value
            return True
        for child in cfg.values():
            if isinstance(child, dict) and \
                    Launcher._replace_key(child, name, value):
                return True
        return False

    @staticmethod
    def _submit(cfg_path):
        """Submit dq-launch.sh through dq-submit with DQ_CFG set to cfg_path."""
        # An argument list plus an explicit environment keeps the path out
        # of a shell command line, so no quoting is needed.
        env = dict(os.environ, DQ_CFG=cfg_path)
        status = subprocess.call(["dq-submit", "dq-launch.sh"], env=env)
        if status != 0:
            logger.warning("dq-submit exited with status " + str(status) +
                           " for " + str(cfg_path))

    def get_cfg_path(self, output_dir, param_name, val):
        """
        Return the output directory for one run:
        ``<output_dir>/<param_name>_<val>``.
        """
        dr = os.path.join(output_dir, param_name + "_" + str(val))
        logger.debug("dir: " + str(dr))
        return dr


# #TODO: refactor later
# def search_and_replace_dict(self, cfg, param, val):
# if isinstance(cfg, dict):
# for k, v in cfg.items():
#
# # is a dict itself
# if isinstance(v, dict):
# if k == param:
# print("Found "+ str(k) + " : " + str(cfg[k]))
# cfg[k] = val
# print("Replaced to " + str(cfg[k]))
# return
# else:
# return self.search_and_replace_dict(v, param, val)
#
# return



def get_default_config():
    """Build and return a fresh copy of the built-in default config dict."""
    data_section = {"path": "/deep/group/med/alivecor/training2017",
                    "seed": 2016}

    optimizer_section = {
        "name": "momentum",
        "epochs": 50,
        "learning_rate": 1e-2,
        "momentum": 0.95,
        "decay_rate": 1.0,
        "decay_steps": 2000,
    }

    # Two convolutional layers that differ only in filter size.
    conv_layers = [
        {"filter_size": size, "num_filters": 64, "stride": 7}
        for size in (256, 128)
    ]
    model_section = {
        "dropout": 0.5,
        "batch_size": 32,
        "conv_layers": conv_layers,
    }

    io_section = {"output_save_path": "/tmp"}

    return {
        "seed": 4337,
        "data": data_section,
        "optimizer": optimizer_section,
        "model": model_section,
        "io": io_section,
    }


def main(argv=None):
    """
    Parse the command line and launch one job per hyperparameter value.

    :param argv: optional list of command-line arguments; when None,
                 argparse reads them from sys.argv
    :raises ValueError: if the parameter config or the experiment name is
                        missing (or the experiment name is empty)
    """
    parser = argparse.ArgumentParser(description="Launcher")
    parser.add_argument("-v", "--verbose", default=False, action="store_true")
    parser.add_argument("-e", "--experiment_name", default=None)
    parser.add_argument("-b", "--base_config", default=None)
    parser.add_argument("-p", "--parameter_config", default=None)

    arguments = vars(parser.parse_args(argv))

    is_verbose = arguments['verbose']
    base_config = arguments['base_config']
    param_config = arguments['parameter_config']
    # -e defaults to None; guard before rstrip() so that a missing name
    # reaches the ValueError below instead of raising AttributeError.
    experiment_name = (arguments['experiment_name'] or "").rstrip()

    if is_verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    # Check the mandatory arguments before reading any file.
    if not param_config:
        raise ValueError("Mandatory to specify parameter config")
    if not experiment_name:
        raise ValueError("Mandatory to specify experiment name")

    if not base_config:
        default_config = get_default_config()
        logger.debug("No base cfg supplied, so using default - " +
                     str(default_config))
    else:
        with open(base_config) as fid:
            default_config = json.load(fid)

    with open(param_config) as fid:
        param = json.load(fid)

    # The Launcher constructor does all of the work (writes the configs and
    # submits the jobs), so the instance itself is not kept.
    Launcher(default_config, param, experiment_name)



if __name__ == '__main__':
main()

2 changes: 1 addition & 1 deletion launch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

#gpu=3
#env CUDA_VISIBLE_DEVICES=$gpu
python train.py -c configs/explore.json
python train.py -c configs/cnn20-lr-1.json -v
echo "Started training at: ", $(date +%m:%d:%y-%H:%M:%S)

0 comments on commit 3ac9fb9

Please sign in to comment.