Skip to content

Commit

Permalink
add flake8 config from pytorch and format the code
Browse files Browse the repository at this point in the history
  • Loading branch information
wanchaol committed Feb 6, 2019
1 parent d73f571 commit 7c381e3
Show file tree
Hide file tree
Showing 33 changed files with 344 additions and 282 deletions.
4 changes: 4 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[flake8]
max-line-length = 120
ignore = E203,E305,E402,E721,E741,F401,F403,F405,F821,F841,F999,W503,W504
exclude = third_party
3 changes: 1 addition & 2 deletions accuracy/model_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from datetime import datetime
import logging
from tqdm import tqdm
import os
import gc
import itertools
import sys
Expand All @@ -15,7 +14,6 @@
import torchvision.models as models



parser = argparse.ArgumentParser(description="PyTorch model accuracy benchmark.")
parser.add_argument('--repeat', type=int, default=5,
help="Number of Runs")
Expand Down Expand Up @@ -90,6 +88,7 @@ def cmd_string(examples_home, model, data_path):
cmd = ' '.join(['python3', examples_home, '-a', model, '--lr', str(lr), data_path])
return cmd


def log_init():
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
Expand Down
47 changes: 28 additions & 19 deletions plot/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
HERE = os.path.dirname(os.path.abspath(__file__))
MAX_BENCHES = 160
BENCH_TIMES = 4
BENCH_EVERY = 10 # th commit
BENCH_EVERY = 10 # th commit

run = partial(subprocess.check_call, cwd=REPO_DIR)
run_with_output = partial(subprocess.check_output, cwd=REPO_DIR)
run_toplevel = subprocess.check_call
silent = dict(stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)


def fetch_repo():
if os.path.exists(REPO_DIR):
print('> Pulling new changes...')
Expand All @@ -30,6 +31,7 @@ def fetch_repo():
print('> Cloning repository...')
run_toplevel(['git', 'clone', '--recursive', REPO_URL, REPO_DIR], **silent)


def get_history():
# git log --format='%H %an %ae %at' -n <num commits>
fields = [
Expand All @@ -56,11 +58,11 @@ def build(commit_hash):
start = time.time()
cname = container_name(commit_hash)
run(['docker', 'run',
'--runtime=nvidia',
'-v', os.path.join(HERE, '..') + ':/mnt/localdrive',
'--name', cname,
'-t', 'pytorch_bench',
'/bin/bash', '/mnt/localdrive/timing/python/install_pytorch.sh', commit_hash], **silent)
'--runtime=nvidia',
'-v', os.path.join(HERE, '..') + ':/mnt/localdrive',
'--name', cname,
'-t', 'pytorch_bench',
'/bin/bash', '/mnt/localdrive/timing/python/install_pytorch.sh', commit_hash], **silent)
run(['docker', 'commit', cname, cname], **silent)
end = time.time()
diff = int(end - start)
Expand All @@ -78,16 +80,16 @@ def run_benchmark(commit_hash, args, **kwargs):
BENCH_CPUS = '0-11'
BENCH_MEMS = '0'
return run_with_output(['docker', 'run',
'--cap-add=SYS_PTRACE',
'--runtime=nvidia',
'--security-opt',
'seccomp=unconfined',
'-v', os.path.join(HERE, '..') + ':/mnt/localdrive',
'-w', '/mnt/localdrive',
'--cpuset-cpus=' + BENCH_CPUS,
'--cpuset-mems=' + BENCH_MEMS,
'-t', container_name(commit_hash),
*args], **kwargs).decode('utf8')
'--cap-add=SYS_PTRACE',
'--runtime=nvidia',
'--security-opt',
'seccomp=unconfined',
'-v', os.path.join(HERE, '..') + ':/mnt/localdrive',
'-w', '/mnt/localdrive',
'--cpuset-cpus=' + BENCH_CPUS,
'--cpuset-mems=' + BENCH_MEMS,
'-t', container_name(commit_hash),
*args], **kwargs).decode('utf8')


def load_results():
Expand Down Expand Up @@ -137,35 +139,42 @@ def merge_into(original, new):
else:
original[key] = new[key]


def print_plan(to_bench):
    """Print a human-readable summary of the commits that will be built.

    Args:
        to_bench: Sized collection of dicts. Each dict is read for its
            'hash' key and its 'commit_time' key; the latter is handed to
            ``datetime.fromtimestamp`` and so must be a POSIX timestamp.

    Prints a "nothing to do" notice when the collection is empty; otherwise
    prints a header with the commit count followed by one line per commit.
    """
    if not to_bench:
        print('> Nothing to do!')
        return

    print('> Building {} commits:'.format(len(to_bench)))
    lines = []
    for result in to_bench:
        # fromtimestamp() renders the commit time in the machine's local timezone.
        committed_at = datetime.fromtimestamp(result['commit_time'])
        lines.append(' - {} from {}'.format(result['hash'], committed_at))
    print('\n'.join(lines))


# Benchmark command lines to run for every commit. Each entry carries its
# command as a list of argv tokens under the 'args' key.
# NOTE(review): presumably each 'args' list is forwarded to run_benchmark() — confirm in __main__.
BENCHMARKS = [
    {'args': ['python', '-m', 'rnns.fastrnns.bench', '--print-json']},
]

# List[Dict[Dict[Int]]] -> Dict[Dict[List[Int]]]


def transpose_results(results):
    """Transpose a list of nested result dicts into a nested dict of lists.

    List[Dict[Dict[Int]]] -> Dict[Dict[List[Int]]]

    Args:
        results: List of two-level dicts ``{outer: {inner: value}}``. Every
            element must expose exactly the same (outer, inner) key pairs.

    Returns:
        ``{outer: {inner: [value_from_results[0], value_from_results[1], ...]}}``,
        with values in the same order as ``results``. An empty ``results``
        list yields an empty dict.

    Raises:
        AssertionError: If the elements of ``results`` do not share the same keys.
    """
    # Nothing to transpose; previously this crashed with IndexError on results[0].
    if not results:
        return {}

    def get_keys(result):
        return sorted((outer, inner) for outer in result for inner in result[outer])

    reference = results[0]
    expected_keys = get_keys(reference)
    # Every run must have measured the same things, otherwise the per-key lists would be ragged.
    assert all(get_keys(result) == expected_keys for result in results)
    return {
        outer: {inner: [result[outer][inner] for result in results] for inner in reference[outer]}
        for outer in reference
    }


def result_stats(result):
    """Reduce every list of samples to a ``(mean, std)`` pair.

    Args:
        result: Two-level dict ``{outer: {inner: [sample, ...]}}``. Every
            sample list must be non-empty (an empty list raises
            ZeroDivisionError when the mean is computed).

    Returns:
        A dict with the same outer/inner keys whose leaves are
        ``(mean, population standard deviation)`` tuples.
    """
    def mean(values):
        return sum(values) / len(values)

    def std(values):
        m = mean(values)
        # Divide by the sample count before taking the root. Without it the
        # value is the root of the *sum* of squared deviations, which is not
        # a standard deviation and grows with the number of samples.
        return math.sqrt(sum((v - m) ** 2 for v in values) / len(values))

    return {
        outer: {inner: (mean(samples), std(samples)) for inner, samples in by_inner.items()}
        for outer, by_inner in result.items()
    }


if __name__ == '__main__':
Expand Down
5 changes: 4 additions & 1 deletion rnns/benchmarks/benchmark_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

# Copied and pasted from benchmark_common under benchmark/scripts


def benchmark_init(cpu, gpu, skip_cpu_governor_check=False):
cpu_pin(cpu)
if not skip_cpu_governor_check:
Expand Down Expand Up @@ -32,5 +33,7 @@ def check_cpu_governor(cpu):
"The file '{}' is not readable.\n"
"More information:\n\n{}".format(fp, e))


def print_results_usecs(name, i, gpu_usecs, cpu_usecs, divide_by):
    """Write one timing line, normalized by ``divide_by``, to stderr.

    Args:
        name: Label printed at the start of the line.
        i: Integer index printed in parentheses after the label.
        gpu_usecs: GPU time; printed after division by ``divide_by``.
        cpu_usecs: CPU time; printed after division by ``divide_by``.
        divide_by: Divisor applied to both timings (must be non-zero).
    """
    message = "{}({:2d}): {:8.3f} usecs ({:8.3f} usecs cpu)".format(
        name, i, gpu_usecs / divide_by, cpu_usecs / divide_by)
    # `file=` belongs to print(), not str.format(): format() silently ignores
    # unused keyword arguments, so the message used to land on stdout.
    print(message, file=sys.stderr)
15 changes: 7 additions & 8 deletions rnns/benchmarks/bnlstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def cast(tensor):
model.cuda()
criterion.cuda()


total_loss = 0
for data, targets in zip(data_batches, target_batches):
gc.collect()
Expand All @@ -71,13 +70,13 @@ def cast(tensor):

if __name__ == '__main__':
parser = argparse.ArgumentParser(description="PyTorch BNLSTM benchmark.")
parser.add_argument('--num_batches', type=int, default=1, help="num batches")
parser.add_argument('--hidden-size', type=int, default=100, help="Hidden size")
parser.add_argument('--max-length', type=int, default=784, help="max seq len")
parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
parser.add_argument('--benchmark', type=int, default=20, help="Benchmark iterations")
parser.add_argument('--jit', action='store_true', help="Use JIT")
parser.add_argument('--cuda', action='store_true', help="Use cuda")
parser.add_argument('--num_batches', type=int, default=1, help="num batches")
parser.add_argument('--hidden-size', type=int, default=100, help="Hidden size")
parser.add_argument('--max-length', type=int, default=784, help="max seq len")
parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
parser.add_argument('--benchmark', type=int, default=20, help="Benchmark iterations")
parser.add_argument('--jit', action='store_true', help="Use JIT")
parser.add_argument('--cuda', action='store_true', help="Use cuda")
args = parser.parse_args()

pprint.pprint(vars(args))
Expand Down
2 changes: 1 addition & 1 deletion rnns/benchmarks/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def summary(self):
assert not self.timing

def mean_min_max(lst):
return SummaryStats(sum(lst)/len(lst), min(lst), max(lst))
return SummaryStats(sum(lst) / len(lst), min(lst), max(lst))

gpu_msecs, cpu_msecs = zip(*self.results)
warmup = self.warmup_iters
Expand Down
28 changes: 14 additions & 14 deletions rnns/benchmarks/cudnn_lstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,12 @@ def run_cudnn_lstm(cpu=0, gpu=0, batch_size=1, input_size=256, hidden_size=512,

benchmark_init(cpu, gpu, skip_cpu_governor_check)


def V(x):
return Variable(x) # mandatory

input = V(torch.randn(seq_len, batch_size, input_size).cuda(gpu))
hx = V(torch.randn(layers, batch_size, hidden_size).cuda(gpu))
cx = V(torch.randn(layers, batch_size, hidden_size).cuda(gpu))
hx = V(torch.randn(layers, batch_size, hidden_size).cuda(gpu))
cx = V(torch.randn(layers, batch_size, hidden_size).cuda(gpu))

lstm = torch.nn.LSTM(input_size, hidden_size, layers).cuda(gpu)
lstm.flatten_parameters()
Expand All @@ -48,17 +47,18 @@ def V(x):

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="PyTorch CuDNN LSTM benchmark.")
parser.add_argument('--cpu', type=int, default=0, help="CPU to run on")
parser.add_argument('--gpu', type=int, default=0, help="GPU to run on")
parser.add_argument('--batch-size', type=int, default=1, help="Batch size")
parser.add_argument('--input-size', type=int, default=256, help="Input size")
parser.add_argument('--hidden-size', type=int, default=512, help="Hidden size")
parser.add_argument('--layers', type=int, default=1, help="Layers")
parser.add_argument('--seq-len', type=int, default=512, help="Sequence length")
parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
parser.add_argument('--benchmark', type=int, default=30, help="Benchmark iterations")
parser.add_argument('--skip-cpu-governor-check', action='store_true', help="Skip checking whether CPU governor is set to `performance`")
parser.add_argument('--backward', action='store_true', help="time backward")
parser.add_argument('--cpu', type=int, default=0, help="CPU to run on")
parser.add_argument('--gpu', type=int, default=0, help="GPU to run on")
parser.add_argument('--batch-size', type=int, default=1, help="Batch size")
parser.add_argument('--input-size', type=int, default=256, help="Input size")
parser.add_argument('--hidden-size', type=int, default=512, help="Hidden size")
parser.add_argument('--layers', type=int, default=1, help="Layers")
parser.add_argument('--seq-len', type=int, default=512, help="Sequence length")
parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
parser.add_argument('--benchmark', type=int, default=30, help="Benchmark iterations")
parser.add_argument('--skip-cpu-governor-check', action='store_true',
help="Skip checking whether CPU governor is set to `performance`")
parser.add_argument('--backward', action='store_true', help="time backward")
args = parser.parse_args()
pprint.pprint(vars(args))

Expand Down
53 changes: 31 additions & 22 deletions rnns/benchmarks/lstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@
# This file copied from scripts/lstm.py.

# If you swap the transpose here, you can test the effect of pre-transposing.


def t_use(x):
    """Hook applied to a weight where it is used; currently the identity.

    Swap the bodies of ``t_use`` and ``t_def`` to measure the effect of
    pre-transposing the weights.
    """
    unchanged = x
    return unchanged


def t_def(x):
    """Hook applied to a weight where it is defined; currently calls ``x.t()``.

    Swap the bodies of ``t_use`` and ``t_def`` to measure the effect of
    pre-transposing the weights.
    """
    transposed = x.t()
    return transposed

Expand Down Expand Up @@ -66,7 +70,7 @@ def lstm(input, hidden, w_ih, w_hh):

def _unfused_lstm(input, hx, cx, w_ih, w_hh):
hx, cx
#return hx.clone(), cx.clone()
# return hx.clone(), cx.clone()
gates = input.mm(t_use(w_ih)) + hx.mm(t_use(w_hh))

ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
Expand Down Expand Up @@ -104,17 +108,20 @@ def run_lstm(cpu=0, gpu=0, batch_size=1, input_size=256, hidden_size=512,
benchmark_init(cpu, gpu, skip_cpu_governor_check)

if variable:
V = lambda x, requires_grad=False: Variable(x, requires_grad=False)
def V(x, requires_grad=False):
return Variable(x, requires_grad=False)
elif autograd:
V = lambda x, requires_grad=False: Variable(x, requires_grad=requires_grad)
def V(x, requires_grad=False):
return Variable(x, requires_grad=requires_grad)
else:
V = lambda x, requires_grad=False: x
def V(x, requires_grad=False):
return x

input = V(torch.randn(batch_size, input_size).cuda(device=gpu))
hx0 = V(torch.randn(batch_size, hidden_size).cuda(device=gpu), requires_grad=True)
cx0 = V(torch.randn(batch_size, hidden_size).cuda(device=gpu), requires_grad=True)
w_ih = V(t_def(torch.randn(4 * hidden_size, input_size)).cuda(device=gpu), requires_grad=True)
w_hh = V(t_def(torch.randn(4 * hidden_size, hidden_size)).cuda(device=gpu), requires_grad=True)
hx0 = V(torch.randn(batch_size, hidden_size).cuda(device=gpu), requires_grad=True)
cx0 = V(torch.randn(batch_size, hidden_size).cuda(device=gpu), requires_grad=True)
w_ih = V(t_def(torch.randn(4 * hidden_size, input_size)).cuda(device=gpu), requires_grad=True)
w_hh = V(t_def(torch.randn(4 * hidden_size, hidden_size)).cuda(device=gpu), requires_grad=True)

if fused:
if backward:
Expand Down Expand Up @@ -148,20 +155,22 @@ def run_lstm(cpu=0, gpu=0, batch_size=1, input_size=256, hidden_size=512,

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="PyTorch LSTM benchmark.")
parser.add_argument('--cpu', type=int, default=0, help="CPU to run on")
parser.add_argument('--gpu', type=int, default=0, help="GPU to run on")
parser.add_argument('--batch-size', type=int, default=1, help="Batch size")
parser.add_argument('--input-size', type=int, default=256, help="Input size")
parser.add_argument('--hidden-size', type=int, default=512, help="Hidden size")
parser.add_argument('--seq-len', type=int, default=None, help="Sequence length")
parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
parser.add_argument('--benchmark', type=int, default=20, help="Benchmark iterations")
parser.add_argument('--autograd', action='store_true', help="Use autograd")
parser.add_argument('--variable', action='store_true', help="Use Variable, but not autograd (measure baseline overhead)")
parser.add_argument('--fused', action='store_true', help="Use fused cell")
parser.add_argument('--jit', action='store_true', help="Use JIT compiler (implies --autograd)")
parser.add_argument('--backward', action='store_true', help="Run backwards computation")
parser.add_argument('--skip-cpu-governor-check', action='store_true', help="Skip checking whether CPU governor is set to `performance`")
parser.add_argument('--cpu', type=int, default=0, help="CPU to run on")
parser.add_argument('--gpu', type=int, default=0, help="GPU to run on")
parser.add_argument('--batch-size', type=int, default=1, help="Batch size")
parser.add_argument('--input-size', type=int, default=256, help="Input size")
parser.add_argument('--hidden-size', type=int, default=512, help="Hidden size")
parser.add_argument('--seq-len', type=int, default=None, help="Sequence length")
parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
parser.add_argument('--benchmark', type=int, default=20, help="Benchmark iterations")
parser.add_argument('--autograd', action='store_true', help="Use autograd")
parser.add_argument('--variable', action='store_true',
help="Use Variable, but not autograd (measure baseline overhead)")
parser.add_argument('--fused', action='store_true', help="Use fused cell")
parser.add_argument('--jit', action='store_true', help="Use JIT compiler (implies --autograd)")
parser.add_argument('--backward', action='store_true', help="Run backwards computation")
parser.add_argument('--skip-cpu-governor-check', action='store_true',
help="Skip checking whether CPU governor is set to `performance`")
args = parser.parse_args()

pprint.pprint(vars(args))
Expand Down
3 changes: 1 addition & 2 deletions rnns/benchmarks/lstm_variants/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class MultiLayerLSTM(nn.Module):

"""
MultiLayer LSTM of any type.
Note: Dropout is deactivated on the last layer.
"""

Expand Down Expand Up @@ -64,4 +64,3 @@ def forward(self, x, hiddens):
x, new_h = l(x, h)
new_hiddens.append(new_h)
return x, new_hiddens

Loading

0 comments on commit 7c381e3

Please sign in to comment.