Add flake8 config from PyTorch and format the code

pytorch · Feb 6, 2019 · 7c381e3
1 parent d73f571
commit 7c381e3
Show file tree

Hide file tree

Showing 33 changed files with 344 additions and 282 deletions.
diff --git a/.flake8 b/.flake8
@@ -0,0 +1,4 @@
+[flake8]
+max-line-length = 120
+ignore = E203,E305,E402,E721,E741,F401,F403,F405,F821,F841,F999,W503,W504
+exclude = third_party
diff --git a/accuracy/model_accuracy.py b/accuracy/model_accuracy.py
@@ -6,7 +6,6 @@
 from datetime import datetime
 import logging
 from tqdm import tqdm
-import os
 import gc
 import itertools
 import sys
@@ -15,7 +14,6 @@
 import torchvision.models as models
 
 
-
 parser = argparse.ArgumentParser(description="PyTorch model accuracy benchmark.")
 parser.add_argument('--repeat', type=int, default=5,
  help="Number of Runs")
@@ -90,6 +88,7 @@ def cmd_string(examples_home, model, data_path):
  cmd = ' '.join(['python3', examples_home, '-a', model, '--lr', str(lr), data_path])
  return cmd
 
+
 def log_init():
  if not os.path.exists(temp_dir):
  os.makedirs(temp_dir)

diff --git a/plot/main.py b/plot/main.py
@@ -14,13 +14,14 @@
 HERE = os.path.dirname(os.path.abspath(__file__))
 MAX_BENCHES = 160
 BENCH_TIMES = 4
-BENCH_EVERY = 10 # th commit
+BENCH_EVERY = 10  # th commit
 
 run = partial(subprocess.check_call, cwd=REPO_DIR)
 run_with_output = partial(subprocess.check_output, cwd=REPO_DIR)
 run_toplevel = subprocess.check_call
 silent = dict(stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
+
 def fetch_repo():
  if os.path.exists(REPO_DIR):
  print('> Pulling new changes...')
@@ -30,6 +31,7 @@ def fetch_repo():
  print('> Cloning repository...')
  run_toplevel(['git', 'clone', '--recursive', REPO_URL, REPO_DIR], **silent)
 
+
 def get_history():
  # git log --format='%H %an %ae %at' -n <num commits>
  fields = [
@@ -56,11 +58,11 @@ def build(commit_hash):
  start = time.time()
  cname = container_name(commit_hash)
  run(['docker', 'run',
-  '--runtime=nvidia',
-  '-v', os.path.join(HERE, '..') + ':/mnt/localdrive',
-  '--name', cname,
-  '-t', 'pytorch_bench',
-  '/bin/bash', '/mnt/localdrive/timing/python/install_pytorch.sh', commit_hash], **silent)
+ '--runtime=nvidia',
+ '-v', os.path.join(HERE, '..') + ':/mnt/localdrive',
+ '--name', cname,
+ '-t', 'pytorch_bench',
+ '/bin/bash', '/mnt/localdrive/timing/python/install_pytorch.sh', commit_hash], **silent)
  run(['docker', 'commit', cname, cname], **silent)
  end = time.time()
  diff = int(end - start)
@@ -78,16 +80,16 @@ def run_benchmark(commit_hash, args, **kwargs):
  BENCH_CPUS = '0-11'
  BENCH_MEMS = '0'
  return run_with_output(['docker', 'run',
- '--cap-add=SYS_PTRACE',
- '--runtime=nvidia',
- '--security-opt',
- 'seccomp=unconfined',
- '-v', os.path.join(HERE, '..') + ':/mnt/localdrive',
- '-w', '/mnt/localdrive',
- '--cpuset-cpus=' + BENCH_CPUS,
- '--cpuset-mems=' + BENCH_MEMS,
- '-t', container_name(commit_hash),
- *args], **kwargs).decode('utf8')
+  '--cap-add=SYS_PTRACE',
+  '--runtime=nvidia',
+  '--security-opt',
+  'seccomp=unconfined',
+  '-v', os.path.join(HERE, '..') + ':/mnt/localdrive',
+  '-w', '/mnt/localdrive',
+  '--cpuset-cpus=' + BENCH_CPUS,
+  '--cpuset-mems=' + BENCH_MEMS,
+  '-t', container_name(commit_hash),
+  *args], **kwargs).decode('utf8')
 
 
 def load_results():
@@ -137,35 +139,42 @@ def merge_into(original, new):
  else:
  original[key] = new[key]
 
+
 def print_plan(to_bench):
  if not to_bench:
  print('> Nothing to do!')
  return
  print('> Building {} commits:'.format(len(to_bench)))
- print('\n'.join(' - {} from {}'.format(result['hash'], datetime.fromtimestamp(result['commit_time'])) for result in to_bench))
+ print('\n'.join(' - {} from {}'.format(result['hash'],
+ datetime.fromtimestamp(result['commit_time'])) for result in to_bench))
 
 
 BENCHMARKS = [
  dict(args=['python', '-m', 'rnns.fastrnns.bench', '--print-json']),
 ]
 
 # List[Dict[Dict[Int]]] -> Dict[Dict[List[Int]]]
+
+
 def transpose_results(results):
  def get_keys(result):
  return sorted([(outer, inner) for outer in result for inner in result[outer]])
  keys = get_keys(results[0])
  assert all(get_keys(result) == keys for result in results)
  any_result = results[0]
- return {outer: {inner: [result[outer][inner] for result in results] for inner in any_result[outer]} for outer in any_result}
+ return {outer: {inner: [result[outer][inner] for result in results] for inner in any_result[outer]}
+ for outer in any_result}
 
 
 def result_stats(result):
  def mean(l):
  return sum(l) / len(l)
+
  def std(l):
  m = mean(l)
  return math.sqrt(sum([(v - m) ** 2 for v in l]))
- return {outer: {inner: (mean(innerv), std(innerv)) for inner, innerv in outerv.items()} for outer, outerv in result.items()}
+ return {outer: {inner: (mean(innerv), std(innerv)) for inner, innerv in outerv.items()}
+ for outer, outerv in result.items()}
 
 
 if __name__ == '__main__':

diff --git a/rnns/benchmarks/benchmark_common.py b/rnns/benchmarks/benchmark_common.py
@@ -4,6 +4,7 @@
 
 # Copied and pasted from benchmark_common under benchmark/scripts
 
+
 def benchmark_init(cpu, gpu, skip_cpu_governor_check=False):
  cpu_pin(cpu)
  if not skip_cpu_governor_check:
@@ -32,5 +33,7 @@ def check_cpu_governor(cpu):
  "The file '{}' is not readable.\n"
  "More information:\n\n{}".format(fp, e))
 
+
 def print_results_usecs(name, i, gpu_usecs, cpu_usecs, divide_by):
- print("{}({:2d}): {:8.3f} usecs ({:8.3f} usecs cpu)".format(name, i, gpu_usecs/divide_by, cpu_usecs/divide_by, file=sys.stderr))
+ print("{}({:2d}): {:8.3f} usecs ({:8.3f} usecs cpu)".format(
+ name, i, gpu_usecs / divide_by, cpu_usecs / divide_by, file=sys.stderr))
diff --git a/rnns/benchmarks/bnlstm.py b/rnns/benchmarks/bnlstm.py
@@ -56,7 +56,6 @@ def cast(tensor):
  model.cuda()
  criterion.cuda()
 
-
  total_loss = 0
  for data, targets in zip(data_batches, target_batches):
  gc.collect()
@@ -71,13 +70,13 @@ def cast(tensor):
 
 if __name__ == '__main__':
  parser = argparse.ArgumentParser(description="PyTorch BNLSTM benchmark.")
- parser.add_argument('--num_batches',  type=int, default=1,  help="num batches")
- parser.add_argument('--hidden-size',  type=int, default=100, help="Hidden size")
- parser.add_argument('--max-length',  type=int, default=784, help="max seq len")
- parser.add_argument('--warmup',  type=int, default=10,  help="Warmup iterations")
- parser.add_argument('--benchmark',  type=int, default=20,  help="Benchmark iterations")
- parser.add_argument('--jit',  action='store_true',  help="Use JIT")
- parser.add_argument('--cuda',  action='store_true',  help="Use cuda")
+ parser.add_argument('--num_batches', type=int, default=1, help="num batches")
+ parser.add_argument('--hidden-size', type=int, default=100, help="Hidden size")
+ parser.add_argument('--max-length', type=int, default=784, help="max seq len")
+ parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
+ parser.add_argument('--benchmark', type=int, default=20, help="Benchmark iterations")
+ parser.add_argument('--jit', action='store_true', help="Use JIT")
+ parser.add_argument('--cuda', action='store_true', help="Use cuda")
  args = parser.parse_args()
 
  pprint.pprint(vars(args))

diff --git a/rnns/benchmarks/common.py b/rnns/benchmarks/common.py
@@ -84,7 +84,7 @@ def summary(self):
  assert not self.timing
 
  def mean_min_max(lst):
- return SummaryStats(sum(lst)/len(lst), min(lst), max(lst))
+ return SummaryStats(sum(lst) / len(lst), min(lst), max(lst))
 
  gpu_msecs, cpu_msecs = zip(*self.results)
  warmup = self.warmup_iters

diff --git a/rnns/benchmarks/cudnn_lstm.py b/rnns/benchmarks/cudnn_lstm.py
@@ -23,13 +23,12 @@ def run_cudnn_lstm(cpu=0, gpu=0, batch_size=1, input_size=256, hidden_size=512,
 
  benchmark_init(cpu, gpu, skip_cpu_governor_check)
 
-
  def V(x):
  return Variable(x) # mandatory
 
  input = V(torch.randn(seq_len, batch_size, input_size).cuda(gpu))
- hx  = V(torch.randn(layers, batch_size, hidden_size).cuda(gpu))
- cx  = V(torch.randn(layers, batch_size, hidden_size).cuda(gpu))
+ hx = V(torch.randn(layers, batch_size, hidden_size).cuda(gpu))
+ cx = V(torch.randn(layers, batch_size, hidden_size).cuda(gpu))
 
  lstm = torch.nn.LSTM(input_size, hidden_size, layers).cuda(gpu)
  lstm.flatten_parameters()
@@ -48,17 +47,18 @@ def V(x):
 
 if __name__ == "__main__":
  parser = argparse.ArgumentParser(description="PyTorch CuDNN LSTM benchmark.")
- parser.add_argument('--cpu', type=int, default=0, help="CPU to run on")
- parser.add_argument('--gpu', type=int, default=0, help="GPU to run on")
- parser.add_argument('--batch-size', type=int, default=1, help="Batch size")
- parser.add_argument('--input-size', type=int, default=256, help="Input size")
- parser.add_argument('--hidden-size', type=int, default=512, help="Hidden size")
- parser.add_argument('--layers', type=int, default=1, help="Layers")
- parser.add_argument('--seq-len', type=int, default=512, help="Sequence length")
- parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
- parser.add_argument('--benchmark', type=int, default=30, help="Benchmark iterations")
- parser.add_argument('--skip-cpu-governor-check', action='store_true', help="Skip checking whether CPU governor is set to `performance`")
- parser.add_argument('--backward', action='store_true', help="time backward")
+ parser.add_argument('--cpu', type=int, default=0, help="CPU to run on")
+ parser.add_argument('--gpu', type=int, default=0, help="GPU to run on")
+ parser.add_argument('--batch-size', type=int, default=1, help="Batch size")
+ parser.add_argument('--input-size', type=int, default=256, help="Input size")
+ parser.add_argument('--hidden-size', type=int, default=512, help="Hidden size")
+ parser.add_argument('--layers', type=int, default=1, help="Layers")
+ parser.add_argument('--seq-len', type=int, default=512, help="Sequence length")
+ parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
+ parser.add_argument('--benchmark', type=int, default=30, help="Benchmark iterations")
+ parser.add_argument('--skip-cpu-governor-check', action='store_true',
+ help="Skip checking whether CPU governor is set to `performance`")
+ parser.add_argument('--backward', action='store_true', help="time backward")
  args = parser.parse_args()
  pprint.pprint(vars(args))
 

diff --git a/rnns/benchmarks/lstm.py b/rnns/benchmarks/lstm.py
@@ -22,8 +22,12 @@
 # This file copied from scripts/lstm.py.
 
 # If you swap the transpose here, you can test the effect of pre-transposing.
+
+
 def t_use(x):
  return x
+
+
 def t_def(x):
  return x.t()
 
@@ -66,7 +70,7 @@ def lstm(input, hidden, w_ih, w_hh):
 
 def _unfused_lstm(input, hx, cx, w_ih, w_hh):
  hx, cx
- #return hx.clone(), cx.clone()
+ # return hx.clone(), cx.clone()
  gates = input.mm(t_use(w_ih)) + hx.mm(t_use(w_hh))
 
  ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1)
@@ -104,17 +108,20 @@ def run_lstm(cpu=0, gpu=0, batch_size=1, input_size=256, hidden_size=512,
  benchmark_init(cpu, gpu, skip_cpu_governor_check)
 
  if variable:
- V = lambda x, requires_grad=False: Variable(x, requires_grad=False)
+ def V(x, requires_grad=False):
+ return Variable(x, requires_grad=False)
  elif autograd:
- V = lambda x, requires_grad=False: Variable(x, requires_grad=requires_grad)
+ def V(x, requires_grad=False):
+ return Variable(x, requires_grad=requires_grad)
  else:
- V = lambda x, requires_grad=False: x
+ def V(x, requires_grad=False):
+ return x
 
  input = V(torch.randn(batch_size, input_size).cuda(device=gpu))
- hx0  = V(torch.randn(batch_size, hidden_size).cuda(device=gpu), requires_grad=True)
- cx0  = V(torch.randn(batch_size, hidden_size).cuda(device=gpu), requires_grad=True)
- w_ih  = V(t_def(torch.randn(4 * hidden_size, input_size)).cuda(device=gpu), requires_grad=True)
- w_hh  = V(t_def(torch.randn(4 * hidden_size, hidden_size)).cuda(device=gpu), requires_grad=True)
+ hx0 = V(torch.randn(batch_size, hidden_size).cuda(device=gpu), requires_grad=True)
+ cx0 = V(torch.randn(batch_size, hidden_size).cuda(device=gpu), requires_grad=True)
+ w_ih = V(t_def(torch.randn(4 * hidden_size, input_size)).cuda(device=gpu), requires_grad=True)
+ w_hh = V(t_def(torch.randn(4 * hidden_size, hidden_size)).cuda(device=gpu), requires_grad=True)
 
  if fused:
  if backward:
@@ -148,20 +155,22 @@ def run_lstm(cpu=0, gpu=0, batch_size=1, input_size=256, hidden_size=512,
 
 if __name__ == "__main__":
  parser = argparse.ArgumentParser(description="PyTorch LSTM benchmark.")
- parser.add_argument('--cpu', type=int, default=0, help="CPU to run on")
- parser.add_argument('--gpu', type=int, default=0, help="GPU to run on")
- parser.add_argument('--batch-size', type=int, default=1, help="Batch size")
- parser.add_argument('--input-size', type=int, default=256, help="Input size")
- parser.add_argument('--hidden-size', type=int, default=512, help="Hidden size")
- parser.add_argument('--seq-len', type=int, default=None, help="Sequence length")
- parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
- parser.add_argument('--benchmark', type=int, default=20, help="Benchmark iterations")
- parser.add_argument('--autograd', action='store_true', help="Use autograd")
- parser.add_argument('--variable', action='store_true', help="Use Variable, but not autograd (measure baseline overhead)")
- parser.add_argument('--fused', action='store_true', help="Use fused cell")
- parser.add_argument('--jit', action='store_true', help="Use JIT compiler (implies --autograd)")
- parser.add_argument('--backward', action='store_true', help="Run backwards computation")
- parser.add_argument('--skip-cpu-governor-check', action='store_true', help="Skip checking whether CPU governor is set to `performance`")
+ parser.add_argument('--cpu', type=int, default=0, help="CPU to run on")
+ parser.add_argument('--gpu', type=int, default=0, help="GPU to run on")
+ parser.add_argument('--batch-size', type=int, default=1, help="Batch size")
+ parser.add_argument('--input-size', type=int, default=256, help="Input size")
+ parser.add_argument('--hidden-size', type=int, default=512, help="Hidden size")
+ parser.add_argument('--seq-len', type=int, default=None, help="Sequence length")
+ parser.add_argument('--warmup', type=int, default=10, help="Warmup iterations")
+ parser.add_argument('--benchmark', type=int, default=20, help="Benchmark iterations")
+ parser.add_argument('--autograd', action='store_true', help="Use autograd")
+ parser.add_argument('--variable', action='store_true',
+ help="Use Variable, but not autograd (measure baseline overhead)")
+ parser.add_argument('--fused', action='store_true', help="Use fused cell")
+ parser.add_argument('--jit', action='store_true', help="Use JIT compiler (implies --autograd)")
+ parser.add_argument('--backward', action='store_true', help="Run backwards computation")
+ parser.add_argument('--skip-cpu-governor-check', action='store_true',
+ help="Skip checking whether CPU governor is set to `performance`")
  args = parser.parse_args()
 
  pprint.pprint(vars(args))

diff --git a/rnns/benchmarks/lstm_variants/container.py b/rnns/benchmarks/lstm_variants/container.py
@@ -17,7 +17,7 @@ class MultiLayerLSTM(nn.Module):
 
  """
  MultiLayer LSTM of any type.
- 
+
  Note: Dropout is deactivated on the last layer.
  """
 
@@ -64,4 +64,3 @@ def forward(self, x, hiddens):
  x, new_h = l(x, h)
  new_hiddens.append(new_h)
  return x, new_hiddens
-