Format most of torchbench
Summary:
It's really nice to have autolinters set up.  This formats everything
but the torchbenchmark/*models directories and userbenchmark directories.

Reviewed By: xuzhao9, chenyang78, sijiac

Differential Revision: D55926531

fbshipit-source-id: 7bd2e31457fbdc2f7944646f165fa337ed88dbac
bertmaher authored and facebook-github-bot committed Apr 10, 2024
1 parent 509bee1 commit fc72ed4
Showing 96 changed files with 5,059 additions and 1,947 deletions.
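The reformatting in the diffs below is characteristic of black-style line wrapping plus alphabetized import sorting. As a rough sketch of the equivalent manual pass — assuming black and isort are the formatters behind the autolinter setup, which the commit message does not name — the run could look like this:

```python
# Sketch of the formatting pass this commit applies; black and isort are
# assumptions (the commit message says only "autolinters"), and the skip
# globs mirror the excluded torchbenchmark/*models and userbenchmark dirs.
import subprocess


def format_tree(root: str = ".") -> None:
    # Sort imports first, skipping the directories the commit leaves alone.
    subprocess.check_call(
        [
            "isort",
            root,
            "--skip-glob", "torchbenchmark/*models/*",
            "--skip-glob", "userbenchmark/*",
        ]
    )
    # Then apply code formatting with the same exclusions (black takes a regex).
    subprocess.check_call(
        ["black", root, "--exclude", r"torchbenchmark/.*models/|userbenchmark/"]
    )


format_tree()
```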
92 changes: 71 additions & 21 deletions .github/scripts/abtest.py
@@ -1,25 +1,28 @@
 """
 This script runs userbenchmarks abtest upon two PyTorch versions.
 """
+
 import argparse
+import json
 import os
-import subprocess
 import shutil
+import subprocess
 import sys
-import json
 from pathlib import Path
-from bmutils import REPO_ROOT, add_path
 from typing import Dict, Optional
 
+from bmutils import add_path, REPO_ROOT
+
 with add_path(REPO_ROOT):
     import torchbenchmark.util.gitutils as gitutils
     from userbenchmark import list_userbenchmarks
-    from utils.cuda_utils import prepare_cuda_env, DEFAULT_CUDA_VERSION
+    from utils.cuda_utils import DEFAULT_CUDA_VERSION, prepare_cuda_env
 
 USERBENCHMARK_OUTPUT_PATH = os.path.join(REPO_ROOT, ".userbenchmark")
 # only preserve the first 10 chars of the git hash
 GIT_HASH_LEN = 10
 
+
 def cleanup():
     print("Cleaning up torch packages...", end="", flush=True)
     CLEANUP_ROUND = 5
@@ -29,7 +32,14 @@ def cleanup():
         subprocess.check_call(command, shell=False)
     print("done")
 
-def run_commit(repo_path: str, env: os._Environ, commit: str, bm_name: str, skip_build: bool=False) -> Path:
+
+def run_commit(
+    repo_path: str,
+    env: os._Environ,
+    commit: str,
+    bm_name: str,
+    skip_build: bool = False,
+) -> Path:
     "Run the userbenchmark on the commit. Return the metrics output file path."
     # build the pytorch commit if required
     if not skip_build:
@@ -38,22 +48,30 @@ def run_commit(repo_path: str, env: os._Environ, commit: str, bm_name: str, skip
     # run_benchmark
     return run_benchmark(bm_name, cuda_env=env)
 
+
 def validate_benchmark_output(bm_output: Path, bm_name: str):
     with open(bm_output, "r") as bmobj:
         output = json.load(bmobj)
-    assert output["name"] == bm_name, f"Expected benchmark name {bm_name}, getting {output['name']}."
-    assert "environ" in output and "pytorch_git_version" in output["environ"], \
-        f"Missing pytorch git version in {bm_output}."
+    assert (
+        output["name"] == bm_name
+    ), f"Expected benchmark name {bm_name}, getting {output['name']}."
+    assert (
+        "environ" in output and "pytorch_git_version" in output["environ"]
+    ), f"Missing pytorch git version in {bm_output}."
     assert "metrics" in output, f"Missing definition of metrics in {bm_output}."
 
+
 def run_benchmark(bm_name: str, cuda_env: os._Environ) -> Path:
     def find_latest_output(p: str) -> Optional[Path]:
         if not os.path.exists(p) or not os.path.isdir(p):
             return None
-        json_files = [ os.path.join(p, jf) for jf in sorted(os.listdir(p)) if jf.endswith(".json") ]
+        json_files = [
+            os.path.join(p, jf) for jf in sorted(os.listdir(p)) if jf.endswith(".json")
+        ]
         if len(json_files) == 0:
             return None
         return json_files[-1]
+
     command = [sys.executable, "run_benchmark.py", bm_name]
     try:
         subprocess.check_call(command, env=cuda_env, cwd=REPO_ROOT, shell=False)
@@ -68,6 +86,7 @@ def find_latest_output(p: str) -> Optional[Path]:
     validate_benchmark_output(output_file, bm_name)
     return output_file
 
+
 def setup_build_env(env) -> Dict[str, str]:
     env["USE_CUDA"] = "1"
     env["BUILD_CAFFE2_OPS"] = "0"
@@ -79,6 +98,7 @@
     env["CMAKE_PREFIX_PATH"] = env["CONDA_PREFIX"]
     return env
 
+
 def build_pytorch_commit(repo_path: str, commit: str, cuda_env: os._Environ):
     # checkout pytorch commit
     print(f"Checking out pytorch commit {commit} ...", end="", flush=True)
@@ -106,23 +126,35 @@ def build_pytorch_commit(repo_path: str, commit: str, cuda_env: os._Environ):
         subprocess.check_call(command, cwd=repo_path, env=build_env, shell=False)
     finally:
         command_testbuild = ["python", "-c", "'import torch'"]
-        subprocess.check_call(command_testbuild, cwd=os.environ["HOME"], env=build_env, shell=False)
+        subprocess.check_call(
+            command_testbuild, cwd=os.environ["HOME"], env=build_env, shell=False
+        )
     print("done")
 
+
 def process_test_result(result_a: Path, result_b: Path, output_dir: str) -> str:
     def validate_results(a, b) -> bool:
         metrics = a["metrics"].keys()
         return sorted(metrics) == sorted(b["metrics"])
+
     # check two results are different files
-    assert not result_a == result_b, f"Path {result_a} and {result_b} are the same. Exit."
+    assert (
+        not result_a == result_b
+    ), f"Path {result_a} and {result_b} are the same. Exit."
     # validate results
     with open(result_a, "r") as fa:
         a = json.load(fa)
     with open(result_b, "r") as fb:
         b = json.load(fb)
-    assert validate_results(a, b), f"Result validation failed for {result_a} and {result_b}."
+    assert validate_results(
+        a, b
+    ), f"Result validation failed for {result_a} and {result_b}."
     # print result in csv format
-    header = ["Metric", a["environ"]["pytorch_git_version"][:GIT_HASH_LEN], b["environ"]["pytorch_git_version"][:GIT_HASH_LEN]]
+    header = [
+        "Metric",
+        a["environ"]["pytorch_git_version"][:GIT_HASH_LEN],
+        b["environ"]["pytorch_git_version"][:GIT_HASH_LEN],
+    ]
     out = [header]
     metrics = a["metrics"].keys()
     for m in sorted(metrics):
@@ -140,25 +172,43 @@ def validate_results(a, b) -> bool:
         fout.write(out + "\n")
     return out
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--pytorch-repo", required=True, type=str, help="PyTorch repo path")
+    parser.add_argument(
+        "--pytorch-repo", required=True, type=str, help="PyTorch repo path"
+    )
     parser.add_argument("--base", required=True, type=str, help="PyTorch base commit")
     parser.add_argument("--head", required=True, type=str, help="PyTorch head commit")
-    parser.add_argument("--userbenchmark", required=True, type=str, help="Name of the userbenchmark to run")
+    parser.add_argument(
+        "--userbenchmark",
+        required=True,
+        type=str,
+        help="Name of the userbenchmark to run",
+    )
     parser.add_argument("--output-dir", required=True, type=str, help="Output dir path")
     parser.add_argument("--skip-build", action="store_true", help="Skip PyTorch build")
    args = parser.parse_args()
     # sanity checks
-    assert args.userbenchmark in list_userbenchmarks(), f"Available userbenchmark list: {list_userbenchmarks()}, " \
-        f"but you specified {args.userbenchmark}."
+    assert args.userbenchmark in list_userbenchmarks(), (
+        f"Available userbenchmark list: {list_userbenchmarks()}, "
+        f"but you specified {args.userbenchmark}."
+    )
     if not args.skip_build:
-        assert Path(args.pytorch_repo).is_dir(), f"Specified PyTorch repo dir {args.pytorch_repo} doesn't exist."
+        assert Path(
+            args.pytorch_repo
+        ).is_dir(), f"Specified PyTorch repo dir {args.pytorch_repo} doesn't exist."
     commits = gitutils.get_git_commits(args.pytorch_repo, args.base, args.head)
-    assert commits, f"Can't find git commit {args.base} or {args.head} in repo {args.pytorch_repo}"
+    assert (
+        commits
+    ), f"Can't find git commit {args.base} or {args.head} in repo {args.pytorch_repo}"
     # setup cuda environment
     cuda_env = prepare_cuda_env(cuda_version=DEFAULT_CUDA_VERSION)
-    result_a = run_commit(args.pytorch_repo, cuda_env, args.base, args.userbenchmark, args.skip_build)
-    result_b = run_commit(args.pytorch_repo, cuda_env, args.head, args.userbenchmark, args.skip_build)
+    result_a = run_commit(
+        args.pytorch_repo, cuda_env, args.base, args.userbenchmark, args.skip_build
+    )
+    result_b = run_commit(
+        args.pytorch_repo, cuda_env, args.head, args.userbenchmark, args.skip_build
+    )
     compare_result = process_test_result(result_a, result_b, args.output_dir)
     print(compare_result)
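For reference, the script's command line (read off the argparse definitions above) takes a PyTorch repo, two commits, a userbenchmark name, and an output directory. A hypothetical invocation — the repo path, commit hashes, benchmark name, and output dir below are placeholders:

```python
# Hypothetical driver for the reformatted abtest.py; all argument values
# here are illustrative placeholders, not values taken from this commit.
import subprocess
import sys

subprocess.check_call(
    [
        sys.executable,
        ".github/scripts/abtest.py",
        "--pytorch-repo", "/scratch/pytorch",  # local PyTorch checkout
        "--base", "aaaaaaaaaa",                # baseline commit hash
        "--head", "bbbbbbbbbb",                # head commit hash
        "--userbenchmark", "nightly",          # must appear in list_userbenchmarks()
        "--output-dir", "/tmp/abtest",
    ]
)
```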
5 changes: 3 additions & 2 deletions .github/scripts/bmutils/__init__.py
@@ -4,7 +4,8 @@
 CURRENT_DIR = Path(__file__).parent
 REPO_ROOT = str(CURRENT_DIR.parent.parent.parent)
 
-class add_path():
+
+class add_path:
     def __init__(self, path):
         self.path = path
 
@@ -15,4 +16,4 @@ def __exit__(self, exc_type, exc_value, traceback):
         try:
             sys.path.remove(self.path)
         except ValueError:
-            pass
\ No newline at end of file
+            pass
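As context for this helper, `add_path` is a context manager that temporarily puts a directory on `sys.path` and, per the `__exit__` shown above, removes it again on exit. A small usage sketch matching how abtest.py consumes it:

```python
# Usage sketch for bmutils.add_path, mirroring the import pattern in
# abtest.py: REPO_ROOT is on sys.path only while the with-block is active.
from bmutils import add_path, REPO_ROOT

with add_path(REPO_ROOT):
    from userbenchmark import list_userbenchmarks  # resolved against REPO_ROOT

print(list_userbenchmarks())  # the imported name stays bound after exit
```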
44 changes: 34 additions & 10 deletions .github/scripts/bmutils/analyze-bisection-result.py
@@ -1,37 +1,54 @@
 import argparse
-import os
 import json
-import yaml
+import os
 from pathlib import Path
+
+import yaml
 
 WORKFLOW_LINK_TEMPLATE = "https://github.com/pytorch/benchmark/actions/runs/"
 
+
 def check_env(bisection_root: str):
     "Check `bisection_root` contains bisection config file, github issue file, and result json."
     # gh-issue.md exists
     # result.json exists
     bisection_path = Path(bisection_root)
     assert os.environ["GITHUB_ENV"], f"GITHUB_ENV environment variable doesn't exist."
-    assert bisection_path.is_dir(), f"Specified bisection root {bisection_path} is not a directory."
-    assert bisection_path.joinpath("gh-issue.md").exists(), \
+    assert (
+        bisection_path.is_dir()
+    ), f"Specified bisection root {bisection_path} is not a directory."
+    assert bisection_path.joinpath(
+        "gh-issue.md"
+    ).exists(), (
         f"Bisection directory {bisection_path} doesn't contain file gh-issue.md."
-    assert bisection_path.joinpath("result.json").exists(), \
+    )
+    assert bisection_path.joinpath(
+        "result.json"
+    ).exists(), (
         f"Bisection directory {bisection_path} doesn't contain file result.json."
-    assert bisection_path.joinpath("config.yaml").exists(), \
+    )
+    assert bisection_path.joinpath(
+        "config.yaml"
+    ).exists(), (
         f"Bisection directory {bisection_path} doesn't contain file config.yaml."
+    )
 
+
 def setup_gh_issue(bisection_root: str, gh_workflow_id: str):
     bisection_path = Path(bisection_root)
     json_path = bisection_path.joinpath("result.json")
     with open(json_path, "r") as jp:
         result = jp.read()
     result = f"\nResult json: \n```\n{result}\n```"
-    workflow_str = f"\nBisection workflow link: {WORKFLOW_LINK_TEMPLATE}{gh_workflow_id}\n"
+    workflow_str = (
+        f"\nBisection workflow link: {WORKFLOW_LINK_TEMPLATE}{gh_workflow_id}\n"
+    )
     gh_issue_path = bisection_path.joinpath("gh-issue.md")
     with open(gh_issue_path, "a") as ghi:
         ghi.write(result)
         ghi.write(workflow_str)
 
+
 def set_env_if_nonempty(bisection_root: str):
     bisection_path = Path(bisection_root)
     json_path = bisection_path.joinpath("result.json")
@@ -45,13 +62,20 @@ def set_env_if_nonempty(bisection_root: str):
         config = yaml.safe_load(config_file)
     affected_pytorch_version = config["end_version"]
     fname = os.environ["GITHUB_ENV"]
-    content = f"TORCHBENCH_PERF_BISECTION_NONEMPTY_SIGNAL='{affected_pytorch_version}'\n"
-    with open(fname, 'a') as fo:
+    content = (
+        f"TORCHBENCH_PERF_BISECTION_NONEMPTY_SIGNAL='{affected_pytorch_version}'\n"
+    )
+    with open(fname, "a") as fo:
         fo.write(content)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--bisection-root", required=True, help="Root directory of the bisection directory")
+    parser.add_argument(
+        "--bisection-root",
+        required=True,
+        help="Root directory of the bisection directory",
+    )
     parser.add_argument("--gh-workflow-id", required=True, help="GitHub workflow id")
     args = parser.parse_args()
     check_env(args.bisection_root)
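Going by the argparse setup above, the bisection workflow would drive this script roughly as follows; the bisection directory and workflow id are placeholders, and `GITHUB_ENV` must point at a writable file (see `check_env`):

```python
# Hypothetical invocation of analyze-bisection-result.py; the bisection
# root and workflow id are placeholders. GITHUB_ENV is normally provided
# by GitHub Actions; it is stubbed here so check_env passes.
import os
import subprocess
import sys

env = dict(os.environ, GITHUB_ENV="/tmp/github.env")
subprocess.check_call(
    [
        sys.executable,
        ".github/scripts/bmutils/analyze-bisection-result.py",
        "--bisection-root", "/tmp/bisection",
        "--gh-workflow-id", "8888888888",
    ],
    env=env,
)
```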
[Diffs for the remaining 93 changed files are not loaded in this view.]
