Format most of torchbench
Summary:
It's really nice to have autolinters set up.  This formats everything
but the torchbenchmark/*models directories and userbenchmark directories.

Reviewed By: xuzhao9, chenyang78, sijiac

Differential Revision: D55926531

fbshipit-source-id: 7bd2e31457fbdc2f7944646f165fa337ed88dbac
bertmaher authored and facebook-github-bot committed Apr 10, 2024
1 parent 509bee1 commit fc72ed4
Showing 96 changed files with 5,059 additions and 1,947 deletions.
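The reformatting in the diffs below is characteristic of black-style line wrapping plus alphabetized import sorting. As a rough sketch of the equivalent manual pass — assuming black and isort are the formatters behind the autolinter setup, which the commit message does not name — the run could look like this:

```python
# Sketch of the formatting pass this commit applies; black and isort are
# assumptions (the commit message says only "autolinters"), and the skip
# globs mirror the excluded torchbenchmark/*models and userbenchmark dirs.
import subprocess


def format_tree(root: str = ".") -> None:
    # Sort imports first, skipping the directories the commit leaves alone.
    subprocess.check_call(
        [
            "isort",
            root,
            "--skip-glob", "torchbenchmark/*models/*",
            "--skip-glob", "userbenchmark/*",
        ]
    )
    # Then apply code formatting with the same exclusions (black takes a regex).
    subprocess.check_call(
        ["black", root, "--exclude", r"torchbenchmark/.*models/|userbenchmark/"]
    )


format_tree()
```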
92 changes: 71 additions & 21 deletions .github/scripts/abtest.py
@@ -1,25 +1,28 @@
 """
 This script runs userbenchmarks abtest upon two PyTorch versions.
 """
+
 import argparse
+import json
 import os
-import subprocess
 import shutil
+import subprocess
 import sys
-import json
 from pathlib import Path
-from bmutils import REPO_ROOT, add_path
 from typing import Dict, Optional
 
+from bmutils import add_path, REPO_ROOT
+
 with add_path(REPO_ROOT):
     import torchbenchmark.util.gitutils as gitutils
     from userbenchmark import list_userbenchmarks
-    from utils.cuda_utils import prepare_cuda_env, DEFAULT_CUDA_VERSION
+    from utils.cuda_utils import DEFAULT_CUDA_VERSION, prepare_cuda_env
 
 USERBENCHMARK_OUTPUT_PATH = os.path.join(REPO_ROOT, ".userbenchmark")
 # only preserve the first 10 chars of the git hash
 GIT_HASH_LEN = 10
 
+
 def cleanup():
     print("Cleaning up torch packages...", end="", flush=True)
     CLEANUP_ROUND = 5
@@ -29,7 +32,14 @@ def cleanup():
         subprocess.check_call(command, shell=False)
     print("done")
 
-def run_commit(repo_path: str, env: os._Environ, commit: str, bm_name: str, skip_build: bool=False) -> Path:
+
+def run_commit(
+    repo_path: str,
+    env: os._Environ,
+    commit: str,
+    bm_name: str,
+    skip_build: bool = False,
+) -> Path:
     "Run the userbenchmark on the commit. Return the metrics output file path."
     # build the pytorch commit if required
     if not skip_build:
@@ -38,22 +48,30 @@ def run_commit(repo_path: str, env: os._Environ, commit: str, bm_name: str, skip
     # run_benchmark
     return run_benchmark(bm_name, cuda_env=env)
 
+
 def validate_benchmark_output(bm_output: Path, bm_name: str):
     with open(bm_output, "r") as bmobj:
         output = json.load(bmobj)
-    assert output["name"] == bm_name, f"Expected benchmark name {bm_name}, getting {output['name']}."
-    assert "environ" in output and "pytorch_git_version" in output["environ"], \
-        f"Missing pytorch git version in {bm_output}."
+    assert (
+        output["name"] == bm_name
+    ), f"Expected benchmark name {bm_name}, getting {output['name']}."
+    assert (
+        "environ" in output and "pytorch_git_version" in output["environ"]
+    ), f"Missing pytorch git version in {bm_output}."
     assert "metrics" in output, f"Missing definition of metrics in {bm_output}."
 
+
 def run_benchmark(bm_name: str, cuda_env: os._Environ) -> Path:
     def find_latest_output(p: str) -> Optional[Path]:
         if not os.path.exists(p) or not os.path.isdir(p):
             return None
-        json_files = [ os.path.join(p, jf) for jf in sorted(os.listdir(p)) if jf.endswith(".json") ]
+        json_files = [
+            os.path.join(p, jf) for jf in sorted(os.listdir(p)) if jf.endswith(".json")
+        ]
         if len(json_files) == 0:
             return None
         return json_files[-1]
+
     command = [sys.executable, "run_benchmark.py", bm_name]
     try:
         subprocess.check_call(command, env=cuda_env, cwd=REPO_ROOT, shell=False)
@@ -68,6 +86,7 @@ def find_latest_output(p: str) -> Optional[Path]:
     validate_benchmark_output(output_file, bm_name)
     return output_file
 
+
 def setup_build_env(env) -> Dict[str, str]:
     env["USE_CUDA"] = "1"
     env["BUILD_CAFFE2_OPS"] = "0"
@@ -79,6 +98,7 @@
     env["CMAKE_PREFIX_PATH"] = env["CONDA_PREFIX"]
     return env
 
+
 def build_pytorch_commit(repo_path: str, commit: str, cuda_env: os._Environ):
     # checkout pytorch commit
     print(f"Checking out pytorch commit {commit} ...", end="", flush=True)
@@ -106,23 +126,35 @@ def build_pytorch_commit(repo_path: str, commit: str, cuda_env: os._Environ):
         subprocess.check_call(command, cwd=repo_path, env=build_env, shell=False)
     finally:
         command_testbuild = ["python", "-c", "'import torch'"]
-        subprocess.check_call(command_testbuild, cwd=os.environ["HOME"], env=build_env, shell=False)
+        subprocess.check_call(
+            command_testbuild, cwd=os.environ["HOME"], env=build_env, shell=False
+        )
     print("done")
 
+
 def process_test_result(result_a: Path, result_b: Path, output_dir: str) -> str:
     def validate_results(a, b) -> bool:
         metrics = a["metrics"].keys()
         return sorted(metrics) == sorted(b["metrics"])
+
     # check two results are different files
-    assert not result_a == result_b, f"Path {result_a} and {result_b} are the same. Exit."
+    assert (
+        not result_a == result_b
+    ), f"Path {result_a} and {result_b} are the same. Exit."
     # validate results
     with open(result_a, "r") as fa:
         a = json.load(fa)
     with open(result_b, "r") as fb:
         b = json.load(fb)
-    assert validate_results(a, b), f"Result validation failed for {result_a} and {result_b}."
+    assert validate_results(
+        a, b
+    ), f"Result validation failed for {result_a} and {result_b}."
     # print result in csv format
-    header = ["Metric", a["environ"]["pytorch_git_version"][:GIT_HASH_LEN], b["environ"]["pytorch_git_version"][:GIT_HASH_LEN]]
+    header = [
+        "Metric",
+        a["environ"]["pytorch_git_version"][:GIT_HASH_LEN],
+        b["environ"]["pytorch_git_version"][:GIT_HASH_LEN],
+    ]
     out = [header]
     metrics = a["metrics"].keys()
     for m in sorted(metrics):
@@ -140,25 +172,43 @@ def validate_results(a, b) -> bool:
         fout.write(out + "\n")
     return out
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--pytorch-repo", required=True, type=str, help="PyTorch repo path")
+    parser.add_argument(
+        "--pytorch-repo", required=True, type=str, help="PyTorch repo path"
+    )
     parser.add_argument("--base", required=True, type=str, help="PyTorch base commit")
     parser.add_argument("--head", required=True, type=str, help="PyTorch head commit")
-    parser.add_argument("--userbenchmark", required=True, type=str, help="Name of the userbenchmark to run")
+    parser.add_argument(
+        "--userbenchmark",
+        required=True,
+        type=str,
+        help="Name of the userbenchmark to run",
+    )
     parser.add_argument("--output-dir", required=True, type=str, help="Output dir path")
     parser.add_argument("--skip-build", action="store_true", help="Skip PyTorch build")
    args = parser.parse_args()
     # sanity checks
-    assert args.userbenchmark in list_userbenchmarks(), f"Available userbenchmark list: {list_userbenchmarks()}, " \
-        f"but you specified {args.userbenchmark}."
+    assert args.userbenchmark in list_userbenchmarks(), (
+        f"Available userbenchmark list: {list_userbenchmarks()}, "
+        f"but you specified {args.userbenchmark}."
+    )
     if not args.skip_build:
-        assert Path(args.pytorch_repo).is_dir(), f"Specified PyTorch repo dir {args.pytorch_repo} doesn't exist."
+        assert Path(
+            args.pytorch_repo
+        ).is_dir(), f"Specified PyTorch repo dir {args.pytorch_repo} doesn't exist."
     commits = gitutils.get_git_commits(args.pytorch_repo, args.base, args.head)
-    assert commits, f"Can't find git commit {args.base} or {args.head} in repo {args.pytorch_repo}"
+    assert (
+        commits
+    ), f"Can't find git commit {args.base} or {args.head} in repo {args.pytorch_repo}"
     # setup cuda environment
     cuda_env = prepare_cuda_env(cuda_version=DEFAULT_CUDA_VERSION)
-    result_a = run_commit(args.pytorch_repo, cuda_env, args.base, args.userbenchmark, args.skip_build)
-    result_b = run_commit(args.pytorch_repo, cuda_env, args.head, args.userbenchmark, args.skip_build)
+    result_a = run_commit(
+        args.pytorch_repo, cuda_env, args.base, args.userbenchmark, args.skip_build
+    )
+    result_b = run_commit(
+        args.pytorch_repo, cuda_env, args.head, args.userbenchmark, args.skip_build
+    )
     compare_result = process_test_result(result_a, result_b, args.output_dir)
     print(compare_result)
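For reference, the script's command line (read off the argparse definitions above) takes a PyTorch repo, two commits, a userbenchmark name, and an output directory. A hypothetical invocation — the repo path, commit hashes, benchmark name, and output dir below are placeholders:

```python
# Hypothetical driver for the reformatted abtest.py; all argument values
# here are illustrative placeholders, not values taken from this commit.
import subprocess
import sys

subprocess.check_call(
    [
        sys.executable,
        ".github/scripts/abtest.py",
        "--pytorch-repo", "/scratch/pytorch",  # local PyTorch checkout
        "--base", "aaaaaaaaaa",                # baseline commit hash
        "--head", "bbbbbbbbbb",                # head commit hash
        "--userbenchmark", "nightly",          # must appear in list_userbenchmarks()
        "--output-dir", "/tmp/abtest",
    ]
)
```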
5 changes: 3 additions & 2 deletions .github/scripts/bmutils/__init__.py
@@ -4,7 +4,8 @@
 CURRENT_DIR = Path(__file__).parent
 REPO_ROOT = str(CURRENT_DIR.parent.parent.parent)
 
-class add_path():
+
+class add_path:
     def __init__(self, path):
         self.path = path
 
@@ -15,4 +16,4 @@ def __exit__(self, exc_type, exc_value, traceback):
         try:
             sys.path.remove(self.path)
         except ValueError:
-            pass
\ No newline at end of file
+            pass
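As context for this helper, `add_path` is a context manager that temporarily puts a directory on `sys.path` and, per the `__exit__` shown above, removes it again on exit. A small usage sketch matching how abtest.py consumes it:

```python
# Usage sketch for bmutils.add_path, mirroring the import pattern in
# abtest.py: REPO_ROOT is on sys.path only while the with-block is active.
from bmutils import add_path, REPO_ROOT

with add_path(REPO_ROOT):
    from userbenchmark import list_userbenchmarks  # resolved against REPO_ROOT

print(list_userbenchmarks())  # the imported name stays bound after exit
```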
44 changes: 34 additions & 10 deletions .github/scripts/bmutils/analyze-bisection-result.py
@@ -1,37 +1,54 @@
 import argparse
-import os
 import json
-import yaml
+import os
 from pathlib import Path
+
+import yaml
 
 WORKFLOW_LINK_TEMPLATE = "https://github.com/pytorch/benchmark/actions/runs/"
 
+
 def check_env(bisection_root: str):
     "Check `bisection_root` contains bisection config file, github issue file, and result json."
     # gh-issue.md exists
     # result.json exists
     bisection_path = Path(bisection_root)
     assert os.environ["GITHUB_ENV"], f"GITHUB_ENV environment variable doesn't exist."
-    assert bisection_path.is_dir(), f"Specified bisection root {bisection_path} is not a directory."
-    assert bisection_path.joinpath("gh-issue.md").exists(), \
+    assert (
+        bisection_path.is_dir()
+    ), f"Specified bisection root {bisection_path} is not a directory."
+    assert bisection_path.joinpath(
+        "gh-issue.md"
+    ).exists(), (
         f"Bisection directory {bisection_path} doesn't contain file gh-issue.md."
-    assert bisection_path.joinpath("result.json").exists(), \
+    )
+    assert bisection_path.joinpath(
+        "result.json"
+    ).exists(), (
         f"Bisection directory {bisection_path} doesn't contain file result.json."
-    assert bisection_path.joinpath("config.yaml").exists(), \
+    )
+    assert bisection_path.joinpath(
+        "config.yaml"
+    ).exists(), (
         f"Bisection directory {bisection_path} doesn't contain file config.yaml."
+    )
 
+
 def setup_gh_issue(bisection_root: str, gh_workflow_id: str):
     bisection_path = Path(bisection_root)
     json_path = bisection_path.joinpath("result.json")
     with open(json_path, "r") as jp:
         result = jp.read()
     result = f"\nResult json: \n```\n{result}\n```"
-    workflow_str = f"\nBisection workflow link: {WORKFLOW_LINK_TEMPLATE}{gh_workflow_id}\n"
+    workflow_str = (
+        f"\nBisection workflow link: {WORKFLOW_LINK_TEMPLATE}{gh_workflow_id}\n"
+    )
     gh_issue_path = bisection_path.joinpath("gh-issue.md")
     with open(gh_issue_path, "a") as ghi:
         ghi.write(result)
         ghi.write(workflow_str)
 
+
 def set_env_if_nonempty(bisection_root: str):
     bisection_path = Path(bisection_root)
     json_path = bisection_path.joinpath("result.json")
@@ -45,13 +62,20 @@ def set_env_if_nonempty(bisection_root: str):
         config = yaml.safe_load(config_file)
     affected_pytorch_version = config["end_version"]
     fname = os.environ["GITHUB_ENV"]
-    content = f"TORCHBENCH_PERF_BISECTION_NONEMPTY_SIGNAL='{affected_pytorch_version}'\n"
-    with open(fname, 'a') as fo:
+    content = (
+        f"TORCHBENCH_PERF_BISECTION_NONEMPTY_SIGNAL='{affected_pytorch_version}'\n"
+    )
+    with open(fname, "a") as fo:
         fo.write(content)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--bisection-root", required=True, help="Root directory of the bisection directory")
+    parser.add_argument(
+        "--bisection-root",
+        required=True,
+        help="Root directory of the bisection directory",
+    )
     parser.add_argument("--gh-workflow-id", required=True, help="GitHub workflow id")
     args = parser.parse_args()
     check_env(args.bisection_root)
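Going by the argparse setup above, the bisection workflow would drive this script roughly as follows; the bisection directory and workflow id are placeholders, and `GITHUB_ENV` must point at a writable file (see `check_env`):

```python
# Hypothetical invocation of analyze-bisection-result.py; the bisection
# root and workflow id are placeholders. GITHUB_ENV is normally provided
# by GitHub Actions; it is stubbed here so check_env passes.
import os
import subprocess
import sys

env = dict(os.environ, GITHUB_ENV="/tmp/github.env")
subprocess.check_call(
    [
        sys.executable,
        ".github/scripts/bmutils/analyze-bisection-result.py",
        "--bisection-root", "/tmp/bisection",
        "--gh-workflow-id", "8888888888",
    ],
    env=env,
)
```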
[Diffs for the remaining 93 changed files are not loaded in this view.]
