Make the nightly workflow run on the main branch #2262

Closed
wants to merge 15 commits
7 changes: 3 additions & 4 deletions .github/workflows/v3-nightly.yml
@@ -1,4 +1,4 @@
name: TorchBench V3 nightly (A100)
name: TorchBench nightly (A100)
on:
workflow_dispatch:
schedule:
@@ -23,7 +23,6 @@ jobs:
- name: Checkout TorchBench v3.0 branch
uses: actions/checkout@v3
with:
ref: v3.0
path: benchmark
- name: Tune Nvidia GPU
run: |
@@ -40,14 +39,14 @@
. "${SETUP_SCRIPT}"
pushd benchmark
python install.py
- name: Run the torch-nightly userbenchmark
- name: Run the torch-nightly-test userbenchmark
run: |
. "${SETUP_SCRIPT}"
# remove old results
if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
pushd benchmark
if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
python run_benchmark.py torch-nightly -c v3-cuda-tests.yaml
python run_benchmark.py torch-nightly-test -c nightly.yaml
cp -r ./.userbenchmark/torch-nightly ../benchmark-output
- name: Detect potential regressions
continue-on-error: true
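The workflow now drives the renamed torch-nightly-test userbenchmark with its own nightly.yaml config. The benchmark's entry point itself is not part of this excerpt; as a hedged sketch only, assuming the usual userbenchmark convention of a `run(args)` function and the `run_benchmark_group_config` helper added later in this PR, it could be wired up roughly like this:

```python
# Hypothetical sketch of a userbenchmark/torch-nightly-test/run.py entry point
# (not shown in this diff). Assumes the conventional userbenchmark run(args)
# interface and the run_benchmark_group_config helper introduced in this PR.
import argparse
from typing import List

from userbenchmark.group_bench.run_config import run_benchmark_group_config


def run(args: List[str]) -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", required=True, help="Path to the group config YAML.")
    parser.add_argument("--dryrun", action="store_true", help="Only enumerate configs; do not run.")
    parsed = parser.parse_args(args)
    results = run_benchmark_group_config(parsed.config, dryrun=parsed.dryrun)
    for group_result in results:
        print(group_result)
```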
22 changes: 21 additions & 1 deletion torchbenchmark/util/experiment/instantiator.py
@@ -24,11 +24,13 @@

@dataclasses.dataclass
class TorchBenchModelConfig:
model_set: str
name: str
test: str
device: str
batch_size: Optional[int]
extra_args: List[str]
metrics: List[str]
extra_env: Optional[Dict[str, str]] = None
output_dir: Optional[pathlib.Path] = None

@@ -153,7 +155,25 @@ def list_extended_models(suite_name: str = "all") -> List[str]:
return list_extended_timm_models()
elif suite_name == "all":
return list_extended_huggingface_models() + list_extended_timm_models()
elif suite_name == "torchbench":
return list_models()
else:
assert (
False
), "Currently, we only support extended model set huggingface or timm."
), f"Currently, we only support model set torchbench, huggingface or timm, but get {suite_name}."


def get_model_set_from_model_name(model_name: str) -> str:
    from torchbenchmark.util.framework.huggingface.extended_configs import (
        list_extended_huggingface_models,
    )
    from torchbenchmark.util.framework.timm.extended_configs import (
        list_extended_timm_models,
    )
    if model_name in list_extended_huggingface_models():
        return "huggingface"
    if model_name in list_extended_timm_models():
        return "timm"
    if model_name in list_models():
        return "torchbench"
    assert False, f"Model {model_name} is not found in any model set."
106 changes: 70 additions & 36 deletions torchbenchmark/util/experiment/metrics.py
@@ -5,7 +5,7 @@
import dataclasses
import pathlib
import time
from typing import List, Optional, Tuple, Union
from typing import List, Optional, Tuple, Union, Dict, Any

import torch
from torchbenchmark import ModelTask
@@ -22,12 +22,14 @@
class TorchBenchModelMetrics:
latencies: List[float]
throughputs: List[float]
accuracy: Optional[bool]
cpu_peak_mem: Optional[float]
gpu_peak_mem: Optional[float]
ttfb: Optional[float] # time-to-first-batch
pt2_compilation_time: Optional[float]
pt2_graph_breaks: Optional[float]
model_flops: Optional[float]
error_msg: Optional[str]


def get_latencies(
@@ -139,22 +141,25 @@ def work_func():

def get_model_test_metrics(
model: Union[BenchmarkModel, ModelTask],
metrics=[],
required_metrics=[],
export_metrics_file=False,
metrics_gpu_backend="nvml",
nwarmup=WARMUP_ROUNDS,
num_iter=BENCHMARK_ITERS,
) -> TorchBenchModelMetrics:
import os

latencies = None
throughputs = None
cpu_peak_mem = None
gpu_peak_mem = None
ttfb = None
pt2_compilation_time = None
pt2_graph_breaks = None
model_flops = None
metrics = TorchBenchModelMetrics(
latencies=[],
throughputs=[],
accuracy=None,
cpu_peak_mem=None,
gpu_peak_mem=None,
ttfb=None,
pt2_compilation_time=None,
pt2_graph_breaks=None,
model_flops=None,
error_msg=None,
)
if not (isinstance(model, BenchmarkModel) or isinstance(model, ModelTask)):
raise ValueError(
f"Expected BenchmarkModel or ModelTask, get type: {type(model)}"
@@ -167,51 +172,42 @@ def get_model_test_metrics(
if isinstance(model, BenchmarkModel)
else model.get_model_attribute("device")
)
if "latencies" in metrics or "throughputs" in metrics:
latencies = get_latencies(
if "latencies" in required_metrics or "throughputs" in required_metrics:
metrics.latencies = get_latencies(
model.invoke, device, nwarmup=nwarmup, num_iter=num_iter
)
if "cpu_peak_mem" in metrics or "gpu_peak_mem" in metrics:
cpu_peak_mem, _device_id, gpu_peak_mem = get_peak_memory(
if "cpu_peak_mem" in required_metrics or "gpu_peak_mem" in required_metrics:
metrics.cpu_peak_mem, _device_id, metrics.gpu_peak_mem = get_peak_memory(
model.invoke,
device,
export_metrics_file=export_metrics_file,
metrics_needed=metrics,
metrics_needed=required_metrics,
metrics_gpu_backend=metrics_gpu_backend,
cpu_monitored_pid=model_pid,
)
if "throughputs" in metrics:
throughputs = [model.batch_size * 1000 / latency for latency in latencies]
if "pt2_compilation_time" in metrics:
pt2_compilation_time = (
if "throughputs" in required_metrics:
metrics.throughputs = [model.batch_size * 1000 / latency for latency in metrics.latencies]
if "pt2_compilation_time" in required_metrics:
metrics.pt2_compilation_time = (
model.get_model_attribute("pt2_compilation_time")
if isinstance(model, ModelTask)
else model.pt2_compilation_time
)
if "pt2_graph_breaks" in metrics:
pt2_graph_breaks = (
if "pt2_graph_breaks" in required_metrics:
metrics.pt2_graph_breaks = (
model.get_model_attribute("pt2_graph_breaks")
if isinstance(model, ModelTask)
else model.pt2_graph_breaks
)
if "model_flops" in metrics:
model_flops = get_model_flops(model)
if "ttfb" in metrics:
ttfb = (
if "model_flops" in required_metrics:
metrics.model_flops = get_model_flops(model)
if "ttfb" in required_metrics:
metrics.ttfb = (
model.get_model_attribute("ttfb")
if isinstance(model, ModelTask)
else model.ttfb
)
return TorchBenchModelMetrics(
latencies,
throughputs,
cpu_peak_mem,
gpu_peak_mem,
ttfb,
pt2_compilation_time,
pt2_graph_breaks,
model_flops,
)
return metrics


def get_model_accuracy(
@@ -242,3 +238,41 @@ def get_model_accuracy(
accuracy = model.accuracy
del model
return accuracy


def run_config(config: TorchBenchModelConfig,
               as_dict: bool = False,
               dryrun: bool = False,
               ) -> Union[TorchBenchModelMetrics, Dict[str, Any]]:
    """Run a benchmark config and return its metrics as a TorchBenchModelMetrics object,
    or as a plain dict when as_dict=True."""
    print(f"Running config {config} ...", flush=True, end="")
    metrics = TorchBenchModelMetrics(
        latencies=[],
        throughputs=[],
        accuracy=None,
        cpu_peak_mem=None,
        gpu_peak_mem=None,
        ttfb=None,
        pt2_compilation_time=None,
        pt2_graph_breaks=None,
        model_flops=None,
        error_msg=None,
    )
    if dryrun:
        print("[skip_by_dryrun]", flush=True)
        return dataclasses.asdict(metrics) if as_dict else metrics
    required_metrics = config.metrics.copy()
    accuracy = None
    if "accuracy" in required_metrics:
        accuracy = get_model_accuracy(config)
        required_metrics.remove("accuracy")
    if required_metrics:
        from torchbenchmark.util.experiment.instantiator import (
            load_model_isolated,
        )
        model_task = load_model_isolated(config)
        metrics = get_model_test_metrics(model_task, required_metrics=required_metrics)
    if "accuracy" in config.metrics:
        metrics.accuracy = accuracy
    print("[done]", flush=True)
    return dataclasses.asdict(metrics) if as_dict else metrics
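For reference, a minimal sketch of driving the new run_config helper directly; the model, device, and metric choices below are illustrative:

```python
# Minimal sketch of calling run_config directly; field values are illustrative.
from torchbenchmark.util.experiment.instantiator import TorchBenchModelConfig
from torchbenchmark.util.experiment.metrics import run_config

config = TorchBenchModelConfig(
    model_set="torchbench",
    name="resnet50",
    test="eval",
    device="cuda",
    batch_size=None,   # None keeps the model's default batch size
    extra_args=[],
    metrics=["latencies", "cpu_peak_mem", "gpu_peak_mem"],
)

# dryrun=True only prints the config and returns empty metrics;
# drop it to actually load the model in a subprocess and measure.
print(run_config(config, as_dict=True, dryrun=True))
```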
6 changes: 3 additions & 3 deletions userbenchmark/group_bench/run.py
@@ -186,10 +186,10 @@ def load_group_config(config_file: str) -> TorchBenchGroupBenchConfig:
]
metrics = data["metrics"] if "metrics" in data else []
group_configs = {}
for group_name in data["test_group"]:
for group_name in data["test_groups"]:
group_configs[group_name] = []
group_extra_args = list(filter(lambda x: bool(x), data["test_group"][group_name].get("extra_args", "").split(" ")))
for subgroup in data["test_group"][group_name]["subgroup"]:
group_extra_args = list(filter(lambda x: bool(x), data["test_groups"][group_name].get("extra_args", "").split(" ")))
for subgroup in data["test_groups"][group_name]["subgroup"]:
subgroup_extra_args = subgroup.get("extra_args", "")
subgroup_extra_args = "" if subgroup_extra_args == None else subgroup_extra_args
subgroup_extra_args_list = list(filter(lambda x: bool(x), subgroup_extra_args.split(" ")))
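The rename only changes the YAML key that load_group_config reads; after yaml.safe_load the function now expects a structure along these lines (the group name, subgroups, and extra_args values here are made up for illustration):

```python
# Illustrative shape of the parsed group-bench config after the
# test_group -> test_groups rename; group and subgroup contents are hypothetical.
data = {
    "metrics": ["latencies"],
    "test_groups": {
        "eager": {
            "extra_args": "",
            "subgroup": [
                {"extra_args": ""},                         # baseline subgroup
                {"extra_args": "--torchdynamo inductor"},   # hypothetical compiled subgroup
            ],
        },
    },
}
```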
102 changes: 102 additions & 0 deletions userbenchmark/group_bench/run_config.py
@@ -0,0 +1,102 @@
import yaml
import numpy
import itertools
from typing import Any, Dict, List, Optional, Tuple
from torchbenchmark.util.experiment.instantiator import TorchBenchModelConfig, list_extended_models, get_model_set_from_model_name
from torchbenchmark.util.experiment.metrics import run_config


def _get_models(models: Optional[List[str]] = None, model_set: Optional[List[str]] = None) -> List[Tuple[str, str]]:
    result = set(map(lambda x: (get_model_set_from_model_name(x), x), models)) if models else set()
    if model_set:
        for s in model_set:
            result = result.union(set(map(lambda x: (s, x), list_extended_models(s))))
    return sorted(list(result))


def config_obj_to_model_configs(config: Dict[str, Any]) -> Dict[str, Dict[str, List[TorchBenchModelConfig]]]:
    models: List[Tuple[str, str]] = _get_models(models=config.get("model", None), model_set=config.get("model_set", None))
    batch_sizes = config.get("batch_size", [None])
    tests = config.get("test", ["train", "eval"])
    devices = config.get("device", ["cuda"])
    precisions = config.get("precision", [None])
    metrics = config["metrics"]
    test_groups = config["test_groups"]
    result = {}
    for group_name in test_groups.keys():
        extra_args = test_groups[group_name].get("extra_args", [])
        extra_args = [] if extra_args is None else extra_args.copy()
        cfgs = itertools.product(*[devices, tests, batch_sizes, precisions, models])
        for device, test, batch_size, precision, model_name_with_set in cfgs:
            # Build a per-config copy so precision flags do not accumulate across configs.
            config_extra_args = list(extra_args)
            if precision:
                config_extra_args.extend(["--precision", precision])
            if batch_size:
                batch_size = int(batch_size)
            common_key = (device, test, batch_size, precision)
            if common_key not in result:
                result[common_key] = {}
            if group_name not in result[common_key]:
                result[common_key][group_name] = []
            result[common_key][group_name].append(
                TorchBenchModelConfig(
                    model_set=model_name_with_set[0],
                    name=model_name_with_set[1],
                    device=device,
                    test=test,
                    batch_size=batch_size,
                    extra_args=config_extra_args,
                    extra_env=None,
                    metrics=metrics,
                )
            )
    return result


def _common_key_to_group_key(common_key: Tuple[str, str, int, str]):
    device, test, batch_size, precision = common_key
    key = {
        "device": device,
        "test": test,
        "batch_size": batch_size if batch_size else "default",
        "precision": precision if precision else "default",
    }
    return key


def _config_result_to_group_result(
    group_name: str,
    model_set: str,
    model_name: str,
    metrics: Dict[str, Any],
    required_metrics: List[str],
    metric_aggregation: str = "p50"):
    # output metric format: <model_set>_<model_name>[<group_name>]_<metric_name>
    result = {}
    for metric in required_metrics:
        metric_name = f"{model_set}_{model_name}[{group_name}]_{metric}"
        metric_value = metrics[metric]
        if isinstance(metrics[metric], list) and metric_aggregation == "p50":
            metric_value = numpy.median(metrics[metric])
        result[metric_name] = metric_value
    return result


def run_benchmark_group_config(group_config_file: str, dryrun: bool = False) -> List[Dict[str, Any]]:
    result = []
    with open(group_config_file, "r") as fp:
        config_obj = yaml.safe_load(fp)
    configs: Dict[str, Dict[str, List[TorchBenchModelConfig]]] = config_obj_to_model_configs(config_obj)
    for common_key in configs.keys():
        group_key = _common_key_to_group_key(common_key)
        group_result = {"group_key": group_key, "group_results": {}}
        for group_name in configs[common_key]:
            for x in configs[common_key][group_name]:
                group_result["group_results"].update(
                    _config_result_to_group_result(
                        group_name=group_name,
                        model_set=x.model_set,
                        model_name=x.name,
                        metrics=run_config(x, as_dict=True, dryrun=dryrun),
                        required_metrics=x.metrics))
        result.append(group_result)
    return result
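And a hedged end-to-end sketch of the new group runner against the nightly config added below; dryrun=True only enumerates the benchmark matrix without loading any model (the relative path assumes the repository root as the working directory):

```python
# Sketch: enumerate (or run) the benchmark group config shipped with the
# torch-nightly-test userbenchmark.
from userbenchmark.group_bench.run_config import run_benchmark_group_config

results = run_benchmark_group_config(
    "userbenchmark/torch-nightly-test/nightly.yaml", dryrun=True
)
for group in results:
    # Each entry pairs a group key (device/test/batch_size/precision) with a flat
    # dict of "<model_set>_<model_name>[<group_name>]_<metric>" values.
    print(group["group_key"], len(group["group_results"]))
```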
16 changes: 16 additions & 0 deletions userbenchmark/torch-nightly-test/nightly.yaml
@@ -0,0 +1,16 @@
model_set:
  - torchbench
  - huggingface
  - timm
test:
  - train
  - eval
device:
  - cuda
metrics:
  - latencies
  - cpu_peak_mem
  - gpu_peak_mem
test_groups:
  eager:
    extra_args:
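With an empty extra_args, the eager group runs every model in its default eager configuration, and the keys collected into group_results follow the `<model_set>_<model_name>[<group_name>]_<metric>` convention from _config_result_to_group_result. A small illustration (the model name is just an example from the torchbench suite):

```python
# Example of the result keys produced for this config; "resnet50" is only
# an illustrative torchbench model name.
model_set, model_name, group_name = "torchbench", "resnet50", "eager"
for metric in ["latencies", "cpu_peak_mem", "gpu_peak_mem"]:
    print(f"{model_set}_{model_name}[{group_name}]_{metric}")
# torchbench_resnet50[eager]_latencies
# torchbench_resnet50[eager]_cpu_peak_mem
# torchbench_resnet50[eager]_gpu_peak_mem
```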