More error handling improvements
Reviewed By: xuzhao9

Differential Revision: D57965945

fbshipit-source-id: 1ccd9e1e2f24426d073e72acc9aab1172bd505a8
int3 authored and facebook-github-bot committed Jun 3, 2024
1 parent c3d510d commit 0546be8
Showing 2 changed files with 104 additions and 92 deletions.
154 changes: 82 additions & 72 deletions torchbenchmark/util/triton_op.py
@@ -370,6 +370,10 @@ def parse_args(
         type=int,
         help="Number of example inputs.",
     )
+    parser.add_argument(
+        "--keep-going",
+        action="store_true",
+    )
     parser.add_argument(
         "--input-id",
         type=int,
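
For context, action="store_true" gives the new option a False default, so error handling is unchanged unless the flag is passed explicitly, at which point it surfaces as tb_args.keep_going. A minimal standalone sketch of that argparse behavior (illustrative only, not TorchBench code):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--keep-going", action="store_true")

    print(parser.parse_args([]).keep_going)                # False: default is fail-fast
    print(parser.parse_args(["--keep-going"]).keep_going)  # True: continue past errors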
@@ -466,85 +470,89 @@ def _get_bm_func(self, bm_func_name: str):
 
     def run(
         self, warmup=DEFAULT_WARMUP, rep=DEFAULT_RUN_ITERS, quantiles=DEFAULT_QUANTILES
-    ) -> BenchmarkOperatorResult:
+    ) -> None:
         """Benchmarking the operator and returning its metrics."""
         metrics = []
-        input_id_range = range(self._input_id, self._input_id+self._num_inputs)
-        if tqdm is not None:
-            input_id_range = tqdm(input_id_range)
-        if self._input_id:
-            for _dryrun_input_id in range(self._input_id):
-                self.example_inputs = self.get_example_inputs()
-        for input_id in input_id_range:
-            self.example_inputs = self.get_example_inputs()
-            if self.example_inputs is None:
-                warnings.warn(
-                    UserWarning(
-                        f"The input generator get_input_iter() has depleted at id {input_id}. Available number of inputs: {self._available_num_inputs}."
-                    )
-                )
-                break
-            # Move inputs to the device
-            self.example_inputs = input_cast(
-                lambda x: isinstance(x, torch.Tensor),
-                lambda x: x.to(self.device),
-                self.example_inputs,
-            )
-            self.baseline_fn = None
-            self.baseline_metrics = None
-            self._op_flops = {}
-            # Cast the input precisions
-            apply_decoration_args(self, self.dargs)
-            x_val = self.get_x_val(self.example_inputs)
-            if self._only:
-                benchmarks = self._only
-            else:
-                benchmarks = (
-                    [bm for bm in REGISTERED_BENCHMARKS[self.name]]
-                    if self.name in REGISTERED_BENCHMARKS
-                    else []
-                )
-            # Run the baseline first, if baseline exists
-            baseline_name = (
-                BASELINE_BENCHMARKS[self.name]
-                if self.name in BASELINE_BENCHMARKS
-                else None
-            )
-            if baseline_name and baseline_name in benchmarks:
-                benchmarks.remove(baseline_name)
-                benchmarks.insert(0, baseline_name)
-
-            # get metrics for for each registered benchmark
-            def _reduce_benchmarks(acc, bm_name: str):
-                baseline = (
-                    bm_name == BASELINE_BENCHMARKS[self.name]
-                    if self.name in BASELINE_BENCHMARKS
-                    else False
-                )
-                acc[bm_name] = self._do_bench(
-                    input_id=input_id,
-                    fn_name=bm_name,
-                    warmup=warmup,
-                    rep=rep,
-                    quantiles=quantiles,
-                    baseline=baseline,
-                )
-                if baseline:
-                    self.baseline_metrics = acc[bm_name]
-                return acc
-
-            y_vals: Dict[str, BenchmarkOperatorMetrics] = functools.reduce(
-                _reduce_benchmarks, benchmarks, {}
-            )
-            metrics.append((x_val, y_vals))
-            del self.example_inputs
-            gc.collect()
-        self.output = BenchmarkOperatorResult(
-            op_name=self.name,
-            metrics=self.required_metrics,
-            result=metrics,
-        )
-        return self.output
+        try:
+            input_id_range = range(self._input_id, self._input_id + self._num_inputs)
+            if tqdm is not None:
+                input_id_range = tqdm(input_id_range)
+            if self._input_id:
+                for _dryrun_input_id in range(self._input_id):
+                    self.example_inputs = self.get_example_inputs()
+            for input_id in input_id_range:
+                self.example_inputs = self.get_example_inputs()
+                if self.example_inputs is None:
+                    warnings.warn(
+                        f"The input generator get_input_iter() has depleted at id {input_id}. Available number of "
+                        f"inputs: {self._available_num_inputs}.",
+                        stacklevel=1
+                    )
+                    break
+                # Move inputs to the device
+                self.example_inputs = input_cast(
+                    lambda x: isinstance(x, torch.Tensor),
+                    lambda x: x.to(self.device),
+                    self.example_inputs,
+                )
+                self.baseline_fn = None
+                self.baseline_metrics = None
+                self._op_flops = {}
+                # Cast the input precisions
+                apply_decoration_args(self, self.dargs)
+                x_val = self.get_x_val(self.example_inputs)
+                if self._only:
+                    benchmarks = self._only
+                else:
+                    benchmarks = (
+                        [bm for bm in REGISTERED_BENCHMARKS[self.name]]
+                        if self.name in REGISTERED_BENCHMARKS
+                        else []
+                    )
+                # Run the baseline first, if baseline exists
+                baseline_name = (
+                    BASELINE_BENCHMARKS[self.name]
+                    if self.name in BASELINE_BENCHMARKS
+                    else None
+                )
+                if baseline_name and baseline_name in benchmarks:
+                    benchmarks.remove(baseline_name)
+                    benchmarks.insert(0, baseline_name)
+
+                # get metrics for for each registered benchmark
+                def _reduce_benchmarks(acc, bm_name: str):
+                    baseline = (
+                        bm_name == BASELINE_BENCHMARKS[self.name]
+                        if self.name in BASELINE_BENCHMARKS
+                        else False
+                    )
+                    acc[bm_name] = self._do_bench(
+                        input_id=input_id,
+                        fn_name=bm_name,
+                        warmup=warmup,
+                        rep=rep,
+                        quantiles=quantiles,
+                        baseline=baseline,
+                    )
+                    if baseline:
+                        self.baseline_metrics = acc[bm_name]
+                    return acc
+
+                y_vals: Dict[str, BenchmarkOperatorMetrics] = functools.reduce(
+                    _reduce_benchmarks, benchmarks, {}
+                )
+                metrics.append((x_val, y_vals))
+                del self.example_inputs
+                gc.collect()
+        except (KeyboardInterrupt, Exception):
+            warnings.warn("Caught exception, terminating early with partial results", stacklevel=1)
+            raise
+        finally:
+            self.output = BenchmarkOperatorResult(
+                op_name=self.name,
+                metrics=self.required_metrics,
+                result=metrics,
+            )
 
     def get_x_val(self, example_inputs) -> Any:
         raise NotImplementedError(
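
The reworked run() follows a common partial-results pattern: collect per-input metrics inside a try block, warn and re-raise on failure, and always materialize whatever was collected in a finally block. A minimal, self-contained sketch of that pattern, with made-up names rather than the TorchBench API:

    import warnings

    def run_all(inputs):
        """Collect a result per input; publish partial results even if something fails."""
        results = []
        try:
            for item in inputs:
                results.append((item, item * 2))  # stand-in for a real benchmark call
        except (KeyboardInterrupt, Exception):
            warnings.warn("Caught exception, terminating early with partial results", stacklevel=1)
            raise
        finally:
            # Runs whether or not an exception is propagating.
            print({"count": len(results), "rows": results})

    run_all([1, 2, 3])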
@@ -798,6 +806,8 @@ def _init_extra_metrics() -> Dict[str, Any]:
         except torch.cuda.OutOfMemoryError:
             metrics.error_msg = "CUDA OOM"
         except Exception as e:
+            if not self.tb_args.keep_going:
+                raise
             metrics.error_msg = str(e)
         return metrics
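
Inside _do_bench(), the flag acts as the usual keep-going switch: propagate immediately by default, or record the error message and move on when the flag is set. A rough standalone sketch of that behavior (simplified names, not the actual TorchBench types):

    def bench_one(fn, keep_going=False):
        error_msg = None
        try:
            value = fn()
        except Exception as e:
            if not keep_going:
                raise                              # default: fail fast, as before
            value, error_msg = None, str(e)        # --keep-going: record and continue
        return value, error_msg

    print(bench_one(lambda: 1 / 0, keep_going=True))  # (None, 'division by zero')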

42 changes: 22 additions & 20 deletions userbenchmark/triton/run.py
@@ -45,7 +45,7 @@ def parse_args(args):
     parser.add_argument("--ci", action="store_true", help="Run in the CI mode.")
     return parser.parse_known_args(args)
 
-def _run(args: argparse.Namespace, extra_args: List[str]) -> BenchmarkOperatorResult:
+def _run(args: argparse.Namespace, extra_args: List[str]) -> None:
     Opbench = load_opbench_by_name(args.op)
    if args.fwd_bwd:
        args.mode = "fwd_bwd"
@@ -56,27 +56,29 @@ def _run(args: argparse.Namespace, extra_args: List[str]) -> BenchmarkOperatorResult:
         device=args.device,
         extra_args=extra_args,
     )
-    metrics = opbench.run(args.warmup, args.iter)
-    if not args.skip_print:
-        if args.csv:
-            metrics.write_csv_to_file(sys.stdout)
-        else:
-            print(metrics)
-    if not hasattr(torch_version, "git_version") and args.log_scuba:
-        from userbenchmark.triton.fb import log_benchmark
+    try:
+        opbench.run(args.warmup, args.iter)
+    finally:
+        metrics = opbench.output
+        if not args.skip_print:
+            if args.csv:
+                metrics.write_csv_to_file(sys.stdout)
+            else:
+                print(metrics)
+        if not hasattr(torch_version, "git_version") and args.log_scuba:
+            from userbenchmark.triton.fb import log_benchmark
 
-        log_benchmark(metrics)
-    if args.plot:
-        try:
-            opbench.plot()
-        except NotImplementedError:
-            print(f"Plotting is not implemented for {args.op}")
+            log_benchmark(metrics)
+        if args.plot:
+            try:
+                opbench.plot()
+            except NotImplementedError:
+                print(f"Plotting is not implemented for {args.op}")
 
-    if args.dump_csv:
-        os.makedirs(TRITON_BENCH_CSV_DUMP_PATH, exist_ok=True)
-        path = metrics.write_csv(TRITON_BENCH_CSV_DUMP_PATH)
-        print(f"[TritonBench] Dumped csv to {path}")
-    return metrics
+        if args.dump_csv:
+            os.makedirs(TRITON_BENCH_CSV_DUMP_PATH, exist_ok=True)
+            path = metrics.write_csv(TRITON_BENCH_CSV_DUMP_PATH)
+            print(f"[TritonBench] Dumped csv to {path}")
 
 def run(args: List[str] = []):
     if args == []:
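
With both files changed, a failing benchmark no longer discards the work done so far: _run() reads whatever opbench.output holds in its finally block and still prints, logs, and optionally dumps it. A hedged usage sketch of the entry point shown above; the operator name and every flag spelling except --keep-going are illustrative guesses, and the import assumes the package layout matches the file path:

    # Hypothetical invocation; only --keep-going is taken directly from this commit.
    from userbenchmark.triton.run import run

    run(["--op", "softmax", "--keep-going", "--dump-csv"])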
