Add machine_util.py for tuning optimal benchmark settings (#183)
Applies some configuration settings automatically, and verifies more settings than it applies.

Only works on Amazon Linux so far; tries to bail out gracefully on other platforms.

Usage:
- as a command-line script, machine_config.py will check or configure the machine:
  sudo `which python` <path to machine_config.py> --configure

- as a library, it provides functions for use e.g. in conftest.py (a minimal sketch follows below):
  - asserts the benchmark script is run with the configured settings
  - logs machine settings to the benchmark data file
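
A minimal sketch of that library usage, adapted from the conftest.py changes shown
further down in this diff (the hook names and imports come from the commit; treat
this as an illustration, not the committed conftest.py itself):

    # Sketch: wiring machine_config checks into a pytest conftest.py.
    from torchbenchmark.util.machine_config import (
        get_machine_config,        # collects the current machine settings
        check_machine_configured,  # asserts the machine matches the tuned configuration
    )

    def pytest_sessionstart(session):
        # Fail fast if the machine is not configured for stable benchmarking,
        # unless the user opted out with --ignore_machine_config.
        if not session.config.getoption('ignore_machine_config'):
            check_machine_configured()

    def pytest_benchmark_update_machine_info(config, machine_info):
        # Record the machine configuration alongside the benchmark results.
        machine_info['torchbench_machine_config'] = get_machine_config()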

Other miscellaneous fixes:
* Add score plot and nightly sweep scripts
* Add legend to sweep result plotting script. (#193)


Moves compute_score and some other utils around. Needs more work.

torchbenchmark isn't installed as a proper package, so relative imports don't work
from a script with a main function nested inside the torchbenchmark package.
Work around this for now.

Make the machine_config script agnostic to CPU core pinning when run
as a script, since pinning is checked by conftest during benchmarking
and it's pointless to make the user pin the configuration script.

Co-authored-by: xz <[email protected]>
wconstab and xuzhao9 committed Jan 27, 2021
1 parent a56c9e6 commit f9e5a75
Showing 20 changed files with 795 additions and 36 deletions.
32 changes: 4 additions & 28 deletions score/compute_score.py → compute_score.py
@@ -9,34 +9,10 @@
import os
import yaml

from generate_score_config import generate_bench_cfg
from torchbenchmark.score.compute_score import compute_score
from torchbenchmark.score.generate_score_config import generate_bench_cfg
from tabulate import tabulate

SPEC_FILE_DEFAULT = "score.yml"
TARGET_SCORE_DEFAULT = 1000

def compute_score(config, data, fake_data=None):
target = config['target']
score = 0.0
weight_sum = 0.0
for name in config['benchmarks']:
cfg = config['benchmarks'][name]
weight, norm = cfg['weight'], cfg['norm']
weight_sum += weight
measured_mean = [b['stats']['mean'] for b in data['benchmarks'] if b['name'] == name]
assert len(measured_mean) == 1, f"Missing data for {name}, unable to compute score"
measured_mean = measured_mean[0]
if fake_data is not None and name in fake_data:
# used for sanity checks on the sensitivity of the score metric
measured_mean = fake_data[name]
benchmark_score = weight * math.log(norm / measured_mean)
# print(f"{name}: {benchmark_score}")
score += benchmark_score

score = target * math.exp(score)
assert abs(weight_sum - 1.0) < 1e-6, f"Bad configuration, weights don't sum to 1, but {weight_sum}"
return score

if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--configuration",
@@ -77,7 +53,7 @@ def compute_score(config, data, fake_data=None):
print(f"Using hacks {args.hack_data}, hacked_score {hacked_score}")

elif args.benchmark_data_dir is not None:
scores = [('File', 'Score')]
scores = [('File', 'Score', 'PyTorch Version')]
for f in os.listdir(args.benchmark_data_dir):
path = os.path.join(args.benchmark_data_dir, f)
if os.path.isfile(path) and os.path.splitext(path)[1] == '.json':
@@ -86,7 +62,7 @@
if config is None:
config = generate_bench_cfg(spec, data, TARGET_SCORE_DEFAULT)
score = compute_score(config, data)
scores.append((f, score))
scores.append((f, score, data['machine_info']['pytorch_version']))

print(tabulate(scores, headers='firstrow'))

16 changes: 16 additions & 0 deletions conftest.py
@@ -1,9 +1,14 @@
import os
import pytest
import torch
from torchbenchmark.util.machine_config import get_machine_config, check_machine_configured


def pytest_addoption(parser):
parser.addoption("--fuser", help="fuser to use for benchmarks")
parser.addoption("--ignore_machine_config",
action='store_true',
help="Disable checks/assertions for machine configuration for stable benchmarks")

def set_fuser(fuser):
if fuser == "old":
@@ -20,6 +25,10 @@ def set_fuser(fuser):
torch._C._jit_override_can_fuse_on_gpu(True)
torch._C._jit_set_texpr_fuser_enabled(True)

def pytest_sessionstart(session):
if not session.config.getoption('ignore_machine_config'):
check_machine_configured()

def pytest_configure(config):
set_fuser(config.getoption("fuser"))

@@ -39,3 +48,10 @@ def pytest_benchmark_update_machine_info(config, machine_info):

machine_info['circle_build_num'] = os.environ.get("CIRCLE_BUILD_NUM")
machine_info['circle_project_name'] = os.environ.get("CIRCLE_PROJECT_REPONAME")

try:
# if running on unexpected machine/os, get_machine_config _may_ not work
machine_info['torchbench_machine_config'] = get_machine_config()
except Exception:
if not config.getoption('ignore_machine_config'):
raise
84 changes: 84 additions & 0 deletions plot_sweep.py
@@ -0,0 +1,84 @@
import argparse
import json
# import pandas as pd
import os
# import sys
# import re
import yaml

# from bokeh.layouts import column, row, layout, gridplot
# from bokeh.plotting import figure, output_file, show
# from bokeh.sampledata.autompg import autompg
# from bokeh.transform import jitter
from bokeh.palettes import Category10
from bokeh.models import HoverTool, Div
from bokeh.plotting import figure, output_file, show
# from bokeh.models import Legend
# from bokeh.models import ColumnDataSource, CategoricalTicker, Div
# from bokeh.models import ColumnDataSource, DataTable, DateFormatter, TableColumn
# from bokeh.transform import jitter
# from collections import defaultdict
from datetime import datetime as dt
from torchbenchmark.util.data import load_data_dir, load_data_files
from torchbenchmark.score.compute_score import compute_score
from torchbenchmark.score.generate_score_config import generate_bench_cfg


if __name__ == "__main__":
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("data_dir", nargs='+',
help="One or more directories containing benchmark json files. "
"Each directory will be plotted as a separate series. "
"By default, the first file in the first directory will be used"
" to generate a score configuration with a target of 1000,"
" and everything else will be relative to that.")
parser.add_argument("--output_html", default='plot.html', help="html file to write")
parser.add_argument("--score_heirarchy", default='torchbenchmark/score/score.yml',
help="file defining score heirarchy")
parser.add_argument("--reference_json", required=True,
help="file defining score norm values, usually first json in first data_dir")
args = parser.parse_args()
plot_height = 800
plot_width = 1000

assert len(args.data_dir) > 0, "Must provide at least one data directory"
compare_datasets = [load_data_dir(d, most_recent_files=-1) for d in args.data_dir]

with open(args.reference_json) as f:
ref_data = json.load(f)
with open(args.score_heirarchy) as f:
score_heirarchy = yaml.full_load(f)

score_cfg = generate_bench_cfg(score_heirarchy, ref_data, target=1000)

p = figure(plot_width=plot_width, plot_height=plot_height,
x_axis_type='datetime')
p.y_range.start = 0
xs = []
ys = []
for d in compare_datasets:
scores = []
dates = []
for i in range(len(d._json_raw)):
data = d._json_raw[i]
score = compute_score(score_cfg, data)
scores.append(score)
pytorch_ver = data['machine_info']['pytorch_version']
date = dt.strptime(pytorch_ver[pytorch_ver.index("dev") + len("dev"):], "%Y%m%d")
dates.append(date)
xs.append(dates)
ys.append(scores)

colors = Category10[10][:len(compare_datasets)]
basenames = map(os.path.basename, args.data_dir)

for x, y, color in zip(xs, ys, colors):
p.line(x, y, color=color, line_width=2, legend_label=next(basenames))

for x, y, color in zip(xs, ys, colors):
p.circle(x, y, color=color)

p.legend.location = "bottom_right"

output_file(args.output_html)
show(p)
3 changes: 3 additions & 0 deletions requirements.txt
@@ -1,3 +1,6 @@
bs4
py-cpuinfo
distro
pytest
pytest-benchmark
requests
5 changes: 2 additions & 3 deletions scripts/run_bench_and_upload.sh
@@ -21,11 +21,10 @@ BENCHMARK_DATA="`pwd`/.data"
mkdir -p ${BENCHMARK_DATA}
BENCHMARK_FILENAME=${CIRCLE_SHA1}_$(date +"%Y%m%d_%H%M%S").json
BENCHMARK_ABS_FILENAME=${BENCHMARK_DATA}/${BENCHMARK_FILENAME}
pytest test_bench.py --setup-show --benchmark-sort=Name --benchmark-json=${BENCHMARK_ABS_FILENAME} -k "$PYTEST_FILTER"

pytest test_bench.py --ignore_machine_config --setup-show --benchmark-sort=Name --benchmark-json=${BENCHMARK_ABS_FILENAME} -k "$PYTEST_FILTER"

# Compute benchmark score
TORCHBENCH_SCORE=$(python score/compute_score.py --configuration score/torchbench_0.0.yaml --benchmark_data_file ${BENCHMARK_ABS_FILENAME})
TORCHBENCH_SCORE=$(python compute_score.py --configuration torchbenchmark/score/torchbench_0.0.yaml --benchmark_data_file ${BENCHMARK_ABS_FILENAME})
# Token is only present for certain jobs, only upload if present
if [ -z "$SCRIBE_GRAPHQL_ACCESS_TOKEN" ]
then
55 changes: 55 additions & 0 deletions scripts/run_sweep_existing_envs.sh
@@ -0,0 +1,55 @@
#!/bin/bash
set -e
. ~/miniconda3/etc/profile.d/conda.sh
conda activate base

CONFIG_DIR=""
BENCHMARK_FILTER=""
CONDA_ENVS_DIR="${HOME}/sweep_conda_envs"

print_usage() {
echo "Usage: run_sweep.sh -c ENVS_FILE -o DATA_OUTPUT_DIR"
}

while getopts 'e:c:fo:p' flag; do
case "${flag}" in
c) ENVS_FILE="${OPTARG}";;
o) DATA_DIR="${OPTARG}";;
*) print_usage
exit 1 ;;
esac
done

if [ -z "${ENVS_FILE}" -o -z "${DATA_DIR}" ];
then
print_usage
exit 1
fi

#sudo sh -c "echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo"
sudo nvidia-smi -ac 5001,900
CORE_LIST="24-47"
export GOMP_CPU_AFFINITY="${CORE_LIST}"
export CUDA_VISIBLE_DEVICES=0

echo "Running Benchmarks..."
mkdir -p "${DATA_DIR}"
# for CONFIG_FILE in ${ENVS_FILE};
while read ENV_NAME;
do
ENV_PATH="${CONDA_ENVS_DIR}/${ENV_NAME}"
conda activate "${ENV_PATH}"

#python -c "import torch; print(f'${ENV_NAME}: {torch.__version__}')"

#pip --version
#pip install distro py-cpuinfo
echo "Run benchmark for ${ENV_NAME}"

taskset -c "${CORE_LIST}" pytest test_bench.py -k "${BENCHMARK_FILTER}" --benchmark-min-rounds 20 --benchmark-json ${DATA_DIR}/$(date +"%Y%m%d_%H%M%S")_${c}.json
conda deactivate
done < ${ENVS_FILE}

echo "Done"


4 changes: 3 additions & 1 deletion test_bench.py
@@ -17,7 +17,7 @@
import time
import torch
from torchbenchmark import list_models

from torchbenchmark.util.machine_config import get_machine_state

def pytest_generate_tests(metafunc, display_len=24):
# This is where the list of models to test can be configured
@@ -74,11 +74,13 @@ class TestBenchNetwork:
def test_train(self, hub_model, benchmark):
try:
benchmark(hub_model.train)
benchmark.extra_info['machine_state'] = get_machine_state()
except NotImplementedError:
print('Method train is not implemented, skipping...')

def test_eval(self, hub_model, benchmark):
try:
benchmark(hub_model.eval)
benchmark.extra_info['machine_state'] = get_machine_state()
except NotImplementedError:
print('Method eval is not implemented, skipping...')
File renamed without changes.
38 changes: 38 additions & 0 deletions torchbenchmark/score/compute_score.py
@@ -0,0 +1,38 @@

"""
Compute the benchmark score given a frozen score configuration and current benchmark data.
"""
import argparse
import json
import math
import sys
import os
import yaml

from tabulate import tabulate

SPEC_FILE_DEFAULT = "score.yml"
TARGET_SCORE_DEFAULT = 1000

def compute_score(config, data, fake_data=None):
target = config['target']
score = 0.0
weight_sum = 0.0
for name in config['benchmarks']:
cfg = config['benchmarks'][name]
weight, norm = cfg['weight'], cfg['norm']
weight_sum += weight
measured_mean = [b['stats']['mean'] for b in data['benchmarks'] if b['name'] == name]
assert len(measured_mean) == 1, f"Missing data for {name}, unable to compute score"
measured_mean = measured_mean[0]
if fake_data is not None and name in fake_data:
# used for sanity checks on the sensitivity of the score metric
measured_mean = fake_data[name]
benchmark_score = weight * math.log(norm / measured_mean)
# print(f"{name}: {benchmark_score}")
score += benchmark_score

score = target * math.exp(score)
assert abs(weight_sum - 1.0) < 1e-6, f"Bad configuration, weights don't sum to 1, but {weight_sum}"
return score
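
Note (illustrative, not part of the file above): the score returned by compute_score
reduces to a weighted geometric mean of the per-benchmark speedups (norm / measured
mean), scaled by the target. A small self-contained check of that equivalence, using
made-up example numbers rather than real benchmark data:

    import math

    # Hypothetical example inputs -- not taken from any benchmark run.
    target = 1000
    benchmarks = {
        # name: (weight, norm_seconds, measured_mean_seconds)
        'model_a': (0.5, 2.0, 1.0),   # 2x faster than its norm
        'model_b': (0.5, 1.0, 2.0),   # 2x slower than its norm
    }

    # Form used by compute_score: sum of weighted log-ratios, then exp.
    log_sum = sum(w * math.log(norm / mean) for w, norm, mean in benchmarks.values())
    score_from_logs = target * math.exp(log_sum)

    # Equivalent form: weighted geometric mean of the speedups.
    score_geo_mean = target * math.prod((norm / mean) ** w
                                        for w, norm, mean in benchmarks.values())

    assert math.isclose(score_from_logs, score_geo_mean)
    print(score_from_logs)  # 1000.0 here: the 2x gain and 2x loss cancel at equal weights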

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
8 changes: 4 additions & 4 deletions score/score.yml → torchbenchmark/score/score.yml
@@ -13,17 +13,17 @@ hierarchy:
# pytorch_stargan:
other computer vision:
Background_Matting:
Super_SloMo:
# Super_SloMo:
natural language processing:
translation:
attention_is_all_you_nee...:
language_modeling:
BERT_pytorch:
other nlp:
fastNLP:
speech:
synthesis:
tacotron2:
# speech:
# synthesis:
# tacotron2:
recommendation:
recommendation:
dlrm:
File renamed without changes.
Empty file added torchbenchmark/util/__init__.py
Empty file.