TorchBench Userbenchmark on T4 Metal #791

Workflow file for this run

.github/workflows/userbenchmark-t4-metal.yml at d910b8a

	# Workflow identity and triggers.
	name: TorchBench Userbenchmark on T4 Metal
	on:
	  # Scheduled run: every day at 17:00 (5 PM) UTC.
	  schedule:
	    - cron: "0 17 * * *"
	  # Manual run: neither input declares `required`, so both may be left empty.
	  workflow_dispatch:
	    inputs:
	      userbenchmark_name:
	        description: Name of the user benchmark to run
	      userbenchmark_options:
	        description: Option of the user benchmark to run

	jobs:
	  # Single job; its steps follow the `steps:` key below.
	  run-userbenchmark:
	    runs-on:
	      - self-hosted
	      - bm-runner
	    # 1440 minutes = 24 hours
	    timeout-minutes: 1440
	    environment: docker-s3-upload
	    env:
	      # Plain configuration shared by every step.
	      CONDA_ENV_NAME: 'userbenchmarks-ci'
	      PLATFORM_NAME: 'aws_t4_metal'
	      SETUP_SCRIPT: '/data/nvme/bin/setup_instance.sh'
	      # Credentials taken from repository/environment secrets.
	      TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
	      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
	      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
	    steps:
	# Check the repository out into the `benchmark` subdirectory; later steps
	# `pushd benchmark` to work inside it.
	- name: Checkout TorchBench
	  uses: actions/checkout@v3
	  with:
	    path: 'benchmark'
	# Create the conda environment named by CONDA_ENV_NAME, then run the CUDA
	# soft-link setup helper (invoked with sudo).
	- name: Create conda environment
	  run: |
	    python benchmark/utils/python_utils.py --create-conda-env "${CONDA_ENV_NAME}"
	    sudo python benchmark/utils/cuda_utils.py --setup-cuda-softlink
	# Install the PyTorch nightly build into the conda environment and verify
	# that it can initialise CUDA. Aborts the job when no nightly is available.
	- name: Install PyTorch nightly
	  run: |
	    # Same activation form as the other steps of this job.
	    . "${SETUP_SCRIPT}" && conda activate "${CONDA_ENV_NAME}"
	    pushd benchmark
	    # Install dependencies
	    python utils/cuda_utils.py --install-torch-deps
	    # check the machine is tuned
	    pip install -U py-cpuinfo psutil distro boto3
	    sudo "${HOME}/miniconda3/envs/${CONDA_ENV_NAME}/bin/python3" torchbenchmark/util/machine_config.py
	    # Check if nightly builds are available
	    NIGHTLIES=$(python torchbenchmark/util/torch_nightly.py --packages torch)
	    # If failed, the script will generate empty result.
	    # Quoted so that multi-word output does not break the `[ -z ... ]` test.
	    if [ -z "${NIGHTLIES}" ]; then
	      echo "Torch nightly build failed. Cancel the workflow."
	      exit 1
	    fi
	    # Install PyTorch and torchvision nightly from pip
	    python utils/cuda_utils.py --install-torch-nightly
	    python utils/cuda_utils.py --check-torch-nightly-version
	    # make sure pytorch+cuda works
	    python -c "import torch; torch.cuda.init()"
	# Run the repository's install.py inside the activated conda environment.
	- name: Install TorchBench
	  run: |
	    # Trace every command in the job log.
	    set -x
	    . "${SETUP_SCRIPT}" && conda activate "${CONDA_ENV_NAME}"
	    pushd benchmark
	    python install.py
	# Run either the scheduled set of userbenchmarks (no manual input) or the
	# single userbenchmark named in the workflow_dispatch inputs, and collect
	# the results in ./benchmark-output next to the checkout.
	- name: Run user benchmark
	  env:
	    # The dispatch inputs are passed through environment variables instead of
	    # being expanded into the script text, so their content is never parsed
	    # as shell code.
	    USERBENCHMARK_NAME: ${{ github.event.inputs.userbenchmark_name }}
	    USERBENCHMARK_OPTIONS: ${{ github.event.inputs.userbenchmark_options }}
	  run: |
	    set -x
	    . "${SETUP_SCRIPT}" && conda activate "${CONDA_ENV_NAME}"
	    # remove old results
	    if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
	    pushd benchmark
	    if [ -d .userbenchmark ]; then rm -Rf .userbenchmark; fi
	    if [ -z "${USERBENCHMARK_NAME}" ]; then
	      # Figure out what userbenchmarks we should run, and run it
	      python ./.github/scripts/userbenchmark/schedule-benchmarks.py --platform "${PLATFORM_NAME}"
	      if [ -d ./.userbenchmark ]; then
	        cp -r ./.userbenchmark ../benchmark-output
	      else
	        # Nothing was produced; still create the directory for later steps.
	        mkdir ../benchmark-output
	      fi
	    else
	      # USERBENCHMARK_OPTIONS is intentionally unquoted so that several
	      # options split into separate arguments. NOTE(review): quoting inside
	      # the input is no longer interpreted by the shell - confirm no caller
	      # relies on options that contain quoted spaces.
	      python run_benchmark.py "${USERBENCHMARK_NAME}" ${USERBENCHMARK_OPTIONS}
	      cp -r "./.userbenchmark/${USERBENCHMARK_NAME}" ../benchmark-output
	    fi
	# Publish the collected benchmark-output/ directory as a workflow artifact.
	# v3 of the artifact actions has been retired on github.com, hence v4.
	# NOTE(review): v4 needs a reasonably recent self-hosted runner - confirm
	# the bm-runner version.
	- name: Upload artifact
	  uses: actions/upload-artifact@v4
	  with:
	    name: TorchBench result
	    path: benchmark-output/
	# Upload every metrics-*.json found in benchmark-output (at most two levels
	# deep) with the Scribe and S3 upload scripts.
	- name: Upload result jsons to Scribe
	  run: |
	    . "${SETUP_SCRIPT}" && conda activate "${CONDA_ENV_NAME}"
	    conda install -y six
	    pushd benchmark
	    # -maxdepth is a global find option and must precede tests such as -name.
	    # mapfile reads one path per line, so paths are not split on spaces.
	    mapfile -t RESULTS < <(find "${PWD}/../benchmark-output" -maxdepth 2 -name "metrics-*.json" | sort -r)
	    # [*] prints the whole list; a bare ${RESULTS} is only the first element.
	    echo "Uploading result jsons: ${RESULTS[*]}"
	    for r in "${RESULTS[@]}"; do
	      python ./scripts/userbenchmark/upload_scribe.py --userbenchmark_json "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
	      python ./scripts/userbenchmark/upload_s3.py --upload-file "${r}" --userbenchmark_platform "${PLATFORM_NAME}"
	    done
	# Delete the conda environment created at the start of the job.
	# No `if:` condition, so this runs only when all earlier steps succeeded.
	- name: Remove conda environment
	  run: |
	    conda env remove --name "${CONDA_ENV_NAME}"
	# Remove the checkout even when earlier steps failed or were cancelled.
	- name: Cleanup repo
	  if: always()
	  run: |
	    # `|| true` keeps the step green if the removal itself fails.
	    sudo rm -rf benchmark || true

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

TorchBench Userbenchmark on T4 Metal #791

Workflow file

TorchBench Userbenchmark on T4 Metal #791

Jobs

Run details

Workflow file for this run