# Skip to content
#
# TorchBench V3 bisection #219
#
# TorchBench V3 bisection
#
# TorchBench V3 bisection #219
#
# Workflow file for this run

# Manually triggered workflow that bisects a TorchBench "torch-nightly"
# performance regression.
name: TorchBench V3 bisection
on:
  # Started by hand (UI / API); there is no push or schedule trigger here.
  workflow_dispatch:
    inputs:
      regression_date:
        description: "Date of the regression"
        required: true
        # Quoted so the value stays a string instead of being parsed as a
        # YAML date.
        default: "2023-08-29"
jobs:
  # Single job: checks out TorchBench plus the PyTorch domain repositories,
  # generates a regression config for the requested date, runs bisection.py
  # against it, then files an issue and uploads the results.
  bisection:
    environment: docker-s3-upload
    env:
      BASE_CONDA_ENV: "torchbench"
      CONDA_ENV: "bisection-ci-v3"
      PLATFORM_NAME: "gcp_a100"
      USERBENCHMARK_NAME: "torch-nightly"
      SETUP_SCRIPT: "/workspace/setup_instance.sh"
      BISECT_WORKDIR: ".userbenchmark/torch-nightly/bisection"
      # The dispatch input is handed to the scripts as an environment variable
      # rather than being expanded into the script text, so its content can
      # never be interpreted as shell syntax.
      REGRESSION_DATE: ${{ github.event.inputs.regression_date }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    # Only run in repositories owned by the pytorch organization.
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: [a100-runner]
    timeout-minutes: 2880  # 48 hours
    steps:
      # TorchBench itself lands in ./benchmark.
      - name: Checkout
        uses: actions/checkout@v3
        with:
          path: benchmark
      # The repositories below are cloned with full history (fetch-depth: 0)
      # into ./srcs, which is later passed to bisection.py as
      # --torch-repos-path.
      - name: Checkout pytorch
        uses: actions/checkout@v3
        with:
          repository: pytorch/pytorch
          path: srcs/pytorch
          fetch-depth: 0
      - name: Checkout torchvision
        uses: actions/checkout@v3
        with:
          repository: pytorch/vision
          path: srcs/vision
          fetch-depth: 0
      - name: Checkout torchdata
        uses: actions/checkout@v3
        with:
          repository: pytorch/data
          path: srcs/data
          fetch-depth: 0
      - name: Checkout torchaudio
        uses: actions/checkout@v3
        with:
          repository: pytorch/audio
          path: srcs/audio
          fetch-depth: 0
      # Enable persistence mode and pin the application clocks
      # (memory,graphics in MHz), then print the resulting GPU state.
      - name: Tune Nvidia GPU
        run: |
          sudo nvidia-smi -pm 1
          sudo nvidia-smi -ac 1215,1410
          nvidia-smi
      # Refresh the package index once; apt-get is the script-stable front end.
      - name: Install Deps
        run: |
          sudo apt-get -y update
      # Source the setup script with CONDA_ENV overridden to the base env for
      # that command, then ask python_utils.py to create the env named by the
      # job-level CONDA_ENV.
      # NOTE(review): what the setup script does is not visible here -
      # presumably it activates the conda env named by CONDA_ENV; confirm.
      - name: Setup conda env
        run: |
          CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
          cd benchmark
          python ./utils/python_utils.py --create-conda-env "${CONDA_ENV}"
      # Install build/benchmark dependencies and write the regression config
      # that the Bisection step consumes.
      - name: Setup bisection environment
        run: |
          . "${SETUP_SCRIPT}"; cd benchmark
          python utils/cuda_utils.py --install-torch-build-deps
          python utils/cuda_utils.py --install-torchbench-deps
          mkdir -p "${BISECT_WORKDIR}"
          python regression_detector.py --name "${USERBENCHMARK_NAME}" --platform "${PLATFORM_NAME}" \
            --end-date "${REGRESSION_DATE}" --download-from-s3 --output "${BISECT_WORKDIR}/regression-${REGRESSION_DATE}.yaml"
      # Run the bisection and copy the work directory to the workspace root
      # so the upload step can pick it up.
      - name: Bisection
        run: |
          . "${SETUP_SCRIPT}"; cd benchmark
          python bisection.py --work-dir "${BISECT_WORKDIR}" --torch-repos-path "${PWD}/../srcs" \
            --torchbench-repo-path "${PWD}" --config "${BISECT_WORKDIR}/regression-${REGRESSION_DATE}.yaml" \
            --output "${BISECT_WORKDIR}/bisect-output-gh${GITHUB_RUN_ID}.json"
          # Drop any leftover copy from a previous run on this runner;
          # otherwise cp would nest the new results inside the stale directory.
          rm -rf ../bisection-result
          cp -r "${BISECT_WORKDIR}" ../bisection-result
      # Runs only when TORCHBENCH_BISECTION_COMMIT_FOUND_OR_FAILED is set in
      # the environment.
      # NOTE(review): presumably exported by bisection.py together with
      # benchmark/gh-issue.md - confirm against that script.
      # A failure to file the issue does not fail the job.
      - name: Create the github issue
        continue-on-error: true
        if: env.TORCHBENCH_BISECTION_COMMIT_FOUND_OR_FAILED
        uses: peter-evans/create-issue-from-file@v4
        with:
          title: V3 Performance Signal Detected by TorchBench Userbenchmark "torch-nightly" on ${{ env.TORCHBENCH_BISECTION_COMMIT_FOUND_OR_FAILED }}
          content-filepath: ./benchmark/gh-issue.md
          token: ${{ secrets.TORCHBENCH_ACCESS_TOKEN }}
          labels: |
            torchbench-perf-report
      # Always attempt the upload so partial results survive a failed run.
      # upload-artifact v3 has been retired on github.com, hence v4.
      - name: Upload artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: Bisection result
          path: bisection-result/
      # Always remove the per-run conda env. -y answers the confirmation
      # prompt, which nobody can answer on a CI runner.
      - name: Clean up Conda env
        if: always()
        run: |
          . "${SETUP_SCRIPT}"
          conda deactivate && conda deactivate
          conda remove -n "${CONDA_ENV}" --all -y