Add FBGEMM submodule (#2293)
Summary:
This PR does the following:
- Upgrade the default CUDA version to 12.4.
- Pre-install fbgemm_gpu GenAI kernels into the nightly Docker image.

Pull Request resolved: #2293

Test Plan:
Build base image: https://github.com/pytorch/benchmark/actions/runs/9476276319
Build nightly docker: https://github.com/pytorch/benchmark/actions/runs/9486161032

Reviewed By: aaronenyeshi

Differential Revision: D58471717

Pulled By: xuzhao9

fbshipit-source-id: 9d2e0b45b7cba4af1cb7578daec001605ee03985
xuzhao9 authored and facebook-github-bot committed Jun 12, 2024
1 parent 3ecaae9 commit abb45ae
Showing 8 changed files with 55 additions and 16 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
@@ -4,3 +4,6 @@
 [submodule "submodules/lit-llama"]
 	path = submodules/lit-llama
 	url = https://github.com/Lightning-AI/lit-llama.git
+[submodule "submodules/FBGEMM"]
+	path = submodules/FBGEMM
+	url = https://github.com/pytorch/FBGEMM.git
5 changes: 4 additions & 1 deletion docker/build-torchbench-nightly-docker.sh
@@ -1 +1,4 @@
-docker build . -f torchbench-nightly.dockerfile -t ghcr.io/pytorch/torchbench:latest
+TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH:-main}
+
+docker build . -f torchbench-nightly.dockerfile -t ghcr.io/pytorch/torchbench:latest \
+    --build-arg TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH}
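The `${TORCHBENCH_BRANCH:-main}` expansion above makes the script fall back to `main` when the variable is not set; a quick sketch of that behavior (branch name illustrative):

```shell
# Fallback behavior of ${VAR:-default}, as used by the build script:
unset TORCHBENCH_BRANCH
echo "${TORCHBENCH_BRANCH:-main}"        # variable unset -> default "main"

TORCHBENCH_BRANCH=my-feature-branch
echo "${TORCHBENCH_BRANCH:-main}"        # variable set -> its own value
```

So running the script as `TORCHBENCH_BRANCH=my-feature-branch ./docker/build-torchbench-nightly-docker.sh` would build the image from that branch instead of `main`.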
4 changes: 2 additions & 2 deletions docker/gcp-a100-runner-dind.dockerfile
@@ -28,9 +28,9 @@ RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace
 # We assume that the host NVIDIA driver binaries and libraries are mapped to the docker filesystem
 
 # Use the CUDA installation scripts from pytorch/builder
+# Install CUDA 12.4 only to reduce docker size
 RUN cd /workspace; git clone https://github.com/pytorch/builder.git
-RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_118; prune_118'
-RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_121; prune_121'
+RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_124; prune_124'
 
 # Install miniconda
 RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /workspace/Miniconda3-latest-Linux-x86_64.sh
13 changes: 10 additions & 3 deletions docker/torchbench-nightly.dockerfile
@@ -8,10 +8,11 @@ ENV SETUP_SCRIPT=/workspace/setup_instance.sh
 ARG TORCHBENCH_BRANCH=${TORCHBENCH_BRANCH:-main}
 ARG FORCE_DATE=${FORCE_DATE}
 
-# Setup Conda env and CUDA
-RUN git clone -b "${TORCHBENCH_BRANCH}" --single-branch \
-    https://github.com/pytorch/benchmark /workspace/benchmark
+# Checkout Torchbench and submodules
+RUN git clone --recurse-submodules -b "${TORCHBENCH_BRANCH}" --single-branch \
+    https://github.com/pytorch/benchmark /workspace/benchmark
 
+# Setup conda env and CUDA
 RUN cd /workspace/benchmark && \
     . ${SETUP_SCRIPT} && \
     python ./utils/python_utils.py --create-conda-env ${CONDA_ENV} && \
@@ -45,6 +46,12 @@ RUN cd /workspace/benchmark && \
     . ${SETUP_SCRIPT} && \
     python utils/cuda_utils.py --install-torchbench-deps
 
+# Install FBGEMM GENAI
+RUN cd /workspace/benchmark && \
+    . ${SETUP_SCRIPT} && \
+    python install.py --userbenchmark triton --fbgemm
+
 # Install Torchbench models
 RUN cd /workspace/benchmark && \
     . ${SETUP_SCRIPT} && \
     python install.py
6 changes: 4 additions & 2 deletions install.py
@@ -46,7 +46,7 @@ def pip_install_requirements(requirements_txt="requirements.txt"):
         choices=list_userbenchmarks(),
         help="Install requirements for optional components.",
     )
-    args = parser.parse_args()
+    args, extra_args = parser.parse_known_args()
 
     os.chdir(os.path.realpath(os.path.dirname(__file__)))
 
@@ -68,9 +68,11 @@ def pip_install_requirements(requirements_txt="requirements.txt"):
     if args.userbenchmark:
         # Install userbenchmark dependencies if exists
         userbenchmark_dir = REPO_ROOT.joinpath("userbenchmark", args.userbenchmark)
+        cmd = [sys.executable, "install.py"]
+        cmd.extend(extra_args)
         if userbenchmark_dir.joinpath("install.py").is_file():
             subprocess.check_call(
-                [sys.executable, "install.py"], cwd=userbenchmark_dir.absolute()
+                cmd, cwd=userbenchmark_dir.absolute()
             )
         sys.exit(0)
 
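The switch from `parse_args()` to `parse_known_args()` is what lets the top-level `install.py` accept flags it does not itself define (such as `--fbgemm`) and forward them to a userbenchmark's own installer. A minimal sketch of that behavior:

```python
import argparse
import sys

parser = argparse.ArgumentParser()
parser.add_argument("--userbenchmark")

# parse_known_args() returns (namespace, leftovers) instead of erroring out
# on unrecognized flags, so the leftovers can be forwarded verbatim.
args, extra_args = parser.parse_known_args(["--userbenchmark", "triton", "--fbgemm"])

# The child installer receives exactly the flags the parent didn't consume.
cmd = [sys.executable, "install.py"] + extra_args
```

With `parse_args()`, the unrecognized `--fbgemm` would have aborted the run with a usage error.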
1 change: 1 addition & 0 deletions submodules/FBGEMM
Submodule FBGEMM added at 182f3a
27 changes: 27 additions & 0 deletions userbenchmark/triton/install.py
@@ -0,0 +1,27 @@
+import argparse
+import subprocess
+import sys
+import os
+from pathlib import Path
+
+REPO_PATH = Path(os.path.abspath(__file__)).parent.parent.parent
+FBGEMM_PATH = REPO_PATH.joinpath("submodules", "FBGEMM", "fbgemm_gpu")
+
+def install_fbgemm():
+    cmd = ["pip", "install", "-r", "requirements.txt"]
+    subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve()))
+    # Build targets: A100 (8.0) and H100 (9.0)
+    cmd = [sys.executable, "setup.py", "bdist_wheel", "--package_variant=genai", "-DTORCH_CUDA_ARCH_LIST=8.0;9.0"]
+    subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve()))
+
+def test_fbgemm():
+    cmd = [sys.executable, "-c", "import fbgemm_gpu.experimental.gen_ai"]
+    subprocess.check_call(cmd)
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--fbgemm", action="store_true", help="Install FBGEMM GPU")
+    args = parser.parse_args()
+    if args.fbgemm:
+        install_fbgemm()
+        test_fbgemm()
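The three `.parent` hops in the new installer climb from `userbenchmark/triton/install.py` back up to the repository root, from which the FBGEMM submodule path is joined. A small illustration using a made-up checkout location:

```python
from pathlib import Path

# Hypothetical location of userbenchmark/triton/install.py inside a checkout:
script = Path("/workspace/benchmark/userbenchmark/triton/install.py")

# .parent -> .../triton, .parent -> .../userbenchmark, .parent -> repo root
repo_path = script.parent.parent.parent
fbgemm_path = repo_path.joinpath("submodules", "FBGEMM", "fbgemm_gpu")
```

This keeps the installer independent of the current working directory, since the path is anchored to the script file itself rather than to wherever the process was launched.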
12 changes: 4 additions & 8 deletions utils/cuda_utils.py
@@ -8,16 +8,12 @@
 from typing import Optional
 
 # defines the default CUDA version to compile against
-DEFAULT_CUDA_VERSION = "12.1"
+DEFAULT_CUDA_VERSION = "12.4"
 
 CUDA_VERSION_MAP = {
-    "11.8": {
-        "pytorch_url": "cu118",
-        "magma_version": "magma-cuda118",
-    },
-    "12.1": {
-        "pytorch_url": "cu121",
-        "magma_version": "magma-cuda121",
+    "12.4": {
+        "pytorch_url": "cu124",
+        "magma_version": "magma-cuda124",
     },
 }
 PIN_CMAKE_VERSION = "3.22.*"
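Consumers of `CUDA_VERSION_MAP` key on the version string to pick the matching wheel suffix and magma package. A sketch of that lookup; the nightly index URL pattern is an assumption for illustration, not taken from this diff:

```python
DEFAULT_CUDA_VERSION = "12.4"
CUDA_VERSION_MAP = {
    "12.4": {
        "pytorch_url": "cu124",
        "magma_version": "magma-cuda124",
    },
}

# Look up the entry for the default CUDA version.
entry = CUDA_VERSION_MAP[DEFAULT_CUDA_VERSION]

# Assumed URL shape for a per-CUDA-version wheel index (illustrative only):
index_url = f"https://download.pytorch.org/whl/nightly/{entry['pytorch_url']}"
```

Dropping the `11.8` and `12.1` entries means any caller requesting those versions now gets a `KeyError`, which is consistent with the image only shipping CUDA 12.4.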
