xinhaoc · xinhaoc · May 11, 2023 · May 11, 2023 · May 12, 2023 · May 14, 2023
diff --git a/.dockerignore b/.dockerignore
@@ -11,3 +11,11 @@ python/flexflow/core/legion_cffi_header.py
 *.pb.h
 *.o
 *.a
+
+# Ignore inference assets
+/inference/weights/*
+/inference/tokenizer/*
+/inference/prompt/*
+/inference/output/*
+
+/tests/inference/python_test_configs/*.json
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -10,6 +10,3 @@ Linked Issues:
 Issues closed by this PR:
 - Closes #
 
-**Before merging:**
-
-- [ ] Did you update the [flexflow-third-party](https://github.com/flexflow/flexflow-third-party) repo, if modifying any of the Cmake files, the build configs, or the submodules?
diff --git a/.github/README.md b/.github/README.md
diff --git a/.github/workflows/build-skip.yml b/.github/workflows/build-skip.yml
@@ -3,6 +3,7 @@ on:
  pull_request:
  paths-ignore:
  - "include/**"
+ - "inference/**"
  - "cmake/**"
  - "config/**"
  - "deps/**"

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -3,6 +3,7 @@ on:
  pull_request:
  paths:
  - "include/**"
+ - "inference/**"
  - "cmake/**"
  - "config/**"
  - "deps/**"
@@ -15,6 +16,7 @@ on:
  - "master"
  paths:
  - "include/**"
+ - "inference/**"
  - "cmake/**"
  - "config/**"
  - "deps/**"
@@ -38,6 +40,8 @@ jobs:
  matrix:
  gpu_backend: ["cuda", "hip_rocm"]
  fail-fast: false
+ env:
+ FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
  steps:
  - name: Checkout Git Repository
  uses: actions/checkout@v3
@@ -49,38 +53,47 @@ jobs:
 
  - name: Install CUDA
  uses: Jimver/[email protected]
+ if: ${{ matrix.gpu_backend == 'cuda' }}
  id: cuda-toolkit
  with:
  cuda: "11.8.0"
  # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement
  use-github-cache: "false"
 
  - name: Install system dependencies
- run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh
+ run: .github/workflows/helpers/install_dependencies.sh
 
  - name: Install conda and FlexFlow dependencies
  uses: conda-incubator/setup-miniconda@v2
  with:
  activate-environment: flexflow
- environment-file: conda/environment.yml
+ environment-file: conda/flexflow.yml
  auto-activate-base: false
 
  - name: Build FlexFlow
  run: |
  export CUDNN_DIR="$CUDA_PATH"
  export CUDA_DIR="$CUDA_PATH"
  export FF_HOME=$(pwd)
- export FF_GPU_BACKEND=${{ matrix.gpu_backend }}
  export FF_CUDA_ARCH=70
+ export FF_HIP_ARCH=gfx1100,gfx1036
+ export hip_version=5.6
+ export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
+
+ if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
+ export FF_BUILD_ALL_EXAMPLES=ON
+ export FF_BUILD_UNIT_TESTS=ON
+ else 
+ export FF_BUILD_ALL_EXAMPLES=OFF
+ export FF_BUILD_UNIT_TESTS=OFF
+ fi
+
  cores_available=$(nproc --all)
  n_build_cores=$(( cores_available -1 ))
  if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi
  mkdir build
  cd build
- if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
- export FF_BUILD_ALL_EXAMPLES=ON 
- export FF_BUILD_UNIT_TESTS=ON
- fi
+
  ../config/config.linux
  make -j $n_build_cores
 
@@ -89,35 +102,44 @@ jobs:
  export CUDNN_DIR="$CUDA_PATH"
  export CUDA_DIR="$CUDA_PATH"
  export FF_HOME=$(pwd)
- export FF_GPU_BACKEND=${{ matrix.gpu_backend }}
  export FF_CUDA_ARCH=70
- cd build
+ export FF_HIP_ARCH=gfx1100,gfx1036
+ export hip_version=5.6
+ export FF_BUILD_ALL_INFERENCE_EXAMPLES=ON
+
  if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
- export FF_BUILD_ALL_EXAMPLES=ON 
+ export FF_BUILD_ALL_EXAMPLES=ON
  export FF_BUILD_UNIT_TESTS=ON
+ else 
+ export FF_BUILD_ALL_EXAMPLES=OFF
+ export FF_BUILD_UNIT_TESTS=OFF
  fi
+
+ cd build
  ../config/config.linux
  sudo make install
  sudo ldconfig
 
- - name: Check availability of Python flexflow.core module
- if: ${{ matrix.gpu_backend == 'cuda' }}
- run: |
- export LD_LIBRARY_PATH="$CUDA_PATH/lib64/stubs:$LD_LIBRARY_PATH"
- sudo ln -s "$CUDA_PATH/lib64/stubs/libcuda.so" "$CUDA_PATH/lib64/stubs/libcuda.so.1"
- export CPU_ONLY_TEST=1
- python -c "import flexflow.core; exit()"
-
  - name: Run C++ unit tests
  if: ${{ matrix.gpu_backend == 'cuda' }}
  run: |
  export CUDNN_DIR="$CUDA_PATH"
  export CUDA_DIR="$CUDA_PATH"
  export LD_LIBRARY_PATH="$CUDA_PATH/lib64/stubs:$LD_LIBRARY_PATH"
  export FF_HOME=$(pwd)
+ sudo ln -s "$CUDA_PATH/lib64/stubs/libcuda.so" "$CUDA_PATH/lib64/stubs/libcuda.so.1"
  cd build
  ./tests/unit/unit-test
 
+ - name: Check availability of flexflow modules in Python
+ run: |
+ if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then
+ export LD_LIBRARY_PATH="$CUDA_PATH/lib64/stubs:$LD_LIBRARY_PATH"
+ fi
+ # Remove build folder to check that the installed version can run independently of the build files
+ rm -rf build
+ python -c "import flexflow.core; import flexflow.serve as ff; exit()"
+
  makefile-build:
  name: Build FlexFlow with the Makefile
  runs-on: ubuntu-20.04
@@ -163,5 +185,4 @@ jobs:
 
  cd python
  make -j $n_build_cores
- export CPU_ONLY_TEST=1
  python -c 'import flexflow.core'
diff --git a/.github/workflows/clang-format-check.yml b/.github/workflows/clang-format-check.yml
@@ -10,7 +10,7 @@ jobs:
  - check: "src"
  exclude: '\.proto$'
  - check: "include"
- - check: "nmt"
+ - check: "inference"
  - check: "python"
  - check: "scripts"
  - check: "tests"

diff --git a/.github/workflows/docker-build-skip.yml b/.github/workflows/docker-build-skip.yml
@@ -13,27 +13,22 @@ concurrency:
  cancel-in-progress: true
 
 jobs:
- docker-build:
- name: Build and Install FlexFlow in a Docker Container
- runs-on: ubuntu-20.04
+ docker-build-rocm:
+ name: Build and Install FlexFlow in a Docker Container (ROCm backend)
+ runs-on: ubuntu-latest
  strategy:
  matrix:
- gpu_backend: ["cuda", "hip_rocm"]
- cuda_version: ["11.1", "11.2", "11.3", "11.5", "11.6", "11.7", "11.8"]
- # The CUDA version doesn't matter when building for hip_rocm, so we just pick one arbitrarily (11.8) to avoid building for hip_rocm once per number of CUDA version supported
- exclude:
- - gpu_backend: "hip_rocm"
- cuda_version: "11.1"
- - gpu_backend: "hip_rocm"
- cuda_version: "11.2"
- - gpu_backend: "hip_rocm"
- cuda_version: "11.3"
- - gpu_backend: "hip_rocm"
- cuda_version: "11.5"
- - gpu_backend: "hip_rocm"
- cuda_version: "11.6"
- - gpu_backend: "hip_rocm"
- cuda_version: "11.7"
+ hip_version: ["5.3", "5.4", "5.5", "5.6"]
+ fail-fast: false
+ steps:
+ - run: 'echo "No docker-build required"'
+
+ docker-build-cuda:
+ name: Build and Install FlexFlow in a Docker Container (CUDA backend)
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ cuda_version: ["11.1", "11.6", "11.7", "11.8", "12.0", "12.1", "12.2"]
  fail-fast: false
  steps:
  - run: 'echo "No docker-build required"'