Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync : ggml-blas #859

Merged
merged 2 commits into from
Jun 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:

- name: Configure CMake
working-directory: ./build
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON ..
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON -DGGML_METAL=OFF ..

- name: Build
working-directory: ./build
Expand Down Expand Up @@ -112,7 +112,7 @@ jobs:

- name: Configure CMake
working-directory: ./build
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON ..
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON -DGGML_METAL=OFF ..

- name: Build
working-directory: ./build
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
build/
build-blas/
build-debug/
build-release/
build-sanitize-addr/
Expand Down
16 changes: 14 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ endif()

# options

if (APPLE)
set(GGML_METAL_DEFAULT ON)
set(GGML_BLAS_DEFAULT ON)
set(GGML_BLAS_VENDOR_DEFAULT "Apple")
else()
set(GGML_METAL_DEFAULT OFF)
set(GGML_BLAS_DEFAULT OFF)
set(GGML_BLAS_VENDOR_DEFAULT "Generic")
endif()

option(BUILD_SHARED_LIBS "ggml: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})

option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
Expand All @@ -41,11 +51,13 @@ option(GGML_TEST_COVERAGE "ggml: enable test coverage" OFF)

option(GGML_PERF "ggml: enable perf timings" OFF)
option(GGML_NO_ACCELERATE "ggml: disable Accelerate framework" OFF)
option(GGML_OPENBLAS "ggml: use OpenBLAS" OFF)
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
"ggml: BLAS library vendor")
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
option(GGML_CUDA "ggml: use CUDA" OFF)
option(GGML_CUBLAS "ggml: use CUDA (deprecated)" OFF)
option(GGML_METAL "ggml: use Metal" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" OFF)
Expand Down
2 changes: 1 addition & 1 deletion examples/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ struct gpt_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t n_predict = 200; // new tokens to predict
int32_t n_parallel = 1; // number of parallel streams
int32_t n_batch = 8; // batch size for prompt processing
int32_t n_batch = 32; // batch size for prompt processing
int32_t n_ctx = 2048; // context size (this is the KV cache max size)
int32_t n_gpu_layers = 0; // number of layers to offload to the GPU

Expand Down
14 changes: 14 additions & 0 deletions examples/gpt-2/main-sched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
#include "ggml-metal.h"
#endif

#ifdef GGML_USE_BLAS
#include "ggml-blas.h"
#endif

#include "common.h"
#include "common-ggml.h"

Expand Down Expand Up @@ -131,6 +135,16 @@ void init_backends(gpt2_model & model, const gpt_params & params) {
model.backends.push_back(gpu_backend);
}

#ifdef GGML_USE_BLAS
ggml_backend_t blas_backend = ggml_backend_blas_init();
if (!blas_backend) {
fprintf(stderr, "%s: failed to initialize BLAS backend\n", __func__);
} else {
ggml_backend_blas_set_n_threads(blas_backend, params.n_threads);
model.backends.push_back(blas_backend);
}
#endif

// always add the CPU backend as a fallback
ggml_backend_t cpu_backend = ggml_backend_cpu_init();
ggml_backend_cpu_set_n_threads(cpu_backend, params.n_threads);
Expand Down
4 changes: 4 additions & 0 deletions scripts/sync-llama-am.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ if [ -f $SRC_GGML/llama-src.patch ]; then
# ggml-alloc.c -> src/ggml-alloc.c
# ggml-backend-impl.h -> src/ggml-backend-impl.h
# ggml-backend.c -> src/ggml-backend.c
# ggml-blas.cpp -> src/ggml-blas.cpp
# ggml-blas.h -> src/ggml-blas.h
# ggml-common.h -> src/ggml-common.h
# ggml-cuda/* -> src/ggml-cuda/*
# ggml-cuda.cu -> src/ggml-cuda.cu
Expand Down Expand Up @@ -133,6 +135,8 @@ if [ -f $SRC_GGML/llama-src.patch ]; then
-e 's/\/ggml-alloc\.c/\/src\/ggml-alloc.c/g' \
-e 's/\/ggml-backend-impl\.h/\/src\/ggml-backend-impl.h/g' \
-e 's/\/ggml-backend\.c/\/src\/ggml-backend.c/g' \
-e 's/\/ggml-blas\.cpp/\/src\/ggml-blas.cpp/g' \
-e 's/\/ggml-blas\.h/\/src\/ggml-blas.h/g' \
-e 's/\/ggml-common\.h/\/src\/ggml-common.h/g' \
-e 's/\/ggml-cuda\//\/src\/ggml-cuda\//g' \
-e 's/\/ggml-cuda\.cu/\/src\/ggml-cuda.cu/g' \
Expand Down
2 changes: 2 additions & 0 deletions scripts/sync-llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ cp -rpv ../llama.cpp/ggml.c src/ggml.c
cp -rpv ../llama.cpp/ggml-alloc.c src/ggml-alloc.c
cp -rpv ../llama.cpp/ggml-backend-impl.h src/ggml-backend-impl.h
cp -rpv ../llama.cpp/ggml-backend.c src/ggml-backend.c
cp -rpv ../llama.cpp/ggml-blas.cpp src/ggml-blas.cpp
cp -rpv ../llama.cpp/ggml-blas.h src/ggml-blas.h
cp -rpv ../llama.cpp/ggml-common.h src/ggml-common.h
cp -rpv ../llama.cpp/ggml-cuda/* src/ggml-cuda/
cp -rpv ../llama.cpp/ggml-cuda.cu src/ggml-cuda.cu
Expand Down
4 changes: 4 additions & 0 deletions scripts/sync-whisper-am.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ if [ -f $SRC_GGML/whisper-src.patch ]; then
# ggml-alloc.c -> src/ggml-alloc.c
# ggml-backend-impl.h -> src/ggml-backend-impl.h
# ggml-backend.c -> src/ggml-backend.c
# ggml-blas.cpp -> src/ggml-blas.cpp
# ggml-blas.h -> src/ggml-blas.h
# ggml-common.h -> src/ggml-common.h
# ggml-cuda/* -> src/ggml-cuda/*
# ggml-cuda.cu -> src/ggml-cuda.cu
Expand Down Expand Up @@ -143,6 +145,8 @@ if [ -f $SRC_GGML/whisper-src.patch ]; then
-e 's/\/ggml-alloc\.c/\/src\/ggml-alloc.c/g' \
-e 's/\/ggml-backend-impl\.h/\/src\/ggml-backend-impl.h/g' \
-e 's/\/ggml-backend\.c/\/src\/ggml-backend.c/g' \
-e 's/\/ggml-blas\.cpp/\/src\/ggml-blas.cpp/g' \
-e 's/\/ggml-blas\.h/\/src\/ggml-blas.h/g' \
-e 's/\/ggml-common\.h/\/src\/ggml-common.h/g' \
-e 's/\/ggml-cuda\//\/src\/ggml-cuda\//g' \
-e 's/\/ggml-cuda\.cu/\/src\/ggml-cuda.cu/g' \
Expand Down
2 changes: 2 additions & 0 deletions scripts/sync-whisper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ cp -rpv ../whisper.cpp/ggml-impl.h src/ggml-impl.h
cp -rpv ../whisper.cpp/ggml-alloc.c src/ggml-alloc.c
cp -rpv ../whisper.cpp/ggml-backend-impl.h src/ggml-backend-impl.h
cp -rpv ../whisper.cpp/ggml-backend.c src/ggml-backend.c
cp -rpv ../whisper.cpp/ggml-blas.cpp src/ggml-blas.cpp
cp -rpv ../whisper.cpp/ggml-blas.h src/ggml-blas.h
cp -rpv ../whisper.cpp/ggml-common.h src/ggml-common.h
cp -rpv ../whisper.cpp/ggml-cuda/* src/ggml-cuda/
cp -rpv ../whisper.cpp/ggml-cuda.cu src/ggml-cuda.cu
Expand Down
110 changes: 86 additions & 24 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,28 +152,89 @@ if (APPLE AND NOT GGML_NO_ACCELERATE)
endif()
endif()

if (GGML_OPENBLAS)
set(OPENBLAS_INCLUDE_SEARCH_PATHS
/usr/include
/usr/include/openblas
/usr/include/openblas-base
/usr/local/include
/usr/local/include/openblas
/usr/local/include/openblas-base
/opt/OpenBLAS/include
$ENV{OpenBLAS_HOME}
$ENV{OpenBLAS_HOME}/include
)
find_path(OPENBLAS_INC NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_library(OPENBLAS_LIB NAMES openblas libopenblas)
if (OPENBLAS_LIB)
message(STATUS "OpenBLAS found")

set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${OPENBLAS_LIB})
set(GGML_EXTRA_INCS ${GGML_EXTRA_INCS} ${OPENBLAS_INC})
set(GGML_EXTRA_FLAGS ${GGML_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
if (GGML_BLAS)
if (GGML_STATIC)
set(BLA_STATIC ON)
endif()
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
# set(BLA_SIZEOF_INTEGER 8)
#endif()

set(BLA_VENDOR ${GGML_BLAS_VENDOR})
find_package(BLAS)

if (BLAS_FOUND)
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${GGML_BLAS_VENDOR} MATCHES "Apple"))
# BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
find_package(PkgConfig REQUIRED)
if (${GGML_BLAS_VENDOR} MATCHES "Generic")
pkg_check_modules(DepBLAS REQUIRED blas)
elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
# As of openblas v0.3.22, the 64-bit is named openblas64.pc
pkg_check_modules(DepBLAS openblas64)
if (NOT DepBLAS_FOUND)
pkg_check_modules(DepBLAS REQUIRED openblas)
endif()
elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
pkg_check_modules(DepBLAS REQUIRED blis)
elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
pkg_check_modules(DepBLAS REQUIRED blas-atlas)
elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
# all Intel* libraries share the same include path
pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
# this doesn't provide pkg-config
# suggest to assign BLAS_INCLUDE_DIRS on your own
if ("${NVHPC_VERSION}" STREQUAL "")
message(WARNING "Better to set NVHPC_VERSION")
else()
set(DepBLAS_FOUND ON)
set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
endif()
endif()
if (DepBLAS_FOUND)
set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
else()
message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
" detected by pkgconfig, trying to find cblas.h from possible paths...")
find_path(BLAS_INCLUDE_DIRS
NAMES cblas.h
HINTS
/usr/include
/usr/local/include
/usr/include/openblas
/opt/homebrew/opt/openblas/include
/usr/local/opt/openblas/include
/usr/include/x86_64-linux-gnu/openblas/include
)
endif()
endif()

message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")

add_compile_options(${BLAS_LINKER_FLAGS})

add_compile_definitions(GGML_USE_BLAS)

if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
add_compile_definitions(GGML_BLAS_USE_MKL)
endif()

set(GGML_HEADERS_BLAS ggml-blas.h)
set(GGML_SOURCES_BLAS ggml-blas.cpp)

set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${BLAS_LIBRARIES})
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
set(GGML_EXTRA_FLAGS ${GGML_EXTRA_FLAGS} -DGGML_USE_BLAS)
else()
message(WARNING "OpenBLAS not found")
message(WARNING "BLAS not found, please refer to "
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
" to set correct GGML_BLAS_VENDOR")
endif()
endif()

Expand Down Expand Up @@ -513,9 +574,10 @@ add_library(${TARGET}
../include/ggml/ggml.h
../include/ggml/ggml-alloc.h
../include/ggml/ggml-backend.h
${GGML_SOURCES_CUDA}
${GGML_SOURCES_METAL}
${GGML_SOURCES_RPC}
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
)

target_include_directories(${TARGET} PUBLIC
Expand Down
File renamed without changes.
File renamed without changes.