diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4da1cd48a..817e66953 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,7 +61,7 @@ jobs: - name: Configure CMake working-directory: ./build - run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON .. + run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON -DGGML_METAL=OFF .. - name: Build working-directory: ./build @@ -112,7 +112,7 @@ jobs: - name: Configure CMake working-directory: ./build - run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON .. + run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON -DGGML_METAL=OFF .. - name: Build working-directory: ./build diff --git a/.gitignore b/.gitignore index d588aa998..dd2ca4b97 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ build/ +build-blas/ build-debug/ build-release/ build-sanitize-addr/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 2af2122d4..f8f418bfa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,16 @@ endif() # options +if (APPLE) + set(GGML_METAL_DEFAULT ON) + set(GGML_BLAS_DEFAULT ON) + set(GGML_BLAS_VENDOR_DEFAULT "Apple") +else() + set(GGML_METAL_DEFAULT OFF) + set(GGML_BLAS_DEFAULT OFF) + set(GGML_BLAS_VENDOR_DEFAULT "Generic") +endif() + option(BUILD_SHARED_LIBS "ggml: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT}) option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON) @@ -41,11 +51,13 @@ option(GGML_TEST_COVERAGE "ggml: enable test coverage" OFF) option(GGML_PERF "ggml: enable perf timings" OFF) option(GGML_NO_ACCELERATE "ggml: disable Accelerate framework" OFF) -option(GGML_OPENBLAS "ggml: use OpenBLAS" OFF) +option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT}) +set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING + "ggml: BLAS library vendor") option(GGML_HIPBLAS "ggml: use hipBLAS" OFF) option(GGML_CUDA "ggml: use CUDA" OFF) option(GGML_CUBLAS "ggml: use CUDA (deprecated)" OFF) -option(GGML_METAL "ggml: use Metal" OFF) +option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF) option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" OFF) diff --git a/examples/common.h b/examples/common.h index 2ed91ca9a..79b983095 100644 --- a/examples/common.h +++ b/examples/common.h @@ -21,7 +21,7 @@ struct gpt_params { int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency()); int32_t n_predict = 200; // new tokens to predict int32_t n_parallel = 1; // number of parallel streams - int32_t n_batch = 8; // batch size for prompt processing + int32_t n_batch = 32; // batch size for prompt processing int32_t n_ctx = 2048; // context size (this is the KV cache max size) int32_t n_gpu_layers = 0; // number of layers to offlload to the GPU diff --git a/examples/gpt-2/main-sched.cpp b/examples/gpt-2/main-sched.cpp index bdf3bff82..11c72973d 100644 --- a/examples/gpt-2/main-sched.cpp +++ b/examples/gpt-2/main-sched.cpp @@ -10,6 +10,10 @@ #include "ggml-metal.h" #endif +#ifdef GGML_USE_BLAS +#include "ggml-blas.h" +#endif + #include "common.h" #include "common-ggml.h" @@ -131,6 +135,16 @@ void init_backends(gpt2_model & model, const gpt_params & params) { model.backends.push_back(gpu_backend); } +#ifdef GGML_USE_BLAS + ggml_backend_t blas_backend = ggml_backend_blas_init(); + if (!blas_backend) { + fprintf(stderr, "%s: failed to initialize BLAS backend\n", __func__); + } else { + ggml_backend_blas_set_n_threads(blas_backend, params.n_threads); + model.backends.push_back(blas_backend); + } +#endif + // always add the CPU backend as a fallback ggml_backend_t cpu_backend = ggml_backend_cpu_init(); ggml_backend_cpu_set_n_threads(cpu_backend, params.n_threads); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 25fb1ad17..0330c9b36 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -152,28 +152,89 @@ if (APPLE AND NOT GGML_NO_ACCELERATE) endif() endif() -if (GGML_OPENBLAS) - set(OPENBLAS_INCLUDE_SEARCH_PATHS - /usr/include - /usr/include/openblas - /usr/include/openblas-base - /usr/local/include - /usr/local/include/openblas - /usr/local/include/openblas-base - /opt/OpenBLAS/include - $ENV{OpenBLAS_HOME} - $ENV{OpenBLAS_HOME}/include - ) - find_path(OPENBLAS_INC NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) - find_library(OPENBLAS_LIB NAMES openblas libopenblas) - if (OPENBLAS_LIB) - message(STATUS "OpenBLAS found") - - set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${OPENBLAS_LIB}) - set(GGML_EXTRA_INCS ${GGML_EXTRA_INCS} ${OPENBLAS_INC}) - set(GGML_EXTRA_FLAGS ${GGML_EXTRA_FLAGS} -DGGML_USE_OPENBLAS) +if (GGML_BLAS) + if (GGML_STATIC) + set(BLA_STATIC ON) + endif() + #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22) + # set(BLA_SIZEOF_INTEGER 8) + #endif() + + set(BLA_VENDOR ${GGML_BLAS_VENDOR}) + find_package(BLAS) + + if (BLAS_FOUND) + message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") + + if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${GGML_BLAS_VENDOR} MATCHES "Apple")) + # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake. + # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268 + find_package(PkgConfig REQUIRED) + if (${GGML_BLAS_VENDOR} MATCHES "Generic") + pkg_check_modules(DepBLAS REQUIRED blas) + elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS") + # As of openblas v0.3.22, the 64-bit is named openblas64.pc + pkg_check_modules(DepBLAS openblas64) + if (NOT DepBLAS_FOUND) + pkg_check_modules(DepBLAS REQUIRED openblas) + endif() + elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME") + pkg_check_modules(DepBLAS REQUIRED blis) + elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS") + pkg_check_modules(DepBLAS REQUIRED blas-atlas) + elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS") + pkg_check_modules(DepBLAS REQUIRED flexiblas_api) + elseif (${GGML_BLAS_VENDOR} MATCHES "Intel") + # all Intel* libraries share the same include path + pkg_check_modules(DepBLAS REQUIRED mkl-sdl) + elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC") + # this doesn't provide pkg-config + # suggest to assign BLAS_INCLUDE_DIRS on your own + if ("${NVHPC_VERSION}" STREQUAL "") + message(WARNING "Better to set NVHPC_VERSION") + else() + set(DepBLAS_FOUND ON) + set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include") + endif() + endif() + if (DepBLAS_FOUND) + set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS}) + else() + message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically" + " detected by pkgconfig, trying to find cblas.h from possible paths...") + find_path(BLAS_INCLUDE_DIRS + NAMES cblas.h + HINTS + /usr/include + /usr/local/include + /usr/include/openblas + /opt/homebrew/opt/openblas/include + /usr/local/opt/openblas/include + /usr/include/x86_64-linux-gnu/openblas/include + ) + endif() + endif() + + message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}") + + add_compile_options(${BLAS_LINKER_FLAGS}) + + add_compile_definitions(GGML_USE_BLAS) + + if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel")) + add_compile_definitions(GGML_BLAS_USE_MKL) + endif() + + set(GGML_HEADERS_BLAS ggml-blas.h) + set(GGML_SOURCES_BLAS ggml-blas.cpp) + + set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${BLAS_LIBRARIES}) + set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS}) + set(GGML_EXTRA_FLAGS ${GGML_EXTRA_FLAGS} -DGGML_USE_BLAS) else() - message(WARNING "OpenBLAS not found") + message(WARNING "BLAS not found, please refer to " + "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" + " to set correct GGML_BLAS_VENDOR") endif() endif() @@ -513,9 +574,10 @@ add_library(${TARGET} ../include/ggml/ggml.h ../include/ggml/ggml-alloc.h ../include/ggml/ggml-backend.h - ${GGML_SOURCES_CUDA} - ${GGML_SOURCES_METAL} - ${GGML_SOURCES_RPC} + ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA} + ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL} + ${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC} + ${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS} ) target_include_directories(${TARGET} PUBLIC diff --git a/ggml-blas.cpp b/src/ggml-blas.cpp similarity index 100% rename from ggml-blas.cpp rename to src/ggml-blas.cpp diff --git a/ggml-blas.h b/src/ggml-blas.h similarity index 100% rename from ggml-blas.h rename to src/ggml-blas.h