Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync : ggml-blas #859

Merged
merged 2 commits into from
Jun 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:

- name: Configure CMake
working-directory: ./build
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON ..
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON -DGGML_METAL=OFF ..

- name: Build
working-directory: ./build
Expand Down Expand Up @@ -112,7 +112,7 @@ jobs:

- name: Configure CMake
working-directory: ./build
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON ..
run: cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DGGML_TEST_COVERAGE=ON -DGGML_METAL=OFF ..

- name: Build
working-directory: ./build
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
build/
build-blas/
build-debug/
build-release/
build-sanitize-addr/
Expand Down
16 changes: 14 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ endif()

# options

if (APPLE)
set(GGML_METAL_DEFAULT ON)
set(GGML_BLAS_DEFAULT ON)
set(GGML_BLAS_VENDOR_DEFAULT "Apple")
else()
set(GGML_METAL_DEFAULT OFF)
set(GGML_BLAS_DEFAULT OFF)
set(GGML_BLAS_VENDOR_DEFAULT "Generic")
endif()

option(BUILD_SHARED_LIBS "ggml: build shared libs" ${BUILD_SHARED_LIBS_DEFAULT})

option(GGML_ALL_WARNINGS "ggml: enable all compiler warnings" ON)
Expand All @@ -41,11 +51,13 @@ option(GGML_TEST_COVERAGE "ggml: enable test coverage" OFF)

option(GGML_PERF "ggml: enable perf timings" OFF)
option(GGML_NO_ACCELERATE "ggml: disable Accelerate framework" OFF)
option(GGML_OPENBLAS "ggml: use OpenBLAS" OFF)
option(GGML_BLAS "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING
"ggml: BLAS library vendor")
option(GGML_HIPBLAS "ggml: use hipBLAS" OFF)
option(GGML_CUDA "ggml: use CUDA" OFF)
option(GGML_CUBLAS "ggml: use CUDA (deprecated)" OFF)
option(GGML_METAL "ggml: use Metal" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG "ggml: compile Metal with -fno-fast-math" OFF)
option(GGML_METAL_EMBED_LIBRARY "ggml: embed Metal library" OFF)
Expand Down
2 changes: 1 addition & 1 deletion examples/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ struct gpt_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t n_predict = 200; // new tokens to predict
int32_t n_parallel = 1; // number of parallel streams
int32_t n_batch = 8; // batch size for prompt processing
int32_t n_batch = 32; // batch size for prompt processing
int32_t n_ctx = 2048; // context size (this is the KV cache max size)
int32_t n_gpu_layers = 0; // number of layers to offload to the GPU

Expand Down
14 changes: 14 additions & 0 deletions examples/gpt-2/main-sched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
#include "ggml-metal.h"
#endif

#ifdef GGML_USE_BLAS
#include "ggml-blas.h"
#endif

#include "common.h"
#include "common-ggml.h"

Expand Down Expand Up @@ -131,6 +135,16 @@ void init_backends(gpt2_model & model, const gpt_params & params) {
model.backends.push_back(gpu_backend);
}

#ifdef GGML_USE_BLAS
ggml_backend_t blas_backend = ggml_backend_blas_init();
if (!blas_backend) {
fprintf(stderr, "%s: failed to initialize BLAS backend\n", __func__);
} else {
ggml_backend_blas_set_n_threads(blas_backend, params.n_threads);
model.backends.push_back(blas_backend);
}
#endif

// always add the CPU backend as a fallback
ggml_backend_t cpu_backend = ggml_backend_cpu_init();
ggml_backend_cpu_set_n_threads(cpu_backend, params.n_threads);
Expand Down
4 changes: 4 additions & 0 deletions scripts/sync-llama-am.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ if [ -f $SRC_GGML/llama-src.patch ]; then
# ggml-alloc.c -> src/ggml-alloc.c
# ggml-backend-impl.h -> src/ggml-backend-impl.h
# ggml-backend.c -> src/ggml-backend.c
# ggml-blas.cpp -> src/ggml-blas.cpp
# ggml-blas.h -> src/ggml-blas.h
# ggml-common.h -> src/ggml-common.h
# ggml-cuda/* -> src/ggml-cuda/*
# ggml-cuda.cu -> src/ggml-cuda.cu
Expand Down Expand Up @@ -133,6 +135,8 @@ if [ -f $SRC_GGML/llama-src.patch ]; then
-e 's/\/ggml-alloc\.c/\/src\/ggml-alloc.c/g' \
-e 's/\/ggml-backend-impl\.h/\/src\/ggml-backend-impl.h/g' \
-e 's/\/ggml-backend\.c/\/src\/ggml-backend.c/g' \
-e 's/\/ggml-blas\.cpp/\/src\/ggml-blas.cpp/g' \
-e 's/\/ggml-blas\.h/\/src\/ggml-blas.h/g' \
-e 's/\/ggml-common\.h/\/src\/ggml-common.h/g' \
-e 's/\/ggml-cuda\//\/src\/ggml-cuda\//g' \
-e 's/\/ggml-cuda\.cu/\/src\/ggml-cuda.cu/g' \
Expand Down
2 changes: 2 additions & 0 deletions scripts/sync-llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ cp -rpv ../llama.cpp/ggml.c src/ggml.c
cp -rpv ../llama.cpp/ggml-alloc.c src/ggml-alloc.c
cp -rpv ../llama.cpp/ggml-backend-impl.h src/ggml-backend-impl.h
cp -rpv ../llama.cpp/ggml-backend.c src/ggml-backend.c
cp -rpv ../llama.cpp/ggml-blas.cpp src/ggml-blas.cpp
cp -rpv ../llama.cpp/ggml-blas.h src/ggml-blas.h
cp -rpv ../llama.cpp/ggml-common.h src/ggml-common.h
cp -rpv ../llama.cpp/ggml-cuda/* src/ggml-cuda/
cp -rpv ../llama.cpp/ggml-cuda.cu src/ggml-cuda.cu
Expand Down
4 changes: 4 additions & 0 deletions scripts/sync-whisper-am.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ if [ -f $SRC_GGML/whisper-src.patch ]; then
# ggml-alloc.c -> src/ggml-alloc.c
# ggml-backend-impl.h -> src/ggml-backend-impl.h
# ggml-backend.c -> src/ggml-backend.c
# ggml-blas.cpp -> src/ggml-blas.cpp
# ggml-blas.h -> src/ggml-blas.h
# ggml-common.h -> src/ggml-common.h
# ggml-cuda/* -> src/ggml-cuda/*
# ggml-cuda.cu -> src/ggml-cuda.cu
Expand Down Expand Up @@ -143,6 +145,8 @@ if [ -f $SRC_GGML/whisper-src.patch ]; then
-e 's/\/ggml-alloc\.c/\/src\/ggml-alloc.c/g' \
-e 's/\/ggml-backend-impl\.h/\/src\/ggml-backend-impl.h/g' \
-e 's/\/ggml-backend\.c/\/src\/ggml-backend.c/g' \
-e 's/\/ggml-blas\.cpp/\/src\/ggml-blas.cpp/g' \
-e 's/\/ggml-blas\.h/\/src\/ggml-blas.h/g' \
-e 's/\/ggml-common\.h/\/src\/ggml-common.h/g' \
-e 's/\/ggml-cuda\//\/src\/ggml-cuda\//g' \
-e 's/\/ggml-cuda\.cu/\/src\/ggml-cuda.cu/g' \
Expand Down
2 changes: 2 additions & 0 deletions scripts/sync-whisper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ cp -rpv ../whisper.cpp/ggml-impl.h src/ggml-impl.h
cp -rpv ../whisper.cpp/ggml-alloc.c src/ggml-alloc.c
cp -rpv ../whisper.cpp/ggml-backend-impl.h src/ggml-backend-impl.h
cp -rpv ../whisper.cpp/ggml-backend.c src/ggml-backend.c
cp -rpv ../whisper.cpp/ggml-blas.cpp src/ggml-blas.cpp
cp -rpv ../whisper.cpp/ggml-blas.h src/ggml-blas.h
cp -rpv ../whisper.cpp/ggml-common.h src/ggml-common.h
cp -rpv ../whisper.cpp/ggml-cuda/* src/ggml-cuda/
cp -rpv ../whisper.cpp/ggml-cuda.cu src/ggml-cuda.cu
Expand Down
110 changes: 86 additions & 24 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,28 +152,89 @@ if (APPLE AND NOT GGML_NO_ACCELERATE)
endif()
endif()

if (GGML_OPENBLAS)
set(OPENBLAS_INCLUDE_SEARCH_PATHS
/usr/include
/usr/include/openblas
/usr/include/openblas-base
/usr/local/include
/usr/local/include/openblas
/usr/local/include/openblas-base
/opt/OpenBLAS/include
$ENV{OpenBLAS_HOME}
$ENV{OpenBLAS_HOME}/include
)
find_path(OPENBLAS_INC NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
find_library(OPENBLAS_LIB NAMES openblas libopenblas)
if (OPENBLAS_LIB)
message(STATUS "OpenBLAS found")

set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${OPENBLAS_LIB})
set(GGML_EXTRA_INCS ${GGML_EXTRA_INCS} ${OPENBLAS_INC})
set(GGML_EXTRA_FLAGS ${GGML_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
if (GGML_BLAS)
if (GGML_STATIC)
set(BLA_STATIC ON)
endif()
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
# set(BLA_SIZEOF_INTEGER 8)
#endif()

set(BLA_VENDOR ${GGML_BLAS_VENDOR})
find_package(BLAS)

if (BLAS_FOUND)
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${GGML_BLAS_VENDOR} MATCHES "Apple"))
# BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
find_package(PkgConfig REQUIRED)
if (${GGML_BLAS_VENDOR} MATCHES "Generic")
pkg_check_modules(DepBLAS REQUIRED blas)
elseif (${GGML_BLAS_VENDOR} MATCHES "OpenBLAS")
# As of openblas v0.3.22, the 64-bit is named openblas64.pc
pkg_check_modules(DepBLAS openblas64)
if (NOT DepBLAS_FOUND)
pkg_check_modules(DepBLAS REQUIRED openblas)
endif()
elseif (${GGML_BLAS_VENDOR} MATCHES "FLAME")
pkg_check_modules(DepBLAS REQUIRED blis)
elseif (${GGML_BLAS_VENDOR} MATCHES "ATLAS")
pkg_check_modules(DepBLAS REQUIRED blas-atlas)
elseif (${GGML_BLAS_VENDOR} MATCHES "FlexiBLAS")
pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
elseif (${GGML_BLAS_VENDOR} MATCHES "Intel")
# all Intel* libraries share the same include path
pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
elseif (${GGML_BLAS_VENDOR} MATCHES "NVHPC")
# this doesn't provide pkg-config
# suggest to assign BLAS_INCLUDE_DIRS on your own
if ("${NVHPC_VERSION}" STREQUAL "")
message(WARNING "Better to set NVHPC_VERSION")
else()
set(DepBLAS_FOUND ON)
set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
endif()
endif()
if (DepBLAS_FOUND)
set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
else()
message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
" detected by pkgconfig, trying to find cblas.h from possible paths...")
find_path(BLAS_INCLUDE_DIRS
NAMES cblas.h
HINTS
/usr/include
/usr/local/include
/usr/include/openblas
/opt/homebrew/opt/openblas/include
/usr/local/opt/openblas/include
/usr/include/x86_64-linux-gnu/openblas/include
)
endif()
endif()

message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")

add_compile_options(${BLAS_LINKER_FLAGS})

add_compile_definitions(GGML_USE_BLAS)

if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${GGML_BLAS_VENDOR} MATCHES "Generic" OR ${GGML_BLAS_VENDOR} MATCHES "Intel"))
add_compile_definitions(GGML_BLAS_USE_MKL)
endif()

set(GGML_HEADERS_BLAS ggml-blas.h)
set(GGML_SOURCES_BLAS ggml-blas.cpp)

set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${BLAS_LIBRARIES})
set(GGML_EXTRA_INCLUDES ${GGML_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
set(GGML_EXTRA_FLAGS ${GGML_EXTRA_FLAGS} -DGGML_USE_BLAS)
else()
message(WARNING "OpenBLAS not found")
message(WARNING "BLAS not found, please refer to "
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
" to set correct GGML_BLAS_VENDOR")
endif()
endif()

Expand Down Expand Up @@ -513,9 +574,10 @@ add_library(${TARGET}
../include/ggml/ggml.h
../include/ggml/ggml-alloc.h
../include/ggml/ggml-backend.h
${GGML_SOURCES_CUDA}
${GGML_SOURCES_METAL}
${GGML_SOURCES_RPC}
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
)

target_include_directories(${TARGET} PUBLIC
Expand Down
File renamed without changes.
File renamed without changes.