feat: add cuBLAS backend (#26)
PABannier committed Oct 23, 2023
1 parent b66d536 commit b901d4f
Showing 3 changed files with 28 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -39,6 +39,10 @@ target_link_libraries(${ENCODEC_LIB} PUBLIC ggml)
 target_include_directories(${ENCODEC_LIB} PUBLIC .)
 target_compile_features(${ENCODEC_LIB} PUBLIC cxx_std_11)
 
+if (GGML_CUBLAS)
+    add_compile_definitions(GGML_USE_CUBLAS)
+endif()
+
 if (GGML_METAL)
     add_compile_definitions(GGML_USE_METAL)
 endif()
11 changes: 10 additions & 1 deletion README.md
@@ -21,7 +21,7 @@ https://github.com/PABannier/encodec.cpp/assets/12958149/d11561be-98e9-4504-bba7
 - [x] Mixed F16 / F32 precision
 - [ ] 4-bit and 8-bit quantization
 - [x] Metal support
-- [ ] cuBLAS support
+- [x] cuBLAS support
 
 ## Implementation details
 
@@ -61,3 +61,12 @@ the power consumption and CPU activity is reduced.
 cmake -DGGML_METAL=ON -DBUILD_SHARED_LIBS=Off ..
 cmake --build . --config Release
 ```
+
+### Using cuBLAS
+
+Inference can be offloaded to a CUDA backend with cuBLAS.
+
+```bash
+cmake -DGGML_CUBLAS=ON -DBUILD_SHARED_LIBS=Off ..
+cmake --build . --config Release
+```
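For context, compiling ggml's CUDA kernels requires an installed CUDA toolkit with `nvcc` available. A hypothetical end-to-end build might look like the following sketch; the out-of-tree `build` directory is an assumption, while the `-DGGML_CUBLAS=ON` and `-DBUILD_SHARED_LIBS=Off` flags come from the README addition above:

```bash
# Hypothetical build sketch; assumes a CUDA-capable GPU and the CUDA toolkit.
mkdir -p build && cd build
cmake -DGGML_CUBLAS=ON -DBUILD_SHARED_LIBS=Off ..
cmake --build . --config Release
```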
14 changes: 14 additions & 0 deletions encodec.cpp
@@ -2,6 +2,10 @@
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 
+#ifdef GGML_USE_CUBLAS
+#include "ggml-cuda.h"
+#endif
+
 #ifdef GGML_USE_METAL
 #include "ggml-metal.h"
 #endif
@@ -446,6 +450,16 @@ bool encodec_load_model_weights(const std::string & fname, encodec_model & model
         }
     }
 
+#ifdef GGML_USE_CUBLAS
+    if (n_gpu_layers > 0) {
+        fprintf(stderr, "%s: using CUDA backend\n", __func__);
+        model.backend = ggml_backend_cuda_init();
+        if (!model.backend) {
+            fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
+        }
+    }
+#endif
+
 #ifdef GGML_USE_METAL
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: using Metal backend\n", __func__);
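Note that a failed `ggml_backend_cuda_init()` here is only logged, not treated as fatal; as with the Metal path below it, the loader presumably falls back to the default CPU backend when `model.backend` is still unset. A minimal sketch of that selection pattern, assuming the ggml-backend API as of this commit (`ggml_backend_cuda_init()` takes no arguments); `pick_backend` is an illustrative helper, not code from the commit:

```cpp
#include <cstdio>

#include "ggml-backend.h"
#ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h"
#endif

// Illustrative helper (not part of the commit): try the CUDA backend first,
// then fall back to CPU if it is unavailable or fails to initialize.
static ggml_backend_t pick_backend(int n_gpu_layers) {
    ggml_backend_t backend = NULL;

#ifdef GGML_USE_CUBLAS
    if (n_gpu_layers > 0) {
        backend = ggml_backend_cuda_init();
        if (!backend) {
            // Mirrors the committed code: log the failure and keep going,
            // leaving `backend` NULL so the CPU path below takes over.
            fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
        }
    }
#endif

    if (!backend) {
        backend = ggml_backend_cpu_init();
    }
    return backend;
}
```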
