feat: add cuBLAS backend (#26)
PABannier committed Oct 23, 2023
1 parent b66d536 commit b901d4f
Showing 3 changed files with 28 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CMakeLists.txt
@@ -39,6 +39,10 @@ target_link_libraries(${ENCODEC_LIB} PUBLIC ggml)
 target_include_directories(${ENCODEC_LIB} PUBLIC .)
 target_compile_features(${ENCODEC_LIB} PUBLIC cxx_std_11)
 
+if (GGML_CUBLAS)
+    add_compile_definitions(GGML_USE_CUBLAS)
+endif()
+
 if (GGML_METAL)
     add_compile_definitions(GGML_USE_METAL)
 endif()
11 changes: 10 additions & 1 deletion README.md
@@ -21,7 +21,7 @@ https://github.com/PABannier/encodec.cpp/assets/12958149/d11561be-98e9-4504-bba7
 - [x] Mixed F16 / F32 precision
 - [ ] 4-bit and 8-bit quantization
 - [x] Metal support
-- [ ] cuBLAS support
+- [x] cuBLAS support
 
 ## Implementation details
 
@@ -61,3 +61,12 @@ the power consumption and CPU activity is reduced.
 cmake -DGGML_METAL=ON -DBUILD_SHARED_LIBS=Off ..
 cmake --build . --config Release
 ```
+
+### Using cuBLAS
+
+Inference can be offloaded to a CUDA backend with cuBLAS.
+
+```bash
+cmake -DGGML_CUBLAS=ON -DBUILD_SHARED_LIBS=Off ..
+cmake --build . --config Release
+```
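For context, compiling ggml's CUDA kernels requires an installed CUDA toolkit with `nvcc` available. A hypothetical end-to-end build might look like the following sketch; the out-of-tree `build` directory is an assumption, while the `-DGGML_CUBLAS=ON` and `-DBUILD_SHARED_LIBS=Off` flags come from the README addition above:

```bash
# Hypothetical build sketch; assumes a CUDA-capable GPU and the CUDA toolkit.
mkdir -p build && cd build
cmake -DGGML_CUBLAS=ON -DBUILD_SHARED_LIBS=Off ..
cmake --build . --config Release
```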
14 changes: 14 additions & 0 deletions encodec.cpp
@@ -2,6 +2,10 @@
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 
+#ifdef GGML_USE_CUBLAS
+#include "ggml-cuda.h"
+#endif
+
 #ifdef GGML_USE_METAL
 #include "ggml-metal.h"
 #endif
@@ -446,6 +450,16 @@ bool encodec_load_model_weights(const std::string & fname, encodec_model & model
         }
     }
 
+#ifdef GGML_USE_CUBLAS
+    if (n_gpu_layers > 0) {
+        fprintf(stderr, "%s: using CUDA backend\n", __func__);
+        model.backend = ggml_backend_cuda_init();
+        if (!model.backend) {
+            fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
+        }
+    }
+#endif
+
 #ifdef GGML_USE_METAL
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: using Metal backend\n", __func__);
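Note that a failed `ggml_backend_cuda_init()` here is only logged, not treated as fatal; as with the Metal path below it, the loader presumably falls back to the default CPU backend when `model.backend` is still unset. A minimal sketch of that selection pattern, assuming the ggml-backend API as of this commit (`ggml_backend_cuda_init()` takes no arguments); `pick_backend` is an illustrative helper, not code from the commit:

```cpp
#include <cstdio>

#include "ggml-backend.h"
#ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h"
#endif

// Illustrative helper (not part of the commit): try the CUDA backend first,
// then fall back to CPU if it is unavailable or fails to initialize.
static ggml_backend_t pick_backend(int n_gpu_layers) {
    ggml_backend_t backend = NULL;

#ifdef GGML_USE_CUBLAS
    if (n_gpu_layers > 0) {
        backend = ggml_backend_cuda_init();
        if (!backend) {
            // Mirrors the committed code: log the failure and keep going,
            // leaving `backend` NULL so the CPU path below takes over.
            fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
        }
    }
#endif

    if (!backend) {
        backend = ggml_backend_cpu_init();
    }
    return backend;
}
```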
