Skip to content

Commit

Permalink
Print "[CUDA]" or "[opencl]" in GPU-offload log messages depending on which backend is compiled in
Browse files Browse the repository at this point in the history
  • Loading branch information
YellowRoseCx committed Jul 3, 2023
1 parent 72c16d2 commit cf65429
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 3 deletions.
10 changes: 9 additions & 1 deletion otherarch/gptj_v3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,11 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
const auto & hparams = model.hparams;
size_t vram_total = 0;
const int n_gpu = std::min(gpulayers, int(hparams.n_layer));
#if defined(GGML_USE_CLBLAST)
fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
#else
fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
#endif
for (int i = 0; i < n_gpu; ++i) {
const auto & layer = model.layers[i];
layer.c_attn_q_proj_w->backend = GGML_BACKEND_GPU;
Expand All @@ -373,7 +377,11 @@ ModelLoadResult gptj_model_load(const std::string & fname, gptj_model & model, g
ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data,layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w);
#endif
}
fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
#if defined(GGML_USE_CLBLAST)
fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
#else
fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
#endif
}
#endif

Expand Down
10 changes: 9 additions & 1 deletion otherarch/mpt_v3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
const auto & hparams = model.hparams;
size_t vram_total = 0;
const int n_gpu = std::min(gpulayers, int(hparams.n_layers));
#if defined(GGML_USE_CLBLAST)
fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
#else
fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
#endif
for (int i = 0; i < n_gpu; ++i) {
const auto & layer = model.layers[i];
layer.ffn_up_proj->backend = GGML_BACKEND_GPU;
Expand All @@ -320,7 +324,11 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
ggml_cuda_transform_tensor(layer.c_attn_out_proj_weight->data,layer.c_attn_out_proj_weight); vram_total += ggml_nbytes(layer.c_attn_out_proj_weight);
#endif
}
fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
#if defined(GGML_USE_CLBLAST)
fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
#else
fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
#endif
}
#endif

Expand Down
10 changes: 9 additions & 1 deletion otherarch/neox_v3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,11 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
const auto & hparams = model.hparams;
size_t vram_total = 0;
const int n_gpu = std::min(gpulayers, int(hparams.n_layer));
#if defined(GGML_USE_CLBLAST)
fprintf(stderr, "%s: [opencl] offloading %d layers to GPU\n", __func__, n_gpu);
#else
fprintf(stderr, "%s: [CUDA] offloading %d layers to GPU\n", __func__, n_gpu);
#endif
for (int i = 0; i < n_gpu; ++i) {
const auto & layer = model.layers[i];
layer.c_attn_attn_w->backend = GGML_BACKEND_GPU;
Expand All @@ -354,7 +358,11 @@ ModelLoadResult gpt_neox_model_load(const std::string & fname, gpt_neox_model &
ggml_cuda_transform_tensor(layer.c_mlp_proj_w->data,layer.c_mlp_proj_w); vram_total += ggml_nbytes(layer.c_mlp_proj_w);
#endif
}
fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
#if defined(GGML_USE_CLBLAST)
fprintf(stderr, "%s: [opencl] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
#else
fprintf(stderr, "%s: [CUDA] total VRAM used: %zu MB\n", __func__, vram_total / 1024 / 1024);
#endif
}
#endif

Expand Down

0 comments on commit cf65429

Please sign in to comment.