Skip to content

Commit

Permalink
llama : offload to RPC in addition to other backends (llama/7640)
Browse files Browse the repository at this point in the history
* llama : offload to RPC in addition to other backends

* - fix copy_tensor being called on the src buffer instead of the dst buffer

- always initialize views in the view_src buffer

- add RPC backend to Makefile build

- add endpoint to all RPC object names

* add rpc-server to Makefile

* Update llama.cpp

Co-authored-by: slaren <[email protected]>

---------

Co-authored-by: slaren <[email protected]>
  • Loading branch information
2 people authored and ggerganov committed Jun 15, 2024
1 parent 703d5e6 commit 8d8d6b1
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
2 changes: 1 addition & 1 deletion include/ggml/ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ extern "C" {

// Tensor initialization
GGML_API void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr);
GGML_API void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor);
GGML_API void ggml_backend_view_init(struct ggml_tensor * tensor);


#ifdef __cplusplus
Expand Down
6 changes: 3 additions & 3 deletions src/ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
// this tensor was allocated without ggml-backend
return;
}
ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
ggml_backend_view_init(tensor);
}
} else {
if (tensor->data == NULL) {
Expand Down Expand Up @@ -899,12 +899,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
if (t->view_src == NULL) {
ggml_tallocr_alloc(&tallocr, t);
} else if (t->buffer == NULL) {
ggml_backend_view_init(buffer, t);
ggml_backend_view_init(t);
}
} else {
if (t->view_src != NULL && t->buffer == NULL) {
// view of a pre-allocated tensor
ggml_backend_view_init(buffer, t);
ggml_backend_view_init(t);
}
}
}
Expand Down
10 changes: 5 additions & 5 deletions src/ggml-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ void ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) {
bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst) {
ggml_backend_buffer_t dst_buf = dst->view_src ? dst->view_src->buffer : dst->buffer;
if (dst_buf->iface.cpy_tensor) {
return src->buffer->iface.cpy_tensor(dst_buf, src, dst);
return dst_buf->iface.cpy_tensor(dst_buf, src, dst);
}
return false;
}
Expand Down Expand Up @@ -1887,15 +1887,15 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched,

// utils

void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
void ggml_backend_view_init(struct ggml_tensor * tensor) {
GGML_ASSERT(tensor->buffer == NULL);
GGML_ASSERT(tensor->view_src != NULL);
GGML_ASSERT(tensor->view_src->buffer != NULL);
GGML_ASSERT(tensor->view_src->data != NULL);

tensor->buffer = buffer;
tensor->buffer = tensor->view_src->buffer;
tensor->data = (char *)tensor->view_src->data + tensor->view_offs;
ggml_backend_buffer_init_tensor(buffer, tensor);
ggml_backend_buffer_init_tensor(tensor->buffer, tensor);
}

void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) {
Expand Down Expand Up @@ -1954,7 +1954,7 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te
struct ggml_tensor * dst = node_copies[id];
if (dst->view_src != NULL) {
graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src);
ggml_backend_view_init(dst->view_src->buffer, dst);
ggml_backend_view_init(dst);
}
else {
ggml_backend_tensor_copy(src, dst);
Expand Down
4 changes: 2 additions & 2 deletions src/ggml-rpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ GGML_CALL static ggml_backend_buffer_t ggml_backend_rpc_buffer_type_alloc_buffer
if (remote_ptr != 0) {
ggml_backend_buffer_t buffer = ggml_backend_buffer_init(buft,
ggml_backend_rpc_buffer_interface,
new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC"},
new ggml_backend_rpc_buffer_context{sock, {}, remote_ptr, "RPC[" + std::string(buft_ctx->endpoint) + "]"},
remote_size);
return buffer;
} else {
Expand Down Expand Up @@ -692,7 +692,7 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_rpc_buffer_type(const
GGML_CALL ggml_backend_t ggml_backend_rpc_init(const char * endpoint) {
ggml_backend_rpc_context * ctx = new ggml_backend_rpc_context {
/* .endpoint = */ endpoint,
/* .name = */ "RPC",
/* .name = */ "RPC[" + std::string(endpoint) + "]",
};

ggml_backend_t backend = new ggml_backend {
Expand Down

0 comments on commit 8d8d6b1

Please sign in to comment.