sync: use encodec's latest version as a submodule #124

Merged Feb 13, 2024 (69 commits)
Changes from 1 commit

Commits
2302881
rm encodec
PABannier Oct 26, 2023
450a606
add git submodules
PABannier Oct 26, 2023
4ece5fa
removed bark util
PABannier Oct 26, 2023
9c9f7e8
updated CMakeLists
PABannier Oct 26, 2023
e2b350a
rm build scripts
PABannier Oct 26, 2023
d491cc7
mv dr_wav in examples
PABannier Oct 26, 2023
38c2e49
common cpp
PABannier Oct 26, 2023
6b32b3b
moved def constants
PABannier Oct 26, 2023
ab9b528
text encoder loaded with the latest ggml API
PABannier Oct 27, 2023
6a9b50a
pulled ggml upstream
PABannier Oct 27, 2023
efbdd56
temporarily removed subdirectory encodec.cpp
PABannier Oct 27, 2023
753d5cf
clean forward pass text encoder
PABannier Oct 27, 2023
e4e712f
compiling
PABannier Oct 27, 2023
d8fc378
fix issue definition
PABannier Oct 27, 2023
b258c08
clean
PABannier Oct 28, 2023
6642e75
remove codec parsing functions
PABannier Oct 28, 2023
33d186e
kinda works
PABannier Oct 28, 2023
83a21ec
bias is stored in hparams
PABannier Oct 28, 2023
6cad888
working text encoder
PABannier Oct 28, 2023
242e7c5
cln tests
PABannier Oct 28, 2023
c1d0edd
coarse working?
PABannier Oct 28, 2023
94cd5e2
override bias
PABannier Oct 29, 2023
acf9dfa
working fine encoder
PABannier Oct 29, 2023
c1def75
rename quantize.cpp into main.cpp
PABannier Oct 29, 2023
cfaa59c
included quantize as a target
PABannier Oct 29, 2023
05ef89d
exposed quantization function
PABannier Oct 29, 2023
6172381
minor
PABannier Oct 29, 2023
6d0db93
update CIs
PABannier Oct 29, 2023
a978908
updated CIs
PABannier Oct 29, 2023
8ae7dc5
passing tokenizer test
PABannier Oct 29, 2023
7ad8cd5
Merge branch 'main' of https://github.com/PABannier/bark.cpp into enc…
PABannier Oct 29, 2023
7c2ae84
fast text encoder
PABannier Dec 11, 2023
d3971c2
Merge branch 'main' of https://github.com/PABannier/bark.cpp into enc…
PABannier Dec 11, 2023
5874a87
`bark.cpp` -> `bark`
PABannier Dec 11, 2023
5312577
server abides by latest API
PABannier Dec 12, 2023
e7b7d75
rm fast-text-encoder example
PABannier Dec 12, 2023
2aaf7b2
pass `-O3` release flag
PABannier Dec 12, 2023
79ed551
rm fast_text_encoder from CMakeLists
PABannier Dec 12, 2023
4f72d56
restructured
PABannier Dec 12, 2023
f13498a
CMakeLists arranged
PABannier Dec 13, 2023
f517570
update CIs
PABannier Dec 13, 2023
b8bdd76
add encodec.cpp in the loop
PABannier Dec 14, 2023
5319d26
add verbosity level
PABannier Dec 15, 2023
11c3f9a
Fix CIs (#128)
AlexHayton Dec 30, 2023
da3cc56
Merge branch 'encodec_as_submodule' of https://github.com/PABannier/b…
PABannier Jan 2, 2024
07a322c
fix coarse encoder internal pass
PABannier Jan 3, 2024
3002698
`VerbosityLevel` -> `bark_verbosity_level`
PABannier Jan 5, 2024
747345c
updated examples
PABannier Jan 5, 2024
a3e3e92
populated time per token
PABannier Jan 5, 2024
19e1683
remove whitespace
PABannier Jan 5, 2024
fa6975c
BarkProgressBar implemented
PABannier Jan 6, 2024
b9e2109
verbosity level controlled for cleaner output
PABannier Jan 6, 2024
4401975
removed params as macros and moved them into default constructor
PABannier Jan 6, 2024
38846ec
updated README
PABannier Jan 6, 2024
59d5352
removed useless `n_predict` in params
PABannier Jan 6, 2024
07e92de
removed old tests
PABannier Jan 6, 2024
ec677fb
fix wrong return type, quantization works again
PABannier Jan 6, 2024
035ef16
Added Metal and CUDA backend
PABannier Jan 6, 2024
6e4ac9a
updated docs
PABannier Jan 6, 2024
ac327a9
cosmit
PABannier Jan 6, 2024
d347134
rm submodule
PABannier Jan 7, 2024
d7e9661
added encodec submodule
PABannier Jan 7, 2024
1fbe29d
remove mem_per_token
PABannier Jan 7, 2024
b3d9179
more verbose errors
PABannier Jan 7, 2024
94fea82
clean
PABannier Jan 7, 2024
bec8547
reset allocr to reduce memory footprint
PABannier Jan 7, 2024
df7c22a
add tests
PABannier Jan 7, 2024
6fbc184
expose forward passes
PABannier Jan 7, 2024
87a102b
enhanced README.md
PABannier Feb 12, 2024
add verbosity level
PABannier committed Dec 15, 2023
commit 5319d26df860d2e5cb9b36083f5c86f970b3a5d5
103 changes: 70 additions & 33 deletions bark/bark.cpp
@@ -46,7 +46,6 @@
#define COARSE_INFER_TOKEN 12050
#define COARSE_SEMANTIC_PAD_TOKEN 12048

// static const size_t MB = 1024*1024;

void print_tensor(struct ggml_tensor * a) {
float sum = 0;
@@ -423,8 +422,10 @@ static void bark_tokenize_input(struct bark_context * ctx, const std::string & t
printf("\n");
}

static bool gpt_load_model_weights(const std::string & fname, gpt_model & model) {
fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str());
static bool gpt_load_model_weights(const std::string & fname, gpt_model & model, VerbosityLevel verbosity) {
if (verbosity == VerbosityLevel::MEDIUM || verbosity == VerbosityLevel::HIGH) {
fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str());
}

auto fin = std::ifstream(fname, std::ios::binary);
if (!fin) {
@@ -459,17 +460,19 @@ static bool gpt_load_model_weights(const std::string & fname, gpt_model & model)

const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR;

printf("%s: n_in_vocab = %d\n", __func__, hparams.n_in_vocab);
printf("%s: n_out_vocab = %d\n", __func__, hparams.n_out_vocab);
printf("%s: block_size = %d\n", __func__, hparams.block_size);
printf("%s: bias = %d\n", __func__, hparams.bias);
printf("%s: n_embd = %d\n", __func__, hparams.n_embd);
printf("%s: n_head = %d\n", __func__, hparams.n_head);
printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
printf("%s: n_lm_heads = %d\n", __func__, hparams.n_lm_heads);
printf("%s: n_wtes = %d\n", __func__, hparams.n_wtes);
printf("%s: ftype = %d\n", __func__, hparams.ftype);
printf("%s: qntvr = %d\n", __func__, qntvr);
if (verbosity == VerbosityLevel::MEDIUM || verbosity == VerbosityLevel::HIGH) {
printf("%s: n_in_vocab = %d\n", __func__, hparams.n_in_vocab);
printf("%s: n_out_vocab = %d\n", __func__, hparams.n_out_vocab);
printf("%s: block_size = %d\n", __func__, hparams.block_size);
printf("%s: bias = %d\n", __func__, hparams.bias);
printf("%s: n_embd = %d\n", __func__, hparams.n_embd);
printf("%s: n_head = %d\n", __func__, hparams.n_head);
printf("%s: n_layer = %d\n", __func__, hparams.n_layer);
printf("%s: n_lm_heads = %d\n", __func__, hparams.n_lm_heads);
printf("%s: n_wtes = %d\n", __func__, hparams.n_wtes);
printf("%s: ftype = %d\n", __func__, hparams.ftype);
printf("%s: qntvr = %d\n", __func__, qntvr);
}

hparams.ftype %= GGML_QNT_VERSION_FACTOR;
}
@@ -547,8 +550,10 @@ static bool gpt_load_model_weights(const std::string & fname, gpt_model & model)
n_tensors += 4 * n_layer; // c_attn_attn_b, c_attn_proj_b, c_mlp_fc_b, c_mlp_proj_b
}

printf("%s: ggml tensor size = %d bytes\n", __func__, (int) sizeof(ggml_tensor));
printf("%s: ggml ctx size = %6.2f MB\n", __func__, buffer_size/(1024.0*1024.0));
if (verbosity == VerbosityLevel::HIGH) {
printf("%s: ggml tensor size = %d bytes\n", __func__, (int) sizeof(ggml_tensor));
printf("%s: ggml ctx size = %6.2f MB\n", __func__, buffer_size/(1024.0*1024.0));
}
}

// create the ggml context
@@ -568,12 +573,17 @@ static bool gpt_load_model_weights(const std::string & fname, gpt_model & model)

if (!model.backend) {
// fallback to CPU backend
fprintf(stderr, "%s: no backend specified, using CPU backend\n", __func__);
if (verbosity == VerbosityLevel::HIGH) {
fprintf(stderr, "%s: no backend specified, using CPU backend\n", __func__);
}
model.backend = ggml_backend_cpu_init();
}

if (!model.backend) {
fprintf(stderr, "%s: failed to initialize CPU backend\n", __func__);
if (verbosity == VerbosityLevel::HIGH) {
fprintf(stderr, "%s: failed to initialize CPU backend\n", __func__);
}

return false;
}

@@ -687,7 +697,9 @@ static bool gpt_load_model_weights(const std::string & fname, gpt_model & model)

const size_t memory_size = ggml_nbytes(model.memory_k) + ggml_nbytes(model.memory_v);

printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem);
if (verbosity == VerbosityLevel::HIGH) {
printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem);
}

// create a backend buffer (can be in host or device memory)
model.buffer_kv = ggml_backend_alloc_buffer(model.backend, memory_size + 256);
@@ -756,6 +768,7 @@ static bool gpt_load_model_weights(const std::string & fname, gpt_model & model)
return false;
}


const size_t bpe = ggml_type_size(ggml_type(ttype));

if ((nelements*bpe)/ggml_blck_size(tensor->type) != ggml_nbytes(tensor)) {
@@ -775,13 +788,19 @@ static bool gpt_load_model_weights(const std::string & fname, gpt_model & model)
ggml_backend_tensor_set(tensor, read_buf.data(), 0, ggml_nbytes(tensor));
}

// printf("%48s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], "float", ggml_nbytes(tensor)/1024.0/1024.0);
if (verbosity == VerbosityLevel::HIGH) {
printf("%48s - [%5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], "float", ggml_nbytes(tensor)/1024.0/1024.0);
}

total_size += ggml_nbytes(tensor);
}

ggml_allocr_free(alloc);
printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);

if (verbosity == VerbosityLevel::MEDIUM || verbosity == VerbosityLevel::HIGH) {
printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
}

model.memsize = total_size;
}

@@ -1995,7 +2014,7 @@ bool bark_generate_audio(

// Calling Encodec API to generate audio waveform from tokens
const int n_gpu_layers = 0;
const std::string encodec_model_path = "";
const std::string encodec_model_path = "/Users/pbannier/Documents/encodec.cpp/ggml_weights/ggml-model.bin";

struct encodec_context * ectx = encodec_load_model(encodec_model_path, n_gpu_layers);
if (!ectx) {
@@ -2051,25 +2070,35 @@ void bark_free(struct bark_context * bctx) {

static struct bark_model * bark_load_model_from_file(
const std::string & dirname,
struct bark_model * model) {
printf("%s: loading model from '%s'\n", __func__, dirname.c_str());
struct bark_model * model,
VerbosityLevel verbosity) {
if (verbosity == VerbosityLevel::MEDIUM || verbosity == VerbosityLevel::HIGH) {
printf("%s: loading model from '%s'\n", __func__, dirname.c_str());
}

// text
{
printf("%s: reading bark text model\n", __func__);
if (verbosity == VerbosityLevel::MEDIUM || verbosity == VerbosityLevel::HIGH) {
printf("%s: reading bark text model\n", __func__);
}

const std::string fname = std::string(dirname) + "/ggml_weights_text.bin";
if (!gpt_load_model_weights(fname, model->text_model)) {
if (!gpt_load_model_weights(fname, model->text_model, verbosity)) {
fprintf(stderr, "%s: invalid model file '%s' (bad text)\n", __func__, fname.c_str());
return nullptr;
}
}

// vocab
{
printf("%s: reading bark vocab\n", __func__);
if (verbosity == VerbosityLevel::MEDIUM || verbosity == VerbosityLevel::HIGH) {
printf("%s: reading bark vocab\n", __func__);
}

const std::string fname = std::string(dirname) + "/ggml_vocab.bin";
const gpt_hparams hparams = model->text_model.hparams;
const int32_t expected_size = hparams.n_in_vocab - hparams.n_out_vocab - 5;

if (!bark_vocab_load(fname, &model->vocab, expected_size)) {
fprintf(stderr, "%s: invalid model file '%s' (bad text)\n", __func__, fname.c_str());
return nullptr;
@@ -2078,19 +2107,27 @@ static struct bark_model * bark_load_model_from_file(

// coarse
{
printf("\n%s: reading bark coarse model\n", __func__);
if (verbosity == VerbosityLevel::MEDIUM || verbosity == VerbosityLevel::HIGH) {
printf("\n%s: reading bark coarse model\n", __func__);
}

const std::string fname = std::string(dirname) + "/ggml_weights_coarse.bin";
if (!gpt_load_model_weights(fname, model->coarse_model)) {

if (!gpt_load_model_weights(fname, model->coarse_model, verbosity)) {
fprintf(stderr, "%s: invalid model file '%s' (bad coarse)\n", __func__, fname.c_str());
return nullptr;
}
}

// fine
{
printf("\n%s: reading bark fine model\n", __func__);
if (verbosity == VerbosityLevel::MEDIUM || verbosity == VerbosityLevel::HIGH) {
printf("\n%s: reading bark fine model\n", __func__);
}

const std::string fname = std::string(dirname) + "/ggml_weights_fine.bin";
if (!gpt_load_model_weights(fname, model->fine_model)) {

if (!gpt_load_model_weights(fname, model->fine_model, verbosity)) {
fprintf(stderr, "%s: invalid model file '%s' (bad fine)\n", __func__, fname.c_str());
return nullptr;
}
@@ -2114,13 +2151,13 @@ struct bark_context_params bark_context_default_params() {
return result;
}

struct bark_context * bark_load_model(const std::string & model_path) {
struct bark_context * bark_load_model(const std::string & model_path, VerbosityLevel verbosity) {
int64_t t_load_start_us = ggml_time_us();

struct bark_context * bctx = new bark_context();

bctx->model = bark_model();
if (!bark_load_model_from_file(model_path, &bctx->model)) {
if (!bark_load_model_from_file(model_path, &bctx->model, verbosity)) {
fprintf(stderr, "%s: failed to load model weights from '%s'\n", __func__, model_path.c_str());
return {};
}
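One observation on the gating added above: the condition `verbosity == VerbosityLevel::MEDIUM || verbosity == VerbosityLevel::HIGH` is repeated verbatim at every log site in this file. Because the enum values are ordered (LOW = 0, MEDIUM = 1, HIGH = 2), the checks could collapse into a single threshold comparison. A minimal sketch under that assumption; the `bark_verbose_at_least` helper is hypothetical and not part of this commit:

// Sketch only. Enum values copied from bark.h as introduced in this PR.
enum VerbosityLevel { LOW = 0, MEDIUM = 1, HIGH = 2 };

// Hypothetical helper: relies on the numeric ordering of the enum values.
static inline bool bark_verbose_at_least(VerbosityLevel verbosity, VerbosityLevel threshold) {
    return static_cast<int>(verbosity) >= static_cast<int>(threshold);
}

// A call site from this diff, rewritten with the helper:
// if (bark_verbose_at_least(verbosity, VerbosityLevel::MEDIUM)) {
//     printf("%s: model size = %8.2f MB\n", __func__, total_size/1024.0/1024.0);
// }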
21 changes: 16 additions & 5 deletions bark/bark.h
@@ -20,6 +20,12 @@
# define BARK_API
#endif

enum VerbosityLevel {
LOW = 0,
MEDIUM = 1,
HIGH = 2,
};

typedef int32_t bark_token;

typedef std::vector<int32_t> bark_sequence;
@@ -122,7 +128,8 @@ struct bark_model {
};

struct bark_context_params {
uint32_t seed; // RNG seed
// RNG seed
uint32_t seed;

// Temperature for sampling (text and coarse encoders)
float temp;
@@ -135,6 +142,9 @@
int sliding_window_size;
// Max history for coarse encoder
int max_coarse_history;

// Verbosity level
VerbosityLevel verbosity;
};

struct bark_context {
@@ -176,11 +186,12 @@ BARK_API struct bark_context_params bark_context_default_params(void);
* Loads a BARK model from the specified file path with the given parameters.
*
* @param model_path The directory path of the bark model to load.
* @param params The parameters to use when loading the bark model.
* @param verbosity The verbosity level when loading the model.
* @return A pointer to the loaded bark model context.
*/
BARK_API struct bark_context * bark_load_model(
const std::string & model_path);
const std::string & model_path,
VerbosityLevel verbosity);

/**
* Generates an audio file from the given text using the specified Bark context.
@@ -250,11 +261,11 @@ void bert_tokenize(

/**
* Encodes the input text using the forward algorithm.
*
*
* @param bctx A pointer to the bark context struct.
* @param n_threads The number of threads to use for encoding.
* @return Returns true if the encoding was successful, false otherwise.
*/
bool bark_forward_text_encoder(
struct bark_context * bctx,
struct bark_context * bctx,
int n_threads);
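
For callers, the signature change above is breaking: `bark_load_model` now requires a verbosity argument. A minimal usage sketch of the updated API, assuming model weights in a local `./ggml_weights` directory (the path is illustrative, not from this PR):

#include <cstdio>

#include "bark.h"

int main() {
    // Illustrative path; point this at your own ggml weights directory.
    struct bark_context * bctx = bark_load_model("./ggml_weights", VerbosityLevel::MEDIUM);
    if (!bctx) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // ... run generation here, then release the context.
    bark_free(bctx);
    return 0;
}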
27 changes: 14 additions & 13 deletions bark/examples/common.cpp
@@ -1,3 +1,4 @@
#include <iostream>
#include <string>
#include <vector>

@@ -25,19 +26,19 @@ void write_wav_on_disk(std::vector<float> & audio_arr, std::string dest_path) {
}

void bark_print_usage(char ** argv, const bark_params & params) {
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help show this help message and exit\n");
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
fprintf(stderr, " -s N, --seed N seed for random number generator (default: %d)\n", params.seed);
fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
fprintf(stderr, " prompt to start generation with (default: random)\n");
fprintf(stderr, " -m FNAME, --model FNAME\n");
fprintf(stderr, " model path (default: %s)\n", params.model_path.c_str());
fprintf(stderr, " -o FNAME, --outwav FNAME\n");
fprintf(stderr, " output generated wav (default: %s)\n", params.dest_wav_path.c_str());
fprintf(stderr, "\n");
std::cout << "usage: " << argv[0] << " [options]\n"
<< "\n"
<< "options:\n"
<< " -h, --help show this help message and exit\n"
<< " -t N, --threads N number of threads to use during computation (default: " << params.n_threads << ")\n"
<< " -s N, --seed N seed for random number generator (default: " << params.seed << ")\n"
<< " -p PROMPT, --prompt PROMPT\n"
<< " prompt to start generation with (default: random)\n"
<< " -m FNAME, --model FNAME\n"
<< " model path (default: " << params.model_path << ")\n"
<< " -o FNAME, --outwav FNAME\n"
<< " output generated wav (default: " << params.dest_wav_path << ")\n"
<< "\n";
}

int bark_params_parse(int argc, char ** argv, bark_params & params) {
11 changes: 10 additions & 1 deletion bark/examples/main/main.cpp
@@ -1,3 +1,4 @@
#include <iostream>
#include <tuple>

#include "ggml.h"
@@ -16,8 +17,16 @@ int main(int argc, char **argv) {
return 1;
}

std::cout << R"( __ __ )" << "\n"
<< R"( / /_ ____ ______/ /__ _________ ____ )" << "\n"
<< R"( / __ \/ __ `/ ___/ //_/ / ___/ __ \/ __ \)" << "\n"
<< R"( / /_/ / /_/ / / / ,< _ / /__/ /_/ / /_/ /)" << "\n"
<< R"(/_.___/\__,_/_/ /_/|_| (_) \___/ .___/ .___/ )" << "\n"
<< R"( /_/ /_/ )" << "\n"
<< "\n";

// initialize bark context
struct bark_context * bctx = bark_load_model(params.model_path);
struct bark_context * bctx = bark_load_model(params.model_path, VerbosityLevel::LOW);
if (!bctx) {
fprintf(stderr, "%s: Could not load model\n", __func__);
exit(1);
2 changes: 1 addition & 1 deletion bark/examples/server/server.cpp
@@ -96,7 +96,7 @@ int main(int argc, char ** argv) {

bark_params_parse(argc, argv, params);

struct bark_context * bctx = bark_load_model(params.model_path.c_str());
struct bark_context * bctx = bark_load_model(params.model_path.c_str(), VerbosityLevel::LOW);
if (!bctx) {
fprintf(stderr, "%s: Could not load model\n", __func__);
return 1;
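A related nit in the call above: `params.model_path` is already a `std::string`, and `bark_load_model` takes a `const std::string &`, so the `.c_str()` detour forces a conversion to `const char *` and back through a temporary string. A hedged suggestion, not part of this commit:

// Suggestion only: pass the std::string directly and avoid the temporary.
struct bark_context * bctx = bark_load_model(params.model_path, VerbosityLevel::LOW);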
2 changes: 1 addition & 1 deletion bark/tests/test-forward-semantic.cpp
@@ -24,7 +24,7 @@ int main() {
std::mt19937 rng(0);

// initialize bark context
struct bark_context * bctx = bark_load_model(dirname);
struct bark_context * bctx = bark_load_model(dirname, VerbosityLevel::LOW);
if (!bctx) {
fprintf(stderr, "%s: Could not load model\n", __func__);
exit(1);