Skip to content

Commit

Permalink
ENH Introduce bark context (PABannier#101)
Browse files Browse the repository at this point in the history
  • Loading branch information
PABannier committed Sep 1, 2023
1 parent 441490b commit e33b060
Show file tree
Hide file tree
Showing 9 changed files with 280 additions and 237 deletions.
322 changes: 160 additions & 162 deletions bark.cpp

Large diffs are not rendered by default.

116 changes: 69 additions & 47 deletions bark.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#define COARSE_SEMANTIC_PAD_TOKEN 12048
#define COARSE_INFER_TOKEN 12050


struct bark_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());

Expand Down Expand Up @@ -115,7 +116,14 @@ struct gpt_model {
struct ggml_context * ctx;
std::map<std::string, struct ggml_tensor *> tensors;

//
int64_t t_sample_us = 0;
int64_t t_predict_us = 0;
int64_t t_main_us = 0;

//
int64_t memsize = 0;
size_t mem_per_token = 0;
};


Expand All @@ -134,32 +142,54 @@ struct bark_model {
int64_t memsize = 0;
};

// Inference context: bundles the model, RNG state, timing counters, and the
// intermediate token buffers produced by each stage of the Bark pipeline
// (text -> semantic -> coarse -> fine -> audio).
struct bark_context {
    explicit bark_context(bark_model & model) : model(model) {}
    ~bark_context() {
        // When the context owns the model (model_owner == true), it is
        // responsible for freeing it; `model` is a reference to a
        // heap-allocated bark_model in that case.
        if (model_owner) {
            delete &model;
        }
    }

    // RNG used for token sampling; seed it before generation for reproducibility.
    std::mt19937 rng;

    bark_model & model;

    // True when this context allocated (and must delete) the model.
    bool model_owner = false;

    // Timing counters (microseconds). Zero-initialized so they are never
    // read as indeterminate values before being set.
    int64_t t_load_us  = 0;
    int64_t t_start_us = 0;

    // Input text tokens (output of the tokenizer).
    bark_sequence tokens;

    // Output of the semantic (text) encoder.
    bark_sequence semantic_tokens;

    // Output of the coarse acoustic encoder.
    bark_codes coarse_tokens;

    // Output of the fine acoustic encoder.
    bark_codes fine_tokens;

    // Final decoded audio samples (output of the EnCodec decoder).
    std::vector<float> audio_arr;
};

void bark_free(bark_context * ctx);

bool gpt_model_load(const std::string& fname, gpt_model& model);

bool gpt_eval(
const gpt_model & model,
gpt_model & model,
bark_vocab::id * tokens,
int n_tokens,
float * logits,
int * n_past,
bool merge_ctx,
int n_threads,
size_t & mem_per_token);
int n_threads);

bool fine_gpt_eval(
const gpt_model & model,
gpt_model & model,
bark_vocab::id * tokens,
int n_tokens,
float * logits,
int n_threads,
int codebook_ix,
size_t & mem_per_token);

bark_vocab::id gpt_sample(
std::vector<float> & logits,
std::mt19937 & rng,
float temp,
float * eos_p);
int codebook_ix);

bool bark_model_load(const std::string & dirname, bark_model & model);

Expand All @@ -173,40 +203,30 @@ void bert_tokenize(
int32_t n_max_tokens);

bool bark_generate_audio(
bark_model model,
const bark_vocab& vocab,
const char * text,
const int n_threads,
const int32_t seed,
const std::string& dest_wav_path);

bark_sequence bark_forward_text_encoder(
const bark_sequence & tokens,
const gpt_model model,
std::mt19937 & rng,
const int n_threads,
const float temp,
const float min_eos_p);

bark_codes bark_forward_coarse_encoder(
const bark_sequence & tokens,
const gpt_model model,
std::mt19937 & rng,
const int n_threads,
const float temp,
const int max_coarse_history,
const int sliding_window_size);

bark_codes bark_forward_fine_encoder(
const bark_codes & tokens,
const gpt_model model,
std::mt19937 & rng,
const int n_threads,
const float temp);

audio_arr_t bark_forward_encodec(
const bark_codes & tokens,
const encodec_model model);
struct bark_context * ctx,
const char * text,
std::string & dest_wav_path,
int n_threads);

void bark_forward_text_encoder(
struct bark_context * ctx,
float temp,
float min_eos_p,
int n_threads);

void bark_forward_coarse_encoder(
struct bark_context * ctx,
int max_coarse_history,
int sliding_window_size,
float temp,
int n_threads);

void bark_forward_fine_encoder(
struct bark_context * ctx,
float temp,
int n_threads);

void bark_forward_encodec(struct bark_context * ctx);

struct bark_progress {
float current = 0.0f;
Expand Down Expand Up @@ -240,4 +260,6 @@ void read_tensor_from_file(std::ifstream & fin, struct ggml_tensor * t);

bool allequal(struct ggml_tensor * a, struct ggml_tensor * b, std::string test_name);

bool allclose(struct ggml_tensor * a, struct ggml_tensor * b, float tol, std::string test_name);
bool allclose(struct ggml_tensor * a, struct ggml_tensor * b, float tol, std::string test_name);

struct bark_context * bark_new_context_with_model(struct bark_model * model);
4 changes: 4 additions & 0 deletions encodec.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,11 @@ struct encodec_model {

std::map<std::string, struct ggml_tensor *> tensors;

int64_t t_predict_us = 0;
int64_t t_main_us = 0;

int64_t memsize = 0;
size_t mem_per_token = 0;
};


Expand Down
15 changes: 9 additions & 6 deletions examples/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ int main(int argc, char **argv) {
t_load_us = ggml_time_us() - t_start_us;
}

// create a context
bark_context * ctx = bark_new_context_with_model(&model);
if (ctx == nullptr) {
fprintf(stderr, "%s: failed to create context\n", __func__);
return 1;
}

printf("\n");

std::string prompt = "this is an audio";
Expand All @@ -42,7 +49,7 @@ int main(int argc, char **argv) {
}

const int64_t t_eval_us_start = ggml_time_us();
bark_generate_audio(model, model.vocab, prompt.data(), params.n_threads, params.seed, params.dest_wav_path);
bark_generate_audio(ctx, prompt.data(), params.dest_wav_path, params.n_threads);
t_eval_us = ggml_time_us() - t_eval_us_start;

// report timing
Expand All @@ -55,11 +62,7 @@ int main(int argc, char **argv) {
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0f);
}

// TODO: write wrapper
ggml_free(model.coarse_model.ctx);
ggml_free(model.fine_model.ctx);
ggml_free(model.text_model.ctx);
ggml_free(model.codec_model.ctx);
bark_free(ctx);

return 0;
}
5 changes: 2 additions & 3 deletions tests/test-fine-gpt-eval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,8 @@ int main() {
std::vector<float> gt_logits, logits;

// dry run to estimate mem_per_token
size_t mem_per_token = 0;
bark_sequence decoy = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 };
fine_gpt_eval(model, decoy.data(), decoy.size(), nullptr, n_threads, 2, mem_per_token);
fine_gpt_eval(model, decoy.data(), decoy.size(), nullptr, n_threads, 2);

for (int i = 0; i < (int) test_args.size(); i++) {
std::string path = std::get<0>(test_args[i]);
Expand All @@ -56,7 +55,7 @@ int main() {
std::vector<int> tokens_vec = flatten(tokens);

logits.resize(1024*1056);
fine_gpt_eval(model, tokens_vec.data(), tokens_vec.size(), logits.data(), n_threads, codebook_ix, mem_per_token);
fine_gpt_eval(model, tokens_vec.data(), tokens_vec.size(), logits.data(), n_threads, codebook_ix);

printf("\n");
printf("%s: %s\n", __func__, path.c_str());
Expand Down
16 changes: 11 additions & 5 deletions tests/test-forward-coarse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,16 @@ int main() {

std::mt19937 rng(0);

gpt_model model;
if(!gpt_model_load(fname, model)) {
bark_model model;

if(!gpt_model_load(fname, model.coarse_model)) {
fprintf(stderr, "%s: invalid model file '%s'\n", __func__, fname.c_str());
return 1;
}

bark_context * ctx = bark_new_context_with_model(&model);
ctx->rng = rng;

bark_sequence input;
bark_codes gt_tokens;

Expand All @@ -37,18 +41,20 @@ int main() {

std::string path = test_data[i];
load_test_data(path, input, gt_tokens);
ctx->semantic_tokens = input;

bark_codes tokens = bark_forward_coarse_encoder(
input, model, rng, n_threads, temp, max_coarse_history, sliding_window_size);
bark_forward_coarse_encoder(ctx, max_coarse_history, sliding_window_size, temp, n_threads);

printf("\n");
printf("%s: %s\n", __func__, path.c_str());
if (!run_test(transpose(gt_tokens), tokens)) {
if (!run_test(transpose(gt_tokens), ctx->coarse_tokens)) {
printf("%s: test %d failed.\n", __func__, i+1);
} else {
printf("%s: test %d passed.\n", __func__, i+1);
}
}

bark_free(ctx);

return 0;
}
18 changes: 12 additions & 6 deletions tests/test-forward-fine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,29 @@ int main() {

std::mt19937 rng(0);

gpt_model model;
if(!gpt_model_load(fname, model)) {
bark_model model;

if(!gpt_model_load(fname, model.fine_model)) {
fprintf(stderr, "%s: invalid model file '%s'\n", __func__, fname.c_str());
return 1;
}

bark_codes input, gt_tokens, tokens;
bark_context * ctx = bark_new_context_with_model(&model);
ctx->rng = rng;

bark_codes input, gt_tokens;

for (int i = 0; i < (int) test_data.size(); i++) {
input.clear();
gt_tokens.clear();
tokens.clear();

std::string path = test_data[i];
load_test_data(path, input, gt_tokens);

// TODO: need to remove transpose
bark_codes input_t = transpose(input);
bark_codes tokens = transpose(bark_forward_fine_encoder(input_t, model, rng, n_threads, temp));
ctx->coarse_tokens = transpose(input);
bark_forward_fine_encoder(ctx, temp, n_threads);
bark_codes tokens = transpose(ctx->fine_tokens);

printf("\n");
printf("%s: %s\n", __func__, path.c_str());
Expand All @@ -49,5 +53,7 @@ int main() {
}
}

bark_free(ctx);

return 0;
}
16 changes: 11 additions & 5 deletions tests/test-forward-semantic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,16 @@ int main() {

std::mt19937 rng(0);

gpt_model model;
if(!gpt_model_load(fname, model)) {
bark_model model;

if(!gpt_model_load(fname, model.text_model)) {
fprintf(stderr, "%s: invalid model file '%s'\n", __func__, fname.c_str());
return 1;
}

bark_context * ctx = bark_new_context_with_model(&model);
ctx->rng = rng;

bark_sequence input;
bark_sequence gt_tokens;

Expand All @@ -36,18 +40,20 @@ int main() {

std::string path = test_data[i];
load_test_data(path, input, gt_tokens);
ctx->tokens = input;

bark_sequence tokens = bark_forward_text_encoder(
input, model, rng, n_threads, temp, min_eos_p);
bark_forward_text_encoder(ctx, temp, min_eos_p, n_threads);

printf("\n");
printf("%s: %s\n", __func__, path.c_str());
if (!run_test(gt_tokens, tokens)) {
if (!run_test(gt_tokens, ctx->semantic_tokens)) {
printf("%s: test %d failed.\n", __func__, i+1);
} else {
printf("%s: test %d passed.\n", __func__, i+1);
}
}

bark_free(ctx);

return 0;
}
5 changes: 2 additions & 3 deletions tests/test-gpt-eval.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,10 @@ int main() {
logits.resize(n_vocab);

// dry run to estimate mem_per_token
size_t mem_per_token = 0;
{
int n_past = 0;
bark_vocab::id decoy[4] = { 0, 1, 2, 3 };
gpt_eval(model, decoy, 4, nullptr, &n_past, false, n_threads, mem_per_token);
gpt_eval(model, decoy, 4, nullptr, &n_past, false, n_threads);
}

for (int i = 0; i < (int) test_args.size(); i++) {
Expand All @@ -51,7 +50,7 @@ int main() {
load_test_data(path, tokens, gt_logits);

int n_past = 0;
gpt_eval(model, tokens.data(), tokens.size(), logits.data(), &n_past, merge_ctx, n_threads, mem_per_token);
gpt_eval(model, tokens.data(), tokens.size(), logits.data(), &n_past, merge_ctx, n_threads);

printf("\n");
printf("%s: %s\n", __func__, path.c_str());
Expand Down

0 comments on commit e33b060

Please sign in to comment.