sync: use encodec's latest version as a submodule #124

Merged: 69 commits on Feb 13, 2024
Changes from 1 commit

Commits (69)
2302881
rm encodec
PABannier Oct 26, 2023
450a606
add git submodules
PABannier Oct 26, 2023
4ece5fa
removed bark util
PABannier Oct 26, 2023
9c9f7e8
updated CMakeLists
PABannier Oct 26, 2023
e2b350a
rm build scripts
PABannier Oct 26, 2023
d491cc7
mv dr_wav in examples
PABannier Oct 26, 2023
38c2e49
common cpp
PABannier Oct 26, 2023
6b32b3b
moved def constants
PABannier Oct 26, 2023
ab9b528
text encoder loaded with the latest ggml API
PABannier Oct 27, 2023
6a9b50a
pulled ggml upstream
PABannier Oct 27, 2023
efbdd56
temporarily removed subdirectory encodec.cpp
PABannier Oct 27, 2023
753d5cf
clean forward pass text encoder
PABannier Oct 27, 2023
e4e712f
compiling
PABannier Oct 27, 2023
d8fc378
fix issue definition
PABannier Oct 27, 2023
b258c08
clean
PABannier Oct 28, 2023
6642e75
remove codec parsing functions
PABannier Oct 28, 2023
33d186e
kinda works
PABannier Oct 28, 2023
83a21ec
bias is stored in hparams
PABannier Oct 28, 2023
6cad888
working text encoder
PABannier Oct 28, 2023
242e7c5
cln tests
PABannier Oct 28, 2023
c1d0edd
coarse working?
PABannier Oct 28, 2023
94cd5e2
override bias
PABannier Oct 29, 2023
acf9dfa
working fine encoder
PABannier Oct 29, 2023
c1def75
rename quantize.cpp into main.cpp
PABannier Oct 29, 2023
cfaa59c
included quantize as a target
PABannier Oct 29, 2023
05ef89d
exposed quantization function
PABannier Oct 29, 2023
6172381
minor
PABannier Oct 29, 2023
6d0db93
update CIs
PABannier Oct 29, 2023
a978908
updated CIs
PABannier Oct 29, 2023
8ae7dc5
passing tokenizer test
PABannier Oct 29, 2023
7ad8cd5
Merge branch 'main' of https://github.com/PABannier/bark.cpp into enc…
PABannier Oct 29, 2023
7c2ae84
fast text encoder
PABannier Dec 11, 2023
d3971c2
Merge branch 'main' of https://github.com/PABannier/bark.cpp into enc…
PABannier Dec 11, 2023
5874a87
`bark.cpp` -> `bark`
PABannier Dec 11, 2023
5312577
server abides by latest API
PABannier Dec 12, 2023
e7b7d75
rm fast-text-encoder example
PABannier Dec 12, 2023
2aaf7b2
pass `-O3` release flag
PABannier Dec 12, 2023
79ed551
rm fast_text_encoder from CMakeLists
PABannier Dec 12, 2023
4f72d56
restructured
PABannier Dec 12, 2023
f13498a
CMakeLists arranged
PABannier Dec 13, 2023
f517570
update CIs
PABannier Dec 13, 2023
b8bdd76
add encodec.cpp in the loop
PABannier Dec 14, 2023
5319d26
add verbosity level
PABannier Dec 15, 2023
11c3f9a
Fix CIs (#128)
AlexHayton Dec 30, 2023
da3cc56
Merge branch 'encodec_as_submodule' of https://github.com/PABannier/b…
PABannier Jan 2, 2024
07a322c
fix coarse encoder internal pass
PABannier Jan 3, 2024
3002698
`VerbosityLevel` -> `bark_verbosity_level`
PABannier Jan 5, 2024
747345c
updated examples
PABannier Jan 5, 2024
a3e3e92
populated time per token
PABannier Jan 5, 2024
19e1683
remove whitespace
PABannier Jan 5, 2024
fa6975c
BarkProgressBar implemented
PABannier Jan 6, 2024
b9e2109
verbosity level controlled for cleaner output
PABannier Jan 6, 2024
4401975
removed params as macros and moved them into default constructor
PABannier Jan 6, 2024
38846ec
updated README
PABannier Jan 6, 2024
59d5352
removed useless `n_predict` in params
PABannier Jan 6, 2024
07e92de
removed old tests
PABannier Jan 6, 2024
ec677fb
fix wrong return type, quantization works again
PABannier Jan 6, 2024
035ef16
Added Metal and CUDA backend
PABannier Jan 6, 2024
6e4ac9a
updated docs
PABannier Jan 6, 2024
ac327a9
cosmit
PABannier Jan 6, 2024
d347134
rm submodule
PABannier Jan 7, 2024
d7e9661
added encodec submodule
PABannier Jan 7, 2024
1fbe29d
remove mem_per_token
PABannier Jan 7, 2024
b3d9179
more verbose errors
PABannier Jan 7, 2024
94fea82
clean
PABannier Jan 7, 2024
bec8547
reset allocr to reduce memory footprint
PABannier Jan 7, 2024
df7c22a
add tests
PABannier Jan 7, 2024
6fbc184
expose forward passes
PABannier Jan 7, 2024
87a102b
enhanced README.md
PABannier Feb 12, 2024

clean forward pass text encoder
PABannier committed Oct 27, 2023
commit 753d5cf26516bf144eb196445b76e63828f690b3
130 changes: 2 additions & 128 deletions bark.cpp
@@ -42,131 +42,6 @@

static const size_t MB = 1024*1024;

typedef std::vector<int32_t> bark_sequence;
typedef std::vector<std::vector<int32_t>> bark_codes;

struct gpt_hparams {
int32_t n_in_vocab;
int32_t n_out_vocab;
int32_t n_layer;
int32_t n_head;
int32_t n_embd;
int32_t block_size;
int32_t n_lm_heads;
int32_t n_wtes;
int32_t ftype;

int32_t n_codes_given = 1;
};

struct bark_vocab {
using id = int32_t;
using token = std::string;

std::map<token, id> token_to_id;
std::map<id, token> id_to_token;
};

struct gpt_layer {
// normalization
struct ggml_tensor * ln_1_g;
struct ggml_tensor * ln_1_b;

struct ggml_tensor * ln_2_g;
struct ggml_tensor * ln_2_b;

// attention
struct ggml_tensor * c_attn_attn_w;
struct ggml_tensor * c_attn_attn_b;

struct ggml_tensor * c_attn_proj_w;
struct ggml_tensor * c_attn_proj_b;

// mlp
struct ggml_tensor * c_mlp_fc_w;
struct ggml_tensor * c_mlp_fc_b;

struct ggml_tensor * c_mlp_proj_w;
struct ggml_tensor * c_mlp_proj_b;
};

struct gpt_model {
gpt_hparams hparams;

// normalization
struct ggml_tensor * ln_f_g;
struct ggml_tensor * ln_f_b;

struct ggml_tensor * wpe; // position embedding
std::vector<struct ggml_tensor *> wtes; // token embedding
std::vector<struct ggml_tensor *> lm_heads; // language model head

std::vector<gpt_layer> layers;

// key + value memory
struct ggml_tensor * memory_k;
struct ggml_tensor * memory_v;

struct ggml_context * ctx;

ggml_backend_t backend = NULL;

ggml_backend_buffer_t buffer_w;
ggml_backend_buffer_t buffer_kv;

std::map<std::string, struct ggml_tensor *> tensors;

//
int64_t t_sample_us = 0;
int64_t t_predict_us = 0;
int64_t t_main_us = 0;

//
int64_t n_sample = 0;
int64_t n_predict = 0;

//
int64_t memsize = 0;
size_t mem_per_token = 0;
};

struct bark_model {
// encoder
gpt_model coarse_model;
gpt_model fine_model;
gpt_model text_model;

// vocab
bark_vocab vocab;
};

struct bark_context {
bark_model model;

// buffer for model evaluation
ggml_backend_buffer_t buf_compute;

// custom allocator
struct ggml_allocr * allocr = NULL;

std::mt19937 rng;

bark_sequence tokens;
bark_sequence semantic_tokens;

bark_codes coarse_tokens;
bark_codes fine_tokens;

std::vector<float> audio_arr;

// hyperparameters
bark_context_params params;

// statistics
int64_t t_load_us = 0;
int64_t t_start_us = 0;

};

struct bark_progress {
float current = 0.0f;
@@ -1330,9 +1205,7 @@ struct bark_context_params bark_context_default_params() {
return result;
}

struct bark_context * bark_load_model(
const std::string & model_path,
const bark_context_params & params) {
struct bark_context * bark_load_model(const std::string & model_path) {
int64_t t_load_start_us = ggml_time_us();

struct bark_context * bctx = new bark_context();
@@ -1343,6 +1216,7 @@ struct bark_context * bark_load_model(
return {};
}

bark_context_params params = bark_context_default_params();
bctx->rng = std::mt19937(params.seed);

bctx->params = params;
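
For reference, after this change the loader takes only the model path; bark_context_params is filled in internally through bark_context_default_params(). A minimal caller sketch (the weights path and the remaining generation and cleanup steps are placeholders, not taken from this diff):

```cpp
#include "bark.h"

#include <cstdio>

int main() {
    // The loader now builds its own default parameters internally,
    // so the caller only supplies the path to the model weights.
    struct bark_context * bctx = bark_load_model("./ggml_weights.bin");
    if (!bctx) {
        fprintf(stderr, "failed to load the bark model\n");
        return 1;
    }

    // ... run text-to-audio generation with bctx, then free the context ...

    return 0;
}
```
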
139 changes: 127 additions & 12 deletions bark.h
@@ -1,4 +1,5 @@
#include "ggml.h"
#include "ggml-backend.h"

#include <map>
#include <random>
@@ -19,11 +20,133 @@
# define BARK_API
#endif


typedef int32_t bark_token;

struct bark_context;
struct bark_progress;
typedef std::vector<int32_t> bark_sequence;
typedef std::vector<std::vector<int32_t>> bark_codes;

struct gpt_hparams {
int32_t n_in_vocab;
int32_t n_out_vocab;
int32_t n_layer;
int32_t n_head;
int32_t n_embd;
int32_t block_size;
int32_t n_lm_heads;
int32_t n_wtes;
int32_t ftype;

int32_t n_codes_given = 1;
};

struct bark_vocab {
using id = int32_t;
using token = std::string;

std::map<token, id> token_to_id;
std::map<id, token> id_to_token;
};

struct gpt_layer {
// normalization
struct ggml_tensor * ln_1_g;
struct ggml_tensor * ln_1_b;

struct ggml_tensor * ln_2_g;
struct ggml_tensor * ln_2_b;

// attention
struct ggml_tensor * c_attn_attn_w;
struct ggml_tensor * c_attn_attn_b;

struct ggml_tensor * c_attn_proj_w;
struct ggml_tensor * c_attn_proj_b;

// mlp
struct ggml_tensor * c_mlp_fc_w;
struct ggml_tensor * c_mlp_fc_b;

struct ggml_tensor * c_mlp_proj_w;
struct ggml_tensor * c_mlp_proj_b;
};

struct gpt_model {
gpt_hparams hparams;

// normalization
struct ggml_tensor * ln_f_g;
struct ggml_tensor * ln_f_b;

struct ggml_tensor * wpe; // position embedding
std::vector<struct ggml_tensor *> wtes; // token embedding
std::vector<struct ggml_tensor *> lm_heads; // language model head

std::vector<gpt_layer> layers;

// key + value memory
struct ggml_tensor * memory_k;
struct ggml_tensor * memory_v;

struct ggml_context * ctx;

ggml_backend_t backend = NULL;

ggml_backend_buffer_t buffer_w;
ggml_backend_buffer_t buffer_kv;

std::map<std::string, struct ggml_tensor *> tensors;

//
int64_t t_sample_us = 0;
int64_t t_predict_us = 0;
int64_t t_main_us = 0;

//
int64_t n_sample = 0;
int64_t n_predict = 0;

//
int64_t memsize = 0;
size_t mem_per_token = 0;
};

struct bark_model {
// encoder
gpt_model coarse_model;
gpt_model fine_model;
gpt_model text_model;

// vocab
bark_vocab vocab;
};

struct bark_context {
bark_model model;

// buffer for model evaluation
ggml_backend_buffer_t buf_compute;

// custom allocator
struct ggml_allocr * allocr = NULL;

std::mt19937 rng;

bark_sequence tokens;
bark_sequence semantic_tokens;

bark_codes coarse_tokens;
bark_codes fine_tokens;

std::vector<float> audio_arr;

// hyperparameters
bark_context_params params;

// statistics
int64_t t_load_us = 0;
int64_t t_eval_us = 0;

};

struct bark_context_params {
uint32_t seed; // RNG seed
@@ -41,13 +164,6 @@ struct bark_context_params {
int max_coarse_history;
};

struct bark_model;
struct bark_vocab;

struct gpt_hparams;
struct gpt_layer;
struct gpt_model;

/**
* @brief Returns the default parameters for a bark context.
*
@@ -63,8 +179,7 @@ BARK_API struct bark_context_params bark_context_default_params(void);
* @return A pointer to the loaded bark model context.
*/
BARK_API struct bark_context * bark_load_model(
const std::string & model_path,
const bark_context_params & params);
const std::string & model_path);

/**
* Generates an audio file from the given text using the specified Bark context.
45 changes: 45 additions & 0 deletions examples/common.cpp
@@ -4,6 +4,8 @@
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"

#include "common.h"

#define SAMPLE_RATE 24000

void write_wav_on_disk(std::vector<float> & audio_arr, std::string dest_path) {
@@ -21,3 +23,46 @@ void write_wav_on_disk(std::vector<float> & audio_arr, std::string dest_path) {

fprintf(stderr, "%s: Number of frames written = %lld.\n", __func__, frames);
}

void bark_print_usage(char ** argv, const bark_params & params) {
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help show this help message and exit\n");
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
fprintf(stderr, " -s N, --seed N seed for random number generator (default: %d)\n", params.seed);
fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
fprintf(stderr, " prompt to start generation with (default: random)\n");
fprintf(stderr, " -m FNAME, --model FNAME\n");
fprintf(stderr, " model path (default: %s)\n", params.model_path.c_str());
fprintf(stderr, " -o FNAME, --outwav FNAME\n");
fprintf(stderr, " output generated wav (default: %s)\n", params.dest_wav_path.c_str());
fprintf(stderr, "\n");
}

int bark_params_parse(int argc, char ** argv, bark_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];

if (arg == "-t" || arg == "--threads") {
params.n_threads = std::stoi(argv[++i]);
} else if (arg == "-p" || arg == "--prompt") {
params.prompt = argv[++i];
} else if (arg == "-m" || arg == "--model") {
params.model_path = argv[++i];
} else if (arg == "-s" || arg == "--seed") {
params.seed = std::stoi(argv[++i]);
} else if (arg == "-o" || arg == "--outwav") {
params.dest_wav_path = argv[++i];
} else if (arg == "-h" || arg == "--help") {
bark_print_usage(argv, params);
exit(0);
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
bark_print_usage(argv, params);
exit(0);
}
}

return 0;
}
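
Taken together, these helpers give the examples a shared argument parser and WAV writer. A usage sketch (the bark_params struct and the helper declarations are assumed to live in examples/common.h with the fields referenced above, and the audio buffer is a stand-in for real generation output):

```cpp
#include "common.h"

#include <vector>

int main(int argc, char ** argv) {
    bark_params params;

    // Populate n_threads, seed, prompt, model_path and dest_wav_path from the
    // command line; prints usage and exits on -h/--help or an unknown flag.
    if (bark_params_parse(argc, argv, params) != 0) {
        return 1;
    }

    // Placeholder buffer; in the real examples this comes from bark generation.
    std::vector<float> audio_arr;
    write_wav_on_disk(audio_arr, params.dest_wav_path);

    return 0;
}
```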