Skip to content

Commit

Permalink
COSMIT bark.h as C header (#102)
Browse files Browse the repository at this point in the history
  • Loading branch information
PABannier committed Sep 2, 2023
1 parent e33b060 commit a714d0e
Show file tree
Hide file tree
Showing 14 changed files with 815 additions and 741 deletions.
866 changes: 668 additions & 198 deletions bark.cpp

Large diffs are not rendered by default.

287 changes: 98 additions & 189 deletions bark.h
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
#pragma once
#ifndef BARK_H
#define BARK_H

#include "encodec.h"

#include <map>
#include <random>
#include <thread>
#include <vector>

// BARK_API marks the public entry points of the library.
// When building/consuming a shared library (BARK_SHARED), symbols are
// exported/imported with __declspec on MSVC-style Windows toolchains and
// given default ELF visibility elsewhere; in static builds the macro is empty.
#ifdef BARK_SHARED
#    if defined(_WIN32) && !defined(__MINGW32__)
#        ifdef BARK_BUILD
#            define BARK_API __declspec(dllexport)
#        else
#            define BARK_API __declspec(dllimport)
#        endif
#    else
#        define BARK_API __attribute__ ((visibility ("default")))
#    endif
#else
#    define BARK_API
#endif

#define SAMPLE_RATE 24000

#define CLS_TOKEN_ID 101
Expand All @@ -28,238 +44,131 @@
#define COARSE_INFER_TOKEN 12050


// Run-time options for the bark command-line driver.
struct bark_params {
    // Number of inference threads; capped at 4 by default.
    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());

    std::string model = "./ggml_weights/"; // weights location

    // Seed for the sampling RNG.
    int32_t seed = 0;

    std::string prompt; // user prompt

    // Destination path of the generated WAV file.
    std::string dest_wav_path = "./output.wav";
};

// Hyper-parameters of one GPT model, populated from the ggml weights file.
struct gpt_hparams {
    int32_t n_in_vocab;   // input vocabulary size
    int32_t n_out_vocab;  // output vocabulary size
    int32_t n_layer;      // number of transformer layers
    int32_t n_head;       // number of attention heads
    int32_t n_embd;       // embedding dimension
    int32_t block_size;   // context size (presumably max sequence length — GPT convention)
    int32_t n_lm_heads;   // number of language-model heads
    int32_t n_wtes;       // number of token-embedding matrices
    int32_t ftype;        // ggml weight type (quantization format)

    // Not read from file; default number of codes provided to the model.
    int32_t n_codes_given = 1;
};

// Token vocabulary with forward and reverse lookup tables, plus a separate
// subword table (consumed by bert_tokenize below).
struct bark_vocab {
    using id = int32_t;
    using token = std::string;

    // whole-token maps: text -> id and id -> text
    std::map<token, id> token_to_id;
    std::map<id, token> id_to_token;

    // subword maps, same direction convention as above
    std::map<token, id> subword_token_to_id;
    std::map<id, token> id_to_subword_token;
};

// Aliases for the token/audio containers passed between the bark stages.
// Modernized from `typedef` to `using` (identical semantics, C++11 idiom).
using bark_sequence = std::vector<bark_vocab::id>;               // flat stream of token ids
using bark_codes    = std::vector<std::vector<bark_vocab::id>>;  // token ids grouped per codebook (presumably; see coarse/fine usage)
using audio_arr_t   = std::vector<float>;                        // raw audio samples

// Weight tensors for a single transformer block of a GPT model.
struct gpt_layer {
    // normalization — layer-norm gain (g) and bias (b) before attention (ln_1)
    // and before the mlp (ln_2)
    struct ggml_tensor * ln_1_g;
    struct ggml_tensor * ln_1_b;

    struct ggml_tensor * ln_2_g;
    struct ggml_tensor * ln_2_b;

    // attention — c_attn_attn_* is presumably the fused QKV projection
    // (GPT-2 naming convention); c_attn_proj_* the output projection
    struct ggml_tensor * c_attn_attn_w;
    struct ggml_tensor * c_attn_attn_b;

    struct ggml_tensor * c_attn_proj_w;
    struct ggml_tensor * c_attn_proj_b;

    // mlp — feed-forward up (fc) and down (proj) projections
    struct ggml_tensor * c_mlp_fc_w;
    struct ggml_tensor * c_mlp_fc_b;

    struct ggml_tensor * c_mlp_proj_w;
    struct ggml_tensor * c_mlp_proj_b;
};

struct gpt_model {
gpt_hparams hparams;

// normalization
struct ggml_tensor * ln_f_g;
struct ggml_tensor * ln_f_b;

struct ggml_tensor * wpe; // token embedding
// struct ggml_tensor * wte; // position embedding
// struct ggml_tensor * lm_head; // language model head

std::vector<struct ggml_tensor *> wtes;
std::vector<struct ggml_tensor *> lm_heads;

std::vector<gpt_layer> layers;

// key + value memory
struct ggml_tensor * memory_k;
struct ggml_tensor * memory_v;
#ifdef __cplusplus
extern "C" {
#endif

//
struct ggml_context * ctx;
std::map<std::string, struct ggml_tensor *> tensors;

//
int64_t t_sample_us = 0;
int64_t t_predict_us = 0;
int64_t t_main_us = 0;

// C interface
//
int64_t memsize = 0;
size_t mem_per_token = 0;
};

struct bark_context;
struct bark_progress;

struct bark_model {
// encoder
gpt_model coarse_model;
gpt_model fine_model;
gpt_model text_model;
struct bark_model;
struct bark_vocab;

// decoder
encodec_model codec_model;
struct bark_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());

// vocab
bark_vocab vocab;
std::string model = "./ggml_weights/"; // weights location

int64_t memsize = 0;
};
int32_t seed = 0;

struct bark_context {
bark_context(bark_model & model) : model(model) {}
~bark_context() {
if (model_owner) {
delete &model;
}
}
std::string prompt; // user prompt

std::mt19937 rng;
std::string dest_wav_path = "./output.wav";
};

bark_model & model;
typedef int32_t bark_token;

bool model_owner = false;
typedef std::vector<bark_token> bark_sequence;
typedef std::vector<std::vector<bark_token>> bark_codes;
typedef std::vector<float> audio_arr_t;

int64_t t_load_us;
int64_t t_start_us;
struct gpt_hparams;
struct gpt_layer;
struct gpt_model;

bark_sequence tokens;
BARK_API struct bark_context * bark_new_context_with_model(struct bark_model * model);

bark_sequence semantic_tokens;
BARK_API void bark_free(bark_context * ctx);

bark_codes coarse_tokens;
BARK_API void bark_free_model(struct bark_model * ctx);

bark_codes fine_tokens;
BARK_API int bark_generate_audio(
struct bark_context * ctx,
const char * text,
std::string & dest_wav_path,
int n_threads);

std::vector<float> audio_arr;
};
BARK_API struct bark_model * bark_load_model_from_file(const std::string & dirname);

void bark_free(bark_context * ctx);
BARK_API int bark_model_quantize(
const std::string & fname_inp,
const std::string & fname_out,
ggml_ftype ftype);

bool gpt_model_load(const std::string& fname, gpt_model& model);
BARK_API int bark_vocab_load(const std::string & fname, bark_vocab& vocab, int32_t expected_size);

bool gpt_eval(
gpt_model & model,
bark_vocab::id * tokens,
int n_tokens,
float * logits,
int * n_past,
bool merge_ctx,
int n_threads);
BARK_API int bark_params_parse(int argc, char ** argv, bark_params & params);

bool fine_gpt_eval(
gpt_model & model,
bark_vocab::id * tokens,
int n_tokens,
float * logits,
int n_threads,
int codebook_ix);
BARK_API void bark_print_usage(char ** argv, const bark_params & params);

bool bark_model_load(const std::string & dirname, bark_model & model);
#ifdef __cplusplus
}
#endif

bool bark_vocab_load(const std::string & fname, bark_vocab& vocab, int32_t expected_size);
#ifdef BARK_API_INTERNAL

void bert_tokenize(
const bark_vocab & vocab,
const char * text,
int32_t * tokens,
int32_t * n_tokens,
int32_t n_max_tokens);

bool bark_generate_audio(
struct bark_context * ctx,
const char * text,
std::string & dest_wav_path,
int n_threads);
//
// Internal API for tests
//

void bark_forward_text_encoder(
int gpt_model_load(const std::string& fname, gpt_model& model);

int gpt_eval(
gpt_model & model,
bark_token * tokens,
int n_tokens,
float * logits,
int * n_past,
bool merge_ctx,
int n_threads);

bool fine_gpt_eval(
gpt_model & model,
bark_token * tokens,
int n_tokens,
float * logits,
int n_threads,
int codebook_ix);

void bert_tokenize(
const bark_vocab & vocab,
const char * text,
int32_t * tokens,
int32_t * n_tokens,
int32_t n_max_tokens);

void bark_forward_text_encoder(
struct bark_context * ctx,
float temp,
float min_eos_p,
int n_threads);

void bark_forward_coarse_encoder(
void bark_forward_coarse_encoder(
struct bark_context * ctx,
int max_coarse_history,
int sliding_window_size,
float temp,
int n_threads);

void bark_forward_fine_encoder(
void bark_forward_fine_encoder(
struct bark_context * ctx,
float temp,
int n_threads);

void bark_forward_encodec(struct bark_context * ctx);

// Minimal textual progress indicator written to stderr: emits "<func>: "
// once when progress starts at 0%, then one dot for every call that
// advances the percentage, and a trailing newline once 100% is reached.
struct bark_progress {
    float current = 0.0f;          // highest percentage reported so far
    const char * func = nullptr;   // optional label printed at 0%

    bark_progress() {}

    void callback(float progress) {
        const float pct = progress * 100;

        // Print the label exactly once, at the very start of the run.
        if (pct == 0.0f && func != nullptr) {
            fprintf(stderr, "%s: ", func);
        }

        // Only react when progress actually moved forward.
        // (The original while-loop body set `current` on its first pass,
        // so it executed at most once — a plain `if` is equivalent.)
        if (pct > current) {
            current = pct;
            fprintf(stderr, ".");
            fflush(stderr);
            if (pct >= 100) {
                fprintf(stderr, "\n");
            }
        }
    }
};

bool bark_params_parse(int argc, char ** argv, bark_params & params);
void bark_forward_encodec(struct bark_context * ctx);

void bark_print_usage(char ** argv, const bark_params & params);
void print_tensor(struct ggml_tensor * a);

void print_tensor(struct ggml_tensor * a);
void read_tensor_from_file(std::ifstream & fin, struct ggml_tensor * t);

void read_tensor_from_file(std::ifstream & fin, struct ggml_tensor * t);
bool allequal(struct ggml_tensor * a, struct ggml_tensor * b, std::string test_name);

bool allequal(struct ggml_tensor * a, struct ggml_tensor * b, std::string test_name);
bool allclose(struct ggml_tensor * a, struct ggml_tensor * b, float tol, std::string test_name);

bool allclose(struct ggml_tensor * a, struct ggml_tensor * b, float tol, std::string test_name);
#endif // BARK_API_INTERNAL

struct bark_context * bark_new_context_with_model(struct bark_model * model);
#endif // BARK_H

0 comments on commit a714d0e

Please sign in to comment.