// bark.h
// Public C interface for bark, plus an optional internal API for tests
// (compiled in only when BARK_API_INTERNAL is defined).
#ifndef BARK_H
#define BARK_H
#include "encodec.h"
#include <stdint.h>
#include <map>
#include <random>
#include <string>
#include <thread>
#include <vector>
// BARK_API: annotation placed in front of every public function declaration.
//   - BARK_SHARED not defined:                  expands to nothing
//   - BARK_SHARED defined, Windows (non-MinGW): dllexport when BARK_BUILD is defined,
//                                               dllimport otherwise
//   - BARK_SHARED defined, anything else:       default symbol visibility
#if !defined(BARK_SHARED)
#    define BARK_API
#elif !defined(_WIN32) || defined(__MINGW32__)
#    define BARK_API __attribute__ ((visibility ("default")))
#elif defined(BARK_BUILD)
#    define BARK_API __declspec(dllexport)
#else
#    define BARK_API __declspec(dllimport)
#endif
// Fixed numeric constants, grouped by name prefix. Values are unchanged.

// Sample rate (presumably Hz, of the generated audio -- confirm against the implementation).
#define SAMPLE_RATE                24000

// Text-related token ids and offsets.
#define CLS_TOKEN_ID               101
#define SEP_TOKEN_ID               102
#define TEXT_ENCODING_OFFSET       10048
#define TEXT_PAD_TOKEN             129595

// Semantic-related constants.
#define SEMANTIC_VOCAB_SIZE        10000
#define SEMANTIC_PAD_TOKEN         10000
#define SEMANTIC_INFER_TOKEN       129599
#define SEMANTIC_RATE_HZ           49.9

// Coarse-related constants.
#define COARSE_RATE_HZ             75
#define COARSE_SEMANTIC_PAD_TOKEN  12048
#define COARSE_INFER_TOKEN         12050

// Codebook dimensions.
#define CODEBOOK_SIZE              1024
#define N_COARSE_CODEBOOKS         2
#define N_FINE_CODEBOOKS           8
#ifdef __cplusplus
extern "C" {
#endif
//
// C interface
//
// Integer id of a single token (element type of bark_sequence / bark_codes in the internal API).
typedef int32_t bark_token;
// Types that are only forward-declared in this header: callers can hold pointers to them
// but cannot see their members from here.
struct bark_context;
// NOTE(review): bark_progress is declared but not referenced anywhere else in this header.
struct bark_progress;
// Sampling / generation settings handed over (by value) when a context is
// created with bark_new_context_with_model().
struct bark_context_params {
    // Seed for the random number generator.
    uint32_t seed;

    // Sampling temperature used by the text and coarse encoders.
    float temp;

    // Sampling temperature used by the fine encoder.
    float fine_temp;

    // Text encoder: minimum probability for the EOS token.
    float min_eos_p;

    // Coarse encoder: size of the sliding window.
    int sliding_window_size;

    // Coarse encoder: maximum history.
    int max_coarse_history;
};
// Forward declarations of model-side types. Their definitions do not appear in this header,
// so they can only be used through pointers/references here.
struct bark_model;
struct bark_vocab;
// NOTE(review): gpt_hparams and gpt_layer are declared but not referenced by any prototype in this header.
struct gpt_hparams;
struct gpt_layer;
struct gpt_model;
// Returns a bark_context_params by value; presumably filled with default settings
// (the actual values live in the implementation, not in this header).
BARK_API struct bark_context_params bark_context_default_params(void);
// Creates a context for `model`, configured with `params` (passed by value).
// NOTE(review): ownership of `model` and the value returned on failure are not visible
// in this header -- confirm in the implementation. Presumably released with bark_free().
BARK_API struct bark_context * bark_new_context_with_model(
struct bark_model * model,
struct bark_context_params params);
// Seeds the RNG associated with `ctx` (per the function name; implementation not visible here).
// NOTE(review): `seed` is int32_t here while bark_context_params::seed is uint32_t --
// confirm the intended signedness.
BARK_API void bark_seed_rng(struct bark_context * ctx, int32_t seed);
// Releases a context; presumably the counterpart of bark_new_context_with_model() -- confirm.
BARK_API void bark_free(struct bark_context * ctx);
// Releases a model; presumably the counterpart of bark_load_model_from_file() -- confirm.
// NOTE(review): the parameter is a bark_model but is named `ctx`.
BARK_API void bark_free_model(struct bark_model * ctx);
// Generates audio for `text` using `ctx`.
//   text           NUL-terminated input text
//   dest_wav_path  presumably the path of the WAV file to write -- confirm
//   n_threads      number of threads to use
// Returns an int status; the success/failure convention is not visible in this header.
BARK_API int bark_generate_audio(
struct bark_context * ctx,
const char * text,
const char * dest_wav_path,
int n_threads);
// Loads a model from `dirname` and returns a pointer to it.
// NOTE(review): the function name says "file" while the parameter is named `dirname` --
// confirm whether a directory or a file path is expected. The value returned on failure
// is not visible in this header.
BARK_API struct bark_model * bark_load_model_from_file(const char * dirname);
// Quantizes the model stored at `fname_inp` and writes the result to `fname_out`
// (per the parameter names; implementation not visible here).
//   ftype  target type; `ggml_ftype` is not declared in this header and presumably
//          comes in through encodec.h -- confirm
// Returns an int status; the success/failure convention is not visible in this header.
// NOTE(review): if `ggml_ftype` is an enum tag rather than a typedef, a C caller would
// need `enum ggml_ftype` here.
BARK_API int bark_model_quantize(
const char * fname_inp,
const char * fname_out,
ggml_ftype ftype);
// Loads a vocabulary from `fname` into `vocab`.
//   fname          path of the vocabulary file to read
//   vocab          destination object, supplied by the caller
//   expected_size  presumably the number of entries the file should contain --
//                  confirm in the implementation
// Returns an int status; the success/failure convention is not visible in this header.
//
// `vocab` is spelled `struct bark_vocab *`, matching every other pointer parameter in
// the C interface: a bare `bark_vocab` only names a type in C++, not in C.
BARK_API int bark_vocab_load(
        const char * fname,
        struct bark_vocab * vocab,
        int32_t expected_size);
#ifdef __cplusplus
}
#endif
#ifdef BARK_API_INTERNAL
//
// Internal API for tests
//
typedef std::vector<bark_token> bark_sequence;
typedef std::vector<std::vector<bark_token>> bark_codes;
typedef std::vector<float> audio_arr_t;
// Loads a GPT model from the file `fname` into `model` (per the names; implementation not visible here).
// Returns an int status; the success/failure convention is not visible in this header.
int gpt_model_load(const std::string& fname, gpt_model& model);
// Evaluates `model` on `n_tokens` tokens read from `tokens`.
//   logits     presumably an output buffer owned by the caller; required size not visible here -- confirm
//   n_past     passed by pointer, so presumably read and updated by the call -- confirm
//   merge_ctx  meaning not visible in this header -- see the implementation
//   n_threads  number of threads to use
// Returns an int; its meaning is not visible in this header.
int gpt_eval(
gpt_model * model,
bark_token * tokens,
int n_tokens,
float * logits,
int * n_past,
bool merge_ctx,
int n_threads);
// Evaluation entry point for the fine model (per the name), taking `n_tokens` tokens from `tokens`.
//   logits       presumably an output buffer owned by the caller; required size not visible here -- confirm
//   n_threads    number of threads to use
//   codebook_ix  index of a codebook (presumably in [0, N_FINE_CODEBOOKS) -- confirm)
// Returns a bool; presumably true on success -- confirm.
bool fine_gpt_eval(
gpt_model * model,
bark_token * tokens,
int n_tokens,
float * logits,
int n_threads,
int codebook_ix);
// Tokenizes `text` using `vocab` (per the names; implementation not visible here).
//   tokens        presumably the caller-provided output buffer for token ids -- confirm
//   n_tokens      passed by pointer; presumably receives the number of tokens produced -- confirm
//   n_max_tokens  presumably the capacity of `tokens` -- confirm
void bert_tokenize(
const bark_vocab * vocab,
const char * text,
int32_t * tokens,
int32_t * n_tokens,
int32_t n_max_tokens);
// Runs the text encoder stage on `ctx` with `n_threads` threads (per the name).
// Returns nothing; presumably results are stored in `ctx` -- confirm.
void bark_forward_text_encoder(
struct bark_context * ctx,
int n_threads);
// Runs the coarse encoder stage on `ctx` with `n_threads` threads (per the name).
// Returns nothing; presumably results are stored in `ctx` -- confirm.
void bark_forward_coarse_encoder(
struct bark_context * ctx,
int n_threads);
// Runs the fine encoder stage on `ctx` with `n_threads` threads (per the name).
// Returns nothing; presumably results are stored in `ctx` -- confirm.
void bark_forward_fine_encoder(
struct bark_context * ctx,
int n_threads);
// Runs the encodec stage on `ctx` (per the name). Unlike the other bark_forward_* functions
// declared in this header, it takes no thread count.
void bark_forward_encodec(struct bark_context * ctx);
#endif // BARK_API_INTERNAL
#endif // BARK_H