-
Notifications
You must be signed in to change notification settings - Fork 56
/
bark.h
110 lines (81 loc) · 2.27 KB
/
bark.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include "encodec.h"
#include <map>
#include <vector>
#define CLS_TOKEN_ID 101
#define SEP_TOKEN_ID 102
#define TEXT_ENCODING_OFFSET 10048
#define TEXT_PAD_TOKEN 129595
#define CODEBOOK_SIZE 1024
#define N_COARSE_CODEBOOKS 2
#define SEMANTIC_PAD_TOKEN 10000
#define SEMANTIC_INFER_TOKEN 129599
#define SEMANTIC_VOCAB_SIZE 10000
#define SEMANTIC_RATE_HZ 49.9
#define COARSE_RATE_HZ 75
#define COARSE_SEMANTIC_PAD_TOKEN 12048
#define COARSE_INFER_TOKEN 12050
struct gpt_hparams {
int32_t n_in_vocab;
int32_t n_out_vocab;
int32_t n_layer;
int32_t n_head;
int32_t n_embd;
int32_t block_size;
int32_t n_lm_heads;
int32_t n_wtes;
};
struct bark_vocab {
using id = int32_t;
using token = std::string;
std::map<token, id> token_to_id;
std::map<id, token> id_to_token;
std::map<token, id> subword_token_to_id;
std::map<id, token> id_to_subword_token;
};
struct gpt_layer {
// normalization
struct ggml_tensor * ln_1_g;
struct ggml_tensor * ln_1_b;
struct ggml_tensor * ln_2_g;
struct ggml_tensor * ln_2_b;
// attention
struct ggml_tensor * c_attn_attn_w;
struct ggml_tensor * c_attn_attn_b;
struct ggml_tensor * c_attn_proj_w;
struct ggml_tensor * c_attn_proj_b;
// mlp
struct ggml_tensor * c_mlp_fc_w;
struct ggml_tensor * c_mlp_fc_b;
struct ggml_tensor * c_mlp_proj_w;
struct ggml_tensor * c_mlp_proj_b;
};
struct gpt_model {
gpt_hparams hparams;
// normalization
struct ggml_tensor * ln_f_g;
struct ggml_tensor * ln_f_b;
struct ggml_tensor * wpe; // token embedding
// struct ggml_tensor * wte; // position embedding
// struct ggml_tensor * lm_head; // language model head
std::vector<struct ggml_tensor *> wtes;
std::vector<struct ggml_tensor *> lm_heads;
std::vector<gpt_layer> layers;
// key + value memory
struct ggml_tensor * memory_k;
struct ggml_tensor * memory_v;
//
struct ggml_context * ctx;
std::map<std::string, struct ggml_tensor *> tensors;
int32_t memsize = 0;
};
struct bark_model {
// encoder
gpt_model coarse_model;
gpt_model fine_model;
gpt_model text_model;
// decoder
encodec_model codec_model;
// vocab
bark_vocab vocab;
int32_t memsize = 0;
};