-
Notifications
You must be signed in to change notification settings - Fork 51
/
encodec.h
118 lines (90 loc) · 2.66 KB
/
encodec.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/* This is a shortened version of the original encodec.cpp, available at
https://github.com/PABannier/encodec.cpp.
Since bark only uses the decoder, only the decoding forward pass is present in this file.
*/
#pragma once
#include "ggml.h"
#include <cmath>
#include <fstream>
#include <iostream>
#include <map>
#include <thread>
#include <string>
#include <vector>
// Hyperparameters of the Encodec model.
// Every field carries a default, so a default-constructed instance is fully
// populated without any further setup.
struct encodec_hparams {
    int32_t in_channels{1};
    int32_t hidden_dim{128};
    int32_t n_filters{32};
    // NOTE(review): presumably the per-block resampling ratios — confirm.
    int32_t ratios[4]{8, 5, 4, 2};
    int32_t kernel_size{7};
    int32_t residual_kernel_size{3};
    int32_t compress{2};
    int32_t n_lstm_layers{2};
    int32_t stride{1};

    // 24kbps (n_q=32)
    int32_t n_q{32};
    int32_t n_bins{1024};
    // NOTE(review): presumably the audio sample rate in Hz — confirm.
    int32_t sr{24000};
};
// res + downsample block at some ratio
//
// Plain bundle of ggml tensor pointers. Every pointer defaults to nullptr so
// that a default-constructed block never holds indeterminate pointer values
// and "not yet assigned" can be checked for explicitly.
// The struct declares no destructor, so it never frees what the pointers
// refer to.
// NOTE(review): the `_w` / `_b` suffixes presumably mean weight / bias — confirm.
struct encodec_encoder_block {
    // conv1
    struct ggml_tensor * conv_1_w = nullptr;
    struct ggml_tensor * conv_1_b = nullptr;

    // conv2
    struct ggml_tensor * conv_2_w = nullptr;
    struct ggml_tensor * conv_2_b = nullptr;

    // shortcut
    struct ggml_tensor * conv_sc_w = nullptr;
    struct ggml_tensor * conv_sc_b = nullptr;

    // downsampling layers
    struct ggml_tensor * ds_conv_w = nullptr;
    struct ggml_tensor * ds_conv_b = nullptr;
};
// Tensor pointers for the LSTM part of the model.
//
// Every pointer defaults to nullptr so that a default-constructed instance
// never holds indeterminate pointer values. The struct declares no
// destructor, so it never frees what the pointers refer to.
// NOTE(review): `l0` / `l1` presumably denote the first and second LSTM layer,
// `ih` / `hh` the input-to-hidden and hidden-to-hidden parameters, and
// `_w` / `_b` weight / bias — confirm against the loader.
struct encodec_lstm {
    // layer 0
    struct ggml_tensor * l0_ih_w = nullptr;
    struct ggml_tensor * l0_hh_w = nullptr;
    struct ggml_tensor * l0_ih_b = nullptr;
    struct ggml_tensor * l0_hh_b = nullptr;

    // layer 1
    struct ggml_tensor * l1_ih_w = nullptr;
    struct ggml_tensor * l1_hh_w = nullptr;
    struct ggml_tensor * l1_ih_b = nullptr;
    struct ggml_tensor * l1_hh_b = nullptr;
};
// One quantizer block: a single ggml tensor pointer.
//
// `embed` defaults to nullptr so that a default-constructed block never holds
// an indeterminate pointer. The struct declares no destructor, so it never
// frees what the pointer refers to.
// NOTE(review): `embed` is presumably the block's embedding / codebook tensor — confirm.
struct encodec_quant_block {
    struct ggml_tensor * embed = nullptr;
};
// The quantizer: an ordered collection of quantizer blocks.
// Starts out empty; the vector owns the block structs it stores.
struct encodec_quantizer {
    // NOTE(review): presumably one entry per codebook (see encodec_hparams::n_q) — confirm.
    std::vector<encodec_quant_block> blocks;
};
// One decoder block: upsampling layer followed by conv1 / conv2 / shortcut
// tensors.
//
// Every pointer defaults to nullptr so that a default-constructed block never
// holds indeterminate pointer values. The struct declares no destructor, so it
// never frees what the pointers refer to.
// NOTE(review): the `_w` / `_b` suffixes presumably mean weight / bias — confirm.
struct encodec_decoder_block {
    // upsampling layers
    struct ggml_tensor * us_conv_w = nullptr;
    struct ggml_tensor * us_conv_b = nullptr;

    // conv1
    struct ggml_tensor * conv_1_w = nullptr;
    struct ggml_tensor * conv_1_b = nullptr;

    // conv2
    struct ggml_tensor * conv_2_w = nullptr;
    struct ggml_tensor * conv_2_b = nullptr;

    // shortcut
    struct ggml_tensor * conv_sc_w = nullptr;
    struct ggml_tensor * conv_sc_b = nullptr;
};
// All tensors of the decoder: an initial convolution, the LSTM, a final
// convolution and a list of decoder blocks.
//
// The raw tensor pointers default to nullptr so that a default-constructed
// decoder never holds indeterminate pointer values. `blocks` starts out empty.
// The struct declares no destructor, so it never frees what the pointers
// refer to.
// NOTE(review): the `_w` / `_b` suffixes presumably mean weight / bias — confirm.
struct encodec_decoder {
    struct ggml_tensor * init_conv_w = nullptr;
    struct ggml_tensor * init_conv_b = nullptr;

    encodec_lstm lstm;

    struct ggml_tensor * final_conv_w = nullptr;
    struct ggml_tensor * final_conv_b = nullptr;

    std::vector<encodec_decoder_block> blocks;
};
// Top-level model object: hyperparameters, quantizer, decoder, the ggml
// context and a name -> tensor lookup table.
//
// `ctx` defaults to nullptr and `n_loaded` to 0 so that a model that was
// default-constructed, or only partially loaded, never exposes an
// indeterminate context pointer or counter.
// The struct declares no destructor, so it never frees `ctx` itself.
struct encodec_model {
    encodec_hparams hparams;

    encodec_quantizer quantizer;
    encodec_decoder decoder;

    // context
    struct ggml_context * ctx = nullptr;

    // NOTE(review): presumably the number of tensors loaded so far — confirm against the loader.
    int n_loaded = 0;

    // NOTE(review): presumably keyed by tensor name — confirm against the loader.
    std::map<std::string, struct ggml_tensor *> tensors;
};
// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name and signature, this reads an Encodec model
// from the file named by `fname` and populates `model` — confirm against the
// definition.
//   fname - taken by const reference; presumably the path of the model file.
//   model - taken by non-const reference, so the callee is able to modify it.
// Returns a bool; presumably true on success and false on failure — confirm.
bool encodec_model_load(const std::string& fname, encodec_model& model);