Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mnt: make header C compliant #39

Merged
merged 1 commit into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Moved encodec_hparams into source file
  • Loading branch information
PABannier committed May 10, 2024
commit 429830f2f4fdfce44916b68990dd72280ec6af9f
36 changes: 36 additions & 0 deletions encodec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,42 @@

static const size_t MB = 1024 * 1024;

// Hyperparameters of an Encodec model.
//
// Every field has a default member initializer, so a default-constructed
// instance is fully initialized. Default member initializers are a C++
// feature, so this definition cannot be compiled as C.
struct encodec_hparams {
    // The number of input channels is always 1 (mono).
    int32_t in_channels = 1;
    // The hidden dimension for the codebook.
    int32_t hidden_dim = 128;
    // The number of filters for the first convolution.
    int32_t n_filters = 32;
    // The filter size for upsampling and downsampling.
    int32_t ratios[4] = {8, 5, 4, 2};
    // The kernel size for the first convolution.
    int32_t kernel_size = 7;
    // The kernel size for the residual blocks.
    int32_t residual_kernel_size = 3;
    // Compression
    int32_t compress = 2;
    // The number of layers in the LSTM modules.
    int32_t n_lstm_layers = 2;
    // The stride of the first convolution.
    int32_t stride = 1;

    // The dimension of the codebook.
    int32_t n_bins = 1024;
    // The sample rate of the model.
    int32_t sr = 24000;
    // The bandwidth.
    int32_t bandwidth = 24;

    // The number of codebooks.
    int32_t n_q = 32;
    // The product of the ratios.
    // NOTE(review): the default of 1 is not the product of the default
    // ratios (8 * 5 * 4 * 2 = 320); presumably the real value is computed
    // elsewhere once the ratios are known -- TODO confirm.
    int32_t hop_length = 1;

    // File type of model weights.
    // Defaults to 0 so the field is never read while indeterminate.
    // NOTE(review): presumably overwritten when a model is loaded -- confirm.
    int32_t ftype = 0;
};

// res + downsample block at some ratio
struct encodec_encoder_block {
// conv1
Expand Down
44 changes: 4 additions & 40 deletions encodec.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,42 +33,6 @@ extern "C" {
#endif
struct encodec_context;

// Hyperparameters of an Encodec model.
//
// Every field has a default member initializer, so a default-constructed
// instance is fully initialized. Default member initializers are a C++
// feature, so this definition cannot be compiled as C.
struct encodec_hparams {
    // The number of input channels is always 1 (mono).
    int32_t in_channels = 1;
    // The hidden dimension for the codebook.
    int32_t hidden_dim = 128;
    // The number of filters for the first convolution.
    int32_t n_filters = 32;
    // The filter size for upsampling and downsampling.
    int32_t ratios[4] = {8, 5, 4, 2};
    // The kernel size for the first convolution.
    int32_t kernel_size = 7;
    // The kernel size for the residual blocks.
    int32_t residual_kernel_size = 3;
    // Compression
    int32_t compress = 2;
    // The number of layers in the LSTM modules.
    int32_t n_lstm_layers = 2;
    // The stride of the first convolution.
    int32_t stride = 1;

    // The dimension of the codebook.
    int32_t n_bins = 1024;
    // The sample rate of the model.
    int32_t sr = 24000;
    // The bandwidth.
    int32_t bandwidth = 24;

    // The number of codebooks.
    int32_t n_q = 32;
    // The product of the ratios.
    // NOTE(review): the default of 1 is not the product of the default
    // ratios (8 * 5 * 4 * 2 = 320); presumably the real value is computed
    // elsewhere once the ratios are known -- TODO confirm.
    int32_t hop_length = 1;

    // File type of model weights.
    // Defaults to 0 so the field is never read while indeterminate.
    // NOTE(review): presumably overwritten when a model is loaded -- confirm.
    int32_t ftype = 0;
};

struct encodec_statistics {
// The time taken to load the model.
int64_t t_load_us;
Expand Down Expand Up @@ -162,10 +126,10 @@ extern "C" {
*/
float * encodec_get_audio(
struct encodec_context *ectx);

/**
* Gets the size of the audio data from the given encodec context.
*
*
* @param ectx The encodec context to get the audio size from.
* @return The size of the audio data.
*/
Expand All @@ -180,10 +144,10 @@ extern "C" {
*/
int32_t * encodec_get_codes(
struct encodec_context *ectx);

/**
* Gets the size of the code data from the given encodec context.
*
*
* @param ectx The encodec context to get the code size from.
* @return The size of the code data.
*/
Expand Down