Skip to content

Commit

Permalink
demo: add main example (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
PABannier committed Oct 1, 2023
1 parent 2a23eed commit 8f5c964
Show file tree
Hide file tree
Showing 8 changed files with 6,771 additions and 119 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ main.dSYM
encodec
*.o
*.th
main
.vscode/

build/
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ else()
set(ENCODEC_STANDALONE OFF)
endif()

# Toggle for building the example programs; the default follows ENCODEC_STANDALONE.
option(ENCODEC_BUILD_EXAMPLES "encodec: build examples" ${ENCODEC_STANDALONE})

# Build libraries

set(ENCODEC_LIB encodec.cpp)
Expand All @@ -29,6 +31,10 @@ add_library(
encodec.h
)

# Only descend into examples/ when the ENCODEC_BUILD_EXAMPLES option is enabled.
if (ENCODEC_BUILD_EXAMPLES)
add_subdirectory(examples)
endif()

target_link_libraries(${ENCODEC_LIB} PUBLIC ggml)
target_include_directories(${ENCODEC_LIB} PUBLIC .)
target_compile_features(${ENCODEC_LIB} PUBLIC cxx_std_11)
155 changes: 39 additions & 116 deletions encodec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,106 +14,6 @@

static const size_t TENSOR_ALIGNMENT = 32;

// res + downsample block at some ratio
//
// Holds the tensors of one encoder stage. The `_w` / `_b` suffixes are
// presumably weight / bias of the corresponding convolution (verify against
// the model loader). Every pointer starts out null so that a block whose
// tensors have not been assigned yet never carries indeterminate values.
struct encodec_encoder_block {
    // conv1
    struct ggml_tensor * conv_1_w = nullptr;
    struct ggml_tensor * conv_1_b = nullptr;

    // conv2
    struct ggml_tensor * conv_2_w = nullptr;
    struct ggml_tensor * conv_2_b = nullptr;

    // shortcut
    struct ggml_tensor * conv_sc_w = nullptr;
    struct ggml_tensor * conv_sc_b = nullptr;

    // downsampling layers
    struct ggml_tensor * ds_conv_w = nullptr;
    struct ggml_tensor * ds_conv_b = nullptr;
};

// Tensors of a two-layer LSTM (prefixes `l0_` and `l1_`). For each layer
// there is an `ih` and an `hh` weight (`_w`) and bias (`_b`); these match the
// weight_ih / weight_hh / bias_ih / bias_hh arguments taken by
// forward_pass_lstm_unilayer(). Pointers are null until the loader fills them.
struct encodec_lstm {
    struct ggml_tensor * l0_ih_w = nullptr;
    struct ggml_tensor * l0_hh_w = nullptr;

    struct ggml_tensor * l0_ih_b = nullptr;
    struct ggml_tensor * l0_hh_b = nullptr;

    struct ggml_tensor * l1_ih_w = nullptr;
    struct ggml_tensor * l1_hh_w = nullptr;

    struct ggml_tensor * l1_ih_b = nullptr;
    struct ggml_tensor * l1_hh_b = nullptr;
};

// All encoder tensors: an initial convolution, the per-stage blocks, the
// LSTM and a final convolution. Tensor pointers are null until assigned so
// that an unloaded encoder never exposes indeterminate pointers.
struct encodec_encoder {
    struct ggml_tensor * init_conv_w = nullptr;
    struct ggml_tensor * init_conv_b = nullptr;

    encodec_lstm lstm;

    struct ggml_tensor * final_conv_w = nullptr;
    struct ggml_tensor * final_conv_b = nullptr;

    // one entry per encoder stage
    std::vector<encodec_encoder_block> blocks;
};

// Tensors belonging to a single quantizer block.
// NOTE(review): the field names look like the codebook buffers of the
// reference implementation (inited / cluster_size / embed / embed_avg) --
// confirm against the model converter. Pointers are null until assigned.
struct encodec_quant_block {
    struct ggml_tensor * inited       = nullptr;
    struct ggml_tensor * cluster_size = nullptr;
    struct ggml_tensor * embed        = nullptr;
    struct ggml_tensor * embed_avg    = nullptr;
};

// Quantizer tensors, stored as a list of encodec_quant_block entries.
struct encodec_quantizer {
std::vector<encodec_quant_block> blocks;
};

// Holds the tensors of one decoder stage: an upsampling convolution followed
// by conv1 / conv2 / shortcut tensors. The `_w` / `_b` suffixes are
// presumably weight / bias (verify against the model loader). Every pointer
// starts out null so an unassigned block never carries indeterminate values.
struct encodec_decoder_block {
    // upsampling layers
    struct ggml_tensor * us_conv_w = nullptr;
    struct ggml_tensor * us_conv_b = nullptr;

    // conv1
    struct ggml_tensor * conv_1_w = nullptr;
    struct ggml_tensor * conv_1_b = nullptr;

    // conv2
    struct ggml_tensor * conv_2_w = nullptr;
    struct ggml_tensor * conv_2_b = nullptr;

    // shortcut
    struct ggml_tensor * conv_sc_w = nullptr;
    struct ggml_tensor * conv_sc_b = nullptr;
};

// All decoder tensors: an initial convolution, the LSTM, the per-stage
// blocks and a final convolution. Tensor pointers are null until assigned so
// that an unloaded decoder never exposes indeterminate pointers.
struct encodec_decoder {
    struct ggml_tensor * init_conv_w = nullptr;
    struct ggml_tensor * init_conv_b = nullptr;

    encodec_lstm lstm;

    struct ggml_tensor * final_conv_w = nullptr;
    struct ggml_tensor * final_conv_b = nullptr;

    // one entry per decoder stage
    std::vector<encodec_decoder_block> blocks;
};

// A loaded encodec model: hyper-parameters, the encoder / quantizer / decoder
// tensor tables, and the ggml context associated with the model.
struct encodec_model {
    encodec_hparams hparams;

    encodec_encoder   encoder;
    encodec_quantizer quantizer;
    encodec_decoder   decoder;

    // context
    // Null until a context has been created for this model.
    struct ggml_context * ctx = nullptr;

    // Count of tensors read so far; the loader increments it once per tensor,
    // so it must start from a defined value.
    int n_loaded = 0;

    // Lookup table from tensor name to tensor.
    std::map<std::string, struct ggml_tensor *> tensors;
};

template<typename T>
static void read_safe(std::ifstream& infile, T& dest) {
infile.read((char*)& dest, sizeof(T));
Expand All @@ -137,7 +37,12 @@ static void ggml_disconnect_node_from_graph(ggml_tensor * t) {
}
}

static void encodec_sigmoid_impl(struct ggml_tensor * dst, const struct ggml_tensor * src, int ith, int nth, void * userdata) {
static void encodec_sigmoid_impl(
struct ggml_tensor * dst,
const struct ggml_tensor * src,
int ith,
int nth,
void * userdata) {
GGML_ASSERT(userdata == NULL);
GGML_ASSERT(ggml_are_same_shape(dst, src));
GGML_ASSERT(ggml_is_contiguous(dst));
Expand Down Expand Up @@ -208,11 +113,11 @@ static struct ggml_tensor * unpad_1d(ggml_context * ctx0, ggml_tensor * inp, int
}

static struct ggml_tensor * strided_conv_1d(
ggml_context * ctx0,
ggml_tensor * inp,
ggml_tensor * conv_w,
ggml_tensor * conv_b,
int stride) {
ggml_context * ctx0,
ggml_tensor * inp,
ggml_tensor * conv_w,
ggml_tensor * conv_b,
int stride) {
int kernel_size = conv_w->ne[0];
int padding_total = kernel_size - stride;
int extra_padding = get_extra_padding_for_conv_1d(inp, kernel_size, stride, padding_total);
Expand All @@ -230,11 +135,11 @@ static struct ggml_tensor * strided_conv_1d(

static struct ggml_tensor * forward_pass_lstm_unilayer(
struct ggml_context * ctx0,
struct ggml_tensor * inp,
struct ggml_tensor * weight_ih,
struct ggml_tensor * weight_hh,
struct ggml_tensor * bias_ih,
struct ggml_tensor * bias_hh) {
struct ggml_tensor * inp,
struct ggml_tensor * weight_ih,
struct ggml_tensor * weight_hh,
struct ggml_tensor * bias_ih,
struct ggml_tensor * bias_hh) {

const int input_dim = inp->ne[1];
const int hidden_dim = weight_ih->ne[1]/4;
Expand Down Expand Up @@ -624,7 +529,7 @@ bool encodec_model_load(const std::string& fname, encodec_model& model) {

infile.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));

printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);
// printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0);

total_size += ggml_nbytes(tensor);
model.n_loaded++;
Expand Down Expand Up @@ -889,14 +794,12 @@ static struct ggml_cgraph * encodec_build_graph(
return gf;
}

static bool encodec_model_eval(
std::vector<float> & raw_audio,
bool encodec_model_eval(
encodec_context & ectx,
std::vector<float> & raw_audio,
int n_threads) {
const int64_t t_start_ms = ggml_time_ms();

fprintf(stderr, "%s: raw audio (t=%zu)\n", __func__, raw_audio.size());

static const size_t buf_size = 256u*1024*1024;

if (ectx.ctx_audio) {
Expand Down Expand Up @@ -951,3 +854,23 @@ static bool encodec_model_eval(

return true;
}

// Builds and returns an encodec_context constructed from `model`.
struct encodec_context encodec_new_context_with_model(encodec_model & model) {
    return encodec_context(model);
}

// Loads an encodec model from the file `fname` using encodec_model_load().
//
// Returns the populated model on success. On failure a message is written to
// stderr and the process is terminated with a failure status, so that shells
// and scripts invoking the program can detect that loading went wrong.
struct encodec_model encodec_load_model_from_file(std::string fname) {
    encodec_model model;
    if (!encodec_model_load(fname, model)) {
        fprintf(stderr, "%s: failed to load model\n", __func__);
        exit(EXIT_FAILURE);
    }
    return model;
}

// Releases the ggml context stored in ectx.ctx_audio, if there is one.
//
// The pointer is reset afterwards: encodec_model_eval() tests ctx_audio before
// reusing the context, and a second encodec_free() call must be a no-op, so a
// stale handle must not be left behind.
// NOTE(review): the ggml context held by the model itself is not released
// here -- confirm who owns it.
void encodec_free(encodec_context & ectx) {
    if (ectx.ctx_audio) {
        ggml_free(ectx.ctx_audio);
        ectx.ctx_audio = nullptr;
    }
}
114 changes: 112 additions & 2 deletions encodec.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#define ENCODEC_FILE_MAGIC 'ggml'
#define ENCODEC_FILE_VERSION 1

static const size_t MB = 4*1024*1024;
static const size_t MB = 1024*1024;

struct encodec_hparams {
int32_t in_channels = 1;
Expand All @@ -33,7 +33,105 @@ struct encodec_hparams {
int32_t sr = 24000;
};

struct encodec_model;
// res + downsample block at some ratio
//
// Holds the tensors of one encoder stage. The `_w` / `_b` suffixes are
// presumably weight / bias of the corresponding convolution (verify against
// the model loader). Every pointer starts out null so that a block whose
// tensors have not been assigned yet never carries indeterminate values.
struct encodec_encoder_block {
    // conv1
    struct ggml_tensor * conv_1_w = nullptr;
    struct ggml_tensor * conv_1_b = nullptr;

    // conv2
    struct ggml_tensor * conv_2_w = nullptr;
    struct ggml_tensor * conv_2_b = nullptr;

    // shortcut
    struct ggml_tensor * conv_sc_w = nullptr;
    struct ggml_tensor * conv_sc_b = nullptr;

    // downsampling layers
    struct ggml_tensor * ds_conv_w = nullptr;
    struct ggml_tensor * ds_conv_b = nullptr;
};

// Tensors of a two-layer LSTM (prefixes `l0_` and `l1_`). For each layer
// there is an `ih` and an `hh` weight (`_w`) and bias (`_b`).
// NOTE(review): presumably input-to-hidden / hidden-to-hidden parameters --
// confirm against the LSTM forward pass. Pointers are null until assigned.
struct encodec_lstm {
    struct ggml_tensor * l0_ih_w = nullptr;
    struct ggml_tensor * l0_hh_w = nullptr;

    struct ggml_tensor * l0_ih_b = nullptr;
    struct ggml_tensor * l0_hh_b = nullptr;

    struct ggml_tensor * l1_ih_w = nullptr;
    struct ggml_tensor * l1_hh_w = nullptr;

    struct ggml_tensor * l1_ih_b = nullptr;
    struct ggml_tensor * l1_hh_b = nullptr;
};

// All encoder tensors: an initial convolution, the per-stage blocks, the
// LSTM and a final convolution. Tensor pointers are null until assigned so
// that an unloaded encoder never exposes indeterminate pointers.
struct encodec_encoder {
    struct ggml_tensor * init_conv_w = nullptr;
    struct ggml_tensor * init_conv_b = nullptr;

    encodec_lstm lstm;

    struct ggml_tensor * final_conv_w = nullptr;
    struct ggml_tensor * final_conv_b = nullptr;

    // one entry per encoder stage
    std::vector<encodec_encoder_block> blocks;
};

// Tensors belonging to a single quantizer block.
// NOTE(review): the field names look like the codebook buffers of the
// reference implementation (inited / cluster_size / embed / embed_avg) --
// confirm against the model converter. Pointers are null until assigned.
struct encodec_quant_block {
    struct ggml_tensor * inited       = nullptr;
    struct ggml_tensor * cluster_size = nullptr;
    struct ggml_tensor * embed        = nullptr;
    struct ggml_tensor * embed_avg    = nullptr;
};

// Quantizer tensors, stored as a list of encodec_quant_block entries.
struct encodec_quantizer {
std::vector<encodec_quant_block> blocks;
};

// Holds the tensors of one decoder stage: an upsampling convolution followed
// by conv1 / conv2 / shortcut tensors. The `_w` / `_b` suffixes are
// presumably weight / bias (verify against the model loader). Every pointer
// starts out null so an unassigned block never carries indeterminate values.
struct encodec_decoder_block {
    // upsampling layers
    struct ggml_tensor * us_conv_w = nullptr;
    struct ggml_tensor * us_conv_b = nullptr;

    // conv1
    struct ggml_tensor * conv_1_w = nullptr;
    struct ggml_tensor * conv_1_b = nullptr;

    // conv2
    struct ggml_tensor * conv_2_w = nullptr;
    struct ggml_tensor * conv_2_b = nullptr;

    // shortcut
    struct ggml_tensor * conv_sc_w = nullptr;
    struct ggml_tensor * conv_sc_b = nullptr;
};

// All decoder tensors: an initial convolution, the LSTM, the per-stage
// blocks and a final convolution. Tensor pointers are null until assigned so
// that an unloaded decoder never exposes indeterminate pointers.
struct encodec_decoder {
    struct ggml_tensor * init_conv_w = nullptr;
    struct ggml_tensor * init_conv_b = nullptr;

    encodec_lstm lstm;

    struct ggml_tensor * final_conv_w = nullptr;
    struct ggml_tensor * final_conv_b = nullptr;

    // one entry per decoder stage
    std::vector<encodec_decoder_block> blocks;
};

// A loaded encodec model: hyper-parameters, the encoder / quantizer / decoder
// tensor tables, and the ggml context associated with the model.
struct encodec_model {
    encodec_hparams hparams;

    encodec_encoder   encoder;
    encodec_quantizer quantizer;
    encodec_decoder   decoder;

    // context
    // Null until a context has been created for this model.
    struct ggml_context * ctx = nullptr;

    // Count of loaded tensors; starts from a defined value so that it can be
    // incremented safely while loading.
    int n_loaded = 0;

    // Lookup table from tensor name to tensor.
    std::map<std::string, struct ggml_tensor *> tensors;
};

struct encodec_context {
encodec_context(encodec_model & model) : model(model) {}
Expand Down Expand Up @@ -62,3 +160,15 @@ struct encodec_context {
// statistics
int64_t t_compute_ms = 0;
};


// Loads a model from the file `fname` and returns it by value.
// If loading fails, an error is printed to stderr and the process is
// terminated via exit(); this function does not return in that case.
struct encodec_model encodec_load_model_from_file(std::string fname);

// Creates an encodec_context constructed from `model`.
struct encodec_context encodec_new_context_with_model(encodec_model & model);

// Runs the model on `raw_audio` using the state in `ectx`.
// `n_threads` is presumably the number of threads used for graph
// computation -- TODO confirm. Returns true on success.
bool encodec_model_eval(
encodec_context & ectx,
std::vector<float> & raw_audio,
int n_threads);

// Frees the ggml context held in ectx.ctx_audio, if one is set.
void encodec_free(encodec_context & ectx);
3 changes: 3 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Put this directory on the include path of the example targets added below.
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Each example lives in its own subdirectory.
add_subdirectory(main)
11 changes: 11 additions & 0 deletions examples/main/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Name of the example executable.
set(TARGET main)

# dr_wav.h is listed next to main.cpp as a source of the executable.
add_executable(${TARGET} main.cpp dr_wav.h)

install(TARGETS ${TARGET} RUNTIME)
# `encodec.cpp` here is the name of the library target, not a source file.
# NOTE(review): CMAKE_THREAD_LIBS_INIT is only populated after
# find_package(Threads); confirm that happens in a parent CMakeLists.txt.
target_link_libraries(${TARGET} PRIVATE encodec.cpp ${CMAKE_THREAD_LIBS_INIT})
target_compile_features(${TARGET} PRIVATE cxx_std_11)

# Silence MSVC's deprecation warnings for the classic C runtime functions.
if(MSVC)
target_compile_definitions(${TARGET} PRIVATE -D_CRT_SECURE_NO_WARNINGS=1)
endif()
Loading

0 comments on commit 8f5c964

Please sign in to comment.