Skip to content

Commit

Permalink
working quantizer
Browse files Browse the repository at this point in the history
  • Loading branch information
PABannier committed Jun 18, 2023
1 parent f7aee77 commit 7b62809
Show file tree
Hide file tree
Showing 4 changed files with 451 additions and 118 deletions.
85 changes: 67 additions & 18 deletions encodec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,42 +426,91 @@ static void encodec_model_eval(
{
inpL = ggml_elu(ctx0, inpL);

struct ggml_tensor * out = strided_conv_1d(
encoded_inp = strided_conv_1d(
ctx0, inpL, model.encoder.final_conv_w, model.encoder.final_conv_b, stride);

encoded_inp = out;
}
}

// quantizer
struct ggml_tensor * codes;
struct ggml_tensor * out;

{
const auto & hparams = model.hparams;
// originally, n_q = n_q or len(self.layers)
// for this model, n_q is at most 32, but the implementation we are comparing
// our model against has only 16, hence we hardcode 16 as n_q for now.
// const int n_q = hparams.n_q;
const int n_q = 16;

const int seq_length = encoded_inp->ne[0];
codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q);

struct ggml_tensor * inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp));
struct ggml_tensor * residual = inpL;
struct ggml_tensor * indices;

for (int i = 0; i < n_q; i++) {
encodec_quant_block block = model.quantizer.blocks[i];

// compute distance
// [seq_length, n_bins]
struct ggml_tensor * dp = ggml_scale(
ctx0, ggml_mul_mat(ctx0, block.embed, residual), ggml_new_f32(ctx0, -2.0f));

// [n_bins]
struct ggml_tensor * sqr_embed = ggml_sqr(ctx0, block.embed);
struct ggml_tensor * sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed);

// [seq_length]
struct ggml_tensor * sqr_inp = ggml_sqr(ctx0, residual);
struct ggml_tensor * sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp);

// [seq_length, n_bins]
struct ggml_tensor * dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp);
dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist);
dist = ggml_scale(ctx0, dist, ggml_new_f32(ctx0, -1.0f));

// take the argmax over the column dimension
// [seq_length]
indices = ggml_argmax(ctx0, dist);

// look up in embedding table
struct ggml_tensor * quantized = ggml_get_rows(ctx0, block.embed, indices);

residual = ggml_sub(ctx0, residual, quantized);

codes = ggml_set_1d(ctx0, codes, indices, i*codes->nb[1]);
}

out = codes;
}

ggml_build_forward_expand(&gf, encoded_inp);
ggml_build_forward_expand(&gf, out);
ggml_graph_compute (ctx0, &gf);

printf("\n");
printf("seq_length = %d\n", encoded_inp->ne[0]);
printf("n_channels = %d\n", encoded_inp->ne[1]);
printf("out_channels = %d\n", encoded_inp->ne[2]);
printf("seq_length = %d\n", out->ne[0]);
printf("n_channels = %d\n", out->ne[1]);
printf("out_channels = %d\n", out->ne[2]);
printf("\n");

for(int i = 0; i < encoded_inp->ne[1]; i++) {
for (int j = 0; j < encoded_inp->ne[0]; j++) {
float val = *(float *) ((char *) encoded_inp->data + j*encoded_inp->nb[0] + i*encoded_inp->nb[1]);
printf("%.4f ", val);
// for(int i = 0; i < out->ne[1]; i++) {
// for (int j = 0; j < out->ne[0]; j++) {
// float val = *(float *) ((char *) out->data + j*out->nb[0] + i*out->nb[1]);
// printf("%.4f ", val);
// }
// printf("\n");
// }

for(int i = 0; i < out->ne[1]; i++) {
for (int j = 0; j < out->ne[0]; j++) {
int32_t val = *(int32_t *) ((char *) out->data + j*out->nb[0] + i*out->nb[1]);
printf("%d ", val);
}
printf("\n");
}

// std::vector<float> out(10, 0);
// memcpy(out.data(), (float *) ggml_get_data(encoded_inp), out.size()*sizeof(float));

// for (const auto& v : out) {
// printf("%.2f\n", v);
// }

ggml_free(ctx0);
}

Expand Down
Loading

0 comments on commit 7b62809

Please sign in to comment.