working quantizer

PABannier · Jun 18, 2023 · 7b62809 · 7b62809
1 parent f7aee77
commit 7b62809
Show file tree

Hide file tree

Showing 4 changed files with 451 additions and 118 deletions.
diff --git a/encodec.cpp b/encodec.cpp
@@ -426,42 +426,91 @@ static void encodec_model_eval(
  {
  inpL = ggml_elu(ctx0, inpL);
 
- struct ggml_tensor * out = strided_conv_1d(
+ encoded_inp = strided_conv_1d(
  ctx0, inpL, model.encoder.final_conv_w, model.encoder.final_conv_b, stride);
-
- encoded_inp = out;
  }
  }
 
  // quantizer
+ struct ggml_tensor * codes;
+ struct ggml_tensor * out;
+
  {
+ const auto & hparams = model.hparams;
+ // originally, n_q = n_q or len(self.layers)
+ // for this model, n_q is at most 32, but the implementation we are comparing
+ // our model against has only 16, hence we hardcode 16 as n_q for now.
+ // const int n_q = hparams.n_q;
+ const int n_q = 16;
+
+ const int seq_length = encoded_inp->ne[0];
+ codes = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, seq_length, n_q);
+
+ struct ggml_tensor * inpL = ggml_cont(ctx0, ggml_transpose(ctx0, encoded_inp));
+ struct ggml_tensor * residual = inpL;
+ struct ggml_tensor * indices;
+
+ for (int i = 0; i < n_q; i++) {
+ encodec_quant_block block = model.quantizer.blocks[i];
+
+ // compute distance
+ // [seq_length, n_bins]
+ struct ggml_tensor * dp = ggml_scale(
+ ctx0, ggml_mul_mat(ctx0, block.embed, residual), ggml_new_f32(ctx0, -2.0f));
+
+ // [n_bins]
+ struct ggml_tensor * sqr_embed = ggml_sqr(ctx0, block.embed);
+ struct ggml_tensor * sqr_embed_nrm = ggml_sum_rows(ctx0, sqr_embed);
+
+ // [seq_length]
+ struct ggml_tensor * sqr_inp = ggml_sqr(ctx0, residual);
+ struct ggml_tensor * sqr_inp_nrm = ggml_sum_rows(ctx0, sqr_inp);
+
+ // [seq_length, n_bins]
+ struct ggml_tensor * dist = ggml_add(ctx0, ggml_repeat(ctx0, sqr_inp_nrm, dp), dp);
+ dist = ggml_add(ctx0, ggml_repeat(ctx0, ggml_transpose(ctx0, sqr_embed_nrm), dist), dist);
+ dist = ggml_scale(ctx0, dist, ggml_new_f32(ctx0, -1.0f));
 
+ // take the argmax over the column dimension
+ // [seq_length]
+ indices = ggml_argmax(ctx0, dist);
+
+ // look up in embedding table
+ struct ggml_tensor * quantized = ggml_get_rows(ctx0, block.embed, indices);
+
+ residual = ggml_sub(ctx0, residual, quantized);
+
+ codes = ggml_set_1d(ctx0, codes, indices, i*codes->nb[1]);
+ }
+
+ out = codes;
  }
 
- ggml_build_forward_expand(&gf, encoded_inp);
+ ggml_build_forward_expand(&gf, out);
  ggml_graph_compute (ctx0, &gf);
 
  printf("\n");
- printf("seq_length = %d\n", encoded_inp->ne[0]);
- printf("n_channels = %d\n", encoded_inp->ne[1]);
- printf("out_channels = %d\n", encoded_inp->ne[2]);
+ printf("seq_length = %d\n", out->ne[0]);
+ printf("n_channels = %d\n", out->ne[1]);
+ printf("out_channels = %d\n", out->ne[2]);
  printf("\n");
 
- for(int i = 0; i < encoded_inp->ne[1]; i++) {
- for (int j = 0; j < encoded_inp->ne[0]; j++) {
- float val = *(float *) ((char *) encoded_inp->data + j*encoded_inp->nb[0] + i*encoded_inp->nb[1]);
- printf("%.4f ", val);
+ // for(int i = 0; i < out->ne[1]; i++) {
+ // for (int j = 0; j < out->ne[0]; j++) {
+ // float val = *(float *) ((char *) out->data + j*out->nb[0] + i*out->nb[1]);
+ // printf("%.4f ", val);
+ // }
+ // printf("\n");
+ // }
+
+ for(int i = 0; i < out->ne[1]; i++) {
+ for (int j = 0; j < out->ne[0]; j++) {
+ int32_t val = *(int32_t *) ((char *) out->data + j*out->nb[0] + i*out->nb[1]);
+ printf("%d ", val);
  }
  printf("\n");
  }
 
- // std::vector<float> out(10, 0);
- // memcpy(out.data(), (float *) ggml_get_data(encoded_inp), out.size()*sizeof(float));
-
- // for (const auto& v : out) {
- // printf("%.2f\n", v);
- // }
-
  ggml_free(ctx0);
 }