Skip to content

Commit

Permalink
MNT Expose bark_vocab in internal API for CIs to pass (#110)
Browse files Browse the repository at this point in the history
  • Loading branch information
PABannier committed Sep 10, 2023
1 parent aa9c968 commit 8e0672f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 12 deletions.
12 changes: 4 additions & 8 deletions bark.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,10 @@ extern "C" {
// Internal API for tests
//

typedef std::vector<bark_token> bark_sequence;
typedef std::vector<std::vector<bark_token>> bark_codes;
typedef std::vector<float> audio_arr_t;

int gpt_model_load(const std::string& fname, gpt_model& model);

int gpt_eval(
Expand Down Expand Up @@ -155,14 +159,6 @@ extern "C" {

void bark_forward_encodec(struct bark_context * ctx);

void print_tensor(struct ggml_tensor * a);

void read_tensor_from_file(std::ifstream & fin, struct ggml_tensor * t);

bool allequal(struct ggml_tensor * a, struct ggml_tensor * b, std::string test_name);

bool allclose(struct ggml_tensor * a, struct ggml_tensor * b, float tol, std::string test_name);

#endif // BARK_API_INTERNAL

#endif // BARK_H
17 changes: 13 additions & 4 deletions tests/test-tokenizer.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
#include "bark.h"

#include <cstdio>
#include <string>
#include <map>
#include <vector>

#define BARK_API_INTERNAL
#include "bark.h"

// Vocabulary table: a two-way mapping between token strings and integer ids.
// NOTE(review): this type is defined locally in the test; presumably it has to
// match the library's own bark_vocab definition member-for-member, since a
// pointer to it is handed to library functions — confirm before changing it.
struct bark_vocab {
// Integer identifier type of a vocabulary entry.
using id = int32_t;
// Textual form of a vocabulary entry.
using token = std::string;

// Lookup from token text to its id.
std::map<token, id> token_to_id;
// Reverse lookup from id back to token text.
std::map<id, token> id_to_token;
};

static const std::map<std::string, bark_sequence> & k_tests()
{
static std::map<std::string, bark_sequence> _k_tests = {
Expand All @@ -30,15 +39,15 @@ int main(int argc, char **argv) {
bark_vocab vocab;
int max_ctx_size = 256;

if (bark_vocab_load(fname, vocab, 119547) > 0) {
if (bark_vocab_load(fname.c_str(), &vocab, 119547) > 0) {
fprintf(stderr, "%s: invalid vocab file '%s'\n", __func__, fname.c_str());
return 1;
}

for (const auto & test_kv : k_tests()) {
bark_sequence res(test_kv.first.size());
int n_tokens;
bert_tokenize(vocab, test_kv.first.c_str(), res.data(), &n_tokens, max_ctx_size);
bert_tokenize(&vocab, test_kv.first.c_str(), res.data(), &n_tokens, max_ctx_size);
res.resize(n_tokens);

bool correct = res.size() == test_kv.second.size();
Expand Down

0 comments on commit 8e0672f

Please sign in to comment.