From 8e0672f0a078401b64d85ee7984746afc2a5ea3b Mon Sep 17 00:00:00 2001
From: PAB
Date: Sun, 10 Sep 2023 21:27:34 +0200
Subject: [PATCH] MNT Expose bark_vocab in internal API for CIs to pass (#110)

---
 bark.h                   | 12 ++++--------
 tests/test-tokenizer.cpp | 17 +++++++++++++----
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/bark.h b/bark.h
index 33001fa..5114fe0 100644
--- a/bark.h
+++ b/bark.h
@@ -115,6 +115,10 @@ extern "C" {
     // Internal API for tests
     //
 
+    typedef std::vector<int32_t> bark_sequence;
+    typedef std::vector<std::vector<int32_t>> bark_codes;
+    typedef std::vector<float> audio_arr_t;
+
     int gpt_model_load(const std::string& fname, gpt_model& model);
 
     int gpt_eval(
@@ -155,14 +159,6 @@ extern "C" {
 
     void bark_forward_encodec(struct bark_context * ctx);
 
-    void print_tensor(struct ggml_tensor * a);
-
-    void read_tensor_from_file(std::ifstream & fin, struct ggml_tensor * t);
-
-    bool allequal(struct ggml_tensor * a, struct ggml_tensor * b, std::string test_name);
-
-    bool allclose(struct ggml_tensor * a, struct ggml_tensor * b, float tol, std::string test_name);
-
 #endif // BARK_API_INTERNAL
 
 #endif // BARK_H
\ No newline at end of file
diff --git a/tests/test-tokenizer.cpp b/tests/test-tokenizer.cpp
index 2ae2074..7fb8973 100644
--- a/tests/test-tokenizer.cpp
+++ b/tests/test-tokenizer.cpp
@@ -1,10 +1,19 @@
-#include "bark.h"
-
 #include <cstdio>
 #include <map>
 #include <string>
 #include <vector>
 
+#define BARK_API_INTERNAL
+#include "bark.h"
+
+struct bark_vocab {
+    using id    = int32_t;
+    using token = std::string;
+
+    std::map<token, id> token_to_id;
+    std::map<id, token> id_to_token;
+};
+
 static const std::map<std::string, bark_sequence> & k_tests()
 {
     static std::map<std::string, bark_sequence> _k_tests = {
@@ -30,7 +39,7 @@ int main(int argc, char **argv) {
     bark_vocab vocab;
     int max_ctx_size = 256;
 
-    if (bark_vocab_load(fname, vocab, 119547) > 0) {
+    if (bark_vocab_load(fname.c_str(), &vocab, 119547) > 0) {
         fprintf(stderr, "%s: invalid vocab file '%s'\n", __func__, fname.c_str());
         return 1;
     }
@@ -38,7 +47,7 @@ int main(int argc, char **argv) {
     for (const auto & test_kv : k_tests()) {
         bark_sequence res(test_kv.first.size());
         int n_tokens;
-        bert_tokenize(vocab, test_kv.first.c_str(), res.data(), &n_tokens, max_ctx_size);
+        bert_tokenize(&vocab, test_kv.first.c_str(), res.data(), &n_tokens, max_ctx_size);
         res.resize(n_tokens);
 
         bool correct = res.size() == test_kv.second.size();
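
For context, a minimal sketch of what a consumer of the newly exposed internals
looks like after this patch. The signatures of bark_vocab_load and bert_tokenize
are inferred from the call sites above (not a definitive interface), the weights
path is hypothetical, and the local bark_vocab definition mirrors the one the
test adds:

// Sketch of a standalone check against the internal API, assuming the
// signatures implied by the call sites in this patch:
//   int  bark_vocab_load(const char * fname, bark_vocab * vocab, int32_t expected_size);
//   void bert_tokenize(const bark_vocab * vocab, const char * text,
//                      int32_t * tokens, int32_t * n_tokens, int32_t n_max_tokens);
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

#define BARK_API_INTERNAL
#include "bark.h"

// The test above supplies its own bark_vocab definition, which suggests the
// public header only forward-declares the type; mirror that here.
struct bark_vocab {
    using id    = int32_t;
    using token = std::string;

    std::map<token, id> token_to_id;
    std::map<id, token> id_to_token;
};

int main() {
    const std::string fname = "./ggml_weights/ggml_vocab.bin";  // hypothetical path

    // 119547 is the expected vocab size the test passes; a positive return
    // value signals a load failure.
    bark_vocab vocab;
    if (bark_vocab_load(fname.c_str(), &vocab, 119547) > 0) {
        fprintf(stderr, "invalid vocab file '%s'\n", fname.c_str());
        return 1;
    }

    const int max_ctx_size = 256;
    bark_sequence tokens(max_ctx_size);  // std::vector<int32_t> typedef from bark.h
    int n_tokens = 0;

    bert_tokenize(&vocab, "this is a test prompt", tokens.data(), &n_tokens, max_ctx_size);
    tokens.resize(n_tokens);

    for (int32_t id : tokens) {
        printf("%d ", id);
    }
    printf("\n");

    return 0;
}

Note the two signature changes the test had to absorb: bark_vocab_load now takes
a C string and a pointer (fname.c_str(), &vocab), and bert_tokenize takes the
vocab by pointer rather than by reference, consistent with the extern "C" surface.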