Skip to content

Commit

Permalink
Revert the wstring tokenization, because coherency was affected.
Browse files (browse the repository at this point in the history)
  • Loading branch information
LostRuins committed Jun 24, 2023
1 parent 6da38b0 commit 8342fe8
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 9 deletions.
2 changes: 1 addition & 1 deletion koboldcpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def utfprint(str):
maxhordelen = 256
modelbusy = False
defaultport = 5001
- KcppVersion = "1.32.2"
+ KcppVersion = "1.32.3"
showdebug = True

class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
Expand Down
16 changes: 8 additions & 8 deletions otherarch/mpt_v3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,14 @@ bool mpt_model_load(const std::string & fname, mpt_model & model, gpt_vocab & vo
word.assign(buf.data(), len);

// Convert token from utf-8
- std::wstring word_multibytes = convert_to_wstring(word);
- if(word_multibytes!=L"")
- {
- word.resize(word_multibytes.size());
- for (int w = 0; w < word_multibytes.size(); w++) {
- word[w] = uint8_t(word_multibytes[w]);
- }
- }
+ // std::wstring word_multibytes = convert_to_wstring(word);
+ // if(word_multibytes!=L"")
+ // {
+ // word.resize(word_multibytes.size());
+ // for (int w = 0; w < word_multibytes.size(); w++) {
+ // word[w] = uint8_t(word_multibytes[w]);
+ // }
+ // }

vocab.token_to_id[word] = i;
vocab.id_to_token[i] = word;
Expand Down

0 comments on commit 8342fe8

Please sign in to comment.