m_a_p.cpp
namespace neo
{
    typedef yi::Config Config;

    // Encodes chat history using the Llama-2 style template: an optional
    // <<SYS>> ... <</SYS>> system block, user turns wrapped in [INST] ... [/INST],
    // and a BOS token opening each assistant reply.
    class ChatHistoryEncoder : public BaseHistoryEncoder
    {
    public:
        // A full assistant turn: BOS (via append_ai_opening), the reply tokens, then EOS.
        void append_ai(int round_idx, const std::string &ai, std::vector<int> &ids) const override
        {
            auto *tok = tokenizer;
            append_ai_opening(round_idx, ids);
            tok->encode(ai, ids);
            ids.push_back(tok->eos_token_id);
        }

        // A non-empty system prompt is wrapped in <<SYS>> ... <</SYS>> before encoding.
        void append_sys_prompt(std::vector<int> &ids) const override
        {
            auto *tok = tokenizer;
            if (tok->get_system_prompt().size() > 0)
            {
                std::ostringstream oss;
                oss << "<<SYS>>\n" << tok->get_system_prompt() << "\n<</SYS>>";
                tok->encode(oss.str(), ids);
            }
        }

        // A user turn is wrapped in [INST] ... [/INST].
        void append_user(int round_idx, const std::string &user, std::vector<int> &ids) const override
        {
            auto *tok = tokenizer;
            std::ostringstream oss;
            oss << "[INST] " << user << " [/INST]";
            tok->encode(oss.str(), ids);
        }

        // An assistant reply opens with the BOS token.
        void append_ai_opening(int round_idx, std::vector<int> &ids) const override
        {
            auto *tok = tokenizer;
            ids.push_back(tok->bos_token_id);
        }
    };

    static ChatHistoryEncoder _chat_encoder;

    // Reuses the llama v2 tokenizer, wired to the chat encoder above,
    // with MAP-Neo's default system prompt.
    class Tokenizer : public llama::v2::Tokenizer
    {
    public:
        Tokenizer(const Config &config)
            : llama::v2::Tokenizer(config, &_chat_encoder)
        {
            sys_prompt = "You are a helpful, respectful and honest assistant named Neo.";
        }

        // The vocabulary is loaded with the BPEProcessor1 implementation.
        size_t load(tokenizer::DataReader *buffer, int n_vocab) override
        {
            tp = new tokenizer::BPEProcessor1();
            size_t size = tp->Load(buffer, n_vocab);
            return size;
        }
    };

    // MAP-Neo shares the Yi model graph; only the model type tag differs.
    class ConditionalGeneration : public yi::ConditionalGeneration
    {
    public:
        ConditionalGeneration() = default;

        ConditionalGeneration(const Config &config)
            : yi::ConditionalGeneration(config, ModelType::MODEL_TYPE_MAP_NEO)
        {}
    };
}
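
For reference, a minimal standalone sketch (not part of this file) of the prompt text these overrides assemble for one system prompt plus one user turn. The literal markers come straight from the encoder above; the BOS/EOS handling happens at the token-id level and is only noted in comments here.

#include <iostream>
#include <sstream>
#include <string>

int main()
{
    const std::string system_prompt = "You are a helpful, respectful and honest assistant named Neo.";
    const std::string user = "Hello!";

    std::ostringstream oss;
    oss << "<<SYS>>\n" << system_prompt << "\n<</SYS>>";   // append_sys_prompt
    oss << "[INST] " << user << " [/INST]";                // append_user
    // append_ai_opening then pushes bos_token_id before the model's reply,
    // and append_ai closes the encoded reply with eos_token_id.

    std::cout << oss.str() << std::endl;
    return 0;
}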