-
Notifications
You must be signed in to change notification settings - Fork 20
/
tigerbot.cpp
84 lines (71 loc) · 2.43 KB
/
tigerbot.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// TigerBot model hyper-parameters: extends the LLaMA-2 config with the
// extra fields this architecture needs (grouped KV heads and RoPE tuning).
struct Config : public llama::v2::Config
{
    int num_key_value_heads; // number of key/value heads (presumably GQA — forwarded to the base model ctor)
    float rope_scaling;      // linear RoPE scaling divisor; applied as freq_scale = 1 / rope_scaling
    float rope_theta;        // RoPE base frequency; applied as freq_base per layer
};
// Serializes chat history into token ids using TigerBot's turn format
// (see the definitions below: <bos><instruct>{user}<response>{ai}).
class ChatHistoryEncoder : public BaseHistoryEncoder
{
public:
    // Appends one full round: the user turn followed by the AI reply.
    void append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const override;
    // Appends only the user turn, ending with the <response> marker.
    void do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const override;
};
// Single shared encoder instance, handed to the Tokenizer constructor below.
static ChatHistoryEncoder _chat_encoder;
// TigerBot tokenizer: LLaMA-2 tokenizer plus the two chat-control token ids
// (<instruct>/<response>) that the history encoder emits.
class Tokenizer : public llama::v2::Tokenizer
{
public:
    Tokenizer(const Config &config)
        : llama::v2::Tokenizer::Tokenizer(config, &_chat_encoder)
    {
        // TigerBot uses no default system prompt.
        sys_prompt = "";
    }

    // Loads the vocabulary, then derives instruct/response ids (see definition).
    size_t load(tokenizer::DataReader *buffer, int n_vocab) override;

    bool is_special_id(int id) const override;

public:
    int instruct_token_id; // id of the <instruct> marker (pad_token_id - 2)
    int response_token_id; // id of the <response> marker (pad_token_id - 1)
};
// TigerBot text-generation model: a LLaMA-2 model with per-layer RoPE
// frequency overrides taken from the TigerBot config.
class ConditionalGeneration : public llama::v2::ConditionalGeneration
{
public:
    ConditionalGeneration() = default;
    ConditionalGeneration(const Config &config)
        : llama::v2::ConditionalGeneration(config, MODEL_TYPE_TIGERBOT, config.num_key_value_heads, config.max_length)
    {
        // Patch every attention layer with TigerBot's RoPE parameters;
        // the base ctor built the layers with its own defaults.
        for (int i = 0; i < config.num_hidden_layers; i++)
        {
            auto &attention = transformer->layers[i].attention;
            attention.freq_base = config.rope_theta;
            // Linear scaling: larger rope_scaling => smaller freq_scale.
            attention.freq_scale = 1 / config.rope_scaling;
        }
    }
};
// Loads the base LLaMA-2 vocabulary, then derives the chat-control token ids,
// which sit immediately below the pad token in the vocabulary table.
size_t Tokenizer::load(tokenizer::DataReader *buffer, int n_vocab)
{
    const size_t consumed = llama::v2::Tokenizer::load(buffer, n_vocab);
    instruct_token_id = pad_token_id - 2;
    response_token_id = pad_token_id - 1;
    return consumed;
}
// Appends one completed chat round: the user turn (delegated to
// do_append_user via append_user) followed by the AI reply tokens.
void ChatHistoryEncoder::append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const
{
    append_user(round_idx, user, ids);
    // NOTE(review): assumes `tokenizer` is always the TigerBot Tokenizer;
    // the dynamic_cast result is not null-checked (matches sibling code).
    auto *tk = dynamic_cast<Tokenizer *>(tokenizer);
    tk->encode(ai, ids, false, true);
}
void ChatHistoryEncoder::do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const
{
Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
ids.push_back(tok->bos_token_id);
ids.push_back(tok->instruct_token_id);
tok->encode(user, ids);
ids.push_back(tok->response_token_id);
}
// A token is "special" if the base tokenizer says so, or if it is one of
// the TigerBot sentinel/control tokens.
bool Tokenizer::is_special_id(int id) const
{
    if (llama::v2::Tokenizer::is_special_id(id))
        return true;
    return id == bos_token_id
        || id == eos_token_id
        || id == instruct_token_id
        || id == response_token_id;
}