04_main.py
"""
Các công việc
- Tìm hiểu cách dataloader hoạt động.
+ Input: images or text, cross_attention | x
+ Position Embeddings | x
+ Masking and Paddings | x
- Tìm hiểu cách hoạt động của mô hình. | x
+ Encoder (Image Embeddings for Encoder) | x
+ Decoder | x
+ Improve Decoder | o
- Greendy-Beam Search: https://machinelearningmastery.com/beam-search-decoder-natural-language-processing/
- Tìm hiểu cách tính độ chính xác và loss function | x
+ Đơn giản là category accuracy | x
"""
import logging

import tensorflow as tf

from DataLoader.dataloader_archiscribe import Dataset
from model.total_model_subclassing import TotalModel
from settings import config as cfg_training
from supervisor.tf_trainer import TFTrainer
from utils.metrics_helpers import softmax_ce_loss
from utils.optimizer_helpers import CustomSchedule
from utils.tokens_helpers import get_vocab_from_huggingface

tf.get_logger().setLevel(logging.ERROR)  # silence TensorFlow's INFO/WARNING chatter
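

# softmax_ce_loss is imported from utils.metrics_helpers and its body is not
# shown in this file. For sequence models it is commonly a padding-masked
# sparse categorical cross-entropy; the sketch below illustrates that common
# pattern under the ASSUMPTION that pad tokens have id 0 and logits have
# shape [batch, seq_len, vocab_size] -- check utils/metrics_helpers.py for
# the real definition.
def _masked_softmax_ce_loss_sketch(labels, logits):
    per_token = tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)                     # [batch, seq_len]
    mask = tf.cast(tf.not_equal(labels, 0), per_token.dtype)  # 0.0 at pad positions
    # average the loss over real (non-pad) tokens only
    return tf.reduce_sum(per_token * mask) / tf.reduce_sum(mask)
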
if __name__ == '__main__':
    # Get the vocabulary size from the HuggingFace tokenizer named in settings.
    vocab_size = get_vocab_from_huggingface(name_model=cfg_training.MODEL_TOKENIZER)

    # Build the training dataset from the archiscribe TFRecord; sequences are
    # right-padded per the dataloader's "padding_have_right" option.
    train_dataset = Dataset(record_path='./DatasetTFrecord/archiscribe-corpus/all_archiscribe.tfrec')
    train_dataset.load_tfrecord(repeat=True, batch_size=cfg_training.BATCH_SIZE,
                                with_padding_type="padding_have_right")
    # Assemble the encoder-decoder model and trace it once so all weights are
    # created before training starts.
    architecture_model = TotalModel(
        enc_stack_size=cfg_training.ENC_STACK_SIZE,
        dec_stack_size=cfg_training.DEC_STACK_SIZE,
        num_heads=cfg_training.NUM_HEADS,
        d_model=cfg_training.D_MODEL,
        d_ff=cfg_training.D_FF,
        vocab_size=vocab_size,  # use the tokenizer's size rather than a hard-coded 32000
        max_seq_leng=cfg_training.MAX_LENGTH_SEQUENCE
    )
    architecture_model.build_graph()
    # Learning rate: either the warmup schedule or a fixed value from settings.
    if cfg_training.LEARNING_RATE_TYPE == 'schedule':
        learning_rate = CustomSchedule(cfg_training.D_MODEL)
    else:
        learning_rate = cfg_training.LEARNING_RATE
    optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
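    # ASSUMPTION: CustomSchedule implements the warmup schedule from
    # "Attention Is All You Need",
    #   lr(step) = d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5),
    # which is the schedule these Adam hyperparameters (beta_2=0.98,
    # epsilon=1e-9) are conventionally paired with; see
    # utils/optimizer_helpers.py for the actual definition.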
    # Hand everything to the training supervisor. Note that validation reuses
    # the training dataloader, so validation metrics do not measure
    # generalization.
    supervisor = TFTrainer(train_dataloader=train_dataset,
                           validation_dataloader=train_dataset,
                           model=architecture_model,
                           loss_fn=softmax_ce_loss,
                           optimizer=optimizer,
                           save_freq=cfg_training.SAVE_FREQ,
                           max_length_sequence=cfg_training.MAX_LENGTH_SEQUENCE,
                           monitor="loss",
                           mode="min",
                           training_dir="./logs_training",
                           name="Trainer_Supervisor")

    # Resume from the latest checkpoint if one exists, train, then export.
    supervisor.restore(weights_only=False, from_scout=True)
    supervisor.train(epochs=cfg_training.EPOCHS,
                     steps_per_epoch=cfg_training.NUM_SAMPLES // cfg_training.BATCH_SIZE)
    supervisor.export(export_dir=cfg_training.EXPORT_DIR)