# input_dfm.yaml
general:
  use_gpu: True # only if available
  # specify the CUDA device; these are 0-indexed, e.g.,
  # cuda:0, cuda:1 or others. "cuda" is the default CUDA device
  gpu_device: "cuda"
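  # Illustrative alternative: pin training to the second GPU
  # gpu_device: "cuda:1"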
  # Parent dir to save trained models
  models_dir: "./models"

preprocessing:
  # normalize text to the ASCII encoding standard
  uni2ascii: True
  lowercase: True
  # remove both leading and trailing whitespace characters
  strip: True
  only_latin_letters: False
  # Accepted proportion of characters in a string that are not present in our vocabulary, i.e.,
  # a string is accepted if:
  # (number of its characters found in the vocabulary)/(total number of its characters) >= missing_char_threshold
  missing_char_threshold: 0.5
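  # Worked example (illustrative): if 2 of a string's 4 characters are in the
  # vocabulary, the ratio is 2/4 = 0.5 >= 0.5 and the string is accepted;
  # at 1 of 4 (0.25 < 0.5) it would be rejected.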
  # read a list of characters and add them to the vocabulary
  read_list_chars: "./inputs/characters_v001.vocab"

# --- RNN/GRU/LSTM architecture/misc info
gru_lstm:
  main_architecture: "gru" # rnn, gru, lstm
  mode: # Tokenization mode
    # choices: "char", "ngram", "word"
    # for example: tokenize: ["char", "ngram", "word"] or ["char", "word"]
    tokenize: ["char", "ngram"]
    # ONLY if "char" or "ngram" is selected in tokenize will the following arg be used.
    # Strings in the inputs will become prefix + string + suffix.
    # NOTE: please use only STRINGS in the prefix_suffix list;
    # if you want only a prefix or only a suffix, enter the other as an empty string "",
    # e.g., ["|", ""] would add | as prefix and no suffix
    prefix_suffix: ["<", ">"]
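    # Example (illustrative): with prefix_suffix: ["<", ">"], the input string
    # "london" becomes "<london>" before it is tokenized.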
    # ONLY if "ngram" is selected in tokenize will the following args be used:
    min_gram: 2
    max_gram: 3
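    # Example (illustrative, assuming a sliding-window character n-gram tokenizer):
    # with min_gram: 2 and max_gram: 3, "<london>" yields the 2-grams
    # <l, lo, on, nd, do, on, n> and the 3-grams <lo, lon, ond, ndo, don, on>.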
    # ONLY if "word" is selected in tokenize will the following arg be used.
    # Characters according to which the string will be split into tokens.
    # Accepted inputs are either a string of delimiters (e.g., ", -!?:()") or
    # "default", in which case the delimiters are whitespace and any
    # punctuation mark:
    token_sep: "default"
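    # Example (illustrative): with token_sep: "default", the string "new york-city"
    # would be split on whitespace and punctuation into "new", "york" and "city".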
  bidirectional: True # if True, becomes a bidirectional RNN/GRU/LSTM
  # num_layers:
  # number of recurrent layers, e.g., setting num_layers=2 means stacking two
  # RNN/GRU/LSTMs together to form a stacked RNN/GRU/LSTM,
  # with the second RNN/GRU/LSTM taking in the outputs of the first and computing the final results.
  num_layers: 2
  # output dimension of the first fully connected layer
  fc1_out_dim: 120
  # pooling_mode:
  # hstates_layers_simple, hstates_layers, hstates
  # hstates_subtract, hstates_l2_distance, hstates_cosine
  # average, max
  # attention
  pooling_mode: 'hstates_layers_simple'
  # rnn_dropout:
  # if non-zero, introduces a Dropout layer on the outputs of each RNN/LSTM/GRU layer except the last layer,
  # with dropout probability equal to rnn_dropout.
  rnn_dropout: 0.01
  # fully connected layers' dropout; the list length depends on the number of fc layers (currently two)
  fc_dropout: [0.01, 0.01]
  # attention layers' dropout; the list length depends on the number of attention layers (currently two)
  att_dropout: [0.01, 0.01]
  # Add a bias term to all learnable parameters
  bias: True
  rnn_hidden_dim: 60
  max_seq_len: 120
  embedding_dim: 60
  output_dim: 2
  learning_rate: 0.001 # 3e-4
  optimizer: adam
  epochs: 5
  batch_size: 256
  # shuffle when creating the DataLoader
  dl_shuffle: True
  random_seed: 123
  # Early stopping:
  # number of epochs with no improvement after which training will be stopped and
  # the model with the lowest validation loss will be saved.
  # If 0 or negative, early stopping is deactivated.
  early_stopping_patience: -1
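  # Illustrative alternative: stop after 3 epochs without improvement
  # early_stopping_patience: 3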
  # if -1 or 1, perform the validation step in every epoch;
  # if 0, no validation will be done;
  # otherwise, specify the interval (integer)
  validation: 1
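  # Illustrative alternative: run validation every 5 epochs
  # validation: 5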
  # split dataset
  train_proportion: 0.7
  val_proportion: 0.15
  test_proportion: 0.15
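  # Note: with these values the train/val/test splits cover the whole dataset
  # (0.7 + 0.15 + 0.15 = 1.0).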
  # False, or a path to a directory in which to create TensorBoard logs
  #create_tensor_board: "./tb_gru_test"
  create_tensor_board: False
  # Layers to freeze during fine-tuning
  layers_to_freeze: ["emb", "rnn_1", "attn"]
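  # (Presumably "emb", "rnn_1" and "attn" refer to the embedding layer, the first
  # recurrent layer and the attention layer, respectively; illustrative reading.)
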
inference:
  # Output predictions and save the results in output_preds_file
  output_preds: True
  # either a path or "default";
  # "default" saves the prediction output inside the model directory
  output_preds_file: "default"
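  # Illustrative alternative (hypothetical path):
  # output_preds_file: "./predictions/test_preds.txt"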
  # change to True to report Mean Average Precision as an evaluation metric;
  # note that this increases computation time
  eval_map_metric: False