optimizer.py
from typing import List, Optional

import tensorflow as tf
import tensorflow_addons as tfa


class AdamW(tfa.optimizers.AdamW):
    """AdamW wrapper that remembers which variables should receive weight decay.

    `tfa.optimizers.AdamW` takes `decay_var_list` as an argument to
    `minimize`/`apply_gradients`; this subclass stores it once at construction
    time and forwards it on every call, so callers do not have to pass it each time.
    """

    def __init__(
        self,
        *args,
        decay_var_list: Optional[List[str]] = None,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)
        self._decay_var_list_kept = decay_var_list

    def minimize(self, *args, **kwargs):
        return super().minimize(*args, **kwargs, decay_var_list=self._decay_var_list_kept)

    def apply_gradients(self, *args, **kwargs):
        return super().apply_gradients(*args, **kwargs, decay_var_list=self._decay_var_list_kept)

class LinearWarmupAndDecayScheduler(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Linear warmup from 0 to `rate`, followed by linear decay back to 0.

    The learning rate ramps up over `warmup_steps`, then decays linearly so
    that it reaches 0 at `total_steps`.
    """

    def __init__(self, rate, warmup_steps, total_steps, name=None):
        super().__init__()
        self.rate = rate
        self.warmup_steps = float(warmup_steps)
        self.total_steps = float(total_steps)
        self.name = name

    def __call__(self, step):
        with tf.name_scope("LinearWarmupAndDecayScheduler"):
            total_steps = tf.convert_to_tensor(self.total_steps, name="total_steps")
            warmup_steps = tf.convert_to_tensor(self.warmup_steps, name="warmup_steps")
            # Steps are 0-based, so shift by one before comparing against the schedule.
            current_step = tf.cast(step + 1, warmup_steps.dtype)
            return self.rate * tf.cond(
                current_step < warmup_steps,
                lambda: self.warmup(current_step, warmup_steps),
                lambda: self.decay(current_step, total_steps, warmup_steps),
            )

    @tf.function
    def warmup(self, step, warmup_steps):
        # Scale factor grows linearly from ~0 to 1 over the warmup phase.
        return step / tf.math.maximum(tf.constant(1.0), warmup_steps)

    @tf.function
    def decay(self, step, total_steps, warmup_steps):
        # Scale factor shrinks linearly from 1 at warmup_steps to 0 at
        # total_steps; clamped at 0 if training runs past total_steps.
        return tf.math.maximum(
            tf.constant(0.0),
            (total_steps - step) / tf.math.maximum(tf.constant(1.0), total_steps - warmup_steps),
        )

    def get_config(self):
        return {
            "rate": self.rate,
            "warmup_steps": self.warmup_steps,
            "total_steps": self.total_steps,
            "name": self.name,
        }
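

# Minimal usage sketch (not part of the original module): wires the scheduler
# into the AdamW wrapper above. The step counts and rates are illustrative
# assumptions, not values prescribed by this file.
if __name__ == "__main__":
    schedule = LinearWarmupAndDecayScheduler(
        rate=1e-4, warmup_steps=1_000, total_steps=10_000
    )
    optimizer = AdamW(weight_decay=1e-2, learning_rate=schedule)

    # The schedule can be called directly to sanity-check the learning-rate
    # curve: it ramps up to `rate` over warmup_steps, then decays to 0.
    for step in (0, 500, 1_000, 5_000, 9_999):
        print(step, float(schedule(step)))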