# configs.yaml — training configurations.
# `defaults` holds the base settings; each named section below overrides a
# subset of them for a specific benchmark/task.
# Base configuration. Every key here can be overridden by a task section.
defaults:
  logdir: null
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  deterministic_run: false
  steps: 1e6
  parallel: false
  eval_every: 1e4
  eval_episode_num: 10
  log_every: 1e4
  reset_every: 0
  device: 'cuda:0'
  compile: true
  precision: 32
  debug: false
  video_pred_log: true
  # Environment
  task: 'dmc_walker_walk'
  size: [64, 64]
  envs: 1
  action_repeat: 2
  time_limit: 1000
  grayscale: false
  prefill: 2500
  reward_EMA: true
  # Model
  dyn_hidden: 512
  dyn_deter: 512
  dyn_stoch: 32
  dyn_discrete: 32
  dyn_rec_depth: 1
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  grad_heads: ['decoder', 'reward', 'cont']
  units: 512
  act: 'SiLU'
  norm: true
  encoder:
    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: true, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, symlog_inputs: true}
  decoder:
    {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: true, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 5, mlp_units: 1024, cnn_sigmoid: false, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0}
  actor:
    {layers: 2, dist: 'normal', entropy: 3e-4, unimix_ratio: 0.01, std: 'learned', min_std: 0.1, max_std: 1.0, temp: 0.1, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 1.0}
  critic:
    {layers: 2, dist: 'symlog_disc', slow_target: true, slow_target_update: 1, slow_target_fraction: 0.02, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 0.0}
  reward_head:
    {layers: 2, dist: 'symlog_disc', loss_scale: 1.0, outscale: 0.0}
  cont_head:
    {layers: 2, loss_scale: 1.0, outscale: 1.0}
  dyn_scale: 0.5
  rep_scale: 0.1
  kl_free: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  initial: 'learned'
  # Training
  batch_size: 16
  batch_length: 64
  train_ratio: 512
  pretrain: 100
  model_lr: 1e-4
  opt_eps: 1e-8
  grad_clip: 1000
  dataset_size: 1000000
  opt: 'adam'
  # Behavior
  discount: 0.997
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  imag_gradient_mix: 0.0
  eval_state_mean: false
  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: true
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: false
# DMC with proprioceptive (vector) observations only: MLP keys match
# everything, CNN keys match nothing ('$^' is a never-matching regex).
dmc_proprio:
  steps: 5e5
  action_repeat: 2
  envs: 4
  train_ratio: 512
  video_pred_log: false
  encoder: {mlp_keys: '.*', cnn_keys: '$^'}
  decoder: {mlp_keys: '.*', cnn_keys: '$^'}
# DMC with image observations only: CNN keys match 'image', MLP keys match
# nothing ('$^' is a never-matching regex).
dmc_vision:
  steps: 1e6
  action_repeat: 2
  envs: 4
  train_ratio: 512
  video_pred_log: true
  encoder: {mlp_keys: '$^', cnn_keys: 'image'}
  decoder: {mlp_keys: '$^', cnn_keys: 'image'}
# Crafter benchmark: larger model (1024/4096 units) and 5-layer heads.
crafter:
  task: crafter_reward
  # Fixed: was `step` — a dead key; the base config declares `steps`.
  steps: 1e6
  action_repeat: 1
  envs: 1
  train_ratio: 512
  video_pred_log: true
  dyn_hidden: 1024
  dyn_deter: 4096
  units: 1024
  encoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
  decoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
  actor: {layers: 5, dist: 'onehot', std: 'none'}
  # Fixed: was `value` — the base config names this head `critic`, so the
  # `layers: 5` override was silently ignored. Verify the consumer reads
  # `critic` (it is the only matching key in `defaults`).
  critic: {layers: 5}
  reward_head: {layers: 5}
  cont_head: {layers: 5}
  imag_gradient: 'reinforce'
# Atari 100k benchmark (4e5 frames at action_repeat 4 = 100k agent steps).
atari100k:
  steps: 4e5
  envs: 1
  action_repeat: 4
  train_ratio: 1024
  video_pred_log: true
  eval_episode_num: 100
  actor: {dist: 'onehot', std: 'none'}
  imag_gradient: 'reinforce'
  # NOTE(review): `stickey` is likely a typo for `sticky` (sticky actions),
  # but the consuming code may read this exact key — confirm before renaming.
  stickey: false
  lives: unused
  noops: 30
  resize: opencv
  actions: needed
  time_limit: 108000
# Minecraft diamond task: 16 parallel envs, long horizon, large model.
minecraft:
  task: minecraft_diamond
  # Fixed: was `step` — a dead key; the base config declares `steps`.
  # Without this fix the run silently kept the 1e6 default instead of 1e8.
  steps: 1e8
  parallel: true
  envs: 16
  # no eval
  eval_episode_num: 0
  eval_every: 1e4
  action_repeat: 1
  train_ratio: 16
  video_pred_log: true
  dyn_hidden: 1024
  dyn_deter: 4096
  units: 1024
  encoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|obs_reward', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
  decoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
  actor: {layers: 5, dist: 'onehot', std: 'none'}
  # Fixed: was `value` — the base config names this head `critic`, so the
  # `layers: 5` override was silently ignored. Verify the consumer reads
  # `critic` (it is the only matching key in `defaults`).
  critic: {layers: 5}
  reward_head: {layers: 5}
  cont_head: {layers: 5}
  imag_gradient: 'reinforce'
  break_speed: 100.0
  time_limit: 36000
# Memory Maze benchmark.
memorymaze:
  steps: 1e8
  action_repeat: 2
  actor: {dist: 'onehot', std: 'none'}
  imag_gradient: 'reinforce'
  task: 'memorymaze_9x9'
# Tiny settings for fast smoke-testing of the pipeline.
debug:
  debug: true
  pretrain: 1
  prefill: 1
  batch_size: 10
  batch_length: 20