Update PPO example YAMLs (LR, USE_AMSGRAD) and make AMSGrad configurable in ppo_ms.py

huawei-noah · AmiyaSX · Feb 7, 2023 · Feb 7, 2023 · Feb 12, 2023 · Feb 13, 2023
commit 907a54922e0b1b1ea5f8c11db05fc38dc85cdcfd
diff --git a/examples/beamrider_ppo_ms.yaml b/examples/beamrider_ppo_ms.yaml
@@ -32,7 +32,7 @@ model_para:
  VF_SHARE_LAYERS: True
  activation: relu
  hidden_sizes: [512]
-
+ USE_AMSGRAD: False
 env_num: 10
 speedup: False
 

diff --git a/examples/breakout_ppo_ms.yaml b/examples/breakout_ppo_ms.yaml
@@ -25,14 +25,14 @@ model_para:
  CRITIC_LOSS_COEF: 1.0
  ENTROPY_LOSS: 0.003
  LOSS_CLIPPING: 0.1
- LR: 0.00025
+ LR: 0.0004
  MAX_GRAD_NORM: 5.0
  NUM_SGD_ITER: 4
  SUMMARY: False
  VF_SHARE_LAYERS: True
  activation: relu
  hidden_sizes: [256]
-
+ USE_AMSGRAD: False
 env_num: 10
 speedup: False
 

diff --git a/examples/qbert_ppo_ms.yaml b/examples/qbert_ppo_ms.yaml
@@ -25,14 +25,14 @@ model_para:
  CRITIC_LOSS_COEF: 1.0
  ENTROPY_LOSS: 0.003
  LOSS_CLIPPING: 0.1
- LR: 0.00025
+ LR: 0.0005
  MAX_GRAD_NORM: 5.0
  NUM_SGD_ITER: 4
  SUMMARY: False
  VF_SHARE_LAYERS: True
  activation: relu
  hidden_sizes: [512]
-
+ USE_AMSGRAD: True
 env_num: 10
 speedup: False
 

diff --git a/examples/spaceinvader_ppo_ms.yaml b/examples/spaceinvader_ppo_ms.yaml
@@ -32,7 +32,7 @@ model_para:
  VF_SHARE_LAYERS: True
  activation: relu
  hidden_sizes: [512]
-
+ USE_AMSGRAD: False
 env_num: 10
 speedup: False
 

diff --git a/xt/model/ppo/ppo_ms.py b/xt/model/ppo/ppo_ms.py
@@ -66,10 +66,10 @@ def __init__(self, model_info):
  self.verbose = model_config.get('SUMMARY', SUMMARY)
  self.vf_clip = Tensor(model_config.get('VF_CLIP', VF_CLIP))
  self.dist = make_dist(self.action_type, self.action_dim)
-
+ self.amsgrad = model_config.get('USE_AMSGRAD', False)
  super().__init__(model_info)
  self.predict_net = self.PPOPredictPolicy(self.model, self.dist)
- adam = Adam(params=self.predict_net.trainable_params(), learning_rate=self._lr, use_amsgrad=True, use_locking=True)
+ adam = Adam(params=self.predict_net.trainable_params(), learning_rate=self._lr, use_amsgrad=self.amsgrad, use_locking=True)
  loss_fn = WithLossCell(self.critic_loss_coef, self.clip_ratio, self.ent_coef, self.vf_clip)
  forward_fn = NetWithLoss(self.model, loss_fn, self.dist)
  device_target = ms.get_context("device_target")