Dian xt ms #29

Open
wants to merge 27 commits into master
Changes from 1 commit
files organize
AmiyaSX committed Feb 12, 2023
commit b71dbcc6e8698794f8f16cb7ecdf27667f116b4f
43 changes: 43 additions & 0 deletions examples/ant_ppo._ms.yaml
@@ -0,0 +1,43 @@
alg_para:
  alg_name: PPO

env_para:
  env_name: GymEnv
  env_info:
    name: MiniGrid-Ant-v0
    vision: False

agent_para:
  agent_name: AtariPpo
  agent_num: 1
  agent_config:
    max_steps: 200
    complete_step: 100000

model_para:
  actor:
    model_name: PpoCnnMS
    state_dim: [84, 84, 3]
    action_dim: 4
    input_dtype: uint8
    model_config:
      BATCH_SIZE: 10
      CRITIC_LOSS_COEF: 1.0
      ENTROPY_LOSS: 0.003
      LOSS_CLIPPING: 0.1
      LR: 0.00025
      MAX_GRAD_NORM: 5.0
      NUM_SGD_ITER: 4
      SUMMARY: False
      VF_SHARE_LAYERS: True
      activation: relu
      hidden_sizes: [512]

env_num: 1

benchmark:
  log_interval_to_train: 5
32 changes: 32 additions & 0 deletions examples/beamrider_dqn_ms.yaml
@@ -0,0 +1,32 @@
alg_para:
  alg_name: DQN
  alg_config: {
    'train_per_checkpoint': 50,
    'prepare_times_per_train': 4,
    'learning_starts': 10000,
    'BUFFER_SIZE': 400000,
  }

env_para:
  env_name: AtariEnv
  env_info: { 'name': BeamRiderNoFrameskip-v4, 'vision': False }

agent_para:
  agent_name: AtariDqn
  agent_num: 1
  agent_config: {
    'max_steps': 2000,
    'complete_step': 10000000,
    'episode_count': 200000
  }

model_para:
  actor:
    model_name: DqnCnnMS
    state_dim: [84, 84, 4]
    action_dim: 9
    model_config: {
      'LR': 0.00015,
    }

env_num: 2
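Note that the DQN examples write alg_config, agent_config, and model_config as braced flow-style mappings with quoted keys, while the PPO examples use indented block style; both forms load to the same nested dictionaries. A minimal sketch, assuming PyYAML is available (it is not part of this pull request):

import yaml

# Flow style, as used in the DQN example configs above.
flow = """
alg_config: {
  'train_per_checkpoint': 50,
  'learning_starts': 10000
}
"""

# Block style, as used in the PPO example configs.
block = """
alg_config:
  train_per_checkpoint: 50
  learning_starts: 10000
"""

# Both parse to {'alg_config': {'train_per_checkpoint': 50, 'learning_starts': 10000}}.
assert yaml.safe_load(flow) == yaml.safe_load(block)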
39 changes: 39 additions & 0 deletions examples/beamrider_ppo_ms.yaml
@@ -0,0 +1,39 @@
alg_para:
  alg_name: PPO

env_para:
  env_name: AtariEnv
  env_info:
    name: BeamRiderNoFrameskip-v4
    vision: False

agent_para:
  agent_name: AtariPpo
  agent_num: 1
  agent_config:
    max_steps: 128
    complete_step: 10000000

model_para:
  actor:
    model_name: PpoCnnMS
    state_dim: [84, 84, 4]
    action_dim: 9
    input_dtype: uint8
    model_config:
      BATCH_SIZE: 320
      CRITIC_LOSS_COEF: 1.0
      ENTROPY_LOSS: 0.003
      LOSS_CLIPPING: 0.1
      LR: 0.00025
      MAX_GRAD_NORM: 5.0
      NUM_SGD_ITER: 4
      SUMMARY: False
      VF_SHARE_LAYERS: True
      activation: relu
      hidden_sizes: [512]

env_num: 10

benchmark:
  log_interval_to_train: 10
32 changes: 32 additions & 0 deletions examples/breakout_dqn_ms.yaml
@@ -0,0 +1,32 @@
alg_para:
  alg_name: DQN
  alg_config: {
    'train_per_checkpoint': 50,
    'prepare_times_per_train': 4,
    'learning_starts': 10000,
    'BUFFER_SIZE': 400000,
  }

env_para:
  env_name: AtariEnv
  env_info: { 'name': BreakoutNoFrameskip-v4, 'vision': False }

agent_para:
  agent_name: AtariDqn
  agent_num: 1
  agent_config: {
    'max_steps': 2000,
    'complete_step': 10000000,
    'episode_count': 200000
  }

model_para:
  actor:
    model_name: DqnCnnMS
    state_dim: [84, 84, 4]
    action_dim: 4
    model_config: {
      'LR': 0.00015,
    }

env_num: 2
40 changes: 40 additions & 0 deletions examples/breakout_ppo_ms.yaml
@@ -0,0 +1,40 @@
alg_para:
  alg_name: PPO

env_para:
  env_name: AtariEnv
  env_info:
    name: BreakoutNoFrameskip-v4
    vision: False

agent_para:
  agent_name: AtariPpo
  agent_num: 1
  agent_config:
    max_steps: 128
    complete_step: 10000000

model_para:
  actor:
    model_name: PpoCnnMS
    state_dim: [84, 84, 4]
    action_dim: 4
    input_dtype: uint8
    model_config:
      BATCH_SIZE: 320
      CRITIC_LOSS_COEF: 1.0
      ENTROPY_LOSS: 0.003
      LOSS_CLIPPING: 0.1
      LR: 0.00025
      MAX_GRAD_NORM: 5.0
      NUM_SGD_ITER: 4
      SUMMARY: False
      VF_SHARE_LAYERS: True
      activation: relu
      hidden_sizes: [256]

env_num: 10
speedup: False

benchmark:
  log_interval_to_train: 10
29 changes: 29 additions & 0 deletions examples/cartpole_dqn_ms.yaml
@@ -0,0 +1,29 @@
alg_para:
  alg_name: DQN
  alg_config: {
    'train_per_checkpoint': 10,
    'prepare_times_per_train': 4,
    'learning_starts': 100,
    'save_model': True,
    'save_interval': 100
  }

env_para:
  env_name: GymEnv
  env_info: { 'name': CartPole-v0, 'vision': False }

agent_para:
  agent_name: CartpoleDqn
  agent_num: 1
  agent_config: {
    'max_steps': 2000,
    'complete_step': 5000000
  }

model_para:
  actor:
    model_name: DqnMlpMS
    state_dim: [4]
    action_dim: 2

env_num: 1
52 changes: 52 additions & 0 deletions examples/cartpole_ppo_ms.yaml
@@ -0,0 +1,52 @@
alg_para:
  alg_name: PPO
  alg_config:
    process_num: 1
    save_model: True  # default False
    save_interval: 100

env_para:
  env_name: GymEnv
  env_info:
    name: CartPole-v0
    vision: False

agent_para:
  agent_name: PPO
  agent_num: 1
  agent_config:
    max_steps: 200
    complete_step: 2000000
    complete_episode: 9000

model_para:
  actor:
    model_name: PpoMlpMS
    state_dim: [4]
    action_dim: 2
    input_dtype: float32
    model_config:
      BATCH_SIZE: 200
      CRITIC_LOSS_COEF: 1.0
      ENTROPY_LOSS: 0.01
      LR: 0.0003
      LOSS_CLIPPING: 0.2
      MAX_GRAD_NORM: 5.0
      NUM_SGD_ITER: 8
      SUMMARY: False
      VF_SHARE_LAYERS: False
      activation: tanh
      hidden_sizes: [64, 64]

env_num: 10
speedup: False

benchmark:
  log_interval_to_train: 20
  eval:
    # model_path: /YOUR/PATH/TO/EVAL/models
    gap: 100
    model_divided_freq: 1  # how many times each saved model is divided for evaluation
    episodes_per_eval: 1
    evaluator_num: 1
    max_step_per_episode: 4000
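These example files are plain YAML, so the nested sections (alg_para, env_para, agent_para, model_para, benchmark) can be inspected with any YAML loader. A minimal sketch, assuming PyYAML and a checkout where examples/cartpole_ppo_ms.yaml exists; this is generic YAML loading, not the framework's own config entry point:

import yaml

# Load the CartPole PPO example added in this commit and read back a few fields.
with open("examples/cartpole_ppo_ms.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["alg_para"]["alg_name"])                       # PPO
print(cfg["model_para"]["actor"]["model_config"]["LR"])  # 0.0003
print(cfg["benchmark"]["eval"]["gap"])                   # 100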
43 changes: 43 additions & 0 deletions examples/dog_ppo_ms.yaml
@@ -0,0 +1,43 @@
alg_para:
  alg_name: PPO

env_para:
  env_name: GymEnv
  env_info:
    name: MiniGrid-Dog-v0
    vision: False

agent_para:
  agent_name: AtariPpo
  agent_num: 1
  agent_config:
    max_steps: 200
    complete_step: 100000

model_para:
  actor:
    model_name: PpoCnnMS
    state_dim: [84, 84, 3]
    action_dim: 4
    input_dtype: uint8
    model_config:
      BATCH_SIZE: 10
      CRITIC_LOSS_COEF: 1.0
      ENTROPY_LOSS: 0.003
      LOSS_CLIPPING: 0.1
      LR: 0.00025
      MAX_GRAD_NORM: 5.0
      NUM_SGD_ITER: 4
      SUMMARY: False
      VF_SHARE_LAYERS: True
      activation: relu
      hidden_sizes: [512]

env_num: 1

benchmark:
  log_interval_to_train: 5
42 changes: 42 additions & 0 deletions examples/pendulum_ppo_ms.yaml
@@ -0,0 +1,42 @@
alg_para:
  alg_name: PPO
  alg_config:
    process_num: 1
    only_save_best_model: True

env_para:
  env_name: GymEnv
  env_info:
    name: Pendulum-v0
    vision: False

agent_para:
  agent_name: PPO
  agent_num: 1
  agent_config:
    max_steps: 200
    complete_step: 2000000

model_para:
  actor:
    model_name: PpoMlpMS
    state_dim: [3]
    action_dim: 1
    input_dtype: float32
    model_config:
      BATCH_SIZE: 200
      CRITIC_LOSS_COEF: 1.0
      ENTROPY_LOSS: 0.01
      LR: 0.0003
      LOSS_CLIPPING: 0.2
      MAX_GRAD_NORM: 5.0
      NUM_SGD_ITER: 8
      SUMMARY: False
      VF_SHARE_LAYERS: False
      activation: tanh
      hidden_sizes: [64, 64]

env_num: 10

benchmark:
  log_interval_to_train: 10
30 changes: 30 additions & 0 deletions examples/pong_dqn_ms.yaml
@@ -0,0 +1,30 @@
alg_para:
  alg_name: DQN
  alg_config: {
    'train_per_checkpoint': 50,
    'prepare_times_per_train': 4,
    'learning_starts': 10000,
    'BUFFER_SIZE': 10000,
    # 'save_model': True,
    'save_interval': 100
  }

env_para:
  env_name: AtariEnv
  env_info: { 'name': PongNoFrameskip-v4, 'vision': False }

agent_para:
  agent_name: AtariDqn
  agent_num: 1
  agent_config: {
    'max_steps': 2000,
    'complete_step': 10000000
  }

model_para:
  actor:
    model_name: DqnCnnPongMS
    state_dim: [84, 84, 4]
    action_dim: 6

env_num: 2