
Commit a617f15: fix bug

wisnunugroho21 committed Sep 15, 2020
1 parent: 4601bf9
Showing 4 changed files with 36 additions and 4 deletions.
PPO_RND/pytorch/ppo_rnd_frozen_notslippery_pytorch.py (9 additions, 1 deletion)
@@ -484,7 +484,7 @@ def run_inits_episode(env, agent, state_dim, render, n_init_episode):
  env.reset()

  agent.updateObsNormalizationParam(agent.obs_memory.observations)
- agent.memory.clearObs()
+ agent.obs_memory.clearMemory()

  return agent

@@ -547,6 +547,7 @@ def main():
  n_eps_update = 5 # How many episode before you update the PPO. Recommended set to 5 for Discrete
  n_plot_batch = 100000000 # How many episode you want to plot the result
  n_episode = 100000 # How many episode you want to run
+ n_init_episode = 256
  n_saved = 10 # How many episode to run before saving the weights

  policy_kl_range = 0.0008 # Recommended set to 0.0008 for Discrete
@@ -587,6 +588,13 @@ def main():

  t_updates = 0

+ #############################################
+
+ if training_mode:
+     agent = run_inits_episode(env, agent, state_dim, render, n_init_episode)
+
+ #############################################
+
  for i_episode in range(1, n_episode + 1):
      total_reward, time, t_updates = run_episode(env, agent, state_dim, render, training_mode, t_updates, n_step_update)
      print('Episode {} \t t_reward: {} \t time: {} \t '.format(i_episode, total_reward, time))
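Note on the fix: in run_inits_episode the warm-up observations are collected into agent.obs_memory, so the buffer has to be emptied through obs_memory.clearMemory(); the old call agent.memory.clearObs() presumably targeted the PPO rollout memory instead, which is what this commit appears to correct. The sketch below is not taken from this repository; it is a minimal, hypothetical reconstruction (the ObsMemory internals, saveObs, save_observation, and the loop body are assumptions; only the function signature and its last three lines appear in the diff) of how such a warm-up typically works: act randomly, store observations, fit the normalization statistics once, then clear the buffer.

import numpy as np

class ObsMemory:
    # Hypothetical sketch, not the repository's class: a buffer that only holds
    # observations used to fit the normalization statistics.
    def __init__(self):
        self.observations = []

    def saveObs(self, obs):
        self.observations.append(np.asarray(obs, dtype=np.float32))

    def clearMemory(self):
        del self.observations[:]

class Agent:
    # Hypothetical minimal agent exposing only the pieces the warm-up touches.
    def __init__(self):
        self.obs_memory = ObsMemory()
        self.obs_mean, self.obs_std = 0.0, 1.0

    def save_observation(self, obs):
        self.obs_memory.saveObs(obs)

    def updateObsNormalizationParam(self, observations):
        obs = np.stack(observations)
        self.obs_mean = obs.mean(axis=0)
        self.obs_std = obs.std(axis=0) + 1e-8   # guard against zero variance

def run_inits_episode(env, agent, state_dim, render, n_init_episode):
    # Reconstruction of the warm-up: act randomly, store observations,
    # fit the normalizer once, then drop the buffer.
    env.reset()
    for _ in range(n_init_episode):
        action = env.action_space.sample()
        next_obs, _, done, _ = env.step(action)
        agent.save_observation(next_obs)
        if render:
            env.render()
        if done:
            env.reset()

    agent.updateObsNormalizationParam(agent.obs_memory.observations)
    agent.obs_memory.clearMemory()   # the corrected call from this commit
    return agent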
PPO_RND/pytorch/ppo_rnd_pytorch.py (9 additions, 1 deletion)
@@ -483,7 +483,7 @@ def run_inits_episode(env, agent, state_dim, render, n_init_episode):
  env.reset()

  agent.updateObsNormalizationParam(agent.obs_memory.observations)
- agent.memory.clearObs()
+ agent.obs_memory.clearMemory()

  return agent

@@ -533,6 +533,7 @@ def main():
  n_eps_update = 5 # How many episode before you update the PPO. Recommended set to 5 for Discrete
  n_plot_batch = 100000000 # How many episode you want to plot the result
  n_episode = 100000 # How many episode you want to run
+ n_init_episode = 1024
  n_saved = 10 # How many episode to run before saving the weights

  policy_kl_range = 0.0008 # Recommended set to 0.0008 for Discrete
@@ -573,6 +574,13 @@ def main():

  t_updates = 0

+ #############################################
+
+ if training_mode:
+     agent = run_inits_episode(env, agent, state_dim, render, n_init_episode)
+
+ #############################################
+
  for i_episode in range(1, n_episode + 1):
      total_reward, time, t_updates = run_episode(env, agent, state_dim, render, training_mode, t_updates, n_step_update)
      print('Episode {} \t t_reward: {} \t time: {} \t '.format(i_episode, total_reward, time))
PPO_RND/tensorflow 2/ppo_frozenlake_notslippery_tensorflow.py (9 additions, 1 deletion)
@@ -451,7 +451,7 @@ def run_inits_episode(env, agent, state_dim, render, n_init_episode):
  env.reset()

  agent.updateObsNormalizationParam(agent.obs_memory.observations)
- agent.memory.clearObs()
+ agent.obs_memory.clearMemory()

  return agent

@@ -514,6 +514,7 @@ def main():
  n_eps_update = 5 # How many episode before you update the PPO. Recommended set to 5 for Discrete
  n_plot_batch = 100000000 # How many episode you want to plot the result
  n_episode = 100000 # How many episode you want to run
+ n_init_episode = 256
  n_saved = 10 # How many episode to run before saving the weights

  policy_kl_range = 0.0008 # Recommended set to 0.0008 for Discrete
@@ -553,6 +554,13 @@ def main():
  batch_times = []

  t_updates = 0

+ #############################################
+
+ if training_mode:
+     agent = run_inits_episode(env, agent, state_dim, render, n_init_episode)
+
+ #############################################
+
  for i_episode in range(1, n_episode + 1):
      total_reward, time, t_updates = run_episode(env, agent, state_dim, render, training_mode, t_updates, n_step_update)
PPO_RND/tensorflow 2/ppo_rnd_tensorflow.py (9 additions, 1 deletion)
@@ -451,7 +451,7 @@ def run_inits_episode(env, agent, state_dim, render, n_init_episode):
  env.reset()

  agent.updateObsNormalizationParam(agent.obs_memory.observations)
- agent.memory.clearObs()
+ agent.obs_memory.clearMemory()

  return agent

@@ -514,6 +514,7 @@ def main():
  n_eps_update = 5 # How many episode before you update the PPO. Recommended set to 5 for Discrete
  n_plot_batch = 100000000 # How many episode you want to plot the result
  n_episode = 100000 # How many episode you want to run
+ n_init_episode = 1024
  n_saved = 10 # How many episode to run before saving the weights

  policy_kl_range = 0.0008 # Recommended set to 0.0008 for Discrete
@@ -554,6 +555,13 @@ def main():

  t_updates = 0

+ #############################################
+
+ if training_mode:
+     agent = run_inits_episode(env, agent, state_dim, render, n_init_episode)
+
+ #############################################
+
  for i_episode in range(1, n_episode + 1):
      total_reward, time, t_updates = run_episode(env, agent, state_dim, render, training_mode, t_updates, n_step_update)
      print('Episode {} \t t_reward: {} \t time: {} \t '.format(i_episode, total_reward, time))
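All four scripts receive the same pair of changes: the corrected clearMemory() call in run_inits_episode, and a warm-up step in main() that runs n_init_episode random episodes (256 in the two FrozenLake not-slippery scripts, 1024 in ppo_rnd_pytorch.py and ppo_rnd_tensorflow.py) before the PPO loop starts, so the observation normalizer is fitted before any intrinsic reward is computed. As context rather than code from this commit: RND-style agents typically apply these statistics by standardizing and clipping observations before feeding them to the target and predictor networks. A generic, hypothetical illustration (the obs_mean/obs_std names and the clip range are assumptions):

import numpy as np

def normalize_obs(obs, obs_mean, obs_std, clip_range=5.0):
    # Standardize with the warm-up statistics, then clip so a single unusual
    # state cannot dominate the RND intrinsic reward.
    return np.clip((np.asarray(obs) - obs_mean) / obs_std, -clip_range, clip_range)

# Hypothetical call site inside the intrinsic-reward computation:
# norm_obs  = normalize_obs(next_obs, agent.obs_mean, agent.obs_std)
# intrinsic = ((target_net(norm_obs) - predictor_net(norm_obs)) ** 2).mean()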

