Skip to content

Commit

Permalink
fix bug in final plotting
Browse files: browse the repository at this point in the history
  • Loading branch information
wisnunugroho21 committed Sep 30, 2020
1 parent 5ad60c6 commit 19ee15c
Show file tree
Hide file tree
Showing 12 changed files with 118 additions and 98 deletions.
18 changes: 10 additions & 8 deletions PPO/pytorch/ppo_pong_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,12 +396,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)
if np.mean(batch_solved_reward) >= reward_threshold:

print('You solved task after {} episode'.format(len(rewards)))
break
Expand Down Expand Up @@ -433,9 +428,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
19 changes: 10 additions & 9 deletions PPO/pytorch/ppo_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,13 +388,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

if np.mean(batch_solved_reward) >= reward_threshold:
print('You solved task after {} episode'.format(len(rewards)))
break

Expand Down Expand Up @@ -425,9 +419,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
11 changes: 9 additions & 2 deletions PPO/tensorflow 2/ppo_pong_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,9 +410,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
17 changes: 9 additions & 8 deletions PPO/tensorflow 2/ppo_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,13 +364,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

if np.mean(batch_solved_reward) >= reward_threshold:
print('You solved task after {} episode'.format(len(rewards)))
break

Expand Down Expand Up @@ -401,7 +395,14 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)

Expand Down
19 changes: 10 additions & 9 deletions PPO_RND/pytorch/ppo_rnd_frozen_notslippery_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,13 +611,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

if np.mean(batch_solved_reward) >= reward_threshold:
print('You solved task after {} episode'.format(len(rewards)))
break

Expand Down Expand Up @@ -648,9 +642,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
19 changes: 10 additions & 9 deletions PPO_RND/pytorch/ppo_rnd_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,13 +597,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

if np.mean(batch_solved_reward) >= reward_threshold:
print('You solved task after {} episode'.format(len(rewards)))
break

Expand Down Expand Up @@ -634,9 +628,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
19 changes: 10 additions & 9 deletions PPO_RND/tensorflow 2/ppo_frozenlake_notslippery_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,13 +578,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

if np.mean(batch_solved_reward) >= reward_threshold:
print('You solved task after {} episode'.format(len(rewards)))
break

Expand Down Expand Up @@ -615,9 +609,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
19 changes: 10 additions & 9 deletions PPO_RND/tensorflow 2/ppo_rnd_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,13 +578,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

if np.mean(batch_solved_reward) >= reward_threshold:
print('You solved task after {} episode'.format(len(rewards)))
break

Expand Down Expand Up @@ -615,9 +609,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
19 changes: 10 additions & 9 deletions PPO_continous/pytorch/ppo_continous_bipedal_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,13 +378,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

if np.mean(batch_solved_reward) >= reward_threshold:
print('You solved task after {} episode'.format(len(rewards)))
break

Expand Down Expand Up @@ -415,9 +409,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
18 changes: 10 additions & 8 deletions PPO_continous/pytorch/ppo_continous_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,12 +378,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)
if np.mean(batch_solved_reward) >= reward_threshold:

print('You solved task after {} episode'.format(len(rewards)))
break
Expand Down Expand Up @@ -415,9 +410,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
19 changes: 10 additions & 9 deletions PPO_continous/tensorflow/ppo_continous_bipedal_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,13 +366,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

if np.mean(batch_solved_reward) >= reward_threshold:
print('You solved task after {} episode'.format(len(rewards)))
break

Expand Down Expand Up @@ -403,9 +397,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()
19 changes: 10 additions & 9 deletions PPO_continous/tensorflow/ppo_continous_tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,13 +366,7 @@ def main():

if reward_threshold:
if len(batch_solved_reward) == 100:
if np.mean(batch_solved_reward) >= reward_threshold :
for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

if np.mean(batch_solved_reward) >= reward_threshold:
print('You solved task after {} episode'.format(len(rewards)))
break

Expand Down Expand Up @@ -403,9 +397,16 @@ def main():
plot(times)

print('========== Final ==========')
# Plot the reward, times for every episode
# Plot the reward, times for every episode

for reward in batch_rewards:
rewards.append(reward)

for time in batch_times:
times.append(time)

plot(rewards)
plot(times)
plot(times)

if __name__ == '__main__':
main()

0 comments on commit 19ee15c

Please sign in to comment.