Fix bug in final plotting: append the pending batch rewards and times to the totals before the final plot

wisnunugroho21 · Sep 30, 2020 · 19ee15c
1 parent 5ad60c6
commit 19ee15c
Show file tree

Hide file tree

Showing 12 changed files with 118 additions and 98 deletions.
diff --git a/PPO/pytorch/ppo_pong_pytorch.py b/PPO/pytorch/ppo_pong_pytorch.py
@@ -396,12 +396,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
+ if np.mean(batch_solved_reward) >= reward_threshold: 
 
  print('You solved task after {} episode'.format(len(rewards)))
  break
@@ -433,9 +428,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO/pytorch/ppo_pytorch.py b/PPO/pytorch/ppo_pytorch.py
@@ -388,13 +388,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
-
+ if np.mean(batch_solved_reward) >= reward_threshold:
  print('You solved task after {} episode'.format(len(rewards)))
  break
 
@@ -425,9 +419,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO/tensorflow 2/ppo_pong_tensorflow.py b/PPO/tensorflow 2/ppo_pong_tensorflow.py
@@ -410,9 +410,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO/tensorflow 2/ppo_tensorflow.py b/PPO/tensorflow 2/ppo_tensorflow.py
@@ -364,13 +364,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
-
+ if np.mean(batch_solved_reward) >= reward_threshold:
  print('You solved task after {} episode'.format(len(rewards)))
  break
 
@@ -401,7 +395,14 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
  plot(times) 
 

diff --git a/PPO_RND/pytorch/ppo_rnd_frozen_notslippery_pytorch.py b/PPO_RND/pytorch/ppo_rnd_frozen_notslippery_pytorch.py
@@ -611,13 +611,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
-
+ if np.mean(batch_solved_reward) >= reward_threshold:
  print('You solved task after {} episode'.format(len(rewards)))
  break
 
@@ -648,9 +642,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO_RND/pytorch/ppo_rnd_pytorch.py b/PPO_RND/pytorch/ppo_rnd_pytorch.py
@@ -597,13 +597,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
-
+ if np.mean(batch_solved_reward) >= reward_threshold:
  print('You solved task after {} episode'.format(len(rewards)))
  break
 
@@ -634,9 +628,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO_RND/tensorflow 2/ppo_frozenlake_notslippery_tensorflow.py b/PPO_RND/tensorflow 2/ppo_frozenlake_notslippery_tensorflow.py
@@ -578,13 +578,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
-
+ if np.mean(batch_solved_reward) >= reward_threshold:
  print('You solved task after {} episode'.format(len(rewards)))
  break
 
@@ -615,9 +609,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO_RND/tensorflow 2/ppo_rnd_tensorflow.py b/PPO_RND/tensorflow 2/ppo_rnd_tensorflow.py
@@ -578,13 +578,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
-
+ if np.mean(batch_solved_reward) >= reward_threshold:
  print('You solved task after {} episode'.format(len(rewards)))
  break
 
@@ -615,9 +609,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO_continous/pytorch/ppo_continous_bipedal_pytorch.py b/PPO_continous/pytorch/ppo_continous_bipedal_pytorch.py
@@ -378,13 +378,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
-
+ if np.mean(batch_solved_reward) >= reward_threshold:
  print('You solved task after {} episode'.format(len(rewards)))
  break
 
@@ -415,9 +409,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO_continous/pytorch/ppo_continous_pytorch.py b/PPO_continous/pytorch/ppo_continous_pytorch.py
@@ -378,12 +378,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
+ if np.mean(batch_solved_reward) >= reward_threshold:
 
  print('You solved task after {} episode'.format(len(rewards)))
  break
@@ -415,9 +410,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO_continous/tensorflow/ppo_continous_bipedal_tensorflow.py b/PPO_continous/tensorflow/ppo_continous_bipedal_tensorflow.py
@@ -366,13 +366,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
-
+ if np.mean(batch_solved_reward) >= reward_threshold:
  print('You solved task after {} episode'.format(len(rewards)))
  break
 
@@ -403,9 +397,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()
diff --git a/PPO_continous/tensorflow/ppo_continous_tensorflow.py b/PPO_continous/tensorflow/ppo_continous_tensorflow.py
@@ -366,13 +366,7 @@ def main():
 
  if reward_threshold:
  if len(batch_solved_reward) == 100: 
- if np.mean(batch_solved_reward) >= reward_threshold : 
- for reward in batch_rewards:
- rewards.append(reward)
-
- for time in batch_times:
- times.append(time) 
-
+ if np.mean(batch_solved_reward) >= reward_threshold:
  print('You solved task after {} episode'.format(len(rewards)))
  break
 
@@ -403,9 +397,16 @@ def main():
  plot(times)
 
  print('========== Final ==========')
- # Plot the reward, times for every episode
+ # Plot the reward, times for every episode
+
+ for reward in batch_rewards:
+ rewards.append(reward)
+
+ for time in batch_times:
+ times.append(time)
+
  plot(rewards)
- plot(times) 
+ plot(times)
 
 if __name__ == '__main__':
  main()