Merge pull request #7 from wisnunugroho21/version_2
fix monte carlo method
wisnunugroho21 committed Sep 28, 2020
2 parents a617f15 + b55d1ec commit 5ad60c6
Showing 12 changed files with 48 additions and 48 deletions.
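All 12 files receive the same change: monte_carlo_discounted now takes separate rewards and dones sequences instead of a single datas sequence, and the discounted running return is multiplied by (1.0 - dones[step]) so that rewards no longer bleed across episode boundaries. The standalone sketch below is not part of the commit; it re-implements the new logic with plain Python floats, and the rewards, dones, and gamma values are made up purely to illustrate the effect of the done mask.

# Standalone sketch of the corrected return computation (not taken from the
# repository): plain Python floats instead of torch/tf tensors.

def monte_carlo_discounted(rewards, dones, gamma=0.99):
    running_add = 0.0
    returns = []

    for step in reversed(range(len(rewards))):
        # When dones[step] == 1.0 the episode ended at this step, so the
        # discounted tail from later steps is zeroed out.
        running_add = rewards[step] + (1.0 - dones[step]) * gamma * running_add
        returns.insert(0, running_add)

    return returns

# Same rewards, different episode boundaries:
print(monte_carlo_discounted([1.0, 1.0], [1.0, 1.0]))  # [1.0, 1.0]  - two separate one-step episodes
print(monte_carlo_discounted([1.0, 1.0], [0.0, 1.0]))  # [1.99, 1.0] - one two-step episode
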
8 changes: 4 additions & 4 deletions PPO/pytorch/ppo_pong_pytorch.py
@@ -110,12 +110,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return torch.stack(returns)
8 changes: 4 additions & 4 deletions PPO/pytorch/ppo_pytorch.py
@@ -111,12 +111,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return torch.stack(returns)
8 changes: 4 additions & 4 deletions PPO/tensorflow 2/ppo_pong_tensorflow.py
@@ -104,12 +104,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return tf.stack(returns)
8 changes: 4 additions & 4 deletions PPO/tensorflow 2/ppo_tensorflow.py
@@ -104,12 +104,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return tf.stack(returns)
8 changes: 4 additions & 4 deletions PPO_RND/pytorch/ppo_rnd_frozen_notslippery_pytorch.py
@@ -174,12 +174,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return torch.stack(returns)
8 changes: 4 additions & 4 deletions PPO_RND/pytorch/ppo_rnd_pytorch.py
@@ -174,12 +174,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return torch.stack(returns)
8 changes: 4 additions & 4 deletions PPO_RND/tensorflow 2/ppo_frozenlake_notslippery_tensorflow.py
@@ -165,12 +165,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return tf.stack(returns)
8 changes: 4 additions & 4 deletions PPO_RND/tensorflow 2/ppo_rnd_tensorflow.py
@@ -165,12 +165,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return tf.stack(returns)
8 changes: 4 additions & 4 deletions PPO_continous/pytorch/ppo_continous_bipedal_pytorch.py
@@ -100,12 +100,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return torch.stack(returns)
8 changes: 4 additions & 4 deletions PPO_continous/pytorch/ppo_continous_pytorch.py
@@ -100,12 +100,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return torch.stack(returns)
8 changes: 4 additions & 4 deletions PPO_continous/tensorflow/ppo_continous_bipedal_tensorflow.py
@@ -104,12 +104,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return tf.stack(returns)
8 changes: 4 additions & 4 deletions PPO_continous/tensorflow/ppo_continous_tensorflow.py
@@ -104,12 +104,12 @@ def __init__(self, gamma = 0.99, lam = 0.95):
         self.gamma = gamma
         self.lam = lam
 
-    def monte_carlo_discounted(self, datas):
-        returns = []
+    def monte_carlo_discounted(self, rewards, dones):
         running_add = 0
+        returns = []
 
-        for i in reversed(range(len(datas))):
-            running_add = running_add * self.gamma + datas[i]
+        for step in reversed(range(len(rewards))):
+            running_add = rewards[step] + (1.0 - dones[step]) * self.gamma * running_add
             returns.insert(0, running_add)
 
         return tf.stack(returns)

