
Commit

fix algorithm
wisnunugroho21 committed Oct 7, 2020
1 parent e5b9de7 commit 3f51f38
Showing 12 changed files with 12 additions and 27 deletions.
3 changes: 1 addition & 2 deletions PPO/pytorch/ppo_pong_pytorch.py
@@ -7,7 +7,6 @@
 from torch.distributions.kl import kl_divergence
 from torch.utils.data import Dataset, DataLoader
 from torch.optim import Adam
-import torchvision
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -190,7 +189,7 @@ def get_loss(self, action_probs, values, old_action_probs, old_values, next_valu
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = torch.where(
-(Kl >= self.policy_kl_range) & (ratios >= 1),
+(Kl >= self.policy_kl_range) & (ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
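
For reference, here is a minimal standalone sketch of the policy-gradient term these hunks touch, using the names from the diff (ratios, Kl, Advantages, policy_kl_range, policy_params); the hyperparameter values below are purely illustrative, and everything outside this expression (networks, distributions, advantage estimation) is assumed rather than taken from the repository:

import torch

def truly_ppo_pg_loss(ratios, Kl, Advantages, policy_kl_range, policy_params):
    # Rollback branch: once the KL divergence exceeds the allowed range and the
    # ratio has moved above 1, a KL penalty replaces the raw surrogate gain.
    return torch.where(
        (Kl >= policy_kl_range) & (ratios > 1),
        ratios * Advantages - policy_params * Kl,
        ratios * Advantages
    )

# Illustrative call: at ratios == 1 the strict inequality keeps the unpenalized
# branch, whereas the old ">= 1" condition could trigger the penalty there.
pg_loss = truly_ppo_pg_loss(
    ratios=torch.tensor([1.0, 1.2]),
    Kl=torch.tensor([0.01, 0.01]),
    Advantages=torch.tensor([0.5, 0.5]),
    policy_kl_range=0.0008,
    policy_params=20.0,
)
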
4 changes: 1 addition & 3 deletions PPO/pytorch/ppo_pytorch.py
@@ -7,9 +7,7 @@
 from torch.distributions.kl import kl_divergence
 from torch.utils.data import Dataset, DataLoader
 from torch.optim import Adam
-import torchvision
 
-from tensorflow.keras.utils import to_categorical
 import matplotlib.pyplot as plt
 import numpy as np
 import sys
@@ -191,7 +189,7 @@ def get_loss(self, action_probs, values, old_action_probs, old_values, next_valu
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = torch.where(
-(Kl >= self.policy_kl_range) & (ratios >= 1),
+(Kl >= self.policy_kl_range) & (ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
3 changes: 1 addition & 2 deletions PPO/tensorflow 2/ppo_pong_tensorflow.py
@@ -5,7 +5,6 @@
 import tensorflow_probability as tfp
 from tensorflow.keras.layers import Dense
 from tensorflow.keras import Model
-from tensorflow.keras.utils import to_categorical
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -176,7 +175,7 @@ def get_loss(self, action_probs, values, old_action_probs, old_values, next_valu
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = tf.where(
-tf.logical_and(Kl >= self.policy_kl_range, ratios >= 1),
+tf.logical_and(Kl >= self.policy_kl_range, ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
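
The TensorFlow 2 files receive the same change to the condition; a minimal sketch of the equivalent tf.where expression with dummy tensors (names taken from the diff, values purely illustrative):

import tensorflow as tf

ratios     = tf.constant([1.0, 1.3])
Kl         = tf.constant([0.02, 0.02])
Advantages = tf.constant([0.5, -0.5])
policy_kl_range, policy_params = 0.0008, 20.0   # illustrative hyperparameters

pg_loss = tf.where(
    tf.logical_and(Kl >= policy_kl_range, ratios > 1),   # strict "> 1" after this commit
    ratios * Advantages - policy_params * Kl,
    ratios * Advantages
)
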
3 changes: 1 addition & 2 deletions PPO/tensorflow 2/ppo_tensorflow.py
@@ -5,7 +5,6 @@
 import tensorflow_probability as tfp
 from tensorflow.keras.layers import Dense
 from tensorflow.keras import Model
-from tensorflow.keras.utils import to_categorical
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -176,7 +175,7 @@ def get_loss(self, action_probs, values, old_action_probs, old_values, next_valu
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = tf.where(
-tf.logical_and(Kl >= self.policy_kl_range, ratios >= 1),
+tf.logical_and(Kl >= self.policy_kl_range, ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
4 changes: 1 addition & 3 deletions PPO_RND/pytorch/ppo_rnd_frozen_notslippery_pytorch.py
@@ -7,9 +7,7 @@
 from torch.distributions.kl import kl_divergence
 from torch.utils.data import Dataset, DataLoader
 from torch.optim import Adam
-import torchvision
 
-from tensorflow.keras.utils import to_categorical
 import matplotlib.pyplot as plt
 import numpy as np
 import sys
@@ -310,7 +308,7 @@ def get_PPO_loss(self, action_probs, ex_values, old_action_probs, old_ex_values,
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = torch.where(
-(Kl >= self.policy_kl_range) & (ratios >= 1),
+(Kl >= self.policy_kl_range) & (ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
4 changes: 1 addition & 3 deletions PPO_RND/pytorch/ppo_rnd_pytorch.py
@@ -7,9 +7,7 @@
 from torch.distributions.kl import kl_divergence
 from torch.utils.data import Dataset, DataLoader
 from torch.optim import Adam
-import torchvision
 
-from tensorflow.keras.utils import to_categorical
 import matplotlib.pyplot as plt
 import numpy as np
 import sys
@@ -310,7 +308,7 @@ def get_PPO_loss(self, action_probs, ex_values, old_action_probs, old_ex_values,
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = torch.where(
-(Kl >= self.policy_kl_range) & (ratios >= 1),
+(Kl >= self.policy_kl_range) & (ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
3 changes: 1 addition & 2 deletions PPO_RND/tensorflow 2/ppo_rnd_frozen_notslippery_tensorflow.py
@@ -5,7 +5,6 @@
 import tensorflow_probability as tfp
 from tensorflow.keras.layers import Dense
 from tensorflow.keras import Model
-from tensorflow.keras.utils import to_categorical
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -291,7 +290,7 @@ def get_PPO_loss(self, action_probs, ex_values, old_action_probs, old_ex_values,
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = tf.where(
-tf.logical_and(Kl >= self.policy_kl_range, ratios >= 1),
+tf.logical_and(Kl >= self.policy_kl_range, ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
3 changes: 1 addition & 2 deletions PPO_RND/tensorflow 2/ppo_rnd_tensorflow.py
@@ -5,7 +5,6 @@
 import tensorflow_probability as tfp
 from tensorflow.keras.layers import Dense
 from tensorflow.keras import Model
-from tensorflow.keras.utils import to_categorical
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -291,7 +290,7 @@ def get_PPO_loss(self, action_probs, ex_values, old_action_probs, old_ex_values,
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = tf.where(
-tf.logical_and(Kl >= self.policy_kl_range, ratios >= 1),
+tf.logical_and(Kl >= self.policy_kl_range, ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
3 changes: 1 addition & 2 deletions PPO_continous/pytorch/ppo_continous_bipedal_pytorch.py
@@ -7,7 +7,6 @@
 from torch.distributions.kl import kl_divergence
 from torch.utils.data import Dataset, DataLoader
 from torch.optim import Adam
-import torchvision
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -181,7 +180,7 @@ def get_loss(self, action_mean, values, old_action_mean, old_values, next_values
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = torch.where(
-(Kl >= self.policy_kl_range) & (ratios * Advantages >= 1 * Advantages),
+(Kl >= self.policy_kl_range) & (ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
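
Note that in the continuous-action files the condition being replaced was written differently: (ratios * Advantages >= 1 * Advantages) is equivalent to ratios >= 1 only when Advantages is positive; with a negative advantage the comparison flips, and with a zero advantage it is always true. A quick numeric check of the mismatch (illustrative values only):

ratios, advantage = 0.8, -0.5
print(ratios * advantage >= 1 * advantage)   # True: -0.4 >= -0.5, even though ratios < 1
print(ratios > 1)                            # False: the replacement condition disagrees here
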
3 changes: 1 addition & 2 deletions PPO_continous/pytorch/ppo_continous_pytorch.py
@@ -7,7 +7,6 @@
 from torch.distributions.kl import kl_divergence
 from torch.utils.data import Dataset, DataLoader
 from torch.optim import Adam
-import torchvision
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -181,7 +180,7 @@ def get_loss(self, action_mean, values, old_action_mean, old_values, next_values
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = torch.where(
-(Kl >= self.policy_kl_range) & (ratios * Advantages >= 1 * Advantages),
+(Kl >= self.policy_kl_range) & (ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
3 changes: 1 addition & 2 deletions PPO_continous/tensorflow/ppo_continous_bipedal_tensorflow.py
@@ -5,7 +5,6 @@
 import tensorflow_probability as tfp
 from tensorflow.keras.layers import Dense
 from tensorflow.keras import Model
-from tensorflow.keras.utils import to_categorical
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -177,7 +176,7 @@ def get_loss(self, action_mean, values, old_action_mean, old_values, next_values
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = tf.where(
-tf.logical_and(Kl >= self.policy_kl_range, ratios >= 1),
+tf.logical_and(Kl >= self.policy_kl_range, ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
3 changes: 1 addition & 2 deletions PPO_continous/tensorflow/ppo_continous_tensorflow.py
@@ -5,7 +5,6 @@
 import tensorflow_probability as tfp
 from tensorflow.keras.layers import Dense
 from tensorflow.keras import Model
-from tensorflow.keras.utils import to_categorical
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -177,7 +176,7 @@ def get_loss(self, action_mean, values, old_action_mean, old_values, next_values
 
 # Combining TR-PPO with Rollback (Truly PPO)
 pg_loss = tf.where(
-tf.logical_and(Kl >= self.policy_kl_range, ratios >= 1),
+tf.logical_and(Kl >= self.policy_kl_range, ratios > 1),
 ratios * Advantages - self.policy_params * Kl,
 ratios * Advantages
 )
