Dian xt ms #29

Open
wants to merge 27 commits into master
Changes from 1 commit
modify ppo
AmiyaSX committed Mar 23, 2023
commit 3a59633fa2881b671758b2188892f69fe582da48
3 changes: 3 additions & 0 deletions .idea/.gitignore

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

4 changes: 4 additions & 0 deletions .idea/misc.xml

8 changes: 8 additions & 0 deletions .idea/modules.xml

6 changes: 6 additions & 0 deletions .idea/vcs.xml

14 changes: 14 additions & 0 deletions .idea/xingtian.iml

4 changes: 1 addition & 3 deletions xt/model/model_utils_ms.py
@@ -129,7 +129,6 @@ def __init__(
self.dense_layer_share = bulid_mlp_layers_ms(dim, hidden_sizes, activation)
self.dense_pi = Dense(hidden_sizes[-1], act_dim, weight_init="XavierUniform")
self.dense_v = Dense(hidden_sizes[-1], 1, weight_init="XavierUniform")

def construct(self, x):
x = x.transpose((0, 3, 1, 2))
if self.dtype == "uint8":
@@ -139,7 +138,6 @@ def construct(self, x):
share = self.dense_layer_share(share)
pi_latent = self.dense_pi(share)
out_value = self.dense_v(share)

return [pi_latent, out_value]


@@ -335,4 +333,4 @@ def _initializer(shape, dtype=None, partition_info=None):
out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
return tf.constant(out)

return _initializer
return _initializer
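
For reference, a minimal NumPy sketch (not part of the diff) of the column-normalised initializer kept at the bottom of this file; the helper name and shapes here are illustrative only:

import numpy as np

def normc_init(shape, std=1.0):
    # Draw a Gaussian matrix and rescale each column so its squared norm is std**2,
    # mirroring the arithmetic in _initializer above.
    out = np.random.randn(*shape).astype(np.float32)
    out *= std / np.sqrt(np.square(out).sum(axis=0, keepdims=True))
    return out

w = normc_init((64, 4))
print(np.square(w).sum(axis=0))  # each column's squared norm is ~1.0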
19 changes: 9 additions & 10 deletions xt/model/ms_dist.py
@@ -58,7 +58,7 @@ def __init__(self, size):
self.shape = ops.Shape()
self.square = ops.Square()
self.Normal = ops.StandardNormal()

self.cast = Cast()
def init_by_param(self, param):
self.param = param
self.mean, self.log_std = ops.split(self.param, axis=-1, output_num=2)
@@ -72,7 +72,7 @@ def sample_dtype(self):

def neglog_prob(self, x,mean,sd):
log_sd = self.log(sd)
neglog_prob= 0.5 * self.log(2.0 * np.pi) * Cast()((self.shape(x)[-1]), ms.float32) + \
neglog_prob = 0.5 * self.log(2.0 * np.pi) * self.cast(self.shape(x)[-1], ms.float32) + \
0.5 * self.reduce_sum(self.square((x - mean) / sd), axis=-1) + \
self.reduce_sum(log_sd, axis=-1)
return neglog_prob
@@ -106,7 +106,8 @@ def __init__(self, size):
self.exp = ops.Exp()
self.log = ops.Log()
self.expand_dims = ops.ExpandDims()

self.random_categorical = ops.RandomCategorical(dtype=ms.int32)
self.on_value, self.off_value = Tensor(1.0, ms.float32), Tensor(0.0, ms.float32)
def init_by_param(self, logits):
self.logits = logits

@@ -117,17 +118,16 @@ def sample_dtype(self):
return ms.int32

def neglog_prob(self, x,logits):
on_value, off_value = Tensor(1.0, ms.float32), Tensor(0.0, ms.float32)
x = self.OneHot(x , self.size, on_value, off_value)
x = self.OneHot(x , self.size, self.on_value, self.off_value)
loss, dlogits = self.softmax_cross(logits, x)
return self.expand_dims(loss, -1)


def entropy(self,logits):

rescaled_logits = logits - self.reduce_max(logits, -1)
exp_logits = self.exp(rescaled_logits)

z = self.reduce_sum(exp_logits, -1)
p = exp_logits / z
return self.reduce_sum(p * (self.log(z) - rescaled_logits), -1)
Expand All @@ -149,8 +149,7 @@ def kl(self, other):
def sample(self,logits):
# u = tf.random_uniform(tf.shape(self.logits), dtype=self.logits.dtype)
# return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1, output_type=tf.int32)
action = ops.squeeze(ops.random_categorical(logits,1,dtype=ms.int32),-1)
return action
return self.random_categorical(logits,1,0).squeeze(-1)


def make_dist(ac_type, ac_dim):
@@ -159,4 +158,4 @@ def make_dist(ac_type, ac_dim):
elif ac_type == 'DiagGaussian':
return DiagGaussianDist(ac_dim)
else:
raise NotImplementedError
raise NotImplementedError
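
As a reading aid for the entropy code above, a standalone NumPy sketch (not part of the diff) of the numerically stable form that CategoricalDist.entropy computes; the function name is illustrative:

import numpy as np

def categorical_entropy(logits):
    # Subtract the per-row max before exponentiating to avoid overflow,
    # then compute H(p) = sum(p * (log z - rescaled_logits)).
    rescaled = logits - logits.max(axis=-1, keepdims=True)
    exp_logits = np.exp(rescaled)
    z = exp_logits.sum(axis=-1, keepdims=True)
    p = exp_logits / z
    return (p * (np.log(z) - rescaled)).sum(axis=-1)

print(categorical_entropy(np.array([[0.0, 0.0, 0.0]])))  # uniform over 3 actions -> log 3 ≈ 1.0986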
112 changes: 46 additions & 66 deletions xt/model/ppo/ppo_ms.py
@@ -4,21 +4,15 @@
from xt.model.ms_dist import make_dist
from zeus.common.util.common import import_config
from zeus.common.util.register import Registers
from mindspore.ops import composite as C
from mindspore.ops import functional as F
from xt.model.ms_compat import ms, ReduceMean, Tensor, Adam, MultitypeFuncGraph, Reciprocal
from xt.model.ms_compat import ms, ReduceMean, Tensor, Adam, Model
from xt.model.model_ms import XTModel_MS
from xt.model.ms_utils import MSVariables
from mindspore import nn, common, ops, ParameterTuple, Parameter
import mindspore.nn.probability.distribution as msd
from mindspore import set_context
import time
import os
import psutil
from mindspore import nn, ops, amp, boost, set_context

@Registers.model
class PPOMS(XTModel_MS):
"""Build PPO MLP network."""

def __init__(self, model_info):
model_config = model_info.get('model_config')
import_config(globals(), model_config)
@@ -37,41 +31,36 @@ def __init__(self, model_info):
self.num_sgd_iter = model_config.get('NUM_SGD_ITER', NUM_SGD_ITER)
self.verbose = model_config.get('SUMMARY', SUMMARY)
self.vf_clip = Tensor(model_config.get('VF_CLIP', VF_CLIP))

self.dist = make_dist(self.action_type, self.action_dim)

super().__init__(model_info)

'''Build the training network.'''
adam = Adam(params=self.model.trainable_params(), learning_rate=0.0005,use_amsgrad=True)
loss_fn = WithLossCell(self.critic_loss_coef,self.clip_ratio, self.ent_coef, self.vf_clip)
forward_fn = NetWithLoss(self.model,loss_fn, self.dist, self.action_type)
adam = Adam(params=self.model.trainable_params(), learning_rate=0.00055, use_amsgrad=True)
loss_fn = WithLossCell(self.critic_loss_coef, self.clip_ratio, self.ent_coef, self.vf_clip)
forward_fn = NetWithLoss(self.model, loss_fn, self.dist, self.action_type)
self.train_net = MyTrainOneStepCell(forward_fn, optimizer=adam, max_grad_norm=self._max_grad_norm)
self.train_net.set_train()

def predict(self, state):
"""Predict state."""
self.model.set_train(False)

state = Tensor(state, ms.uint8)
state = Tensor(state)
pi_latent, v_out = self.model(state)

if self.action_type == 'DiagGaussian':
std = ms.common.initializer('ones', [pi_latent.shape[0], self.action_dim], ms.float32)
self.action = self.dist.sample(pi_latent, std)
self.logp = self.dist.log_prob(self.action, pi_latent, std)
action = self.dist.sample(pi_latent, std)
logp = self.dist.log_prob(action, pi_latent, std)
elif self.action_type == 'Categorical':

self.action = self.dist.sample(pi_latent)
self.logp = self.dist.log_prob(self.action, pi_latent)

self.action = self.action.asnumpy()
self.logp = self.logp.asnumpy()
action = self.dist.sample(pi_latent)
logp = self.dist.log_prob(action, pi_latent)

action = action.asnumpy()
logp = logp.asnumpy()
v_out = v_out.asnumpy()
return self.action, self.logp, v_out

return action, logp, v_out

def train(self, state, label):

self.model.set_train(True)
nbatch = state[0].shape[0]
inds = np.arange(nbatch)
@@ -85,78 +74,69 @@ def train(self, state, label):
mbinds = inds[start:end]
state_ph = Tensor(state[0][mbinds])
behavior_action_ph = Tensor(label[0][mbinds])
old_logp_ph = Tensor(label[1][mbinds],ms.float32)
adv_ph = Tensor(label[2][mbinds],ms.float32)
old_v_ph = Tensor(label[3][mbinds],ms.float32)
target_v_ph = Tensor(label[4][mbinds],ms.float32)

loss = self.train_net( state_ph,adv_ph, old_logp_ph,behavior_action_ph, target_v_ph, old_v_ph ).asnumpy()
old_logp_ph = Tensor(label[1][mbinds])
adv_ph = Tensor(label[2][mbinds])
old_v_ph = Tensor(label[3][mbinds])
target_v_ph = Tensor(label[4][mbinds])
loss = self.train_net(state_ph, adv_ph, old_logp_ph, behavior_action_ph, target_v_ph,
old_v_ph).asnumpy()
loss_val.append(np.mean(loss))

self.actor_var = MSVariables(self.model)

return np.mean(loss_val)


class MyTrainOneStepCell(nn.TrainOneStepCell):
def __init__(self, network, optimizer, max_grad_norm, sens=1.0):
super(MyTrainOneStepCell, self).__init__(network, optimizer, sens)
self.sens = sens
self.depend = ops.Depend()
self.max_grad_norm = max_grad_norm

def construct(self,*inputs):
weights = self.weights
loss, grads = ops.value_and_grad(self.network, grad_position=None, weights=weights)(*inputs)
self.grad_fn = ops.value_and_grad(self.network, grad_position=None, weights=self.weights)

def construct(self, *inputs):
loss, grads = self.grad_fn(*inputs)
grads = ops.clip_by_global_norm(grads, self.max_grad_norm)
grads = self.grad_reducer(grads)
loss = ops.depend(loss, self.optimizer(grads))
loss = self.depend(loss, self.optimizer(grads))
return loss


class NetWithLoss(nn.Cell):
def __init__(self, net,loss_fn, dist,action_type):
def __init__(self, net, loss_fn, dist, action_type):
super(NetWithLoss, self).__init__(auto_prefix=False)
self.net = net
self._loss_fn = loss_fn
self.action_type = action_type
self.dist = dist
self.reduce_sum = ops.ReduceSum(keep_dims=True)
self.split = ops.Split()
self.concat = ops.Concat()
self.exp = ops.Exp()
self.log = ops.Log()
def construct(self,state_ph,adv_ph, old_logp_ph,behavior_action, target_v, old_v_ph ):

def construct(self, state_ph, adv_ph, old_logp_ph, behavior_action, target_v, old_v_ph):
pi_latent, v_out = self.net(state_ph)
if self.action_type == 'DiagGaussian':
log_std = ms.common.initializer('zeros', [1, self.action_dim], ms.float32)
dist_param = self.concat([pi_latent, pi_latent * 0.0 + log_std], axis=-1)
mean, log_std = self.split(dist_param, axis=-1, output_num=2)
sd = self.exp(log_std)
ent = self.dist.entropy(sd)
action_log_prob = self.dist.log_prob(behavior_action,mean,sd)
std = ms.common.initializer('ones', [pi_latent.shape[0], pi_latent.shape[1]], ms.float32)
ent = self.dist.entropy(std)
action_log_prob = self.dist.log_prob(behavior_action, pi_latent, std)
else:
ent = self.dist.entropy(pi_latent)
action_log_prob = self.dist.log_prob(behavior_action,pi_latent)
loss = self._loss_fn(action_log_prob,ent, adv_ph, old_logp_ph, target_v, v_out, old_v_ph)
action_log_prob = self.dist.log_prob(behavior_action, pi_latent)
loss = self._loss_fn(action_log_prob, ent, adv_ph, old_logp_ph, target_v, v_out, old_v_ph)
return loss


class WithLossCell(nn.LossBase):
def __init__(self, critic_loss_coef,clip_ratio,ent_coef,val_clip):
def __init__(self, critic_loss_coef, clip_ratio, ent_coef, val_clip):
super(WithLossCell, self).__init__()
self.reduce_mean = ReduceMean(keep_dims=True)
self.critic_loss_coef = critic_loss_coef
self.Mul = ops.Mul()
self.clip_ratio = clip_ratio
self.ent_coef = ent_coef
self.val_clip = val_clip
self.reducesum = ops.ReduceSum(keep_dims=True)
self.minimum = ops.Minimum()
self.maximum = ops.Maximum()
self.exp = ops.Exp()
self.square = ops.Square()
self.squeeze = ops.Squeeze()

def construct(self, action_log_prob,ent, adv, old_log_p, target_v, out_v, old_v):

def construct(self, action_log_prob, ent, adv, old_log_p, target_v, out_v, old_v):
ratio = self.exp(action_log_prob - old_log_p)

surr_loss_1 = ratio * adv
@@ -170,7 +150,7 @@ def construct(self, action_log_prob,ent, adv, old_log_p, target_v, out_v, old_v):
val_pred_clipped = old_v + ops.clip_by_value(out_v - old_v, -self.val_clip, self.val_clip)
vf_losses2 = self.square(val_pred_clipped - target_v)

critic_loss = 0.5 * self.reduce_mean(self.maximum(vf_losses1, vf_losses2))
critic_loss = 0.5 * self.reduce_mean(self.maximum(vf_losses1, vf_losses2))
loss = actor_loss + self.critic_loss_coef * critic_loss
return loss
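
Pulling the pieces of WithLossCell together, a self-contained NumPy sketch (not part of the diff) of the PPO objective this cell appears to implement; the clipped-surrogate and entropy terms hidden by the collapsed hunk are assumed to follow the standard PPO formulation, and the default coefficients below are illustrative only:

import numpy as np

def ppo_loss(logp, old_logp, adv, out_v, old_v, target_v, ent,
             clip_ratio=0.2, vf_clip=10.0, ent_coef=0.01, critic_loss_coef=0.5):
    # Clipped policy surrogate with an entropy bonus.
    ratio = np.exp(logp - old_logp)
    surr1 = ratio * adv
    surr2 = np.clip(ratio, 1.0 - clip_ratio, 1.0 + clip_ratio) * adv
    actor_loss = -np.mean(np.minimum(surr1, surr2)) - ent_coef * np.mean(ent)

    # Clipped value loss, matching the val_pred_clipped branch above.
    v_clipped = old_v + np.clip(out_v - old_v, -vf_clip, vf_clip)
    vf_loss1 = np.square(out_v - target_v)
    vf_loss2 = np.square(v_clipped - target_v)
    critic_loss = 0.5 * np.mean(np.maximum(vf_loss1, vf_loss2))

    return actor_loss + critic_loss_coef * critic_loss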