Dian xt ms #29

Open · wants to merge 27 commits into base: master

Changes from 1 commit
DQN Model ported to MindSpore (ms version)
AmiyaSX committed Feb 7, 2023
commit 31b49cbc0e20a9378453f3f367ef1799ee1cd6b3
8 changes: 8 additions & 0 deletions xt/framework/explorer.py
@@ -23,6 +23,10 @@
from copy import deepcopy
from absl import logging
import setproctitle
try:
    from xt.model.ms_compat import ms
except ImportError:  # MindSpore compat layer may be absent
    pass
from zeus.common.ipc.share_buffer import ShareBuf
from xt.framework.agent_group import AgentGroup
from zeus.common.ipc.uni_comm import UniComm
@@ -58,6 +62,10 @@ def start_explore(self):
"""Start explore process."""
signal.signal(signal.SIGINT, signal.SIG_IGN)
os.environ["CUDA_VISIBLE_DEVICES"] = str(-1)
try:
ms.set_context(device_target='CPU')
except:
pass
explored_times = 0

try:
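Note on the guarded import above: the intent is to keep the explorer importable when MindSpore is not installed, while still pinning spawned explorer processes to CPU when it is. A minimal sketch of the pattern, assuming xt.model.ms_compat simply re-exports mindspore as ms (the pin_worker_to_cpu helper below is illustrative, not part of this PR):

import os

# Optional-backend import: fall back to None so call sites can test
# availability explicitly instead of relying on a NameError later.
try:
    from xt.model.ms_compat import ms
except ImportError:
    ms = None  # MindSpore backend not installed


def pin_worker_to_cpu():
    """Force the current worker process onto CPU for both backends."""
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # hide GPUs from TensorFlow
    if ms is not None:
        ms.set_context(device_target="CPU")    # hide devices from MindSpore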
8 changes: 8 additions & 0 deletions xt/framework/predictor.py
@@ -23,6 +23,10 @@
from copy import deepcopy
from xt.algorithm import alg_builder
import setproctitle
try:
    from xt.model.ms_compat import ms
except ImportError:  # MindSpore compat layer may be absent
    pass
from zeus.common.ipc.uni_comm import UniComm
from zeus.common.ipc.message import message, get_msg_data, set_msg_info, set_msg_data, get_msg_info
from zeus.common.util.profile_stats import PredictStats, TimerRecorder
@@ -86,6 +90,10 @@ def predict(self, recv_data):

    def start(self):
        os.environ["CUDA_VISIBLE_DEVICES"] = str(-1)
        try:
            ms.set_context(device_target='CPU')
        except Exception:  # ms may not have been imported
            pass
        alg_para = self.config_info.get('alg_para')
        setproctitle.setproctitle("xt_predictor")

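Same guard in the predictor. Worth spelling out why set_context is repeated here rather than configured once in the parent: MindSpore's context is per-process runtime state, so each spawned worker has to pin its own device target; a set_context call in the parent may not carry over, in particular with the spawn start method. A hedged sketch of that behaviour (the _worker entry point is hypothetical, not part of this PR):

import multiprocessing as mp
import os


def _worker():
    # Hypothetical worker entry point: the context is set inside the
    # child process, mirroring what explorer.py and predictor.py do.
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    try:
        import mindspore as ms
        ms.set_context(device_target="CPU")
    except ImportError:
        pass  # MindSpore absent; the TensorFlow path is unaffected


if __name__ == "__main__":
    p = mp.Process(target=_worker)
    p.start()
    p.join()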
137 changes: 95 additions & 42 deletions xt/model/dqn/dqn_cnn.py
@@ -17,19 +17,20 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from xt.model.tf_compat import tf
from xt.model.tf_compat import Conv2D, Dense, Flatten, Input, Model, Adam, Lambda, K
from zeus.common.util.register import Registers
from zeus.common.util.common import import_config
from xt.model.model_ms import XTModel_MS
from xt.model.ms_utils import MSVariables
from xt.model.dqn.default_config import LR
from xt.model.dqn.dqn_mlp import layer_normalize, layer_add
from xt.model import XTModel
from xt.model.tf_utils import TFVariables
from xt.model.ms_compat import ms
from xt.model.ms_compat import Conv2d, Dense, Flatten, ReLU, Adam, MSELoss, WithLossCell, MultitypeFuncGraph, \
DynamicLossScaleUpdateCell, Cast, Cell, Tensor
from zeus.common.util.common import import_config

from zeus.common.util.register import Registers
import mindspore.ops as ops


@Registers.model
class DqnCnn(XTModel):
class DqnCnn(XTModel_MS):
"""Docstring for DqnCnn."""

def __init__(self, model_info):
@@ -40,44 +41,96 @@ def __init__(self, model_info):
        self.action_dim = model_info['action_dim']
        self.learning_rate = LR
        self.dueling = model_config.get('dueling', False)
        self.net = DqnCnnNet(state_dim=self.state_dim, action_dim=self.action_dim, dueling=self.dueling)
        super().__init__(model_info)

    def create_model(self, model_info):
        """Create Deep-Q CNN network."""
        state = Input(shape=self.state_dim, dtype="uint8")
        state1 = Lambda(lambda x: K.cast(x, dtype='float32') / 255.)(state)
        convlayer = Conv2D(32, (8, 8), strides=(4, 4), activation='relu', padding='valid')(state1)
        convlayer = Conv2D(64, (4, 4), strides=(2, 2), activation='relu', padding='valid')(convlayer)
        convlayer = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='valid')(convlayer)
        flattenlayer = Flatten()(convlayer)
        denselayer = Dense(256, activation='relu')(flattenlayer)
        value = Dense(self.action_dim, activation='linear')(denselayer)
        if self.dueling:
            adv = Dense(1, activation='linear')(denselayer)
            mean = Lambda(layer_normalize)(value)
            value = Lambda(layer_add)([adv, mean])
        model = Model(inputs=state, outputs=value)
        adam = Adam(lr=self.learning_rate, clipnorm=10.)
        model.compile(loss='mse', optimizer=adam)
        if model_info.get("summary"):
            model.summary()

        self.infer_state = tf.placeholder(tf.uint8, name="infer_input",
                                          shape=(None, ) + tuple(self.state_dim))
        self.infer_v = model(self.infer_state)
        self.actor_var = TFVariables([self.infer_v], self.sess)

        self.sess.run(tf.initialize_all_variables())
        loss_fn = MSELoss()
        adam = Adam(params=self.net.trainable_params(), learning_rate=self.learning_rate, use_amsgrad=True)
        loss_net = WithLossCell(self.net, loss_fn)
        device_target = ms.get_context("device_target")
        if device_target == 'Ascend':
            manager = DynamicLossScaleUpdateCell(loss_scale_value=2 ** 12, scale_factor=2, scale_window=1000)
            model = MyTrainOneStepCell(loss_net, adam, manager, grad_clip=True, clipnorm=10.)
        else:
            model = MyTrainOneStepCell(loss_net, adam, grad_clip=True, clipnorm=10.)
        self.actor_var = MSVariables(self.net)
        return model

    def predict(self, state):
        """
        Predict using the newest model.

        :param state:
        :return:
        """
        with self.graph.as_default():
            K.set_session(self.sess)
            feed_dict = {self.infer_state: state}
            return self.sess.run(self.infer_v, feed_dict)
        state = Tensor(state, dtype=ms.float32)
        return self.net(state).asnumpy()


class DqnCnnNet(Cell):
    def __init__(self, **descript):
        super(DqnCnnNet, self).__init__()
        self.state_dim = descript.get("state_dim")
        action_dim = descript.get("action_dim")
        self.dueling = descript.get("dueling")
        self.convlayer1 = Conv2d(self.state_dim[2], 32, kernel_size=8, stride=4, pad_mode='valid',
                                 weight_init="xavier_uniform")
        self.convlayer2 = Conv2d(32, 64, kernel_size=4, stride=2, pad_mode='valid', weight_init="xavier_uniform")
        self.convlayer3 = Conv2d(64, 64, kernel_size=3, stride=1, pad_mode='valid', weight_init="xavier_uniform")
        self.relu = ReLU()
        self.flattenlayer = Flatten()
        _dim = (
            (((self.state_dim[0] - 4) // 4 - 2) // 2 - 2)
            * (((self.state_dim[1] - 4) // 4 - 2) // 2 - 2)
            * 64
        )
        self.denselayer1 = Dense(_dim, 256, activation='relu', weight_init="xavier_uniform")
        self.denselayer2 = Dense(256, action_dim, weight_init="xavier_uniform")
        self.denselayer3 = Dense(256, 1, weight_init="xavier_uniform")

    def construct(self, x):
        out = Cast()(x.transpose((0, 3, 1, 2)), ms.float32) / 255.
        out = self.convlayer1(out)
        out = self.relu(out)
        out = self.convlayer2(out)
        out = self.relu(out)
        out = self.convlayer3(out)
        out = self.relu(out)
        out = self.flattenlayer(out)
        out = self.denselayer1(out)
        value = self.denselayer2(out)
        if self.dueling:
            adv = self.denselayer3(out)
            mean = value.sub(value.mean(axis=1, keep_dims=True))
            value = adv.add(mean)
        return value


_grad_scale = MultitypeFuncGraph("grad_scale")


@_grad_scale.register("Tensor", "Tensor")
def tensor_grad_scale(scale, grad):
    return grad * ms.ops.cast(ms.ops.Reciprocal()(scale), ms.ops.dtype(grad))


class MyTrainOneStepCell(ms.nn.TrainOneStepWithLossScaleCell):
    def __init__(self, network, optimizer, scale_sense=1, grad_clip=False, clipnorm=1.):
        self.clipnorm = clipnorm
        if isinstance(scale_sense, (int, float)):
            scale_sense = Tensor(scale_sense, dtype=ms.float32)
        super(MyTrainOneStepCell, self).__init__(network, optimizer, scale_sense)
        self.grad_clip = grad_clip

    def construct(self, state, label):
        weights = self.weights
        loss = self.network(state, label)
        scaling_sens = self.scale_sense
        status, scaling_sens = self.start_overflow_check(loss, scaling_sens)
        scaling_sens_filled = ms.ops.ones_like(loss) * ms.ops.cast(scaling_sens, ms.ops.dtype(loss))
        grads = self.grad(self.network, weights)(state, label, scaling_sens_filled)
        grads = self.hyper_map(ms.ops.partial(_grad_scale, scaling_sens), grads)
        if self.grad_clip:
            grads = ms.ops.clip_by_global_norm(grads, self.clipnorm)
        grads = self.grad_reducer(grads)
        cond = self.get_overflow_status(status, grads)
        overflow = self.process_loss_scale(cond)
        if not overflow:
            loss = ops.depend(loss, self.optimizer(grads))
        return loss
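A quick sanity check of the hard-coded _dim expression in DqnCnnNet, which folds the three valid-padding convolutions (kernel/stride 8/4, 4/2, 3/1) into closed form. For the standard 84x84 Atari input the step-by-step conv arithmetic and the closed form agree; for input sizes that do not divide evenly the closed form can drift from the true floor((n - k) / s) + 1 arithmetic, so it is worth checking:

def conv_out(n, k, s):
    """Output length of a 'valid' convolution: floor((n - k) / s) + 1."""
    return (n - k) // s + 1

h = w = 84  # standard preprocessed Atari frame
for k, s in [(8, 4), (4, 2), (3, 1)]:
    h, w = conv_out(h, k, s), conv_out(w, k, s)
flat = h * w * 64  # 64 output channels from the last conv
print(h, w, flat)  # 7 7 3136

# Matches the closed form used in DqnCnnNet for 84x84 inputs:
side = ((84 - 4) // 4 - 2) // 2 - 2
assert flat == side * side * 64 == 3136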
46 changes: 17 additions & 29 deletions xt/model/dqn/dqn_cnn_pong.py
@@ -17,14 +17,12 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from xt.model.tf_compat import tf
from xt.model.tf_compat import Conv2D, Dense, Flatten, Input, Model, Adam, Lambda, K
from xt.model.dqn.default_config import LR
from xt.model.dqn.dqn_mlp import layer_normalize, layer_add
from xt.model.dqn.dqn_cnn import DqnCnn
from xt.model.tf_utils import TFVariables

from xt.model.ms_utils import MSVariables
from xt.model.ms_compat import ms, Adam, MSELoss, WithLossCell, DynamicLossScaleUpdateCell, Tensor
from zeus.common.util.register import Registers
from xt.model.dqn.dqn_cnn import MyTrainOneStepCell


@Registers.model
@@ -33,28 +31,18 @@ class DqnCnnPong(DqnCnn):

    def create_model(self, model_info):
        """Create Deep-Q CNN network."""
        state = Input(shape=self.state_dim, dtype="int8")
        state1 = Lambda(lambda x: K.cast(x, dtype='float32') / 255.)(state)
        convlayer = Conv2D(32, (8, 8), strides=(4, 4), activation='relu', padding='valid')(state1)
        convlayer = Conv2D(64, (4, 4), strides=(2, 2), activation='relu', padding='valid')(convlayer)
        convlayer = Conv2D(64, (3, 3), strides=(1, 1), activation='relu', padding='valid')(convlayer)
        flattenlayer = Flatten()(convlayer)
        denselayer = Dense(256, activation='relu')(flattenlayer)
        value = Dense(self.action_dim, activation='linear')(denselayer)
        if self.dueling:
            adv = Dense(1, activation='linear')(denselayer)
            mean = Lambda(layer_normalize)(value)
            value = Lambda(layer_add)([adv, mean])
        model = Model(inputs=state, outputs=value)
        adam = Adam(lr=self.learning_rate, clipnorm=10.)
        model.compile(loss='mse', optimizer=adam)
        if model_info.get("summary"):
            model.summary()

        self.infer_state = tf.placeholder(tf.int8, name="infer_input",
                                          shape=(None, ) + tuple(self.state_dim))
        self.infer_v = model(self.infer_state)
        self.actor_var = TFVariables([self.infer_v], self.sess)

        self.sess.run(tf.initialize_all_variables())
        loss_fn = MSELoss()
        adam = Adam(params=self.net.trainable_params(), learning_rate=self.learning_rate, use_amsgrad=True)
        loss_net = WithLossCell(self.net, loss_fn)
        device_target = ms.get_context("device_target")
        if device_target == 'Ascend':
            manager = DynamicLossScaleUpdateCell(loss_scale_value=2 ** 12, scale_factor=2, scale_window=1000)
            model = MyTrainOneStepCell(loss_net, adam, manager, grad_clip=True, clipnorm=10.)
        else:
            model = MyTrainOneStepCell(loss_net, adam, grad_clip=True, clipnorm=10.)
        self.actor_var = MSVariables(self.net)
        return model

    def predict(self, state):
        state = Tensor(state, dtype=ms.float32)
        return self.net(state).asnumpy()
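After the port, DqnCnnPong.create_model and predict are line-for-line identical to the DqnCnn versions; the old TF override existed only to switch the input placeholder from uint8 to int8. If that dtype distinction is genuinely obsolete on the MindSpore side (an assumption worth confirming), the subclass could shrink to a plain registration:

# Possible simplification, assuming the int8-vs-uint8 input distinction
# from the TF version no longer applies: inherit everything from DqnCnn.
from xt.model.dqn.dqn_cnn import DqnCnn
from zeus.common.util.register import Registers


@Registers.model
class DqnCnnPong(DqnCnn):
    """Pong variant; network, training cell and predict come from DqnCnn."""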
90 changes: 43 additions & 47 deletions xt/model/dqn/dqn_mlp.py
@@ -17,18 +17,18 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from xt.model.tf_compat import Dense, Input, Model, Adam, tf, Lambda
from xt.model.tf_utils import TFVariables
from xt.model.dqn.default_config import HIDDEN_SIZE, NUM_LAYERS, LR
from xt.model import XTModel
from xt.model.model_ms import XTModel_MS
from zeus.common.util.common import import_config

from zeus.common.util.register import Registers
from xt.model.ms_compat import ms, Dense, Adam, MSELoss, WithLossCell, Cell, DynamicLossScaleUpdateCell, Tensor
from xt.model.ms_utils import MSVariables
from xt.model.dqn.dqn_cnn import MyTrainOneStepCell
import mindspore.ops as ops


@Registers.model
class DqnMlp(XTModel):
"""Docstring for DqnMlp."""
class DqnMlp(XTModel_MS):

    def __init__(self, model_info):
        model_config = model_info.get('model_config', None)
@@ -38,50 +38,46 @@ def __init__(self, model_info):
        self.action_dim = model_info['action_dim']
        self.learning_rate = LR
        self.dueling = model_config.get('dueling', False)
        self.net = DqnMlpNet(state_dim=self.state_dim, action_dim=self.action_dim, dueling=self.dueling)
        super().__init__(model_info)

    def create_model(self, model_info):
        """Create Deep-Q network."""
        state = Input(shape=self.state_dim)
        denselayer = Dense(HIDDEN_SIZE, activation='relu')(state)
        for _ in range(NUM_LAYERS - 1):
            denselayer = Dense(HIDDEN_SIZE, activation='relu')(denselayer)

        value = Dense(self.action_dim, activation='linear')(denselayer)
        if self.dueling:
            adv = Dense(1, activation='linear')(denselayer)
            mean = Lambda(layer_normalize)(value)
            value = Lambda(layer_add)([adv, mean])

        model = Model(inputs=state, outputs=value)
        adam = Adam(lr=self.learning_rate)
        model.compile(loss='mse', optimizer=adam)

        self.infer_state = tf.placeholder(tf.float32, name="infer_input",
                                          shape=(None, ) + tuple(self.state_dim))
        self.infer_v = model(self.infer_state)
        self.actor_var = TFVariables([self.infer_v], self.sess)

        self.sess.run(tf.initialize_all_variables())
        """Create Deep-Q network."""
        loss_fn = MSELoss()
        adam = Adam(params=self.net.trainable_params(), learning_rate=self.learning_rate, use_amsgrad=True)
        loss_net = WithLossCell(self.net, loss_fn)
        device_target = ms.get_context("device_target")
        if device_target == 'Ascend':
            manager = DynamicLossScaleUpdateCell(loss_scale_value=2 ** 12, scale_factor=2, scale_window=1000)
            model = MyTrainOneStepCell(loss_net, adam, manager, grad_clip=True, clipnorm=10.)
        else:
            model = MyTrainOneStepCell(loss_net, adam, grad_clip=True, clipnorm=10.)
        self.actor_var = MSVariables(self.net)
        return model

    def predict(self, state):
        """
        Predict using the newest model.

        :param state:
        :return:
        """
        with self.graph.as_default():

            feed_dict = {self.infer_state: state}
            return self.sess.run(self.infer_v, feed_dict)
        state = Tensor(state, dtype=ms.float32)
        return self.net(state).asnumpy()


def layer_normalize(x):
    """Normalize data."""
    return tf.subtract(x, tf.reduce_mean(x, axis=1, keep_dims=True))


def layer_add(x):
    """Compute Q given Advantage and V."""
    return x[0] + x[1]


class DqnMlpNet(Cell):
    def __init__(self, **descript):
        super(DqnMlpNet, self).__init__()
        self.state_dim = descript.get("state_dim")
        self.action_dim = descript.get("action_dim")
        self.dueling = descript.get("dueling")
        self.denselayer1 = Dense(self.state_dim[-1], HIDDEN_SIZE, activation='relu', weight_init='xavier_uniform')
        self.denselayer2 = Dense(HIDDEN_SIZE, HIDDEN_SIZE, activation='relu', weight_init='xavier_uniform')
        self.denselayer3 = Dense(HIDDEN_SIZE, self.action_dim, weight_init='xavier_uniform')
        self.denselayer4 = Dense(HIDDEN_SIZE, 1, weight_init='xavier_uniform')

    def construct(self, x):
        out = self.denselayer1(x.astype("float32"))
        for _ in range(NUM_LAYERS - 1):
            out = self.denselayer2(out)
        value = self.denselayer3(out)
        if self.dueling:
            adv = self.denselayer4(out)
            mean = value.sub(value.mean(axis=1, keep_dims=True))
            value = adv.add(mean)
        return value
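One behavioural difference from the TF graph worth flagging: the Keras loop built NUM_LAYERS - 1 distinct Dense(HIDDEN_SIZE) layers, while DqnMlpNet.construct applies the single self.denselayer2 repeatedly, so every hidden layer after the first now shares one weight matrix. If separate weights per layer are intended, a CellList restores the old topology. A sketch under that assumption (MlpTrunk is hypothetical; HIDDEN_SIZE and NUM_LAYERS stand in for the values from default_config):

import mindspore as ms
from mindspore.nn import Cell, CellList, Dense

HIDDEN_SIZE, NUM_LAYERS = 256, 3  # illustrative; real values come from default_config


class MlpTrunk(Cell):
    """Hypothetical trunk with distinct, non-shared hidden layers."""

    def __init__(self, in_dim):
        super().__init__()
        self.first = Dense(in_dim, HIDDEN_SIZE, activation='relu', weight_init='xavier_uniform')
        self.hidden = CellList([
            Dense(HIDDEN_SIZE, HIDDEN_SIZE, activation='relu', weight_init='xavier_uniform')
            for _ in range(NUM_LAYERS - 1)
        ])

    def construct(self, x):
        out = self.first(x)
        for layer in self.hidden:  # each iteration uses its own weights
            out = layer(out)
        return out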