
[BUGFIX] fix unknown parameter shapes when np_shape is turned on. #15097

Merged: 9 commits, Jun 2, 2019
22 changes: 18 additions & 4 deletions python/mxnet/gluon/parameter.py
@@ -30,7 +30,8 @@
 from .. import symbol, ndarray, initializer, context
 from ..context import Context, cpu
 from .. import autograd
-from .utils import _indent, _brief_print_list
+from .utils import _indent, _brief_print_list, shape_is_known
+from .. import is_np_shape
 
 # pylint: disable= invalid-name
 tensor_types = (symbol.Symbol, ndarray.NDArray)
@@ -156,7 +157,20 @@ def grad_req(self, req):

     @property
     def shape(self):
-        return self._shape
+        """The shape of the parameter.
+
+        By default, an unknown dimension size is 0. However, when the NumPy
+        shape semantics are turned on, an unknown dimension size is -1.
+        """
+        if self._shape is None:
+            return None
+        elif is_np_shape():
+            # Parameters shouldn't be zero-size. If one of its dimensions is 0,
+            # it means the parameter isn't initialized. In the NumPy semantics,
+            # the unknown dimension should be marked with -1.
+            return tuple(i if i != 0 else -1 for i in self._shape)
+        else:
+            return self._shape
 
     @shape.setter
     def shape(self, new_shape):
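
For illustration (not part of the patch), a minimal sketch of the new behavior; the parameter name and shape here are hypothetical:

import mxnet as mx
from mxnet import gluon

# A parameter whose second dimension is still unknown (stored internally as 0).
p = gluon.Parameter('weight', shape=(16, 0))

print(p.shape)      # (16, 0) under the default (legacy) shape semantics
with mx.np_shape(True):
    print(p.shape)  # (16, -1): the unknown dimension is now reported as -1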
@@ -269,7 +283,7 @@ def _finish_deferred_init(self):
             return
         init, ctx, default_init, data = self._deferred_init
         self._deferred_init = ()
-        assert self.shape is not None and np.prod(self.shape) > 0, \
+        assert shape_is_known(self.shape), \
             "Cannot initialize Parameter '%s' because it has " \
             "invalid shape: %s. Please specify in_units, " \
             "in_channels, etc for `Block`s."%(
@@ -380,7 +394,7 @@ def initialize(self, init=None, ctx=None, default_init=initializer.Uniform(),
             ctx = [ctx]
         if init is None:
             init = default_init if self.init is None else self.init
-        if not self.shape or np.prod(self.shape) <= 0:
+        if not shape_is_known(self.shape):
             if self._allow_deferred_init:
                 self._deferred_init = (init, ctx, default_init, None)
                 return
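
The net effect on a typical deferred-initialization workflow looks roughly like this (a sketch, assuming `Dense` defers initialization when `in_units` is not given):

import mxnet as mx
from mxnet import gluon

with mx.np_shape(True):
    dense = gluon.nn.Dense(16)        # in_units unspecified, so it is unknown
    dense.initialize()                # deferred: shape_is_known() is False here
    print(dense.weight.shape)         # (16, -1) rather than (16, 0)
    y = dense(mx.nd.zeros((8, 32)))   # first forward pass infers in_units=32
    print(dense.weight.shape)         # (16, 32): now fully known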
18 changes: 18 additions & 0 deletions python/mxnet/gluon/utils.py
@@ -38,6 +38,7 @@ class requests_failed_to_import(object):
 import numpy as np
 
 from .. import ndarray
+from ..util import is_np_shape
 
 def split_data(data, num_slice, batch_axis=0, even_split=True):
     """Splits an NDArray into `num_slice` slices along `batch_axis`.
@@ -412,3 +413,20 @@ def __enter__(self):

     def __exit__(self, ptype, value, trace):
         self.detach()
+
+def shape_is_known(shape):
+    """Check whether a shape is completely known with or without np semantics.
+
+    Please see the doc of is_np_shape for more details.
+    """
+    if shape is None:
+        return False
+    unknown_dim_size = -1 if is_np_shape() else 0
+    if len(shape) == 0:
+        return unknown_dim_size == -1
+    for dim_size in shape:
+        if dim_size == unknown_dim_size:
+            return False
+        assert dim_size > unknown_dim_size, "shape dimension size cannot be less than {}, while " \
+                                            "received {}".format(unknown_dim_size, dim_size)
+    return True
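
A quick illustration of the helper under both semantics (a sketch; `shape_is_known` is assumed to be importable from `mxnet.gluon.utils` as added above):

import mxnet as mx
from mxnet.gluon.utils import shape_is_known

print(shape_is_known(None))        # False: no shape information at all
print(shape_is_known((2, 3)))      # True: fully known in either mode
print(shape_is_known((2, 0)))      # False: 0 marks an unknown dim by default
with mx.np_shape(True):
    print(shape_is_known((2, 0)))  # True: 0 is a legal zero-size dim here
    print(shape_is_known((2, -1))) # False: -1 marks the unknown dim instead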
4 changes: 4 additions & 0 deletions python/mxnet/util.py
@@ -89,6 +89,10 @@ def is_np_shape():
     the shapes of zero-size tensors. This is turned off by default for keeping
     backward compatibility.
 
+    In the NumPy shape semantics, `-1` indicates an unknown size. For example,
+    `(-1, 2, 2)` means that the size of the first dimension is unknown. Its size
+    may be inferred during shape inference.
+
     Please note that this is designed as an infrastructure for the incoming
     MXNet-NumPy operators. Legacy operators registered in the modules
     `mx.nd` and `mx.sym` are not guaranteed to behave like their counterparts
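
A brief sketch of how the flag toggles (scoped via the `mx.np_shape` context manager used in the test below):

import mxnet as mx

print(mx.is_np_shape())      # False: legacy semantics, 0 means "unknown"
with mx.np_shape(True):
    print(mx.is_np_shape())  # True: NumPy semantics, -1 means "unknown"
print(mx.is_np_shape())      # False again once the scope exits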
16 changes: 16 additions & 0 deletions tests/python/unittest/test_gluon.py
@@ -2726,6 +2726,22 @@ def hybrid_forward(self, F, x):
     net = Net(act0, act1, shape, slice)
     check_layer_forward_withinput(net, x)
 
+@with_seed()
+def test_np_shape_parameters():
+    class Foo(gluon.Block):
+        def __init__(self, **kwargs):
+            super(Foo, self).__init__(**kwargs)
+            self.dense = gluon.nn.Dense(16)
+        def forward(self, x):
+            return self.dense(x)
+
+    with mx.np_shape(True):
+        z = mx.nd.zeros((2, 2016))
+        print(z.shape)
+        foo = Foo()
+        foo.initialize()
+        print(foo(z).shape)
+
 
 if __name__ == '__main__':
     import nose
     nose.runmodule()