
[BUGFIX] fix unknown parameter shapes when np_shape is turned on. #15097

Merged: 9 commits, Jun 2, 2019
22 changes: 18 additions & 4 deletions python/mxnet/gluon/parameter.py
@@ -30,7 +30,8 @@
 from .. import symbol, ndarray, initializer, context
 from ..context import Context, cpu
 from .. import autograd
-from .utils import _indent, _brief_print_list
+from .utils import _indent, _brief_print_list, shape_is_known
+from .. import is_np_shape
 
 # pylint: disable= invalid-name
 tensor_types = (symbol.Symbol, ndarray.NDArray)
@@ -156,7 +157,20 @@ def grad_req(self, req):

     @property
     def shape(self):
-        return self._shape
+        """The shape of the parameter.
+
+        By default, an unknown dimension size is 0. However, when the NumPy
+        shape semantics are turned on, an unknown dimension size is -1.
+        """
+        if self._shape is None:
+            return None
+        elif is_np_shape():
+            # Parameters shouldn't be zero-size. If one of its dimensions is 0,
+            # it means the parameter isn't initialized. In the NumPy semantics,
+            # the unknown dimension should be marked with -1.
+            return tuple(i if i != 0 else -1 for i in self._shape)
+        else:
+            return self._shape
 
     @shape.setter
     def shape(self, new_shape):
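
For illustration (not part of the patch), a minimal sketch of the new behavior; the parameter name and shape here are hypothetical:

import mxnet as mx
from mxnet import gluon

# A parameter whose second dimension is still unknown (stored internally as 0).
p = gluon.Parameter('weight', shape=(16, 0))

print(p.shape)      # (16, 0) under the default (legacy) shape semantics
with mx.np_shape(True):
    print(p.shape)  # (16, -1): the unknown dimension is now reported as -1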
@@ -269,7 +283,7 @@ def _finish_deferred_init(self):
             return
         init, ctx, default_init, data = self._deferred_init
         self._deferred_init = ()
-        assert self.shape is not None and np.prod(self.shape) > 0, \
+        assert shape_is_known(self.shape), \
             "Cannot initialize Parameter '%s' because it has " \
             "invalid shape: %s. Please specify in_units, " \
             "in_channels, etc for `Block`s."%(
@@ -380,7 +394,7 @@ def initialize(self, init=None, ctx=None, default_init=initializer.Uniform(),
             ctx = [ctx]
         if init is None:
             init = default_init if self.init is None else self.init
-        if not self.shape or np.prod(self.shape) <= 0:
+        if not shape_is_known(self.shape):
             if self._allow_deferred_init:
                 self._deferred_init = (init, ctx, default_init, None)
                 return
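
The net effect on a typical deferred-initialization workflow looks roughly like this (a sketch, assuming `Dense` defers initialization when `in_units` is not given):

import mxnet as mx
from mxnet import gluon

with mx.np_shape(True):
    dense = gluon.nn.Dense(16)        # in_units unspecified, so it is unknown
    dense.initialize()                # deferred: shape_is_known() is False here
    print(dense.weight.shape)         # (16, -1) rather than (16, 0)
    y = dense(mx.nd.zeros((8, 32)))   # first forward pass infers in_units=32
    print(dense.weight.shape)         # (16, 32): now fully known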
18 changes: 18 additions & 0 deletions python/mxnet/gluon/utils.py
@@ -38,6 +38,7 @@ class requests_failed_to_import(object):
 import numpy as np
 
 from .. import ndarray
+from ..util import is_np_shape
 
 def split_data(data, num_slice, batch_axis=0, even_split=True):
     """Splits an NDArray into `num_slice` slices along `batch_axis`.
@@ -412,3 +413,20 @@ def __enter__(self):

     def __exit__(self, ptype, value, trace):
         self.detach()
+
+def shape_is_known(shape):
+    """Check whether a shape is completely known with or without np semantics.
+
+    Please see the doc of is_np_shape for more details.
+    """
+    if shape is None:
+        return False
+    unknown_dim_size = -1 if is_np_shape() else 0
+    if len(shape) == 0:
+        return unknown_dim_size == -1
+    for dim_size in shape:
+        if dim_size == unknown_dim_size:
+            return False
+        assert dim_size > unknown_dim_size, "shape dimension size cannot be less than {}, while " \
+                                            "received {}".format(unknown_dim_size, dim_size)
+    return True
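
A quick illustration of the helper under both semantics (a sketch; `shape_is_known` is assumed to be importable from `mxnet.gluon.utils` as added above):

import mxnet as mx
from mxnet.gluon.utils import shape_is_known

print(shape_is_known(None))        # False: no shape information at all
print(shape_is_known((2, 3)))      # True: fully known in either mode
print(shape_is_known((2, 0)))      # False: 0 marks an unknown dim by default
with mx.np_shape(True):
    print(shape_is_known((2, 0)))  # True: 0 is a legal zero-size dim here
    print(shape_is_known((2, -1))) # False: -1 marks the unknown dim instead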
4 changes: 4 additions & 0 deletions python/mxnet/util.py
@@ -89,6 +89,10 @@ def is_np_shape():
     the shapes of zero-size tensors. This is turned off by default for keeping
     backward compatibility.
 
+    In the NumPy shape semantics, `-1` indicates an unknown size. For example,
+    `(-1, 2, 2)` means that the size of the first dimension is unknown. Its size
+    may be inferred during shape inference.
+
     Please note that this is designed as an infrastructure for the incoming
     MXNet-NumPy operators. Legacy operators registered in the modules
     `mx.nd` and `mx.sym` are not guaranteed to behave like their counterparts
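
A brief sketch of how the flag toggles (scoped via the `mx.np_shape` context manager used in the test below):

import mxnet as mx

print(mx.is_np_shape())      # False: legacy semantics, 0 means "unknown"
with mx.np_shape(True):
    print(mx.is_np_shape())  # True: NumPy semantics, -1 means "unknown"
print(mx.is_np_shape())      # False again once the scope exits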
16 changes: 16 additions & 0 deletions tests/python/unittest/test_gluon.py
@@ -2726,6 +2726,22 @@ def hybrid_forward(self, F, x):
     net = Net(act0, act1, shape, slice)
     check_layer_forward_withinput(net, x)
 
+@with_seed()
+def test_np_shape_parameters():
+    class Foo(gluon.Block):
+        def __init__(self, **kwargs):
+            super(Foo, self).__init__(**kwargs)
+            self.dense = gluon.nn.Dense(16)
+        def forward(self, x):
+            return self.dense(x)
+
+    with mx.np_shape(True):
+        z = mx.nd.zeros((2, 2016))
+        print(z.shape)
+        foo = Foo()
+        foo.initialize()
+        print(foo(z).shape)
+
 
 if __name__ == '__main__':
     import nose
     nose.runmodule()