diff --git a/README.md b/README.md
index 54e9523..4125c53 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # [Learning to Learn](https://arxiv.org/abs/1606.04474) in TensorFlow
 
-Compatible with TensorFlow 0.12.
+Compatible with TensorFlow 1.0.
 
 ## Training
 
diff --git a/meta.py b/meta.py
index a2bb889..41f4b10 100644
--- a/meta.py
+++ b/meta.py
@@ -360,7 +360,7 @@ def time_step(t, fx_array, x, state):
       fx_final = _make_with_custom_variables(make_loss, x_final)
       fx_array = fx_array.write(len_unroll, fx_final)
 
-    loss = tf.reduce_sum(fx_array.pack(), name="loss")
+    loss = tf.reduce_sum(fx_array.stack(), name="loss")
 
     # Reset the state; should be called at the beginning of an epoch.
     with tf.name_scope("reset"):
diff --git a/networks.py b/networks.py
index 9e6cf41..2c45058 100644
--- a/networks.py
+++ b/networks.py
@@ -88,7 +88,7 @@ def _convert_to_initializer(initializer):
   """
   if isinstance(initializer, str):
-    return getattr(tf, initializer + "_initializer")
+    return getattr(tf, initializer + "_initializer")(dtype=tf.float32)
   elif isinstance(initializer, np.ndarray):
     return tf.constant_initializer(initializer)
   else:
@@ -182,7 +182,7 @@ def __init__(self, output_size, layers, preprocess_name="identity",
     else:
       self._preprocess = getattr(tf, preprocess_name)
 
-    with tf.variable_scope(self._template.var_scope):
+    with tf.variable_scope(self._template.variable_scope):
       self._cores = []
       for i, size in enumerate(layers, start=1):
         name = "lstm_{}".format(i)
diff --git a/nn/base.py b/nn/base.py
index 7996513..7259768 100644
--- a/nn/base.py
+++ b/nn/base.py
@@ -145,30 +145,30 @@ def __call__(self, *args, **kwargs):
     return out
 
   @property
-  def var_scope(self):
+  def variable_scope(self):
     """Returns the variable_scope declared by the module.
 
-    It is valid for library users to access the internal templated var_scope,
+    It is valid for library users to access the internal templated variable_scope,
     but only makes sense to do so after connection. Therefore we raise an error
-    here if the var_scope is requested before connection.
+    here if the variable_scope is requested before connection.
 
-    The only case where it does make sense to access the var_scope before
+    The only case where it does make sense to access the variable_scope before
     connection is to get the post-uniquification name, which we support using
     the separate .name property.
 
     Returns:
-      var_scope: `tf.VariableScope` instance of the internal `tf.Template`.
+      variable_scope: `tf.VariableScope` instance of the internal `tf.Template`.
 
     Raises:
       NotConnectedError: If the module is not connected to the Graph.
""" self._ensure_is_connected() - return self._template.var_scope + return self._template.variable_scope @property def name(self): """Returns the name of the Module.""" - return self._template.var_scope.name + return self._template.variable_scope.name @property def is_connected(self): diff --git a/nn/batch_norm.py b/nn/batch_norm.py index 1ca307e..ba16e8a 100644 --- a/nn/batch_norm.py +++ b/nn/batch_norm.py @@ -131,7 +131,7 @@ def _set_default_initializer(self, var_name): if var_name == self.GAMMA: self._initializers[self.GAMMA] = tf.ones_initializer() elif var_name == self.BETA: - self._initializers[self.BETA] = tf.zeros_initializer + self._initializers[self.BETA] = tf.zeros_initializer() def _build_statistics_variance(self, input_batch, reduction_indices, use_batch_stats): @@ -151,15 +151,15 @@ def _build_statistics_variance(self, input_batch, "moving_mean", shape=self._mean_shape, collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.VARIABLES], - initializer=tf.zeros_initializer, + tf.GraphKeys.GLOBAL_VARIABLES], + initializer=tf.zeros_initializer(), trainable=False) self._moving_variance = tf.get_variable( "moving_variance", shape=self._mean_shape, collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.VARIABLES], + tf.GraphKeys.GLOBAL_VARIABLES], initializer=tf.ones_initializer(), trainable=False) @@ -217,15 +217,15 @@ def _build_statistics_second_moment(self, input_batch, "moving_mean", shape=self._mean_shape, collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.VARIABLES], - initializer=tf.zeros_initializer, + tf.GraphKeys.GLOBAL_VARIABLES], + initializer=tf.zeros_initializer(), trainable=False) self._moving_second_moment = tf.get_variable( "moving_second_moment", shape=self._mean_shape, collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.VARIABLES], + tf.GraphKeys.GLOBAL_VARIABLES], initializer=tf.ones_initializer(), trainable=False) diff --git a/nn/convnet.py b/nn/convnet.py index c8ab46e..b7190dc 100644 --- a/nn/convnet.py +++ b/nn/convnet.py @@ -159,7 +159,7 @@ def __init__(self, def _instantiate_layers(self): """Instantiates all the convolutional modules used in the network.""" - with tf.variable_scope(self._template.var_scope): + with tf.variable_scope(self._template.variable_scope): self._layers = tuple(conv.Conv2D(name="conv_2d_{}".format(i), output_channels=self._output_channels[i], kernel_shape=self._kernel_shapes[i], @@ -387,7 +387,7 @@ def __init__(self, def _instantiate_layers(self): """Instantiates all the convolutional modules used in the network.""" - with tf.variable_scope(self._template.var_scope): + with tf.variable_scope(self._template.variable_scope): self._layers = tuple( conv.Conv2DTranspose(name="conv_2d_transpose_{}".format(i), output_channels=self._output_channels[i], diff --git a/nn/gated_rnn.py b/nn/gated_rnn.py index fc4e85f..ad5eca5 100644 --- a/nn/gated_rnn.py +++ b/nn/gated_rnn.py @@ -42,7 +42,7 @@ import tensorflow as tf from tensorflow.python.ops import array_ops -from tensorflow.python.ops import rnn_cell +from tensorflow.contrib import rnn from nn import base from nn import basic @@ -318,11 +318,11 @@ def _build(self, inputs, prev_state, is_training=True, test_local_stats=True): gates = gates_h + gates_x + self._b else: # Parameters of gates are concatenated into one multiply for efficiency. 
-      inputs_and_hidden = tf.concat(1, [inputs, prev_hidden])
+      inputs_and_hidden = tf.concat([inputs, prev_hidden], 1)
       gates = tf.matmul(inputs_and_hidden, self._w_xh) + self._b
 
     # i = input_gate, j = new_input, f = forget_gate, o = output_gate
-    i, j, f, o = array_ops.split(1, 4, gates)
+    i, j, f, o = array_ops.split(gates, 4, 1)
 
     if self._use_peepholes:  # diagonal connections
       self._create_peephole_variables(inputs.dtype)
@@ -559,7 +559,7 @@ def create_batch_norm():
     else:
       return create_batch_norm()
 
-  class CellWithExtraInput(rnn_cell.RNNCell):
+  class CellWithExtraInput(rnn.RNNCell):
    """Wraps an RNNCell to create a new RNNCell with extra input appended.
 
    This will pass the additional input `args` and `kwargs` to the __call__
diff --git a/nn/mlp.py b/nn/mlp.py
index 2316a30..2cc7880 100644
--- a/nn/mlp.py
+++ b/nn/mlp.py
@@ -97,7 +97,7 @@ def _instantiate_layers(self):
       connected to the graph.
     """
 
-    with tf.variable_scope(self._template.var_scope):
+    with tf.variable_scope(self._template.variable_scope):
      self._layers = [basic.Linear(self._output_sizes[i],
                                   name="linear_{}".format(i),
                                   initializers=self._initializers,
diff --git a/nn/rnn_core.py b/nn/rnn_core.py
index d0504a6..02fdee5 100644
--- a/nn/rnn_core.py
+++ b/nn/rnn_core.py
@@ -30,7 +30,7 @@
 import tensorflow as tf
 
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import rnn_cell
+from tensorflow.contrib import rnn
 from tensorflow.python.util import nest
 
 from nn import base
@@ -142,7 +142,7 @@ def trainable_initial_state(batch_size, state_size, dtype, initializers=None):
 
 
 @six.add_metaclass(abc.ABCMeta)
-class RNNCore(base.AbstractModule, rnn_cell.RNNCell):
+class RNNCore(base.AbstractModule, rnn.RNNCell):
   """Superclass for Recurrent Neural Network Cores.
 
   This class defines the basic functionality that every core should implement,
diff --git a/nn/util.py b/nn/util.py
index 8fff25e..e8d4f58 100644
--- a/nn/util.py
+++ b/nn/util.py
@@ -62,7 +62,7 @@ def get_variables_in_module(module,
   Raises:
     NotConnectedError: If the module is not connected to the Graph.
""" - return get_variables_in_scope(module.var_scope, collection=collection) + return get_variables_in_scope(module.variable_scope, collection=collection) def check_initializers(initializers, keys): diff --git a/preprocess.py b/preprocess.py index d1d570e..85406a8 100644 --- a/preprocess.py +++ b/preprocess.py @@ -68,4 +68,4 @@ def _build(self, gradients): clamped_log = Clamp(min_value=-1.0)(log / self._k) # pylint: disable=not-callable sign = Clamp(min_value=-1.0, max_value=1.0)(gradients * np.exp(self._k)) # pylint: disable=not-callable - return tf.concat(ndims - 1, [clamped_log, sign]) + return tf.concat_v2([clamped_log, sign], ndims - 1) diff --git a/problems.py b/problems.py index bda145b..6917646 100644 --- a/problems.py +++ b/problems.py @@ -63,7 +63,7 @@ def get_coordinate(i): def build(): coordinates = [get_coordinate(i) for i in xrange(num_dims)] - x = tf.concat(0, [tf.expand_dims(c, 0) for c in coordinates]) + x = tf.concat_v2([tf.expand_dims(c, 0) for c in coordinates], 0) return tf.reduce_sum(tf.square(x, name="x_squared")) return build @@ -94,7 +94,7 @@ def build(): initializer=tf.random_uniform_initializer(), trainable=False) - product = tf.squeeze(tf.batch_matmul(w, tf.expand_dims(x, -1))) + product = tf.squeeze(tf.matmul(w, tf.expand_dims(x, -1))) return tf.reduce_mean(tf.reduce_sum((product - y) ** 2, 1)) return build @@ -134,7 +134,7 @@ def build(): def _xent_loss(output, labels): - loss = tf.nn.sparse_softmax_cross_entropy_with_logits(output, labels) + loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=labels) return tf.reduce_mean(loss)