diff --git a/README.md b/README.md
index 54e9523..4125c53 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # [Learning to Learn](https://arxiv.org/abs/1606.04474) in TensorFlow
 
-Compatible with TensorFlow 0.12.
+Compatible with TensorFlow 1.0.
 
 ## Training
 
diff --git a/meta.py b/meta.py
index a2bb889..41f4b10 100644
--- a/meta.py
+++ b/meta.py
@@ -360,7 +360,7 @@ def time_step(t, fx_array, x, state):
       fx_final = _make_with_custom_variables(make_loss, x_final)
       fx_array = fx_array.write(len_unroll, fx_final)
 
-    loss = tf.reduce_sum(fx_array.pack(), name="loss")
+    loss = tf.reduce_sum(fx_array.stack(), name="loss")
 
     # Reset the state; should be called at the beginning of an epoch.
     with tf.name_scope("reset"):
diff --git a/networks.py b/networks.py
index 9e6cf41..2c45058 100644
--- a/networks.py
+++ b/networks.py
@@ -88,7 +88,7 @@ def _convert_to_initializer(initializer):
   """
   if isinstance(initializer, str):
-    return getattr(tf, initializer + "_initializer")
+    return getattr(tf, initializer + "_initializer")(dtype=tf.float32)
   elif isinstance(initializer, np.ndarray):
     return tf.constant_initializer(initializer)
   else:
@@ -182,7 +182,7 @@ def __init__(self, output_size, layers, preprocess_name="identity",
     else:
       self._preprocess = getattr(tf, preprocess_name)
 
-    with tf.variable_scope(self._template.var_scope):
+    with tf.variable_scope(self._template.variable_scope):
       self._cores = []
       for i, size in enumerate(layers, start=1):
         name = "lstm_{}".format(i)
diff --git a/nn/base.py b/nn/base.py
index 7996513..7259768 100644
--- a/nn/base.py
+++ b/nn/base.py
@@ -145,30 +145,30 @@ def __call__(self, *args, **kwargs):
     return out
 
   @property
-  def var_scope(self):
+  def variable_scope(self):
     """Returns the variable_scope declared by the module.
 
-    It is valid for library users to access the internal templated var_scope,
+    It is valid for library users to access the internal templated variable_scope,
     but only makes sense to do so after connection. Therefore we raise an error
-    here if the var_scope is requested before connection.
+    here if the variable_scope is requested before connection.
 
-    The only case where it does make sense to access the var_scope before
+    The only case where it does make sense to access the variable_scope before
     connection is to get the post-uniquification name, which we support using
     the separate .name property.
 
     Returns:
-      var_scope: `tf.VariableScope` instance of the internal `tf.Template`.
+      variable_scope: `tf.VariableScope` instance of the internal `tf.Template`.
 
     Raises:
       NotConnectedError: If the module is not connected to the Graph.
""" self._ensure_is_connected() - return self._template.var_scope + return self._template.variable_scope @property def name(self): """Returns the name of the Module.""" - return self._template.var_scope.name + return self._template.variable_scope.name @property def is_connected(self): diff --git a/nn/batch_norm.py b/nn/batch_norm.py index 1ca307e..ba16e8a 100644 --- a/nn/batch_norm.py +++ b/nn/batch_norm.py @@ -131,7 +131,7 @@ def _set_default_initializer(self, var_name): if var_name == self.GAMMA: self._initializers[self.GAMMA] = tf.ones_initializer() elif var_name == self.BETA: - self._initializers[self.BETA] = tf.zeros_initializer + self._initializers[self.BETA] = tf.zeros_initializer() def _build_statistics_variance(self, input_batch, reduction_indices, use_batch_stats): @@ -151,15 +151,15 @@ def _build_statistics_variance(self, input_batch, "moving_mean", shape=self._mean_shape, collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.VARIABLES], - initializer=tf.zeros_initializer, + tf.GraphKeys.GLOBAL_VARIABLES], + initializer=tf.zeros_initializer(), trainable=False) self._moving_variance = tf.get_variable( "moving_variance", shape=self._mean_shape, collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.VARIABLES], + tf.GraphKeys.GLOBAL_VARIABLES], initializer=tf.ones_initializer(), trainable=False) @@ -217,15 +217,15 @@ def _build_statistics_second_moment(self, input_batch, "moving_mean", shape=self._mean_shape, collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.VARIABLES], - initializer=tf.zeros_initializer, + tf.GraphKeys.GLOBAL_VARIABLES], + initializer=tf.zeros_initializer(), trainable=False) self._moving_second_moment = tf.get_variable( "moving_second_moment", shape=self._mean_shape, collections=[tf.GraphKeys.MOVING_AVERAGE_VARIABLES, - tf.GraphKeys.VARIABLES], + tf.GraphKeys.GLOBAL_VARIABLES], initializer=tf.ones_initializer(), trainable=False) diff --git a/nn/convnet.py b/nn/convnet.py index c8ab46e..b7190dc 100644 --- a/nn/convnet.py +++ b/nn/convnet.py @@ -159,7 +159,7 @@ def __init__(self, def _instantiate_layers(self): """Instantiates all the convolutional modules used in the network.""" - with tf.variable_scope(self._template.var_scope): + with tf.variable_scope(self._template.variable_scope): self._layers = tuple(conv.Conv2D(name="conv_2d_{}".format(i), output_channels=self._output_channels[i], kernel_shape=self._kernel_shapes[i], @@ -387,7 +387,7 @@ def __init__(self, def _instantiate_layers(self): """Instantiates all the convolutional modules used in the network.""" - with tf.variable_scope(self._template.var_scope): + with tf.variable_scope(self._template.variable_scope): self._layers = tuple( conv.Conv2DTranspose(name="conv_2d_transpose_{}".format(i), output_channels=self._output_channels[i], diff --git a/nn/gated_rnn.py b/nn/gated_rnn.py index fc4e85f..ad5eca5 100644 --- a/nn/gated_rnn.py +++ b/nn/gated_rnn.py @@ -42,7 +42,7 @@ import tensorflow as tf from tensorflow.python.ops import array_ops -from tensorflow.python.ops import rnn_cell +from tensorflow.contrib import rnn from nn import base from nn import basic @@ -318,11 +318,11 @@ def _build(self, inputs, prev_state, is_training=True, test_local_stats=True): gates = gates_h + gates_x + self._b else: # Parameters of gates are concatenated into one multiply for efficiency. 
-      inputs_and_hidden = tf.concat(1, [inputs, prev_hidden])
+      inputs_and_hidden = tf.concat([inputs, prev_hidden], 1)
       gates = tf.matmul(inputs_and_hidden, self._w_xh) + self._b
 
     # i = input_gate, j = new_input, f = forget_gate, o = output_gate
-    i, j, f, o = array_ops.split(1, 4, gates)
+    i, j, f, o = array_ops.split(gates, 4, 1)
 
     if self._use_peepholes:  # diagonal connections
       self._create_peephole_variables(inputs.dtype)
@@ -559,7 +559,7 @@ def create_batch_norm():
     else:
       return create_batch_norm()
 
-  class CellWithExtraInput(rnn_cell.RNNCell):
+  class CellWithExtraInput(rnn.RNNCell):
    """Wraps an RNNCell to create a new RNNCell with extra input appended.
 
    This will pass the additional input `args` and `kwargs` to the __call__
diff --git a/nn/mlp.py b/nn/mlp.py
index 2316a30..2cc7880 100644
--- a/nn/mlp.py
+++ b/nn/mlp.py
@@ -97,7 +97,7 @@ def _instantiate_layers(self):
       connected to the graph.
     """
 
-    with tf.variable_scope(self._template.var_scope):
+    with tf.variable_scope(self._template.variable_scope):
      self._layers = [basic.Linear(self._output_sizes[i],
                                   name="linear_{}".format(i),
                                   initializers=self._initializers,
diff --git a/nn/rnn_core.py b/nn/rnn_core.py
index d0504a6..02fdee5 100644
--- a/nn/rnn_core.py
+++ b/nn/rnn_core.py
@@ -30,7 +30,7 @@
 import tensorflow as tf
 
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import rnn_cell
+from tensorflow.contrib import rnn
 from tensorflow.python.util import nest
 
 from nn import base
@@ -142,7 +142,7 @@ def trainable_initial_state(batch_size, state_size, dtype, initializers=None):
 
 
 @six.add_metaclass(abc.ABCMeta)
-class RNNCore(base.AbstractModule, rnn_cell.RNNCell):
+class RNNCore(base.AbstractModule, rnn.RNNCell):
   """Superclass for Recurrent Neural Network Cores.
 
   This class defines the basic functionality that every core should implement,
diff --git a/nn/util.py b/nn/util.py
index 8fff25e..e8d4f58 100644
--- a/nn/util.py
+++ b/nn/util.py
@@ -62,7 +62,7 @@ def get_variables_in_module(module,
   Raises:
     NotConnectedError: If the module is not connected to the Graph.
""" - return get_variables_in_scope(module.var_scope, collection=collection) + return get_variables_in_scope(module.variable_scope, collection=collection) def check_initializers(initializers, keys): diff --git a/preprocess.py b/preprocess.py index d1d570e..85406a8 100644 --- a/preprocess.py +++ b/preprocess.py @@ -68,4 +68,4 @@ def _build(self, gradients): clamped_log = Clamp(min_value=-1.0)(log / self._k) # pylint: disable=not-callable sign = Clamp(min_value=-1.0, max_value=1.0)(gradients * np.exp(self._k)) # pylint: disable=not-callable - return tf.concat(ndims - 1, [clamped_log, sign]) + return tf.concat_v2([clamped_log, sign], ndims - 1) diff --git a/problems.py b/problems.py index bda145b..6917646 100644 --- a/problems.py +++ b/problems.py @@ -63,7 +63,7 @@ def get_coordinate(i): def build(): coordinates = [get_coordinate(i) for i in xrange(num_dims)] - x = tf.concat(0, [tf.expand_dims(c, 0) for c in coordinates]) + x = tf.concat_v2([tf.expand_dims(c, 0) for c in coordinates], 0) return tf.reduce_sum(tf.square(x, name="x_squared")) return build @@ -94,7 +94,7 @@ def build(): initializer=tf.random_uniform_initializer(), trainable=False) - product = tf.squeeze(tf.batch_matmul(w, tf.expand_dims(x, -1))) + product = tf.squeeze(tf.matmul(w, tf.expand_dims(x, -1))) return tf.reduce_mean(tf.reduce_sum((product - y) ** 2, 1)) return build @@ -134,7 +134,7 @@ def build(): def _xent_loss(output, labels): - loss = tf.nn.sparse_softmax_cross_entropy_with_logits(output, labels) + loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, labels=labels) return tf.reduce_mean(loss)