cuDNN R2 #1731

Closed
wants to merge 83 commits into from
Changes from 36 commits
Commits
83 commits
2f869e7
clarify draw_net.py usage: net prototxt, not caffemodel
shelhamer Jan 25, 2015
61c63f6
[docs] ask install + hardware questions on caffe-users
shelhamer Jan 25, 2015
4cc8195
[docs] send API link to class list
shelhamer Jan 29, 2015
1f7c3de
[docs] add check mode hint to CPU-only mode error
shelhamer Jan 29, 2015
8b96472
Brief explanation of SLICE layer's attributes
boechat107 Jan 30, 2015
75d0e16
lint 1f7c3de
shelhamer Jan 30, 2015
e3c895b
Merge pull request #1817 from boechat107/patch-1
shelhamer Jan 30, 2015
1e0d49a
Correct 'epochs' to 'iterations'
Feb 16, 2015
3e9b050
Merge pull request #1879 from bamos/patch-1
sguada Feb 16, 2015
f998127
Merge pull request #1849 from BVLC/next
shelhamer Feb 20, 2015
af01b9c
Updated the path for get_ilsvrc_aux.sh to match what is found in the …
Feb 20, 2015
5ee85b7
Merge pull request #1914 from eerwitt/master
shelhamer Feb 20, 2015
eabbccd
[build] fix dynamic linking of tools
shelhamer Feb 20, 2015
682d9da
Merge pull request #1921 from shelhamer/fix-tool-linking
shelhamer Feb 20, 2015
5a26333
check caffe tool runs in runtest
shelhamer Feb 21, 2015
a1e951d
ignore pycharm files
Feb 22, 2015
fca05c3
set proper CMAKE_INSTALL_RPATH for _caffe.so and tools
Feb 22, 2015
645aa03
fixed bug in install-tree: _caffe.so installed by install(TARGET ...)…
Feb 22, 2015
5e06d16
minor cmake summary log fix
Feb 22, 2015
569ae01
cpp_lint.py fails silently with Python3 (which is the default on some…
jsupancic Feb 22, 2015
cb1f4d6
Merge pull request #1939 from Nerei/bugfix/install_rpath_for_pycaffe
shelhamer Feb 22, 2015
845f9ea
APPLE was misspelled in line 27
spmallick Feb 24, 2015
486360d
Merge pull request #1948 from spmallick/patch-1
longjon Feb 24, 2015
c091197
Merge pull request #1941 from jsupancic/cpp_lint_python2
longjon Feb 24, 2015
360dbfd
Merge pull request #1926 from shelhamer/test-caffe-tool
longjon Feb 24, 2015
54037d3
Making python3 work with cmake and the new python wrapper
philkr Feb 17, 2015
b915f9d
Merge pull request #1923 from philkr/python3_master
longjon Feb 24, 2015
2cf5089
Decoding the datum before feeding it into the reshaping data layer
philkr Feb 25, 2015
4a3887a
fixed matcaffe printout to specify num of args (now including train/t…
forresti Feb 25, 2015
d2beb8a
Replaced illegal tab in Makefile with spaces.
gustavla Feb 25, 2015
1377e1b
Makefile fix for OS X 10.10
sergeyk Feb 25, 2015
3a1195a
Merge pull request #1961 from sergeyk/master
shelhamer Feb 25, 2015
b9aa166
Merge pull request #1960 from gustavla/makefile_fix
longjon Feb 25, 2015
7651970
Fixing two bugs related to python3 and PROJECT_SOURCE_DIR
philkr Feb 25, 2015
25cdd35
Small fix (visualization) on SLICE layer's documentation
boechat107 Feb 25, 2015
a677076
Merge pull request #1955 from philkr/reshaping_encoded
shelhamer Feb 26, 2015
4fba3da
Merge pull request #1999 from boechat107/patch-2
longjon Feb 27, 2015
1434e87
Blobs are ND arrays (for N not necessarily equal to 4).
jeffdonahue Nov 26, 2014
5407f82
Add BlobShape message; use for Net input shapes
jeffdonahue Jan 1, 2015
119a1c6
add offset, {data,diff}_at nd blob accessors
jeffdonahue Feb 4, 2015
c4e9ec4
TestBlob: test that legacy BlobProtos are correctly handled by ShapeE…
jeffdonahue Nov 30, 2014
559ff3a
InnerProductLayer weights are 2D; biases are 1D
jeffdonahue Nov 26, 2014
cf9fdda
Fix sparse GaussianFiller for new IPLayer weight axes
jeffdonahue Feb 16, 2015
29581e6
InnerProductLayer can multiply along any axis
jeffdonahue Nov 29, 2014
94179cc
ConvLayer biases are 1D
jeffdonahue Nov 30, 2014
a0fa2a9
LossLayer output is 0D (scalar)
jeffdonahue Nov 26, 2014
d8c6aeb
AccuracyLayer output is 0D (scalar)
jeffdonahue Nov 30, 2014
6b8a765
AccuracyLayer generalized to N instance axes
jeffdonahue Jan 31, 2015
8e96445
Test{Net,Solver} fixes for AccuracyLayer generalization
jeffdonahue Feb 13, 2015
bf73cb1
EltwiseLayer need not assume old 4D dim names
jeffdonahue Nov 26, 2014
1b97c06
FlattenLayer: generalized Blob axes
jeffdonahue Nov 26, 2014
fb9caee
common_layers.hpp: remove unused "Blob col_bob_"
jeffdonahue Nov 26, 2014
704e524
TestConcatLayer: fix style errors
jeffdonahue Nov 26, 2014
d52e9a8
TestConcatLayer: add forward/gradient tests for concatenation along num
jeffdonahue Nov 26, 2014
8afdcd0
ConcatLayer: generalized Blob axes
jeffdonahue Nov 26, 2014
b868916
SliceLayer: generalized Blob axes
jeffdonahue Nov 26, 2014
abec302
SoftmaxLayer: generalized Blob axes
jeffdonahue Feb 15, 2015
60c288b
CuDNNSoftmaxLayer: generalized Blob axes
jeffdonahue Feb 10, 2015
94d93da
SoftmaxLossLayer generalized like SoftmaxLayer
jeffdonahue Jan 31, 2015
e2bc9f9
SplitLayer: change Reshape(n,h,c,w) to ReshapeLike(...)
jeffdonahue Nov 26, 2014
e6468e9
HDF5DataLayer shapes output according to HDF5 shape
jeffdonahue Nov 26, 2014
e56377d
DataLayer outputs 1D labels
jeffdonahue Nov 26, 2014
7c8725b
MemoryDataLayer outputs 1D labels
jeffdonahue Nov 26, 2014
c87a136
ImageDataLayer outputs 1D labels
jeffdonahue Nov 26, 2014
9505001
WindowDataLayer outputs 1D labels
jeffdonahue Nov 26, 2014
fcbb933
EuclideanLossLayer: generalized Blob axes
jeffdonahue Nov 30, 2014
7462c84
DummyDataLayer outputs blobs of arbitrary shape
jeffdonahue Jan 1, 2015
69fc1f6
Add CHECK_EQ(4, ...)s to "vision layers" to enforce that the
jeffdonahue Jan 16, 2015
269dafa
PyBlobs support generalized axes
jeffdonahue Jan 2, 2015
71df6f9
Add option not to reshape to Blob::FromProto; use when loading Blobs
jeffdonahue Jan 31, 2015
aa242aa
[pycaffe] expose Blob.reshape as *args function
longjon Mar 2, 2015
8c79d65
[pytest] use non-4d blobs in test_python_layer
longjon Mar 2, 2015
85bb397
Merge pull request #1970 from jeffdonahue/tensor-blob
shelhamer Mar 4, 2015
136139b
Merge pull request #1966 from philkr/python_fixes
longjon Mar 4, 2015
642619b
Add error checking for image mean
lukeyeager Feb 23, 2015
c0bc17c
Merge pull request #2031 from NVIDIA/image_mean
shelhamer Mar 4, 2015
dec148e
fix comment I forgot about from @shelhamer's review of #1970
jeffdonahue Mar 4, 2015
a3b0fbd
include/caffe/common.hpp: add <climits> for INT_MAX (now in blob.hpp)
jeffdonahue Mar 4, 2015
12e7317
Merge pull request #2035 from jeffdonahue/include-climits
jeffdonahue Mar 4, 2015
ccfa3dc
[pycaffe] check mean channels for transformation
shelhamer Mar 5, 2015
91a6597
switch to cuDNN R2
NV-slayton Feb 10, 2015
2ddbb04
replace cuDNN alphas and betas with coefficient values
shelhamer Feb 17, 2015
4beebcc
cuDNN pooling can pad now
shelhamer Feb 17, 2015
175 changes: 151 additions & 24 deletions include/caffe/blob.hpp
@@ -1,11 +1,17 @@
#ifndef CAFFE_BLOB_HPP_
#define CAFFE_BLOB_HPP_

#include <algorithm>
#include <string>
#include <vector>

#include "caffe/common.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/syncedmem.hpp"
#include "caffe/util/math_functions.hpp"

const int kMaxBlobAxes = INT_MAX;

namespace caffe {

/**
@@ -19,10 +25,16 @@ template <typename Dtype>
class Blob {
public:
Blob()
: data_(), diff_(), num_(0), channels_(0), height_(0), width_(0),
count_(0), capacity_(0) {}
: data_(), diff_(), count_(0), capacity_(0) {}

/// @brief Deprecated; use <code>Blob(const vector<int>& shape)</code>.
explicit Blob(const int num, const int channels, const int height,
const int width);
const int width);
explicit Blob(const vector<int>& shape);

/// @brief Deprecated; use <code>Reshape(const vector<int>& shape)</code>.
void Reshape(const int num, const int channels, const int height,
const int width);
/**
* @brief Change the dimensions of the blob, allocating new memory if
* necessary.
@@ -37,25 +49,133 @@ class Blob {
* an error; either Net::Forward or Net::Reshape need to be called to
* propagate the new input shape to higher layers.
*/
void Reshape(const int num, const int channels, const int height,
const int width);
void Reshape(const vector<int>& shape);
void Reshape(const BlobShape& shape);
void ReshapeLike(const Blob& other);
inline int num() const { return num_; }
inline int channels() const { return channels_; }
inline int height() const { return height_; }
inline int width() const { return width_; }
inline string shape_string() const {
ostringstream stream;
for (int i = 0; i < shape_.size(); ++i) {
stream << shape_[i] << " ";
}
stream << "(" << count_ << ")";
return stream.str();
}
inline const vector<int>& shape() const { return shape_; }
/**
* @brief Returns the dimension of the index-th axis (or the negative index-th
* axis from the end, if index is negative).
*
* @param index the axis index, which may be negative as it will be
* "canonicalized" using CanonicalAxisIndex.
* Dies on out of range index.
*/
inline int shape(int index) const {
return shape_[CanonicalAxisIndex(index)];
}
inline int num_axes() const { return shape_.size(); }
inline int count() const { return count_; }

/**
* @brief Compute the volume of a slice; i.e., the product of dimensions
* among a range of axes.
*
* @param start_axis The first axis to include in the slice.
*
* @param end_axis The first axis to exclude from the slice.
*/
inline int count(int start_axis, int end_axis) const {
CHECK_LE(start_axis, end_axis);
CHECK_GE(start_axis, 0);
CHECK_GE(end_axis, 0);
CHECK_LE(start_axis, num_axes());
CHECK_LE(end_axis, num_axes());
int count = 1;
for (int i = start_axis; i < end_axis; ++i) {
count *= shape(i);
}
return count;
}
/**
* @brief Compute the volume of a slice spanning from a particular first
* axis to the final axis.
*
* @param start_axis The first axis to include in the slice.
*/
inline int count(int start_axis) const {
return count(start_axis, num_axes());
}

/**
* @brief Returns the 'canonical' version of a (usually) user-specified axis,
* allowing for negative indexing (e.g., -1 for the last axis).
*
* @param index the axis index.
* If 0 <= index < num_axes(), return index.
* If -num_axes <= index <= -1, return (num_axes() - (-index)),
* e.g., the last axis index (num_axes() - 1) if index == -1,
* the second to last if index == -2, etc.
* Dies on out of range index.
*/
inline int CanonicalAxisIndex(int axis_index) const {
CHECK_GE(axis_index, -num_axes())
<< "axis " << axis_index << " out of range for " << num_axes()
<< "-D Blob with shape " << shape_string();
CHECK_LT(axis_index, num_axes())
<< "axis " << axis_index << " out of range for " << num_axes()
<< "-D Blob with shape " << shape_string();
if (axis_index < 0) {
return axis_index + num_axes();
}
return axis_index;
}

/// @brief Deprecated legacy shape accessor num: use shape(0) instead.
inline int num() const { return LegacyShape(0); }
/// @brief Deprecated legacy shape accessor channels: use shape(1) instead.
inline int channels() const { return LegacyShape(1); }
/// @brief Deprecated legacy shape accessor height: use shape(2) instead.
inline int height() const { return LegacyShape(2); }
/// @brief Deprecated legacy shape accessor width: use shape(3) instead.
inline int width() const { return LegacyShape(3); }
inline int LegacyShape(int index) const {
CHECK_LE(num_axes(), 4)
<< "Cannot use legacy accessors on Blobs with > 4 axes.";
CHECK_LT(index, 4);
CHECK_GE(index, -4);
if (index >= num_axes() || index < -num_axes()) {
// Axis is out of range, but still in [0, 3] (or [-4, -1] for reverse
// indexing) -- this special case simulates the one-padding used to fill
// extraneous axes of legacy blobs.
return 1;
}
return shape(index);
}

inline int offset(const int n, const int c = 0, const int h = 0,
const int w = 0) const {
CHECK_GE(n, 0);
CHECK_LE(n, num_);
CHECK_GE(channels_, 0);
CHECK_LE(c, channels_);
CHECK_GE(height_, 0);
CHECK_LE(h, height_);
CHECK_GE(width_, 0);
CHECK_LE(w, width_);
return ((n * channels_ + c) * height_ + h) * width_ + w;
CHECK_LE(n, num());
CHECK_GE(channels(), 0);
CHECK_LE(c, channels());
CHECK_GE(height(), 0);
CHECK_LE(h, height());
CHECK_GE(width(), 0);
CHECK_LE(w, width());
return ((n * channels() + c) * height() + h) * width() + w;
}

inline int offset(const vector<int>& indices) const {
CHECK_LE(indices.size(), num_axes());
int offset = 0;
for (int i = 0; i < num_axes(); ++i) {
offset *= shape(i);
if (indices.size() > i) {
CHECK_GE(indices[i], 0);
CHECK_LT(indices[i], shape(i));
offset += indices[i];
}
}
return offset;
}
/**
* @brief Copy from a source Blob.
@@ -71,12 +191,20 @@

inline Dtype data_at(const int n, const int c, const int h,
const int w) const {
return *(cpu_data() + offset(n, c, h, w));
return cpu_data()[offset(n, c, h, w)];
}

inline Dtype diff_at(const int n, const int c, const int h,
const int w) const {
return *(cpu_diff() + offset(n, c, h, w));
return cpu_diff()[offset(n, c, h, w)];
}

inline Dtype data_at(const vector<int>& index) const {
return cpu_data()[offset(index)];
}

inline Dtype diff_at(const vector<int>& index) const {
return cpu_diff()[offset(index)];
}

inline const shared_ptr<SyncedMemory>& data() const {
@@ -99,7 +227,7 @@ class Blob {
Dtype* mutable_cpu_diff();
Dtype* mutable_gpu_diff();
void Update();
void FromProto(const BlobProto& proto);
void FromProto(const BlobProto& proto, bool reshape = true);
void ToProto(BlobProto* proto, bool write_diff = false) const;

/// @brief Compute the sum of absolute values (L1 norm) of the data.
@@ -135,13 +263,12 @@ class Blob {
*/
void ShareDiff(const Blob& other);

bool ShapeEquals(const BlobProto& other);

protected:
shared_ptr<SyncedMemory> data_;
shared_ptr<SyncedMemory> diff_;
int num_;
int channels_;
int height_;
int width_;
vector<int> shape_;
int count_;
int capacity_;

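The doc comments above spell out the generalized-axes Blob interface from #1970: shape() with negative-index canonicalization, axis-range count(), one-padded legacy accessors, and an ND offset(). As a quick illustration, here is a minimal C++ usage sketch; it is not part of the diff, and the values in the comments follow directly from the accessors shown above.

#include <iostream>
#include <vector>
#include "caffe/blob.hpp"

int main() {
  // A 3-axis blob; shapes are no longer forced into the legacy
  // (num, channels, height, width) layout.
  std::vector<int> shape;
  shape.push_back(2);
  shape.push_back(3);
  shape.push_back(5);
  caffe::Blob<float> blob(shape);

  const int total = blob.count();      // 2 * 3 * 5 = 30
  const int inner = blob.count(1);     // product of axes 1..end = 15
  const int last  = blob.shape(-1);    // negative axes are canonicalized: 5

  // Legacy accessors still work for blobs with <= 4 axes; the missing
  // fourth axis is one-padded by LegacyShape(), so width() returns 1 here.
  const int w = blob.width();

  // ND offset: index (1, 2, 4) maps to (1 * 3 + 2) * 5 + 4 = 29.
  std::vector<int> index;
  index.push_back(1);
  index.push_back(2);
  index.push_back(4);
  const int pos = blob.offset(index);

  std::cout << total << " " << inner << " " << last << " "
            << w << " " << pos << std::endl;
  return 0;
}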
35 changes: 13 additions & 22 deletions include/caffe/common_layers.hpp
@@ -99,8 +99,8 @@ class ConcatLayer : public Layer<Dtype> {
* - K @f$ (N \times C \times H \times W) @f$
* the inputs @f$ x_K @f$
* @param top output Blob vector (length 1)
* -# @f$ (KN \times C \times H \times W) @f$ if concat_dim == 0, or
* @f$ (N \times KC \times H \times W) @f$ if concat_dim == 1:
* -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or
* @f$ (N \times KC \times H \times W) @f$ if axis == 1:
* the concatenated output @f$
* y = [\begin{array}{cccc} x_1 & x_2 & ... & x_K \end{array}]
* @f$
@@ -115,8 +115,8 @@ class ConcatLayer : public Layer<Dtype> {
*
* @param top output Blob vector (length 1), providing the error gradient with
* respect to the outputs
* -# @f$ (KN \times C \times H \times W) @f$ if concat_dim == 0, or
* @f$ (N \times KC \times H \times W) @f$ if concat_dim == 1:
* -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or
* @f$ (N \times KC \times H \times W) @f$ if axis == 1:
* containing error gradients @f$ \frac{\partial E}{\partial y} @f$
* with respect to concatenated outputs @f$ y @f$
* @param propagate_down see Layer::Backward.
@@ -137,13 +137,10 @@ class ConcatLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

Blob<Dtype> col_bob_;
int count_;
int num_;
int channels_;
int height_;
int width_;
int concat_dim_;
int num_concats_;
int concat_input_size_;
int concat_axis_;
};

/**
@@ -216,8 +213,6 @@ class FlattenLayer : public Layer<Dtype> {
*/
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

/**
* @brief Computes the error gradient w.r.t. the concatenate inputs.
@@ -230,10 +225,6 @@ class FlattenLayer : public Layer<Dtype> {
*/
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

int count_;
};

/**
@@ -362,6 +353,9 @@ class SoftmaxLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

int outer_num_;
int inner_num_;
int softmax_axis_;
/// sum_multiplier is used to carry out sum using BLAS
Blob<Dtype> sum_multiplier_;
/// scale is an intermediate Blob to hold temporary results.
@@ -458,13 +452,10 @@ class SliceLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

Blob<Dtype> col_bob_;
int count_;
int num_;
int channels_;
int height_;
int width_;
int slice_dim_;
int num_slices_;
int slice_size_;
int slice_axis_;
vector<int> slice_point_;
};

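The members that replace the old num_/channels_/height_/width_ bookkeeping above (num_concats_ and concat_input_size_, slice_axis_ and slice_size_, outer_num_ and inner_num_) all follow one pattern: a single chosen axis splits the blob into an outer count, the axis dimension itself, and an inner count. Below is a hedged sketch of that computation using only the Blob accessors from this PR; the helper name SplitCounts is made up for illustration.

#include <vector>
#include "caffe/blob.hpp"

// Per-axis bookkeeping as used by the generalized Concat/Slice/Softmax
// layers: product of axes before `axis`, the size of `axis` itself, and
// the product of axes after it.
template <typename Dtype>
void SplitCounts(const caffe::Blob<Dtype>& bottom, int axis,
                 int* outer_num, int* axis_dim, int* inner_num) {
  const int canonical = bottom.CanonicalAxisIndex(axis);
  *outer_num = bottom.count(0, canonical);   // e.g. outer_num_ / num_concats_
  *axis_dim  = bottom.shape(canonical);      // the concat/slice/softmax axis
  *inner_num = bottom.count(canonical + 1);  // e.g. inner_num_ / concat_input_size_
}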
5 changes: 2 additions & 3 deletions include/caffe/filler.hpp
@@ -79,9 +79,8 @@ class GaussianFiller : public Filler<Dtype> {
// These have num == channels == 1; width is number of inputs; height is
// number of outputs. The 'sparse' variable specifies the mean number
// of non-zero input weights for a given output.
CHECK_EQ(blob->num(), 1);
CHECK_EQ(blob->channels(), 1);
int num_outputs = blob->height();
CHECK_GE(blob->num_axes(), 1);
const int num_outputs = blob->shape(0);
Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs);
rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int)));
int* mask = reinterpret_cast<int*>(rand_vec_->mutable_cpu_data());
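With InnerProduct weights now 2D (num_outputs x num_inputs), the sparse Gaussian filler only needs the first axis, which is why the checks above relax to num_axes() >= 1 and num_outputs is read from shape(0). A hedged usage sketch follows; the function name and parameter values are illustrative, not from the PR.

#include "caffe/blob.hpp"
#include "caffe/filler.hpp"

// Fill a 2D weight blob of shape (num_outputs, num_inputs) with a sparse
// Gaussian: each weight is kept with probability sparse / num_outputs and
// zeroed otherwise, matching the non_zero_probability computed above.
void FillSparseGaussian(caffe::Blob<float>* weights, int sparse) {
  caffe::FillerParameter param;
  param.set_type("gaussian");
  param.set_std(0.01);
  param.set_sparse(sparse);
  caffe::GaussianFiller<float> filler(param);
  filler.Fill(weights);
}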
2 changes: 2 additions & 0 deletions include/caffe/loss_layers.hpp
@@ -754,6 +754,8 @@ class SoftmaxWithLossLayer : public LossLayer<Dtype> {
/// Whether to normalize the loss by the total number of values present
/// (otherwise just by the batch size).
bool normalize_;

int softmax_axis_, outer_num_, inner_num_;
};

} // namespace caffe
27 changes: 22 additions & 5 deletions python/caffe/_caffe.cpp
@@ -5,6 +5,7 @@

#include <boost/make_shared.hpp>
#include <boost/python.hpp>
#include <boost/python/raw_function.hpp>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
#include <numpy/arrayobject.h>

@@ -163,9 +164,10 @@ struct NdarrayCallPolicies : public bp::default_call_policies {
// the shape information from the blob.
void* data = PyArray_DATA(reinterpret_cast<PyArrayObject*>(result));
Py_DECREF(result);
npy_intp dims[] = {blob->num(), blob->channels(),
blob->height(), blob->width()};
PyObject* arr_obj = PyArray_SimpleNewFromData(4, dims, NPY_FLOAT32, data);
const int num_axes = blob->num_axes();
vector<npy_intp> dims(blob->shape().begin(), blob->shape().end());
PyObject *arr_obj = PyArray_SimpleNewFromData(num_axes, dims.data(),
NPY_FLOAT32, data);
// SetBaseObject steals a ref, so we need to INCREF.
Py_INCREF(pyblob.ptr());
PyArray_SetBaseObject(reinterpret_cast<PyArrayObject*>(arr_obj),
@@ -174,6 +176,20 @@ struct NdarrayCallPolicies : public bp::default_call_policies {
}
};

bp::object Blob_Reshape(bp::tuple args, bp::dict kwargs) {
if (bp::len(kwargs) > 0) {
throw std::runtime_error("Blob.reshape takes no kwargs");
}
Blob<Dtype>* self = bp::extract<Blob<Dtype>*>(args[0]);
vector<int> shape(bp::len(args) - 1);
for (int i = 1; i < bp::len(args); ++i) {
shape[i - 1] = bp::extract<int>(args[i]);
}
self->Reshape(shape);
// We need to explicitly return None to use bp::raw_function.
return bp::object();
}

BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1);

BOOST_PYTHON_MODULE(_caffe) {
@@ -218,8 +234,9 @@ BOOST_PYTHON_MODULE(_caffe) {
.add_property("channels", &Blob<Dtype>::channels)
.add_property("height", &Blob<Dtype>::height)
.add_property("width", &Blob<Dtype>::width)
.add_property("count", &Blob<Dtype>::count)
.def("reshape", &Blob<Dtype>::Reshape)
.add_property("count", static_cast<int (Blob<Dtype>::*)() const>(
&Blob<Dtype>::count))
.def("reshape", bp::raw_function(&Blob_Reshape))
.add_property("data", bp::make_function(&Blob<Dtype>::mutable_cpu_data,
NdarrayCallPolicies()))
.add_property("diff", bp::make_function(&Blob<Dtype>::mutable_cpu_diff,
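For reference, the *args-style reshape added above is a thin wrapper: Blob_Reshape packs the Python arguments into a vector<int> and forwards them to Blob::Reshape, after which the data and diff ndarrays pick up the new shape through NdarrayCallPolicies. Here is a C++ sketch of the equivalent direct call; the example dimensions are arbitrary.

#include <vector>
#include "caffe/blob.hpp"

// What pycaffe's blob.reshape(1, 3, 227, 227) forwards to on the C++ side.
void ReshapeExample(caffe::Blob<float>* blob) {
  std::vector<int> shape(4);
  shape[0] = 1;    // num
  shape[1] = 3;    // channels
  shape[2] = 227;  // height
  shape[3] = 227;  // width
  blob->Reshape(shape);
}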