From 2f869e7426e47d5123ee399704ff386bfdee8387 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 24 Jan 2015 18:27:15 -0800 Subject: [PATCH 01/65] clarify draw_net.py usage: net prototxt, not caffemodel --- python/draw_net.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/draw_net.py b/python/draw_net.py index ba488294275..abf701572a2 100755 --- a/python/draw_net.py +++ b/python/draw_net.py @@ -11,7 +11,7 @@ def main(argv): if len(argv) != 3: - print 'Usage: %s input_net_proto_file output_image_file' % \ + print 'Usage: %s input_net_prototxt output_image_file' % \ os.path.basename(sys.argv[0]) else: net = caffe_pb2.NetParameter() From 61c63f6d1ed4cd6a3c4b4d9229497fc89c5ef662 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 24 Jan 2015 18:28:46 -0800 Subject: [PATCH 02/65] [docs] ask install + hardware questions on caffe-users --- docs/installation.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/installation.md b/docs/installation.md index a42ef5b309c..16e319b4392 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -11,6 +11,8 @@ We have installed Caffe on Ubuntu 14.04, Ubuntu 12.04, OS X 10.9, and OS X 10.8. - [Compilation](#compilation) - [Hardware questions](#hardware_questions) +Ask installation questions on the [caffe-users](https://groups.google.com/forum/#!forum/caffe-users) mailing list. + ## Prerequisites Caffe depends on several software packages. @@ -299,4 +301,4 @@ As a workaround, if you are using Ubuntu 12.04 you can try the following steps t Once installed, check your times against our [reference performance numbers](performance_hardware.html) to make sure everything is configured properly. -Refer to the project's issue tracker for [hardware/compatibility](https://github.com/BVLC/caffe/issues?labels=hardware%2Fcompatibility&page=1&state=open). +Ask hardware questions on the [caffe-users](https://groups.google.com/forum/#!forum/caffe-users) mailing list. From 4cc8195d82f13391545b90c9c61baaca39b6df84 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 29 Jan 2015 09:26:12 -0800 Subject: [PATCH 03/65] [docs] send API link to class list --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index ccc8f750eef..bf1d9c3c78b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -46,7 +46,7 @@ Tested on Ubuntu, Red Hat, OS X. BVLC suggests a standard distribution format for Caffe models, and provides trained models. * [Developing & Contributing](/development.html)
 Guidelines for development and contributing to Caffe.
-* [API Documentation](/doxygen/)
+* [API Documentation](/doxygen/annotated.html)
Developer documentation automagically generated from code comments. ### Examples From 1f7c3dea034ef19acd1addf5aa8f4c2b94bc358c Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 29 Jan 2015 10:53:44 -0800 Subject: [PATCH 04/65] [docs] add check mode hint to CPU-only mode error --- include/caffe/util/device_alternate.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/caffe/util/device_alternate.hpp b/include/caffe/util/device_alternate.hpp index 3df28a49ac3..1a33b947991 100644 --- a/include/caffe/util/device_alternate.hpp +++ b/include/caffe/util/device_alternate.hpp @@ -7,7 +7,7 @@ // Stub out GPU calls as unavailable. -#define NO_GPU LOG(FATAL) << "CPU-only Mode: cannot make GPU call." +#define NO_GPU LOG(FATAL) << "Cannot make GPU call in CPU-only mode: check mode setting." #define STUB_GPU(classname) \ template \ From 8b9647223346a2528804cb5bc8943f2065846d7d Mon Sep 17 00:00:00 2001 From: Andre Ambrosio Boechat Date: Fri, 30 Jan 2015 13:57:31 -0200 Subject: [PATCH 05/65] Brief explanation of SLICE layer's attributes * A sample code was added. * `slice_dim` and `slice_point` attributes were explained. --- docs/tutorial/layers.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 5f8f519cdc4..34bb48050e8 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -451,6 +451,26 @@ The `CONCAT` layer is a utility layer that concatenates its multiple input blobs The `SLICE` layer is a utility layer that slices an input layer to multiple output layers along a given dimension (currently num or channel only) with given slice indices. +* Sample + + layers { + name: "slicer_label" + type: SLICE + bottom: "label" + ## Example of label with a shape N x 3 x 1 x 1 + top: "label1" + top: "label2" + top: "label3" + slice_param { + slice_dim: 1 + slice_point: 1 + slice_point: 2 + } + } + +`slice_dim` indicates the target dimension and can assume only two values: 0 for num or 1 for channel; `slice_point` indicates indexes in the selected dimension (the number of indexes must be equal to the number of top blobs minus one). + + #### Elementwise Operations `ELTWISE` From 75d0e16be912a8dd23eddd8756ee0d278c66d6ab Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 30 Jan 2015 11:25:31 -0800 Subject: [PATCH 06/65] lint 1f7c3de --- include/caffe/util/device_alternate.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/caffe/util/device_alternate.hpp b/include/caffe/util/device_alternate.hpp index 1a33b947991..4d731e26199 100644 --- a/include/caffe/util/device_alternate.hpp +++ b/include/caffe/util/device_alternate.hpp @@ -7,7 +7,7 @@ // Stub out GPU calls as unavailable. -#define NO_GPU LOG(FATAL) << "Cannot make GPU call in CPU-only mode: check mode setting." +#define NO_GPU LOG(FATAL) << "Cannot use GPU in CPU-only Caffe: check mode." 
#define STUB_GPU(classname) \ template \ From 1e0d49a39d1e38c8de2a4c24ab0bff3a71da21ff Mon Sep 17 00:00:00 2001 From: Brandon Amos Date: Mon, 16 Feb 2015 15:09:24 -0500 Subject: [PATCH 07/65] Correct 'epochs' to 'iterations' See https://github.com/BVLC/caffe/blob/master/models/bvlc_reference_caffenet/solver.prototxt --- examples/imagenet/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/imagenet/readme.md b/examples/imagenet/readme.md index 41384f9475b..c2dd62ec963 100644 --- a/examples/imagenet/readme.md +++ b/examples/imagenet/readme.md @@ -67,7 +67,7 @@ We will also lay out a protocol buffer for running the solver. Let's make a few * We will run in batches of 256, and run a total of 450,000 iterations (about 90 epochs). * For every 1,000 iterations, we test the learned net on the validation data. * We set the initial learning rate to 0.01, and decrease it every 100,000 iterations (about 20 epochs). -* Information will be displayed every 20 epochs. +* Information will be displayed every 20 iterations. * The network will be trained with momentum 0.9 and a weight decay of 0.0005. * For every 10,000 iterations, we will take a snapshot of the current status. From af01b9c7354b36620881b0d2e608df83ebcedfd5 Mon Sep 17 00:00:00 2001 From: e3 Date: Thu, 19 Feb 2015 22:07:27 -0800 Subject: [PATCH 08/65] Updated the path for get_ilsvrc_aux.sh to match what is found in the current project --- examples/imagenet/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/imagenet/readme.md b/examples/imagenet/readme.md index c2dd62ec963..a6bdf49ca4d 100644 --- a/examples/imagenet/readme.md +++ b/examples/imagenet/readme.md @@ -26,7 +26,7 @@ We assume that you already have downloaded the ImageNet training data and valida You will first need to prepare some auxiliary data for training. This data can be downloaded by: - ./data/get_ilsvrc_aux.sh + ./data/ilsvrc12/get_ilsvrc_aux.sh The training and validation input are described in `train.txt` and `val.txt` as text listing all the files and their labels. Note that we use a different indexing for labels than the ILSVRC devkit: we sort the synset names in their ASCII order, and then label them from 0 to 999. See `synset_words.txt` for the synset/name mapping. From eabbccd4d3f4ba5e1f7b37bf1e5ae3f7b67992cc Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 20 Feb 2015 11:18:47 -0800 Subject: [PATCH 09/65] [build] fix dynamic linking of tools set the right rpath for tools and examples respectively thanks for the report @mees! 
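Aside on the CPU-only guard reworked in patches 04 and 06 above: the NO_GPU / STUB_GPU macros turn every GPU entry point into a fatal error in CPU_ONLY builds. A minimal sketch of how a caller stays on the CPU code paths (illustrative only, not part of any patch in this series):

    #include "caffe/caffe.hpp"

    int main() {
      // In a CPU_ONLY build, each layer's *_gpu method is stubbed out with
      // STUB_GPU and aborts through NO_GPU ("Cannot use GPU in CPU-only
      // Caffe: check mode."), so keep the mode set to CPU before running a net.
      caffe::Caffe::set_mode(caffe::Caffe::CPU);
      return 0;
    }
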
--- Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 29827270baf..2a75d66e02a 100644 --- a/Makefile +++ b/Makefile @@ -537,7 +537,12 @@ $(TOOL_BUILD_DIR)/%: $(TOOL_BUILD_DIR)/%.bin | $(TOOL_BUILD_DIR) @ $(RM) $@ @ ln -s $(abspath $<) $@ -$(TOOL_BINS) $(EXAMPLE_BINS): %.bin : %.o | $(DYNAMIC_NAME) +$(TOOL_BINS): %.bin : %.o | $(DYNAMIC_NAME) + @ echo CXX/LD -o $@ + $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \ + -Wl,-rpath,$(ORIGIN)/../lib + +$(EXAMPLE_BINS): %.bin : %.o | $(DYNAMIC_NAME) @ echo CXX/LD -o $@ $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \ -Wl,-rpath,$(ORIGIN)/../../lib From 5a2633370f460b6a4d57c7564a38b14311420ab3 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 20 Feb 2015 20:29:53 -0800 Subject: [PATCH 10/65] check caffe tool runs in runtest --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 2a75d66e02a..642bde3571d 100644 --- a/Makefile +++ b/Makefile @@ -450,6 +450,7 @@ $(MAT$(PROJECT)_SO): $(MAT$(PROJECT)_SRC) $(STATIC_NAME) CXXLIBS="\$$CXXLIBS $(STATIC_LINK_COMMAND) $(LDFLAGS)" -output $@ runtest: $(TEST_ALL_BIN) + $(TOOL_BUILD_DIR)/caffe $(TEST_ALL_BIN) $(TEST_GPUID) --gtest_shuffle $(TEST_FILTER) pytest: py From a1e951dcf9f534796de9a0f73f4869b3df33ab58 Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Sun, 22 Feb 2015 18:58:12 +0300 Subject: [PATCH 11/65] ignore pycharm files --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 73bba6cb364..28f2aca854b 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,9 @@ # QtCreator files *.user +# PyCharm files +.idea + # OSX dir files .DS_Store From fca05c34c67701368245410e6f7c5e118e84a09f Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Sun, 22 Feb 2015 19:03:47 +0300 Subject: [PATCH 12/65] set proper CMAKE_INSTALL_RPATH for _caffe.so and tools --- cmake/Misc.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/Misc.cmake b/cmake/Misc.cmake index 608a5f13a79..39569eaf996 100644 --- a/cmake/Misc.cmake +++ b/cmake/Misc.cmake @@ -32,6 +32,11 @@ endif() set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE CACHE BOOLEAN "Use link paths for shared library rpath") set(CMAKE_MACOSX_RPATH TRUE) +list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES ${CMAKE_INSTALL_PREFIX}/lib __is_systtem_dir) +if(${__is_systtem_dir} STREQUAL -1) + set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib) +endif() + # ---[ Funny target if(UNIX OR APPLE) add_custom_target(symlink_to_build COMMAND "ln" "-sf" "${PROJECT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/build" From 645aa03207b24dc5f092686f399736c9334bc096 Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Sun, 22 Feb 2015 19:04:22 +0300 Subject: [PATCH 13/65] fixed bug in install-tree: _caffe.so installed by install(TARGET ...) was overwritten with symlink created at build time and installed with install(DIRECTORY ...) 
--- python/CMakeLists.txt | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 6afed4fa183..a2f82089cac 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -4,7 +4,7 @@ if(NOT HAVE_PYTHON) endif() include_directories(${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) -file(GLOB_RECURSE python_srcs ${PROJECT_SOURCE_DIR}/python/*.cpp) +file(GLOB_RECURSE python_srcs ${CMAKE_SOURCE_DIR}/python/*.cpp) add_library(pycaffe SHARED ${python_srcs}) target_link_libraries(pycaffe ${Caffe_LINK} ${PYTHON_LIBRARIES} ${Boost_LIBRARIES}) @@ -22,9 +22,13 @@ if(UNIX OR APPLE) endif() # ---[ Install -file(GLOB files *.py requirements.txt) -install(FILES ${files} DESTINATION python) -install(DIRECTORY caffe DESTINATION python) -install(TARGETS pycaffe DESTINATION python/caffe) +file(GLOB files1 *.py requirements.txt) +install(FILES ${files1} DESTINATION python) + +file(GLOB files2 caffe/*.py) +install(FILES ${files2} DESTINATION python/caffe) +install(TARGETS pycaffe DESTINATION python/caffe) +install(DIRECTORY caffe/imagenet caffe/proto caffe/test DESTINATION python/caffe) + From 5e06d16d14d8a46d3c7a3f82497c6cb1401e160f Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Sun, 22 Feb 2015 20:14:40 +0300 Subject: [PATCH 14/65] minor cmake sumamry log fix --- cmake/Summary.cmake | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 3f7dff6b6e0..32931942846 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -107,8 +107,9 @@ function(caffe_print_configuration_summary) caffe_status(" C++ compiler : ${CMAKE_CXX_COMPILER}") caffe_status(" Release CXX flags : ${__flags_rel}") caffe_status(" Debug CXX flags : ${__flags_deb}") - caffe_status(" BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}") caffe_status(" Build type : ${CMAKE_BUILD_TYPE}") + caffe_status("") + caffe_status(" BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}") caffe_status(" BUILD_python : ${BUILD_python}") caffe_status(" BUILD_matlab : ${BUILD_matlab}") caffe_status(" BUILD_docs : ${BUILD_docs}") @@ -116,8 +117,9 @@ function(caffe_print_configuration_summary) caffe_status("") caffe_status("Dependencies:") caffe_status(" BLAS : " APPLE THEN "Yes (vecLib)" ELSE "Yes (${BLAS})") + caffe_status(" Boost : Yes (ver. ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION})") caffe_status(" glog : Yes") - caffe_status(" gflags : Yes") + caffe_status(" gflags : Yes") caffe_status(" protobuf : " PROTOBUF_FOUND THEN "Yes (ver. ${PROTOBUF_VERSION})" ELSE "No" ) caffe_status(" lmdb : " LMDB_FOUND THEN "Yes (ver. ${LMDB_VERSION})" ELSE "No") caffe_status(" Snappy : " SNAPPY_FOUND THEN "Yes (ver. ${Snappy_VERSION})" ELSE "No" ) From 569ae01cc309fc3e14352479735a137ae53cfb62 Mon Sep 17 00:00:00 2001 From: James Supancic III Date: Sun, 22 Feb 2015 10:16:45 -0800 Subject: [PATCH 15/65] cpp_lint.py fails silently with Python3 (which is the default on some systems). This commit specifies Python2 with which cpp_lint.py works :-) --- scripts/cpp_lint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/cpp_lint.py b/scripts/cpp_lint.py index 1b7c6c0536c..f750489f4f9 100755 --- a/scripts/cpp_lint.py +++ b/scripts/cpp_lint.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python2 # # Copyright (c) 2009 Google Inc. All rights reserved. 
# From 845f9eac2b873e6d018ab83d5a32100ab443a35e Mon Sep 17 00:00:00 2001 From: spmallick Date: Mon, 23 Feb 2015 16:13:20 -0800 Subject: [PATCH 16/65] APPLE was misspelled. in Line 27 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index adea37be565..2a48e062eca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,7 +24,7 @@ caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE) include(cmake/Dependencies.cmake) # ---[ Flags -if(UNIX OR APLE) +if(UNIX OR APPLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall") endif() From 54037d3942647ff3659028e1d7437f2e8f7cf911 Mon Sep 17 00:00:00 2001 From: philkr Date: Tue, 17 Feb 2015 10:50:12 -0800 Subject: [PATCH 17/65] Making python3 work with cmake and the new python wrapper --- CMakeLists.txt | 1 + cmake/Dependencies.cmake | 39 +++++++++++++++++++++++++++++++++------ docs/installation.md | 4 ++-- python/caffe/_caffe.cpp | 4 +++- python/caffe/io.py | 9 ++++++++- python/caffe/pycaffe.py | 5 ++++- python/classify.py | 6 +++--- python/detect.py | 2 +- python/draw_net.py | 2 +- 9 files changed, 56 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index adea37be565..626d5b44459 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ caffe_option(CPU_ONLY "Build Caffe wihtout CUDA support" OFF) # TODO: rename to caffe_option(USE_CUDNN "Build Caffe with cuDNN libary support" ON IF NOT CPU_ONLY) caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON) caffe_option(BUILD_python "Build Python wrapper" ON) +set(python_version "2" CACHE STRING "Specify which python version to use") caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR APPLE) caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index aa2dcbe1d0d..b1ac96c6777 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -92,12 +92,39 @@ endif() # ---[ Python if(BUILD_python) - # disable Python 3 search - find_package(PythonInterp 2.7) - find_package(PythonLibs 2.7) - find_package(NumPy 1.7.1) - find_package(Boost 1.46 COMPONENTS python) - + if(NOT "${python_version}" VERSION_LESS "3.0.0") + # use python3 + find_package(PythonInterp 3.0) + find_package(PythonLibs 3.0) + find_package(NumPy 1.7.1) + # Find the matching boost python implementation + set(version ${PYTHONLIBS_VERSION_STRING}) + + STRING( REPLACE "." "" boost_py_version ${version} ) + find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}") + set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND}) + + while(NOT "${version}" STREQUAL "" AND NOT Boost_PYTHON_FOUND) + STRING( REGEX REPLACE "([0-9.]+).[0-9]+" "\\1" version ${version} ) + STRING( REGEX MATCHALL "([0-9.]+).[0-9]+" has_more_version ${version} ) + if("${has_more_version}" STREQUAL "") + break() + endif() + + STRING( REPLACE "." 
"" boost_py_version ${version} ) + find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}") + set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND}) + endwhile() + if(NOT Boost_PYTHON_FOUND) + find_package(Boost 1.46 COMPONENTS python) + endif() + else() + # disable Python 3 search + find_package(PythonInterp 2.7) + find_package(PythonLibs 2.7) + find_package(NumPy 1.7.1) + find_package(Boost 1.46 COMPONENTS python) + endif() if(PYTHONLIBS_FOUND AND NUMPY_FOUND AND Boost_PYTHON_FOUND) set(HAVE_PYTHON TRUE) endif() diff --git a/docs/installation.md b/docs/installation.md index 16575b54029..144e6a34f67 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -30,7 +30,7 @@ Caffe has several dependencies. Pycaffe and Matcaffe interfaces have their own natural needs. -* For Python Caffe: `Python 2.7`, `numpy (>= 1.7)`, boost-provided `boost.python` +* For Python Caffe: `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python` * For MATLAB Caffe: MATLAB with the `mex` compiler. **cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. For now cuDNN v1 is integrated but see [PR #1731](https://github.com/BVLC/caffe/pull/1731) for v2. @@ -69,7 +69,7 @@ but we suggest first installing the [Anaconda](https://store.continuum.io/cshop/ To import the `caffe` Python module after completing the installation, add the module directory to your `$PYTHONPATH` by `export PYTHONPATH=/path/to/caffe/python:$PYTHONPATH` or the like. You should not import the module in the `caffe/python/caffe` directory! -*Caffe's Python interface works with Python 2.7. Python 3 or earlier Pythons are your own adventure.* +*Caffe's Python interface works with Python 2.7. Python 3.3+ should work out of the box without protobuf support. For protobuf support please install protobuf 3.0 alpha (https://developers.google.com/protocol-buffers/). Earlier Pythons are your own adventure.* #### MATLAB diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index a5d0e64605e..03967a21029 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -275,7 +275,9 @@ BOOST_PYTHON_MODULE(_caffe) { bp::class_ >("BoolVec") .def(bp::vector_indexing_suite >()); - import_array(); + // boost python expects a void (missing) return value, while import_array + // returns NULL for python3. import_array1() forces a void return value. 
+ import_array1(); } } // namespace caffe diff --git a/python/caffe/io.py b/python/caffe/io.py index 0ce9ecfeeed..f51e3a64d36 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -3,7 +3,14 @@ from scipy.ndimage import zoom from skimage.transform import resize -from caffe.proto import caffe_pb2 +try: + # Python3 will most likely not be able to load protobuf + from caffe.proto import caffe_pb2 +except: + if sys.version_info >= (3,0): + print("Failed to include caffe_pb2, things might go wrong!") + else: + raise ## proto / datum / ndarray conversion diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 31c145d77a5..d662d6cc282 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -4,7 +4,10 @@ """ from collections import OrderedDict -from itertools import izip_longest +try: + from itertools import izip_longest +except: + from itertools import zip_longest as izip_longest import numpy as np from ._caffe import Net, SGDSolver diff --git a/python/classify.py b/python/classify.py index d435a572266..81d06369341 100755 --- a/python/classify.py +++ b/python/classify.py @@ -103,7 +103,7 @@ def main(argv): channel_swap=channel_swap) if args.gpu: - print 'GPU mode' + print('GPU mode') # Load numpy array (.npy), directory glob (*.jpg), or image file. args.input_file = os.path.expanduser(args.input_file) @@ -115,12 +115,12 @@ def main(argv): else: inputs = [caffe.io.load_image(args.input_file)] - print "Classifying %d inputs." % len(inputs) + print("Classifying %d inputs." % len(inputs)) # Classify. start = time.time() predictions = classifier.predict(inputs, not args.center_only) - print "Done in %.2f s." % (time.time() - start) + print("Done in %.2f s." % (time.time() - start)) # Save np.save(args.output_file, predictions) diff --git a/python/detect.py b/python/detect.py index cb0c2645761..d395bd97abf 100755 --- a/python/detect.py +++ b/python/detect.py @@ -115,7 +115,7 @@ def main(argv): context_pad=args.context_pad) if args.gpu: - print 'GPU mode' + print('GPU mode') # Load input. t = time.time() diff --git a/python/draw_net.py b/python/draw_net.py index 4457b793e86..6320f775ef7 100755 --- a/python/draw_net.py +++ b/python/draw_net.py @@ -36,7 +36,7 @@ def main(): args = parse_args() net = caffe_pb2.NetParameter() text_format.Merge(open(args.input_net_proto_file).read(), net) - print 'Drawing net to %s' % args.output_image_file + print('Drawing net to %s' % args.output_image_file) caffe.draw.draw_net_to_file(net, args.output_image_file, args.rankdir) From 2cf5089d273e0e46f51c1b4b7aa018cbf3b983fe Mon Sep 17 00:00:00 2001 From: philkr Date: Tue, 24 Feb 2015 16:02:06 -0800 Subject: [PATCH 18/65] Decoding the datum before feeding it into the reshaping data layer --- src/caffe/layers/data_layer.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 8877caf89c8..1861090f953 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -89,9 +89,17 @@ void DataLayer::InternalThreadEntry() { // Reshape on single input batches for inputs of varying dimension. 
const int batch_size = this->layer_param_.data_param().batch_size(); const int crop_size = this->layer_param_.transform_param().crop_size(); + bool force_color = this->layer_param_.data_param().force_encoded_color(); if (batch_size == 1 && crop_size == 0) { Datum datum; datum.ParseFromString(cursor_->value()); + if (datum.encoded()) { + if (force_color) { + DecodeDatum(&datum, true); + } else { + DecodeDatumNative(&datum); + } + } this->prefetch_data_.Reshape(1, datum.channels(), datum.height(), datum.width()); this->transformed_data_.Reshape(1, datum.channels(), @@ -104,7 +112,6 @@ void DataLayer::InternalThreadEntry() { if (this->output_labels_) { top_label = this->prefetch_label_.mutable_cpu_data(); } - bool force_color = this->layer_param_.data_param().force_encoded_color(); for (int item_id = 0; item_id < batch_size; ++item_id) { timer.Start(); // get a blob From 4a3887ab1791f7a0ea6d17cafb861f53398a42d5 Mon Sep 17 00:00:00 2001 From: forresti Date: Tue, 24 Feb 2015 16:51:56 -0800 Subject: [PATCH 19/65] fixed matcaffe printout to specify num of args (now including train/test phase) --- matlab/caffe/matcaffe.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matlab/caffe/matcaffe.cpp b/matlab/caffe/matcaffe.cpp index 996d3d2149c..da37d920b20 100644 --- a/matlab/caffe/matcaffe.cpp +++ b/matlab/caffe/matcaffe.cpp @@ -272,7 +272,7 @@ static void get_init_key(MEX_ARGS) { static void init(MEX_ARGS) { if (nrhs != 3) { ostringstream error_msg; - error_msg << "Expected 2 arguments, got " << nrhs; + error_msg << "Expected 3 arguments, got " << nrhs; mex_error(error_msg.str()); } From d2beb8ab20b55aebd25a2fb1f3eed3cf2923efaa Mon Sep 17 00:00:00 2001 From: Gustav Larsson Date: Tue, 24 Feb 2015 23:22:15 -0600 Subject: [PATCH 20/65] Replaced illegal tab in Makefile with spaces. Commands, such as $(error ...), are not allowed to be indented with tabs outside of targets, throwing an error instead of outputting the actual error. The solution is to use innocuous spaces instead. Ideally, spaces should be used everywhere outside targets, but since make does not mind it if variable assignments are tab-indented outside targets, a complete overhaul is not necessary. However, if more errors are added, it might make more sense to be consistent. Also, make will already add a period so I removed it. --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 642bde3571d..033473ac45b 100644 --- a/Makefile +++ b/Makefile @@ -261,7 +261,8 @@ ifneq (,$(findstring clang++,$(CXX))) else ifneq (,$(findstring g++,$(CXX))) STATIC_LINK_COMMAND := -Wl,--whole-archive $(STATIC_NAME) -Wl,--no-whole-archive else - $(error Cannot static link with the $(CXX) compiler.) 
+ # The following line must not be indented with a tab, since we are not inside a target + $(error Cannot static link with the $(CXX) compiler) endif # Debugging From 1377e1be4ee70e9f57f8cef53d767603124ee84f Mon Sep 17 00:00:00 2001 From: Sergey Karayev Date: Tue, 24 Feb 2015 21:52:32 -0800 Subject: [PATCH 21/65] Makefile fix for OS X 10.10 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 642bde3571d..933e9be93f3 100644 --- a/Makefile +++ b/Makefile @@ -319,7 +319,7 @@ else # 10.10 has accelerate while 10.9 has veclib XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep -o 'version: 6') ifneq (,$(findstring version: 6,$(XCODE_CLT_VER))) - BLAS_INCLUDE ?= /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.10.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ + BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ LDFLAGS += -framework Accelerate else BLAS_INCLUDE ?= /System/Library/Frameworks/vecLib.framework/Versions/Current/Headers/ From 76519702a415467a61a3c1e9ce706f41647c7faf Mon Sep 17 00:00:00 2001 From: philkr Date: Wed, 25 Feb 2015 08:17:06 -0800 Subject: [PATCH 22/65] Fixing two bugs related to python3 and PROJECT_SOURCE_DIR --- python/CMakeLists.txt | 2 +- python/caffe/pycaffe.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a2f82089cac..df0401daa1c 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -4,7 +4,7 @@ if(NOT HAVE_PYTHON) endif() include_directories(${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) -file(GLOB_RECURSE python_srcs ${CMAKE_SOURCE_DIR}/python/*.cpp) +file(GLOB_RECURSE python_srcs ${PROJECT_SOURCE_DIR}/python/*.cpp) add_library(pycaffe SHARED ${python_srcs}) target_link_libraries(pycaffe ${Caffe_LINK} ${PYTHON_LIBRARIES} ${Boost_LIBRARIES}) diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index d662d6cc282..9cd894a576d 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -41,12 +41,12 @@ def _Net_params(self): @property def _Net_inputs(self): - return [self.blobs.keys()[i] for i in self._inputs] + return [list(self.blobs.keys())[i] for i in self._inputs] @property def _Net_outputs(self): - return [self.blobs.keys()[i] for i in self._outputs] + return [list(self.blobs.keys())[i] for i in self._outputs] def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): From 25cdd35c9ab1447f06040720243526fc8d0343c5 Mon Sep 17 00:00:00 2001 From: Andre Ambrosio Boechat Date: Wed, 25 Feb 2015 14:56:18 -0300 Subject: [PATCH 23/65] Small fix (visualization) on SLICE layer's documentation The sample was missing some additional spaces to be correctly rendered on the HTML. The mistake was mine. 
--- docs/tutorial/layers.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 34bb48050e8..839939f5ad6 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -453,20 +453,20 @@ The `SLICE` layer is a utility layer that slices an input layer to multiple outp * Sample - layers { - name: "slicer_label" - type: SLICE - bottom: "label" - ## Example of label with a shape N x 3 x 1 x 1 - top: "label1" - top: "label2" - top: "label3" - slice_param { - slice_dim: 1 - slice_point: 1 - slice_point: 2 - } - } + layers { + name: "slicer_label" + type: SLICE + bottom: "label" + ## Example of label with a shape N x 3 x 1 x 1 + top: "label1" + top: "label2" + top: "label3" + slice_param { + slice_dim: 1 + slice_point: 1 + slice_point: 2 + } + } `slice_dim` indicates the target dimension and can assume only two values: 0 for num or 1 for channel; `slice_point` indicates indexes in the selected dimension (the number of indexes must be equal to the number of top blobs minus one). From 1434e87a7410835500a8feb2e7b2f96431b0c00a Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 25 Nov 2014 18:17:45 -0800 Subject: [PATCH 24/65] Blobs are ND arrays (for N not necessarily equals 4). vector shape_ instead of (num, channels, height, width). --- include/caffe/blob.hpp | 146 ++++++++++++++++++++++++++++++----- src/caffe/blob.cpp | 92 +++++++++++++++++----- src/caffe/net.cpp | 25 +----- src/caffe/proto/caffe.proto | 7 +- src/caffe/solver.cpp | 14 +--- src/caffe/test/test_blob.cpp | 5 +- 6 files changed, 213 insertions(+), 76 deletions(-) diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index 42e4420408c..2d4df34ec6b 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -1,11 +1,17 @@ #ifndef CAFFE_BLOB_HPP_ #define CAFFE_BLOB_HPP_ +#include +#include +#include + #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" #include "caffe/syncedmem.hpp" #include "caffe/util/math_functions.hpp" +const int kMaxBlobAxes = INT_MAX; + namespace caffe { /** @@ -19,10 +25,16 @@ template class Blob { public: Blob() - : data_(), diff_(), num_(0), channels_(0), height_(0), width_(0), - count_(0), capacity_(0) {} + : data_(), diff_(), count_(0), capacity_(0) {} + + /// @brief Deprecated; use Blob(const vector& shape). explicit Blob(const int num, const int channels, const int height, - const int width); + const int width); + explicit Blob(const vector& shape); + + /// @brief Deprecated; use Reshape(const vector& shape). + void Reshape(const int num, const int channels, const int height, + const int width); /** * @brief Change the dimensions of the blob, allocating new memory if * necessary. @@ -37,25 +49,118 @@ class Blob { * an error; either Net::Forward or Net::Reshape need to be called to * propagate the new input shape to higher layers. 
*/ - void Reshape(const int num, const int channels, const int height, - const int width); + void Reshape(const vector& shape); void ReshapeLike(const Blob& other); - inline int num() const { return num_; } - inline int channels() const { return channels_; } - inline int height() const { return height_; } - inline int width() const { return width_; } + inline string shape_string() const { + ostringstream stream; + for (int i = 0; i < shape_.size(); ++i) { + stream << shape_[i] << " "; + } + stream << "(" << count_ << ")"; + return stream.str(); + } + inline const vector& shape() const { return shape_; } + /** + * @brief Returns the dimension of the index-th axis (or the negative index-th + * axis from the end, if index is negative). + * + * @param index the axis index, which may be negative as it will be + * "canonicalized" using CanonicalAxisIndex. + * Dies on out of range index. + */ + inline int shape(int index) const { + return shape_[CanonicalAxisIndex(index)]; + } + inline int num_axes() const { return shape_.size(); } inline int count() const { return count_; } + + /** + * @brief Compute the volume of a slice; i.e., the product of dimensions + * among a range of axes. + * + * @param start_axis The first axis to include in the slice. + * + * @param end_axis The first axis to exclude from the slice. + */ + inline int count(int start_axis, int end_axis) const { + CHECK_LE(start_axis, end_axis); + CHECK_GE(start_axis, 0); + CHECK_GE(end_axis, 0); + CHECK_LE(start_axis, num_axes()); + CHECK_LE(end_axis, num_axes()); + int count = 1; + for (int i = start_axis; i < end_axis; ++i) { + count *= shape(i); + } + return count; + } + /** + * @brief Compute the volume of a slice spanning from a particular first + * axis to the final axis. + * + * @param start_axis The first axis to include in the slice. + */ + inline int count(int start_axis) const { + return count(start_axis, num_axes()); + } + + /** + * @brief Returns the 'canonical' version of a (usually) user-specified axis, + * allowing for negative indexing (e.g., -1 for the last axis). + * + * @param index the axis index. + * If 0 <= index < num_axes(), return index. + * If -num_axes <= index <= -1, return (num_axes() - (-index)), + * e.g., the last axis index (num_axes() - 1) if index == -1, + * the second to last if index == -2, etc. + * Dies on out of range index. + */ + inline int CanonicalAxisIndex(int axis_index) const { + CHECK_GE(axis_index, -num_axes()) + << "axis " << axis_index << " out of range for " << num_axes() + << "-D Blob with shape " << shape_string(); + CHECK_LT(axis_index, num_axes()) + << "axis " << axis_index << " out of range for " << num_axes() + << "-D Blob with shape " << shape_string(); + if (axis_index < 0) { + return axis_index + num_axes(); + } + return axis_index; + } + + /// @brief Deprecated legacy shape accessor num: use shape(0) instead. + inline int num() const { return LegacyShape(0); } + /// @brief Deprecated legacy shape accessor channels: use shape(1) instead. + inline int channels() const { return LegacyShape(1); } + /// @brief Deprecated legacy shape accessor height: use shape(2) instead. + inline int height() const { return LegacyShape(2); } + /// @brief Deprecated legacy shape accessor width: use shape(3) instead. 
+ inline int width() const { return LegacyShape(3); } + inline int LegacyShape(int index) const { + CHECK_LE(num_axes(), 4) + << "Cannot use legacy accessors on Blobs with > 4 axes."; + CHECK_LT(index, 4); + CHECK_GE(index, -4); + if (index >= num_axes() || index < -num_axes()) { + // Axis is out of range, but still in [0, 3] (or [-4, -1] for reverse + // indexing) -- this special case simulates the one-padding used to fill + // extraneous axes of legacy blobs. + return 1; + } + return shape(index); + } + inline int offset(const int n, const int c = 0, const int h = 0, const int w = 0) const { CHECK_GE(n, 0); - CHECK_LE(n, num_); - CHECK_GE(channels_, 0); - CHECK_LE(c, channels_); - CHECK_GE(height_, 0); - CHECK_LE(h, height_); - CHECK_GE(width_, 0); - CHECK_LE(w, width_); - return ((n * channels_ + c) * height_ + h) * width_ + w; + CHECK_LE(n, num()); + CHECK_GE(channels(), 0); + CHECK_LE(c, channels()); + CHECK_GE(height(), 0); + CHECK_LE(h, height()); + CHECK_GE(width(), 0); + CHECK_LE(w, width()); + return ((n * channels() + c) * height() + h) * width() + w; } /** * @brief Copy from a source Blob. @@ -135,13 +240,12 @@ class Blob { */ void ShareDiff(const Blob& other); + bool ShapeEquals(const BlobProto& other); + protected: shared_ptr data_; shared_ptr diff_; - int num_; - int channels_; - int height_; - int width_; + vector shape_; int count_; int capacity_; diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index fbc1361a19d..4a6332f382d 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -1,3 +1,6 @@ +#include +#include + #include "caffe/blob.hpp" #include "caffe/common.hpp" #include "caffe/syncedmem.hpp" @@ -8,15 +11,24 @@ namespace caffe { template void Blob::Reshape(const int num, const int channels, const int height, const int width) { - CHECK_GE(num, 0); - CHECK_GE(channels, 0); - CHECK_GE(height, 0); - CHECK_GE(width, 0); - num_ = num; - channels_ = channels; - height_ = height; - width_ = width; - count_ = num_ * channels_ * height_ * width_; + vector shape(4); + shape[0] = num; + shape[1] = channels; + shape[2] = height; + shape[3] = width; + Reshape(shape); +} + +template +void Blob::Reshape(const vector& shape) { + CHECK_LE(shape.size(), kMaxBlobAxes); + count_ = 1; + shape_.resize(shape.size()); + for (int i = 0; i < shape.size(); ++i) { + CHECK_GE(shape[i], 0); + count_ *= shape[i]; + shape_[i] = shape[i]; + } if (count_ > capacity_) { capacity_ = count_; data_.reset(new SyncedMemory(capacity_ * sizeof(Dtype))); @@ -26,7 +38,7 @@ void Blob::Reshape(const int num, const int channels, const int height, template void Blob::ReshapeLike(const Blob& other) { - Reshape(other.num(), other.channels(), other.height(), other.width()); + Reshape(other.shape()); } template @@ -37,6 +49,13 @@ Blob::Blob(const int num, const int channels, const int height, Reshape(num, channels, height, width); } +template +Blob::Blob(const vector& shape) + // capacity_ must be initialized before calling Reshape + : capacity_(0) { + Reshape(shape); +} + template const Dtype* Blob::cpu_data() const { CHECK(data_); @@ -345,12 +364,34 @@ void Blob::scale_diff(Dtype scale_factor) { } } +template +bool Blob::ShapeEquals(const BlobProto& other) { + if (other.has_num() || other.has_channels() || + other.has_height() || other.has_width()) { + // Using deprecated 4D Blob dimensions -- + // shape is (num, channels, height, width). + // Note: we do not use the normal Blob::num(), Blob::channels(), etc. 
+ // methods as these index from the beginning of the blob shape, where legacy + // parameter blobs were indexed from the end of the blob shape (e.g., bias + // Blob shape (1 x 1 x 1 x N), IP layer weight Blob shape (1 x 1 x M x N)). + return shape_.size() <= 4 && + LegacyShape(-4) == other.num() && + LegacyShape(-3) == other.channels() && + LegacyShape(-2) == other.height() && + LegacyShape(-1) == other.width(); + } + vector other_shape(other.dim_size()); + for (int i = 0; i < other.dim_size(); ++i) { + other_shape[i] = other.dim(i); + } + return shape_ == other_shape; +} + template void Blob::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { - if (num_ != source.num() || channels_ != source.channels() || - height_ != source.height() || width_ != source.width()) { + if (source.count() != count_ || source.shape() != shape_) { if (reshape) { - Reshape(source.num(), source.channels(), source.height(), source.width()); + ReshapeLike(source); } else { LOG(FATAL) << "Trying to copy blobs of different sizes."; } @@ -381,7 +422,23 @@ void Blob::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { template void Blob::FromProto(const BlobProto& proto) { - Reshape(proto.num(), proto.channels(), proto.height(), proto.width()); + vector shape; + if (proto.has_num() || proto.has_channels() || + proto.has_height() || proto.has_width()) { + // Using deprecated 4D Blob dimensions -- + // shape is (num, channels, height, width). + shape.resize(4); + shape[0] = proto.num(); + shape[1] = proto.channels(); + shape[2] = proto.height(); + shape[3] = proto.width(); + } else { + shape.resize(proto.dim_size()); + for (int i = 0; i < proto.dim_size(); ++i) { + shape[i] = proto.dim(i); + } + } + Reshape(shape); // copy data Dtype* data_vec = mutable_cpu_data(); for (int i = 0; i < count_; ++i) { @@ -397,10 +454,9 @@ void Blob::FromProto(const BlobProto& proto) { template void Blob::ToProto(BlobProto* proto, bool write_diff) const { - proto->set_num(num_); - proto->set_channels(channels_); - proto->set_height(height_); - proto->set_width(width_); + for (int i = 0; i < shape_.size(); ++i) { + proto->add_dim(shape_[i]); + } proto->clear_data(); proto->clear_diff(); const Dtype* data_vec = cpu_data(); diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index c359be9b575..a6aa917b556 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -109,11 +109,7 @@ void Net::Init(const NetParameter& in_param) { blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0)); } blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id); - LOG(INFO) << "Top shape: " << top_vecs_[layer_id][top_id]->num() << " " - << top_vecs_[layer_id][top_id]->channels() << " " - << top_vecs_[layer_id][top_id]->height() << " " - << top_vecs_[layer_id][top_id]->width() << " (" - << top_vecs_[layer_id][top_id]->count() << ")"; + LOG(INFO) << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string(); if (layer->loss(top_id)) { LOG(INFO) << " with loss weight " << layer->loss(top_id); } @@ -427,14 +423,7 @@ void Net::AppendParam(const NetParameter& param, const int layer_id, << "Shared parameter blobs must have the same count."; } else { // Strict dimension checking -- all dims must be the same. 
- CHECK_EQ(this_blob->num(), owner_blob->num()) - << "Shared parameter blobs must have the same num."; - CHECK_EQ(this_blob->channels(), owner_blob->channels()) - << "Shared parameter blobs must have the same channels."; - CHECK_EQ(this_blob->height(), owner_blob->height()) - << "Shared parameter blobs must have the same height."; - CHECK_EQ(this_blob->width(), owner_blob->width()) - << "Shared parameter blobs must have the same width."; + CHECK(this_blob->shape() == owner_blob->shape()); } layers_[layer_id]->blobs()[param_id]->ShareData( *layers_[owner_layer_id]->blobs()[owner_param_id]); @@ -640,10 +629,7 @@ void Net::ShareTrainedLayersWith(const Net* other) { << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { Blob* source_blob = source_layer->blobs()[j].get(); - CHECK_EQ(target_blobs[j]->num(), source_blob->num()); - CHECK_EQ(target_blobs[j]->channels(), source_blob->channels()); - CHECK_EQ(target_blobs[j]->height(), source_blob->height()); - CHECK_EQ(target_blobs[j]->width(), source_blob->width()); + CHECK(target_blobs[j]->shape() == source_blob->shape()); target_blobs[j]->ShareData(*source_blob); } } @@ -707,10 +693,7 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param) { CHECK_EQ(target_blobs.size(), source_layer.blobs_size()) << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { - CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num()); - CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels()); - CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height()); - CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width()); + CHECK(target_blobs[j]->ShapeEquals(source_layer.blobs(j))); target_blobs[j]->FromProto(source_layer.blobs(j)); } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 84b475ce3cd..e82b75d281b 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -3,12 +3,15 @@ syntax = "proto2"; package caffe; message BlobProto { + repeated int32 dim = 7 [packed = true]; + repeated float data = 5 [packed = true]; + repeated float diff = 6 [packed = true]; + + // 4D dimensions -- deprecated. Use "dim" instead. 
optional int32 num = 1 [default = 0]; optional int32 channels = 2 [default = 0]; optional int32 height = 3 [default = 0]; optional int32 width = 4 [default = 0]; - repeated float data = 5 [packed = true]; - repeated float diff = 6 [packed = true]; } // The BlobProtoVector is simply a way to pass multiple blobproto instances diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 8ed8aec2fc8..034390e6824 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -420,16 +420,10 @@ void SGDSolver::PreSolve() { update_.clear(); temp_.clear(); for (int i = 0; i < net_params.size(); ++i) { - const Blob* net_param = net_params[i].get(); - history_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); - update_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); - temp_.push_back(shared_ptr >(new Blob( - net_param->num(), net_param->channels(), net_param->height(), - net_param->width()))); + const vector& shape = net_params[i]->shape(); + history_.push_back(shared_ptr >(new Blob(shape))); + update_.push_back(shared_ptr >(new Blob(shape))); + temp_.push_back(shared_ptr >(new Blob(shape))); } } diff --git a/src/caffe/test/test_blob.cpp b/src/caffe/test/test_blob.cpp index e0678061173..a654896c654 100644 --- a/src/caffe/test/test_blob.cpp +++ b/src/caffe/test/test_blob.cpp @@ -31,10 +31,7 @@ TYPED_TEST(BlobSimpleTest, TestInitialization) { EXPECT_EQ(this->blob_preshaped_->height(), 4); EXPECT_EQ(this->blob_preshaped_->width(), 5); EXPECT_EQ(this->blob_preshaped_->count(), 120); - EXPECT_EQ(this->blob_->num(), 0); - EXPECT_EQ(this->blob_->channels(), 0); - EXPECT_EQ(this->blob_->height(), 0); - EXPECT_EQ(this->blob_->width(), 0); + EXPECT_EQ(this->blob_->num_axes(), 0); EXPECT_EQ(this->blob_->count(), 0); } From 5407f82a184a0d35cc1a9265fc7cfa61dae22517 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 31 Dec 2014 16:05:52 -0800 Subject: [PATCH 25/65] Add BlobShape message; use for Net input shapes --- include/caffe/blob.hpp | 1 + src/caffe/blob.cpp | 25 ++++++++++++++++++------- src/caffe/net.cpp | 24 ++++++++++++++++++------ src/caffe/proto/caffe.proto | 16 +++++++++++++--- 4 files changed, 50 insertions(+), 16 deletions(-) diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index 2d4df34ec6b..07b996ee392 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -50,6 +50,7 @@ class Blob { * propagate the new input shape to higher layers. 
*/ void Reshape(const vector& shape); + void Reshape(const BlobShape& shape); void ReshapeLike(const Blob& other); inline string shape_string() const { ostringstream stream; diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index 4a6332f382d..c65dc2dd1ac 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -36,6 +36,16 @@ void Blob::Reshape(const vector& shape) { } } +template +void Blob::Reshape(const BlobShape& shape) { + CHECK_LE(shape.dim_size(), kMaxBlobAxes); + vector shape_vec(shape.dim_size()); + for (int i = 0; i < shape.dim_size(); ++i) { + shape_vec[i] = shape.dim(i); + } + Reshape(shape_vec); +} + template void Blob::ReshapeLike(const Blob& other) { Reshape(other.shape()); @@ -380,9 +390,9 @@ bool Blob::ShapeEquals(const BlobProto& other) { LegacyShape(-2) == other.height() && LegacyShape(-1) == other.width(); } - vector other_shape(other.dim_size()); - for (int i = 0; i < other.dim_size(); ++i) { - other_shape[i] = other.dim(i); + vector other_shape(other.shape().dim_size()); + for (int i = 0; i < other.shape().dim_size(); ++i) { + other_shape[i] = other.shape().dim(i); } return shape_ == other_shape; } @@ -433,9 +443,9 @@ void Blob::FromProto(const BlobProto& proto) { shape[2] = proto.height(); shape[3] = proto.width(); } else { - shape.resize(proto.dim_size()); - for (int i = 0; i < proto.dim_size(); ++i) { - shape[i] = proto.dim(i); + shape.resize(proto.shape().dim_size()); + for (int i = 0; i < proto.shape().dim_size(); ++i) { + shape[i] = proto.shape().dim(i); } } Reshape(shape); @@ -454,8 +464,9 @@ void Blob::FromProto(const BlobProto& proto) { template void Blob::ToProto(BlobProto* proto, bool write_diff) const { + proto->clear_shape(); for (int i = 0; i < shape_.size(); ++i) { - proto->add_dim(shape_[i]); + proto->mutable_shape()->add_dim(shape_[i]); } proto->clear_data(); proto->clear_diff(); diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index a6aa917b556..60f387b92e9 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -48,8 +48,16 @@ void Net::Init(const NetParameter& in_param) { name_ = param.name(); map blob_name_to_idx; set available_blobs; - CHECK_EQ(param.input_size() * 4, param.input_dim_size()) - << "Incorrect input blob dimension specifications."; + CHECK(param.input_dim_size() == 0 || param.input_shape_size() == 0) + << "Must specify either input_shape OR deprecated input_dim, not both."; + if (param.input_dim_size() > 0) { + // Deprecated 4D dimensions. + CHECK_EQ(param.input_size() * 4, param.input_dim_size()) + << "Incorrect input blob dimension specifications."; + } else { + CHECK_EQ(param.input_size(), param.input_shape_size()) + << "Exactly one input_shape must be specified per input."; + } memory_used_ = 0; // set the input blobs for (int input_id = 0; input_id < param.input_size(); ++input_id) { @@ -339,10 +347,14 @@ void Net::AppendTop(const NetParameter& param, const int layer_id, if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; } if (layer_id == -1) { // Set the (explicitly specified) dimensions of the input blob. 
- blob_pointer->Reshape(param.input_dim(top_id * 4), - param.input_dim(top_id * 4 + 1), - param.input_dim(top_id * 4 + 2), - param.input_dim(top_id * 4 + 3)); + if (param.input_dim_size() > 0) { + blob_pointer->Reshape(param.input_dim(top_id * 4), + param.input_dim(top_id * 4 + 1), + param.input_dim(top_id * 4 + 2), + param.input_dim(top_id * 4 + 3)); + } else { + blob_pointer->Reshape(param.input_shape(top_id)); + } net_input_blob_indices_.push_back(blob_id); net_input_blobs_.push_back(blob_pointer.get()); } else { diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index e82b75d281b..fdcb57366ad 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -2,12 +2,17 @@ syntax = "proto2"; package caffe; +// Specifies the shape (dimensions) of a Blob. +message BlobShape { + repeated int64 dim = 1 [packed = true]; +} + message BlobProto { - repeated int32 dim = 7 [packed = true]; + optional BlobShape shape = 7; repeated float data = 5 [packed = true]; repeated float diff = 6 [packed = true]; - // 4D dimensions -- deprecated. Use "dim" instead. + // 4D dimensions -- deprecated. Use "shape" instead. optional int32 num = 1 [default = 0]; optional int32 channels = 2 [default = 0]; optional int32 height = 3 [default = 0]; @@ -50,10 +55,15 @@ message NetParameter { optional string name = 1; // consider giving the network a name // The input blobs to the network. repeated string input = 3; - // The dim of the input blobs. For each input blob there should be four + // The shape of the input blobs. + repeated BlobShape input_shape = 8; + + // 4D input dimensions -- deprecated. Use "shape" instead. + // If specified, for each input blob there should be four // values specifying the num, channels, height and width of the input blob. // Thus, there should be a total of (4 * #input) numbers. repeated int32 input_dim = 4; + // Whether the network will force every layer to carry out backward operation. // If set False, then whether to carry out backward is determined // automatically according to the net structure and learning rates. From 119a1c6699b7d97b39699a5764fa231f15bd6dfe Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 4 Feb 2015 15:00:03 -0800 Subject: [PATCH 26/65] add offset, {data,diff}_at nd blob accessors --- include/caffe/blob.hpp | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index 07b996ee392..82ca95a5f0f 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -163,6 +163,20 @@ class Blob { CHECK_LE(w, width()); return ((n * channels() + c) * height() + h) * width() + w; } + + inline int offset(const vector& indices) const { + CHECK_LE(indices.size(), num_axes()); + int offset = 0; + for (int i = 0; i < num_axes(); ++i) { + offset *= shape(i); + if (indices.size() > i) { + CHECK_GE(indices[i], 0); + CHECK_LT(indices[i], shape(i)); + offset += indices[i]; + } + } + return offset; + } /** * @brief Copy from a source Blob. 
* @@ -177,12 +191,20 @@ class Blob { inline Dtype data_at(const int n, const int c, const int h, const int w) const { - return *(cpu_data() + offset(n, c, h, w)); + return cpu_data()[offset(n, c, h, w)]; } inline Dtype diff_at(const int n, const int c, const int h, const int w) const { - return *(cpu_diff() + offset(n, c, h, w)); + return cpu_diff()[offset(n, c, h, w)]; + } + + inline Dtype data_at(const vector& index) const { + return cpu_data()[offset(index)]; + } + + inline Dtype diff_at(const vector& index) const { + return cpu_diff()[offset(index)]; } inline const shared_ptr& data() const { From c4e9ec40427693f3a3848932519e175d11b1738b Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sat, 29 Nov 2014 16:11:07 -0800 Subject: [PATCH 27/65] TestBlob: test that legacy BlobProtos are correctly handled by ShapeEquals --- src/caffe/test/test_blob.cpp | 54 ++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/caffe/test/test_blob.cpp b/src/caffe/test/test_blob.cpp index a654896c654..7da6423b67c 100644 --- a/src/caffe/test/test_blob.cpp +++ b/src/caffe/test/test_blob.cpp @@ -1,4 +1,5 @@ #include +#include #include "gtest/gtest.h" @@ -51,6 +52,59 @@ TYPED_TEST(BlobSimpleTest, TestReshape) { EXPECT_EQ(this->blob_->count(), 120); } +TYPED_TEST(BlobSimpleTest, TestLegacyBlobProtoShapeEquals) { + BlobProto blob_proto; + + // Reshape to (3 x 2). + vector shape(2); + shape[0] = 3; + shape[1] = 2; + this->blob_->Reshape(shape); + + // (3 x 2) blob == (1 x 1 x 3 x 2) legacy blob + blob_proto.set_num(1); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_TRUE(this->blob_->ShapeEquals(blob_proto)); + + // (3 x 2) blob != (0 x 1 x 3 x 2) legacy blob + blob_proto.set_num(0); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_FALSE(this->blob_->ShapeEquals(blob_proto)); + + // (3 x 2) blob != (3 x 1 x 3 x 2) legacy blob + blob_proto.set_num(3); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_FALSE(this->blob_->ShapeEquals(blob_proto)); + + // Reshape to (1 x 3 x 2). + shape.insert(shape.begin(), 1); + this->blob_->Reshape(shape); + + // (1 x 3 x 2) blob == (1 x 1 x 3 x 2) legacy blob + blob_proto.set_num(1); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_TRUE(this->blob_->ShapeEquals(blob_proto)); + + // Reshape to (2 x 3 x 2). 
+ shape[0] = 2; + this->blob_->Reshape(shape); + + // (2 x 3 x 2) blob != (1 x 1 x 3 x 2) legacy blob + blob_proto.set_num(1); + blob_proto.set_channels(1); + blob_proto.set_height(3); + blob_proto.set_width(2); + EXPECT_FALSE(this->blob_->ShapeEquals(blob_proto)); +} + template class BlobMathTest : public MultiDeviceTest { typedef typename TypeParam::Dtype Dtype; From 559ff3ae05677801d4904f103de6dc4d88398e07 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 25 Nov 2014 22:48:03 -0800 Subject: [PATCH 28/65] InnerProductLayer weights are 2D; biases are 1D --- src/caffe/layers/inner_product_layer.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index b1ec6cb25c0..60a547c5b49 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -26,14 +26,18 @@ void InnerProductLayer::LayerSetUp(const vector*>& bottom, this->blobs_.resize(1); } // Intialize the weight - this->blobs_[0].reset(new Blob(1, 1, N_, K_)); + vector weight_shape(2); + weight_shape[0] = N_; + weight_shape[1] = K_; + this->blobs_[0].reset(new Blob(weight_shape)); // fill the weights shared_ptr > weight_filler(GetFiller( this->layer_param_.inner_product_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); // If necessary, intiialize and fill the bias term if (bias_term_) { - this->blobs_[1].reset(new Blob(1, 1, 1, N_)); + vector bias_shape(1, N_); + this->blobs_[1].reset(new Blob(bias_shape)); shared_ptr > bias_filler(GetFiller( this->layer_param_.inner_product_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); @@ -49,10 +53,14 @@ void InnerProductLayer::Reshape(const vector*>& bottom, M_ = bottom[0]->num(); CHECK_EQ(bottom[0]->count() / bottom[0]->num(), K_) << "Input size " "incompatible with inner product parameters."; - top[0]->Reshape(bottom[0]->num(), N_, 1, 1); + vector top_shape(2); + top_shape[0] = M_; + top_shape[1] = N_; + top[0]->Reshape(top_shape); // Set up the bias multiplier if (bias_term_) { - bias_multiplier_.Reshape(1, 1, 1, M_); + vector bias_shape(1, M_); + bias_multiplier_.Reshape(bias_shape); caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data()); } } From cf9fdda4403996326b6a240aa94165df73135625 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Mon, 16 Feb 2015 01:29:17 -0800 Subject: [PATCH 29/65] Fix sparse GaussianFiller for new IPLayer weight axes --- include/caffe/filler.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index eebf565b1d5..bb18e8e1e28 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -79,9 +79,8 @@ class GaussianFiller : public Filler { // These have num == channels == 1; width is number of inputs; height is // number of outputs. The 'sparse' variable specifies the mean number // of non-zero input weights for a given output. 
- CHECK_EQ(blob->num(), 1); - CHECK_EQ(blob->channels(), 1); - int num_outputs = blob->height(); + CHECK_GE(blob->num_axes(), 1); + const int num_outputs = blob->shape(0); Dtype non_zero_probability = Dtype(sparse) / Dtype(num_outputs); rand_vec_.reset(new SyncedMemory(blob->count() * sizeof(int))); int* mask = reinterpret_cast(rand_vec_->mutable_cpu_data()); From 29581e6a4a2c9b88c2399a85628fcd0e9fa94383 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sat, 29 Nov 2014 14:26:48 -0800 Subject: [PATCH 30/65] InnerProductLayer can multiply along any axis --- src/caffe/layers/inner_product_layer.cpp | 26 +++++++++++++++++------- src/caffe/proto/caffe.proto | 5 +++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index 60a547c5b49..6b88724491b 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -15,7 +15,12 @@ void InnerProductLayer::LayerSetUp(const vector*>& bottom, const int num_output = this->layer_param_.inner_product_param().num_output(); bias_term_ = this->layer_param_.inner_product_param().bias_term(); N_ = num_output; - K_ = bottom[0]->count() / bottom[0]->num(); + const int axis = bottom[0]->CanonicalAxisIndex( + this->layer_param_.inner_product_param().axis()); + // Dimensions starting from "axis" are "flattened" into a single + // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W), + // N inner products with dimension CHW are performed. + K_ = bottom[0]->count(axis); // Check if we need to set up the weights if (this->blobs_.size() > 0) { LOG(INFO) << "Skipping parameter initialization"; @@ -50,12 +55,19 @@ template void InnerProductLayer::Reshape(const vector*>& bottom, const vector*>& top) { // Figure out the dimensions - M_ = bottom[0]->num(); - CHECK_EQ(bottom[0]->count() / bottom[0]->num(), K_) << "Input size " - "incompatible with inner product parameters."; - vector top_shape(2); - top_shape[0] = M_; - top_shape[1] = N_; + const int axis = bottom[0]->CanonicalAxisIndex( + this->layer_param_.inner_product_param().axis()); + const int new_K = bottom[0]->count(axis); + CHECK_EQ(K_, new_K) + << "Input size incompatible with inner product parameters."; + // The first "axis" dimensions are independent inner products; the total + // number of these is M_, the product over these dimensions. + M_ = bottom[0]->count(0, axis); + // The top shape will be the bottom shape with the flattened axes dropped, + // and replaced by a single axis with dimension num_output (N_). + vector top_shape = bottom[0]->shape(); + top_shape.resize(axis + 1); + top_shape[axis] = N_; top[0]->Reshape(top_shape); // Set up the bias multiplier if (bias_term_) { diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index fdcb57366ad..e8a134865bb 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -561,6 +561,11 @@ message InnerProductParameter { optional bool bias_term = 2 [default = true]; // whether to have bias terms optional FillerParameter weight_filler = 3; // The filler for the weight optional FillerParameter bias_filler = 4; // The filler for the bias + + // The first axis to be lumped into a single inner product computation; + // all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). 
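  // [Editor's note, not part of the original patch] A worked example of the
  // semantics described above, for a bottom blob of shape (N, C, H, W):
  //   axis: 1 (the default) -> N inner products of length C*H*W,
  //                            top shape (N, num_output);
  //   axis: 2               -> N*C inner products of length H*W,
  //                            top shape (N, C, num_output).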
+ optional int32 axis = 5 [default = 1]; } // Message that stores parameters used by LRNLayer From 94179cc242a02017538362842125f929e5acc2be Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sat, 29 Nov 2014 17:02:18 -0800 Subject: [PATCH 31/65] ConvLayer biases are 1D --- src/caffe/layers/base_conv_layer.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index dccd5170c11..d4ab5e429b5 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -85,10 +85,10 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, shared_ptr > weight_filler(GetFiller( this->layer_param_.convolution_param().weight_filler())); weight_filler->Fill(this->blobs_[0].get()); - // If necessary, initialize and fill the biases: - // 1 x 1 x 1 x output channels + // If necessary, initialize and fill the biases. if (bias_term_) { - this->blobs_[1].reset(new Blob(1, 1, 1, num_output_)); + vector bias_shape(1, num_output_); + this->blobs_[1].reset(new Blob(bias_shape)); shared_ptr > bias_filler(GetFiller( this->layer_param_.convolution_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); @@ -144,7 +144,8 @@ void BaseConvolutionLayer::Reshape(const vector*>& bottom, } // Set up the all ones "bias multiplier" for adding biases by BLAS if (bias_term_) { - bias_multiplier_.Reshape(1, 1, 1, height_out_ * width_out_); + vector bias_multiplier_shape(1, height_out_ * width_out_); + bias_multiplier_.Reshape(bias_multiplier_shape); caffe_set(bias_multiplier_.count(), Dtype(1), bias_multiplier_.mutable_cpu_data()); } From a0fa2a9535bda318008c8a2a9edc9ca6218d5c41 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 25 Nov 2014 23:46:57 -0800 Subject: [PATCH 32/65] LossLayer output is 0D (scalar) --- src/caffe/layers/loss_layer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp index a5b6d11b065..3496a5c2a8a 100644 --- a/src/caffe/layers/loss_layer.cpp +++ b/src/caffe/layers/loss_layer.cpp @@ -24,7 +24,8 @@ void LossLayer::Reshape( const vector*>& bottom, const vector*>& top) { CHECK_EQ(bottom[0]->num(), bottom[1]->num()) << "The data and label should have the same number."; - top[0]->Reshape(1, 1, 1, 1); + vector loss_shape(0); // Loss layers output a scalar; 0 axes. 
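      // [Editor's note, not part of the original patch] An illustrative
      // aside: a Blob reshaped with an empty shape vector has
      // num_axes() == 0 but count() == 1 (the empty product), e.g.
      //   vector<int> scalar_shape;   // no axes
      //   blob.Reshape(scalar_shape);
      //   // blob.count() == 1; a single element is still allocated
      // so callers that read the loss value from cpu_data()[0] keep working.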
+ top[0]->Reshape(loss_shape); } INSTANTIATE_CLASS(LossLayer); From d8c6aeb3d213ead88328ec8e859ae91585c545f9 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sat, 29 Nov 2014 16:48:32 -0800 Subject: [PATCH 33/65] AccuracyLayer output is 0D (scalar) --- src/caffe/layers/accuracy_layer.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp index 3e8df34c0d6..7f28324541a 100644 --- a/src/caffe/layers/accuracy_layer.cpp +++ b/src/caffe/layers/accuracy_layer.cpp @@ -19,14 +19,15 @@ void AccuracyLayer::LayerSetUp( template void AccuracyLayer::Reshape( const vector*>& bottom, const vector*>& top) { - CHECK_EQ(bottom[0]->num(), bottom[1]->num()) - << "The data and label should have the same number."; - CHECK_LE(top_k_, bottom[0]->count() / bottom[0]->num()) + CHECK_LE(top_k_, bottom[0]->count() / bottom[1]->count()) << "top_k must be less than or equal to the number of classes."; - CHECK_EQ(bottom[1]->channels(), 1); - CHECK_EQ(bottom[1]->height(), 1); - CHECK_EQ(bottom[1]->width(), 1); - top[0]->Reshape(1, 1, 1, 1); + CHECK_GE(bottom[0]->num_axes(), bottom[1]->num_axes()); + for (int i = 0; i < bottom[1]->num_axes(); ++i) { + CHECK_LE(bottom[0]->shape(i), bottom[1]->shape(i)) + << "Dimension mismatch between predictions and label."; + } + vector top_shape(0); // Accuracy is a scalar; 0 axes. + top[0]->Reshape(top_shape); } template From 6b8a765864e853397e15c8ef57c8110d6a1b4332 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Fri, 30 Jan 2015 23:23:27 -0800 Subject: [PATCH 34/65] AccuracyLayer generalized to N instance axes --- src/caffe/layers/accuracy_layer.cpp | 4 ++-- src/caffe/test/test_accuracy_layer.cpp | 10 ++++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp index 7f28324541a..186f9f8632c 100644 --- a/src/caffe/layers/accuracy_layer.cpp +++ b/src/caffe/layers/accuracy_layer.cpp @@ -36,8 +36,8 @@ void AccuracyLayer::Forward_cpu(const vector*>& bottom, Dtype accuracy = 0; const Dtype* bottom_data = bottom[0]->cpu_data(); const Dtype* bottom_label = bottom[1]->cpu_data(); - int num = bottom[0]->num(); - int dim = bottom[0]->count() / bottom[0]->num(); + int num = bottom[0]->count(0, bottom[1]->num_axes()); + int dim = bottom[0]->count() / num; vector maxval(top_k_+1); vector max_id(top_k_+1); for (int i = 0; i < num; ++i) { diff --git a/src/caffe/test/test_accuracy_layer.cpp b/src/caffe/test/test_accuracy_layer.cpp index fa59fab1e8a..1c58b767bfc 100644 --- a/src/caffe/test/test_accuracy_layer.cpp +++ b/src/caffe/test/test_accuracy_layer.cpp @@ -19,10 +19,16 @@ template class AccuracyLayerTest : public ::testing::Test { protected: AccuracyLayerTest() - : blob_bottom_data_(new Blob(100, 10, 1, 1)), - blob_bottom_label_(new Blob(100, 1, 1, 1)), + : blob_bottom_data_(new Blob()), + blob_bottom_label_(new Blob()), blob_top_(new Blob()), top_k_(3) { + vector shape(2); + shape[0] = 100; + shape[1] = 10; + blob_bottom_data_->Reshape(shape); + shape.resize(1); + blob_bottom_label_->Reshape(shape); // fill the probability values FillerParameter filler_param; GaussianFiller filler(filler_param); From 8e96445ba57890a982f6e8f5570e3aac5eaf64e6 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Fri, 13 Feb 2015 15:52:39 -0800 Subject: [PATCH 35/65] Test{Net,Solver} fixes for AccuracyLayer generalization --- src/caffe/test/test_net.cpp | 17 +++++++++-------- src/caffe/test/test_solver.cpp | 17 
+++++++++-------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 1680a3f28d5..08106e79274 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -63,18 +63,19 @@ class NetTest : public MultiDeviceTest { " name: 'data' " " type: 'DummyData' " " dummy_data_param { " - " num: 5 " - " channels: 2 " - " height: 3 " - " width: 4 " - " num: 5 " - " channels: 1 " - " height: 1 " - " width: 1 " + " shape { " + " dim: 5 " + " dim: 2 " + " dim: 3 " + " dim: 4 " + " } " " data_filler { " " type: 'gaussian' " " std: 0.01 " " } " + " shape { " + " dim: 5 " + " } " " data_filler { " " type: 'constant' " " value: 0 " diff --git a/src/caffe/test/test_solver.cpp b/src/caffe/test/test_solver.cpp index 1c2c9bbb740..ceabc9cdd2c 100644 --- a/src/caffe/test/test_solver.cpp +++ b/src/caffe/test/test_solver.cpp @@ -55,14 +55,15 @@ TYPED_TEST(SolverTest, TestInitTrainTestNets) { " name: 'data' " " type: 'DummyData' " " dummy_data_param { " - " num: 5 " - " channels: 3 " - " height: 10 " - " width: 10 " - " num: 5 " - " channels: 1 " - " height: 1 " - " width: 1 " + " shape { " + " dim: 5 " + " dim: 2 " + " dim: 3 " + " dim: 4 " + " } " + " shape { " + " dim: 5 " + " } " " } " " top: 'data' " " top: 'label' " From bf73cb18982712d03f096418e2aa33074457ae8c Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 25 Nov 2014 23:47:10 -0800 Subject: [PATCH 36/65] EltwiseLayer need not assume old 4D dim names --- src/caffe/layers/eltwise_layer.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/caffe/layers/eltwise_layer.cpp b/src/caffe/layers/eltwise_layer.cpp index bbc34449588..a80700736bd 100644 --- a/src/caffe/layers/eltwise_layer.cpp +++ b/src/caffe/layers/eltwise_layer.cpp @@ -31,21 +31,14 @@ void EltwiseLayer::LayerSetUp(const vector*>& bottom, template void EltwiseLayer::Reshape(const vector*>& bottom, const vector*>& top) { - const int num = bottom[0]->num(); - const int channels = bottom[0]->channels(); - const int height = bottom[0]->height(); - const int width = bottom[0]->width(); for (int i = 1; i < bottom.size(); ++i) { - CHECK_EQ(num, bottom[i]->num()); - CHECK_EQ(channels, bottom[i]->channels()); - CHECK_EQ(height, bottom[i]->height()); - CHECK_EQ(width, bottom[i]->width()); + CHECK(bottom[i]->shape() == bottom[0]->shape()); } - top[0]->Reshape(num, channels, height, width); + top[0]->ReshapeLike(*bottom[0]); // If max operation, we will initialize the vector index part. 
if (this->layer_param_.eltwise_param().operation() == EltwiseParameter_EltwiseOp_MAX && top.size() == 1) { - max_idx_.Reshape(bottom[0]->num(), channels, height, width); + max_idx_.Reshape(bottom[0]->shape()); } } From 1b97c06b16cae847578fb2d7130f347862b1a262 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 00:11:06 -0800 Subject: [PATCH 37/65] FlattenLayer: generalized Blob axes --- include/caffe/common_layers.hpp | 6 ------ src/caffe/layers/flatten_layer.cpp | 15 +++++---------- src/caffe/layers/flatten_layer.cu | 23 ----------------------- 3 files changed, 5 insertions(+), 39 deletions(-) delete mode 100644 src/caffe/layers/flatten_layer.cu diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index c67822c3738..a9c774f1485 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -216,8 +216,6 @@ class FlattenLayer : public Layer { */ virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); /** * @brief Computes the error gradient w.r.t. the concatenate inputs. @@ -230,10 +228,6 @@ class FlattenLayer : public Layer { */ virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); - - int count_; }; /** diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp index eb7b42bc10b..745f271ea45 100644 --- a/src/caffe/layers/flatten_layer.cpp +++ b/src/caffe/layers/flatten_layer.cpp @@ -9,12 +9,11 @@ namespace caffe { template void FlattenLayer::Reshape(const vector*>& bottom, const vector*>& top) { - int channels_out = bottom[0]->channels() * bottom[0]->height() - * bottom[0]->width(); - top[0]->Reshape(bottom[0]->num(), channels_out, 1, 1); - count_ = bottom[0]->num() * channels_out; - CHECK_EQ(count_, bottom[0]->count()); - CHECK_EQ(count_, top[0]->count()); + vector top_shape(2); + top_shape[0] = bottom[0]->num(); + top_shape[1] = bottom[0]->count() / bottom[0]->num(); + top[0]->Reshape(top_shape); + CHECK_EQ(top[0]->count(), bottom[0]->count()); } template @@ -29,10 +28,6 @@ void FlattenLayer::Backward_cpu(const vector*>& top, bottom[0]->ShareDiff(*top[0]); } -#ifdef CPU_ONLY -STUB_GPU(FlattenLayer); -#endif - INSTANTIATE_CLASS(FlattenLayer); REGISTER_LAYER_CLASS(Flatten); diff --git a/src/caffe/layers/flatten_layer.cu b/src/caffe/layers/flatten_layer.cu deleted file mode 100644 index 42abdad4499..00000000000 --- a/src/caffe/layers/flatten_layer.cu +++ /dev/null @@ -1,23 +0,0 @@ -#include - -#include "caffe/layer.hpp" -#include "caffe/util/math_functions.hpp" -#include "caffe/vision_layers.hpp" - -namespace caffe { - -template -void FlattenLayer::Forward_gpu(const vector*>& bottom, - const vector*>& top) { - top[0]->ShareData(*bottom[0]); -} - -template -void FlattenLayer::Backward_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { - bottom[0]->ShareDiff(*top[0]); -} - -INSTANTIATE_LAYER_GPU_FUNCS(FlattenLayer); - -} // namespace caffe From fb9caeef09e75dbb6196977f2cfa592eb80c37e1 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 03:23:42 -0800 Subject: [PATCH 38/65] common_layers.hpp: remove unused "Blob col_bob_" --- include/caffe/common_layers.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index a9c774f1485..e9fe7cdeb9f 100644 --- 
a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -137,7 +137,6 @@ class ConcatLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - Blob col_bob_; int count_; int num_; int channels_; @@ -452,7 +451,6 @@ class SliceLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - Blob col_bob_; int count_; int num_; int channels_; From 704e524f6ec1e927db8cef57434002f6c34a30b7 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 02:12:09 -0800 Subject: [PATCH 39/65] TestConcatLayer: fix style errors --- src/caffe/test/test_concat_layer.cpp | 68 ++++++++++++++-------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index f14f1d2fa4f..3fc507ee24e 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -19,9 +19,9 @@ class ConcatLayerTest : public MultiDeviceTest { protected: ConcatLayerTest() - : blob_bottom_0(new Blob(2, 3, 6, 5)), - blob_bottom_1(new Blob(2, 5, 6, 5)), - blob_bottom_2(new Blob(5, 3, 6, 5)), + : blob_bottom_0_(new Blob(2, 3, 6, 5)), + blob_bottom_1_(new Blob(2, 5, 6, 5)), + blob_bottom_2_(new Blob(5, 3, 6, 5)), blob_top_(new Blob()) {} virtual void SetUp() { // fill the values @@ -29,30 +29,30 @@ class ConcatLayerTest : public MultiDeviceTest { FillerParameter filler_param; filler_param.set_value(1.); filler.reset(new ConstantFiller(filler_param)); - filler->Fill(this->blob_bottom_0); + filler->Fill(this->blob_bottom_0_); filler_param.set_value(2.); filler.reset(new ConstantFiller(filler_param)); - filler->Fill(this->blob_bottom_1); + filler->Fill(this->blob_bottom_1_); filler_param.set_value(3.); filler.reset(new ConstantFiller(filler_param)); - filler->Fill(this->blob_bottom_2); - blob_bottom_vec_0.push_back(blob_bottom_0); - blob_bottom_vec_0.push_back(blob_bottom_1); - blob_bottom_vec_1.push_back(blob_bottom_0); - blob_bottom_vec_1.push_back(blob_bottom_2); + filler->Fill(this->blob_bottom_2_); + blob_bottom_vec_0_.push_back(blob_bottom_0_); + blob_bottom_vec_0_.push_back(blob_bottom_1_); + blob_bottom_vec_1_.push_back(blob_bottom_0_); + blob_bottom_vec_1_.push_back(blob_bottom_2_); blob_top_vec_.push_back(blob_top_); } virtual ~ConcatLayerTest() { - delete blob_bottom_0; delete blob_bottom_1; - delete blob_bottom_2; delete blob_top_; + delete blob_bottom_0_; delete blob_bottom_1_; + delete blob_bottom_2_; delete blob_top_; } - Blob* const blob_bottom_0; - Blob* const blob_bottom_1; - Blob* const blob_bottom_2; + Blob* const blob_bottom_0_; + Blob* const blob_bottom_1_; + Blob* const blob_bottom_2_; Blob* const blob_top_; - vector*> blob_bottom_vec_0, blob_bottom_vec_1; + vector*> blob_bottom_vec_0_, blob_bottom_vec_1_; vector*> blob_top_vec_; }; @@ -63,24 +63,24 @@ TYPED_TEST(ConcatLayerTest, TestSetupNum) { LayerParameter layer_param; layer_param.mutable_concat_param()->set_concat_dim(0); ConcatLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_1, this->blob_top_vec_); + layer.SetUp(this->blob_bottom_vec_1_, this->blob_top_vec_); EXPECT_EQ(this->blob_top_->num(), - this->blob_bottom_0->num() + this->blob_bottom_2->num()); - EXPECT_EQ(this->blob_top_->channels(), this->blob_bottom_0->channels()); - EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0->height()); - EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0->width()); + 
this->blob_bottom_0_->num() + this->blob_bottom_2_->num()); + EXPECT_EQ(this->blob_top_->channels(), this->blob_bottom_0_->channels()); + EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0_->height()); + EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } TYPED_TEST(ConcatLayerTest, TestSetupChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_0, this->blob_top_vec_); - EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_0->num()); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_0_->num()); EXPECT_EQ(this->blob_top_->channels(), - this->blob_bottom_0->channels()+this->blob_bottom_1->channels()); - EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0->height()); - EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0->width()); + this->blob_bottom_0_->channels() + this->blob_bottom_1_->channels()); + EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0_->height()); + EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } @@ -88,22 +88,22 @@ TYPED_TEST(ConcatLayerTest, TestNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_0, this->blob_top_vec_); - layer.Forward(this->blob_bottom_vec_0, this->blob_top_vec_); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_0_, this->blob_top_vec_); for (int n = 0; n < this->blob_top_->num(); ++n) { - for (int c = 0; c < this->blob_bottom_0->channels(); ++c) { + for (int c = 0; c < this->blob_bottom_0_->channels(); ++c) { for (int h = 0; h < this->blob_top_->height(); ++h) { for (int w = 0; w < this->blob_top_->width(); ++w) { EXPECT_EQ(this->blob_top_->data_at(n, c, h, w), - this->blob_bottom_vec_0[0]->data_at(n, c, h, w)); + this->blob_bottom_vec_0_[0]->data_at(n, c, h, w)); } } } - for (int c = 0; c < this->blob_bottom_1->channels(); ++c) { + for (int c = 0; c < this->blob_bottom_1_->channels(); ++c) { for (int h = 0; h < this->blob_top_->height(); ++h) { for (int w = 0; w < this->blob_top_->width(); ++w) { - EXPECT_EQ(this->blob_top_->data_at(n, c+3, h, w), - this->blob_bottom_vec_0[1]->data_at(n, c, h, w)); + EXPECT_EQ(this->blob_top_->data_at(n, c + 3, h, w), + this->blob_bottom_vec_0_[1]->data_at(n, c, h, w)); } } } @@ -115,7 +115,7 @@ TYPED_TEST(ConcatLayerTest, TestGradient) { LayerParameter layer_param; ConcatLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); - checker.CheckGradient(&layer, this->blob_bottom_vec_0, + checker.CheckGradient(&layer, this->blob_bottom_vec_0_, this->blob_top_vec_); } From d52e9a811bd0afa12602248cb377ed9234be1a33 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 02:24:41 -0800 Subject: [PATCH 40/65] TestConcatLayer: add forward/gradient tests for concatenation along num --- src/caffe/test/test_concat_layer.cpp | 42 ++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index 3fc507ee24e..875468faa49 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -83,8 +83,36 @@ TYPED_TEST(ConcatLayerTest, TestSetupChannels) { EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } +TYPED_TEST(ConcatLayerTest, TestForwardNum) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter 
layer_param; + layer_param.mutable_concat_param()->set_concat_dim(0); + ConcatLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_1_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_1_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_vec_1_[0]->num(); ++n) { + for (int c = 0; c < this->blob_top_->channels(); ++c) { + for (int h = 0; h < this->blob_top_->height(); ++h) { + for (int w = 0; w < this->blob_top_->width(); ++w) { + EXPECT_EQ(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_vec_1_[0]->data_at(n, c, h, w)); + } + } + } + } + for (int n = 0; n < this->blob_bottom_vec_1_[1]->num(); ++n) { + for (int c = 0; c < this->blob_top_->channels(); ++c) { + for (int h = 0; h < this->blob_top_->height(); ++h) { + for (int w = 0; w < this->blob_top_->width(); ++w) { + EXPECT_EQ(this->blob_top_->data_at(n + 2, c, h, w), + this->blob_bottom_vec_1_[1]->data_at(n, c, h, w)); + } + } + } + } +} -TYPED_TEST(ConcatLayerTest, TestNum) { +TYPED_TEST(ConcatLayerTest, TestForwardChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); @@ -110,7 +138,17 @@ TYPED_TEST(ConcatLayerTest, TestNum) { } } -TYPED_TEST(ConcatLayerTest, TestGradient) { +TYPED_TEST(ConcatLayerTest, TestGradientNum) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_concat_param()->set_concat_dim(0); + ConcatLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-2); + checker.CheckGradient(&layer, this->blob_bottom_vec_1_, + this->blob_top_vec_); +} + +TYPED_TEST(ConcatLayerTest, TestGradientChannels) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; ConcatLayer layer(layer_param); From 8afdcd02f06923a1ea61fff7bd35e72fb5c57ab4 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 00:03:36 -0800 Subject: [PATCH 41/65] ConcatLayer: generalized Blob axes --- include/caffe/common_layers.hpp | 16 ++-- src/caffe/layers/concat_layer.cpp | 118 ++++++++++++--------------- src/caffe/layers/concat_layer.cu | 71 +++++----------- src/caffe/proto/caffe.proto | 10 ++- src/caffe/test/test_concat_layer.cpp | 22 ++++- 5 files changed, 108 insertions(+), 129 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index e9fe7cdeb9f..114f24a2b75 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -99,8 +99,8 @@ class ConcatLayer : public Layer { * - K @f$ (N \times C \times H \times W) @f$ * the inputs @f$ x_K @f$ * @param top output Blob vector (length 1) - * -# @f$ (KN \times C \times H \times W) @f$ if concat_dim == 0, or - * @f$ (N \times KC \times H \times W) @f$ if concat_dim == 1: + * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or + * @f$ (N \times KC \times H \times W) @f$ if axis == 1: * the concatenated output @f$ * y = [\begin{array}{cccc} x_1 & x_2 & ... & x_K \end{array}] * @f$ @@ -115,8 +115,8 @@ class ConcatLayer : public Layer { * * @param top output Blob vector (length 1), providing the error gradient with * respect to the outputs - * -# @f$ (KN \times C \times H \times W) @f$ if concat_dim == 0, or - * @f$ (N \times KC \times H \times W) @f$ if concat_dim == 1: + * -# @f$ (KN \times C \times H \times W) @f$ if axis == 0, or + * @f$ (N \times KC \times H \times W) @f$ if axis == 1: * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ * with respect to concatenated outputs @f$ y @f$ * @param propagate_down see Layer::Backward. 
@@ -138,11 +138,9 @@ class ConcatLayer : public Layer { const vector& propagate_down, const vector*>& bottom); int count_; - int num_; - int channels_; - int height_; - int width_; - int concat_dim_; + int num_concats_; + int concat_input_size_; + int concat_axis_; }; /** diff --git a/src/caffe/layers/concat_layer.cpp b/src/caffe/layers/concat_layer.cpp index fc88433c42b..1cac8fc3387 100644 --- a/src/caffe/layers/concat_layer.cpp +++ b/src/caffe/layers/concat_layer.cpp @@ -9,62 +9,63 @@ namespace caffe { template void ConcatLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { - concat_dim_ = this->layer_param_.concat_param().concat_dim(); - CHECK_GE(concat_dim_, 0) << - "concat_dim should be >= 0"; - CHECK_LE(concat_dim_, 1) << - "For now concat_dim <=1, it can only concat num and channels"; + const ConcatParameter& concat_param = this->layer_param_.concat_param(); + CHECK(!(concat_param.has_axis() && concat_param.has_concat_dim())) + << "Either axis or concat_dim should be specified; not both."; } template void ConcatLayer::Reshape(const vector*>& bottom, const vector*>& top) { + const int num_axes = bottom[0]->num_axes(); + const ConcatParameter& concat_param = this->layer_param_.concat_param(); + if (concat_param.has_concat_dim()) { + concat_axis_ = static_cast(concat_param.concat_dim()); + // Don't allow negative indexing for concat_dim, a uint32 -- almost + // certainly unintended. + CHECK_GE(concat_axis_, 0) << "casting concat_dim from uint32 to int32 " + << "produced negative result; concat_dim must satisfy " + << "0 <= concat_dim < " << kMaxBlobAxes; + CHECK_LT(concat_axis_, num_axes) << "concat_dim out of range."; + } else { + concat_axis_ = bottom[0]->CanonicalAxisIndex(concat_param.axis()); + } // Initialize with the first blob. 
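  // [Editor's note, not part of the original patch] A summary of the ND-blob
  // helpers used below: Blob::count(a, b) multiplies the dimensions in the
  // half-open axis range [a, b), and Blob::count(a) runs from axis a to the
  // last axis. For a (N, C, H, W) bottom with concat_axis_ == 1 this gives
  //   num_concats_       = count(0, 1) = N
  //   concat_input_size_ = count(2)    = H * W
  // reproducing the old num_ and height_ * width_ bookkeeping.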
- count_ = bottom[0]->count(); - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); + vector top_shape = bottom[0]->shape(); + num_concats_ = bottom[0]->count(0, concat_axis_); + concat_input_size_ = bottom[0]->count(concat_axis_ + 1); + int bottom_count_sum = bottom[0]->count(); for (int i = 1; i < bottom.size(); ++i) { - count_ += bottom[i]->count(); - if (concat_dim_== 0) { - num_ += bottom[i]->num(); - } else if (concat_dim_ == 1) { - channels_ += bottom[i]->channels(); - } else if (concat_dim_ == 2) { - height_ += bottom[i]->height(); - } else if (concat_dim_ == 3) { - width_ += bottom[i]->width(); + CHECK_EQ(num_axes, bottom[i]->num_axes()) + << "All inputs must have the same #axes."; + for (int j = 0; j < num_axes; ++j) { + if (j == concat_axis_) { continue; } + CHECK_EQ(top_shape[j], bottom[i]->shape(j)) + << "All inputs must have the same shape, except at concat_axis."; } + bottom_count_sum += bottom[i]->count(); + top_shape[concat_axis_] += bottom[i]->shape(concat_axis_); } - top[0]->Reshape(num_, channels_, height_, width_); - CHECK_EQ(count_, top[0]->count()); + top[0]->Reshape(top_shape); + CHECK_EQ(bottom_count_sum, top[0]->count()); } template void ConcatLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { Dtype* top_data = top[0]->mutable_cpu_data(); - if (concat_dim_== 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->cpu_data(); - int num_elem = bottom[i]->count(); - caffe_copy(num_elem, bottom_data, top_data+top[0]->offset(offset_num)); - offset_num += bottom[i]->num(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + const Dtype* bottom_data = bottom[i]->cpu_data(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, + bottom_data + n * bottom_concat_axis * concat_input_size_, + top_data + (n * top_concat_axis + offset_concat_axis) + * concat_input_size_); } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->cpu_data(); - int num_elem = - bottom[i]->channels()*bottom[i]->height()*bottom[i]->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data+bottom[i]->offset(n), - top_data+top[0]->offset(n, offset_channel)); - } - offset_channel += bottom[i]->channels(); - } // concat_dim_ is guaranteed to be 0 or 1 by LayerSetUp. 
+ offset_concat_axis += bottom_concat_axis; } } @@ -72,32 +73,19 @@ template void ConcatLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->cpu_diff(); - if (concat_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_cpu_diff(); - caffe_copy(blob->count(), top_diff + top[0]->offset(offset_num), - bottom_diff); - } - offset_num += blob->num(); - } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_cpu_diff(); - int num_elem = blob->channels()*blob->height()*blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + top[0]->offset(n, offset_channel), - bottom_diff + blob->offset(n)); - } - } - offset_channel += blob->channels(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + if (!propagate_down[i]) { continue; } + Dtype* bottom_diff = bottom[i]->mutable_cpu_diff(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + + (n * top_concat_axis + offset_concat_axis) * concat_input_size_, + bottom_diff + n * bottom_concat_axis * concat_input_size_); } - } // concat_dim_ is guaranteed to be 0 or 1 by LayerSetUp. + offset_concat_axis += bottom_concat_axis; + } } #ifdef CPU_ONLY diff --git a/src/caffe/layers/concat_layer.cu b/src/caffe/layers/concat_layer.cu index 88fc090025f..dbadb5aeb30 100644 --- a/src/caffe/layers/concat_layer.cu +++ b/src/caffe/layers/concat_layer.cu @@ -10,29 +10,18 @@ template void ConcatLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { Dtype* top_data = top[0]->mutable_gpu_data(); - if (concat_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->gpu_data(); - caffe_copy(bottom[i]->count(), bottom_data, - top_data + top[0]->offset(offset_num)); - offset_num += bottom[i]->num(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + const Dtype* bottom_data = bottom[i]->gpu_data(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, + bottom_data + n * bottom_concat_axis * concat_input_size_, + top_data + (n * top_concat_axis + offset_concat_axis) + * concat_input_size_); } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - const Dtype* bottom_data = bottom[i]->gpu_data(); - int num_elem = - bottom[i]->channels() * bottom[i]->height() * bottom[i]->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data+bottom[i]->offset(n), - top_data + top[0]->offset(n, offset_channel)); - } - offset_channel += bottom[i]->channels(); - } - } else { - LOG(FATAL) << "concat_dim along dim" << concat_dim_ << - " not implemented yet"; + offset_concat_axis += bottom_concat_axis; } } @@ -40,34 +29,18 @@ template void ConcatLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->gpu_diff(); - if (concat_dim_ == 0) { - int 
offset_num = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_gpu_diff(); - caffe_copy(blob->count(), top_diff + top[0]->offset(offset_num), - bottom_diff); - } - offset_num += blob->num(); - } - } else if (concat_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < bottom.size(); ++i) { - Blob* blob = bottom[i]; - if (propagate_down[i]) { - Dtype* bottom_diff = blob->mutable_gpu_diff(); - int num_elem = blob->channels()*blob->height()*blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + top[0]->offset(n, offset_channel), - bottom_diff + blob->offset(n)); - } - } - offset_channel += blob->channels(); + int offset_concat_axis = 0; + const int top_concat_axis = top[0]->shape(concat_axis_); + for (int i = 0; i < bottom.size(); ++i) { + if (!propagate_down[i]) { continue; } + Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); + const int bottom_concat_axis = bottom[i]->shape(concat_axis_); + for (int n = 0; n < num_concats_; ++n) { + caffe_copy(bottom_concat_axis * concat_input_size_, top_diff + + (n * top_concat_axis + offset_concat_axis) * concat_input_size_, + bottom_diff + n * bottom_concat_axis * concat_input_size_); } - } else { - LOG(FATAL) << "concat_dim along dim" << concat_dim_ << - " not implemented yet"; + offset_concat_axis += bottom_concat_axis; } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index e8a134865bb..7a4ecf93c7a 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -378,9 +378,13 @@ message ArgMaxParameter { // Message that stores parameters used by ConcatLayer message ConcatParameter { - // Concat Layer needs to specify the dimension along the concat will happen, - // the other dimensions must be the same for all the bottom blobs - // By default it will concatenate blobs along channels dimension + // The axis along which to concatenate -- may be negative to index from the + // end (e.g., -1 for the last axis). Other axes must have the + // same dimension for all the bottom blobs. + // By default, ConcatLayer concatenates blobs along the "channels" axis (1). + optional int32 axis = 2 [default = 1]; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. optional uint32 concat_dim = 1 [default = 1]; } diff --git a/src/caffe/test/test_concat_layer.cpp b/src/caffe/test/test_concat_layer.cpp index 875468faa49..662a50fa23b 100644 --- a/src/caffe/test/test_concat_layer.cpp +++ b/src/caffe/test/test_concat_layer.cpp @@ -61,7 +61,7 @@ TYPED_TEST_CASE(ConcatLayerTest, TestDtypesAndDevices); TYPED_TEST(ConcatLayerTest, TestSetupNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_concat_param()->set_concat_dim(0); + layer_param.mutable_concat_param()->set_axis(0); ConcatLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_1_, this->blob_top_vec_); EXPECT_EQ(this->blob_top_->num(), @@ -83,10 +83,26 @@ TYPED_TEST(ConcatLayerTest, TestSetupChannels) { EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); } +TYPED_TEST(ConcatLayerTest, TestSetupChannelsNegativeIndexing) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + ConcatLayer layer(layer_param); + // "channels" index is the third one from the end -- test negative indexing + // by setting axis to -3 and checking that we get the same results as above in + // TestSetupChannels. 
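  // [Editor's note, not part of the original patch] Blob::CanonicalAxisIndex
  // resolves a negative axis as num_axes() + axis, so for these 4-axis test
  // blobs -3 maps to 4 + (-3) == 1, the "channels" axis.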
+ layer_param.mutable_concat_param()->set_axis(-3); + layer.SetUp(this->blob_bottom_vec_0_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_->num(), this->blob_bottom_0_->num()); + EXPECT_EQ(this->blob_top_->channels(), + this->blob_bottom_0_->channels() + this->blob_bottom_1_->channels()); + EXPECT_EQ(this->blob_top_->height(), this->blob_bottom_0_->height()); + EXPECT_EQ(this->blob_top_->width(), this->blob_bottom_0_->width()); +} + TYPED_TEST(ConcatLayerTest, TestForwardNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_concat_param()->set_concat_dim(0); + layer_param.mutable_concat_param()->set_axis(0); ConcatLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_1_, this->blob_top_vec_); layer.Forward(this->blob_bottom_vec_1_, this->blob_top_vec_); @@ -141,7 +157,7 @@ TYPED_TEST(ConcatLayerTest, TestForwardChannels) { TYPED_TEST(ConcatLayerTest, TestGradientNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_concat_param()->set_concat_dim(0); + layer_param.mutable_concat_param()->set_axis(0); ConcatLayer layer(layer_param); GradientChecker checker(1e-2, 1e-2); checker.CheckGradient(&layer, this->blob_bottom_vec_1_, From b86891635dbb24f70d5634a679150070caf776e4 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 03:22:59 -0800 Subject: [PATCH 42/65] SliceLayer: generalized Blob axes --- include/caffe/common_layers.hpp | 8 +- src/caffe/layers/slice_layer.cpp | 142 ++++++++++++---------------- src/caffe/layers/slice_layer.cu | 70 ++++++-------- src/caffe/proto/caffe.proto | 12 ++- src/caffe/test/test_slice_layer.cpp | 6 +- 5 files changed, 100 insertions(+), 138 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 114f24a2b75..4e47e55d5b6 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -450,11 +450,9 @@ class SliceLayer : public Layer { const vector& propagate_down, const vector*>& bottom); int count_; - int num_; - int channels_; - int height_; - int width_; - int slice_dim_; + int num_slices_; + int slice_size_; + int slice_axis_; vector slice_point_; }; diff --git a/src/caffe/layers/slice_layer.cpp b/src/caffe/layers/slice_layer.cpp index 46c3acd6513..e4418c9cf9c 100644 --- a/src/caffe/layers/slice_layer.cpp +++ b/src/caffe/layers/slice_layer.cpp @@ -11,9 +11,8 @@ template void SliceLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { const SliceParameter& slice_param = this->layer_param_.slice_param(); - slice_dim_ = slice_param.slice_dim(); - CHECK_GE(slice_dim_, 0); - CHECK_LE(slice_dim_, 1) << "Can only slice num and channels"; + CHECK(!(slice_param.has_axis() && slice_param.has_slice_dim())) + << "Either axis or slice_dim should be specified; not both."; slice_point_.clear(); std::copy(slice_param.slice_point().begin(), slice_param.slice_point().end(), @@ -23,18 +22,27 @@ void SliceLayer::LayerSetUp(const vector*>& bottom, template void SliceLayer::Reshape(const vector*>& bottom, const vector*>& top) { - count_ = 0; - num_ = bottom[0]->num(); - channels_ = bottom[0]->channels(); - height_ = bottom[0]->height(); - width_ = bottom[0]->width(); + const int num_axes = bottom[0]->num_axes(); + const SliceParameter& slice_param = this->layer_param_.slice_param(); + if (slice_param.has_slice_dim()) { + slice_axis_ = static_cast(slice_param.slice_dim()); + // Don't allow negative indexing for slice_dim, a uint32 -- almost + // certainly unintended. 
+ CHECK_GE(slice_axis_, 0) << "casting slice_dim from uint32 to int32 " + << "produced negative result; slice_dim must satisfy " + << "0 <= slice_dim < " << kMaxBlobAxes; + CHECK_LT(slice_axis_, num_axes) << "slice_dim out of range."; + } else { + slice_axis_ = bottom[0]->CanonicalAxisIndex(slice_param.axis()); + } + vector top_shape = bottom[0]->shape(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + num_slices_ = bottom[0]->count(0, slice_axis_); + slice_size_ = bottom[0]->count(slice_axis_ + 1); + int count = 0; if (slice_point_.size() != 0) { CHECK_EQ(slice_point_.size(), top.size() - 1); - if (slice_dim_ == 0) { - CHECK_LE(top.size(), num_); - } else { - CHECK_LE(top.size(), channels_); - } + CHECK_LE(top.size(), bottom_slice_axis); int prev = 0; vector slices; for (int i = 0; i < slice_point_.size(); ++i) { @@ -42,94 +50,64 @@ void SliceLayer::Reshape(const vector*>& bottom, slices.push_back(slice_point_[i] - prev); prev = slice_point_[i]; } - if (slice_dim_ == 0) { - slices.push_back(num_ - prev); - for (int i = 0; i < top.size(); ++i) { - top[i]->Reshape(slices[i], channels_, height_, width_); - count_ += top[i]->count(); - } - } else { - slices.push_back(channels_ - prev); - for (int i = 0; i < top.size(); ++i) { - top[i]->Reshape(num_, slices[i], height_, width_); - count_ += top[i]->count(); - } + slices.push_back(bottom_slice_axis - prev); + for (int i = 0; i < top.size(); ++i) { + top_shape[slice_axis_] = slices[i]; + top[i]->Reshape(top_shape); + count += top[i]->count(); } } else { - if (slice_dim_ == 0) { - CHECK_EQ(num_ % top.size(), 0) - << "Number of top blobs (" << top.size() << ") " - << "should evenly divide input num ( " << num_ << ")"; - num_ = num_ / top.size(); - } else { - CHECK_EQ(channels_ % top.size(), 0) - << "Number of top blobs (" << top.size() << ") " - << "should evenly divide input channels ( " << channels_ << ")"; - channels_ = channels_ / top.size(); - } + CHECK_EQ(bottom_slice_axis % top.size(), 0) + << "Number of top blobs (" << top.size() << ") should evenly " + << "divide input slice axis (" << bottom_slice_axis << ")"; + top_shape[slice_axis_] = bottom_slice_axis / top.size(); for (int i = 0; i < top.size(); ++i) { - top[i]->Reshape(num_, channels_, height_, width_); - count_ += top[i]->count(); + top[i]->Reshape(top_shape); + count += top[i]->count(); } } - CHECK_EQ(count_, bottom[0]->count()); + CHECK_EQ(count, bottom[0]->count()); } template void SliceLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { - const Dtype* bottom_data = bottom[0]->mutable_cpu_data(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_cpu_data(); - caffe_copy(blob->count(), bottom_data + bottom[0]->offset(offset_num), - top_data); - offset_num += blob->num(); + int offset_slice_axis = 0; + const Dtype* bottom_data = bottom[0]->cpu_data(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + Dtype* top_data = top[i]->mutable_cpu_data(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + bottom_data + bottom_offset, top_data + top_offset); } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = 
top[i]; - Dtype* top_data = blob->mutable_cpu_data(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data + bottom[0]->offset(n, offset_channel), - top_data + blob->offset(n)); - } - offset_channel += blob->channels(); - } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. + offset_slice_axis += top_slice_axis; + } } template void SliceLayer::Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { if (!propagate_down[0]) { return; } + int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->cpu_diff(); - caffe_copy(blob->count(), top_diff, - bottom_diff + bottom[0]->offset(offset_num)); - offset_num += blob->num(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + const Dtype* top_diff = top[i]->cpu_diff(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + top_diff + top_offset, bottom_diff + bottom_offset); } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->cpu_diff(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + blob->offset(n), - bottom_diff + bottom[0]->offset(n, offset_channel)); - } - offset_channel += blob->channels(); - } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. 
+ offset_slice_axis += top_slice_axis; + } } #ifdef CPU_ONLY diff --git a/src/caffe/layers/slice_layer.cu b/src/caffe/layers/slice_layer.cu index b5c5e61533f..e6e65677bd8 100644 --- a/src/caffe/layers/slice_layer.cu +++ b/src/caffe/layers/slice_layer.cu @@ -9,58 +9,42 @@ namespace caffe { template void SliceLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { - const Dtype* bottom_data = bottom[0]->mutable_gpu_data(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_gpu_data(); - caffe_copy(blob->count(), bottom_data + bottom[0]->offset(offset_num), - top_data); - offset_num += blob->num(); + int offset_slice_axis = 0; + const Dtype* bottom_data = bottom[0]->gpu_data(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + Dtype* top_data = top[i]->mutable_gpu_data(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + bottom_data + bottom_offset, top_data + top_offset); } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - Dtype* top_data = blob->mutable_gpu_data(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, bottom_data + bottom[0]->offset(n, offset_channel), - top_data + blob->offset(n)); - } - offset_channel += blob->channels(); - } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. + offset_slice_axis += top_slice_axis; + } } template void SliceLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { if (!propagate_down[0]) { return; } + int offset_slice_axis = 0; Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); - if (slice_dim_ == 0) { - int offset_num = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->gpu_diff(); - caffe_copy(blob->count(), top_diff, - bottom_diff + bottom[0]->offset(offset_num)); - offset_num += blob->num(); - } - } else if (slice_dim_ == 1) { - int offset_channel = 0; - for (int i = 0; i < top.size(); ++i) { - Blob* blob = top[i]; - const Dtype* top_diff = blob->gpu_diff(); - const int num_elem = blob->channels() * blob->height() * blob->width(); - for (int n = 0; n < num_; ++n) { - caffe_copy(num_elem, top_diff + blob->offset(n), - bottom_diff + bottom[0]->offset(n, offset_channel)); - } - offset_channel += blob->channels(); + const int bottom_slice_axis = bottom[0]->shape(slice_axis_); + for (int i = 0; i < top.size(); ++i) { + const Dtype* top_diff = top[i]->gpu_diff(); + const int top_slice_axis = top[i]->shape(slice_axis_); + for (int n = 0; n < num_slices_; ++n) { + const int top_offset = n * top_slice_axis * slice_size_; + const int bottom_offset = + (n * bottom_slice_axis + offset_slice_axis) * slice_size_; + caffe_copy(top_slice_axis * slice_size_, + top_diff + top_offset, bottom_diff + bottom_offset); } - } // slice_dim_ is guaranteed to be 0 or 1 by SetUp. 
+ offset_slice_axis += top_slice_axis; + } } INSTANTIATE_LAYER_GPU_FUNCS(SliceLayer); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 7a4ecf93c7a..7783a783dd7 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -674,12 +674,14 @@ message SigmoidParameter { // Message that stores parameters used by SliceLayer message SliceParameter { - // SliceLayer needs to know which dimension to slice across. - // Currently, SliceLayer only supports slicing across num (dim 0) - // and channels (dim 1). - // By default, SliceLayer slices across channels. - optional uint32 slice_dim = 1 [default = 1]; + // The axis along which to slice -- may be negative to index from the end + // (e.g., -1 for the last axis). + // By default, SliceLayer concatenates blobs along the "channels" axis (1). + optional int32 axis = 3 [default = 1]; repeated uint32 slice_point = 2; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. + optional uint32 slice_dim = 1 [default = 1]; } // Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer diff --git a/src/caffe/test/test_slice_layer.cpp b/src/caffe/test/test_slice_layer.cpp index 395be280089..ccd03646d19 100644 --- a/src/caffe/test/test_slice_layer.cpp +++ b/src/caffe/test/test_slice_layer.cpp @@ -62,7 +62,7 @@ TYPED_TEST_CASE(SliceLayerTest, TestDtypesAndDevices); TYPED_TEST(SliceLayerTest, TestSetupNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_slice_param()->set_slice_dim(0); + layer_param.mutable_slice_param()->set_axis(0); SliceLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_1_); EXPECT_EQ(this->blob_bottom_->num(), 3 * this->blob_top_0_->num()); @@ -91,7 +91,7 @@ TYPED_TEST(SliceLayerTest, TestSetupChannels) { TYPED_TEST(SliceLayerTest, TestSliceAcrossNum) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; - layer_param.mutable_slice_param()->set_slice_dim(0); + layer_param.mutable_slice_param()->set_axis(0); SliceLayer layer(layer_param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_0_); const int top_num = this->blob_bottom_->num() / 2; @@ -166,7 +166,7 @@ TYPED_TEST(SliceLayerTest, TestGradientAcrossNum) { // Gradient checks are slow; reduce blob size. 
this->ReduceBottomBlobSize(); LayerParameter layer_param; - layer_param.mutable_slice_param()->set_slice_dim(0); + layer_param.mutable_slice_param()->set_axis(0); SliceLayer layer(layer_param); GradientChecker checker(1e-2, 1e-3); checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, From abec30252ced89d9e2550ca47fca569f563479f6 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sun, 15 Feb 2015 13:26:36 -0800 Subject: [PATCH 43/65] SoftmaxLayer: generalized Blob axes --- include/caffe/common_layers.hpp | 3 ++ src/caffe/layers/softmax_layer.cpp | 62 +++++++++++++++--------------- src/caffe/layers/softmax_layer.cu | 35 ++++++++--------- src/caffe/proto/caffe.proto | 5 +++ 4 files changed, 54 insertions(+), 51 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index 4e47e55d5b6..b1ac3a93eff 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -353,6 +353,9 @@ class SoftmaxLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + int outer_num_; + int inner_num_; + int softmax_axis_; /// sum_multiplier is used to carry out sum using BLAS Blob sum_multiplier_; /// scale is an intermediate Blob to hold temporary results. diff --git a/src/caffe/layers/softmax_layer.cpp b/src/caffe/layers/softmax_layer.cpp index 25142fdec53..04712c9e653 100644 --- a/src/caffe/layers/softmax_layer.cpp +++ b/src/caffe/layers/softmax_layer.cpp @@ -10,14 +10,18 @@ namespace caffe { template void SoftmaxLayer::Reshape(const vector*>& bottom, const vector*>& top) { - top[0]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); - sum_multiplier_.Reshape(1, bottom[0]->channels(), 1, 1); + softmax_axis_ = + bottom[0]->CanonicalAxisIndex(this->layer_param_.softmax_param().axis()); + top[0]->ReshapeLike(*bottom[0]); + vector mult_dims(1, bottom[0]->shape(softmax_axis_)); + sum_multiplier_.Reshape(mult_dims); Dtype* multiplier_data = sum_multiplier_.mutable_cpu_data(); - for (int i = 0; i < sum_multiplier_.count(); ++i) { - multiplier_data[i] = 1.; - } - scale_.Reshape(bottom[0]->num(), 1, bottom[0]->height(), bottom[0]->width()); + caffe_set(sum_multiplier_.count(), Dtype(1), multiplier_data); + outer_num_ = bottom[0]->count(0, softmax_axis_); + inner_num_ = bottom[0]->count(softmax_axis_ + 1); + vector scale_dims = bottom[0]->shape(); + scale_dims[softmax_axis_] = 1; + scale_.Reshape(scale_dims); } template @@ -26,34 +30,32 @@ void SoftmaxLayer::Forward_cpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); Dtype* scale_data = scale_.mutable_cpu_data(); - int num = bottom[0]->num(); - int channels = bottom[0]->channels(); - int dim = bottom[0]->count() / bottom[0]->num(); - int spatial_dim = bottom[0]->height() * bottom[0]->width(); + int channels = bottom[0]->shape(softmax_axis_); + int dim = bottom[0]->count() / outer_num_; caffe_copy(bottom[0]->count(), bottom_data, top_data); // We need to subtract the max to avoid numerical issues, compute the exp, // and then normalize. 
- for (int i = 0; i < num; ++i) { + for (int i = 0; i < outer_num_; ++i) { // initialize scale_data to the first plane - caffe_copy(spatial_dim, bottom_data + i * dim, scale_data); + caffe_copy(inner_num_, bottom_data + i * dim, scale_data); for (int j = 0; j < channels; j++) { - for (int k = 0; k < spatial_dim; k++) { + for (int k = 0; k < inner_num_; k++) { scale_data[k] = std::max(scale_data[k], - bottom_data[i * dim + j * spatial_dim + k]); + bottom_data[i * dim + j * inner_num_ + k]); } } // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, spatial_dim, - 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data + i * dim); + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, inner_num_, + 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., top_data); // exponentiation - caffe_exp(dim, top_data + i * dim, top_data + i * dim); + caffe_exp(dim, top_data, top_data); // sum after exp - caffe_cpu_gemv(CblasTrans, channels, spatial_dim, 1., - top_data + i * dim, sum_multiplier_.cpu_data(), 0., scale_data); + caffe_cpu_gemv(CblasTrans, channels, inner_num_, 1., + top_data, sum_multiplier_.cpu_data(), 0., scale_data); // division for (int j = 0; j < channels; j++) { - caffe_div(spatial_dim, top_data + top[0]->offset(i, j), scale_data, - top_data + top[0]->offset(i, j)); + caffe_div(inner_num_, top_data, scale_data, top_data); + top_data += inner_num_; } } } @@ -66,20 +68,18 @@ void SoftmaxLayer::Backward_cpu(const vector*>& top, const Dtype* top_data = top[0]->cpu_data(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); Dtype* scale_data = scale_.mutable_cpu_data(); - int num = top[0]->num(); - int channels = top[0]->channels(); - int dim = top[0]->count() / top[0]->num(); - int spatial_dim = top[0]->height() * top[0]->width(); + int channels = top[0]->shape(softmax_axis_); + int dim = top[0]->count() / outer_num_; caffe_copy(top[0]->count(), top_diff, bottom_diff); - for (int i = 0; i < num; ++i) { + for (int i = 0; i < outer_num_; ++i) { // compute dot(top_diff, top_data) and subtract them from the bottom diff - for (int k = 0; k < spatial_dim; ++k) { + for (int k = 0; k < inner_num_; ++k) { scale_data[k] = caffe_cpu_strided_dot(channels, - bottom_diff + i * dim + k, spatial_dim, - top_data + i * dim + k, spatial_dim); + bottom_diff + i * dim + k, inner_num_, + top_data + i * dim + k, inner_num_); } // subtraction - caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, spatial_dim, 1, + caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, channels, inner_num_, 1, -1., sum_multiplier_.cpu_data(), scale_data, 1., bottom_diff + i * dim); } // elementwise multiplication diff --git a/src/caffe/layers/softmax_layer.cu b/src/caffe/layers/softmax_layer.cu index 6b8871a0b20..1f9c3a41203 100644 --- a/src/caffe/layers/softmax_layer.cu +++ b/src/caffe/layers/softmax_layer.cu @@ -90,36 +90,33 @@ void SoftmaxLayer::Forward_gpu(const vector*>& bottom, Dtype* top_data = top[0]->mutable_gpu_data(); Dtype* scale_data = scale_.mutable_gpu_data(); int count = bottom[0]->count(); - int num = bottom[0]->num(); - int channels = bottom[0]->channels(); - int spatial_dim = bottom[0]->height() * bottom[0]->width(); + int channels = top[0]->shape(softmax_axis_); caffe_copy(count, bottom_data, top_data); // We need to subtract the max to avoid numerical issues, compute the exp, // and then normalize. 
// compute max // NOLINT_NEXT_LINE(whitespace/operators) - kernel_channel_max<<>>(num, channels, spatial_dim, top_data, + kernel_channel_max<<>>(outer_num_, channels, inner_num_, top_data, scale_data); // subtract // NOLINT_NEXT_LINE(whitespace/operators) kernel_channel_subtract<<>>(count, num, channels, spatial_dim, + CAFFE_CUDA_NUM_THREADS>>>(count, outer_num_, channels, inner_num_, scale_data, top_data); // exponentiate // NOLINT_NEXT_LINE(whitespace/operators) - kernel_exp<<>>(num * channels * spatial_dim, top_data, - top_data); + kernel_exp<<>>( + count, top_data, top_data); // sum after exp // NOLINT_NEXT_LINE(whitespace/operators) - kernel_channel_sum<<>>(num, channels, spatial_dim, top_data, + kernel_channel_sum<<>>(outer_num_, channels, inner_num_, top_data, scale_data); // divide // NOLINT_NEXT_LINE(whitespace/operators) kernel_channel_div<<>>(count, num, channels, spatial_dim, + CAFFE_CUDA_NUM_THREADS>>>(count, outer_num_, channels, inner_num_, scale_data, top_data); } @@ -131,18 +128,16 @@ void SoftmaxLayer::Backward_gpu(const vector*>& top, Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); Dtype* scale_data = scale_.mutable_gpu_data(); int count = top[0]->count(); - int num = top[0]->num(); - int channels = top[0]->channels(); - int spatial_dim = top[0]->height() * top[0]->width(); - caffe_copy(top[0]->count(), top_diff, bottom_diff); + int channels = top[0]->shape(softmax_axis_); + caffe_copy(count, top_diff, bottom_diff); // Compute inner1d(top_diff, top_data) and subtract them from the bottom diff. // NOLINT_NEXT_LINE(whitespace/operators) - kernel_channel_dot<<>>(num, channels, spatial_dim, top_diff, top_data, - scale_data); + kernel_channel_dot<<>>(outer_num_, channels, inner_num_, + top_diff, top_data, scale_data); // NOLINT_NEXT_LINE(whitespace/operators) kernel_channel_subtract<<>>(count, num, channels, spatial_dim, + CAFFE_CUDA_NUM_THREADS>>>(count, outer_num_, channels, inner_num_, scale_data, bottom_diff); // elementwise multiplication caffe_gpu_mul(top[0]->count(), bottom_diff, top_data, bottom_diff); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 7783a783dd7..8fcb8def173 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -692,6 +692,11 @@ message SoftmaxParameter { CUDNN = 2; } optional Engine engine = 1 [default = DEFAULT]; + + // The axis along which to perform the softmax -- may be negative to index + // from the end (e.g., -1 for the last axis). + // Any other axes will be evaluated as independent softmaxes. 
+ optional int32 axis = 2 [default = 1]; } // Message that stores parameters used by TanHLayer From 60c288b9de8ecd2b17ee2d2eff7a31fecfe3e98b Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Mon, 9 Feb 2015 18:12:54 -0800 Subject: [PATCH 44/65] CuDNNSoftmaxLayer: generalized Blob axes --- src/caffe/layers/cudnn_softmax_layer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/cudnn_softmax_layer.cpp b/src/caffe/layers/cudnn_softmax_layer.cpp index 83a5b69a626..211701cad49 100644 --- a/src/caffe/layers/cudnn_softmax_layer.cpp +++ b/src/caffe/layers/cudnn_softmax_layer.cpp @@ -26,10 +26,10 @@ template <typename Dtype> void CuDNNSoftmaxLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { SoftmaxLayer<Dtype>::Reshape(bottom, top); - int N = bottom[0]->num(); - int K = bottom[0]->channels(); - int H = bottom[0]->height(); - int W = bottom[0]->width(); + int N = this->outer_num_; + int K = bottom[0]->shape(this->softmax_axis_); + int H = this->inner_num_; + int W = 1; cudnn::setTensor4dDesc<Dtype>(&bottom_desc_, N, K, H, W); cudnn::setTensor4dDesc<Dtype>(&top_desc_, N, K, H, W); } From 94d93da095486c3137aacd101f495acde673ba10 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Fri, 30 Jan 2015 23:22:26 -0800 Subject: [PATCH 45/65] SoftmaxLossLayer generalized like SoftmaxLayer --- include/caffe/loss_layers.hpp | 2 ++ src/caffe/layers/softmax_loss_layer.cpp | 42 ++++++++++++++----------- src/caffe/layers/softmax_loss_layer.cu | 20 +++++------- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/include/caffe/loss_layers.hpp b/include/caffe/loss_layers.hpp index 36413ccd176..62d6df71a4a 100644 --- a/include/caffe/loss_layers.hpp +++ b/include/caffe/loss_layers.hpp @@ -754,6 +754,8 @@ class SoftmaxWithLossLayer : public LossLayer<Dtype> { /// Whether to normalize the loss by the total number of values present /// (otherwise just by the batch size).
bool normalize_; + + int softmax_axis_, outer_num_, inner_num_; }; } // namespace caffe diff --git a/src/caffe/layers/softmax_loss_layer.cpp b/src/caffe/layers/softmax_loss_layer.cpp index 0c9ba2c6626..132c30796a4 100644 --- a/src/caffe/layers/softmax_loss_layer.cpp +++ b/src/caffe/layers/softmax_loss_layer.cpp @@ -35,6 +35,14 @@ void SoftmaxWithLossLayer::Reshape( const vector*>& bottom, const vector*>& top) { LossLayer::Reshape(bottom, top); softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_); + softmax_axis_ = this->layer_param_.softmax_param().axis(); + outer_num_ = bottom[0]->count(0, softmax_axis_); + inner_num_ = bottom[0]->count(softmax_axis_ + 1); + CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count()) + << "Number of labels must match number of predictions; " + << "e.g., if softmax axis == 1 and prediction shape is (N, C, H, W), " + << "label count (number of labels) must be N*H*W, " + << "with integer values in {0, 1, ..., C-1}."; if (top.size() >= 2) { // softmax output top[1]->ReshapeLike(*bottom[0]); @@ -48,20 +56,18 @@ void SoftmaxWithLossLayer::Forward_cpu( softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); const Dtype* prob_data = prob_.cpu_data(); const Dtype* label = bottom[1]->cpu_data(); - int num = prob_.num(); - int dim = prob_.count() / num; - int spatial_dim = prob_.height() * prob_.width(); + int dim = prob_.count() / outer_num_; int count = 0; Dtype loss = 0; - for (int i = 0; i < num; ++i) { - for (int j = 0; j < spatial_dim; j++) { - const int label_value = static_cast(label[i * spatial_dim + j]); + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; j++) { + const int label_value = static_cast(label[i * inner_num_ + j]); if (has_ignore_label_ && label_value == ignore_label_) { continue; } DCHECK_GE(label_value, 0); - DCHECK_LT(label_value, prob_.channels()); - loss -= log(std::max(prob_data[i * dim + label_value * spatial_dim + j], + DCHECK_LT(label_value, prob_.shape(softmax_axis_)); + loss -= log(std::max(prob_data[i * dim + label_value * inner_num_ + j], Dtype(FLT_MIN))); ++count; } @@ -69,7 +75,7 @@ void SoftmaxWithLossLayer::Forward_cpu( if (normalize_) { top[0]->mutable_cpu_data()[0] = loss / count; } else { - top[0]->mutable_cpu_data()[0] = loss / num; + top[0]->mutable_cpu_data()[0] = loss / outer_num_; } if (top.size() == 2) { top[1]->ShareData(prob_); @@ -88,19 +94,17 @@ void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, const Dtype* prob_data = prob_.cpu_data(); caffe_copy(prob_.count(), prob_data, bottom_diff); const Dtype* label = bottom[1]->cpu_data(); - int num = prob_.num(); - int dim = prob_.count() / num; - int spatial_dim = prob_.height() * prob_.width(); + int dim = prob_.count() / outer_num_; int count = 0; - for (int i = 0; i < num; ++i) { - for (int j = 0; j < spatial_dim; ++j) { - const int label_value = static_cast(label[i * spatial_dim + j]); + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; ++j) { + const int label_value = static_cast(label[i * inner_num_ + j]); if (has_ignore_label_ && label_value == ignore_label_) { - for (int c = 0; c < bottom[0]->channels(); ++c) { - bottom_diff[i * dim + c * spatial_dim + j] = 0; + for (int c = 0; c < bottom[0]->shape(softmax_axis_); ++c) { + bottom_diff[i * dim + c * inner_num_ + j] = 0; } } else { - bottom_diff[i * dim + label_value * spatial_dim + j] -= 1; + bottom_diff[i * dim + label_value * inner_num_ + j] -= 1; ++count; } } @@ -110,7 +114,7 @@ void SoftmaxWithLossLayer::Backward_cpu(const vector*>& top, if 
(normalize_) { caffe_scal(prob_.count(), loss_weight / count, bottom_diff); } else { - caffe_scal(prob_.count(), loss_weight / num, bottom_diff); + caffe_scal(prob_.count(), loss_weight / outer_num_, bottom_diff); } } } diff --git a/src/caffe/layers/softmax_loss_layer.cu b/src/caffe/layers/softmax_loss_layer.cu index 215d589ffee..7e0f3da4552 100644 --- a/src/caffe/layers/softmax_loss_layer.cu +++ b/src/caffe/layers/softmax_loss_layer.cu @@ -35,10 +35,8 @@ void SoftmaxWithLossLayer<Dtype>::Forward_gpu( softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); const Dtype* prob_data = prob_.gpu_data(); const Dtype* label = bottom[1]->gpu_data(); - const int num = prob_.num(); - const int dim = prob_.count() / num; - const int spatial_dim = prob_.height() * prob_.width(); - const int nthreads = num * spatial_dim; + const int dim = prob_.count() / outer_num_; + const int nthreads = outer_num_ * inner_num_; // Since this memory is not used for anything until it is overwritten // on the backward pass, we use it here to avoid having to allocate new GPU // memory to accumulate intermediate results in the kernel. @@ -49,7 +47,7 @@ void SoftmaxWithLossLayer<Dtype>::Forward_gpu( // NOLINT_NEXT_LINE(whitespace/operators) SoftmaxLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(nthreads, prob_data, label, loss_data, - num, dim, spatial_dim, has_ignore_label_, ignore_label_, counts); + outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); Dtype loss; caffe_gpu_asum(nthreads, loss_data, &loss); if (normalize_) { @@ -57,7 +55,7 @@ void SoftmaxWithLossLayer<Dtype>::Forward_gpu( caffe_gpu_asum(nthreads, counts, &count); loss /= count; } else { - loss /= num; + loss /= outer_num_; } top[0]->mutable_cpu_data()[0] = loss; if (top.size() == 2) { @@ -102,24 +100,22 @@ void SoftmaxWithLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, const Dtype* top_data = top[0]->gpu_data(); caffe_gpu_memcpy(prob_.count() * sizeof(Dtype), prob_data, bottom_diff); const Dtype* label = bottom[1]->gpu_data(); - const int num = prob_.num(); - const int dim = prob_.count() / num; - const int spatial_dim = prob_.height() * prob_.width(); - const int nthreads = num * spatial_dim; + const int dim = prob_.count() / outer_num_; + const int nthreads = outer_num_ * inner_num_; // Since this memory is never used for anything else, // we use it to avoid allocating new GPU memory. Dtype* counts = prob_.mutable_gpu_diff(); // NOLINT_NEXT_LINE(whitespace/operators) SoftmaxLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads), CAFFE_CUDA_NUM_THREADS>>>(nthreads, top_data, label, bottom_diff, - num, dim, spatial_dim, has_ignore_label_, ignore_label_, counts); + outer_num_, dim, inner_num_, has_ignore_label_, ignore_label_, counts); const Dtype loss_weight = top[0]->cpu_diff()[0]; if (normalize_) { Dtype count; caffe_gpu_asum(nthreads, counts, &count); caffe_gpu_scal(prob_.count(), loss_weight / count, bottom_diff); } else { - caffe_gpu_scal(prob_.count(), loss_weight / num, bottom_diff); + caffe_gpu_scal(prob_.count(), loss_weight / outer_num_, bottom_diff); } } } From e2bc9f997d5ebfbefdc4a7e2fa0da9b029c1573a Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 05:02:15 -0800 Subject: [PATCH 46/65] SplitLayer: change Reshape(n,c,h,w) to ReshapeLike(...)
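For illustration, a minimal sketch of the effect (not taken from the patch; the example() wrapper is hypothetical, and only the Reshape/ReshapeLike calls already used in this series are assumed). ReshapeLike copies the source blob's whole shape vector, so it also works once blobs are no longer restricted to 4 axes:

    #include <vector>
    #include "caffe/blob.hpp"

    void example() {
      std::vector<int> shape(3);
      shape[0] = 10; shape[1] = 9; shape[2] = 8;   // a 3-axis bottom blob
      caffe::Blob<float> bottom;
      bottom.Reshape(shape);
      caffe::Blob<float> top;
      top.ReshapeLike(bottom);   // top takes the full shape (10, 9, 8)
      // The old explicit call, top.Reshape(num, channels, height, width),
      // always produced a 4-axis top.
    }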
--- src/caffe/layers/split_layer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/caffe/layers/split_layer.cpp b/src/caffe/layers/split_layer.cpp index d6929b99683..272cb59cd37 100644 --- a/src/caffe/layers/split_layer.cpp +++ b/src/caffe/layers/split_layer.cpp @@ -18,8 +18,7 @@ void SplitLayer::Reshape(const vector*>& bottom, // some strange effects in practice...) CHECK_NE(top[i], bottom[0]) << this->type() << " Layer does not " "allow in-place computation."; - top[i]->Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); + top[i]->ReshapeLike(*bottom[0]); CHECK_EQ(count_, top[i]->count()); } } From e6468e900f03b2c4f2d9ccf160b46bff0f52dfb9 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 05:42:11 -0800 Subject: [PATCH 47/65] HDF5DataLayer shapes output according to HDF5 shape --- src/caffe/layers/hdf5_data_layer.cpp | 11 ++++++++--- src/caffe/test/test_hdf5data_layer.cpp | 16 +++++++--------- src/caffe/util/io.cpp | 10 +++++----- 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 3d856ec3001..1ceb6c24431 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -36,7 +36,7 @@ void HDF5DataLayer::LoadHDF5FileData(const char* filename) { hdf_blobs_.resize(top_size); const int MIN_DATA_DIM = 1; - const int MAX_DATA_DIM = 4; + const int MAX_DATA_DIM = INT_MAX; for (int i = 0; i < top_size; ++i) { hdf_blobs_[i] = shared_ptr >(new Blob()); @@ -88,9 +88,14 @@ void HDF5DataLayer::LayerSetUp(const vector*>& bottom, // Reshape blobs. const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); const int top_size = this->layer_param_.top_size(); + vector top_shape; for (int i = 0; i < top_size; ++i) { - top[i]->Reshape(batch_size, hdf_blobs_[i]->channels(), - hdf_blobs_[i]->height(), hdf_blobs_[i]->width()); + top_shape.resize(hdf_blobs_[i]->num_axes()); + top_shape[0] = batch_size; + for (int j = 1; j < top_shape.size(); ++j) { + top_shape[j] = hdf_blobs_[i]->shape(j); + } + top[i]->Reshape(top_shape); } } diff --git a/src/caffe/test/test_hdf5data_layer.cpp b/src/caffe/test/test_hdf5data_layer.cpp index 8d3b3d1e987..c9b027f88cf 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -77,15 +77,13 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) { EXPECT_EQ(this->blob_top_data_->height(), height); EXPECT_EQ(this->blob_top_data_->width(), width); - EXPECT_EQ(this->blob_top_label_->num(), batch_size); - EXPECT_EQ(this->blob_top_label_->channels(), 1); - EXPECT_EQ(this->blob_top_label_->height(), 1); - EXPECT_EQ(this->blob_top_label_->width(), 1); - - EXPECT_EQ(this->blob_top_label2_->num(), batch_size); - EXPECT_EQ(this->blob_top_label2_->channels(), 1); - EXPECT_EQ(this->blob_top_label2_->height(), 1); - EXPECT_EQ(this->blob_top_label2_->width(), 1); + EXPECT_EQ(this->blob_top_label_->num_axes(), 2); + EXPECT_EQ(this->blob_top_label_->shape(0), batch_size); + EXPECT_EQ(this->blob_top_label_->shape(1), 1); + + EXPECT_EQ(this->blob_top_label2_->num_axes(), 2); + EXPECT_EQ(this->blob_top_label2_->shape(0), batch_size); + EXPECT_EQ(this->blob_top_label2_->shape(1), 1); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index b243a9804ec..77ef7f257f4 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -252,11 +252,11 @@ void hdf5_load_nd_dataset_helper( 
CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_; CHECK_EQ(class_, H5T_FLOAT) << "Expected float or double data"; - blob->Reshape( - dims[0], - (dims.size() > 1) ? dims[1] : 1, - (dims.size() > 2) ? dims[2] : 1, - (dims.size() > 3) ? dims[3] : 1); + vector blob_dims(dims.size()); + for (int i = 0; i < dims.size(); ++i) { + blob_dims[i] = dims[i]; + } + blob->Reshape(blob_dims); } template <> From e56377d96e52fbfd2226bc35db6cd3e6e404afd0 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 05:34:47 -0800 Subject: [PATCH 48/65] DataLayer outputs 1D labels --- src/caffe/layers/data_layer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 1861090f953..0f2d66776a9 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -69,9 +69,9 @@ void DataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->width(); // label if (this->output_labels_) { - top[1]->Reshape(this->layer_param_.data_param().batch_size(), 1, 1, 1); - this->prefetch_label_.Reshape(this->layer_param_.data_param().batch_size(), - 1, 1, 1); + vector label_shape(1, this->layer_param_.data_param().batch_size()); + top[1]->Reshape(label_shape); + this->prefetch_label_.Reshape(label_shape); } } From 7c8725bc6f2a017dc8c6979f75f9b3f0163c8a0a Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 05:42:50 -0800 Subject: [PATCH 49/65] MemoryDataLayer outputs 1D labels --- src/caffe/layers/memory_data_layer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/memory_data_layer.cpp b/src/caffe/layers/memory_data_layer.cpp index effdad90aff..42de4198bc4 100644 --- a/src/caffe/layers/memory_data_layer.cpp +++ b/src/caffe/layers/memory_data_layer.cpp @@ -19,10 +19,11 @@ void MemoryDataLayer::DataLayerSetUp(const vector*>& bottom, CHECK_GT(batch_size_ * size_, 0) << "batch_size, channels, height, and width must be specified and" " positive in memory_data_param"; + vector label_shape(1, batch_size_); top[0]->Reshape(batch_size_, channels_, height_, width_); - top[1]->Reshape(batch_size_, 1, 1, 1); + top[1]->Reshape(label_shape); added_data_.Reshape(batch_size_, channels_, height_, width_); - added_label_.Reshape(batch_size_, 1, 1, 1); + added_label_.Reshape(label_shape); data_ = NULL; labels_ = NULL; added_data_.cpu_data(); From c87a136add3a43eb6ccf9f5b69a5a8a73ae4e753 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 12:56:14 -0800 Subject: [PATCH 50/65] ImageDataLayer outputs 1D labels --- src/caffe/layers/image_data_layer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index f9046e1b3a1..38ebbd5ec14 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -81,8 +81,9 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // label - top[1]->Reshape(batch_size, 1, 1, 1); - this->prefetch_label_.Reshape(batch_size, 1, 1, 1); + vector label_shape(1, batch_size); + top[1]->Reshape(label_shape); + this->prefetch_label_.Reshape(label_shape); } template From 9505001d82698cb8028c479a238eb49ef9201068 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 26 Nov 2014 12:57:15 -0800 Subject: [PATCH 51/65] WindowDataLayer outputs 1D labels --- src/caffe/layers/window_data_layer.cpp | 5 +++-- 1 file 
changed, 3 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 36e41560327..c127d56bc46 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -177,8 +177,9 @@ void WindowDataLayer::DataLayerSetUp(const vector*>& bottom, << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // label - top[1]->Reshape(batch_size, 1, 1, 1); - this->prefetch_label_.Reshape(batch_size, 1, 1, 1); + vector label_shape(1, batch_size); + top[1]->Reshape(label_shape); + this->prefetch_label_.Reshape(label_shape); // data mean has_mean_file_ = this->transform_param_.has_mean_file(); From fcbb933ab5e31418fc4f8705d72d01f9c80363ce Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sat, 29 Nov 2014 18:00:44 -0800 Subject: [PATCH 52/65] EuclideanLossLayer: generalized Blob axes --- src/caffe/layers/euclidean_loss_layer.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/caffe/layers/euclidean_loss_layer.cpp b/src/caffe/layers/euclidean_loss_layer.cpp index b539d3487f5..80efa31b22c 100644 --- a/src/caffe/layers/euclidean_loss_layer.cpp +++ b/src/caffe/layers/euclidean_loss_layer.cpp @@ -11,11 +11,9 @@ template void EuclideanLossLayer::Reshape( const vector*>& bottom, const vector*>& top) { LossLayer::Reshape(bottom, top); - CHECK_EQ(bottom[0]->channels(), bottom[1]->channels()); - CHECK_EQ(bottom[0]->height(), bottom[1]->height()); - CHECK_EQ(bottom[0]->width(), bottom[1]->width()); - diff_.Reshape(bottom[0]->num(), bottom[0]->channels(), - bottom[0]->height(), bottom[0]->width()); + CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1)) + << "Inputs must have the same dimension."; + diff_.ReshapeLike(*bottom[0]); } template From 7462c84ca0b44b0c1e270e56e381c618f6134857 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 31 Dec 2014 16:06:46 -0800 Subject: [PATCH 53/65] DummyDataLayer outputs blobs of arbitrary shape --- src/caffe/layers/dummy_data_layer.cpp | 57 +++++++++++++++++---------- src/caffe/proto/caffe.proto | 6 ++- 2 files changed, 41 insertions(+), 22 deletions(-) diff --git a/src/caffe/layers/dummy_data_layer.cpp b/src/caffe/layers/dummy_data_layer.cpp index d254eb1f961..6b0d617464c 100644 --- a/src/caffe/layers/dummy_data_layer.cpp +++ b/src/caffe/layers/dummy_data_layer.cpp @@ -16,18 +16,30 @@ void DummyDataLayer::LayerSetUp(const vector*>& bottom, num_data_filler == num_top) << "Number of data fillers must be 0, 1 or equal to the number of tops: " << num_top << "; you specified " << num_data_filler << " data fillers."; - CHECK(param.num_size() == 1 || param.num_size() == num_top) - << "Must specify either a single (1) 'num' or one for each top blob " - << "(" << num_top << "); you specified " << param.num_size() << "."; - CHECK(param.channels_size() == 1 || param.channels_size() == num_top) - << "Must specify either a single (1) 'channels' or one for each top blob " - << "(" << num_top << "); you specified " << param.channels_size() << "."; - CHECK(param.height_size() == 1 || param.height_size() == num_top) - << "Must specify either a single (1) 'height' or one for each top blob " - << "(" << num_top << "); you specified " << param.height_size() << "."; - CHECK(param.width_size() == 1 || param.width_size() == num_top) - << "Must specify either a single (1) 'width' or one for each top blob " - << "(" << num_top << "); you specified " << param.width_size() << "."; + + const bool legacy_dims = param.num_size() || param.channels_size() || + 
param.height_size() || param.width_size(); + if (legacy_dims) { + CHECK_EQ(0, param.shape_size()) + << "Both shape and legacy fields were specified"; + // Using deprecated 4D output dim specifiers. + CHECK(param.num_size() == 1 || param.num_size() == num_top) + << "Must specify 'num' once, or once per top blob " + << "(" << num_top << "); specified " << param.num_size() << "."; + CHECK(param.channels_size() == 1 || param.channels_size() == num_top) + << "Must specify 'channels' once, or once per top blob " + << "(" << num_top << "); specified " << param.channels_size() << "."; + CHECK(param.height_size() == 1 || param.height_size() == num_top) + << "Must specify 'height' once, or once per top blob " + << "(" << num_top << "); specified " << param.height_size() << "."; + CHECK(param.width_size() == 1 || param.width_size() == num_top) + << "Must specify 'width' once, or once per top blob " + << "(" << num_top << "); specified " << param.width_size() << "."; + } else { + CHECK(param.shape_size() == 1 || param.shape_size() == num_top) + << "Must specify 'shape' once, or once per top blob " + << "(" << num_top << "); specified " << param.shape_size() << "."; + } // refill_[i] tells Forward i whether or not to actually refill top Blob i. // If refill_[i] is false, Forward does nothing for Blob i. We use this to // avoid wastefully refilling "constant" Blobs in every forward pass. @@ -63,14 +75,19 @@ void DummyDataLayer::LayerSetUp(const vector*>& bottom, } } for (int i = 0; i < num_top; ++i) { - const int num = (param.num_size() == 1) ? param.num(0) : param.num(i); - const int channels = - (param.channels_size() == 1) ? param.channels(0) : param.channels(i); - const int height = - (param.height_size() == 1) ? param.height(0) : param.height(i); - const int width = - (param.width_size() == 1) ? param.width(0) : param.width(i); - top[i]->Reshape(num, channels, height, width); + if (legacy_dims) { + const int num = (param.num_size() == 1) ? param.num(0) : param.num(i); + const int channels = + (param.channels_size() == 1) ? param.channels(0) : param.channels(i); + const int height = + (param.height_size() == 1) ? param.height(0) : param.height(i); + const int width = + (param.width_size() == 1) ? param.width(0) : param.width(i); + top[i]->Reshape(num, channels, height, width); + } else { + const int shape_index = (param.shape_size() == 1) ? 0 : i; + top[i]->Reshape(param.shape(shape_index)); + } } // Run Forward once, with refill_ inverted, to fill the constant Blobs. this->Forward(bottom, top); diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 8fcb8def173..3b4794664b5 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -461,13 +461,15 @@ message DropoutParameter { // (or constant) data generated by "Fillers" (see "message FillerParameter"). message DummyDataParameter { // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N - // num, N channels, N height, and N width fields, and must specify 0, 1 or N - // data_fillers. + // shape fields, and 0, 1 or N data_fillers. // // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. // If 1 data_filler is specified, it is applied to all top blobs. If N are // specified, the ith is applied to the ith top blob. repeated FillerParameter data_filler = 1; + repeated BlobShape shape = 6; + + // 4D dimensions -- deprecated. Use "shape" instead. 
repeated uint32 num = 2; repeated uint32 channels = 3; repeated uint32 height = 4; From 69fc1f642e8f87df44131aeaa0b55fb221897d40 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 15 Jan 2015 19:50:42 -0800 Subject: [PATCH 54/65] Add CHECK_EQ(4, ...)s to "vision layers" to enforce that the num/channnels/height/width indexing is valid. --- src/caffe/layers/base_conv_layer.cpp | 4 ++++ src/caffe/layers/im2col_layer.cpp | 2 ++ src/caffe/layers/lrn_layer.cpp | 2 ++ src/caffe/layers/pooling_layer.cpp | 2 ++ 4 files changed, 10 insertions(+) diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index d4ab5e429b5..ccb3adc7e89 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -11,6 +11,8 @@ namespace caffe { template void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; // Configure the kernel size, padding, stride, and inputs. ConvolutionParameter conv_param = this->layer_param_.convolution_param(); CHECK(!conv_param.has_kernel_size() != @@ -101,6 +103,8 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, template void BaseConvolutionLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; num_ = bottom[0]->num(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp index 112226116c8..1c802714e33 100644 --- a/src/caffe/layers/im2col_layer.cpp +++ b/src/caffe/layers/im2col_layer.cpp @@ -50,6 +50,8 @@ void Im2colLayer::LayerSetUp(const vector*>& bottom, template void Im2colLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index 5e3e7c429ef..36c1ace4c99 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -69,6 +69,8 @@ void LRNLayer::LayerSetUp(const vector*>& bottom, template void LRNLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; num_ = bottom[0]->num(); channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp index 6f4c69c861e..c8d41499455 100644 --- a/src/caffe/layers/pooling_layer.cpp +++ b/src/caffe/layers/pooling_layer.cpp @@ -81,6 +81,8 @@ void PoolingLayer::LayerSetUp(const vector*>& bottom, template void PoolingLayer::Reshape(const vector*>& bottom, const vector*>& top) { + CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, " + << "corresponding to (num, channels, height, width)"; channels_ = bottom[0]->channels(); height_ = bottom[0]->height(); width_ = bottom[0]->width(); From 269dafa37a96250012a10537ad773840b4621ff7 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 1 Jan 2015 17:32:38 -0800 Subject: [PATCH 55/65] PyBlobs support generalized axes --- python/caffe/_caffe.cpp | 35 ++++++++++++++++++++++++++++++----- python/caffe/pycaffe.py | 2 -- 2 
files changed, 30 insertions(+), 7 deletions(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 03967a21029..d4eda798192 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -163,9 +163,10 @@ struct NdarrayCallPolicies : public bp::default_call_policies { // the shape information from the blob. void* data = PyArray_DATA(reinterpret_cast(result)); Py_DECREF(result); - npy_intp dims[] = {blob->num(), blob->channels(), - blob->height(), blob->width()}; - PyObject* arr_obj = PyArray_SimpleNewFromData(4, dims, NPY_FLOAT32, data); + const int num_axes = blob->num_axes(); + vector dims(blob->shape().begin(), blob->shape().end()); + PyObject *arr_obj = PyArray_SimpleNewFromData(num_axes, dims.data(), + NPY_FLOAT32, data); // SetBaseObject steals a ref, so we need to INCREF. Py_INCREF(pyblob.ptr()); PyArray_SetBaseObject(reinterpret_cast(arr_obj), @@ -174,6 +175,27 @@ struct NdarrayCallPolicies : public bp::default_call_policies { } }; +void Blob_Reshape(Blob* blob, bp::object shape_obj) { + PyArrayObject* shape_arr = + reinterpret_cast(shape_obj.ptr()); + if (!(PyArray_FLAGS(shape_arr) & NPY_ARRAY_C_CONTIGUOUS)) { + throw std::runtime_error("new shape must be C contiguous"); + } + if (PyArray_NDIM(shape_arr) != 1) { + throw std::runtime_error("new shape must be 1-d"); + } + if (PyArray_TYPE(shape_arr) != NPY_INT32) { + throw std::runtime_error("new shape must be specified as int32 array"); + } + npy_int32* shape_data = static_cast(PyArray_DATA(shape_arr)); + const int num_axes = PyArray_SIZE(shape_arr); + vector shape(num_axes); + for (int i = 0; i < num_axes; ++i) { + shape[i] = shape_data[i]; + } + blob->Reshape(shape); +} + BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); BOOST_PYTHON_MODULE(_caffe) { @@ -218,8 +240,11 @@ BOOST_PYTHON_MODULE(_caffe) { .add_property("channels", &Blob::channels) .add_property("height", &Blob::height) .add_property("width", &Blob::width) - .add_property("count", &Blob::count) - .def("reshape", &Blob::Reshape) + .add_property("count", static_cast::*)() const>( + &Blob::count)) + .def("reshape", static_cast::*)(int, int, int, int)>( + &Blob::Reshape)) + .def("reshape", &Blob_Reshape) .add_property("data", bp::make_function(&Blob::mutable_cpu_data, NdarrayCallPolicies())) .add_property("diff", bp::make_function(&Blob::mutable_cpu_diff, diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index d662d6cc282..ac387d51d96 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -85,8 +85,6 @@ def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): # Set input according to defined shapes and make arrays single and # C-contiguous as Caffe expects. for in_, blob in kwargs.iteritems(): - if blob.ndim != 4: - raise Exception('{} blob is not 4-d'.format(in_)) if blob.shape[0] != self.blobs[in_].num: raise Exception('Input is not batch sized') self.blobs[in_].data[...] = blob From 71df6f90c8cc72c197db6bfe4b24480ab42b93bc Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Fri, 30 Jan 2015 23:16:44 -0800 Subject: [PATCH 56/65] Add option not to reshape to Blob::FromProto; use when loading Blobs from saved NetParameter Want to keep the param Blob shape the layer has set, and not necessarily adopt the one from the saved net (e.g. want to keep new 1D bias shape, rather than take the (1 x 1 x 1 x D) shape from a legacy net). 
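For illustration, a minimal usage sketch (not taken from the patch; the LoadBias helper is hypothetical, and only the FromProto signature added in the hunks below is assumed):

    #include "caffe/blob.hpp"
    #include "caffe/proto/caffe.pb.h"

    // "saved" is a BlobProto read from an old caffemodel that stores a bias
    // with the legacy (1 x 1 x 1 x D) shape.
    void LoadBias(caffe::Blob<float>* bias, const caffe::BlobProto& saved) {
      // Default (reshape == true): the blob adopts the proto's 4-D shape.
      bias->FromProto(saved);
      // New option (reshape == false): keep the 1-D shape the layer set up;
      // FromProto only checks that the saved shape is compatible before
      // copying the data, which is what Net::CopyTrainedLayersFrom now does.
      bias->FromProto(saved, false);
    }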
--- include/caffe/blob.hpp | 2 +- src/caffe/blob.cpp | 36 ++++++++++++++++++++---------------- src/caffe/net.cpp | 4 ++-- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index 82ca95a5f0f..36579a5a545 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -227,7 +227,7 @@ class Blob { Dtype* mutable_cpu_diff(); Dtype* mutable_gpu_diff(); void Update(); - void FromProto(const BlobProto& proto); + void FromProto(const BlobProto& proto, bool reshape = true); void ToProto(BlobProto* proto, bool write_diff = false) const; /// @brief Compute the sum of absolute values (L1 norm) of the data. diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index c65dc2dd1ac..6d2b3f502d9 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -431,24 +431,28 @@ void Blob::CopyFrom(const Blob& source, bool copy_diff, bool reshape) { } template -void Blob::FromProto(const BlobProto& proto) { - vector shape; - if (proto.has_num() || proto.has_channels() || - proto.has_height() || proto.has_width()) { - // Using deprecated 4D Blob dimensions -- - // shape is (num, channels, height, width). - shape.resize(4); - shape[0] = proto.num(); - shape[1] = proto.channels(); - shape[2] = proto.height(); - shape[3] = proto.width(); - } else { - shape.resize(proto.shape().dim_size()); - for (int i = 0; i < proto.shape().dim_size(); ++i) { - shape[i] = proto.shape().dim(i); +void Blob::FromProto(const BlobProto& proto, bool reshape) { + if (reshape) { + vector shape; + if (proto.has_num() || proto.has_channels() || + proto.has_height() || proto.has_width()) { + // Using deprecated 4D Blob dimensions -- + // shape is (num, channels, height, width). + shape.resize(4); + shape[0] = proto.num(); + shape[1] = proto.channels(); + shape[2] = proto.height(); + shape[3] = proto.width(); + } else { + shape.resize(proto.shape().dim_size()); + for (int i = 0; i < proto.shape().dim_size(); ++i) { + shape[i] = proto.shape().dim(i); + } } + Reshape(shape); + } else { + CHECK(ShapeEquals(proto)) << "shape mismatch (reshape not set)"; } - Reshape(shape); // copy data Dtype* data_vec = mutable_cpu_data(); for (int i = 0; i < count_; ++i) { diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 60f387b92e9..e8f7c05e09d 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -705,8 +705,8 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param) { CHECK_EQ(target_blobs.size(), source_layer.blobs_size()) << "Incompatible number of blobs for layer " << source_layer_name; for (int j = 0; j < target_blobs.size(); ++j) { - CHECK(target_blobs[j]->ShapeEquals(source_layer.blobs(j))); - target_blobs[j]->FromProto(source_layer.blobs(j)); + const bool kReshape = false; + target_blobs[j]->FromProto(source_layer.blobs(j), kReshape); } } } From aa242aa5a121cf3afc0bacf4449da5a4ad76b236 Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Mon, 2 Mar 2015 15:27:45 -0800 Subject: [PATCH 57/65] [pycaffe] expose Blob.reshape as *args function --- python/caffe/_caffe.cpp | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index d4eda798192..bfea0de661b 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -175,25 +176,18 @@ struct NdarrayCallPolicies : public bp::default_call_policies { } }; -void Blob_Reshape(Blob* blob, bp::object shape_obj) { - PyArrayObject* shape_arr = - 
reinterpret_cast(shape_obj.ptr()); - if (!(PyArray_FLAGS(shape_arr) & NPY_ARRAY_C_CONTIGUOUS)) { - throw std::runtime_error("new shape must be C contiguous"); +bp::object Blob_Reshape(bp::tuple args, bp::dict kwargs) { + if (bp::len(kwargs) > 0) { + throw std::runtime_error("Blob.reshape takes no kwargs"); } - if (PyArray_NDIM(shape_arr) != 1) { - throw std::runtime_error("new shape must be 1-d"); + Blob* self = bp::extract*>(args[0]); + vector shape(bp::len(args) - 1); + for (int i = 1; i < bp::len(args); ++i) { + shape[i - 1] = bp::extract(args[i]); } - if (PyArray_TYPE(shape_arr) != NPY_INT32) { - throw std::runtime_error("new shape must be specified as int32 array"); - } - npy_int32* shape_data = static_cast(PyArray_DATA(shape_arr)); - const int num_axes = PyArray_SIZE(shape_arr); - vector shape(num_axes); - for (int i = 0; i < num_axes; ++i) { - shape[i] = shape_data[i]; - } - blob->Reshape(shape); + self->Reshape(shape); + // We need to explicitly return None to use bp::raw_function. + return bp::object(); } BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); @@ -242,9 +236,7 @@ BOOST_PYTHON_MODULE(_caffe) { .add_property("width", &Blob::width) .add_property("count", static_cast::*)() const>( &Blob::count)) - .def("reshape", static_cast::*)(int, int, int, int)>( - &Blob::Reshape)) - .def("reshape", &Blob_Reshape) + .def("reshape", bp::raw_function(&Blob_Reshape)) .add_property("data", bp::make_function(&Blob::mutable_cpu_data, NdarrayCallPolicies())) .add_property("diff", bp::make_function(&Blob::mutable_cpu_diff, From 8c79d65e333388e2bde9346899b7cdf812aab2c6 Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Mon, 2 Mar 2015 15:54:11 -0800 Subject: [PATCH 58/65] [pytest] use non-4d blobs in test_python_layer --- python/caffe/test/test_python_layer.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/python/caffe/test/test_python_layer.py b/python/caffe/test/test_python_layer.py index 383c283959d..dd99f6f15b9 100644 --- a/python/caffe/test/test_python_layer.py +++ b/python/caffe/test/test_python_layer.py @@ -11,8 +11,7 @@ def setup(self, bottom, top): pass def reshape(self, bottom, top): - top[0].reshape(bottom[0].num, bottom[0].channels, bottom[0].height, - bottom[0].width) + top[0].reshape(*bottom[0].data.shape) def forward(self, bottom, top): top[0].data[...] = 10 * bottom[0].data @@ -21,17 +20,16 @@ def backward(self, top, propagate_down, bottom): bottom[0].diff[...] 
= 10 * top[0].diff def python_net_file(): - f = tempfile.NamedTemporaryFile(delete=False) - f.write("""name: 'pythonnet' force_backward: true - input: 'data' input_dim: 10 input_dim: 9 input_dim: 8 input_dim: 7 - layer { type: 'Python' name: 'one' bottom: 'data' top: 'one' - python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } - layer { type: 'Python' name: 'two' bottom: 'one' top: 'two' - python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } - layer { type: 'Python' name: 'three' bottom: 'two' top: 'three' - python_param { module: 'test_python_layer' layer: 'SimpleLayer' } }""") - f.close() - return f.name + with tempfile.NamedTemporaryFile(delete=False) as f: + f.write("""name: 'pythonnet' force_backward: true + input: 'data' input_shape { dim: 10 dim: 9 dim: 8 } + layer { type: 'Python' name: 'one' bottom: 'data' top: 'one' + python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } + layer { type: 'Python' name: 'two' bottom: 'one' top: 'two' + python_param { module: 'test_python_layer' layer: 'SimpleLayer' } } + layer { type: 'Python' name: 'three' bottom: 'two' top: 'three' + python_param { module: 'test_python_layer' layer: 'SimpleLayer' } }""") + return f.name class TestPythonLayer(unittest.TestCase): def setUp(self): From 642619bcd45ab71be2483379ff4e7192628c11ab Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Mon, 23 Feb 2015 09:18:31 -0800 Subject: [PATCH 59/65] Add error checking for image mean When setting the mean, assert that it is either one pixel or an array with shape equal to the input data size. --- python/caffe/io.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index f51e3a64d36..f7f75b73cd8 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -238,11 +238,16 @@ def set_mean(self, in_, mean): """ self.__check_input(in_) if mean.ndim == 1: + # broadcast pixel mean = mean[:, np.newaxis, np.newaxis] - mk, mh, mw = mean.shape - in_k, in_h, in_w = self.inputs[in_][1:] - #if mk != in_k or (mh, mw) != (in_h, in_w) and (mh, mw) != (1, 1): - # raise Exception('Mean shape incompatible with input shape.') + else: + ms = mean.shape + if len(ms) == 2: + ms = (1,) + ms + if len(ms) != 3: + raise ValueError('Mean shape invalid') + if ms != self.inputs[in_][1:]: + raise ValueError('Mean shape incompatible with input shape.') self.mean[in_] = mean From dec148e89d30020b0d81ea59c6d88539db83b019 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 4 Mar 2015 11:17:51 -0800 Subject: [PATCH 60/65] fix comment I forgot about from @shelhamer's review of #1970 --- src/caffe/layers/inner_product_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp index 6b88724491b..89e0c8fbad7 100644 --- a/src/caffe/layers/inner_product_layer.cpp +++ b/src/caffe/layers/inner_product_layer.cpp @@ -19,7 +19,7 @@ void InnerProductLayer::LayerSetUp(const vector*>& bottom, this->layer_param_.inner_product_param().axis()); // Dimensions starting from "axis" are "flattened" into a single // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W), - // N inner products with dimension CHW are performed. + // and axis == 1, N inner products with dimension CHW are performed. 
K_ = bottom[0]->count(axis); // Check if we need to set up the weights if (this->blobs_.size() > 0) { From a3b0fbd9748114b981c56db5d2103843dbcb45b7 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 4 Mar 2015 15:22:46 -0800 Subject: [PATCH 61/65] include/caffe/common.hpp: add for INT_MAX (now in blob.hpp) --- include/caffe/common.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 890673cd7e6..0e108f1fce8 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -5,6 +5,7 @@ #include #include +#include #include #include // NOLINT(readability/streams) #include // NOLINT(readability/streams) From ccfa3dc10c4bfa3ff5740923a5c760e17316a514 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 4 Mar 2015 17:13:38 -0800 Subject: [PATCH 62/65] [pycaffe] check mean channels for transformation follow-up to #2031: check that the input and mean channels are compatible in the broadcast channels case. --- python/caffe/io.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index f7f75b73cd8..e362f3637c0 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -237,11 +237,14 @@ def set_mean(self, in_, mean): mean: mean ndarray (input dimensional or broadcastable) """ self.__check_input(in_) + ms = mean.shape if mean.ndim == 1: - # broadcast pixel + # broadcast channels + if ms[0] != self.inputs[in_][1]: + raise ValueError('Mean channels incompatible with input.') mean = mean[:, np.newaxis, np.newaxis] else: - ms = mean.shape + # elementwise mean if len(ms) == 2: ms = (1,) + ms if len(ms) != 3: From 91a65975788a20852d7978b4ff06d1fb7ebe712b Mon Sep 17 00:00:00 2001 From: Simon Layton Date: Mon, 9 Feb 2015 22:08:39 -0500 Subject: [PATCH 63/65] switch to cuDNN R2 --- include/caffe/common_layers.hpp | 4 +- include/caffe/neuron_layers.hpp | 12 ++-- include/caffe/util/cudnn.hpp | 24 +++---- include/caffe/vision_layers.hpp | 8 ++- src/caffe/layers/cudnn_conv_layer.cpp | 13 ++-- src/caffe/layers/cudnn_conv_layer.cu | 92 ++++++++++++++++++------ src/caffe/layers/cudnn_pooling_layer.cpp | 4 +- src/caffe/layers/cudnn_pooling_layer.cu | 20 +++++- src/caffe/layers/cudnn_relu_layer.cpp | 4 +- src/caffe/layers/cudnn_relu_layer.cu | 24 +++++-- src/caffe/layers/cudnn_sigmoid_layer.cpp | 4 +- src/caffe/layers/cudnn_sigmoid_layer.cu | 24 +++++-- src/caffe/layers/cudnn_softmax_layer.cpp | 4 +- src/caffe/layers/cudnn_softmax_layer.cu | 22 ++++-- src/caffe/layers/cudnn_tanh_layer.cpp | 4 +- src/caffe/layers/cudnn_tanh_layer.cu | 24 +++++-- 16 files changed, 205 insertions(+), 82 deletions(-) diff --git a/include/caffe/common_layers.hpp b/include/caffe/common_layers.hpp index b1ac3a93eff..cae1c3e4ee6 100644 --- a/include/caffe/common_layers.hpp +++ b/include/caffe/common_layers.hpp @@ -386,8 +386,8 @@ class CuDNNSoftmaxLayer : public SoftmaxLayer { bool handles_setup_; cudnnHandle_t handle_; - cudnnTensor4dDescriptor_t bottom_desc_; - cudnnTensor4dDescriptor_t top_desc_; + cudnnTensorDescriptor_t bottom_desc_; + cudnnTensorDescriptor_t top_desc_; }; #endif diff --git a/include/caffe/neuron_layers.hpp b/include/caffe/neuron_layers.hpp index 0c306fb41bf..6cefc5d9396 100644 --- a/include/caffe/neuron_layers.hpp +++ b/include/caffe/neuron_layers.hpp @@ -433,8 +433,8 @@ class CuDNNReLULayer : public ReLULayer { bool handles_setup_; cudnnHandle_t handle_; - cudnnTensor4dDescriptor_t bottom_desc_; - cudnnTensor4dDescriptor_t top_desc_; + cudnnTensorDescriptor_t bottom_desc_; + 
cudnnTensorDescriptor_t top_desc_; }; #endif @@ -516,8 +516,8 @@ class CuDNNSigmoidLayer : public SigmoidLayer { bool handles_setup_; cudnnHandle_t handle_; - cudnnTensor4dDescriptor_t bottom_desc_; - cudnnTensor4dDescriptor_t top_desc_; + cudnnTensorDescriptor_t bottom_desc_; + cudnnTensorDescriptor_t top_desc_; }; #endif @@ -601,8 +601,8 @@ class CuDNNTanHLayer : public TanHLayer { bool handles_setup_; cudnnHandle_t handle_; - cudnnTensor4dDescriptor_t bottom_desc_; - cudnnTensor4dDescriptor_t top_desc_; + cudnnTensorDescriptor_t bottom_desc_; + cudnnTensorDescriptor_t top_desc_; }; #endif diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp index eaed7333df8..f4963623598 100644 --- a/include/caffe/util/cudnn.hpp +++ b/include/caffe/util/cudnn.hpp @@ -57,34 +57,34 @@ template<> class dataType { }; template -inline void createTensor4dDesc(cudnnTensor4dDescriptor_t* desc) { - CUDNN_CHECK(cudnnCreateTensor4dDescriptor(desc)); +inline void createTensor4dDesc(cudnnTensorDescriptor_t* desc) { + CUDNN_CHECK(cudnnCreateTensorDescriptor(desc)); } template -inline void setTensor4dDesc(cudnnTensor4dDescriptor_t* desc, +inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc, int n, int c, int h, int w, int stride_n, int stride_c, int stride_h, int stride_w) { CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, dataType::type, - n, c, h, w, stride_n, stride_c, stride_h, stride_w)); + n, c, h, w, stride_n, stride_c, stride_h, stride_w)); } template -inline void setTensor4dDesc(cudnnTensor4dDescriptor_t* desc, +inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc, int n, int c, int h, int w) { const int stride_w = 1; const int stride_h = w * stride_w; const int stride_c = h * stride_h; const int stride_n = c * stride_c; setTensor4dDesc(desc, n, c, h, w, - stride_n, stride_c, stride_h, stride_w); + stride_n, stride_c, stride_h, stride_w); } template inline void createFilterDesc(cudnnFilterDescriptor_t* desc, int n, int c, int h, int w) { CUDNN_CHECK(cudnnCreateFilterDescriptor(desc)); - CUDNN_CHECK(cudnnSetFilterDescriptor(*desc, dataType::type, + CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType::type, n, c, h, w)); } @@ -95,9 +95,9 @@ inline void createConvolutionDesc(cudnnConvolutionDescriptor_t* conv) { template inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv, - cudnnTensor4dDescriptor_t bottom, cudnnFilterDescriptor_t filter, + cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter, int pad_h, int pad_w, int stride_h, int stride_w) { - CUDNN_CHECK(cudnnSetConvolutionDescriptor(*conv, bottom, filter, + CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv, pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION)); } @@ -110,14 +110,14 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* conv, *mode = CUDNN_POOLING_MAX; break; case PoolingParameter_PoolMethod_AVE: - *mode = CUDNN_POOLING_AVERAGE; + *mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING; break; default: LOG(FATAL) << "Unknown pooling method."; } CUDNN_CHECK(cudnnCreatePoolingDescriptor(conv)); - CUDNN_CHECK(cudnnSetPoolingDescriptor(*conv, *mode, h, w, - stride_h, stride_w)); + CUDNN_CHECK(cudnnSetPooling2dDescriptor(*conv, *mode, h, w, + 0, 0, stride_h, stride_w)); } } // namespace cudnn diff --git a/include/caffe/vision_layers.hpp b/include/caffe/vision_layers.hpp index 6cb507a5780..cd0ab8babb0 100644 --- a/include/caffe/vision_layers.hpp +++ b/include/caffe/vision_layers.hpp @@ -246,11 +246,13 @@ class CuDNNConvolutionLayer : public ConvolutionLayer { bool 
handles_setup_; cudnnHandle_t* handle_; cudaStream_t* stream_; - vector bottom_descs_, top_descs_; - cudnnTensor4dDescriptor_t bias_desc_; + vector bottom_descs_, top_descs_; + cudnnTensorDescriptor_t bias_desc_; cudnnFilterDescriptor_t filter_desc_; vector conv_descs_; int bottom_offset_, top_offset_, weight_offset_, bias_offset_; + size_t workspaceSizeInBytes; + void *workspace; }; #endif @@ -445,7 +447,7 @@ class CuDNNPoolingLayer : public PoolingLayer { bool handles_setup_; cudnnHandle_t handle_; - cudnnTensor4dDescriptor_t bottom_desc_, top_desc_; + cudnnTensorDescriptor_t bottom_desc_, top_desc_; cudnnPoolingDescriptor_t pooling_desc_; cudnnPoolingMode_t mode_; }; diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index 4a69ca20d0a..c27138bbc10 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -25,6 +25,9 @@ void CuDNNConvolutionLayer::LayerSetUp( stream_ = new cudaStream_t[this->group_ * CUDNN_STREAMS_PER_GROUP]; handle_ = new cudnnHandle_t[this->group_ * CUDNN_STREAMS_PER_GROUP]; + workspace = NULL; + workspaceSizeInBytes = (size_t)0; + for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) { CUDA_CHECK(cudaStreamCreate(&stream_[g])); CUDNN_CHECK(cudnnCreate(&handle_[g])); @@ -43,10 +46,10 @@ void CuDNNConvolutionLayer::LayerSetUp( // Create tensor descriptor(s) for data and corresponding convolution(s). for (int i = 0; i < bottom.size(); i++) { - cudnnTensor4dDescriptor_t bottom_desc; + cudnnTensorDescriptor_t bottom_desc; cudnn::createTensor4dDesc(&bottom_desc); bottom_descs_.push_back(bottom_desc); - cudnnTensor4dDescriptor_t top_desc; + cudnnTensorDescriptor_t top_desc; cudnn::createTensor4dDesc(&top_desc); top_descs_.push_back(top_desc); cudnnConvolutionDescriptor_t conv_desc; @@ -104,12 +107,12 @@ CuDNNConvolutionLayer::~CuDNNConvolutionLayer() { if (!handles_setup_) { return; } for (int i = 0; i < bottom_descs_.size(); i++) { - cudnnDestroyTensor4dDescriptor(bottom_descs_[i]); - cudnnDestroyTensor4dDescriptor(top_descs_[i]); + cudnnDestroyTensorDescriptor(bottom_descs_[i]); + cudnnDestroyTensorDescriptor(top_descs_[i]); cudnnDestroyConvolutionDescriptor(conv_descs_[i]); } if (this->bias_term_) { - cudnnDestroyTensor4dDescriptor(bias_desc_); + cudnnDestroyTensorDescriptor(bias_desc_); } cudnnDestroyFilterDescriptor(filter_desc_); diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 071014e1b48..081c3285448 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -21,21 +21,62 @@ void CuDNNConvolutionLayer::Forward_gpu( // Forward through cuDNN in parallel over groups. 
for (int g = 0; g < this->group_; g++) { + Dtype alpha = 1.0; + Dtype beta = 0.0; + + cudnnConvolutionFwdAlgo_t algo; + + // get the desired convolution algorithm + CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[g], + bottom_descs_[i], + filter_desc_, + conv_descs_[i], + top_descs_[i], + CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, + 0, // memoryLimitInBytes, + &algo)); + + // get minimum size of the workspace needed for the desired algorithm + size_t workspaceSizeInBytes_temp = 0; + + CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[g], + bottom_descs_[i], + filter_desc_, + conv_descs_[i], + top_descs_[i], + algo, + &workspaceSizeInBytes_temp)); + + if (workspaceSizeInBytes_temp > workspaceSizeInBytes) { + workspaceSizeInBytes = workspaceSizeInBytes_temp; + // free the existing workspace and allocate a new (larger) one + if (this->workspace != NULL) { + cudaFree(this->workspace); + } + cudaMalloc(&(this->workspace), workspaceSizeInBytes); + CUDA_POST_KERNEL_CHECK; + } + // Filters. CUDNN_CHECK(cudnnConvolutionForward(handle_[g], - bottom_descs_[i], bottom_data + bottom_offset_ * g, - filter_desc_, weight + weight_offset_ * g, - conv_descs_[i], - top_descs_[i], top_data + top_offset_ * g, - CUDNN_RESULT_NO_ACCUMULATE)); + reinterpret_cast(&alpha), + bottom_descs_[i], bottom_data + bottom_offset_ * g, + filter_desc_, weight + weight_offset_ * g, + conv_descs_[i], + algo, workspace, workspaceSizeInBytes, + reinterpret_cast(&beta), + top_descs_[i], top_data + top_offset_ * g)); // Bias. if (this->bias_term_) { const Dtype* bias_data = this->blobs_[1]->gpu_data(); - Dtype alpha = 1.; - CUDNN_CHECK(cudnnAddTensor4d(handle_[g], CUDNN_ADD_SAME_C, &alpha, - bias_desc_, bias_data + bias_offset_ * g, - top_descs_[i], top_data + top_offset_ * g)); + Dtype alpha = 1.0; + Dtype beta = 1.0; + CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C, + reinterpret_cast(&alpha), + bias_desc_, bias_data + bias_offset_ * g, + reinterpret_cast(&beta), + top_descs_[i], top_data + top_offset_ * g)); } } @@ -67,21 +108,26 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, for (int g = 0; g < this->group_; g++) { // Gradient w.r.t. bias. if (this->bias_term_ && this->param_propagate_down_[1]) { + Dtype alpha = 1.0; + Dtype beta = 1.0; CUDNN_CHECK(cudnnConvolutionBackwardBias(handle_[0*this->group_ + g], - top_descs_[i], top_diff + top_offset_ * g, - bias_desc_, bias_diff + bias_offset_ * g, - CUDNN_RESULT_ACCUMULATE)); + reinterpret_cast(&alpha), + top_descs_[i], top_diff + top_offset_ * g, + reinterpret_cast(&beta), + bias_desc_, bias_diff + bias_offset_ * g)); } // Gradient w.r.t. weights. if (this->param_propagate_down_[0]) { const Dtype* bottom_data = bottom[i]->gpu_data(); + Dtype alpha = 1.0; + Dtype beta = 1.0; CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle_[1*this->group_ + g], - bottom_descs_[i], bottom_data + bottom_offset_ * g, - top_descs_[i], top_diff + top_offset_ * g, - conv_descs_[i], - filter_desc_, weight_diff + weight_offset_ * g, - CUDNN_RESULT_ACCUMULATE)); + reinterpret_cast(&alpha), + bottom_descs_[i], bottom_data + bottom_offset_ * g, + top_descs_[i], top_diff + top_offset_ * g, + conv_descs_[i], reinterpret_cast(&beta), + filter_desc_, weight_diff + weight_offset_ * g)); } // Gradient w.r.t. bottom data. 
@@ -90,12 +136,14 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, weight = this->blobs_[0]->gpu_data(); } Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); + Dtype alpha = 1.0; + Dtype beta = 0.0; CUDNN_CHECK(cudnnConvolutionBackwardData(handle_[2*this->group_ + g], - filter_desc_, weight + weight_offset_ * g, - top_descs_[i], top_diff + top_offset_ * g, - conv_descs_[i], - bottom_descs_[i], bottom_diff + bottom_offset_ * g, - CUDNN_RESULT_NO_ACCUMULATE)); + reinterpret_cast(&alpha), + filter_desc_, weight + weight_offset_ * g, + top_descs_[i], top_diff + top_offset_ * g, + conv_descs_[i], reinterpret_cast(&beta), + bottom_descs_[i], bottom_diff + bottom_offset_ * g)); } } diff --git a/src/caffe/layers/cudnn_pooling_layer.cpp b/src/caffe/layers/cudnn_pooling_layer.cpp index dd90195637b..b447f19b426 100644 --- a/src/caffe/layers/cudnn_pooling_layer.cpp +++ b/src/caffe/layers/cudnn_pooling_layer.cpp @@ -40,8 +40,8 @@ CuDNNPoolingLayer::~CuDNNPoolingLayer() { // Check that handles have been setup before destroying. if (!handles_setup_) { return; } - cudnnDestroyTensor4dDescriptor(bottom_desc_); - cudnnDestroyTensor4dDescriptor(top_desc_); + cudnnDestroyTensorDescriptor(bottom_desc_); + cudnnDestroyTensorDescriptor(top_desc_); cudnnDestroyPoolingDescriptor(pooling_desc_); cudnnDestroy(handle_); } diff --git a/src/caffe/layers/cudnn_pooling_layer.cu b/src/caffe/layers/cudnn_pooling_layer.cu index 1c113aad75f..be7c4a8edb8 100644 --- a/src/caffe/layers/cudnn_pooling_layer.cu +++ b/src/caffe/layers/cudnn_pooling_layer.cu @@ -14,8 +14,15 @@ void CuDNNPoolingLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnPoolingForward(handle_, pooling_desc_, - bottom_desc_, bottom_data, top_desc_, top_data)); + reinterpret_cast(&alpha), + bottom_desc_, bottom_data, + reinterpret_cast(&beta), + top_desc_, top_data)); } template @@ -28,9 +35,16 @@ void CuDNNPoolingLayer::Backward_gpu(const vector*>& top, const Dtype* top_data = top[0]->gpu_data(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnPoolingBackward(handle_, pooling_desc_, - top_desc_, top_data, top_desc_, top_diff, - bottom_desc_, bottom_data, bottom_desc_, bottom_diff)); + reinterpret_cast(&alpha), + top_desc_, top_data, top_desc_, top_diff, + bottom_desc_, bottom_data, + reinterpret_cast(&beta), + bottom_desc_, bottom_diff)); } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNPoolingLayer); diff --git a/src/caffe/layers/cudnn_relu_layer.cpp b/src/caffe/layers/cudnn_relu_layer.cpp index 0b8a6bc3248..759d83984ef 100644 --- a/src/caffe/layers/cudnn_relu_layer.cpp +++ b/src/caffe/layers/cudnn_relu_layer.cpp @@ -35,8 +35,8 @@ CuDNNReLULayer::~CuDNNReLULayer() { // Check that handles have been setup before destroying. 
if (!handles_setup_) { return; } - cudnnDestroyTensor4dDescriptor(this->bottom_desc_); - cudnnDestroyTensor4dDescriptor(this->top_desc_); + cudnnDestroyTensorDescriptor(this->bottom_desc_); + cudnnDestroyTensorDescriptor(this->top_desc_); cudnnDestroy(this->handle_); } diff --git a/src/caffe/layers/cudnn_relu_layer.cu b/src/caffe/layers/cudnn_relu_layer.cu index 862508707a0..b9d0870a5a7 100644 --- a/src/caffe/layers/cudnn_relu_layer.cu +++ b/src/caffe/layers/cudnn_relu_layer.cu @@ -17,9 +17,16 @@ void CuDNNReLULayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_RELU, - this->bottom_desc_, bottom_data, this->top_desc_, top_data)); + CUDNN_ACTIVATION_RELU, + reinterpret_cast(&alpha), + this->bottom_desc_, bottom_data, + reinterpret_cast(&beta), + this->top_desc_, top_data)); } template @@ -39,10 +46,17 @@ void CuDNNReLULayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_RELU, - this->top_desc_, top_data, this->top_desc_, top_diff, - this->bottom_desc_, bottom_data, this->bottom_desc_, bottom_diff)); + CUDNN_ACTIVATION_RELU, + reinterpret_cast(&alpha), + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + reinterpret_cast(&beta), + this->bottom_desc_, bottom_diff)); } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNReLULayer); diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cpp b/src/caffe/layers/cudnn_sigmoid_layer.cpp index 67bd9c373b0..32637873d46 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cpp +++ b/src/caffe/layers/cudnn_sigmoid_layer.cpp @@ -35,8 +35,8 @@ CuDNNSigmoidLayer::~CuDNNSigmoidLayer() { // Check that handles have been setup before destroying. 
if (!handles_setup_) { return; } - cudnnDestroyTensor4dDescriptor(this->bottom_desc_); - cudnnDestroyTensor4dDescriptor(this->top_desc_); + cudnnDestroyTensorDescriptor(this->bottom_desc_); + cudnnDestroyTensorDescriptor(this->top_desc_); cudnnDestroy(this->handle_); } diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cu b/src/caffe/layers/cudnn_sigmoid_layer.cu index 31b094e25d4..9bb915017b4 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cu +++ b/src/caffe/layers/cudnn_sigmoid_layer.cu @@ -12,9 +12,16 @@ void CuDNNSigmoidLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_SIGMOID, - this->bottom_desc_, bottom_data, this->top_desc_, top_data)); + CUDNN_ACTIVATION_SIGMOID, + reinterpret_cast(&alpha), + this->bottom_desc_, bottom_data, + reinterpret_cast(&beta), + this->top_desc_, top_data)); } template @@ -29,10 +36,17 @@ void CuDNNSigmoidLayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_SIGMOID, - this->top_desc_, top_data, this->top_desc_, top_diff, - this->bottom_desc_, bottom_data, this->bottom_desc_, bottom_diff)); + CUDNN_ACTIVATION_SIGMOID, + reinterpret_cast(&alpha), + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + reinterpret_cast(&beta), + this->bottom_desc_, bottom_diff)); } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNSigmoidLayer); diff --git a/src/caffe/layers/cudnn_softmax_layer.cpp b/src/caffe/layers/cudnn_softmax_layer.cpp index 211701cad49..77a3225adcd 100644 --- a/src/caffe/layers/cudnn_softmax_layer.cpp +++ b/src/caffe/layers/cudnn_softmax_layer.cpp @@ -39,8 +39,8 @@ CuDNNSoftmaxLayer::~CuDNNSoftmaxLayer() { // Check that handles have been setup before destroying. 
if (!handles_setup_) { return; } - cudnnDestroyTensor4dDescriptor(bottom_desc_); - cudnnDestroyTensor4dDescriptor(top_desc_); + cudnnDestroyTensorDescriptor(bottom_desc_); + cudnnDestroyTensorDescriptor(top_desc_); cudnnDestroy(handle_); } diff --git a/src/caffe/layers/cudnn_softmax_layer.cu b/src/caffe/layers/cudnn_softmax_layer.cu index f328afdd831..59c304f6338 100644 --- a/src/caffe/layers/cudnn_softmax_layer.cu +++ b/src/caffe/layers/cudnn_softmax_layer.cu @@ -16,9 +16,16 @@ void CuDNNSoftmaxLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnSoftmaxForward(handle_, CUDNN_SOFTMAX_ACCURATE, - CUDNN_SOFTMAX_MODE_CHANNEL, - bottom_desc_, bottom_data, top_desc_, top_data)); + CUDNN_SOFTMAX_MODE_CHANNEL, + reinterpret_cast(&alpha), + bottom_desc_, bottom_data, + reinterpret_cast(&beta), + top_desc_, top_data)); } template @@ -29,9 +36,16 @@ void CuDNNSoftmaxLayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnSoftmaxBackward(handle_, CUDNN_SOFTMAX_ACCURATE, - CUDNN_SOFTMAX_MODE_CHANNEL, - top_desc_, top_data, top_desc_, top_diff, bottom_desc_, bottom_diff)); + CUDNN_SOFTMAX_MODE_CHANNEL, + reinterpret_cast(&alpha), + top_desc_, top_data, top_desc_, top_diff, + reinterpret_cast(&beta), + bottom_desc_, bottom_diff)); } } diff --git a/src/caffe/layers/cudnn_tanh_layer.cpp b/src/caffe/layers/cudnn_tanh_layer.cpp index b1d2b86384e..376faad324d 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cpp +++ b/src/caffe/layers/cudnn_tanh_layer.cpp @@ -35,8 +35,8 @@ CuDNNTanHLayer::~CuDNNTanHLayer() { // Check that handles have been setup before destroying. 
if (!handles_setup_) { return; } - cudnnDestroyTensor4dDescriptor(this->bottom_desc_); - cudnnDestroyTensor4dDescriptor(this->top_desc_); + cudnnDestroyTensorDescriptor(this->bottom_desc_); + cudnnDestroyTensorDescriptor(this->top_desc_); cudnnDestroy(this->handle_); } diff --git a/src/caffe/layers/cudnn_tanh_layer.cu b/src/caffe/layers/cudnn_tanh_layer.cu index bf9ec7cfac4..e008b0dcde3 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cu +++ b/src/caffe/layers/cudnn_tanh_layer.cu @@ -12,9 +12,16 @@ void CuDNNTanHLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_TANH, - this->bottom_desc_, bottom_data, this->top_desc_, top_data)); + CUDNN_ACTIVATION_TANH, + reinterpret_cast(&alpha), + this->bottom_desc_, bottom_data, + reinterpret_cast(&beta), + this->top_desc_, top_data)); } template @@ -29,10 +36,17 @@ void CuDNNTanHLayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + + Dtype alpha = 1.0; + Dtype beta = 0.0; + CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_TANH, - this->top_desc_, top_data, this->top_desc_, top_diff, - this->bottom_desc_, bottom_data, this->bottom_desc_, bottom_diff)); + CUDNN_ACTIVATION_TANH, + reinterpret_cast(&alpha), + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + reinterpret_cast(&beta), + this->bottom_desc_, bottom_diff)); } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNTanHLayer); From 2ddbb04f68ae728b785c6cc742ea9dd195c97c68 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Mon, 16 Feb 2015 16:01:18 -0800 Subject: [PATCH 64/65] replace cuDNN alphas and betas with coefficient values Give cuDNN {0, 1} constants for controlling accumulation through the alpha and beta coefficients. 
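For reference, cuDNN v2 routines scale their output as dest = alpha * op(src) + beta * dest, so
passing one/zero reproduces the old non-accumulating behavior while one/one accumulates into the
existing buffer (as the backward filter and bias paths require). The fragment below is an
illustrative sketch only, not part of this patch: the handle and descriptor names are placeholders,
and it assumes the dataType<Dtype> constants declared in include/caffe/util/cudnn.hpp by this change.

    // Sketch with hypothetical names; needs caffe/util/cudnn.hpp and a cuDNN R2 build.
    template <typename Dtype>
    void relu_forward_sketch(cudnnHandle_t handle,
        cudnnTensorDescriptor_t bottom_desc, const Dtype* bottom_data,
        cudnnTensorDescriptor_t top_desc, Dtype* top_data) {
      // top = 1 * relu(bottom) + 0 * top: overwrite the output, no accumulation.
      CUDNN_CHECK(cudnnActivationForward(handle,
          CUDNN_ACTIVATION_RELU,
          caffe::cudnn::dataType<Dtype>::one,   // alpha
          bottom_desc, bottom_data,
          caffe::cudnn::dataType<Dtype>::zero,  // beta
          top_desc, top_data));
    }
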
---
 include/caffe/util/cudnn.hpp            | 14 ++++++----
 src/caffe/layers/cudnn_conv_layer.cu    | 35 +++++++++----------------
 src/caffe/layers/cudnn_pooling_layer.cu | 16 +++--------
 src/caffe/layers/cudnn_relu_layer.cu    | 16 +++--------
 src/caffe/layers/cudnn_sigmoid_layer.cu | 16 +++--------
 src/caffe/layers/cudnn_softmax_layer.cu | 15 +++--------
 src/caffe/layers/cudnn_tanh_layer.cu    | 15 +++--------
 src/caffe/util/cudnn.cpp                | 23 ++++++++++++++++
 8 files changed, 65 insertions(+), 85 deletions(-)
 create mode 100644 src/caffe/util/cudnn.cpp

diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp
index f4963623598..b531dd5fa7a 100644
--- a/include/caffe/util/cudnn.hpp
+++ b/include/caffe/util/cudnn.hpp
@@ -50,10 +50,14 @@ template <typename Dtype> class dataType;
 template<> class dataType<float>  {
  public:
   static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
+  static float oneval, zeroval;
+  static const void *one, *zero;
 };
 template<> class dataType<double> {
  public:
   static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
+  static double oneval, zeroval;
+  static const void *one, *zero;
 };
 
 template <typename Dtype>
@@ -102,9 +106,9 @@ inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
 }
 
 template <typename Dtype>
-inline void createPoolingDesc(cudnnPoolingDescriptor_t* conv,
+inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc,
     PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
-    int h, int w, int stride_h, int stride_w) {
+    int h, int w, int pad_h, int pad_w, int stride_h, int stride_w) {
   switch (poolmethod) {
   case PoolingParameter_PoolMethod_MAX:
     *mode = CUDNN_POOLING_MAX;
@@ -115,9 +119,9 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* conv,
   default:
     LOG(FATAL) << "Unknown pooling method.";
   }
-  CUDNN_CHECK(cudnnCreatePoolingDescriptor(conv));
-  CUDNN_CHECK(cudnnSetPooling2dDescriptor(*conv, *mode, h, w,
-        0, 0, stride_h, stride_w));
+  CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc));
+  CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, h, w,
+        pad_h, pad_w, stride_h, stride_w));
 }
 
 }  // namespace cudnn
diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu
index 081c3285448..0426c269503 100644
--- a/src/caffe/layers/cudnn_conv_layer.cu
+++ b/src/caffe/layers/cudnn_conv_layer.cu
@@ -21,9 +21,6 @@ void CuDNNConvolutionLayer<Dtype>::Forward_gpu(
 
   // Forward through cuDNN in parallel over groups.
   for (int g = 0; g < this->group_; g++) {
-    Dtype alpha = 1.0;
-    Dtype beta = 0.0;
-
     cudnnConvolutionFwdAlgo_t algo;
 
     // get the desired convolution algorithm
@@ -59,23 +56,21 @@ void CuDNNConvolutionLayer<Dtype>::Forward_gpu(
 
     // Filters.
     CUDNN_CHECK(cudnnConvolutionForward(handle_[g],
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
       bottom_descs_[i], bottom_data + bottom_offset_ * g,
       filter_desc_, weight + weight_offset_ * g,
       conv_descs_[i],
       algo, workspace, workspaceSizeInBytes,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
       top_descs_[i], top_data + top_offset_ * g));
 
     // Bias.
     if (this->bias_term_) {
       const Dtype* bias_data = this->blobs_[1]->gpu_data();
-      Dtype alpha = 1.0;
-      Dtype beta = 1.0;
       CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C,
-        reinterpret_cast<void*>(&alpha),
+        cudnn::dataType<Dtype>::one,
         bias_desc_, bias_data + bias_offset_ * g,
-        reinterpret_cast<void*>(&beta),
+        cudnn::dataType<Dtype>::one,
         top_descs_[i], top_data + top_offset_ * g));
     }
   }
@@ -108,25 +103,22 @@ void CuDNNConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     for (int g = 0; g < this->group_; g++) {
       // Gradient w.r.t. bias.
      if (this->bias_term_ && this->param_propagate_down_[1]) {
-        Dtype alpha = 1.0;
-        Dtype beta = 1.0;
        CUDNN_CHECK(cudnnConvolutionBackwardBias(handle_[0*this->group_ + g],
-          reinterpret_cast<void*>(&alpha),
+          cudnn::dataType<Dtype>::one,
          top_descs_[i],  top_diff + top_offset_ * g,
-          reinterpret_cast<void*>(&beta),
+          cudnn::dataType<Dtype>::one,
          bias_desc_, bias_diff + bias_offset_ * g));
      }

      // Gradient w.r.t. weights.
      if (this->param_propagate_down_[0]) {
        const Dtype* bottom_data = bottom[i]->gpu_data();
-        Dtype alpha = 1.0;
-        Dtype beta = 1.0;
        CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle_[1*this->group_ + g],
-          reinterpret_cast<void*>(&alpha),
+          cudnn::dataType<Dtype>::one,
          bottom_descs_[i], bottom_data + bottom_offset_ * g,
          top_descs_[i],    top_diff + top_offset_ * g,
-          conv_descs_[i], reinterpret_cast<void*>(&beta),
+          conv_descs_[i],
+          cudnn::dataType<Dtype>::one,
          filter_desc_, weight_diff + weight_offset_ * g));
      }

@@ -136,13 +128,12 @@ void CuDNNConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
        weight = this->blobs_[0]->gpu_data();
      }
      Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
-      Dtype alpha = 1.0;
-      Dtype beta = 0.0;
      CUDNN_CHECK(cudnnConvolutionBackwardData(handle_[2*this->group_ + g],
-        reinterpret_cast<void*>(&alpha),
+        cudnn::dataType<Dtype>::one,
        filter_desc_, weight + weight_offset_ * g,
-        top_descs_[i], top_diff + top_offset_ * g,
-        conv_descs_[i], reinterpret_cast<void*>(&beta),
+        top_descs_[i], top_diff + top_offset_ * g,
+        conv_descs_[i],
+        cudnn::dataType<Dtype>::zero,
        bottom_descs_[i], bottom_diff + bottom_offset_ * g));
    }
  }
diff --git a/src/caffe/layers/cudnn_pooling_layer.cu b/src/caffe/layers/cudnn_pooling_layer.cu
index be7c4a8edb8..a952b855a48 100644
--- a/src/caffe/layers/cudnn_pooling_layer.cu
+++ b/src/caffe/layers/cudnn_pooling_layer.cu
@@ -14,14 +14,10 @@ void CuDNNPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = top[0]->mutable_gpu_data();
-
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnPoolingForward(handle_, pooling_desc_,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      bottom_desc_, bottom_data,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      top_desc_, top_data));
 }
 
@@ -35,15 +31,11 @@ void CuDNNPoolingLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const Dtype* top_data = top[0]->gpu_data();
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
-
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnPoolingBackward(handle_, pooling_desc_,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      top_desc_, top_data, top_desc_, top_diff,
      bottom_desc_, bottom_data,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      bottom_desc_, bottom_diff));
 }
 
diff --git a/src/caffe/layers/cudnn_relu_layer.cu b/src/caffe/layers/cudnn_relu_layer.cu
index b9d0870a5a7..21d14857dd2 100644
--- a/src/caffe/layers/cudnn_relu_layer.cu
+++ b/src/caffe/layers/cudnn_relu_layer.cu
@@ -17,15 +17,11 @@ void CuDNNReLULayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = top[0]->mutable_gpu_data();
-
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnActivationForward(this->handle_,
       CUDNN_ACTIVATION_RELU,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      this->bottom_desc_, bottom_data,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      this->top_desc_, top_data));
 }
 
@@ -46,16 +42,12 @@ void CuDNNReLULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const Dtype* top_diff = top[0]->gpu_diff();
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
-
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnActivationBackward(this->handle_,
       CUDNN_ACTIVATION_RELU,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      this->top_desc_, top_data, this->top_desc_, top_diff,
      this->bottom_desc_, bottom_data,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      this->bottom_desc_, bottom_diff));
 }
 
diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cu b/src/caffe/layers/cudnn_sigmoid_layer.cu
index 9bb915017b4..7a06cf721da 100644
--- a/src/caffe/layers/cudnn_sigmoid_layer.cu
+++ b/src/caffe/layers/cudnn_sigmoid_layer.cu
@@ -12,15 +12,11 @@ void CuDNNSigmoidLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = top[0]->mutable_gpu_data();
-
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnActivationForward(this->handle_,
       CUDNN_ACTIVATION_SIGMOID,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      this->bottom_desc_, bottom_data,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      this->top_desc_, top_data));
 }
 
@@ -36,16 +32,12 @@ void CuDNNSigmoidLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const Dtype* top_diff = top[0]->gpu_diff();
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
-
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnActivationBackward(this->handle_,
       CUDNN_ACTIVATION_SIGMOID,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      this->top_desc_, top_data, this->top_desc_, top_diff,
      this->bottom_desc_, bottom_data,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      this->bottom_desc_, bottom_diff));
 }
 
diff --git a/src/caffe/layers/cudnn_softmax_layer.cu b/src/caffe/layers/cudnn_softmax_layer.cu
index 59c304f6338..a9e2fcefaf7 100644
--- a/src/caffe/layers/cudnn_softmax_layer.cu
+++ b/src/caffe/layers/cudnn_softmax_layer.cu
@@ -16,15 +16,11 @@ void CuDNNSoftmaxLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = top[0]->mutable_gpu_data();
-
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnSoftmaxForward(handle_, CUDNN_SOFTMAX_ACCURATE,
       CUDNN_SOFTMAX_MODE_CHANNEL,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      bottom_desc_, bottom_data,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      top_desc_, top_data));
 }
 
@@ -37,14 +33,11 @@ void CuDNNSoftmaxLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
 
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnSoftmaxBackward(handle_, CUDNN_SOFTMAX_ACCURATE,
       CUDNN_SOFTMAX_MODE_CHANNEL,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      top_desc_, top_data, top_desc_, top_diff,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      bottom_desc_, bottom_diff));
 }
 }
 
diff --git a/src/caffe/layers/cudnn_tanh_layer.cu b/src/caffe/layers/cudnn_tanh_layer.cu
index e008b0dcde3..d287f6fee85 100644
--- a/src/caffe/layers/cudnn_tanh_layer.cu
+++ b/src/caffe/layers/cudnn_tanh_layer.cu
@@ -12,15 +12,11 @@ void CuDNNTanHLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* top_data = top[0]->mutable_gpu_data();
-
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnActivationForward(this->handle_,
       CUDNN_ACTIVATION_TANH,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      this->bottom_desc_, bottom_data,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      this->top_desc_, top_data));
 }
 
@@ -37,15 +33,12 @@ void CuDNNTanHLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const Dtype* bottom_data = bottom[0]->gpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
 
-  Dtype alpha = 1.0;
-  Dtype beta = 0.0;
-
   CUDNN_CHECK(cudnnActivationBackward(this->handle_,
       CUDNN_ACTIVATION_TANH,
-      reinterpret_cast<void*>(&alpha),
+      cudnn::dataType<Dtype>::one,
      this->top_desc_, top_data, this->top_desc_, top_diff,
      this->bottom_desc_, bottom_data,
-      reinterpret_cast<void*>(&beta),
+      cudnn::dataType<Dtype>::zero,
      this->bottom_desc_, bottom_diff));
 }
 
diff --git a/src/caffe/util/cudnn.cpp b/src/caffe/util/cudnn.cpp
new file mode 100644
index 00000000000..1772f0099ce
--- /dev/null
+++ b/src/caffe/util/cudnn.cpp
@@ -0,0 +1,23 @@
+#ifdef USE_CUDNN
+#include "caffe/util/cudnn.hpp"
+
+namespace caffe {
+namespace cudnn {
+
+float dataType<float>::oneval = 1.0;
+float dataType<float>::zeroval = 0.0;
+const void* dataType<float>::one =
+    static_cast<void*>(&dataType<float>::oneval);
+const void* dataType<float>::zero =
+    static_cast<void*>(&dataType<float>::zeroval);
+
+double dataType<double>::oneval = 1.0;
+double dataType<double>::zeroval = 0.0;
+const void* dataType<double>::one =
+    static_cast<void*>(&dataType<double>::oneval);
+const void* dataType<double>::zero =
+    static_cast<void*>(&dataType<double>::zeroval);
+
+}  // namespace cudnn
+}  // namespace caffe
+#endif

From 4beebccf31d7181515b0bc32ca2a6ab40c9dc84a Mon Sep 17 00:00:00 2001
From: Evan Shelhamer
Date: Mon, 16 Feb 2015 17:18:13 -0800
Subject: [PATCH 65/65] cuDNN pooling can pad now

---
 src/caffe/layers/cudnn_pooling_layer.cpp | 6 ++----
 src/caffe/test/test_pooling_layer.cpp    | 8 --------
 2 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/caffe/layers/cudnn_pooling_layer.cpp b/src/caffe/layers/cudnn_pooling_layer.cpp
index b447f19b426..c92c4e477b5 100644
--- a/src/caffe/layers/cudnn_pooling_layer.cpp
+++ b/src/caffe/layers/cudnn_pooling_layer.cpp
@@ -13,15 +13,13 @@ template <typename Dtype>
 void CuDNNPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     const vector<Blob<Dtype>*>& top) {
   PoolingLayer<Dtype>::LayerSetUp(bottom, top);
-  // Sanity check: CUDNN currently only supports pad == 0.
-  CHECK_EQ(this->pad_h_, 0);
-  CHECK_EQ(this->pad_w_, 0);
   CUDNN_CHECK(cudnnCreate(&handle_));
   cudnn::createTensor4dDesc<Dtype>(&bottom_desc_);
   cudnn::createTensor4dDesc<Dtype>(&top_desc_);
   cudnn::createPoolingDesc<Dtype>(&pooling_desc_,
       this->layer_param_.pooling_param().pool(), &mode_,
-      this->kernel_h_, this->kernel_w_, this->stride_h_, this->stride_w_);
+      this->kernel_h_, this->kernel_w_, this->pad_h_, this->pad_w_,
+      this->stride_h_, this->stride_w_);
   handles_setup_ = true;
 }
 
diff --git a/src/caffe/test/test_pooling_layer.cpp b/src/caffe/test/test_pooling_layer.cpp
index 435caa8381e..e9964e7f0b7 100644
--- a/src/caffe/test/test_pooling_layer.cpp
+++ b/src/caffe/test/test_pooling_layer.cpp
@@ -976,9 +976,6 @@ TYPED_TEST(CuDNNPoolingLayerTest, TestSetupCuDNN) {
   EXPECT_EQ(this->blob_top_->width(), 2);
 }
 
-// This test and all following cuDNN pooling tests with padding are commented
-// for now, since cuDNN pooling does not currently support padding.
-/* TYPED_TEST(CuDNNPoolingLayerTest, TestSetupPaddedCuDNN) { Caffe::set_mode(Caffe::GPU); LayerParameter layer_param; @@ -994,7 +991,6 @@ TYPED_TEST(CuDNNPoolingLayerTest, TestSetupPaddedCuDNN) { EXPECT_EQ(this->blob_top_->height(), 4); EXPECT_EQ(this->blob_top_->width(), 3); } -*/ /* TYPED_TEST(CuDNNPoolingLayerTest, PrintBackwardCuDNN) { @@ -1062,7 +1058,6 @@ TYPED_TEST(CuDNNPoolingLayerTest, TestGradientMaxCuDNN) { } } -/* TYPED_TEST(CuDNNPoolingLayerTest, TestForwardMaxPaddedCuDNN) { Caffe::set_mode(Caffe::GPU); LayerParameter layer_param; @@ -1107,7 +1102,6 @@ TYPED_TEST(CuDNNPoolingLayerTest, TestForwardMaxPaddedCuDNN) { EXPECT_NEAR(this->blob_top_->cpu_data()[7], 4, epsilon); EXPECT_NEAR(this->blob_top_->cpu_data()[8], 1, epsilon); } -*/ /* TYPED_TEST(CuDNNPoolingLayerTest, TestGradientMaxTopMaskCuDNN) { @@ -1175,7 +1169,6 @@ TYPED_TEST(CuDNNPoolingLayerTest, TestGradientAveCuDNN) { } } -/* TYPED_TEST(CuDNNPoolingLayerTest, TestGradientAvePaddedCuDNN) { Caffe::set_mode(Caffe::GPU); for (int kernel_h = 3; kernel_h <= 4; kernel_h++) { @@ -1194,7 +1187,6 @@ TYPED_TEST(CuDNNPoolingLayerTest, TestGradientAvePaddedCuDNN) { } } } -*/ #endif
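A usage note on the change above, as an illustrative sketch only (placeholder variable names,
assuming a USE_CUDNN build and the createPoolingDesc signature from include/caffe/util/cudnn.hpp),
not code taken from this patch:

    // 3x3 max pooling with pad 1 and stride 2: a configuration the removed
    // CHECK_EQ(pad, 0) sanity checks previously rejected on the cuDNN path.
    cudnnPoolingDescriptor_t pool_desc;
    cudnnPoolingMode_t mode;
    caffe::cudnn::createPoolingDesc<float>(&pool_desc,
        caffe::PoolingParameter_PoolMethod_MAX, &mode,
        3, 3,    // kernel h, w
        1, 1,    // pad h, w
        2, 2);   // stride h, w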