nd convolution and pooling with cuDNN #3983

Open

wants to merge 32 commits into base: master

Changes from 1 commit

Commits (32)
0ad1284
CMake: link with ${HDF5_HL_LIBRARIES}
intelfx Jul 25, 2016
c62e06b
Fix search for Atlas on arch.
Jul 26, 2016
bc1a433
add cudnn interfaces for n-dimensional computation
Feb 18, 2016
e5c13a5
add support for nd convolution in cudnn
Feb 18, 2016
cc357bd
change interface of pool to support n-dimensions
Feb 19, 2016
12cb24f
fix 2D pooling on CPU and GPU
Feb 19, 2016
c1b0f38
remove some calls of Blob::LegacyShape() to support 3D
May 23, 2016
721553e
fix xavier filler to use new blob shape accessors
Feb 19, 2016
b2f3848
fix tests for new pooling parameter interface
Apr 12, 2016
7173035
add 3D cudnn convolution tests
Apr 13, 2016
c9de153
add 3D cudnn pooling tests
Apr 14, 2016
eb93d32
fix CUDNN_BAD_PARAM when using InnerProduct layer
Apr 28, 2016
919b6d7
change interface for cudnn v5
May 23, 2016
9e9e9ba
Merge pull request #4523 from delftrobotics/cmake-atlas
longjon Aug 4, 2016
6431477
Merge pull request #4516 from intelfx/BVLC-work
longjon Aug 4, 2016
61e0165
num in blob is deprecated
fyu Aug 7, 2016
375003a
Merge pull request #4559 from fyu/loss_reshape
jeffdonahue Aug 7, 2016
f86a099
add cudnn interfaces for n-dimensional computation
Feb 18, 2016
4f63ea5
add support for nd convolution in cudnn
Feb 18, 2016
5e1f04e
change interface of pool to support n-dimensions
Feb 19, 2016
2346c5e
fix 2D pooling on CPU and GPU
Feb 19, 2016
0dcb68a
remove some calls of Blob::LegacyShape() to support 3D
May 23, 2016
fb0f9f5
fix xavier filler to use new blob shape accessors
Feb 19, 2016
b8ca687
fix tests for new pooling parameter interface
Apr 12, 2016
c88f8fa
add 3D cudnn convolution tests
Apr 13, 2016
d0efc10
add 3D cudnn pooling tests
Apr 14, 2016
45562a0
fix CUDNN_BAD_PARAM when using InnerProduct layer
Apr 28, 2016
b506327
change interface for cudnn v5
May 23, 2016
fc39d7e
remove some calls of Blob::LegacyShape() to support 3D
Sep 12, 2016
857f47d
fix msra filler to use new blob shape accessors
Sep 12, 2016
334e76f
fix positive_unitball filler to use new blob shape accessors
Sep 12, 2016
efda84c
Merge branch 'nd-cudnn' of github.com:christianpayer/caffe into nd-cudnn
Nov 2, 2016
add support for nd convolution in cudnn
Christian Payer committed Aug 10, 2016
commit 4f63ea5dfb89d28da9bc2d904db5db7a67acc0c2
79 changes: 41 additions & 38 deletions src/caffe/layers/cudnn_conv_layer.cpp
@@ -59,20 +59,21 @@ void CuDNNConvolutionLayer<Dtype>::LayerSetUp(
   bias_offset_ = (this->num_output_ / this->group_);
 
   // Create filter descriptor.
-  const int* kernel_shape_data = this->kernel_shape_.cpu_data();
-  const int kernel_h = kernel_shape_data[0];
-  const int kernel_w = kernel_shape_data[1];
-  cudnn::createFilterDesc<Dtype>(&filter_desc_,
-      this->num_output_ / this->group_, this->channels_ / this->group_,
-      kernel_h, kernel_w);
+  std::vector<int> kernel_shape;
+  kernel_shape.push_back(this->num_output_ / this->group_);
+  kernel_shape.push_back(this->channels_ / this->group_);
+  for (unsigned int i = 0; i < this->num_spatial_axes_; ++i)
+    kernel_shape.push_back(this->kernel_shape_.cpu_data()[i]);
+
+  cudnn::createNdFilterDesc<Dtype>(&filter_desc_, kernel_shape);
 
   // Create tensor descriptor(s) for data and corresponding convolution(s).
   for (int i = 0; i < bottom.size(); i++) {
     cudnnTensorDescriptor_t bottom_desc;
-    cudnn::createTensor4dDesc<Dtype>(&bottom_desc);
+    cudnn::createTensorDesc<Dtype>(&bottom_desc);
     bottom_descs_.push_back(bottom_desc);
     cudnnTensorDescriptor_t top_desc;
-    cudnn::createTensor4dDesc<Dtype>(&top_desc);
+    cudnn::createTensorDesc<Dtype>(&top_desc);
     top_descs_.push_back(top_desc);
     cudnnConvolutionDescriptor_t conv_desc;
     cudnn::createConvolutionDesc<Dtype>(&conv_desc);
@@ -81,7 +82,7 @@ void CuDNNConvolutionLayer<Dtype>::LayerSetUp(
 
   // Tensor descriptor for bias.
   if (this->bias_term_) {
-    cudnn::createTensor4dDesc<Dtype>(&bias_desc_);
+    cudnn::createTensorDesc<Dtype>(&bias_desc_);
   }
 
   handles_setup_ = true;
@@ -91,41 +92,42 @@ template <typename Dtype>
 void CuDNNConvolutionLayer<Dtype>::Reshape(
     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
   ConvolutionLayer<Dtype>::Reshape(bottom, top);
-  CHECK_EQ(2, this->num_spatial_axes_)
-      << "CuDNNConvolution input must have 2 spatial axes "
-      << "(e.g., height and width). "
-      << "Use 'engine: CAFFE' for general ND convolution.";
 
   bottom_offset_ = this->bottom_dim_ / this->group_;
   top_offset_ = this->top_dim_ / this->group_;
-  const int height = bottom[0]->shape(this->channel_axis_ + 1);
-  const int width = bottom[0]->shape(this->channel_axis_ + 2);
-  const int height_out = top[0]->shape(this->channel_axis_ + 1);
-  const int width_out = top[0]->shape(this->channel_axis_ + 2);
-  const int* pad_data = this->pad_.cpu_data();
-  const int pad_h = pad_data[0];
-  const int pad_w = pad_data[1];
-  const int* stride_data = this->stride_.cpu_data();
-  const int stride_h = stride_data[0];
-  const int stride_w = stride_data[1];
+
+  std::vector<int> bottom_tensor_shape(bottom[0]->shape());
+  bottom_tensor_shape[1] /= this->group_;
+  std::vector<int> bottom_tensor_stride(bottom[0]->shape().size(), 1);
+  for (int i = bottom[0]->shape().size() - 2; i >= 0; --i) {
+    bottom_tensor_stride[i] =
+        bottom[0]->shape(i + 1) * bottom_tensor_stride[i + 1];
+  }
+
+  std::vector<int> top_tensor_shape(top[0]->shape());
+  top_tensor_shape[1] /= this->group_;
+  std::vector<int> top_tensor_stride(top[0]->shape().size(), 1);
+  for (int i = top[0]->shape().size() - 2; i >= 0; --i) {
+    top_tensor_stride[i] = top[0]->shape(i + 1) * top_tensor_stride[i + 1];
+  }
+
+  std::vector<int> pad, stride;
+  for (unsigned int i = 0; i < this->num_spatial_axes_; ++i) {
+    pad.push_back(this->pad_.cpu_data()[i]);
+    stride.push_back(this->stride_.cpu_data()[i]);
+  }
 
   // Specify workspace limit for kernels directly until we have a
   // planning strategy and a rewrite of Caffe's GPU memory management
   size_t workspace_limit_bytes = 8*1024*1024;
 
   for (int i = 0; i < bottom.size(); i++) {
-    cudnn::setTensor4dDesc<Dtype>(&bottom_descs_[i],
-        this->num_,
-        this->channels_ / this->group_, height, width,
-        this->channels_ * height * width,
-        height * width, width, 1);
-    cudnn::setTensor4dDesc<Dtype>(&top_descs_[i],
-        this->num_,
-        this->num_output_ / this->group_, height_out, width_out,
-        this->num_output_ * this->out_spatial_dim_,
-        this->out_spatial_dim_, width_out, 1);
-    cudnn::setConvolutionDesc<Dtype>(&conv_descs_[i], bottom_descs_[i],
-        filter_desc_, pad_h, pad_w,
-        stride_h, stride_w);
+    cudnn::setTensorNdDesc<Dtype>(&bottom_descs_[i],
+        bottom_tensor_shape, bottom_tensor_stride);
+    cudnn::setTensorNdDesc<Dtype>(&top_descs_[i],
+        top_tensor_shape, top_tensor_stride);
+    cudnn::setNdConvolutionDesc<Dtype>(&conv_descs_[i], bottom_descs_[i],
+        filter_desc_, pad, stride);
 
     // choose forward and backward algorithms + workspace(s)
     CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[0],
@@ -226,8 +228,9 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
 
   // Tensor descriptor for bias.
   if (this->bias_term_) {
-    cudnn::setTensor4dDesc<Dtype>(&bias_desc_,
-        1, this->num_output_ / this->group_, 1, 1);
+    vector<int> bias_shape(bottom[0]->shape().size(), 1);
+    bias_shape[1] = this->num_output_ / this->group_;
+    cudnn::setTensorNdDesc<Dtype>(&bias_desc_, bias_shape);
   }
 }
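
A note on the stride computation in `Reshape` above: the strides are fully packed, row-major strides derived from the *full* blob shape, while only the channel entry of the shape vector is divided by `group_`. The descriptor therefore describes one group's channels embedded in the full tensor, and stepping the data pointer by `bottom_offset_` (`bottom_dim_ / group_`) reaches the next group. A standalone worked example with an assumed 5-D shape (not code from this PR):

```cpp
// Worked example of the stride computation in Reshape, for an assumed
// 5-D bottom blob of shape (N, C, D, H, W) = (2, 4, 3, 5, 5), group = 2.
#include <iostream>
#include <vector>

int main() {
  std::vector<int> shape = {2, 4, 3, 5, 5};
  int group = 2;

  // Fully packed row-major strides, computed from the full shape,
  // exactly as in the PR: stride[i] = shape[i + 1] * stride[i + 1].
  std::vector<int> stride(shape.size(), 1);
  for (int i = shape.size() - 2; i >= 0; --i)
    stride[i] = shape[i + 1] * stride[i + 1];

  // The descriptor advertises only C/group channels but keeps the
  // full-tensor strides, so offsetting the data pointer by
  // bottom_dim_/group addresses the next group's channels.
  shape[1] /= group;

  for (size_t i = 0; i < shape.size(); ++i)
    std::cout << shape[i] << " (stride " << stride[i] << ")\n";
  // Prints: 2 (stride 300), 2 (stride 75), 3 (stride 25),
  //         5 (stride 5), 5 (stride 1)
  return 0;
}
```

With `group_ = 1` these are the usual packed NCHW/NCDHW strides, so the Nd path reproduces the old 4d descriptor setup as a special case.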

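The `createNdFilterDesc`, `setTensorNdDesc`, and `setNdConvolutionDesc` helpers called above live in `include/caffe/util/cudnn.hpp`, which is not part of this file's diff. A hedged sketch of what they presumably look like, written against the cuDNN v5 Nd API (`cudnnSetTensorNdDescriptor`, `cudnnSetFilterNdDescriptor`, `cudnnSetConvolutionNdDescriptor`); the `dataType` trait and `CUDNN_CHECK` below are minimal stand-ins for Caffe's existing ones:

```cpp
// Sketch only: the real helpers are in include/caffe/util/cudnn.hpp.
#include <cstdlib>
#include <vector>
#include <cudnn.h>

// Minimal stand-in for Caffe's CUDNN_CHECK (the real one logs via glog).
#define CUDNN_CHECK(condition) \
  do { \
    if ((condition) != CUDNN_STATUS_SUCCESS) std::abort(); \
  } while (0)

namespace cudnn {

// Stand-in for Caffe's existing cudnn::dataType<Dtype> trait.
template <typename Dtype> class dataType;
template <> class dataType<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
};
template <> class dataType<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
};

template <typename Dtype>
inline void setTensorNdDesc(cudnnTensorDescriptor_t* desc,
    const std::vector<int>& shape, const std::vector<int>& stride) {
  // Rank plus raw dimension/stride arrays, as cuDNN expects.
  CUDNN_CHECK(cudnnSetTensorNdDescriptor(*desc, dataType<Dtype>::type,
      static_cast<int>(shape.size()), shape.data(), stride.data()));
}

template <typename Dtype>
inline void createNdFilterDesc(cudnnFilterDescriptor_t* desc,
    const std::vector<int>& shape) {
  // shape = {out/group, in/group, k_1, ..., k_d} as built in LayerSetUp;
  // e.g. num_output 64, 3 channels, 3x3x3 kernels -> {64, 3, 3, 3, 3}.
  CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
  CUDNN_CHECK(cudnnSetFilterNdDescriptor(*desc, dataType<Dtype>::type,
      CUDNN_TENSOR_NCHW, static_cast<int>(shape.size()), shape.data()));
}

template <typename Dtype>
inline void setNdConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
    cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter,
    const std::vector<int>& pad, const std::vector<int>& stride) {
  // bottom/filter mirror the 2D helper's signature at the call sites
  // above; the v5 convolution descriptor does not actually need them.
  (void)bottom;
  (void)filter;
  // cuDNN v5 also takes an upscale (dilation) array; all-ones here.
  std::vector<int> upscale(pad.size(), 1);
  CUDNN_CHECK(cudnnSetConvolutionNdDescriptor(*conv,
      static_cast<int>(pad.size()), pad.data(), stride.data(),
      upscale.data(), CUDNN_CROSS_CORRELATION, dataType<Dtype>::type));
}

}  // namespace cudnn
```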