nd convolution and pooling with cuDNN #3983

Open

wants to merge 32 commits into base: master

Changes from 1 commit

Commits (32)
0ad1284
CMake: link with ${HDF5_HL_LIBRARIES}
intelfx Jul 25, 2016
c62e06b
Fix search for Atlas on arch.
Jul 26, 2016
bc1a433
add cudnn interfaces for n-dimensional computation
Feb 18, 2016
e5c13a5
add support for nd convolution in cudnn
Feb 18, 2016
cc357bd
change interface of pool to support n-dimensions
Feb 19, 2016
12cb24f
fix 2D pooling on CPU and GPU
Feb 19, 2016
c1b0f38
remove some calls of Blob::LegacyShape() to support 3D
May 23, 2016
721553e
fix xavier filler to use new blob shape accessors
Feb 19, 2016
b2f3848
fix tests for new pooling parameter interface
Apr 12, 2016
7173035
add 3D cudnn convolution tests
Apr 13, 2016
c9de153
add 3D cudnn pooling tests
Apr 14, 2016
eb93d32
fix CUDNN_BAD_PARAM when using InnerProduct layer
Apr 28, 2016
919b6d7
change interface for cudnn v5
May 23, 2016
9e9e9ba
Merge pull request #4523 from delftrobotics/cmake-atlas
longjon Aug 4, 2016
6431477
Merge pull request #4516 from intelfx/BVLC-work
longjon Aug 4, 2016
61e0165
num in blob is deprecated
fyu Aug 7, 2016
375003a
Merge pull request #4559 from fyu/loss_reshape
jeffdonahue Aug 7, 2016
f86a099
add cudnn interfaces for n-dimensional computation
Feb 18, 2016
4f63ea5
add support for nd convolution in cudnn
Feb 18, 2016
5e1f04e
change interface of pool to support n-dimensions
Feb 19, 2016
2346c5e
fix 2D pooling on CPU and GPU
Feb 19, 2016
0dcb68a
remove some calls of Blob::LegacyShape() to support 3D
May 23, 2016
fb0f9f5
fix xavier filler to use new blob shape accessors
Feb 19, 2016
b8ca687
fix tests for new pooling parameter interface
Apr 12, 2016
c88f8fa
add 3D cudnn convolution tests
Apr 13, 2016
d0efc10
add 3D cudnn pooling tests
Apr 14, 2016
45562a0
fix CUDNN_BAD_PARAM when using InnerProduct layer
Apr 28, 2016
b506327
change interface for cudnn v5
May 23, 2016
fc39d7e
remove some calls of Blob::LegacyShape() to support 3D
Sep 12, 2016
857f47d
fix msra filler to use new blob shape accessors
Sep 12, 2016
334e76f
fix positive_unitball filler to use new blob shape accessors
Sep 12, 2016
efda84c
Merge branch 'nd-cudnn' of github.com:christianpayer/caffe into nd-cudnn
Nov 2, 2016
add support for nd convolution in cudnn
Christian Payer committed Aug 10, 2016
commit 4f63ea5dfb89d28da9bc2d904db5db7a67acc0c2
79 changes: 41 additions & 38 deletions src/caffe/layers/cudnn_conv_layer.cpp
@@ -59,20 +59,21 @@ void CuDNNConvolutionLayer<Dtype>::LayerSetUp(
   bias_offset_ = (this->num_output_ / this->group_);
 
   // Create filter descriptor.
-  const int* kernel_shape_data = this->kernel_shape_.cpu_data();
-  const int kernel_h = kernel_shape_data[0];
-  const int kernel_w = kernel_shape_data[1];
-  cudnn::createFilterDesc<Dtype>(&filter_desc_,
-      this->num_output_ / this->group_, this->channels_ / this->group_,
-      kernel_h, kernel_w);
+  std::vector<int> kernel_shape;
+  kernel_shape.push_back(this->num_output_ / this->group_);
+  kernel_shape.push_back(this->channels_ / this->group_);
+  for (unsigned int i = 0; i < this->num_spatial_axes_; ++i)
+    kernel_shape.push_back(this->kernel_shape_.cpu_data()[i]);
+
+  cudnn::createNdFilterDesc<Dtype>(&filter_desc_, kernel_shape);
 
   // Create tensor descriptor(s) for data and corresponding convolution(s).
   for (int i = 0; i < bottom.size(); i++) {
     cudnnTensorDescriptor_t bottom_desc;
-    cudnn::createTensor4dDesc<Dtype>(&bottom_desc);
+    cudnn::createTensorDesc<Dtype>(&bottom_desc);
     bottom_descs_.push_back(bottom_desc);
     cudnnTensorDescriptor_t top_desc;
-    cudnn::createTensor4dDesc<Dtype>(&top_desc);
+    cudnn::createTensorDesc<Dtype>(&top_desc);
     top_descs_.push_back(top_desc);
     cudnnConvolutionDescriptor_t conv_desc;
     cudnn::createConvolutionDesc<Dtype>(&conv_desc);
@@ -81,7 +82,7 @@ void CuDNNConvolutionLayer<Dtype>::LayerSetUp(
 
   // Tensor descriptor for bias.
   if (this->bias_term_) {
-    cudnn::createTensor4dDesc<Dtype>(&bias_desc_);
+    cudnn::createTensorDesc<Dtype>(&bias_desc_);
   }
 
   handles_setup_ = true;
@@ -91,41 +92,42 @@ template <typename Dtype>
 void CuDNNConvolutionLayer<Dtype>::Reshape(
     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
   ConvolutionLayer<Dtype>::Reshape(bottom, top);
-  CHECK_EQ(2, this->num_spatial_axes_)
-      << "CuDNNConvolution input must have 2 spatial axes "
-      << "(e.g., height and width). "
-      << "Use 'engine: CAFFE' for general ND convolution.";
 
   bottom_offset_ = this->bottom_dim_ / this->group_;
   top_offset_ = this->top_dim_ / this->group_;
-  const int height = bottom[0]->shape(this->channel_axis_ + 1);
-  const int width = bottom[0]->shape(this->channel_axis_ + 2);
-  const int height_out = top[0]->shape(this->channel_axis_ + 1);
-  const int width_out = top[0]->shape(this->channel_axis_ + 2);
-  const int* pad_data = this->pad_.cpu_data();
-  const int pad_h = pad_data[0];
-  const int pad_w = pad_data[1];
-  const int* stride_data = this->stride_.cpu_data();
-  const int stride_h = stride_data[0];
-  const int stride_w = stride_data[1];
+
+  std::vector<int> bottom_tensor_shape(bottom[0]->shape());
+  bottom_tensor_shape[1] /= this->group_;
+  std::vector<int> bottom_tensor_stride(bottom[0]->shape().size(), 1);
+  for (int i = bottom[0]->shape().size() - 2; i >= 0; --i) {
+    bottom_tensor_stride[i] =
+        bottom[0]->shape(i + 1) * bottom_tensor_stride[i + 1];
+  }
+
+  std::vector<int> top_tensor_shape(top[0]->shape());
+  top_tensor_shape[1] /= this->group_;
+  std::vector<int> top_tensor_stride(top[0]->shape().size(), 1);
+  for (int i = top[0]->shape().size() - 2; i >= 0; --i) {
+    top_tensor_stride[i] = top[0]->shape(i + 1) * top_tensor_stride[i + 1];
+  }
+
+  std::vector<int> pad, stride;
+  for (unsigned int i = 0; i < this->num_spatial_axes_; ++i) {
+    pad.push_back(this->pad_.cpu_data()[i]);
+    stride.push_back(this->stride_.cpu_data()[i]);
+  }
 
   // Specify workspace limit for kernels directly until we have a
   // planning strategy and a rewrite of Caffe's GPU memory management
   size_t workspace_limit_bytes = 8*1024*1024;
 
   for (int i = 0; i < bottom.size(); i++) {
-    cudnn::setTensor4dDesc<Dtype>(&bottom_descs_[i],
-        this->num_,
-        this->channels_ / this->group_, height, width,
-        this->channels_ * height * width,
-        height * width, width, 1);
-    cudnn::setTensor4dDesc<Dtype>(&top_descs_[i],
-        this->num_,
-        this->num_output_ / this->group_, height_out, width_out,
-        this->num_output_ * this->out_spatial_dim_,
-        this->out_spatial_dim_, width_out, 1);
-    cudnn::setConvolutionDesc<Dtype>(&conv_descs_[i], bottom_descs_[i],
-        filter_desc_, pad_h, pad_w,
-        stride_h, stride_w);
+    cudnn::setTensorNdDesc<Dtype>(&bottom_descs_[i],
+        bottom_tensor_shape, bottom_tensor_stride);
+    cudnn::setTensorNdDesc<Dtype>(&top_descs_[i],
+        top_tensor_shape, top_tensor_stride);
+    cudnn::setNdConvolutionDesc<Dtype>(&conv_descs_[i], bottom_descs_[i],
+        filter_desc_, pad, stride);
 
     // choose forward and backward algorithms + workspace(s)
     CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[0],
@@ -226,8 +228,9 @@ void CuDNNConvolutionLayer<Dtype>::Reshape(
 
   // Tensor descriptor for bias.
   if (this->bias_term_) {
-    cudnn::setTensor4dDesc<Dtype>(&bias_desc_,
-        1, this->num_output_ / this->group_, 1, 1);
+    vector<int> bias_shape(bottom[0]->shape().size(), 1);
+    bias_shape[1] = this->num_output_ / this->group_;
+    cudnn::setTensorNdDesc<Dtype>(&bias_desc_, bias_shape);
   }
 }
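
A note on the stride computation in `Reshape` above: the strides are fully packed, row-major strides derived from the *full* blob shape, while only the channel entry of the shape vector is divided by `group_`. The descriptor therefore describes one group's channels embedded in the full tensor, and stepping the data pointer by `bottom_offset_` (`bottom_dim_ / group_`) reaches the next group. A standalone worked example with an assumed 5-D shape (not code from this PR):

```cpp
// Worked example of the stride computation in Reshape, for an assumed
// 5-D bottom blob of shape (N, C, D, H, W) = (2, 4, 3, 5, 5), group = 2.
#include <iostream>
#include <vector>

int main() {
  std::vector<int> shape = {2, 4, 3, 5, 5};
  int group = 2;

  // Fully packed row-major strides, computed from the full shape,
  // exactly as in the PR: stride[i] = shape[i + 1] * stride[i + 1].
  std::vector<int> stride(shape.size(), 1);
  for (int i = shape.size() - 2; i >= 0; --i)
    stride[i] = shape[i + 1] * stride[i + 1];

  // The descriptor advertises only C/group channels but keeps the
  // full-tensor strides, so offsetting the data pointer by
  // bottom_dim_/group addresses the next group's channels.
  shape[1] /= group;

  for (size_t i = 0; i < shape.size(); ++i)
    std::cout << shape[i] << " (stride " << stride[i] << ")\n";
  // Prints: 2 (stride 300), 2 (stride 75), 3 (stride 25),
  //         5 (stride 5), 5 (stride 1)
  return 0;
}
```

With `group_ = 1` these are the usual packed NCHW/NCDHW strides, so the Nd path reproduces the old 4d descriptor setup as a special case.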

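The `createNdFilterDesc`, `setTensorNdDesc`, and `setNdConvolutionDesc` helpers called above live in `include/caffe/util/cudnn.hpp`, which is not part of this file's diff. A hedged sketch of what they presumably look like, written against the cuDNN v5 Nd API (`cudnnSetTensorNdDescriptor`, `cudnnSetFilterNdDescriptor`, `cudnnSetConvolutionNdDescriptor`); the `dataType` trait and `CUDNN_CHECK` below are minimal stand-ins for Caffe's existing ones:

```cpp
// Sketch only: the real helpers are in include/caffe/util/cudnn.hpp.
#include <cstdlib>
#include <vector>
#include <cudnn.h>

// Minimal stand-in for Caffe's CUDNN_CHECK (the real one logs via glog).
#define CUDNN_CHECK(condition) \
  do { \
    if ((condition) != CUDNN_STATUS_SUCCESS) std::abort(); \
  } while (0)

namespace cudnn {

// Stand-in for Caffe's existing cudnn::dataType<Dtype> trait.
template <typename Dtype> class dataType;
template <> class dataType<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
};
template <> class dataType<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
};

template <typename Dtype>
inline void setTensorNdDesc(cudnnTensorDescriptor_t* desc,
    const std::vector<int>& shape, const std::vector<int>& stride) {
  // Rank plus raw dimension/stride arrays, as cuDNN expects.
  CUDNN_CHECK(cudnnSetTensorNdDescriptor(*desc, dataType<Dtype>::type,
      static_cast<int>(shape.size()), shape.data(), stride.data()));
}

template <typename Dtype>
inline void createNdFilterDesc(cudnnFilterDescriptor_t* desc,
    const std::vector<int>& shape) {
  // shape = {out/group, in/group, k_1, ..., k_d} as built in LayerSetUp;
  // e.g. num_output 64, 3 channels, 3x3x3 kernels -> {64, 3, 3, 3, 3}.
  CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
  CUDNN_CHECK(cudnnSetFilterNdDescriptor(*desc, dataType<Dtype>::type,
      CUDNN_TENSOR_NCHW, static_cast<int>(shape.size()), shape.data()));
}

template <typename Dtype>
inline void setNdConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
    cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter,
    const std::vector<int>& pad, const std::vector<int>& stride) {
  // bottom/filter mirror the 2D helper's signature at the call sites
  // above; the v5 convolution descriptor does not actually need them.
  (void)bottom;
  (void)filter;
  // cuDNN v5 also takes an upscale (dilation) array; all-ones here.
  std::vector<int> upscale(pad.size(), 1);
  CUDNN_CHECK(cudnnSetConvolutionNdDescriptor(*conv,
      static_cast<int>(pad.size()), pad.data(), stride.data(),
      upscale.data(), CUDNN_CROSS_CORRELATION, dataType<Dtype>::type));
}

}  // namespace cudnn
```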