Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Add im2col and col2im operator (#16502)
Browse files Browse the repository at this point in the history
* add im2col

* add col2im

* fix typo

* add docs

* add unittest

* more tests

* fix lint

* fix doc

* fix request

* trigger CI
  • Loading branch information
arcadiaphy committed Dec 18, 2019
1 parent a01ded2 commit faa2832
Show file tree
Hide file tree
Showing 4 changed files with 723 additions and 0 deletions.
259 changes: 259 additions & 0 deletions src/operator/nn/im2col-inl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http:https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2015 by Contributors
* \file im2col-inl.h
* \brief
* \author Jiajun Wang
*/

#ifndef MXNET_OPERATOR_NN_IM2COL_INL_H_
#define MXNET_OPERATOR_NN_IM2COL_INL_H_
#include <vector>
#include "../mxnet_op.h"
#include "../mshadow_op.h"
#include "../elemwise_op_common.h"
#include "./im2col.h"

namespace mxnet {
namespace op {

struct Im2colParam : public dmlc::Parameter<Im2colParam> {
mxnet::TShape kernel;
mxnet::TShape stride;
mxnet::TShape dilate;
mxnet::TShape pad;
DMLC_DECLARE_PARAMETER(Im2colParam) {
DMLC_DECLARE_FIELD(kernel).describe("Sliding kernel size: (w,), (h, w) or (d, h, w).");
DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0))
.describe("The stride between adjacent sliding blocks in spatial dimension: "
"(w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.");
DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0))
.describe("The spacing between adjacent kernel points: (w,), (h, w) or (d, h, w). "
"Defaults to 1 for each dimension.");
DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0))
.describe("The zero-value padding size on both sides of spatial dimension: "
"(w,), (h, w) or (d, h, w). Defaults to no padding.");
}

index_t DilatedKernelSize(int dim) const {
return 1 + (kernel[dim] - 1) * dilate[dim];
}
}; // struct Im2colParam


template<typename xpu>
void Im2colCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mshadow;
const Im2colParam& param = nnvm::get<Im2colParam>(attrs.parsed);
Stream<xpu> *s = ctx.get_stream<xpu>();
const mxnet::TShape im_shape = inputs[0].shape_;
const mxnet::TShape col_shape = outputs[0].shape_;
const index_t num = im_shape[0];

const int spatial_size = param.kernel.ndim();
mxnet::TShape col_buffer_shape(1 + spatial_size, 1);
col_buffer_shape[0] = col_shape[1];
for (int i = 0; i < spatial_size; ++i) {
const index_t pad_size = im_shape[i + 2] + 2 * param.pad[i];
const index_t output_size = (pad_size - param.DilatedKernelSize(i)) / param.stride[i] + 1;
col_buffer_shape[i + 1] = output_size;
}

MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
Tensor<xpu, 4, DType> im = inputs[0].get_with_shape<xpu, 4, DType>(
Shape4(im_shape[0], im_shape[1], im_shape[2], im_shape[3]), s);
Tensor<xpu, 3, DType> col = outputs[0].get_with_shape<xpu, 3, DType>(
Shape3(col_shape[0], col_shape[1], col_shape[2]), s);

if (req[0] == kNullOp) return;
if (req[0] != kAddTo) {
for (index_t n = 0; n < num; ++n) {
im2col(s, im[n].dptr_, im_shape, col_buffer_shape,
param.kernel, param.pad, param.stride, param.dilate, col[n].dptr_);
}
} else {
Tensor<xpu, 2, DType> tcol = ctx.requested[0]
.get_space_typed<xpu, 2, DType>(Shape2(col_shape[1], col_shape[2]), s);
for (index_t n = 0; n < num; ++n) {
im2col(s, im[n].dptr_, im_shape, col_buffer_shape,
param.kernel, param.pad, param.stride, param.dilate, tcol.dptr_);
Tensor<xpu, 2, DType> ocol = col[n];
ocol += tcol;
}
}
});
}

template<typename xpu>
void Im2colGradCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mshadow;
const Im2colParam& param = nnvm::get<Im2colParam>(attrs.parsed);
Stream<xpu> *s = ctx.get_stream<xpu>();

const mxnet::TShape im_shape = outputs[0].shape_;
const mxnet::TShape col_shape = inputs[0].shape_;
const index_t num = im_shape[0];

const int spatial_size = param.kernel.ndim();
mxnet::TShape col_buffer_shape(1 + spatial_size, 1);
col_buffer_shape[0] = col_shape[1];
for (int i = 0; i < spatial_size; ++i) {
const index_t pad_size = im_shape[i + 2] + 2 * param.pad[i];
const index_t output_size = (pad_size - param.DilatedKernelSize(i)) / param.stride[i] + 1;
col_buffer_shape[i + 1] = output_size;
}

MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
Tensor<xpu, 4, DType> im_grad = outputs[0].get_with_shape<xpu, 4, DType>(
Shape4(im_shape[0], im_shape[1], im_shape[2], im_shape[3]), s);
Tensor<xpu, 3, DType> col_grad = inputs[0].get_with_shape<xpu, 3, DType>(
Shape3(col_shape[0], col_shape[1], col_shape[2]), s);

for (index_t n = 0; n < num; ++n) {
col2im(s, col_grad[n].dptr_, im_shape, col_buffer_shape,
param.kernel, param.pad, param.stride, param.dilate,
im_grad[n].dptr_, req[0]);
}
});
}

struct Col2imParam : public dmlc::Parameter<Col2imParam> {
mxnet::TShape output_size;
mxnet::TShape kernel;
mxnet::TShape stride;
mxnet::TShape dilate;
mxnet::TShape pad;
DMLC_DECLARE_PARAMETER(Col2imParam) {
DMLC_DECLARE_FIELD(output_size)
.describe("The spatial dimension of image array: (w,), (h, w) or (d, h, w).");
DMLC_DECLARE_FIELD(kernel).describe("Sliding kernel size: (w,), (h, w) or (d, h, w).");
DMLC_DECLARE_FIELD(stride).set_default(mxnet::TShape(0, 0))
.describe("The stride between adjacent sliding blocks in spatial dimension: "
"(w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.");
DMLC_DECLARE_FIELD(dilate).set_default(mxnet::TShape(0, 0))
.describe("The spacing between adjacent kernel points: (w,), (h, w) or (d, h, w). "
"Defaults to 1 for each dimension.");
DMLC_DECLARE_FIELD(pad).set_default(mxnet::TShape(0, 0))
.describe("The zero-value padding size on both sides of spatial dimension: "
"(w,), (h, w) or (d, h, w). Defaults to no padding.");
}

index_t DilatedKernelSize(int dim) const {
return 1 + (kernel[dim] - 1) * dilate[dim];
}
}; // struct Col2imParam

template<typename xpu>
void Col2imCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mshadow;
const Col2imParam& param = nnvm::get<Col2imParam>(attrs.parsed);
Stream<xpu> *s = ctx.get_stream<xpu>();
const mxnet::TShape im_shape = outputs[0].shape_;
const mxnet::TShape col_shape = inputs[0].shape_;
const index_t num = im_shape[0];

const int spatial_size = param.kernel.ndim();
mxnet::TShape col_buffer_shape(1 + spatial_size, 1);
col_buffer_shape[0] = col_shape[1];
for (int i = 0; i < spatial_size; ++i) {
const index_t pad_size = im_shape[i + 2] + 2 * param.pad[i];
const index_t output_size = (pad_size - param.DilatedKernelSize(i)) / param.stride[i] + 1;
col_buffer_shape[i + 1] = output_size;
}

MSHADOW_REAL_TYPE_SWITCH(outputs[0].type_flag_, DType, {
Tensor<xpu, 4, DType> im = outputs[0].get_with_shape<xpu, 4, DType>(
Shape4(im_shape[0], im_shape[1], im_shape[2], im_shape[3]), s);
Tensor<xpu, 3, DType> col = inputs[0].get_with_shape<xpu, 3, DType>(
Shape3(col_shape[0], col_shape[1], col_shape[2]), s);

for (index_t n = 0; n < num; ++n) {
col2im(s, col[n].dptr_, im_shape, col_buffer_shape,
param.kernel, param.pad, param.stride, param.dilate,
im[n].dptr_, req[0]);
}
});
}

template<typename xpu>
void Col2imGradCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<TBlob>& inputs,
const std::vector<OpReqType>& req,
const std::vector<TBlob>& outputs) {
using namespace mshadow;
const Col2imParam& param = nnvm::get<Col2imParam>(attrs.parsed);
Stream<xpu> *s = ctx.get_stream<xpu>();

const mxnet::TShape im_shape = inputs[0].shape_;
const mxnet::TShape col_shape = outputs[0].shape_;
const index_t batch_size = im_shape[0];

const int spatial_size = param.kernel.ndim();
mxnet::TShape col_buffer_shape(1 + spatial_size, 1);
col_buffer_shape[0] = im_shape[1];
for (int i = 0; i < spatial_size; ++i) {
const index_t pad_size = im_shape[i + 2] + 2 * param.pad[i];
const index_t output_size = (pad_size - param.DilatedKernelSize(i)) / param.stride[i] + 1;
col_buffer_shape[i + 1] = output_size;
}

MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
Tensor<xpu, 4, DType> im_grad = inputs[0].get_with_shape<xpu, 4, DType>(
Shape4(im_shape[0], im_shape[1], im_shape[2], im_shape[3]), s);
Tensor<xpu, 3, DType> col_grad = outputs[0].get_with_shape<xpu, 3, DType>(
Shape3(col_shape[0], col_shape[1], col_shape[2]), s);

if (req[0] == kNullOp) return;
if (req[0] != kAddTo) {
for (index_t n = 0; n < batch_size; ++n) {
im2col(s, im_grad[n].dptr_, im_shape, col_buffer_shape,
param.kernel, param.pad, param.stride, param.dilate, col_grad[n].dptr_);
}
} else {
Tensor<xpu, 2, DType> tgrad = ctx.requested[0]
.get_space_typed<xpu, 2, DType>(Shape2(col_shape[1], col_shape[2]), s);
for (index_t n = 0; n < batch_size; ++n) {
im2col(s, im_grad[n].dptr_, im_shape, col_buffer_shape,
param.kernel, param.pad, param.stride, param.dilate, tgrad.dptr_);
Tensor<xpu, 2, DType> cgrad = col_grad[n];
cgrad += tgrad;
}
}
});
}

} // namespace op
} // namespace mxnet

#endif // MXNET_OPERATOR_NN_IM2COL_INL_H_
Loading

0 comments on commit faa2832

Please sign in to comment.