make ROIAlign support position-sensitive pooling (apache#13088)
* make ROIAlign support position-sensitive pooling

* add unittest for RoIAlign op

* fix cpplint error

* fix python3 compatibility for unittest

* change OMP for better performance

* delete blank line to trigger CI

* add shape check when position_sensitive is true

* fix the typo

* typo: shuold -> should

* remove private() clause in omp statement
shesung authored and haohuw committed Jun 23, 2019
1 parent 7324160 commit db8fa85
Showing 4 changed files with 90 additions and 34 deletions.
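
For context, here is a minimal sketch of how the new flag is exercised from Python once this change is in place. The operator name and keyword arguments follow the unit test added in this commit; the tensor sizes, RoI coordinates, and `spatial_scale` below are illustrative only.

```python
import numpy as np
import mxnet as mx

# Position-sensitive RoI Align: with pooled_size (PH, PW), the input feature map
# carries C_out * PH * PW channels and the pooled output keeps only C_out channels,
# each bin (ph, pw) reading from its own group of input channels.
N, C_out, H, W = 2, 3, 12, 12
PH, PW = 3, 4
C_in = C_out * PH * PW  # 36 input channels

data = mx.nd.array(np.random.uniform(size=(N, C_in, H, W)))
# Each RoI is [batch_index, x1, y1, x2, y2], as in the unit test below.
rois = mx.nd.array([[0, 0, 0, 8, 8],
                    [1, 2, 2, 10, 10]])

out = mx.nd.contrib.ROIAlign(data, rois, pooled_size=(PH, PW),
                             spatial_scale=1.0, sample_ratio=2,
                             position_sensitive=True)
print(out.shape)  # (2, 3, 3, 4) == (num_rois, C_in // (PH * PW), PH, PW)
```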
8 changes: 6 additions & 2 deletions src/operator/contrib/roi_align-inl.h
@@ -20,7 +20,7 @@
* Copyright (c) 2018 by Contributors
* \file roi_align-inl.h
* \brief roi align operator and symbol
* \author Hang Zhang
* \author Hang Zhang, Shesung
* modified from Caffe2
*/
#ifndef MXNET_OPERATOR_CONTRIB_ROI_ALIGN_INL_H_
@@ -35,7 +35,6 @@
namespace mxnet {
namespace op {


// Declare enumeration of input order to make code more intuitive.
// These enums are only visible within this header
namespace roialign {
@@ -48,6 +47,7 @@ struct ROIAlignParam : public dmlc::Parameter<ROIAlignParam> {
TShape pooled_size;
float spatial_scale;
int sample_ratio;
bool position_sensitive;
DMLC_DECLARE_PARAMETER(ROIAlignParam) {
DMLC_DECLARE_FIELD(pooled_size)
.set_expect_ndim(2).enforce_nonzero()
@@ -57,6 +57,10 @@ struct ROIAlignParam : public dmlc::Parameter<ROIAlignParam> {
"Equals the reciprocal of total stride in convolutional layers");
DMLC_DECLARE_FIELD(sample_ratio).set_default(-1)
.describe("Optional sampling ratio of ROI align, using adaptive size by default.");
DMLC_DECLARE_FIELD(position_sensitive).set_default(false)
.describe("Whether to perform position-sensitive RoI pooling. PSRoIPooling is "
"first proposaled by R-FCN and it can reduce the input channels by ph*pw times, "
"where (ph, pw) is the pooled_size");
}
};
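
When `position_sensitive` is true, the kernels below read output channel `c` at pooled bin `(ph, pw)` from input channel `c_unpooled = c * PH * PW + ph * PW + pw`, so every (output channel, bin) pair owns its own input channel. A rough NumPy illustration of that index arithmetic (the names here are for exposition only):

```python
import numpy as np

PH, PW, C_out = 3, 4, 2
C_in = C_out * PH * PW

# c_unpooled = c * PH * PW + ph * PW + pw, mirroring the C++/CUDA kernels below.
mapping = np.empty((C_out, PH, PW), dtype=np.int64)
for c in range(C_out):
    for ph in range(PH):
        for pw in range(PW):
            mapping[c, ph, pw] = c * PH * PW + ph * PW + pw

# Every (output channel, bin) pair maps to exactly one input channel.
assert sorted(mapping.ravel().tolist()) == list(range(C_in))
```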

59 changes: 42 additions & 17 deletions src/operator/contrib/roi_align.cc
@@ -20,7 +20,7 @@
* Copyright (c) 2018 by Contributors
* \file roi_align.cc
* \brief roi align operator
* \author Hang Zhang
* \author Hang Zhang, Shesung
* Adapted from Caffe2
*/
#include "./roi_align-inl.h"
@@ -142,6 +142,7 @@ void ROIAlignForward(
const int nthreads,
const T* bottom_data,
const T& spatial_scale,
const bool position_sensitive,
const int channels,
const int height,
const int width,
@@ -156,6 +157,8 @@
int n_rois = nthreads / channels / pooled_width / pooled_height;
// (n, c, ph, pw) is an element in the pooled output
// can be parallelized using omp
#pragma omp parallel for \
num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
for (int n = 0; n < n_rois; n++) {
int index_n = n * channels * pooled_width * pooled_height;

@@ -208,19 +211,23 @@
roi_bin_grid_w,
&pre_calc);

int c;
#pragma omp parallel for private(c) \
num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
for (c = 0; c < channels; c++) {
for (int c = 0; c < channels; c++) {
int index_n_c = index_n + c * pooled_width * pooled_height;
const T* offset_bottom_data =
bottom_data + (roi_batch_ind * channels + c) * height * width;
int pre_calc_index = 0;

for (int ph = 0; ph < pooled_height; ph++) {
for (int pw = 0; pw < pooled_width; pw++) {
int index = index_n_c + ph * pooled_width + pw;

int c_unpooled = c;
int channels_unpooled = channels;
if (position_sensitive) {
c_unpooled = c * pooled_height * pooled_width + ph * pooled_width + pw;
channels_unpooled = channels * pooled_height * pooled_width;
}
const T* offset_bottom_data =
bottom_data + (roi_batch_ind * channels_unpooled + c_unpooled)
* height * width;
T output_val = 0.;
for (int iy = 0; iy < roi_bin_grid_h; iy++) {
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
@@ -310,6 +317,7 @@ void ROIAlignBackward(
const T* top_diff,
const int /*num_rois*/,
const T& spatial_scale,
const bool position_sensitive,
const int channels,
const int height,
const int width,
@@ -347,8 +355,15 @@
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

int c_unpooled = c;
int channels_unpooled = channels;
if (position_sensitive) {
c_unpooled = c * pooled_height * pooled_width + ph * pooled_width + pw;
channels_unpooled = channels * pooled_height * pooled_width;
}
T* offset_bottom_diff =
bottom_diff + (roi_batch_ind * channels + c) * height * width;
bottom_diff + (roi_batch_ind * channels_unpooled + c_unpooled)
* height * width;

int top_offset = (n * channels + c) * pooled_height * pooled_width;
const T* offset_top_diff = top_diff + top_offset;
@@ -426,7 +441,7 @@ void ROIAlignForwardCompute(const nnvm::NodeAttrs& attrs,

const int count = out_data[roialign::kOut].Size();
// const int num_rois = in_data[roialign::kBox].size(0);
const int channels = in_data[roialign::kData].size(1);
const int channels = out_data[roialign::kOut].size(1); // channels of pooled output
const int height = in_data[roialign::kData].size(2);
const int width = in_data[roialign::kData].size(3);
const int pooled_height = out_data[roialign::kOut].size(2);
@@ -439,9 +454,9 @@
const DType *bottom_rois = in_data[roialign::kBox].dptr<DType>();
DType *top_data = out_data[roialign::kOut].dptr<DType>();

ROIAlignForward<DType>(count, bottom_data, param.spatial_scale, channels,
height, width, pooled_height, pooled_width, param.sample_ratio,
bottom_rois, rois_cols, top_data);
ROIAlignForward<DType>(count, bottom_data, param.spatial_scale, param.position_sensitive,
channels, height, width, pooled_height, pooled_width,
param.sample_ratio, bottom_rois, rois_cols, top_data);
})
}

@@ -470,7 +485,7 @@ void ROIAlignBackwardCompute(const nnvm::NodeAttrs& attrs,

const int count = out_grad[0].Size();
const int num_rois = in_data[0].size(0);
const int channels = outputs[0].size(1);
const int channels = out_grad[0].size(1); // channels of pooled output
const int height = outputs[0].size(2);
const int width = outputs[0].size(3);
const int pooled_height = out_grad[0].size(2);
@@ -489,8 +504,9 @@
Fill<false>(s, outputs[0], kWriteTo, static_cast<DType>(0));
}
ROIAlignBackward<DType>(count, top_diff, num_rois, param.spatial_scale,
channels, height, width, pooled_height, pooled_width,
param.sample_ratio, grad_in, bottom_rois, rois_cols);
param.position_sensitive, channels, height, width,
pooled_height, pooled_width, param.sample_ratio, grad_in,
bottom_rois, rois_cols);
}
if (kWriteTo == req[roialign::kBox]) {
Fill<false>(s, outputs[1], kWriteTo, static_cast<DType>(0));
@@ -545,8 +561,17 @@ He, Kaiming, et al. "Mask R-CNN." ICCV, 2017
CHECK_EQ(bshape[1], 5) << "bbox should be a 2D tensor of shape [batch, 5]";
// out: [num_rois, c, pooled_h, pooled_w]
out_shape->clear();
out_shape->push_back(
Shape4(bshape[0], dshape[1], param.pooled_size[0], param.pooled_size[1]));
if (param.position_sensitive) {
CHECK_EQ(dshape[1] % (param.pooled_size[0]*param.pooled_size[1]), 0) <<
"Input channels should be divisible by pooled_size[0]*pooled_size[1] "
"when position_sensitive is true.";
out_shape->push_back(
Shape4(bshape[0], dshape[1]/param.pooled_size[0]/param.pooled_size[1],
param.pooled_size[0], param.pooled_size[1]));
} else {
out_shape->push_back(
Shape4(bshape[0], dshape[1], param.pooled_size[0], param.pooled_size[1]));
}
return true;
})
.set_attr<nnvm::FInferType>("FInferType", [](const nnvm::NodeAttrs& attrs,
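As a quick sanity check, the shape rule enforced by the new `FInferShape` branch above can be mirrored in plain Python (this helper is illustrative, not part of the MXNet API):

```python
def pooled_output_channels(in_channels, pooled_size, position_sensitive):
    """Mirror of the shape rule added in roi_align.cc (for illustration only)."""
    ph, pw = pooled_size
    if not position_sensitive:
        return in_channels
    if in_channels % (ph * pw) != 0:
        raise ValueError("Input channels should be divisible by "
                         "pooled_size[0]*pooled_size[1] when position_sensitive is true.")
    return in_channels // (ph * pw)

assert pooled_output_channels(490, (7, 7), True) == 10   # e.g. 7x7 position groups
assert pooled_output_channels(256, (7, 7), False) == 256
```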
28 changes: 23 additions & 5 deletions src/operator/contrib/roi_align.cu
@@ -20,7 +20,7 @@
* Copyright (c) 2018 by Contributors
* \file roi_align.cu
* \brief roi align operator
* \author Hang Zhang
* \author Hang Zhang, Shesung
* Adapted from Caffe2
*/
#include "./roi_align-inl.h"
@@ -111,6 +111,7 @@ __global__ void RoIAlignForwardKernel(
const int nthreads,
const T* bottom_data,
const T spatial_scale,
const bool position_sensitive,
const int channels,
const int height,
const int width,
@@ -145,8 +146,15 @@
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

int c_unpooled = c;
int channels_unpooled = channels;
if (position_sensitive) {
c_unpooled = c * pooled_height * pooled_width + ph * pooled_width + pw;
channels_unpooled = channels * pooled_height * pooled_width;
}
const T* offset_bottom_data =
bottom_data + (roi_batch_ind * channels + c) * height * width;
bottom_data + (roi_batch_ind * channels_unpooled + c_unpooled)
* height * width;

// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sampling_ratio > 0)
@@ -242,6 +250,7 @@ __global__ void RoIAlignBackwardKernel(
const T* top_diff,
const int num_rois,
const T spatial_scale,
const bool position_sensitive,
const int channels,
const int height,
const int width,
@@ -276,8 +285,15 @@
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

int c_unpooled = c;
int channels_unpooled = channels;
if (position_sensitive) {
c_unpooled = c * pooled_height * pooled_width + ph * pooled_width + pw;
channels_unpooled = channels * pooled_height * pooled_width;
}
T* offset_bottom_diff =
bottom_diff + (roi_batch_ind * channels + c) * height * width;
bottom_diff + (roi_batch_ind * channels_unpooled + c_unpooled)
* height * width;

int top_offset = (n * channels + c) * pooled_height * pooled_width;
const T* offset_top_diff = top_diff + top_offset;
@@ -357,7 +373,7 @@ void ROIAlignForwardCompute(const nnvm::NodeAttrs& attrs,

const int count = out_data[roialign::kOut].Size();
const int num_rois = in_data[roialign::kBox].size(0);
const int channels = in_data[roialign::kData].size(1);
const int channels = out_data[roialign::kOut].size(1); // channels of pooled output
const int height = in_data[roialign::kData].size(2);
const int width = in_data[roialign::kData].size(3);
const int pooled_height = out_data[roialign::kOut].size(2);
@@ -377,6 +393,7 @@
count,
bottom_data,
param.spatial_scale,
param.position_sensitive,
channels,
height,
width,
@@ -414,7 +431,7 @@ void ROIAlignBackwardCompute(const nnvm::NodeAttrs& attrs,

const int count = out_grad[0].Size();
const int num_rois = in_data[0].size(0);
const int channels = outputs[0].size(1);
const int channels = out_grad[0].size(1); // channels of pooled output
const int height = outputs[0].size(2);
const int width = outputs[0].size(3);
const int pooled_height = out_grad[0].size(2);
@@ -445,6 +462,7 @@ void ROIAlignBackwardCompute(const nnvm::NodeAttrs& attrs,
top_diff,
num_rois,
param.spatial_scale,
param.position_sensitive,
channels,
height,
width,
29 changes: 19 additions & 10 deletions tests/python/unittest/test_operator.py
@@ -17,6 +17,7 @@

# pylint: skip-file
from __future__ import print_function
from __future__ import division
import numpy as np
import mxnet as mx
import copy
@@ -6899,14 +6900,16 @@ def bilinear_interpolate(bottom, height, width, y, x):
]
return val, grad

def roialign_forward_backward(data, rois, pooled_size, spatial_scale, sampling_ratio, dy):
def roialign_forward_backward(data, rois, pooled_size, spatial_scale, sampling_ratio,
position_sensitive, dy):
N, C, H, W = data.shape
R = rois.shape[0]
PH, PW = pooled_size
assert len(rois.shape) == 2
assert rois.shape[1] == 5

out = np.zeros((R, C, PH, PW))
C_out = C // PH // PW if position_sensitive else C
out = np.zeros((R, C_out, PH, PW))
dx = np.zeros_like(data)
drois = np.zeros_like(rois)

@@ -6924,24 +6927,25 @@ def roialign_forward_backward(data, rois, pooled_size, spatial_scale, sampling_r
roi_bin_grid_h = int(np.ceil(roi_h * 1.0 / PH))
roi_bin_grid_w = int(np.ceil(roi_w * 1.0 / PW))
count = roi_bin_grid_h * roi_bin_grid_w
for c in range(C):
for c in range(C_out):
for ph in range(PH):
for pw in range(PW):
val = 0.0
c_in = c * PH * PW + ph * PW + pw if position_sensitive else c
for iy in range(roi_bin_grid_h):
y = sh + ph * bin_h + (iy + 0.5) * bin_h / roi_bin_grid_h
for ix in range(roi_bin_grid_w):
x = sw + pw * bin_w + (ix + 0.5) * bin_w / roi_bin_grid_w
v, g = bilinear_interpolate(bdata[c], H, W, y, x)
v, g = bilinear_interpolate(bdata[c_in], H, W, y, x)
val += v
# compute grad
for qy, qx, qw in g:
dx[batch_ind, c, qy, qx] += dy[r, c, ph, pw] * qw * 1.0 / count
dx[batch_ind, c_in, qy, qx] += dy[r, c, ph, pw] * qw * 1.0 / count

out[r, c, ph, pw] = val * 1.0 / count
return out, [dx, drois]

def test_roi_align_value(sampling_ratio=0):
def test_roi_align_value(sampling_ratio=0, position_sensitive=False):
ctx=default_context()
dtype = np.float32

@@ -6950,6 +6954,7 @@
assert H == W
R = 7
pooled_size = (3, 4)
C = C * pooled_size[0] * pooled_size[1] if position_sensitive else C

spatial_scale = H * 1.0 / dlen
data = mx.nd.array(np.arange(N*C*W*H).reshape((N,C,H,W)), ctx=ctx, dtype = dtype)
@@ -6964,11 +6969,14 @@
rois.attach_grad()
with mx.autograd.record():
output = mx.nd.contrib.ROIAlign(data, rois, pooled_size=pooled_size,
spatial_scale=spatial_scale, sample_ratio=sampling_ratio)
dy = mx.nd.random.uniform(-1, 1, (R, C) + pooled_size, ctx=ctx, dtype = dtype)
spatial_scale=spatial_scale, sample_ratio=sampling_ratio,
position_sensitive=position_sensitive)
C_out = C // pooled_size[0] // pooled_size[1] if position_sensitive else C
dy = mx.nd.random.uniform(-1, 1, (R, C_out) + pooled_size, ctx=ctx, dtype = dtype)
output.backward(dy)
real_output, [dx, drois] = roialign_forward_backward(data.asnumpy(), rois.asnumpy(), pooled_size,
spatial_scale, sampling_ratio, dy.asnumpy())
spatial_scale, sampling_ratio,
position_sensitive, dy.asnumpy())
assert np.allclose(output.asnumpy(), real_output)
# The C++ float and Python float results may differ slightly in precision.
assert np.allclose(data.grad.asnumpy(), dx, atol = 1e-5), np.abs(data.grad.asnumpy() - dx).max()
@@ -6994,7 +7002,8 @@ def test_roi_align_autograd(sampling_ratio=0):
numeric_eps=1e-4, rtol=1e-1, atol=1e-4, ctx=ctx)

test_roi_align_value()
test_roi_align_value(2)
test_roi_align_value(sampling_ratio=2)
test_roi_align_value(position_sensitive=True)
test_roi_align_autograd()

@with_seed()
