Commit 264bf4a

fixed a bug about rcnn_cls
longcw committed Feb 16, 2017
1 parent e00d19a commit 264bf4a
Showing 13 changed files with 372 additions and 227 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -94,3 +94,6 @@ ENV/
.idea
extension-ffi
demo_mot.py
*.o
data/
models
3 changes: 1 addition & 2 deletions README.md
@@ -50,5 +50,4 @@ ln -s $VOCdevkit VOCdevkit2007

Then you can set some hyper-parameters in `train.py` and training parameters in the `.yml` file.

You may need to train RPN and the classifier separately as described in the Faster RCNN paper
and tune the loss function defined in `faster_rcnn/faster_rcnn.py` by yourself.
You may need to tune the loss function defined in `faster_rcnn/faster_rcnn.py` by yourself.
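For reference, the quantity this note suggests tuning is the weighted sum assembled in `faster_rcnn/faster_rcnn.py` (shown further down in this diff). A minimal sketch of that combination; exposing the box weight as a parameter is an assumption of this illustration, not something the repository does:

```python
# Sketch of the loss combination the README note refers to. In this commit the
# FasterRCNN head uses cross_entropy + 10 * loss_box, while the RPN drops the
# extra factor of 10; making the weight a parameter here is purely illustrative.
def combined_loss(cross_entropy, loss_box, box_weight=10.0):
    return cross_entropy + box_weight * loss_box
```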
12 changes: 6 additions & 6 deletions demo.py
@@ -7,13 +7,13 @@

def test():
import os
# im_file = 'demo/004545.jpg'
im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009936.jpg'
im_file = 'demo/004545.jpg'
# im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
# im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
image = cv2.imread(im_file)

# model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
model_file = '/media/longc/Data/models/faster_rcnn_pytorch/faster_rcnn_30000.h5'
model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
# model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
# model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
detector = FasterRCNN()
network.load_net(model_file, detector)
@@ -38,8 +38,8 @@ def test():
cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15), cv2.FONT_HERSHEY_PLAIN,
1.0, (0, 0, 255), thickness=1)
cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
# cv2.imshow('demo', im2show)
# cv2.waitKey(0)
cv2.imshow('demo', im2show)
cv2.waitKey(0)


if __name__ == '__main__':
10 changes: 10 additions & 0 deletions faster_rcnn/fast_rcnn/bbox_transform.py
@@ -10,11 +10,21 @@


def bbox_transform(ex_rois, gt_rois):
"""
computes the distance from ground-truth boxes to the given boxes, normed by their size
:param ex_rois: n * 4 numpy array, given boxes
:param gt_rois: n * 4 numpy array, ground-truth boxes
:return: deltas: n * 4 numpy array, ground-truth boxes
"""
ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

# assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \
# 'Invalid boxes found: {} {}'. \
# format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :])

gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
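Since the expanded hunk above cuts off before the deltas are computed, here is a self-contained NumPy sketch of the full parameterization the new docstring describes (the standard R-CNN box encoding). Treat it as an illustration rather than the verbatim remainder of the file:

```python
import numpy as np

def bbox_transform(ex_rois, gt_rois):
    # Offsets of the box centres, normalized by the box size, plus
    # log-ratios of widths and heights (standard R-CNN encoding).
    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

    gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
    gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights

    dx = (gt_ctr_x - ex_ctr_x) / ex_widths
    dy = (gt_ctr_y - ex_ctr_y) / ex_heights
    dw = np.log(gt_widths / ex_widths)
    dh = np.log(gt_heights / ex_heights)
    return np.vstack((dx, dy, dw, dh)).transpose()

# A proposal that coincides with its ground-truth box yields zero deltas.
ex = np.array([[10.0, 10.0, 49.0, 29.0]])
gt = np.array([[10.0, 10.0, 49.0, 29.0]])
print(bbox_transform(ex, gt))  # [[0. 0. 0. 0.]]
```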
91 changes: 33 additions & 58 deletions faster_rcnn/faster_rcnn.py
@@ -47,9 +47,9 @@ def __init__(self):

@property
def loss(self):
return self.cross_entropy + self.loss_box * 10
return self.cross_entropy + self.loss_box

def forward(self, im_data, im_info, gt_boxes=None, dontcare_areas=None):
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
im_data = network.np_to_variable(im_data, is_cuda=True)
im_data = im_data.permute(0, 3, 1, 2)
features = self.features(im_data)
@@ -73,51 +73,31 @@ def forward(self, im_data, im_info, gt_boxes=None, dontcare_areas=None):
# generating training labels and build the rpn loss
if self.training:
assert gt_boxes is not None
rpn_data = self.anchor_target_layer(rpn_cls_score, gt_boxes, dontcare_areas,
rpn_data = self.anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas,
im_info, self._feat_stride, self.anchor_scales)
self.cross_entropy, self.loss_box = \
self.build_loss(rpn_cls_score_reshape, rpn_bbox_pred, rpn_data)
self.cross_entropy, self.loss_box = self.build_loss(rpn_cls_score_reshape, rpn_bbox_pred, rpn_data)

return features, rois

def build_loss(self, rpn_cls_score_reshape, rpn_bbox_pred, rpn_data):
# classification loss
rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(-1, 2)
rpn_label = rpn_data[0]

# print rpn_label.size(), rpn_cls_score.size()
rpn_label = rpn_data[0].view(-1)

rpn_keep = Variable(rpn_label.data.ne(-1).nonzero().squeeze()).cuda()
rpn_cls_score = torch.index_select(rpn_cls_score, 0, rpn_keep)
rpn_label = torch.index_select(rpn_label, 0, rpn_keep)

fg_cnt = torch.sum(rpn_label.data.ne(0))
bg_cnt = rpn_label.data.numel() - fg_cnt
# ce_weights = torch.ones(rpn_cls_score.size()[1])
# ce_weights[0] = float(fg_cnt) / bg_cnt
# ce_weights = ce_weights.cuda()

_, predict = torch.max(rpn_cls_score.data, 1)
error = torch.sum(torch.abs(predict - rpn_label.data))
self.tp = torch.sum(predict[:fg_cnt].eq(rpn_label.data[:fg_cnt]))
self.tf = torch.sum(predict[fg_cnt:].eq(rpn_label.data[fg_cnt:]))
self.fg_cnt = fg_cnt
self.bg_cnt = bg_cnt

rpn_cross_entropy = F.cross_entropy(rpn_cls_score, rpn_label)
# print rpn_cross_entropy

# box loss
rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
rpn_bbox_targets = torch.mul(rpn_bbox_targets, rpn_bbox_inside_weights)
rpn_bbox_pred = torch.mul(rpn_bbox_pred, rpn_bbox_inside_weights)

# a = rpn_bbox_pred.data.cpu().numpy()
# b = rpn_bbox_targets.data.cpu().numpy()
# s = torch.sum(rpn_bbox_inside_weights.data)

rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / fg_cnt
# print rpn_loss_box
rpn_loss_box = F.smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, size_average=False) / (fg_cnt + 1e-4)

return rpn_cross_entropy, rpn_loss_box

@@ -144,11 +124,11 @@ def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_
return x.view(-1, 5)

@staticmethod
def anchor_target_layer(rpn_cls_score, gt_boxes, dontcare_areas, im_info, _feat_stride, anchor_scales):
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride, anchor_scales):
"""
rpn_cls_score: for pytorch (1, Ax2, H, W) bg/fg scores of previous conv layer
gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
#gt_ishard: (G, 1), 1 or 0 indicates difficult or not
gt_ishard: (G, 1), 1 or 0 indicates difficult or not
dontcare_areas: (D, 4), some areas may contain small objects but have no labelling. D may be 0
im_info: a list of [image_height, image_width, scale_ratios]
_feat_stride: the downsampling ratio of feature map to the original input image
@@ -165,7 +145,7 @@ def anchor_target_layer(rpn_cls_score, gt_boxes, dontcare_areas, im_info, _feat_
"""
rpn_cls_score = rpn_cls_score.data.cpu().numpy()
rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \
anchor_target_layer_py(rpn_cls_score, gt_boxes, dontcare_areas, im_info, _feat_stride, anchor_scales)
anchor_target_layer_py(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride, anchor_scales)

rpn_labels = network.np_to_variable(rpn_labels, is_cuda=True, dtype=torch.LongTensor)
rpn_bbox_targets = network.np_to_variable(rpn_bbox_targets, is_cuda=True)
@@ -202,7 +182,7 @@ class FasterRCNN(nn.Module):
SCALES = (600,)
MAX_SIZE = 1000

def __init__(self, classes=None):
def __init__(self, classes=None, debug=False):
super(FasterRCNN, self).__init__()

if classes is not None:
@@ -220,6 +200,9 @@ def __init__(self, classes=None):
self.cross_entropy = None
self.loss_box = None

# for log
self.debug = debug

@property
def loss(self):
# print self.cross_entropy
@@ -228,11 +211,11 @@ def loss(self):
# print self.rpn.loss_box
return self.cross_entropy + self.loss_box * 10

def forward(self, im_data, im_info, gt_boxes=None, dontcare_areas=None):
features, rois = self.rpn(im_data, im_info, gt_boxes, dontcare_areas)
def forward(self, im_data, im_info, gt_boxes=None, gt_ishard=None, dontcare_areas=None):
features, rois = self.rpn(im_data, im_info, gt_boxes, gt_ishard, dontcare_areas)

if self.training:
roi_data = self.proposal_target_layer(rois, gt_boxes, dontcare_areas, self.n_classes)
roi_data = self.proposal_target_layer(rois, gt_boxes, gt_ishard, dontcare_areas, self.n_classes)
rois = roi_data[0]

# roi pool
@@ -248,47 +231,39 @@ def forward(self, im_data, im_info, gt_boxes=None, dontcare_areas=None):
bbox_pred = self.bbox_fc(x)

if self.training:
self.cross_entropy, self.loss_box = self.build_loss(cls_prob, bbox_pred, roi_data)
self.cross_entropy, self.loss_box = self.build_loss(cls_score, bbox_pred, roi_data)

return cls_prob, bbox_pred, rois

def build_loss(self, cls_score, bbox_pred, roi_data):
# classification loss
label = roi_data[1].squeeze()
fg_cnt = torch.sum(label.data.ne(0))
bg_cnt = label.data.numel() - fg_cnt

ce_weights = torch.ones(cls_score.size()[1])
ce_weights[0] = float(fg_cnt) / bg_cnt
# ce_weights[0] = 1./50
ce_weights = ce_weights.cuda()

maxv, predict = cls_score.data.max(1)
self.tp = torch.sum(predict[:fg_cnt].eq(label.data[:fg_cnt]))
self.tf = torch.sum(predict[fg_cnt:].eq(label.data[fg_cnt:]))
self.fg_cnt = fg_cnt
self.bg_cnt = bg_cnt
# print predict
cross_entropy = F.cross_entropy(cls_score, label, weight=ce_weights)
# print cross_entropy

# bounding box regression L1 loss
bbox_targets, bbox_inside_weights, bbox_outside_weights = roi_data[2:]
# for log
if self.debug:
maxv, predict = cls_score.data.max(1)
self.tp = torch.sum(predict[:fg_cnt].eq(label.data[:fg_cnt])) if fg_cnt > 0 else 0
self.tf = torch.sum(predict[fg_cnt:].eq(label.data[fg_cnt:]))
self.fg_cnt = fg_cnt
self.bg_cnt = label.data.numel() - fg_cnt

# b = bbox_targets.data.cpu().numpy()
# ce_weights = torch.ones(cls_score.size()[1])
# ce_weights[0] = float(fg_cnt) / bg_cnt
# ce_weights = ce_weights.cuda()
cross_entropy = F.cross_entropy(cls_score, label, weight=None)

# bounding box regression L1 loss
bbox_targets, bbox_inside_weights, bbox_outside_weights = roi_data[2:]
bbox_targets = torch.mul(bbox_targets, bbox_inside_weights)
bbox_pred = torch.mul(bbox_pred, bbox_inside_weights)

# a = bbox_pred.data.cpu().numpy()

loss_box = F.smooth_l1_loss(bbox_pred, bbox_targets, size_average=False) / fg_cnt
# print loss_box
loss_box = F.smooth_l1_loss(bbox_pred, bbox_targets, size_average=False) / (fg_cnt + 1e-4)

return cross_entropy, loss_box

@staticmethod
def proposal_target_layer(rpn_rois, gt_boxes, dontcare_areas, num_classes):
def proposal_target_layer(rpn_rois, gt_boxes, gt_ishard, dontcare_areas, num_classes):
"""
----------
rpn_rois: (1 x H x W x A, 5) [0, x1, y1, x2, y2]
@@ -307,7 +282,7 @@ def proposal_target_layer(rpn_rois, gt_boxes, dontcare_areas, num_classes):
"""
rpn_rois = rpn_rois.data.cpu().numpy()
rois, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights = \
proposal_target_layer_py(rpn_rois, gt_boxes, dontcare_areas, num_classes)
proposal_target_layer_py(rpn_rois, gt_boxes, gt_ishard, dontcare_areas, num_classes)
# print labels.shape, bbox_targets.shape, bbox_inside_weights.shape
rois = network.np_to_variable(rois, is_cuda=True)
labels = network.np_to_variable(labels, is_cuda=True, dtype=torch.LongTensor)
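The central fix in this file is passing the raw `cls_score` to `build_loss` instead of the softmaxed `cls_prob`: `F.cross_entropy` applies log-softmax internally, so feeding it probabilities applies softmax twice and flattens the gradients, which is presumably the rcnn_cls bug named in the commit message. The `(fg_cnt + 1e-4)` denominators are the companion guard that keeps the smooth-L1 normalization finite when a minibatch has no foreground samples. A small standalone check of the cross-entropy point, written against current PyTorch syntax rather than the 2017-era API used in the repository:

```python
import torch
import torch.nn.functional as F

# F.cross_entropy applies log-softmax itself, so it must be fed raw scores
# (cls_score). Feeding it softmax outputs (cls_prob) softmaxes twice.
scores = torch.tensor([[2.0, -1.0, 0.5],
                       [0.1, 3.0, -2.0]])
labels = torch.tensor([0, 1])

probs = F.softmax(scores, dim=1)
loss_from_scores = F.cross_entropy(scores, labels)  # correct usage
loss_from_probs = F.cross_entropy(probs, labels)    # double softmax, weaker signal

print(loss_from_scores.item(), loss_from_probs.item())  # the two values differ
```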
Empty file modified faster_rcnn/make.sh (mode changed 100644 → 100755)
33 changes: 33 additions & 0 deletions faster_rcnn/network.py
@@ -49,6 +49,39 @@ def load_net(fname, net):
v.copy_(param)


def load_pretrained_npy(faster_rcnn_model, fname):
params = np.load(fname).item()
# vgg16
vgg16_dict = faster_rcnn_model.rpn.features.state_dict()
for name, val in vgg16_dict.items():
# # print name
# # print val.size()
# # print param.size()
if name.find('bn.') >= 0:
continue
i, j = int(name[4]), int(name[6]) + 1
ptype = 'weights' if name[-1] == 't' else 'biases'
key = 'conv{}_{}'.format(i, j)
param = torch.from_numpy(params[key][ptype])

if ptype == 'weights':
param = param.permute(3, 2, 0, 1)

val.copy_(param)

# fc6 fc7
frcnn_dict = faster_rcnn_model.state_dict()
pairs = {'fc6.fc': 'fc6', 'fc7.fc': 'fc7'}
for k, v in pairs.items():
key = '{}.weight'.format(k)
param = torch.from_numpy(params[v]['weights']).permute(1, 0)
frcnn_dict[key].copy_(param)

key = '{}.bias'.format(k)
param = torch.from_numpy(params[v]['biases'])
frcnn_dict[key].copy_(param)


def np_to_variable(x, is_cuda=True, dtype=torch.FloatTensor):
v = Variable(torch.from_numpy(x).type(dtype))
if is_cuda:
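`load_pretrained_npy` above maps a converted VGG16 snapshot onto the PyTorch modules. The `permute(3, 2, 0, 1)` for conv kernels and `permute(1, 0)` for the fully connected layers suggest the `.npy` file stores weights as (H, W, in, out) and (in, out) respectively; that layout is inferred from the code, not stated in the commit. A standalone sketch of the conversion:

```python
import torch

# Conv kernels: assumed stored as (H, W, C_in, C_out) in the snapshot;
# PyTorch's nn.Conv2d expects (C_out, C_in, H, W).
npy_conv = torch.randn(3, 3, 64, 128)
torch_conv = npy_conv.permute(3, 2, 0, 1)
print(torch_conv.shape)  # torch.Size([128, 64, 3, 3])

# Fully connected weights: assumed stored as (in_features, out_features);
# nn.Linear keeps the transpose, (out_features, in_features).
npy_fc = torch.randn(25088, 4096)
torch_fc = npy_fc.permute(1, 0)
print(torch_fc.shape)  # torch.Size([4096, 25088])
```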
1 change: 1 addition & 0 deletions faster_rcnn/roi_data_layer/layer.py
@@ -30,6 +30,7 @@ def __init__(self, roidb, num_classes):
def _shuffle_roidb_inds(self):
"""Randomly permute the training roidb."""
self._perm = np.random.permutation(np.arange(len(self._roidb)))
# self._perm = np.arange(len(self._roidb))
self._cur = 0

def _get_next_minibatch_inds(self):