Skip to content

Commit

Permalink
feat: Add option on how to treat border pixels of bounding boxes
Browse files Browse the repository at this point in the history
Any operations that involve computing the areas of bounding boxes, such as computation of the IoU, now let you decide whether a box's border pixels should be included in or excluded from the box area.

This behavior is more consistent than the previous behavior, which neither included nor excluded border pixels cleanly.

Either alternative makes only a small difference, but, among other things, an option to include border pixels was necessary in order to precisely replicate the official Pascal VOC mean average precision (mAP) computation.
  • Loading branch information
pierluigiferrari committed Apr 23, 2018
1 parent 03fa7c4 commit 81ddd31
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 60 deletions.
43 changes: 32 additions & 11 deletions bounding_box_utils/bounding_box_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def convert_coordinates2(tensor, start_index, conversion):

return tensor1

def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product', include_border_pixels=True):
'''
Computes the intersection areas of two sets of axis-aligned 2D rectangular boxes.
Expand All @@ -132,6 +132,9 @@ def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
`n` boxes in `boxes2`. In 'element-wise' mode, returns a 1D array and the shapes of `boxes1` and `boxes2`
must be broadcast-compatible. If both `boxes1` and `boxes2` have `m` boxes, then this returns an array of
length `m` where the i-th position contains the intersection area of `boxes1[i]` with `boxes2[i]`.
include_border_pixels (bool, optional): Whether the border pixels of the bounding boxes belong to them or not.
For example, if a bounding box has an `xmax` pixel value of 367, this determines whether the pixels with
x-value 367 belong to the bounding box or not.
Returns:
A 1D or 2D Numpy array (refer to the `mode` argument for details) of dtype float containing values with
Expand Down Expand Up @@ -171,6 +174,11 @@ def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
ymin = 2
ymax = 3

if include_border_pixels: # Whether to include or exclude the border pixels of the boxes.
d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
else:
d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.

# Compute the intersection areas.

if mode == 'outer_product':
Expand All @@ -186,7 +194,7 @@ def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
np.tile(np.expand_dims(boxes2[:,[xmax,ymax]], axis=0), reps=(m, 1, 1)))

# Compute the side lengths of the intersection rectangles.
side_lengths = np.maximum(0, max_xy - min_xy)
side_lengths = np.maximum(0, max_xy - min_xy + d)

return side_lengths[:,:,0] * side_lengths[:,:,1]

Expand All @@ -196,11 +204,11 @@ def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
max_xy = np.minimum(boxes1[:,[xmax,ymax]], boxes2[:,[xmax,ymax]])

# Compute the side lengths of the intersection rectangles.
side_lengths = np.maximum(0, max_xy - min_xy)
side_lengths = np.maximum(0, max_xy - min_xy + d)

return side_lengths[:,0] * side_lengths[:,1]

def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product'):
def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product', include_border_pixels=True):
'''
The same as 'intersection_area()' but for internal use, i.e. without all the safety checks.
'''
Expand All @@ -220,6 +228,11 @@ def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product'):
ymin = 2
ymax = 3

if include_border_pixels: # Whether to include or exclude the border pixels of the boxes.
d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
else:
d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.

# Compute the intersection areas.

if mode == 'outer_product':
Expand All @@ -235,7 +248,7 @@ def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product'):
np.tile(np.expand_dims(boxes2[:,[xmax,ymax]], axis=0), reps=(m, 1, 1)))

# Compute the side lengths of the intersection rectangles.
side_lengths = np.maximum(0, max_xy - min_xy)
side_lengths = np.maximum(0, max_xy - min_xy + d)

return side_lengths[:,:,0] * side_lengths[:,:,1]

Expand All @@ -245,12 +258,12 @@ def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product'):
max_xy = np.minimum(boxes1[:,[xmax,ymax]], boxes2[:,[xmax,ymax]])

# Compute the side lengths of the intersection rectangles.
side_lengths = np.maximum(0, max_xy - min_xy)
side_lengths = np.maximum(0, max_xy - min_xy + d)

return side_lengths[:,0] * side_lengths[:,1]


def iou(boxes1, boxes2, coords='centroids', mode='outer_product'):
def iou(boxes1, boxes2, coords='centroids', mode='outer_product', include_border_pixels=True):
'''
Computes the intersection-over-union similarity (also known as Jaccard similarity)
of two sets of axis-aligned 2D rectangular boxes.
Expand Down Expand Up @@ -278,6 +291,9 @@ def iou(boxes1, boxes2, coords='centroids', mode='outer_product'):
`n` boxes in `boxes2`. In 'element-wise' mode, returns a 1D array and the shapes of `boxes1` and `boxes2`
must be broadcast-compatible. If both `boxes1` and `boxes2` have `m` boxes, then this returns an array of
length `m` where the i-th position contains the IoU overlap of `boxes1[i]` with `boxes2[i]`.
include_border_pixels (bool, optional): Whether the border pixels of the bounding boxes belong to them or not.
For example, if a bounding box has an `xmax` pixel value of 367, this determines whether the pixels with
x-value 367 belong to the bounding box or not.
Returns:
A 1D or 2D Numpy array (refer to the `mode` argument for details) of dtype float containing values in [0,1],
Expand Down Expand Up @@ -326,15 +342,20 @@ def iou(boxes1, boxes2, coords='centroids', mode='outer_product'):
ymin = 2
ymax = 3

if include_border_pixels: # Whether to include or exclude the border pixels of the boxes.
d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
else:
d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.

if mode == 'outer_product':

boxes1_areas = np.tile(np.expand_dims((boxes1[:,xmax] - boxes1[:,xmin]) * (boxes1[:,ymax] - boxes1[:,ymin]), axis=1), reps=(1,n))
boxes2_areas = np.tile(np.expand_dims((boxes2[:,xmax] - boxes2[:,xmin]) * (boxes2[:,ymax] - boxes2[:,ymin]), axis=0), reps=(m,1))
boxes1_areas = np.tile(np.expand_dims((boxes1[:,xmax] - boxes1[:,xmin] + d) * (boxes1[:,ymax] - boxes1[:,ymin] + d), axis=1), reps=(1,n))
boxes2_areas = np.tile(np.expand_dims((boxes2[:,xmax] - boxes2[:,xmin] + d) * (boxes2[:,ymax] - boxes2[:,ymin] + d), axis=0), reps=(m,1))

elif mode == 'element-wise':

boxes1_areas = (boxes1[:,xmax] - boxes1[:,xmin]) * (boxes1[:,ymax] - boxes1[:,ymin])
boxes2_areas = (boxes2[:,xmax] - boxes2[:,xmin]) * (boxes2[:,ymax] - boxes2[:,ymin])
boxes1_areas = (boxes1[:,xmax] - boxes1[:,xmin] + d) * (boxes1[:,ymax] - boxes1[:,ymin] + d)
boxes2_areas = (boxes2[:,xmax] - boxes2[:,xmin] + d) * (boxes2[:,ymax] - boxes2[:,ymin] + d)

union_areas = boxes1_areas + boxes2_areas - intersection_areas

Expand Down
17 changes: 13 additions & 4 deletions data_generator/object_detection_2d_image_boxes_validation_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ def __init__(self,
overlap_criterion='center_point',
overlap_bounds=(0.3, 1.0),
min_area=16,
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4},
include_border_pixels=True):
'''
Arguments:
check_overlap (bool, optional): Whether or not to enforce the overlap requirements defined by
Expand Down Expand Up @@ -123,6 +124,9 @@ def __init__(self,
labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
of an image contains which bounding box coordinate. The dictionary maps at least the keywords
'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
include_border_pixels (bool, optional): Whether the border pixels of the bounding boxes belong to them or not.
For example, if a bounding box has an `xmax` pixel value of 367, this determines whether the pixels with
x-value 367 belong to the bounding box or not.
'''
if not isinstance(overlap_bounds, (list, tuple, BoundGenerator)):
raise ValueError("`overlap_bounds` must be either a 2-tuple of scalars or a `BoundGenerator` object.")
Expand All @@ -137,6 +141,7 @@ def __init__(self,
self.check_min_area = check_min_area
self.check_degenerate = check_degenerate
self.labels_format = labels_format
self.include_border_pixels = include_border_pixels

def __call__(self,
labels,
Expand Down Expand Up @@ -191,17 +196,21 @@ def __call__(self,
# Compute the patch coordinates.
image_coords = np.array([0, 0, image_width, image_height])
# Compute the IoU between the patch and all of the ground truth boxes.
image_boxes_iou = iou(image_coords, labels[:, [xmin, ymin, xmax, ymax]], coords='corners', mode='element-wise')
image_boxes_iou = iou(image_coords, labels[:, [xmin, ymin, xmax, ymax]], coords='corners', mode='element-wise', include_border_pixels=self.include_border_pixels)
requirements_met *= (image_boxes_iou > lower) * (image_boxes_iou <= upper)

elif self.overlap_criterion == 'area':
if self.include_border_pixels: # Whether to include or exclude the border pixels of the boxes.
d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
else:
d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
# Compute the areas of the boxes.
box_areas = (labels[:,xmax] - labels[:,xmin]) * (labels[:,ymax] - labels[:,ymin])
box_areas = (labels[:,xmax] - labels[:,xmin] + d) * (labels[:,ymax] - labels[:,ymin] + d)
# Compute the intersection area between the patch and all of the ground truth boxes.
clipped_boxes = np.copy(labels)
clipped_boxes[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=image_height-1)
clipped_boxes[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=image_width-1)
intersection_areas = (clipped_boxes[:,xmax] - clipped_boxes[:,xmin]) * (clipped_boxes[:,ymax] - clipped_boxes[:,ymin])
intersection_areas = (clipped_boxes[:,xmax] - clipped_boxes[:,xmin] + d) * (clipped_boxes[:,ymax] - clipped_boxes[:,ymin] + d) # `d` is +1 if the border pixels belong to the box areas and -1 if they do not.
# Check which boxes meet the overlap requirements.
if lower == 0.0:
mask_lower = intersection_areas > lower * box_areas # If `self.lower == 0`, we want to make sure that boxes with area 0 don't count, hence the ">" sign instead of the ">=" sign.
Expand Down
26 changes: 14 additions & 12 deletions ssd_encoder_decoder/ssd_input_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(self,
matching_type='multi',
pos_iou_threshold=0.5,
neg_iou_limit=0.3,
include_border_pixels=True,
coords='centroids',
normalize_coords=True,
background_id=0):
Expand All @@ -78,19 +79,16 @@ def __init__(self,
This list must be one element longer than the number of predictor layers. The first `k` elements are the
scaling factors for the `k` predictor layers, while the last element is used for the second box
for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
last scaling factor must be passed either way, even if it is not being used.
Defaults to `None`. If a list is passed, this argument overrides `min_scale` and
`max_scale`. All scaling factors must be greater than zero. Note that you should set the scaling factors
such that the resulting anchor box sizes correspond to the sizes of the objects you are trying
to detect.
last scaling factor must be passed either way, even if it is not being used. If a list is passed,
this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
Note that you should set the scaling factors such that the resulting anchor box sizes correspond to
the sizes of the objects you are trying to detect.
aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
generated. This list is valid for all prediction layers. Defaults to [0.5, 1.0, 2.0]. Note that you should
set the aspect ratios such that the resulting anchor box shapes roughly correspond to the shapes of the
objects you are trying to detect.
generated. This list is valid for all prediction layers. Note that you should set the aspect ratios such
that the resulting anchor box shapes roughly correspond to the shapes of the objects you are trying to detect.
aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each prediction layer.
If a list is passed, it overrides `aspect_ratios_global`. Defaults to `None`. Note that you should
set the aspect ratios such that the resulting anchor box shapes very roughly correspond to the shapes of the
objects you are trying to detect.
If a list is passed, it overrides `aspect_ratios_global`. Note that you should set the aspect ratios such
that the resulting anchor box shapes very roughly correspond to the shapes of the objects you are trying to detect.
two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratios lists that contain 1. Will be ignored otherwise.
If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
using the scaling factor for the respective layer, the second one will be generated using
Expand Down Expand Up @@ -121,6 +119,9 @@ def __init__(self,
neg_iou_limit (float, optional): The maximum allowed intersection-over-union similarity of an
anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an
anchor box is neither a positive, nor a negative box, it will be ignored during training.
include_border_pixels (bool, optional): Whether the border pixels of the bounding boxes belong to them or not.
For example, if a bounding box has an `xmax` pixel value of 367, this determines whether the pixels with
x-value 367 belong to the bounding box or not.
coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format
of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width,
and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
Expand Down Expand Up @@ -217,6 +218,7 @@ def __init__(self,
self.matching_type = matching_type
self.pos_iou_threshold = pos_iou_threshold
self.neg_iou_limit = neg_iou_limit
self.include_border_pixels = include_border_pixels
self.coords = coords
self.normalize_coords = normalize_coords
self.background_id = background_id
Expand Down Expand Up @@ -348,7 +350,7 @@ def __call__(self, ground_truth_labels, diagnostics=False):

# Compute the IoU similarities between all anchor boxes and all ground truth boxes for this batch item.
# This is a matrix of shape `(num_ground_truth_boxes, num_anchor_boxes)`.
similarities = iou(labels[:,[xmin,ymin,xmax,ymax]], y_encoded[i,:,-12:-8], coords=self.coords, mode='outer_product')
similarities = iou(labels[:,[xmin,ymin,xmax,ymax]], y_encoded[i,:,-12:-8], coords=self.coords, mode='outer_product', include_border_pixels=self.include_border_pixels)

# First: Do bipartite matching, i.e. match each ground truth box to the one anchor box with the highest IoU.
# This ensures that each ground truth box will have at least one good match.
Expand Down
Loading

0 comments on commit 81ddd31

Please sign in to comment.