feat: Add option on how to treat border pixels of bounding boxes

Any operation that involves computing the areas of bounding boxes, such as the computation of the IoU, now lets you decide whether a box's border pixels should be included in or excluded from the box area. This behavior is more consistent than the previous behavior, which neither included nor excluded border pixels cleanly. Either alternative makes only a small difference in the results, but, among other things, an option to include border pixels was necessary in order to precisely replicate the official Pascal VOC mean average precision (mAP) computation.
dongsungkim · Apr 23, 2018 · 81ddd31 · 81ddd31
1 parent 03fa7c4
commit 81ddd31
Show file tree

Hide file tree

Showing 4 changed files with 105 additions and 60 deletions.
diff --git a/bounding_box_utils/bounding_box_utils.py b/bounding_box_utils/bounding_box_utils.py
@@ -105,7 +105,7 @@ def convert_coordinates2(tensor, start_index, conversion):
 
     return tensor1
 
-def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
+def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product', include_border_pixels=True):
     '''
     Computes the intersection areas of two sets of axis-aligned 2D rectangular boxes.
 
@@ -132,6 +132,9 @@ def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
             `n` boxes in `boxes2`. In 'element-wise' mode, returns a 1D array and the shapes of `boxes1` and `boxes2`
             must be boadcast-compatible. If both `boxes1` and `boxes2` have `m` boxes, then this returns an array of
             length `m` where the i-th position contains the intersection area of `boxes1[i]` with `boxes2[i]`.
+        include_border_pixels (bool, optional): Whether the border pixels of the bounding boxes belong to them or not.
+            For example, if a bounding box has an `xmax` pixel value of 367, this determines whether the pixels with
+            x-value 367 belong to the bounding box or not.
 
     Returns:
         A 1D or 2D Numpy array (refer to the `mode` argument for details) of dtype float containing values with
@@ -171,6 +174,11 @@ def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
         ymin = 2
         ymax = 3
 
+    if include_border_pixels: # Whether to include or exclude the border pixels of the boxes.
+        d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
+    else:
+        d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
+
     # Compute the intersection areas.
 
     if mode == 'outer_product':
@@ -186,7 +194,7 @@ def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
                             np.tile(np.expand_dims(boxes2[:,[xmax,ymax]], axis=0), reps=(m, 1, 1)))
 
         # Compute the side lengths of the intersection rectangles.
-        side_lengths = np.maximum(0, max_xy - min_xy)
+        side_lengths = np.maximum(0, max_xy - min_xy + d)
 
         return side_lengths[:,:,0] * side_lengths[:,:,1]
 
@@ -196,11 +204,11 @@ def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product'):
         max_xy = np.minimum(boxes1[:,[xmax,ymax]], boxes2[:,[xmax,ymax]])
 
         # Compute the side lengths of the intersection rectangles.
-        side_lengths = np.maximum(0, max_xy - min_xy)
+        side_lengths = np.maximum(0, max_xy - min_xy + d)
 
         return side_lengths[:,0] * side_lengths[:,1]
 
-def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product'):
+def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product', include_border_pixels=True):
     '''
     The same as 'intersection_area()' but for internal use, i.e. without all the safety checks.
     '''
@@ -220,6 +228,11 @@ def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product'):
         ymin = 2
         ymax = 3
 
+    if include_border_pixels: # Whether to include or exclude the border pixels of the boxes.
+        d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
+    else:
+        d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
+
     # Compute the intersection areas.
 
     if mode == 'outer_product':
@@ -235,7 +248,7 @@ def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product'):
                             np.tile(np.expand_dims(boxes2[:,[xmax,ymax]], axis=0), reps=(m, 1, 1)))
 
         # Compute the side lengths of the intersection rectangles.
-        side_lengths = np.maximum(0, max_xy - min_xy)
+        side_lengths = np.maximum(0, max_xy - min_xy + d)
 
         return side_lengths[:,:,0] * side_lengths[:,:,1]
 
@@ -245,12 +258,12 @@ def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product'):
         max_xy = np.minimum(boxes1[:,[xmax,ymax]], boxes2[:,[xmax,ymax]])
 
         # Compute the side lengths of the intersection rectangles.
-        side_lengths = np.maximum(0, max_xy - min_xy)
+        side_lengths = np.maximum(0, max_xy - min_xy + d)
 
         return side_lengths[:,0] * side_lengths[:,1]
 
 
-def iou(boxes1, boxes2, coords='centroids', mode='outer_product'):
+def iou(boxes1, boxes2, coords='centroids', mode='outer_product', include_border_pixels=True):
     '''
     Computes the intersection-over-union similarity (also known as Jaccard similarity)
     of two sets of axis-aligned 2D rectangular boxes.
@@ -278,6 +291,9 @@ def iou(boxes1, boxes2, coords='centroids', mode='outer_product'):
             `n` boxes in `boxes2`. In 'element-wise' mode, returns a 1D array and the shapes of `boxes1` and `boxes2`
             must be boadcast-compatible. If both `boxes1` and `boxes2` have `m` boxes, then this returns an array of
             length `m` where the i-th position contains the IoU overlap of `boxes1[i]` with `boxes2[i]`.
+        include_border_pixels (bool, optional): Whether the border pixels of the bounding boxes belong to them or not.
+            For example, if a bounding box has an `xmax` pixel value of 367, this determines whether the pixels with
+            x-value 367 belong to the bounding box or not.
 
     Returns:
         A 1D or 2D Numpy array (refer to the `mode` argument for details) of dtype float containing values in [0,1],
@@ -326,15 +342,20 @@ def iou(boxes1, boxes2, coords='centroids', mode='outer_product'):
         ymin = 2
         ymax = 3
 
+    if include_border_pixels: # Whether to include or exclude the border pixels of the boxes.
+        d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
+    else:
+        d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
+
     if mode == 'outer_product':
 
-        boxes1_areas = np.tile(np.expand_dims((boxes1[:,xmax] - boxes1[:,xmin]) * (boxes1[:,ymax] - boxes1[:,ymin]), axis=1), reps=(1,n))
-        boxes2_areas = np.tile(np.expand_dims((boxes2[:,xmax] - boxes2[:,xmin]) * (boxes2[:,ymax] - boxes2[:,ymin]), axis=0), reps=(m,1))
+        boxes1_areas = np.tile(np.expand_dims((boxes1[:,xmax] - boxes1[:,xmin] + d) * (boxes1[:,ymax] - boxes1[:,ymin] + d), axis=1), reps=(1,n))
+        boxes2_areas = np.tile(np.expand_dims((boxes2[:,xmax] - boxes2[:,xmin] + d) * (boxes2[:,ymax] - boxes2[:,ymin] + d), axis=0), reps=(m,1))
 
     elif mode == 'element-wise':
 
-        boxes1_areas = (boxes1[:,xmax] - boxes1[:,xmin]) * (boxes1[:,ymax] - boxes1[:,ymin])
-        boxes2_areas = (boxes2[:,xmax] - boxes2[:,xmin]) * (boxes2[:,ymax] - boxes2[:,ymin])
+        boxes1_areas = (boxes1[:,xmax] - boxes1[:,xmin] + d) * (boxes1[:,ymax] - boxes1[:,ymin] + d)
+        boxes2_areas = (boxes2[:,xmax] - boxes2[:,xmin] + d) * (boxes2[:,ymax] - boxes2[:,ymin] + d)
 
     union_areas = boxes1_areas + boxes2_areas - intersection_areas
 

diff --git a/data_generator/object_detection_2d_image_boxes_validation_utils.py b/data_generator/object_detection_2d_image_boxes_validation_utils.py
@@ -89,7 +89,8 @@ def __init__(self,
                  overlap_criterion='center_point',
                  overlap_bounds=(0.3, 1.0),
                  min_area=16,
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
+                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4},
+                 include_border_pixels=True):
         '''
         Arguments:
             check_overlap (bool, optional): Whether or not to enforce the overlap requirements defined by
@@ -123,6 +124,9 @@ def __init__(self,
             labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
                 of an image contains which bounding box coordinate. The dictionary maps at least the keywords
                 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
+            include_border_pixels (bool, optional): Whether the border pixels of the bounding boxes belong to them or not.
+                For example, if a bounding box has an `xmax` pixel value of 367, this determines whether the pixels with
+                x-value 367 belong to the bounding box or not.
         '''
         if not isinstance(overlap_bounds, (list, tuple, BoundGenerator)):
             raise ValueError("`overlap_bounds` must be either a 2-tuple of scalars or a `BoundGenerator` object.")
@@ -137,6 +141,7 @@ def __init__(self,
         self.check_min_area = check_min_area
         self.check_degenerate = check_degenerate
         self.labels_format = labels_format
+        self.include_border_pixels = include_border_pixels
 
     def __call__(self,
                  labels,
@@ -191,17 +196,21 @@ def __call__(self,
                 # Compute the patch coordinates.
                 image_coords = np.array([0, 0, image_width, image_height])
                 # Compute the IoU between the patch and all of the ground truth boxes.
-                image_boxes_iou = iou(image_coords, labels[:, [xmin, ymin, xmax, ymax]], coords='corners', mode='element-wise')
+                image_boxes_iou = iou(image_coords, labels[:, [xmin, ymin, xmax, ymax]], coords='corners', mode='element-wise', include_border_pixels=self.include_border_pixels)
                 requirements_met *= (image_boxes_iou > lower) * (image_boxes_iou <= upper)
 
             elif self.overlap_criterion == 'area':
+                if self.include_border_pixels: # Whether to include or exclude the border pixels of the boxes.
+                    d = 1 # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
+                else:
+                    d = -1 # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
                 # Compute the areas of the boxes.
-                box_areas = (labels[:,xmax] - labels[:,xmin]) * (labels[:,ymax] - labels[:,ymin])
+                box_areas = (labels[:,xmax] - labels[:,xmin] + d) * (labels[:,ymax] - labels[:,ymin] + d)
                 # Compute the intersection area between the patch and all of the ground truth boxes.
                 clipped_boxes = np.copy(labels)
                 clipped_boxes[:,[ymin,ymax]] = np.clip(labels[:,[ymin,ymax]], a_min=0, a_max=image_height-1)
                 clipped_boxes[:,[xmin,xmax]] = np.clip(labels[:,[xmin,xmax]], a_min=0, a_max=image_width-1)
-                intersection_areas = (clipped_boxes[:,xmax] - clipped_boxes[:,xmin]) * (clipped_boxes[:,ymax] - clipped_boxes[:,ymin])
+                intersection_areas = (clipped_boxes[:,xmax] - clipped_boxes[:,xmin] + d) * (clipped_boxes[:,ymax] - clipped_boxes[:,ymin] + d) # +1 because the border pixels belong to the box areas.
                 # Check which boxes meet the overlap requirements.
                 if lower == 0.0:
                     mask_lower = intersection_areas > lower * box_areas # If `self.lower == 0`, we want to make sure that boxes with area 0 don't count, hence the ">" sign instead of the ">=" sign.

diff --git a/ssd_encoder_decoder/ssd_input_encoder.py b/ssd_encoder_decoder/ssd_input_encoder.py
@@ -52,6 +52,7 @@ def __init__(self,
                  matching_type='multi',
                  pos_iou_threshold=0.5,
                  neg_iou_limit=0.3,
+                 include_border_pixels=True,
                  coords='centroids',
                  normalize_coords=True,
                  background_id=0):
@@ -78,19 +79,16 @@ def __init__(self,
                 This list must be one element longer than the number of predictor layers. The first `k` elements are the
                 scaling factors for the `k` predictor layers, while the last element is used for the second box
                 for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
-                last scaling factor must be passed either way, even if it is not being used.
-                Defaults to `None`. If a list is passed, this argument overrides `min_scale` and
-                `max_scale`. All scaling factors must be greater than zero. Note that you should set the scaling factors
-                such that the resulting anchor box sizes correspond to the sizes of the objects you are trying
-                to detect.
+                last scaling factor must be passed either way, even if it is not being used. If a list is passed,
+                this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
+                Note that you should set the scaling factors such that the resulting anchor box sizes correspond to
+                the sizes of the objects you are trying to detect.
             aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
-                generated. This list is valid for all prediction layers. Defaults to [0.5, 1.0, 2.0]. Note that you should
-                set the aspect ratios such that the resulting anchor box shapes roughly correspond to the shapes of the
-                objects you are trying to detect.
+                generated. This list is valid for all prediction layers. Note that you should set the aspect ratios such
+                that the resulting anchor box shapes roughly correspond to the shapes of the objects you are trying to detect.
             aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each prediction layer.
-                If a list is passed, it overrides `aspect_ratios_global`. Defaults to `None`. Note that you should
-                set the aspect ratios such that the resulting anchor box shapes very roughly correspond to the shapes of the
-                objects you are trying to detect.
+                If a list is passed, it overrides `aspect_ratios_global`. Note that you should set the aspect ratios such
+                that the resulting anchor box shapes very roughly correspond to the shapes of the objects you are trying to detect.
             two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratios lists that contain 1. Will be ignored otherwise.
                 If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
                 using the scaling factor for the respective layer, the second one will be generated using
@@ -121,6 +119,9 @@ def __init__(self,
             neg_iou_limit (float, optional): The maximum allowed intersection-over-union similarity of an
                 anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an
                 anchor box is neither a positive, nor a negative box, it will be ignored during training.
+            include_border_pixels (bool, optional): Whether the border pixels of the bounding boxes belong to them or not.
+                For example, if a bounding box has an `xmax` pixel value of 367, this determines whether the pixels with
+                x-value 367 belong to the bounding box or not.
             coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format
                 of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width,
                 and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
@@ -217,6 +218,7 @@ def __init__(self,
         self.matching_type = matching_type
         self.pos_iou_threshold = pos_iou_threshold
         self.neg_iou_limit = neg_iou_limit
+        self.include_border_pixels = include_border_pixels
         self.coords = coords
         self.normalize_coords = normalize_coords
         self.background_id = background_id
@@ -348,7 +350,7 @@ def __call__(self, ground_truth_labels, diagnostics=False):
 
             # Compute the IoU similarities between all anchor boxes and all ground truth boxes for this batch item.
             # This is a matrix of shape `(num_ground_truth_boxes, num_anchor_boxes)`.
-            similarities = iou(labels[:,[xmin,ymin,xmax,ymax]], y_encoded[i,:,-12:-8], coords=self.coords, mode='outer_product')
+            similarities = iou(labels[:,[xmin,ymin,xmax,ymax]], y_encoded[i,:,-12:-8], coords=self.coords, mode='outer_product', include_border_pixels=self.include_border_pixels)
 
             # First: Do bipartite matching, i.e. match each ground truth box to the one anchor box with the highest IoU.
             #        This ensures that each ground truth box will have at least one good match.