fix: Make XML parser robust to varying tag order

It turns out that the XML parser had a bug: for each ground truth object that it parsed, it took the first bounding box occurring anywhere inside that object. This behavior led to the correct bounding boxes being parsed for the vast majority of the ground truth objects, but in some cases, particularly for the Pascal VOC object class "Person", an object contains more than one bounding box (additional boxes for body parts), and, what's worse, the main bounding box is not always the first bounding box among the tags. This has now been fixed: the parser looks up each object's tags (name, pose, truncated, difficult, bndbox) among the direct children of the object only (`recursive=False`), so it always gets the correct bounding box for each object, regardless of tag order.
dongsungkim · Apr 23, 2018 · 2064082 · 2064082
1 parent 81ddd31
commit 2064082
Showing 1 changed file with 14 additions and 12 deletions.
diff --git a/data_generator/object_detection_2d_data_generator.py b/data_generator/object_detection_2d_data_generator.py
@@ -374,25 +374,27 @@ def parse_xml(self,
  folder = soup.folder.text # In case we want to return the folder in addition to the image file name. Relevant for determining which dataset an image belongs to.
  #filename = soup.filename.text
 
- boxes = [] # We'll store all boxes for this image here
+ boxes = [] # We'll store all boxes for this image here.
  eval_neutr = [] # We'll store whether a box is annotated as "difficult" here.
- objects = soup.find_all('object') # Get a list of all objects in this image
+ objects = soup.find_all('object') # Get a list of all objects in this image.
 
- # Parse the data for each object
+ # Parse the data for each object.
  for obj in objects:
- class_name = obj.find('name').text
+ class_name = obj.find('name', recursive=False).text
  class_id = self.classes.index(class_name)
- # Check if this class is supposed to be included in the dataset
+ # Check whether this class is supposed to be included in the dataset.
  if (not self.include_classes == 'all') and (not class_id in self.include_classes): continue
- pose = obj.pose.text
- truncated = int(obj.truncated.text)
+ pose = obj.find('pose', recursive=False).text
+ truncated = int(obj.find('truncated', recursive=False).text)
  if exclude_truncated and (truncated == 1): continue
- difficult = int(obj.difficult.text)
+ difficult = int(obj.find('difficult', recursive=False).text)
  if exclude_difficult and (difficult == 1): continue
- xmin = int(obj.bndbox.xmin.text)
- ymin = int(obj.bndbox.ymin.text)
- xmax = int(obj.bndbox.xmax.text)
- ymax = int(obj.bndbox.ymax.text)
+ # Get the bounding box coordinates.
+ bndbox = obj.find('bndbox', recursive=False)
+ xmin = int(bndbox.xmin.text)
+ ymin = int(bndbox.ymin.text)
+ xmax = int(bndbox.xmax.text)
+ ymax = int(bndbox.ymax.text)
  item_dict = {'folder': folder,
  'image_name': filename,
  'image_id': image_id,