Implemented yolo dataset support (#487)

* implemented yolo data loader * added yolo example configuration * fixed super call for yolo data loader * converted normalized values to pixels for yolo dataset * run pre-commit and fixed coordinate bug * fixed yolo categories indexed by zero * added readme hint for yolo format
RangiLyu · Feb 7, 2023 · 0b78eba · 0b78eba
1 parent 0036f94
commit 0b78eba
Show file tree

Hide file tree

Showing 4 changed files with 314 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -220,6 +220,8 @@ NanoDet-RepVGG | RepVGG-A0 | 416*416 | 27.8 | 11.3G | 6.75M |
 
  If your dataset annotations are pascal voc xml format, refer to [config/nanodet_custom_xml_dataset.yml](config/nanodet_custom_xml_dataset.yml)
 
+ Otherwise, if your dataset annotations are YOLO format ([Darknet TXT](https://github.com/AlexeyAB/Yolo_mark/issues/60#issuecomment-401854885)), refer to [config/nanodet-plus-m_416-yolo.yml](config/nanodet-plus-m_416-yolo.yml)
+
  Or convert your dataset annotations to MS COCO format[(COCO annotation format details)](https://cocodataset.org/#format-data).
 
 2. **Prepare config file**

diff --git a/config/nanodet-plus-m_416-yolo.yml b/config/nanodet-plus-m_416-yolo.yml
@@ -0,0 +1,134 @@
+# nanodet-plus-m_416
+# COCO mAP(0.5:0.95) = 0.304
+# AP_50 = 0.459
+# AP_75 = 0.317
+# AP_small = 0.106
+# AP_m = 0.322
+# AP_l = 0.477
+save_dir: workspace/nanodet-plus-m_416
+model:
+ weight_averager:
+ name: ExpMovingAverager
+ decay: 0.9998
+ arch:
+ name: NanoDetPlus
+ detach_epoch: 10
+ backbone:
+ name: ShuffleNetV2
+ model_size: 1.0x
+ out_stages: [2,3,4]
+ activation: LeakyReLU
+ fpn:
+ name: GhostPAN
+ in_channels: [116, 232, 464]
+ out_channels: 96
+ kernel_size: 5
+ num_extra_level: 1
+ use_depthwise: True
+ activation: LeakyReLU
+ head:
+ name: NanoDetPlusHead
+ num_classes: 80
+ input_channel: 96
+ feat_channels: 96
+ stacked_convs: 2
+ kernel_size: 5
+ strides: [8, 16, 32, 64]
+ activation: LeakyReLU
+ reg_max: 7
+ norm_cfg:
+ type: BN
+ loss:
+ loss_qfl:
+ name: QualityFocalLoss
+ use_sigmoid: True
+ beta: 2.0
+ loss_weight: 1.0
+ loss_dfl:
+ name: DistributionFocalLoss
+ loss_weight: 0.25
+ loss_bbox:
+ name: GIoULoss
+ loss_weight: 2.0
+ # Auxiliary head, only use in training time.
+ aux_head:
+ name: SimpleConvHead
+ num_classes: 80
+ input_channel: 192
+ feat_channels: 192
+ stacked_convs: 4
+ strides: [8, 16, 32, 64]
+ activation: LeakyReLU
+ reg_max: 7
+
+class_names: &class_names ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+ 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant',
+ 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog',
+ 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
+ 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+ 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat',
+ 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket',
+ 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
+ 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
+ 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+ 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop',
+ 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',
+ 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock',
+ 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush']
+
+data:
+ train:
+ name: YoloDataset
+ img_path: coco/train2017
+ ann_path: coco/train2017
+ class_names: *class_names
+ input_size: [416,416] #[w,h]
+ keep_ratio: False
+ pipeline:
+ perspective: 0.0
+ scale: [0.6, 1.4]
+ stretch: [[0.8, 1.2], [0.8, 1.2]]
+ rotation: 0
+ shear: 0
+ translate: 0.2
+ flip: 0.5
+ brightness: 0.2
+ contrast: [0.6, 1.4]
+ saturation: [0.5, 1.2]
+ normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
+ val:
+ name: YoloDataset
+ img_path: coco/val2017
+ ann_path: coco/val2017
+ class_names: *class_names
+ input_size: [416,416] #[w,h]
+ keep_ratio: False
+ pipeline:
+ normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]]
+device:
+ gpu_ids: [0]
+ workers_per_gpu: 10
+ batchsize_per_gpu: 96
+schedule:
+# resume:
+# load_model:
+ optimizer:
+ name: AdamW
+ lr: 0.001
+ weight_decay: 0.05
+ warmup:
+ name: linear
+ steps: 500
+ ratio: 0.0001
+ total_epochs: 300
+ lr_schedule:
+ name: CosineAnnealingLR
+ T_max: 300
+ eta_min: 0.00005
+ val_intervals: 10
+grad_clip: 35
+evaluator:
+ name: CocoDetectionEvaluator
+ save_key: mAP
+log:
+ interval: 50
diff --git a/nanodet/data/dataset/__init__.py b/nanodet/data/dataset/__init__.py
@@ -17,6 +17,7 @@
 
 from .coco import CocoDataset
 from .xml_dataset import XMLDataset
+from .yolo import YoloDataset
 
 
 def build_dataset(cfg, mode):
@@ -27,6 +28,8 @@ def build_dataset(cfg, mode):
  "Dataset name coco has been deprecated. Please use CocoDataset instead."
  )
  return CocoDataset(mode=mode, **dataset_cfg)
+ elif name == "yolo":
+ return YoloDataset(mode=mode, **dataset_cfg)
  elif name == "xml_dataset":
  warnings.warn(
  "Dataset name xml_dataset has been deprecated. "
@@ -35,6 +38,8 @@ def build_dataset(cfg, mode):
  return XMLDataset(mode=mode, **dataset_cfg)
  elif name == "CocoDataset":
  return CocoDataset(mode=mode, **dataset_cfg)
+ elif name == "YoloDataset":
+ return YoloDataset(mode=mode, **dataset_cfg)
  elif name == "XMLDataset":
  return XMLDataset(mode=mode, **dataset_cfg)
  else:

diff --git a/nanodet/data/dataset/yolo.py b/nanodet/data/dataset/yolo.py
@@ -0,0 +1,173 @@
+# Copyright 2023 cansik.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http:https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import time
+from collections import defaultdict
+from typing import Optional, Sequence
+
+import cv2
+import numpy as np
+from pycocotools.coco import COCO
+
+from .coco import CocoDataset
+from .xml_dataset import get_file_list
+
+
+class CocoYolo(COCO):
+ def __init__(self, annotation):
+ """
+ Constructor of Microsoft COCO helper class for
+ reading and visualizing annotations.
+ :param annotation: annotation dict
+ :return:
+ """
+ # load dataset
+ super().__init__()
+ self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict()
+ self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
+ dataset = annotation
+ assert type(dataset) == dict, "annotation file format {} not supported".format(
+ type(dataset)
+ )
+ self.dataset = dataset
+ self.createIndex()
+
+
+class YoloDataset(CocoDataset):
+ def __init__(self, class_names, **kwargs):
+ self.class_names = class_names
+ super(YoloDataset, self).__init__(**kwargs)
+
+ @staticmethod
+ def _find_image(
+ image_prefix: str,
+ image_types: Sequence[str] = (".png", ".jpg", ".jpeg", ".bmp", ".tiff"),
+ ) -> Optional[str]:
+ for image_type in image_types:
+ path = f"{image_prefix}{image_type}"
+ if os.path.exists(path):
+ return path
+ return None
+
+ def yolo_to_coco(self, ann_path):
+ """
+ convert xml annotations to coco_api
+ :param ann_path:
+ :return:
+ """
+ logging.info("loading annotations into memory...")
+ tic = time.time()
+ ann_file_names = get_file_list(ann_path, type=".txt")
+ logging.info("Found {} annotation files.".format(len(ann_file_names)))
+ image_info = []
+ categories = []
+ annotations = []
+ for idx, supercat in enumerate(self.class_names):
+ categories.append(
+ {"supercategory": supercat, "id": idx + 1, "name": supercat}
+ )
+ ann_id = 1
+
+ for idx, txt_name in enumerate(ann_file_names):
+ ann_file = os.path.join(ann_path, txt_name)
+ image_file = self._find_image(os.path.splitext(ann_file)[0])
+
+ if image_file is None:
+ logging.warning(f"Could not find image for {ann_file}")
+ continue
+
+ with open(ann_file, "r") as f:
+ lines = f.readlines()
+
+ image = cv2.imread(image_file)
+ height, width = image.shape[:2]
+
+ file_name = os.path.basename(image_file)
+ info = {
+ "file_name": file_name,
+ "height": height,
+ "width": width,
+ "id": idx + 1,
+ }
+ image_info.append(info)
+ for line in lines:
+ data = [float(t) for t in line.split(" ")]
+ cat_id = int(data[0])
+ locations = np.array(data[1:]).reshape((len(data) // 2, 2))
+ bbox = locations[0:2]
+
+ bbox[0] -= bbox[1] * 0.5
+
+ bbox = np.round(bbox * np.array([width, height])).astype(int)
+ x, y = bbox[0][0], bbox[0][1]
+ w, h = bbox[1][0], bbox[1][1]
+
+ if cat_id >= len(self.class_names):
+ logging.warning(
+ f"Category {cat_id} is not defined in config ({txt_name})"
+ )
+ continue
+
+ if w < 0 or h < 0:
+ logging.warning(
+ "WARNING! Find error data in file {}! Box w and "
+ "h should > 0. Pass this box annotation.".format(txt_name)
+ )
+ continue
+
+ coco_box = [max(x, 0), max(y, 0), min(w, width), min(h, height)]
+ ann = {
+ "image_id": idx + 1,
+ "bbox": coco_box,
+ "category_id": cat_id + 1,
+ "iscrowd": 0,
+ "id": ann_id,
+ "area": coco_box[2] * coco_box[3],
+ }
+ annotations.append(ann)
+ ann_id += 1
+
+ coco_dict = {
+ "images": image_info,
+ "categories": categories,
+ "annotations": annotations,
+ }
+ logging.info(
+ "Load {} txt files and {} boxes".format(len(image_info), len(annotations))
+ )
+ logging.info("Done (t={:0.2f}s)".format(time.time() - tic))
+ return coco_dict
+
+ def get_data_info(self, ann_path):
+ """
+ Load basic information of dataset such as image path, label and so on.
+ :param ann_path: coco json file path
+ :return: image info:
+ [{'file_name': '000000000139.jpg',
+ 'height': 426,
+ 'width': 640,
+ 'id': 139},
+ ...
+ ]
+ """
+ coco_dict = self.yolo_to_coco(ann_path)
+ self.coco_api = CocoYolo(coco_dict)
+ self.cat_ids = sorted(self.coco_api.getCatIds())
+ self.cat2label = {cat_id: i for i, cat_id in enumerate(self.cat_ids)}
+ self.cats = self.coco_api.loadCats(self.cat_ids)
+ self.img_ids = sorted(self.coco_api.imgs.keys())
+ img_info = self.coco_api.loadImgs(self.img_ids)
+ return img_info