From a1d81740174c3e3438b0cc32fd7525f9c9ad053f Mon Sep 17 00:00:00 2001 From: jinwonkim93 Date: Tue, 20 Sep 2022 08:47:02 +0000 Subject: [PATCH] [Feature] Support Delving into High-Quality Synthetic Face Occlusion Segmentation Datasets (#2194) add custom dataset add face occlusion dataset add config file for occlusion face fix format update prepare.md formatting formatting fix typo error for doc update downloading process Update dataset_prepare.md PR fix version to original repository. change to original repository. --- configs/_base_/datasets/occlude_face.py | 78 +++++++ ...3plus_r101_512x512_C-CM+C-WO-NatOcc-SOT.py | 63 ++++++ docs/en/dataset_prepare.md | 204 ++++++++++++++++++ mmseg/datasets/__init__.py | 3 +- mmseg/datasets/face.py | 23 ++ 5 files changed, 370 insertions(+), 1 deletion(-) create mode 100644 configs/_base_/datasets/occlude_face.py create mode 100644 configs/deeplabv3plus/deeplabv3plus_r101_512x512_C-CM+C-WO-NatOcc-SOT.py create mode 100755 mmseg/datasets/face.py diff --git a/configs/_base_/datasets/occlude_face.py b/configs/_base_/datasets/occlude_face.py new file mode 100644 index 0000000000..a15df84cf3 --- /dev/null +++ b/configs/_base_/datasets/occlude_face.py @@ -0,0 +1,78 @@ +dataset_type = 'FaceOccludedDataset' +data_root = 'data/occlusion-aware-face-dataset' +crop_size = (512, 512) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomFlip', prob=0.5), + dict(type='RandomRotate', degree=(-30, 30), prob=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='ResizeToMultiple', size_divisor=32), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +dataset_train_A = dict( + type=dataset_type, + data_root=data_root, + img_dir='NatOcc_hand_sot/img', + ann_dir='NatOcc_hand_sot/mask', + split='train.txt', + pipeline=train_pipeline) + +dataset_train_B = dict( + type=dataset_type, + data_root=data_root, + img_dir='NatOcc_object/img', + ann_dir='NatOcc_object/mask', + split='train.txt', + pipeline=train_pipeline) + +dataset_train_C = dict( + type=dataset_type, + data_root=data_root, + img_dir='RandOcc/img', + ann_dir='RandOcc/mask', + split='train.txt', + pipeline=train_pipeline) + +dataset_valid = dict( + type=dataset_type, + data_root=data_root, + img_dir='RealOcc/image', + ann_dir='RealOcc/mask', + split='RealOcc/split/val.txt', + pipeline=test_pipeline) + +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=[dataset_train_A, dataset_train_B, dataset_train_C], + val=dataset_valid) diff --git a/configs/deeplabv3plus/deeplabv3plus_r101_512x512_C-CM+C-WO-NatOcc-SOT.py b/configs/deeplabv3plus/deeplabv3plus_r101_512x512_C-CM+C-WO-NatOcc-SOT.py new file mode 100644 index 0000000000..c94385c7e4 --- /dev/null +++ 
b/configs/deeplabv3plus/deeplabv3plus_r101_512x512_C-CM+C-WO-NatOcc-SOT.py
@@ -0,0 +1,63 @@
+#
+
+_base_ = '../_base_/datasets/occlude_face.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet101_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=101,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='DepthwiseSeparableASPPHead',
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        dilations=(1, 12, 24, 36),
+        c1_in_channels=256,
+        c1_channels=48,
+        dropout_ratio=0.1,
+        num_classes=2,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+        sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=2,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+log_config = dict(
+    interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1)]
+cudnn_benchmark = True
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+lr_config = dict(policy='poly', power=0.9, min_lr=0.0001, by_epoch=False)
+runner = dict(type='IterBasedRunner', max_iters=30000)
+checkpoint_config = dict(by_epoch=False, interval=400)
+evaluation = dict(
+    interval=400, metric=['mIoU', 'mDice', 'mFscore'], pre_eval=True)
+auto_resume = False
diff --git a/docs/en/dataset_prepare.md b/docs/en/dataset_prepare.md
index 4982ce1828..a4878d0d83 100644
--- a/docs/en/dataset_prepare.md
+++ b/docs/en/dataset_prepare.md
@@ -1,3 +1,5 @@
+
+
 ## Prepare datasets
 
 It is recommended to symlink the dataset root to `$MMSEGMENTATION/data`.
@@ -138,6 +140,21 @@ mmsegmentation
 │ │ ├── ann_dir
 │ │ │ ├── train
 │ │ │ ├── val
+│ ├── occlusion-aware-face-dataset
+│ │ ├── train.txt
+│ │ ├── NatOcc_hand_sot
+│ │ │ ├── img
+│ │ │ ├── mask
+│ │ ├── NatOcc_object
+│ │ │ ├── img
+│ │ │ ├── mask
+│ │ ├── RandOcc
+│ │ │ ├── img
+│ │ │ ├── mask
+│ │ ├── RealOcc
+│ │ │ ├── img
+│ │ │ ├── mask
+│ │ │ ├── split
 ```
 
 ### Cityscapes
@@ -376,3 +393,190 @@ python tools/convert_datasets/isaid.py /path/to/iSAID
 ```
 
 In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33978 images for training and 11644 images for validation.
+
+### Delving into High-Quality Synthetic Face Occlusion Segmentation Datasets
+
+The dataset is generated by two techniques: naturalistic occlusion generation and random occlusion generation. You must first install the face-occlusion-generation tool and download the data generation materials; see https://github.com/kennyvoo/face-occlusion-generation.git for a more detailed guide.
+
+#### Dataset Preparation
+
+**Step 1:** Create a folder for the data generation materials under the mmsegmentation folder.
+
+```shell
+mkdir data_materials
+```
+
+**Step 2:** Download the masks (`11k-hands_mask.7z`, `CelebAMask-HQ-masks_corrected.7z`) from this [drive](https://drive.google.com/drive/folders/15nZETWlGMdcKY6aHbchRsWkUI42KTNs5?usp=sharing).
+
+Download the images from [CelebAMask-HQ](https://github.com/switchablenorms/CelebAMask-HQ), [11k Hands.zip](https://sites.google.com/view/11khands) and [dtd-r1.0.1.tar.gz](https://www.robots.ox.ac.uk/~vgg/data/dtd/).
+
+**Step 3:** Download the upsampled COCO object images and masks (`coco_object.7z`) from this [drive](https://drive.google.com/drive/folders/15nZETWlGMdcKY6aHbchRsWkUI42KTNs5?usp=sharing).
+
+Download the CelebAMask-HQ and 11k Hands image split files (`11k_hands_sample.txt`, `CelebAMask-HQ-WO-train.txt`) from the same [drive](https://drive.google.com/drive/folders/15nZETWlGMdcKY6aHbchRsWkUI42KTNs5?usp=sharing).
+
+The files downloaded to `./data_materials` should be:
+
+```none
+CelebAMask-HQ.zip
+CelebAMask-HQ-masks_corrected.7z
+CelebAMask-HQ-WO-train.txt
+RealOcc.7z
+RealOcc-Wild.7z
+11k-hands_mask.7z
+11k Hands.zip
+11k_hands_sample.txt
+coco_object.7z
+dtd-r1.0.1.tar.gz
+```
+
+______________________________________________________________________
+
+```bash
+apt-get install p7zip-full
+
+cd data_materials
+
+#make the occlusion-aware-face-dataset folder
+mkdir path-to-mmsegmentation/data/occlusion-aware-face-dataset
+
+#extract CelebAMask-HQ and split out the training set
+unzip CelebAMask-HQ.zip
+7za x CelebAMask-HQ-masks_corrected.7z -o./CelebAMask-HQ
+#copy the training images to the train-image folder
+rsync -a ./CelebAMask-HQ/CelebA-HQ-img/ --files-from=./CelebAMask-HQ-WO-train.txt ./CelebAMask-HQ-WO-Train_img
+#create a file-name txt file for copying the masks
+basename -s .jpg ./CelebAMask-HQ-WO-Train_img/* > train.txt
+#append .png to each name in the file-name txt file
+xargs -n 1 -i echo {}.png < train.txt > mask_train.txt
+#copy the training masks to the train-mask folder
+rsync -a ./CelebAMask-HQ/CelebAMask-HQ-masks_corrected/ --files-from=./mask_train.txt ./CelebAMask-HQ-WO-Train_mask
+mv train.txt ../data/occlusion-aware-face-dataset
+
+#extract DTD
+tar -zxvf dtd-r1.0.1.tar.gz
+mv dtd DTD
+
+#extract the hands dataset and take the 200-sample split
+7za x 11k-hands_mask.7z -o.
+unzip Hands.zip
+rsync -a ./Hands/ --files-from=./11k_hands_sample.txt ./11k-hands_img
+
+#extract the upscaled COCO objects
+7za x coco_object.7z -o.
+mv coco_object/* .
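+
+#(optional sanity check, not part of the original guide) verify that every
+#material folder used by the generation scripts below exists and is non-empty;
+#the folder names are taken from the organization listing that follows and may
+#need adjusting if your archives extract under different names
+for d in CelebAMask-HQ-WO-Train_img CelebAMask-HQ-WO-Train_mask DTD/images \
+  11k-hands_img 11k-hands_mask object_image_sr object_mask_x4; do
+  [ -d "$d" ] && echo "ok: $d ($(ls "$d" | wc -l) entries)" || echo "missing: $d"
+done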
+
+#extract the validation set
+7za x RealOcc.7z -o../data/occlusion-aware-face-dataset
+
+```
+
+**Data materials organization:**
+
+```none
+
+├── data_materials
+│ ├── CelebAMask-HQ-WO-Train_img
+│ │ ├── {image}.jpg
+│ ├── CelebAMask-HQ-WO-Train_mask
+│ │ ├── {mask}.png
+│ ├── DTD
+│ │ ├── images
+│ │ │ ├── {classA}
+│ │ │ │ ├── {image}.jpg
+│ │ │ ├── {classB}
+│ │ │ │ ├── {image}.jpg
+│ ├── 11k-hands_img
+│ │ ├── {image}.jpg
+│ ├── 11k-hands_mask
+│ │ ├── {mask}.png
+│ ├── object_image_sr
+│ │ ├── {image}.jpg
+│ ├── object_mask_x4
+│ │ ├── {mask}.png
+
+```
+
+#### Data Generation
+
+```bash
+git clone https://github.com/kennyvoo/face-occlusion-generation.git
+cd face-occlusion-generation
+```
+
+Example script to generate the NatOcc hand dataset:
+
+```bash
+CUDA_VISIBLE_DEVICES=0 NUM_WORKERS=4 python main.py \
+--config ./configs/natocc_hand.yaml \
+--opts OUTPUT_PATH "path/to/mmsegmentation/data/occlusion-aware-face-dataset/NatOcc_hand_sot" \
+AUGMENTATION.SOT True \
+SOURCE_DATASET.IMG_DIR "path/to/mmsegmentation/data_materials/CelebAMask-HQ-WO-Train_img" \
+SOURCE_DATASET.MASK_DIR "path/to/mmsegmentation/data_materials/CelebAMask-HQ-WO-Train_mask" \
+OCCLUDER_DATASET.IMG_DIR "path/to/mmsegmentation/data_materials/11k-hands_img" \
+OCCLUDER_DATASET.MASK_DIR "path/to/mmsegmentation/data_materials/11k-hands_mask"
+```
+
+Example script to generate the NatOcc object dataset:
+
+```bash
+CUDA_VISIBLE_DEVICES=0 NUM_WORKERS=4 python main.py \
+--config ./configs/natocc_objects.yaml \
+--opts OUTPUT_PATH "path/to/mmsegmentation/data/occlusion-aware-face-dataset/NatOcc_object" \
+SOURCE_DATASET.IMG_DIR "path/to/mmsegmentation/data_materials/CelebAMask-HQ-WO-Train_img" \
+SOURCE_DATASET.MASK_DIR "path/to/mmsegmentation/data_materials/CelebAMask-HQ-WO-Train_mask" \
+OCCLUDER_DATASET.IMG_DIR "path/to/mmsegmentation/data_materials/object_image_sr" \
+OCCLUDER_DATASET.MASK_DIR "path/to/mmsegmentation/data_materials/object_mask_x4"
+```
+
+Example script to generate the RandOcc dataset:
+
+```bash
+CUDA_VISIBLE_DEVICES=0 NUM_WORKERS=4 python main.py \
+--config ./configs/randocc.yaml \
+--opts OUTPUT_PATH "path/to/mmsegmentation/data/occlusion-aware-face-dataset/RandOcc" \
+SOURCE_DATASET.IMG_DIR "path/to/mmsegmentation/data_materials/CelebAMask-HQ-WO-Train_img/" \
+SOURCE_DATASET.MASK_DIR "path/to/mmsegmentation/data_materials/CelebAMask-HQ-WO-Train_mask" \
+OCCLUDER_DATASET.IMG_DIR "path/to/mmsegmentation/data_materials/DTD/images"
+```
+
+**Dataset organization:**
+
+```none
+├── data
+│ ├── occlusion-aware-face-dataset
+│ │ ├── train.txt
+│ │ ├── NatOcc_hand_sot
+│ │ │ ├── img
+│ │ │ │ ├── {image}.jpg
+│ │ │ ├── mask
+│ │ │ │ ├── {mask}.png
+│ │ ├── NatOcc_object
+│ │ │ ├── img
+│ │ │ │ ├── {image}.jpg
+│ │ │ ├── mask
+│ │ │ │ ├── {mask}.png
+│ │ ├── RandOcc
+│ │ │ ├── img
+│ │ │ │ ├── {image}.jpg
+│ │ │ ├── mask
+│ │ │ │ ├── {mask}.png
+│ │ ├── RealOcc
+│ │ │ ├── img
+│ │ │ │ ├── {image}.jpg
+│ │ │ ├── mask
+│ │ │ │ ├── {mask}.png
+│ │ │ ├── split
+│ │ │ │ ├── val.txt
+```
diff --git a/mmseg/datasets/__init__.py b/mmseg/datasets/__init__.py
index 5d42a11c26..9060564c0d 100644
--- a/mmseg/datasets/__init__.py
+++ b/mmseg/datasets/__init__.py
@@ -9,6 +9,7 @@ from .dataset_wrappers import (ConcatDataset, MultiImageMixDataset,
                                RepeatDataset)
 from .drive import DRIVEDataset
+from .face import FaceOccludedDataset
 from .hrf import HRFDataset
 from .isaid import iSAIDDataset
 from .isprs import ISPRSDataset
@@ -26,5 +27,5 @@
     'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset',
     'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset',
     'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset',
-    'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset'
+    'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset', 'FaceOccludedDataset'
 ]
diff --git a/mmseg/datasets/face.py b/mmseg/datasets/face.py
new file mode 100755
index 0000000000..cbc2345b09
--- /dev/null
+++ b/mmseg/datasets/face.py
@@ -0,0 +1,23 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
+@DATASETS.register_module()
+class FaceOccludedDataset(CustomDataset):
+    """Face Occluded dataset.
+
+    Args:
+        split (str): Split txt file of the face occlusion dataset.
+    """
+
+    CLASSES = ('background', 'face')
+
+    PALETTE = [[0, 0, 0], [128, 0, 0]]
+
+    def __init__(self, split, **kwargs):
+        super(FaceOccludedDataset, self).__init__(
+            img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs)
+        assert osp.exists(self.img_dir) and self.split is not None
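
With the dataset prepared and `FaceOccludedDataset` registered, the new config can be trained and evaluated through mmsegmentation's standard entry points. A minimal sketch, assuming a single GPU and the usual `tools/train.py` / `tools/test.py` scripts (the work-dir path is a placeholder to adapt):

```bash
# train DeepLabV3+ (R-101) with the config added in this patch
python tools/train.py \
    configs/deeplabv3plus/deeplabv3plus_r101_512x512_C-CM+C-WO-NatOcc-SOT.py \
    --work-dir work_dirs/occluded_face

# evaluate the latest checkpoint with the metrics set in `evaluation`
python tools/test.py \
    configs/deeplabv3plus/deeplabv3plus_r101_512x512_C-CM+C-WO-NatOcc-SOT.py \
    work_dirs/occluded_face/latest.pth \
    --eval mIoU mDice mFscore
```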