Replace the `resize_upsample4` input transform with an `upsample` argument

open-mmlab · Tau-J · Apr 20, 2023 · Jan 16, 2023 · Jan 16, 2023 · Jan 16, 2023
commit dde90047398121d0fb831552f8ed8a6744c4efd9
diff --git a/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_base_coco_256x192.py b/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_base_coco_256x192.py
@@ -54,11 +54,10 @@
  keypoint_head=dict(
  type='TopdownHeatmapSimpleHead',
  in_channels=768,
- input_transform='resize_upsample4',
- num_deconv_layers=0,
- num_deconv_filters=[],
- num_deconv_kernels=[],
- extra=dict(final_conv_kernel=3, ),
+ num_deconv_layers=2,
+ num_deconv_filters=(256, 256),
+ num_deconv_kernels=(4, 4),
+ extra=dict(final_conv_kernel=1, ),
  out_channels=channel_cfg['num_output_channels'],
  loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
  train_cfg=dict(),

diff --git a/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_simple_base_coco_256x192.py b/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_simple_base_coco_256x192.py
@@ -54,10 +54,11 @@
  keypoint_head=dict(
  type='TopdownHeatmapSimpleHead',
  in_channels=768,
- num_deconv_layers=2,
- num_deconv_filters=(256, 256),
- num_deconv_kernels=(4, 4),
- extra=dict(final_conv_kernel=1, ),
+ upsample=4,
+ num_deconv_layers=0,
+ num_deconv_filters=[],
+ num_deconv_kernels=[],
+ extra=dict(final_conv_kernel=3, ),
  out_channels=channel_cfg['num_output_channels'],
  loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
  train_cfg=dict(),

diff --git a/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_simple_huge_coco_256x192.py b/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_simple_huge_coco_256x192.py
@@ -54,7 +54,7 @@
  keypoint_head=dict(
  type='TopdownHeatmapSimpleHead',
  in_channels=1280,
- input_transform='resize_upsample4',
+ upsample=4,
  num_deconv_layers=0,
  num_deconv_filters=[],
  num_deconv_kernels=[],

diff --git a/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_simple_large_coco_256x192.py b/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_simple_large_coco_256x192.py
@@ -54,7 +54,7 @@
  keypoint_head=dict(
  type='TopdownHeatmapSimpleHead',
  in_channels=1024,
- input_transform='resize_upsample4',
+ upsample=4,
  num_deconv_layers=0,
  num_deconv_filters=[],
  num_deconv_kernels=[],

diff --git a/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_simple_small_coco_256x192.py b/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vitpose_simple_small_coco_256x192.py
@@ -54,7 +54,7 @@
  keypoint_head=dict(
  type='TopdownHeatmapSimpleHead',
  in_channels=384,
- input_transform='resize_upsample4',
+ upsample=4,
  num_deconv_layers=0,
  num_deconv_filters=[],
  num_deconv_kernels=[],

diff --git a/mmpose/models/heads/topdown_heatmap_simple_head.py b/mmpose/models/heads/topdown_heatmap_simple_head.py
@@ -30,6 +30,7 @@ class TopdownHeatmapSimpleHead(TopdownHeatmapBaseHead):
  If num_deconv_layers > 0, the length of
  num_deconv_kernels (list|tuple): Kernel sizes.
  in_index (int|Sequence[int]): Input feature index. Default: 0
+ upsample (int): Ratio by which the input features are directly upsampled; 0 disables upsampling. Default: 0
  input_transform (str|None): Transformation type of input features.
  Options: 'resize_concat', 'multiple_select', None.
  Default: None.
@@ -53,6 +54,7 @@ def __init__(self,
  num_deconv_kernels=(4, 4, 4),
  extra=None,
  in_index=0,
+ upsample=0,
  input_transform=None,
  align_corners=False,
  loss_keypoint=None,
@@ -71,6 +73,10 @@ def __init__(self,
  self.in_index = in_index
  self.align_corners = align_corners
 
+ self.upsample = upsample
+ if self.upsample > 0:
+ assert isinstance(in_index, int)
+
  if extra is not None and not isinstance(extra, dict):
  raise TypeError('extra should be dict or None.')
 
@@ -247,9 +253,7 @@ def _init_inputs(self, in_channels, in_index, input_transform):
  """
 
  if input_transform is not None:
- assert input_transform in [
- 'resize_concat', 'multiple_select', 'resize_upsample4'
- ]
+ assert input_transform in ['resize_concat', 'multiple_select']
  self.input_transform = input_transform
  self.in_index = in_index
  if input_transform is not None:
@@ -275,10 +279,10 @@ def _transform_inputs(self, inputs):
  Tensor: The transformed inputs
  """
  if not isinstance(inputs, list):
- if self.input_transform == 'resize_upsample4':
+ if self.upsample > 0:
  inputs = resize(
  input=torch.nn.functional.relu(inputs),
- scale_factor=4,
+ scale_factor=self.upsample,
  mode='bilinear',
  align_corners=self.align_corners)
  return inputs

diff --git a/tests/test_models/test_top_down_head.py b/tests/test_models/test_top_down_head.py
@@ -266,7 +266,7 @@ def test_top_down_simple_head():
  out_channels=3,
  in_channels=512,
  num_deconv_layers=0,
- input_transform='resize_upsample4',
+ upsample=4,
  loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True))
  input_shape = (1, 512, 32, 32)
  inputs = _demo_inputs(input_shape)