Remove unused code and update the URLs of the retrained models

megvii-research · Oct 11, 2021 · 6274fca · 6274fca
1 parent f688fb0
commit 6274fca
Show file tree

Hide file tree

Showing 4 changed files with 20 additions and 32 deletions.
diff --git a/README.md b/README.md
@@ -22,7 +22,7 @@ We also propose an attention variant RCDA to reduce the memory cost for high-res
 | SMCA | multi-level | 50 | 43.7 | 152 | 10 |
 | Deformable DETR | multi-level | 50 | 43.8 | 173 | 15 |
 | Conditional DETR | DC5 | 50 | 43.8 | 195 | 10 |
-| Anchor DETR | DC5 | 50 | 44.2 | 151 | 16 (19) |
+| Anchor DETR | DC5 | 50 | 44.3 | 151 | 16 (19) |
 
 
 *Note:*
@@ -34,12 +34,12 @@ We also propose an attention variant RCDA to reduce the memory cost for high-res
 ## Model
 | name | backbone | AP | URL |
 |:----------------:|:---------:|:-------:|:-----:|
-| AnchorDETR-C5 | R50 | 42.1 | [model](https://drive.google.com/file/d/1FKDrTL7qg9riNN5a910Gzf4aZYJTHdT-/view?usp=sharing) / [log](https://drive.google.com/file/d/1b3jy9xkpLA0vi0GWlchtg4SY5jIqVz5S/view?usp=sharing) |
-| AnchorDETR-DC5 | R50 | 44.2 | [model](https://drive.google.com/file/d/1ggsdoBOZa53S4h6Ur3rlK1-7-eABBlid/view?usp=sharing) / [log](https://drive.google.com/file/d/1S3rtBYMsAv437hGL0nm3JlYp6P0nqZfj/view?usp=sharing) |
-| AnchorDETR-C5 | R101 | 43.5 | [model](https://drive.google.com/file/d/19CQqNvrrpdpSxIyn-2IPmLZOf2KP-Zft/view?usp=sharing) / [log](https://drive.google.com/file/d/1O4K00CLiMBaNu0x61xECg7Kek2Rf-tUr/view?usp=sharing) |
-| AnchorDETR-DC5 | R101 | 45.1 | [model](https://drive.google.com/file/d/1bEnFnHCoDSVQ1u_q7B0gR3yxhq12Wevp/view?usp=sharing) / [log](https://drive.google.com/file/d/1wPeEf84zil8yPBLEnweONXadr5LrwXXv/view?usp=sharing) |
+| AnchorDETR-C5 | R50 | 42.1 | [model](https://drive.google.com/file/d/1ktLJyw4PGdaXkOn61W537Z67WHcttXDs/view?usp=sharing) / [log](https://drive.google.com/file/d/1CoEUzs6pxYw-z1ew04qC1jFJwVjdDlPv/view?usp=sharing) |
+| AnchorDETR-DC5 | R50 | 44.3 | [model](https://drive.google.com/file/d/1lJZWdIlHj6KKmAdU28Y01tTyO0hc6Jxs/view?usp=sharing) / [log](https://drive.google.com/file/d/1ywmE02P7ORj_1HQOR2lYW11kfuqX00v-/view?usp=sharing) |
+| AnchorDETR-C5 | R101 | 43.5 | [model](https://drive.google.com/file/d/1eBLYzlKWwSF_RRcfjgRXqIplRKetsvtg/view?usp=sharing) / [log](https://drive.google.com/file/d/1XIDSpYCioYlK5NwdJnbUHQls-PUr_xwi/view?usp=sharing) |
+| AnchorDETR-DC5 | R101 | 45.1 | [model](https://drive.google.com/file/d/1irmZPSALME4Nht3_qhM9WLExDyO9Sj-J/view?usp=sharing) / [log](https://drive.google.com/file/d/1KIIYid8mmoAWX7w6T6VPhORc86STqoXR/view?usp=sharing) |
 
-*Note:* the models and logs are also available at [Baidu Netdisk](https://pan.baidu.com/s/1Fgx-YPQ0WdTuZIsbOv6hLw) with code `f56r`.
+*Note:* the models and logs are also available at [Baidu Netdisk](https://pan.baidu.com/s/1iB8qtVPb9dWHYgA5z1I4xg) with code `hh13`.
 
 ## Usage
 

diff --git a/main.py b/main.py
@@ -41,11 +41,8 @@ def get_args_parser():
  parser.add_argument('--clip_max_norm', default=0.1, type=float,
  help='gradient clipping max norm')
 
-
  parser.add_argument('--sgd', action='store_true')
 
- parser.add_argument('--with_box_refine', default=False, action='store_true')
-
  # Model parameters
  parser.add_argument('--frozen_weights', type=str, default=None,
  help="Path to the pretrained model. If set, only the mask head will be trained")
@@ -209,7 +206,7 @@ def match_name_keywords(n, name_keywords):
  lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop)
 
  if args.distributed:
- model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu],find_unused_parameters=True)
+ model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
  model_without_ddp = model.module
 
  if args.dataset_file == "coco_panoptic":

diff --git a/models/anchor_detr.py b/models/anchor_detr.py
@@ -30,15 +30,13 @@
 class AnchorDETR(nn.Module):
  """ This is the AnchorDETR module that performs object detection """
 
- def __init__(self, backbone, transformer, num_feature_levels,
- aux_loss=True, with_box_refine=False):
+ def __init__(self, backbone, transformer, num_feature_levels, aux_loss=True):
  """ Initializes the model.
  Parameters:
  backbone: torch module of the backbone to be used. See backbone.py
  transformer: torch module of the transformer architecture. See transformer.py
  num_classes: number of object classes
  aux_loss: True if auxiliary decoding losses (loss at each decoder layer) are to be used.
- with_box_refine: iterative bounding box refinement
  """
  super().__init__()
  self.transformer = transformer
@@ -69,7 +67,6 @@ def __init__(self, backbone, transformer, num_feature_levels,
  )])
  self.backbone = backbone
  self.aux_loss = aux_loss
- self.with_box_refine = with_box_refine
 
  for proj in self.input_proj:
  nn.init.xavier_uniform_(proj[0].weight, gain=1)
@@ -367,8 +364,7 @@ def build(args):
  backbone,
  transformer,
  num_feature_levels=args.num_feature_levels,
- aux_loss=args.aux_loss,
- with_box_refine=args.with_box_refine
+ aux_loss=args.aux_loss
  )
  if args.masks:
  model = DETRsegm(model, freeze_detr=(args.frozen_weights is not None))

diff --git a/models/transformer.py b/models/transformer.py
@@ -52,7 +52,8 @@ def __init__(self, d_model=256, nhead=8,
 
  self.spatial_prior=spatial_prior
 
- self.level_embed = nn.Embedding(num_feature_levels, d_model)
+ if num_feature_levels>1:
+ self.level_embed = nn.Embedding(num_feature_levels, d_model)
  self.num_pattern = num_query_pattern
  self.pattern = nn.Embedding(self.num_pattern, d_model)
 
@@ -77,8 +78,6 @@ def __init__(self, d_model=256, nhead=8,
  self.class_embed = nn.Linear(d_model, num_classes)
  self.bbox_embed = MLP(d_model, d_model, 4, 3)
 
- self.refine_box = False
-
  self._reset_parameters()
 
  def _reset_parameters(self):
@@ -94,14 +93,10 @@ def _reset_parameters(self):
  if self.spatial_prior == "learned":
  nn.init.uniform_(self.position.weight.data, 0, 1)
 
- if self.refine_box:
- self.class_embed = _get_clones(self.class_embed, num_pred)
- self.bbox_embed = _get_clones(self.bbox_embed, num_pred)
- nn.init.constant_(self.bbox_embed[0].layers[-1].bias.data[2:], -2.0)
- else:
- nn.init.constant_(self.bbox_embed.layers[-1].bias.data[2:], -2.0)
- self.class_embed = nn.ModuleList([self.class_embed for _ in range(num_pred)])
- self.bbox_embed = nn.ModuleList([self.bbox_embed for _ in range(num_pred)])
+ nn.init.constant_(self.bbox_embed.layers[-1].bias.data[2:], -2.0)
+ self.class_embed = nn.ModuleList([self.class_embed for _ in range(num_pred)])
+ self.bbox_embed = nn.ModuleList([self.bbox_embed for _ in range(num_pred)])
+
 
  def forward(self, srcs, masks):
 
@@ -163,8 +158,6 @@ def forward(self, srcs, masks):
  outputs_coord = tmp.sigmoid()
  outputs_classes.append(outputs_class[None,])
  outputs_coords.append(outputs_coord[None,])
- if self.refine_box:
- reference_points = outputs_coord
 
  output = torch.cat(outputs_classes, dim=0), torch.cat(outputs_coords, dim=0)
 
@@ -285,9 +278,11 @@ def __init__(self, d_model=256, d_ffn=1024,
  self.dropout2 = nn.Dropout(dropout)
  self.norm2 = nn.LayerNorm(d_model)
 
- # self attention
- self.self_attn_level = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
- self.level_fc = nn.Linear(d_model * n_levels, d_model)
+
+ # level combination
+ if n_levels>1:
+ self.level_fc = nn.Linear(d_model * n_levels, d_model)
+
  # ffn
  self.ffn = FFN(d_model, d_ffn, dropout, activation)