Add support for TransFusion-Lidar Head

stanislav-chekmenev · May 7, 2023 · 4dc1849 · 4dc1849
1 parent ad9c25c
commit 4dc1849
Show file tree

Hide file tree

Showing 12 changed files with 1,032 additions and 17 deletions.
diff --git a/pcdet/models/backbones_2d/base_bev_backbone.py b/pcdet/models/backbones_2d/base_bev_backbone.py
@@ -46,7 +46,7 @@ def __init__(self, model_cfg, input_channels):
             self.blocks.append(nn.Sequential(*cur_layers))
             if len(upsample_strides) > 0:
                 stride = upsample_strides[idx]
-                if stride >= 1:
+                if stride > 1 or (stride == 1 and not self.model_cfg.get('USE_CONV_FOR_NO_STRIDE', False)):
                     self.deblocks.append(nn.Sequential(
                         nn.ConvTranspose2d(
                             num_filters[idx], num_upsample_filters[idx],

diff --git a/pcdet/models/backbones_3d/spconv_backbone.py b/pcdet/models/backbones_3d/spconv_backbone.py
@@ -30,11 +30,12 @@ def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stri
 class SparseBasicBlock(spconv.SparseModule):
     expansion = 1
 
-    def __init__(self, inplanes, planes, stride=1, norm_fn=None, downsample=None, indice_key=None):
+    def __init__(self, inplanes, planes, stride=1, bias=None, norm_fn=None, downsample=None, indice_key=None):
         super(SparseBasicBlock, self).__init__()
 
         assert norm_fn is not None
-        bias = norm_fn is not None
+        if bias is None:
+            bias = norm_fn is not None
         self.conv1 = spconv.SubMConv3d(
             inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=bias, indice_key=indice_key
         )
@@ -184,6 +185,7 @@ class VoxelResBackBone8x(nn.Module):
     def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
         super().__init__()
         self.model_cfg = model_cfg
+        use_bias = self.model_cfg.get('USE_BIAS', None)
         norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)
 
         self.sparse_shape = grid_size[::-1] + [1, 0, 0]
@@ -196,29 +198,29 @@ def __init__(self, model_cfg, input_channels, grid_size, **kwargs):
         block = post_act_block
 
         self.conv1 = spconv.SparseSequential(
-            SparseBasicBlock(16, 16, norm_fn=norm_fn, indice_key='res1'),
-            SparseBasicBlock(16, 16, norm_fn=norm_fn, indice_key='res1'),
+            SparseBasicBlock(16, 16, bias=use_bias, norm_fn=norm_fn, indice_key='res1'),
+            SparseBasicBlock(16, 16, bias=use_bias, norm_fn=norm_fn, indice_key='res1'),
         )
 
         self.conv2 = spconv.SparseSequential(
             # [1600, 1408, 41] <- [800, 704, 21]
             block(16, 32, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv2', conv_type='spconv'),
-            SparseBasicBlock(32, 32, norm_fn=norm_fn, indice_key='res2'),
-            SparseBasicBlock(32, 32, norm_fn=norm_fn, indice_key='res2'),
+            SparseBasicBlock(32, 32, bias=use_bias, norm_fn=norm_fn, indice_key='res2'),
+            SparseBasicBlock(32, 32, bias=use_bias, norm_fn=norm_fn, indice_key='res2'),
         )
 
         self.conv3 = spconv.SparseSequential(
             # [800, 704, 21] <- [400, 352, 11]
             block(32, 64, 3, norm_fn=norm_fn, stride=2, padding=1, indice_key='spconv3', conv_type='spconv'),
-            SparseBasicBlock(64, 64, norm_fn=norm_fn, indice_key='res3'),
-            SparseBasicBlock(64, 64, norm_fn=norm_fn, indice_key='res3'),
+            SparseBasicBlock(64, 64, bias=use_bias, norm_fn=norm_fn, indice_key='res3'),
+            SparseBasicBlock(64, 64, bias=use_bias, norm_fn=norm_fn, indice_key='res3'),
         )
 
         self.conv4 = spconv.SparseSequential(
             # [400, 352, 11] <- [200, 176, 5]
             block(64, 128, 3, norm_fn=norm_fn, stride=2, padding=(0, 1, 1), indice_key='spconv4', conv_type='spconv'),
-            SparseBasicBlock(128, 128, norm_fn=norm_fn, indice_key='res4'),
-            SparseBasicBlock(128, 128, norm_fn=norm_fn, indice_key='res4'),
+            SparseBasicBlock(128, 128, bias=use_bias, norm_fn=norm_fn, indice_key='res4'),
+            SparseBasicBlock(128, 128, bias=use_bias, norm_fn=norm_fn, indice_key='res4'),
         )
 
         last_pad = 0

diff --git a/pcdet/models/dense_heads/target_assigner/hungarian_assigner.py b/pcdet/models/dense_heads/target_assigner/hungarian_assigner.py
@@ -0,0 +1,131 @@
+import torch
+from scipy.optimize import linear_sum_assignment
+from pcdet.ops.iou3d_nms import iou3d_nms_cuda
+
+
+def height_overlaps(boxes1, boxes2):
+    """
+    Compute the pairwise overlap length of two box sets along the height axis.
+
+    For every box, column 2 is taken as the lower bound of its vertical extent
+    and column 2 + column 5 as the upper bound.
+
+    Args:
+        boxes1: 2-D tensor holding N boxes; columns 2 and 5 are read.
+        boxes2: 2-D tensor holding M boxes; columns 2 and 5 are read.
+
+    Returns:
+        (N, M) tensor with the non-negative overlap length of each pair.
+    """
+    # Column vectors for boxes1 and row vectors for boxes2 broadcast to (N, M).
+    bottom1 = boxes1[:, 2].view(-1, 1)
+    top1 = (boxes1[:, 2] + boxes1[:, 5]).view(-1, 1)
+    bottom2 = boxes2[:, 2].view(1, -1)
+    top2 = (boxes2[:, 2] + boxes2[:, 5]).view(1, -1)
+
+    # Shared interval is [highest bottom, lowest top]; a negative length means
+    # the two extents are disjoint, which is clamped to zero.
+    shared_extent = torch.min(top1, top2) - torch.max(bottom1, bottom2)
+    return torch.clamp(shared_extent, min=0)
+
+
+def overlaps(boxes1, boxes2):
+    """
+    Compute the pairwise 3D IoU between two sets of boxes.
+
+    The intersection volume is the BEV overlap reported by
+    ``iou3d_nms_cuda.boxes_overlap_bev_gpu`` multiplied by the height overlap
+    from ``height_overlaps``. Box volumes are the product of columns 3, 4, 5.
+
+    Args:
+        boxes1: 2-D tensor of N boxes with at least 7 columns.
+        boxes2: 2-D tensor of M boxes with at least 7 columns.
+
+    Returns:
+        (N, M) tensor of IoU values on the device of ``boxes1``. When either
+        set is empty the result is an empty (N, M) tensor.
+    """
+    num_rows, num_cols = len(boxes1), len(boxes2)
+    if num_rows * num_cols == 0:
+        # Nothing to compare: at least one dimension is zero.
+        return boxes1.new(num_rows, num_cols)
+
+    # Only the first seven columns are handed to the BEV overlap op.
+    bev1 = boxes1[:, :7]
+    bev2 = boxes2[:, :7]
+
+    # Pre-allocated (N, M) CUDA buffer; the op's return value is not used, so
+    # the result is presumably written into this buffer in place.
+    bev_area = bev1.new_zeros((bev1.shape[0], bev2.shape[0])).cuda()
+    iou3d_nms_cuda.boxes_overlap_bev_gpu(
+        bev1.contiguous().cuda(), bev2.contiguous().cuda(), bev_area
+    )
+
+    # Intersection volume = BEV overlap * height overlap, moved back to the
+    # device the inputs live on.
+    intersection = bev_area.to(boxes1.device) * height_overlaps(boxes1, boxes2)
+
+    vol1 = (boxes1[:, 3] * boxes1[:, 4] * boxes1[:, 5]).view(-1, 1)
+    vol2 = (boxes2[:, 3] * boxes2[:, 4] * boxes2[:, 5]).view(1, -1)
+
+    # Clamp the union so degenerate (zero-volume) pairs do not divide by zero.
+    union = torch.clamp(vol1 + vol2 - intersection, min=1e-8)
+    return intersection / union
+
+
+
+class HungarianAssigner3D:
+    """
+    One-to-one assignment of predicted 3D boxes to ground-truth boxes.
+
+    The matching cost is the sum of a focal-loss style classification cost, an
+    L1 cost on normalized BEV centers and a negative 3D IoU cost. The
+    assignment minimizing the total cost is found with
+    ``scipy.optimize.linear_sum_assignment``.
+
+    Args:
+        cls_cost: mapping with optional keys 'weight', 'alpha', 'gamma', 'eps'.
+        reg_cost: mapping with optional key 'weight'.
+        iou_cost: mapping with optional key 'weight'.
+    """
+
+    def __init__(self, cls_cost, reg_cost, iou_cost):
+        self.cls_cost = cls_cost
+        self.reg_cost = reg_cost
+        self.iou_cost = iou_cost
+
+    def focal_loss_cost(self, cls_pred, gt_labels):
+        """
+        Classification cost between every prediction and every GT label.
+
+        Args:
+            cls_pred: 2-D tensor of raw class scores, one row per prediction
+                and one column per class (a sigmoid is applied here).
+            gt_labels: index tensor selecting the class column of each GT.
+
+        Returns:
+            Weighted cost with one row per prediction and one column per GT.
+        """
+        weight = self.cls_cost.get('weight', 0.15)
+        alpha = self.cls_cost.get('alpha', 0.25)
+        gamma = self.cls_cost.get('gamma', 2.0)
+        eps = self.cls_cost.get('eps', 1e-12)
+
+        cls_pred = cls_pred.sigmoid()
+        # eps keeps the logarithms finite when a probability saturates at 0/1.
+        neg_cost = -(1 - cls_pred + eps).log() * (
+            1 - alpha) * cls_pred.pow(gamma)
+        pos_cost = -(cls_pred + eps).log() * alpha * (
+            1 - cls_pred).pow(gamma)
+
+        cls_cost = pos_cost[:, gt_labels] - neg_cost[:, gt_labels]
+        return cls_cost * weight
+
+    def bevbox_cost(self, bboxes, gt_bboxes, point_cloud_range):
+        """
+        L1 distance between box centers in the first two coordinates.
+
+        Args:
+            bboxes: 2-D tensor of predictions; columns 0-1 are read.
+            gt_bboxes: 2-D tensor of ground truths; columns 0-1 are read.
+            point_cloud_range: sequence whose items 0-1 are the lower bounds
+                and items 3-4 the upper bounds used for normalization.
+
+        Returns:
+            Weighted cost with one row per prediction and one column per GT.
+        """
+        weight = self.reg_cost.get('weight', 0.25)
+
+        pc_start = bboxes.new(point_cloud_range[0:2])
+        pc_range = bboxes.new(point_cloud_range[3:5]) - bboxes.new(point_cloud_range[0:2])
+        # normalize the box center to [0, 1]
+        normalized_bboxes_xy = (bboxes[:, :2] - pc_start) / pc_range
+        normalized_gt_bboxes_xy = (gt_bboxes[:, :2] - pc_start) / pc_range
+        reg_cost = torch.cdist(normalized_bboxes_xy, normalized_gt_bboxes_xy, p=1)
+        return reg_cost * weight
+
+    def iou3d_cost(self, bboxes, gt_bboxes):
+        """
+        Negative 3D IoU cost between predictions and ground truths.
+
+        Returns:
+            Tuple ``(weighted_cost, iou)`` where ``iou`` is the unweighted
+            pairwise IoU matrix returned by ``overlaps``.
+        """
+        iou = overlaps(bboxes, gt_bboxes)
+        weight = self.iou_cost.get('weight', 0.25)
+        # Higher IoU must lower the cost, hence the sign flip.
+        iou_cost = - iou
+        return iou_cost * weight, iou
+
+    def assign(self, bboxes, gt_bboxes, gt_labels, cls_pred, point_cloud_range):
+        """
+        Match predictions to ground truths with minimum total cost.
+
+        Args:
+            bboxes: 2-D tensor of predicted boxes, one row per prediction.
+            gt_bboxes: 2-D tensor of ground-truth boxes, one row per GT.
+            gt_labels: index tensor with the class of each GT.
+            cls_pred: class scores; only ``cls_pred[0].T`` is used, so this is
+                presumably (1, num_classes, num_predictions) - confirm against
+                the caller.
+            point_cloud_range: forwarded to ``bevbox_cost``.
+
+        Returns:
+            Tuple ``(assigned_gt_inds, max_overlaps)``:
+            ``assigned_gt_inds`` is a long tensor with one entry per
+            prediction, 0 for background and ``k + 1`` when matched to GT
+            ``k``; ``max_overlaps`` holds the IoU with the matched GT and 0
+            for unmatched predictions.
+        """
+        num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)
+
+        # 1. assign -1 by default
+        assigned_gt_inds = bboxes.new_full((num_bboxes,), -1, dtype=torch.long)
+        if num_gts == 0 or num_bboxes == 0:
+            # No ground truth or boxes: nothing can be matched. Return the
+            # same 2-tuple as the regular path, with all overlaps set to zero.
+            if num_gts == 0:
+                # No ground truth, assign all to background
+                assigned_gt_inds[:] = 0
+            max_overlaps = bboxes.new_zeros((num_bboxes,))
+            return assigned_gt_inds, max_overlaps
+
+        # 2. compute the weighted costs
+        cls_cost = self.focal_loss_cost(cls_pred[0].T, gt_labels)
+        reg_cost = self.bevbox_cost(bboxes, gt_bboxes, point_cloud_range)
+        iou_cost, iou = self.iou3d_cost(bboxes, gt_bboxes)
+
+        # weighted sum of above three costs
+        cost = cls_cost + reg_cost + iou_cost
+
+        # 3. do Hungarian matching on CPU using linear_sum_assignment
+        cost = cost.detach().cpu()
+        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)
+        matched_row_inds = torch.from_numpy(matched_row_inds).to(bboxes.device)
+        matched_col_inds = torch.from_numpy(matched_col_inds).to(bboxes.device)
+
+        # 4. assign backgrounds and foregrounds
+        # assign all indices to backgrounds first
+        assigned_gt_inds[:] = 0
+        # assign foregrounds based on matching results (GT indices are 1-based
+        # here because 0 is reserved for background)
+        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1
+
+        # IoU of each matched pair; unmatched predictions keep 0.
+        max_overlaps = iou.new_zeros((num_bboxes,))
+        max_overlaps[matched_row_inds] = iou[matched_row_inds, matched_col_inds]
+
+        return assigned_gt_inds, max_overlaps