Add multi-modal support for Nuscenes dataset
chenshi3 committed May 8, 2023
1 parent 4dc1849 commit c5dfdd7
Showing 9 changed files with 330 additions and 11 deletions.
36 changes: 36 additions & 0 deletions pcdet/datasets/augmentor/data_augmentor.py
@@ -1,6 +1,7 @@
from functools import partial

import numpy as np
from PIL import Image

from ...utils import common_utils
from . import augmentor_utils, database_sampler
@@ -23,6 +24,18 @@ def __init__(self, root_path, augmentor_configs, class_names, logger=None):
cur_augmentor = getattr(self, cur_cfg.NAME)(config=cur_cfg)
self.data_augmentor_queue.append(cur_augmentor)

def disableAugmentation(self, augmentor_configs):
self.data_augmentor_queue = []
aug_config_list = augmentor_configs if isinstance(augmentor_configs, list) \
else augmentor_configs.AUG_CONFIG_LIST

for cur_cfg in aug_config_list:
if not isinstance(augmentor_configs, list):
if cur_cfg.NAME in augmentor_configs.DISABLE_AUG_LIST:
continue
cur_augmentor = getattr(self, cur_cfg.NAME)(config=cur_cfg)
self.data_augmentor_queue.append(cur_augmentor)
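A brief usage sketch (reviewer note, not part of this commit): a training loop could call disableAugmentation near the end of training to fade out heavy augmentations such as GT sampling, assuming the dataset config carries a DISABLE_AUG_LIST. The hook variables below are hypothetical:

# Hedged sketch: rebuild the augmentor queue without the disabled entries
# for the final epochs (cur_epoch / num_fade_epochs are assumptions).
if cur_epoch >= total_epochs - num_fade_epochs:
    aug_cfgs = dataset.dataset_cfg.DATA_AUGMENTOR  # expected to define DISABLE_AUG_LIST, e.g. ['gt_sampling']
    dataset.data_augmentor.disableAugmentation(aug_cfgs)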

def gt_sampling(self, config=None):
db_sampler = database_sampler.DataBaseSampler(
root_path=self.root_path,
@@ -139,6 +152,7 @@ def random_world_translation(self, data_dict=None, config=None):

data_dict['gt_boxes'] = gt_boxes
data_dict['points'] = points
data_dict['noise_translate'] = noise_translate
return data_dict

def random_local_translation(self, data_dict=None, config=None):
@@ -251,6 +265,28 @@ def random_local_pyramid_aug(self, data_dict=None, config=None):
data_dict['points'] = points
return data_dict

def imgaug(self, data_dict=None, config=None):
if data_dict is None:
return partial(self.imgaug, config=config)
imgs = data_dict["camera_imgs"]
img_process_infos = data_dict['img_process_infos']
new_imgs = []
for img, img_process_info in zip(imgs, img_process_infos):
flip = False
if config.RAND_FLIP and np.random.choice([0, 1]):
flip = True
rotate = np.random.uniform(*config.ROT_LIM)
# aug images
if flip:
img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
img = img.rotate(rotate)
img_process_info[2] = flip
img_process_info[3] = rotate
new_imgs.append(img)

data_dict["camera_imgs"] = new_imgs
return data_dict
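
For reference, a minimal sketch of the config this method expects (the field names follow the code above; the concrete values are borrowed from typical BEVFusion-style setups and are assumptions):

from easydict import EasyDict

img_aug_cfg = EasyDict({
    'NAME': 'imgaug',
    'RAND_FLIP': True,        # 50% chance of a horizontal flip per image
    'ROT_LIM': [-5.4, 5.4],   # rotation angle sampled uniformly, in degrees
})
aug_fn = data_augmentor.imgaug(config=img_aug_cfg)  # returns a partial, as in __init__
data_dict = aug_fn(data_dict)  # flip flag and angle are written into each img_process_info

Recording the flip/rotation lets later camera-lidar fusion code replay the exact image-space transform when projecting points.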

def forward(self, data_dict):
"""
Args:
28 changes: 28 additions & 0 deletions pcdet/datasets/dataset.py
@@ -2,6 +2,7 @@
from pathlib import Path

import numpy as np
import torch
import torch.utils.data as torch_data

from ..utils import common_utils
@@ -130,6 +131,30 @@ def __getitem__(self, index):
"""
raise NotImplementedError

def set_lidar_aug_matrix(self, data_dict):
"""
Get the lidar augmentation matrix (4 x 4), which is used to recover the original point coordinates.
"""
lidar_aug_matrix = np.eye(4)
if 'flip_x' in data_dict.keys() or 'flip_y' in data_dict.keys():
flip_x = data_dict.get('flip_x', False)
flip_y = data_dict.get('flip_y', False)
if flip_x:
lidar_aug_matrix[:3,:3] = np.array([[1, 0, 0], [0, -1, 0], [0, 0, 1]]) @ lidar_aug_matrix[:3,:3]
if flip_y:
lidar_aug_matrix[:3,:3] = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]]) @ lidar_aug_matrix[:3,:3]
if 'noise_rot' in data_dict.keys():
noise_rot = data_dict['noise_rot']
lidar_aug_matrix[:3,:3] = common_utils.angle2matrix(torch.tensor(noise_rot)) @ lidar_aug_matrix[:3,:3]
if 'noise_scale' in data_dict.keys():
noise_scale = data_dict['noise_scale']
lidar_aug_matrix[:3,:3] *= noise_scale
if 'noise_translate' in data_dict.keys():
noise_translate = data_dict['noise_translate']
lidar_aug_matrix[:3,3:4] = noise_translate.T
data_dict['lidar_aug_matrix'] = lidar_aug_matrix
return data_dict
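
A minimal sketch of the intended use, assuming the points were augmented by exactly the flips/rotation/scaling/translation recorded above:

import numpy as np

M = data_dict['lidar_aug_matrix']                 # 4 x 4, maps original -> augmented coords
pts = data_dict['points'][:, :3]                  # augmented xyz
pts_h = np.concatenate([pts, np.ones((pts.shape[0], 1))], axis=1)
pts_orig = (np.linalg.inv(M) @ pts_h.T).T[:, :3]  # back to the pre-augmentation frame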

def prepare_data(self, data_dict):
"""
Args:
@@ -165,6 +190,7 @@ def prepare_data(self, data_dict):
)
if 'calib' in data_dict:
data_dict['calib'] = calib
data_dict = self.set_lidar_aug_matrix(data_dict)
if data_dict.get('gt_boxes', None) is not None:
selected = common_utils.keep_arrays_by_name(data_dict['gt_names'], self.class_names)
data_dict['gt_boxes'] = data_dict['gt_boxes'][selected]
@@ -287,6 +313,8 @@ def collate_batch(batch_list, _unused=False):
constant_values=pad_value)
points.append(points_pad)
ret[key] = np.stack(points, axis=0)
elif key in ['camera_imgs']:
ret[key] = torch.stack([torch.stack(imgs, dim=0) for imgs in val], dim=0)
else:
ret[key] = np.stack(val, axis=0)
except:
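Note: this branch assumes each sample's camera_imgs has already been converted from PIL images into a list of equally sized C x H x W tensors by an upstream image-processing step, so the stacked result has shape (batch_size, num_cams, C, H, W).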
102 changes: 101 additions & 1 deletion pcdet/datasets/nuscenes/nuscenes_dataset.py
@@ -8,6 +8,8 @@
from ...ops.roiaware_pool3d import roiaware_pool3d_utils
from ...utils import common_utils
from ..dataset import DatasetTemplate
from pyquaternion import Quaternion
from PIL import Image


class NuScenesDataset(DatasetTemplate):
@@ -17,6 +19,13 @@ def __init__(self, dataset_cfg, class_names, training=True, root_path=None, logger=None):
dataset_cfg=dataset_cfg, class_names=class_names, training=training, root_path=root_path, logger=logger
)
self.infos = []
self.camera_config = self.dataset_cfg.get('CAMERA_CONFIG', None)
if self.camera_config is not None:
self.use_camera = self.camera_config.get('USE_CAMERA', True)
self.camera_image_config = self.camera_config.IMAGE
else:
self.use_camera = False

self.include_nuscenes_data(self.mode)
if self.training and self.dataset_cfg.get('BALANCED_RESAMPLING', False):
self.infos = self.balanced_infos_resampling(self.infos)
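
For reference, a sketch of the CAMERA_CONFIG block these lines read, written here as a Python dict (the keys follow the attribute accesses above; the concrete values are assumptions in the style of BEVFusion configs):

CAMERA_CONFIG = {
    'USE_CAMERA': True,
    'IMAGE': {
        'FINAL_DIM': [256, 704],           # fH, fW after resize + crop
        'RESIZE_LIM_TRAIN': [0.38, 0.55],  # random resize range at train time
        'RESIZE_LIM_TEST': [0.48, 0.48],   # fixed (mean) resize at eval time
    },
}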
@@ -108,6 +117,41 @@ def get_lidar_with_sweeps(self, index, max_sweeps=1):
points = np.concatenate((points, times), axis=1)
return points

def crop_image(self, input_dict):
W, H = input_dict["ori_shape"]
imgs = input_dict["camera_imgs"]
img_process_infos = []
crop_images = []
for img in imgs:
if self.training:
fH, fW = self.camera_image_config.FINAL_DIM
resize_lim = self.camera_image_config.RESIZE_LIM_TRAIN
resize = np.random.uniform(*resize_lim)
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
crop_h = newH - fH
crop_w = int(np.random.uniform(0, max(0, newW - fW)))
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)
else:
fH, fW = self.camera_image_config.FINAL_DIM
resize_lim = self.camera_image_config.RESIZE_LIM_TEST
resize = np.mean(resize_lim)
resize_dims = (int(W * resize), int(H * resize))
newW, newH = resize_dims
crop_h = newH - fH
crop_w = int(max(0, newW - fW) / 2)
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)

# resize and crop image
img = img.resize(resize_dims)
img = img.crop(crop)
crop_images.append(img)
img_process_infos.append([resize, crop, False, 0])

input_dict['img_process_infos'] = img_process_infos
input_dict['camera_imgs'] = crop_images
return input_dict
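
To make the eval-time branch concrete, a worked sketch with assumed values (nuScenes images are 1600 x 900; FINAL_DIM = [256, 704] and RESIZE_LIM_TEST = [0.48, 0.48] are typical settings, not fixed by this commit):

W, H = 1600, 900                                   # ori_shape
fH, fW = 256, 704                                  # FINAL_DIM
resize = 0.48                                      # np.mean(RESIZE_LIM_TEST)
newW, newH = int(W * resize), int(H * resize)      # (768, 432)
crop_h = newH - fH                                 # 176 -> keeps the bottom of the image
crop_w = int(max(0, newW - fW) / 2)                # 32  -> centered horizontally
crop = (crop_w, crop_h, crop_w + fW, crop_h + fH)  # (32, 176, 736, 432)
# resize((768, 432)) followed by crop(...) yields an image of exactly 704 x 256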

def __len__(self):
if self._merge_all_iters_to_one_epoch:
return len(self.infos) * self.total_epochs
@@ -137,6 +181,60 @@ def __getitem__(self, index):
'gt_names': info['gt_names'] if mask is None else info['gt_names'][mask],
'gt_boxes': info['gt_boxes'] if mask is None else info['gt_boxes'][mask]
})
if self.use_camera:
input_dict["image_paths"] = []
input_dict["lidar2camera"] = []
input_dict["lidar2image"] = []
input_dict["camera2ego"] = []
input_dict["camera_intrinsics"] = []
input_dict["camera2lidar"] = []

for _, camera_info in info["cams"].items():
input_dict["image_paths"].append(camera_info["data_path"])

# lidar to camera transform
lidar2camera_r = np.linalg.inv(camera_info["sensor2lidar_rotation"])
lidar2camera_t = (
camera_info["sensor2lidar_translation"] @ lidar2camera_r.T
)
lidar2camera_rt = np.eye(4).astype(np.float32)
lidar2camera_rt[:3, :3] = lidar2camera_r.T
lidar2camera_rt[3, :3] = -lidar2camera_t
input_dict["lidar2camera"].append(lidar2camera_rt.T)

# camera intrinsics
camera_intrinsics = np.eye(4).astype(np.float32)
camera_intrinsics[:3, :3] = camera_info["camera_intrinsics"]
input_dict["camera_intrinsics"].append(camera_intrinsics)

# lidar to image transform
lidar2image = camera_intrinsics @ lidar2camera_rt.T
input_dict["lidar2image"].append(lidar2image)

# camera to ego transform
camera2ego = np.eye(4).astype(np.float32)
camera2ego[:3, :3] = Quaternion(
camera_info["sensor2ego_rotation"]
).rotation_matrix
camera2ego[:3, 3] = camera_info["sensor2ego_translation"]
input_dict["camera2ego"].append(camera2ego)

# camera to lidar transform
camera2lidar = np.eye(4).astype(np.float32)
camera2lidar[:3, :3] = camera_info["sensor2lidar_rotation"]
camera2lidar[:3, 3] = camera_info["sensor2lidar_translation"]
input_dict["camera2lidar"].append(camera2lidar)
# read image
filename = input_dict["image_paths"]
images = []
for name in filename:
images.append(Image.open(str(self.root_path / name)))

input_dict["camera_imgs"] = images
input_dict["ori_shape"] = images[0].size

# resize and crop image
input_dict = self.crop_image(input_dict)

data_dict = self.prepare_data(data_dict=input_dict)
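
A quick sanity-check sketch for the assembled matrices (homogeneous coordinates; valid for points in front of the camera and for the original uncropped image, so the resize/crop/flip/rotation recorded in img_process_infos must still be applied on top):

import numpy as np

pt = np.array([10.0, 2.0, -1.0, 1.0])          # x, y, z, 1 in the lidar frame
lidar2image = input_dict['lidar2image'][0]     # first camera, 4 x 4
uvw = lidar2image @ pt
u, v = uvw[0] / uvw[2], uvw[1] / uvw[2]        # pixel coordinates when uvw[2] > 0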

@@ -251,7 +349,7 @@ def create_groundtruth_database(self, used_classes=None, max_sweeps=10):
pickle.dump(all_db_infos, f)


def create_nuscenes_info(version, data_path, save_path, max_sweeps=10):
def create_nuscenes_info(version, data_path, save_path, max_sweeps=10, with_cam=False):
from nuscenes.nuscenes import NuScenes
from nuscenes.utils import splits
from . import nuscenes_utils
@@ -308,6 +406,7 @@ def create_nuscenes_info(version, data_path, save_path, max_sweeps=10):
parser.add_argument('--cfg_file', type=str, default=None, help='specify the config of dataset')
parser.add_argument('--func', type=str, default='create_nuscenes_infos', help='')
parser.add_argument('--version', type=str, default='v1.0-trainval', help='')
parser.add_argument('--with_cam', action='store_true', default=False, help='use camera or not')
args = parser.parse_args()

if args.func == 'create_nuscenes_infos':
@@ -319,6 +418,7 @@ def create_nuscenes_info(version, data_path, save_path, max_sweeps=10):
data_path=ROOT_DIR / 'data' / 'nuscenes',
save_path=ROOT_DIR / 'data' / 'nuscenes',
max_sweeps=dataset_cfg.MAX_SWEEPS,
with_cam=args.with_cam
)

nuscenes_dataset = NuScenesDataset(
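With the new flag, multi-modal infos can be regenerated with a command along the lines of: python -m pcdet.datasets.nuscenes.nuscenes_dataset --func create_nuscenes_infos --cfg_file tools/cfgs/dataset_configs/nuscenes_dataset.yaml --version v1.0-trainval --with_cam (the cfg path is the repository's usual default and may differ per setup).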
90 changes: 89 additions & 1 deletion pcdet/datasets/nuscenes/nuscenes_utils.py
@@ -247,9 +247,69 @@ def quaternion_yaw(q: Quaternion) -> float:
yaw = np.arctan2(v[1], v[0])

return yaw


def obtain_sensor2top(
nusc, sensor_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, sensor_type="lidar"
):
"""Obtain the info with RT matric from general sensor to Top LiDAR.
def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10):
Args:
nusc (class): Dataset class in the nuScenes dataset.
sensor_token (str): Sample data token corresponding to the
specific sensor type.
l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).
l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego
in shape (3, 3).
e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).
e2g_r_mat (np.ndarray): Rotation matrix from ego to global
in shape (3, 3).
sensor_type (str): Sensor to calibrate. Default: 'lidar'.
Returns:
sweep (dict): Sweep information after transformation.
"""
sd_rec = nusc.get("sample_data", sensor_token)
cs_record = nusc.get("calibrated_sensor", sd_rec["calibrated_sensor_token"])
pose_record = nusc.get("ego_pose", sd_rec["ego_pose_token"])
data_path = str(nusc.get_sample_data_path(sd_rec["token"]))
# if os.getcwd() in data_path: # path from lyftdataset is absolute path
# data_path = data_path.split(f"{os.getcwd()}/")[-1] # relative path
sweep = {
"data_path": data_path,
"type": sensor_type,
"sample_data_token": sd_rec["token"],
"sensor2ego_translation": cs_record["translation"],
"sensor2ego_rotation": cs_record["rotation"],
"ego2global_translation": pose_record["translation"],
"ego2global_rotation": pose_record["rotation"],
"timestamp": sd_rec["timestamp"],
}
l2e_r_s = sweep["sensor2ego_rotation"]
l2e_t_s = sweep["sensor2ego_translation"]
e2g_r_s = sweep["ego2global_rotation"]
e2g_t_s = sweep["ego2global_translation"]

# obtain the RT from sensor to Top LiDAR
# sweep->ego->global->ego'->lidar
l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
)
T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
)
T -= (
e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
+ l2e_t @ np.linalg.inv(l2e_r_mat).T
).squeeze(0)
sweep["sensor2lidar_rotation"] = R.T # points @ R.T + T
sweep["sensor2lidar_translation"] = T
return sweep
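
A short sketch of how the returned rotation/translation pair is meant to be consumed, matching the column-vector convention used in NuScenesDataset.__getitem__ above:

import numpy as np

R = sweep['sensor2lidar_rotation']                 # 3 x 3
t = np.asarray(sweep['sensor2lidar_translation'])  # (3,)
p_sensor = np.array([1.0, 0.0, 0.0])               # a point in the sensor frame
p_lidar = R @ p_sensor + t                         # the same point in the reference Top LiDAR frame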


def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10, with_cam=False):
train_nusc_infos = []
val_nusc_infos = []
progress_bar = tqdm.tqdm(total=len(nusc.sample), desc='create_info', dynamic_ncols=True)
@@ -291,6 +351,34 @@ def fill_trainval_infos(data_path, nusc, train_scenes, val_scenes, test=False, max_sweeps=10, with_cam=False):
'car_from_global': car_from_global,
'timestamp': ref_time,
}
if with_cam:
info['cams'] = dict()
l2e_r = ref_cs_rec["rotation"]
l2e_t = ref_cs_rec["translation"]
e2g_r = ref_pose_rec["rotation"]
e2g_t = ref_pose_rec["translation"]
l2e_r_mat = Quaternion(l2e_r).rotation_matrix
e2g_r_mat = Quaternion(e2g_r).rotation_matrix

# obtain 6 image's information per frame
camera_types = [
"CAM_FRONT",
"CAM_FRONT_RIGHT",
"CAM_FRONT_LEFT",
"CAM_BACK",
"CAM_BACK_LEFT",
"CAM_BACK_RIGHT",
]
for cam in camera_types:
cam_token = sample["data"][cam]
cam_path, _, camera_intrinsics = nusc.get_sample_data(cam_token)
cam_info = obtain_sensor2top(
nusc, cam_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, cam
)
cam_info['data_path'] = Path(cam_info['data_path']).relative_to(data_path).__str__()
cam_info.update(camera_intrinsics=camera_intrinsics)
info["cams"].update({cam: cam_info})


sample_data_token = sample['data'][chan]
curr_sd_rec = nusc.get('sample_data', sample_data_token)