1. Voxel grid generation
This process calls the implementation in the spconv library, the main process is points_to_voxel
from spconv import spconv_utils from spconv.spconv_utils import (non_max_suppression_cpu, points_to_voxel_3d_np, points_to_voxel_3d_np_mean, points_to_voxel_3d_with_filtering, rbbox_intersection, rbbox_iou, rotate_non_max_suppression_cpu) try: from spconv.spconv_utils import non_max_suppression except ImportError: pass def points_to_voxel(points, voxel_size, coors_range, coor_to_voxelidx, max_points=35, max_voxels=20000, full_mean=False, block_filtering=True, block_factor=1, block_size=8, height_threshold=0.2, height_high_threshold=3.0, pad_output=False): """convert 3d points(N, >=3) to voxels. This version calculate everything in one loop. now it takes only 0.8ms(~6k voxels) with c++ and 3.2ghz cpu. Args: points: [N, ndim] float tensor. points[:, :3] contain xyz points and points[:, 3:] contain other information such as reflectivity. voxel_size: [3] list/tuple or array, float. xyz, indicate voxel size coors_range: [6] list/tuple or array, float. indicate voxel range. format: xyzxyz, minmax coor_to_voxelidx: int array. used as a dense map. max_points: int. indicate maximum points contained in a voxel. max_voxels: int. indicate maximum voxels this function create. for voxelnet, 20000 is a good choice. you should shuffle points before call this function because max_voxels may drop some points. full_mean: bool. If true, all empty points in voxel will be filled with mean of exist points. block_filtering: filter voxels by height. used for lidar point cloud. use some visualization tool to see filtered result. Returns: voxels: [M, max_points, ndim] float tensor. only contain points. coordinates: [M, 3] int32 tensor. zyx format. num_points_per_voxel: [M] int32 tensor. """ if full_mean: assert block_filtering is False if not isinstance(voxel_size, np.ndarray): voxel_size = np.array(voxel_size, dtype=points.dtype) if not isinstance(coors_range, np.ndarray): coors_range = np.array(coors_range, dtype=points.dtype) voxelmap_shape = (coors_range[3:] - coors_range[:3]) / voxel_size voxelmap_shape = tuple(np.round(voxelmap_shape).astype(np.int32).tolist()) voxelmap_shape = voxelmap_shape[::-1] num_points_per_voxel = np.zeros(shape=(max_voxels, ), dtype=np.int32) voxels = np.zeros(shape=(max_voxels, max_points, points.shape[-1]), dtype=points.dtype) voxel_point_mask = np.zeros(shape=(max_voxels, max_points), dtype=points.dtype) coors = np.zeros(shape=(max_voxels, 3), dtype=np.int32) res = { "voxels": voxels, "coordinates": coors, "num_points_per_voxel": num_points_per_voxel, "voxel_point_mask": voxel_point_mask, } if full_mean: means = np.zeros(shape=(max_voxels, points.shape[-1]), dtype=points.dtype) voxel_num = points_to_voxel_3d_np_mean(points, voxels, voxel_point_mask, means, coors, num_points_per_voxel, coor_to_voxelidx, voxel_size.tolist(), coors_range.tolist(), max_points, max_voxels) else: if block_filtering: block_shape = [*voxelmap_shape[1:]] block_shape = [b // block_factor for b in block_shape] mins = np.full(block_shape, 99999999, dtype=points.dtype) maxs = np.full(block_shape, -99999999, dtype=points.dtype) voxel_mask = np.zeros((max_voxels, ), dtype=np.int32) voxel_num = points_to_voxel_3d_with_filtering( points, voxels, voxel_point_mask, voxel_mask, mins, maxs, coors, num_points_per_voxel, coor_to_voxelidx, voxel_size.tolist(), coors_range.tolist(), max_points, max_voxels, block_factor, block_size, height_threshold, height_high_threshold) voxel_mask = voxel_mask.astype(np.bool_) coors_ = coors[voxel_mask] if pad_output: res["coordinates"][:voxel_num] = coors_ res["voxels"][:voxel_num] = voxels[voxel_mask] res["voxel_point_mask"][:voxel_num] = voxel_point_mask[ voxel_mask] res["num_points_per_voxel"][:voxel_num] = num_points_per_voxel[ voxel_mask] res["coordinates"][voxel_num:] = 0 res["voxels"][voxel_num:] = 0 res["num_points_per_voxel"][voxel_num:] = 0 res["voxel_point_mask"][voxel_num:] = 0 else: res["coordinates"] = coors_ res["voxels"] = voxels[voxel_mask] res["num_points_per_voxel"] = num_points_per_voxel[voxel_mask] res["voxel_point_mask"] = voxel_point_mask[voxel_mask] voxel_num = coors_.shape[0] else: voxel_num = points_to_voxel_3d_np(points, voxels, voxel_point_mask, coors, num_points_per_voxel, coor_to_voxelidx, voxel_size.tolist(), coors_range.tolist(), max_points, max_voxels) res["voxel_num"] = voxel_num res["voxel_point_mask"] = res["voxel_point_mask"].reshape( -1, max_points, 1) return res class VoxelGenerator: def __init__(self, voxel_size, point_cloud_range, max_num_points, max_voxels=20000, full_mean=True): point_cloud_range = np.array(point_cloud_range, dtype=np.float32) # [0, -40, -3, 70.4, 40, 1] voxel_size = np.array(voxel_size, dtype=np.float32) grid_size = (point_cloud_range[3:] - point_cloud_range[:3]) / voxel_size grid_size = np.round(grid_size).astype(np.int64) voxelmap_shape = tuple(np. round(grid_size). astype(np. int32). tolist()) voxelmap_shape = voxelmap_shape[::-1] self._coor_to_voxelidx = np.full(voxelmap_shape, -1, dtype=np.int32) self._voxel_size = voxel_size self._point_cloud_range = point_cloud_range self._max_num_points = max_num_points self._max_voxels = max_voxels self._grid_size = grid_size self._full_mean = full_mean def generate(self, points, max_voxels=None): res = points_to_voxel(points, self._voxel_size, self._point_cloud_range, self._coor_to_voxelidx, self._max_num_points, max_voxels or self._max_voxels, self._full_mean) voxels = res["voxels"] coors = res["coordinates"] num_points_per_voxel = res["num_points_per_voxel"] voxel_num = res["voxel_num"] coors = coors[:voxel_num] voxels = voxels[:voxel_num] num_points_per_voxel = num_points_per_voxel[:voxel_num] return (voxels, coors, num_points_per_voxel) def generate_multi_gpu(self, points, max_voxels=None): res = points_to_voxel(points, self._voxel_size, self._point_cloud_range, self._coor_to_voxelidx, self._max_num_points, max_voxels or self._max_voxels, self._full_mean) voxels = res["voxels"] coors = res["coordinates"] num_points_per_voxel = res["num_points_per_voxel"] voxel_num = res["voxel_num"] return (voxels, coors, num_points_per_voxel) @property def voxel_size(self): return self._voxel_size @property def max_num_points_per_voxel(self): return self._max_num_points @property def point_cloud_range(self): return self._point_cloud_range @property def grid_size(self): return self._grid_size
The specific implementation is implemented in c++ language in point2voxel.h:
template <typename DType, int NDim> int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels, py::array_t<DType> voxel_point_mask, py::array_t<int> coors, py::array_t<int>num_points_per_voxel, py::array_t<int> coor_to_voxelidx, std::vector<DType> voxel_size, std::vector<DType> coors_range, int max_points, int max_voxels) { auto points_rw = points. template mutable_unchecked<2>(); auto voxels_rw = voxels. template mutable_unchecked<3>(); auto voxel_point_mask_rw = voxel_point_mask. template mutable_unchecked<2>(); auto coors_rw = coors.mutable_unchecked<2>(); auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>(); auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>(); auto N = points_rw.shape(0); auto num_features = points_rw. shape(1); // auto ndim = points_rw.shape(1) - 1; constexpr int ndim_minus_1 = NDim - 1; int voxel_num = 0; bool failed = false; int coor[NDim]; int c; int grid_size[NDim]; for (int i = 0; i < NDim; + + i) { grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]); } int voxelidx, num; for (int i = 0; i < N; + + i) { failed = false; for (int j = 0; j < NDim; + + j) { c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]); if ((c < 0 || c >= grid_size[j])) { failed = true; break; } coor[ndim_minus_1 - j] = c; } if (failed) continue; voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]); if (voxelidx == -1) { voxelidx = voxel_num; if (voxel_num >= max_voxels) continue; voxel_num += 1; coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx; for (int k = 0; k < NDim; + + k) { coors_rw(voxelidx, k) = coor[k]; } } num = num_points_per_voxel_rw(voxelidx); if (num < max_points) { voxel_point_mask_rw(voxelidx, num) = DType(1); for (int k = 0; k < num_features; + + k) { voxels_rw(voxelidx, num, k) = points_rw(i, k); } num_points_per_voxel_rw(voxelidx) += 1; } } for (int i = 0; i < voxel_num; + + i) { coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1; } return voxel_num; } template <typename DType, int NDim> int points_to_voxel_3d_np_mean( py::array_t<DType> points, py::array_t<DType> voxel_point_mask, py::array_t<DType> voxels, py::array_t<DType> means, py::array_t<int> coors, py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx, std::vector<DType> voxel_size, std::vector<DType> coors_range, int max_points, int max_voxels) { auto points_rw = points. template mutable_unchecked<2>(); auto means_rw = means. template mutable_unchecked<2>(); auto voxels_rw = voxels. template mutable_unchecked<3>(); auto voxel_point_mask_rw = voxel_point_mask. template mutable_unchecked<2>(); auto coors_rw = coors.mutable_unchecked<2>(); auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>(); auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>(); auto N = points_rw.shape(0); auto num_features = points_rw. shape(1); // auto ndim = points_rw.shape(1) - 1; constexpr int ndim_minus_1 = NDim - 1; int voxel_num = 0; bool failed = false; int coor[NDim]; int c; int grid_size[NDim]; for (int i = 0; i < NDim; + + i) { grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]); } int voxelidx, num; for (int i = 0; i < N; + + i) { failed = false; for (int j = 0; j < NDim; + + j) { c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]); if ((c < 0 || c >= grid_size[j])) { failed = true; break; } coor[ndim_minus_1 - j] = c; } if (failed) continue; voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]); if (voxelidx == -1) { voxelidx = voxel_num; if (voxel_num >= max_voxels) continue; voxel_num += 1; coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx; for (int k = 0; k < NDim; + + k) { coors_rw(voxelidx, k) = coor[k]; } } num = num_points_per_voxel_rw(voxelidx); if (num < max_points) { voxel_point_mask_rw(voxelidx, num) = DType(1); for (int k = 0; k < num_features; + + k) { voxels_rw(voxelidx, num, k) = points_rw(i, k); } num_points_per_voxel_rw(voxelidx) += 1; for (int k = 0; k < num_features; + + k) { means_rw(voxelidx, k) + = (points_rw(i, k) - means_rw(voxelidx, k)) / DType(num + 1); } } } for (int i = 0; i < voxel_num; + + i) { coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1; num = num_points_per_voxel_rw(i); for (int j = num; j < max_points; + + j) { for (int k = 0; k < num_features; + + k) { voxels_rw(i, j, k) = means_rw(i, k); } } } return voxel_num; }
2. Hard voxel feature
This process is easier to understand, it is to sum the same voxel grid, and then calculate the average.
class MeanVFE(VFETemplate): def __init__(self, model_cfg, num_point_features, **kwargs): super().__init__(model_cfg=model_cfg) self.num_point_features = num_point_features def get_output_feature_dim(self): return self.num_point_features def forward(self, batch_dict, **kwargs): """ Args: batch_dict: voxels: (num_voxels, max_points_per_voxel, C) voxel_num_points: optional (num_voxels) **kwargs: Returns: vfe_features: (num_voxels, C) """ voxel_features, voxel_num_points = batch_dict['voxels'], batch_dict['voxel_num_points'] points_mean = voxel_features[:, :, :].sum(dim=1, keepdim=False) normalizer = torch.clamp_min(voxel_num_points.view(-1, 1), min=1.0).type_as(voxel_features) points_mean = points_mean / normalizer batch_dict['voxel_features'] = points_mean.contiguous() return batch_dict
3. Dynamic voxel generation
The process is also very simple, directly use torch.unique to output the non-repeated coordinates in the voxel coordinates, the index of the original coordinates in the non-repeated coordinates, and the number of non-repeated coordinates. Then use scatter_mean to find the mean directly.
class DynamicMeanVFE(VFETemplate): def __init__(self, model_cfg, num_point_features, voxel_size, grid_size, point_cloud_range, **kwargs): super().__init__(model_cfg=model_cfg) self.num_point_features = num_point_features self.grid_size = torch.tensor(grid_size).cuda() self.voxel_size = torch.tensor(voxel_size).cuda() self.point_cloud_range = torch.tensor(point_cloud_range).cuda() self.voxel_x = voxel_size[0] self.voxel_y = voxel_size[1] self.voxel_z = voxel_size[2] self.x_offset = self.voxel_x / 2 + point_cloud_range[0] self.y_offset = self.voxel_y / 2 + point_cloud_range[1] self.z_offset = self.voxel_z / 2 + point_cloud_range[2] self.scale_xyz = grid_size[0] * grid_size[1] * grid_size[2] self.scale_yz = grid_size[1] * grid_size[2] self. scale_z = grid_size[2] def get_output_feature_dim(self): return self.num_point_features @torch.no_grad() def forward(self, batch_dict, **kwargs): """ Args: batch_dict: voxels: (num_voxels, max_points_per_voxel, C) voxel_num_points: optional (num_voxels) **kwargs: Returns: vfe_features: (num_voxels, C) """ batch_size = batch_dict['batch_size'] points = batch_dict['points'] # (batch_idx, x, y, z, i, e) ##debug point_coords = torch.floor((points[:, 1:4] - self.point_cloud_range[0:3]) / self.voxel_size).int() mask = ((point_coords >= 0) & (point_coords < self.grid_size)).all(dim=1) points = points[mask] point_coords = point_coords[mask] merge_coords = points[:, 0].int() * self.scale_xyz + \ point_coords[:, 0] * self. scale_yz + \ point_coords[:, 1] * self. scale_z + \ point_coords[:, 2] points_data = points[:, 1:].contiguous() unq_coords, unq_inv, unq_cnt = torch.unique(merge_coords, return_inverse=True, return_counts=True) points_mean = torch_scatter.scatter_mean(points_data, unq_inv, dim=0) unq_coords = unq_coords.int() voxel_coords = torch.stack((unq_coords // self.scale_xyz, (unq_coords % self. scale_xyz) // self. scale_yz, (unq_coords % self. scale_yz) // self. scale_z, unq_coords % self.scale_z), dim=1) voxel_coords = voxel_coords[:, [0, 3, 2, 1]] batch_dict['voxel_features'] = points_mean.contiguous() batch_dict['voxel_coords'] = voxel_coords.contiguous() return batch_dict
Supplement:
torch. unique(input, sorted=True, return_inverse=False, return_counts=False, dim=None)
input: tensor to be processed
sorted: Whether to arrange the returned non-repeating tensors according to their values, the default is to arrange them in order
return_inverse: Whether to return the index of each element in the original tensor in this non-repeated tensor
return_counts: counts the number of repetitions of each individual element in the original tensor
dim: The dimension along which the value is uniquely processed. I did not understand the mechanism of this after experimenting. If the processed tensors are all one-dimensional, then this does not need to be ignored.
import torch x = torch.tensor([4,0,1,2,1,2,3])#Generate a tensor as experimental input print(x) out = torch.unique(x) #All parameters are set to default print(out)# print out the processing result #The results are as follows: #tensor([0, 1, 2, 3, 4]) #Pick out the non-repeating elements in x, and the default is the order of birth out = torch.unique(x,sorted=False)#Change the default order to False print(out) #The output results are as follows: #tensor([3, 2, 1, 0, 4]) #Find out the independent elements in x, and output them in the original order out = torch.unique(x,return_inverse=True)#Output the index of each element in the original data in the newly generated independent element tensor print(out) #The output results are as follows: #(tensor([0, 1, 2, 3, 4]), tensor([4, 0, 1, 2, 1, 2, 3])) #The first tensor is an independent tensor output after sorting , the second result corresponds to the index of each element in the original data in the new independent non-repeated tensor, such as x[0]=4, the index in the new tensor is 4, x[1]=0, in the new The index in the tensor is 0, x[6]=3, and the index in the new tensor is 3 out = torch.unique(x,return_counts=True) #return the number of each independent element print(out) #The output results are as follows #(tensor([0, 1, 2, 3, 4]), tensor([1, 2, 2, 1, 1])) #0 The number of this element in the original data is 1, 1 This element is in the original The number in the data is 2