47 namespace voxel_grid {
52 #if defined(__CUDACC__) 53 void GetVoxelCoordinatesAndFlattenedIndicesCUDA
68 float* voxel_coords_ptr = voxel_coords.GetDataPtr<
float>();
69 int64_t* flattened_indices_ptr = flattened_indices.GetDataPtr<int64_t>();
71 index_t n = flattened_indices.GetLength();
72 ArrayIndexer voxel_indexer({resolution, resolution, resolution});
73 index_t resolution3 = resolution * resolution * resolution;
76 index_t block_idx = buf_indices_ptr[workload_idx / resolution3];
77 index_t voxel_idx = workload_idx % resolution3;
79 index_t block_key_offset = block_idx * 3;
80 index_t xb = block_key_ptr[block_key_offset + 0];
81 index_t yb = block_key_ptr[block_key_offset + 1];
82 index_t zb = block_key_ptr[block_key_offset + 2];
85 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
87 float x = (xb * resolution + xv) * voxel_size;
88 float y = (yb * resolution + yv) * voxel_size;
89 float z = (zb * resolution + zv) * voxel_size;
91 flattened_indices_ptr[workload_idx] =
92 block_idx * resolution3 + voxel_idx;
94 index_t voxel_coords_offset = workload_idx * 3;
95 voxel_coords_ptr[voxel_coords_offset + 0] = x;
96 voxel_coords_ptr[voxel_coords_offset + 1] = y;
97 voxel_coords_ptr[voxel_coords_offset + 2] = z;
109 index_t xn = (xo + resolution) % resolution;
110 index_t yn = (yo + resolution) % resolution;
111 index_t zn = (zo + resolution) % resolution;
117 index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
120 *nb_block_masks_indexer.
GetDataPtr<
bool>(curr_block_idx, nb_idx);
121 if (!block_mask_i)
return -1;
124 curr_block_idx, nb_idx);
126 return (((block_idx_i * resolution) + zn) * resolution + yn) * resolution +
130 template <
typename tsdf_t>
132 const tsdf_t* tsdf_base_ptr,
144 nb_block_masks_indexer,
145 nb_block_indices_indexer);
147 index_t vxp = GetLinearIdx(xo + 1, yo, zo);
148 index_t vxn = GetLinearIdx(xo - 1, yo, zo);
149 index_t vyp = GetLinearIdx(xo, yo + 1, zo);
150 index_t vyn = GetLinearIdx(xo, yo - 1, zo);
151 index_t vzp = GetLinearIdx(xo, yo, zo + 1);
152 index_t vzn = GetLinearIdx(xo, yo, zo - 1);
153 if (vxp >= 0 && vxn >= 0) n[0] = tsdf_base_ptr[vxp] - tsdf_base_ptr[vxn];
154 if (vyp >= 0 && vyn >= 0) n[1] = tsdf_base_ptr[vyp] - tsdf_base_ptr[vyn];
155 if (vzp >= 0 && vzn >= 0) n[2] = tsdf_base_ptr[vzp] - tsdf_base_ptr[vzn];
158 template <
typename input_depth_t,
159 typename input_color_t,
163 #if defined(__CUDACC__) 182 index_t resolution2 = resolution * resolution;
183 index_t resolution3 = resolution2 * resolution;
185 TransformIndexer transform_indexer(depth_intrinsic, extrinsics, voxel_size);
190 ArrayIndexer voxel_indexer({resolution, resolution, resolution});
198 if (!block_value_map.Contains(
"tsdf") ||
199 !block_value_map.Contains(
"weight")) {
201 "TSDF and/or weight not allocated in blocks, please implement " 202 "customized integration.");
204 tsdf_t* tsdf_base_ptr = block_value_map.at(
"tsdf").GetDataPtr<tsdf_t>();
205 weight_t* weight_base_ptr =
206 block_value_map.at(
"weight").GetDataPtr<weight_t>();
208 bool integrate_color =
209 block_value_map.Contains(
"color") && color.NumElements() > 0;
210 color_t* color_base_ptr =
nullptr;
213 float color_multiplier = 1.0;
214 if (integrate_color) {
215 color_base_ptr = block_value_map.at(
"color").
GetDataPtr<color_t>();
220 color_multiplier = 255.0;
224 index_t n = indices.GetLength() * resolution3;
227 index_t block_idx = indices_ptr[workload_idx / resolution3];
228 index_t voxel_idx = workload_idx % resolution3;
233 block_keys_indexer.GetDataPtr<
index_t>(block_idx);
240 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
243 index_t x = xb * resolution + xv;
244 index_t y = yb * resolution + yv;
245 index_t z = zb * resolution + zv;
248 float xc, yc, zc, u, v;
250 static_cast<float>(y),
251 static_cast<float>(z), &xc, &yc, &zc);
254 transform_indexer.
Project(xc, yc, zc, &u, &v);
265 *depth_indexer.
GetDataPtr<input_depth_t>(ui, vi) / depth_scale;
267 float sdf = depth - zc;
268 if (depth <= 0 || depth > depth_max || zc <= 0 || sdf < -sdf_trunc) {
271 sdf = sdf < sdf_trunc ? sdf : sdf_trunc;
274 index_t linear_idx = block_idx * resolution3 + voxel_idx;
276 tsdf_t* tsdf_ptr = tsdf_base_ptr + linear_idx;
277 weight_t* weight_ptr = weight_base_ptr + linear_idx;
279 float inv_wsum = 1.0f / (*weight_ptr + 1);
280 float weight = *weight_ptr;
281 *tsdf_ptr = (weight * (*tsdf_ptr) + sdf) * inv_wsum;
283 if (integrate_color) {
284 color_t* color_ptr = color_base_ptr + 3 * linear_idx;
289 transform_indexer.
Unproject(ui, vi, 1.0, &x, &y, &z);
292 colormap_indexer.
Project(x, y, z, &uf, &vf);
297 input_color_t* input_color_ptr =
298 color_indexer.
GetDataPtr<input_color_t>(ui, vi);
300 for (
index_t i = 0; i < 3; ++i) {
301 color_ptr[i] = (weight * color_ptr[i] +
302 input_color_ptr[i] * color_multiplier) *
307 *weight_ptr = weight + 1;
310 #if defined(__CUDACC__) 315 #if defined(__CUDACC__) 316 void EstimateRangeCUDA
327 int64_t block_resolution,
336 int h_down = h / down_factor;
337 int w_down = w / down_factor;
343 const int fragment_size = 16;
345 if (fragment_buffer.GetDataPtr() == 0 ||
346 fragment_buffer.NumElements() == 0) {
348 const int reserve_frag_buffer_size =
349 h_down * w_down / (fragment_size * fragment_size) / voxel_size;
350 fragment_buffer =
core::Tensor({reserve_frag_buffer_size, 6},
354 const int frag_buffer_size = fragment_buffer.NumElements() / 6;
359 #if defined(__CUDACC__) 362 int* count_ptr =
count.GetDataPtr<
int>();
364 std::atomic<int> count_atomic(0);
365 std::atomic<int>* count_ptr = &count_atomic;
377 int* key = block_keys_indexer.
GetDataPtr<
int>(workload_idx);
379 int u_min = w_down - 1, v_min = h_down - 1, u_max = 0,
381 float z_min = depth_max, z_max = depth_min;
383 float xc, yc, zc, u, v;
386 for (
int i = 0; i < 8; ++i) {
387 float xw = (key[0] + ((i & 1) > 0)) * block_resolution *
389 float yw = (key[1] + ((i & 2) > 0)) * block_resolution *
391 float zw = (key[2] + ((i & 4) > 0)) * block_resolution *
396 if (zc <= 0)
continue;
399 w2c_transform_indexer.
Project(xc, yc, zc, &u, &v);
403 v_min = min(static_cast<int>(floorf(v)), v_min);
404 v_max = max(static_cast<int>(ceilf(v)), v_max);
406 u_min = min(static_cast<int>(floorf(u)), u_min);
407 u_max = max(static_cast<int>(ceilf(u)), u_max);
409 z_min = min(z_min, zc);
410 z_max = max(z_max, zc);
413 v_min = max(0, v_min);
414 v_max = min(h_down - 1, v_max);
416 u_min = max(0, u_min);
417 u_max = min(w_down - 1, u_max);
419 if (v_min >= v_max || u_min >= u_max || z_min >= z_max)
return;
423 ceilf(
float(v_max - v_min + 1) /
float(fragment_size));
425 ceilf(
float(u_max - u_min + 1) /
float(fragment_size));
427 int frag_count = frag_v_count * frag_u_count;
429 int frag_count_end = frag_count_start + frag_count;
430 if (frag_count_end >= frag_buffer_size) {
435 for (
int frag_v = 0; frag_v < frag_v_count; ++frag_v) {
436 for (
int frag_u = 0; frag_u < frag_u_count;
438 float* frag_ptr = frag_buffer_indexer.
GetDataPtr<
float>(
439 frag_count_start +
offset);
445 frag_ptr[2] = v_min + frag_v * fragment_size;
446 frag_ptr[3] = u_min + frag_u * fragment_size;
449 frag_ptr[4] = min(frag_ptr[2] + fragment_size - 1,
450 static_cast<float>(v_max));
451 frag_ptr[5] = min(frag_ptr[3] + fragment_size - 1,
452 static_cast<float>(u_max));
456 #if defined(__CUDACC__) 457 int needed_frag_count =
count[0].Item<
int>();
459 int needed_frag_count = (*count_ptr).load();
462 int frag_count = needed_frag_count;
463 if (frag_count >= frag_buffer_size) {
465 "Could not generate full range map; allocated {} fragments but " 467 frag_buffer_size, frag_count);
468 frag_count = frag_buffer_size - 1;
471 frag_buffer_size, frag_count);
477 int v = workload_idx / w_down;
478 int u = workload_idx % w_down;
480 range_map_indexer.GetDataPtr<
float>(u, v);
481 range_ptr[0] = depth_max;
482 range_ptr[1] = depth_min;
487 block_keys.
GetDevice(), frag_count * fragment_size * fragment_size,
489 int frag_idx = workload_idx / (fragment_size * fragment_size);
490 int local_idx = workload_idx % (fragment_size * fragment_size);
491 int dv = local_idx / fragment_size;
492 int du = local_idx % fragment_size;
495 frag_buffer_indexer.
GetDataPtr<
float>(frag_idx);
496 int v_min =
static_cast<int>(frag_ptr[2]);
497 int u_min =
static_cast<int>(frag_ptr[3]);
498 int v_max =
static_cast<int>(frag_ptr[4]);
499 int u_max =
static_cast<int>(frag_ptr[5]);
503 if (v > v_max || u > u_max)
return;
505 float z_min = frag_ptr[0];
506 float z_max = frag_ptr[1];
507 float* range_ptr = range_map_indexer.GetDataPtr<
float>(u, v);
509 atomicMinf(&(range_ptr[0]), z_min);
510 atomicMaxf(&(range_ptr[1]), z_max);
512 #pragma omp critical(EstimateRangeCPU) 514 range_ptr[0] = min(z_min, range_ptr[0]);
515 range_ptr[1] = max(z_max, range_ptr[1]);
520 #if defined(__CUDACC__) 524 if (needed_frag_count != frag_count) {
526 needed_frag_count, frag_count);
540 return (xin == x && yin == y && zin == z) ?
block_idx : -1;
550 block_idx = block_idx_in;
554 template <
typename tsdf_t,
typename weight_t,
typename color_t>
555 #if defined(__CUDACC__) 560 (std::shared_ptr<core::HashMap>& hashmap,
573 float weight_threshold,
574 float trunc_voxel_multiplier,
575 int range_map_down_factor) {
580 auto device_hashmap = hashmap->GetDeviceHashBackend();
581 #if defined(__CUDACC__) 585 if (cuda_hashmap ==
nullptr) {
587 "Unsupported backend: CUDA raycasting only supports STDGPU.");
589 auto hashmap_impl = cuda_hashmap->GetImpl();
594 if (cpu_hashmap ==
nullptr) {
596 "Unsupported backend: CPU raycasting only supports TBB.");
598 auto hashmap_impl = *cpu_hashmap->GetImpl();
621 if (!block_value_map.Contains(
"tsdf") ||
622 !block_value_map.Contains(
"weight")) {
624 "TSDF and/or weight not allocated in blocks, please implement " 625 "customized integration.");
627 const tsdf_t* tsdf_base_ptr =
628 block_value_map.at(
"tsdf").GetDataPtr<tsdf_t>();
629 const weight_t* weight_base_ptr =
630 block_value_map.at(
"weight").GetDataPtr<weight_t>();
633 if (renderings_map.Contains(
"depth")) {
634 depth_indexer =
ArrayIndexer(renderings_map.at(
"depth"), 2);
636 if (renderings_map.Contains(
"vertex")) {
637 vertex_indexer =
ArrayIndexer(renderings_map.at(
"vertex"), 2);
639 if (renderings_map.Contains(
"normal")) {
640 normal_indexer =
ArrayIndexer(renderings_map.at(
"normal"), 2);
644 if (renderings_map.Contains(
"index")) {
645 index_indexer =
ArrayIndexer(renderings_map.at(
"index"), 2);
647 if (renderings_map.Contains(
"mask")) {
648 mask_indexer =
ArrayIndexer(renderings_map.at(
"mask"), 2);
650 if (renderings_map.Contains(
"interp_ratio")) {
651 interp_ratio_indexer =
654 if (renderings_map.Contains(
"interp_ratio_dx")) {
655 interp_ratio_dx_indexer =
658 if (renderings_map.Contains(
"interp_ratio_dy")) {
659 interp_ratio_dy_indexer =
662 if (renderings_map.Contains(
"interp_ratio_dz")) {
663 interp_ratio_dz_indexer =
668 bool render_color =
false;
669 if (block_value_map.Contains(
"color") && renderings_map.Contains(
"color")) {
671 color_indexer =
ArrayIndexer(renderings_map.at(
"color"), 2);
673 const color_t* color_base_ptr =
674 render_color ? block_value_map.at(
"color").GetDataPtr<color_t>()
677 bool visit_neighbors = render_color || normal_indexer.
GetDataPtr() ||
693 float block_size = voxel_size * block_resolution;
694 index_t resolution2 = block_resolution * block_resolution;
695 index_t resolution3 = resolution2 * block_resolution;
708 index_t x_vn = (x_v + block_resolution) % block_resolution;
709 index_t y_vn = (y_v + block_resolution) % block_resolution;
710 index_t z_vn = (z_v + block_resolution) % block_resolution;
716 if (dx_b == 0 && dy_b == 0 && dz_b == 0) {
717 return block_buf_idx * resolution3 + z_v * resolution2 +
718 y_v * block_resolution + x_v;
720 Key key(x_b + dx_b, y_b + dy_b, z_b + dz_b);
722 index_t block_buf_idx = cache.
Check(key[0], key[1], key[2]);
723 if (block_buf_idx < 0) {
724 auto iter = hashmap_impl.find(key);
725 if (iter == hashmap_impl.end())
return -1;
726 block_buf_idx = iter->second;
727 cache.
Update(key[0], key[1], key[2], block_buf_idx);
730 return block_buf_idx * resolution3 + z_vn * resolution2 +
731 y_vn * block_resolution + x_vn;
736 float x_o,
float y_o,
float z_o,
737 float x_d,
float y_d,
float z_d,
float t,
739 float x_g = x_o + t * x_d;
740 float y_g = y_o + t * y_d;
741 float z_g = z_o + t * z_d;
748 Key key(x_b, y_b, z_b);
750 if (block_buf_idx < 0) {
751 auto iter = hashmap_impl.find(key);
752 if (iter == hashmap_impl.end())
return -1;
753 block_buf_idx = iter->second;
754 cache.
Update(x_b, y_b, z_b, block_buf_idx);
762 return block_buf_idx * resolution3 + z_v * resolution2 +
763 y_v * block_resolution + x_v;
769 const float* range = range_indexer.
GetDataPtr<
float>(
770 x / range_map_down_factor, y / range_map_down_factor);
772 float* depth_ptr =
nullptr;
773 float* vertex_ptr =
nullptr;
774 float* color_ptr =
nullptr;
775 float* normal_ptr =
nullptr;
777 int64_t* index_ptr =
nullptr;
778 bool* mask_ptr =
nullptr;
779 float* interp_ratio_ptr =
nullptr;
780 float* interp_ratio_dx_ptr =
nullptr;
781 float* interp_ratio_dy_ptr =
nullptr;
782 float* interp_ratio_dz_ptr =
nullptr;
806 for (
int i = 0; i < 8; ++i) {
815 for (
int i = 0; i < 8; ++i) {
820 interp_ratio_ptr = interp_ratio_indexer.
GetDataPtr<
float>(
x,
y);
824 for (
int i = 0; i < 8; ++i) {
825 interp_ratio_ptr[i] = 0;
829 interp_ratio_dx_ptr =
834 for (
int i = 0; i < 8; ++i) {
835 interp_ratio_dx_ptr[i] = 0;
839 interp_ratio_dy_ptr =
844 for (
int i = 0; i < 8; ++i) {
845 interp_ratio_dy_ptr[i] = 0;
849 interp_ratio_dz_ptr =
854 for (
int i = 0; i < 8; ++i) {
855 interp_ratio_dz_ptr[i] = 0;
867 const float t_max = range[1];
868 if (t >= t_max)
return;
871 float x_c = 0, y_c = 0, z_c = 0;
872 float x_g = 0, y_g = 0, z_g = 0;
873 float x_o = 0, y_o = 0, z_o = 0;
878 float tsdf_prev = -1.0f;
880 float sdf_trunc = voxel_size * trunc_voxel_multiplier;
887 c2w_transform_indexer.
Unproject(static_cast<float>(x),
888 static_cast<float>(y), 1.0f, &x_c, &y_c,
890 c2w_transform_indexer.
RigidTransform(x_c, y_c, z_c, &x_g, &y_g, &z_g);
891 float x_d = (x_g - x_o);
892 float y_d = (y_g - y_o);
893 float z_d = (z_g - z_o);
896 bool surface_found =
false;
899 GetLinearIdxAtT(x_o, y_o, z_o, x_d, y_d, z_d, t, cache);
901 if (linear_idx < 0) {
906 tsdf = tsdf_base_ptr[linear_idx];
907 w = weight_base_ptr[linear_idx];
908 if (tsdf_prev > 0 && w >= weight_threshold && tsdf <= 0) {
909 surface_found =
true;
913 float delta = tsdf * sdf_trunc;
914 t += delta < voxel_size ? voxel_size : delta;
920 (t * tsdf_prev - t_prev * tsdf) / (tsdf_prev - tsdf);
921 x_g = x_o + t_intersect * x_d;
922 y_g = y_o + t_intersect * y_d;
923 z_g = z_o + t_intersect * z_d;
927 *depth_ptr = t_intersect * depth_scale;
931 x_g, y_g, z_g, vertex_ptr + 0, vertex_ptr + 1,
934 if (!visit_neighbors)
return;
942 float x_v = (x_g -
float(x_b) * block_size) / voxel_size;
943 float y_v = (y_g -
float(y_b) * block_size) / voxel_size;
944 float z_v = (z_g -
float(z_b) * block_size) / voxel_size;
946 Key key(x_b, y_b, z_b);
949 if (block_buf_idx < 0) {
950 auto iter = hashmap_impl.find(key);
951 if (iter == hashmap_impl.end())
return;
952 block_buf_idx = iter->second;
953 cache.
Update(x_b, y_b, z_b, block_buf_idx);
960 float ratio_x = x_v -
float(x_v_floor);
961 float ratio_y = y_v -
float(y_v_floor);
962 float ratio_z = z_v -
float(z_v_floor);
965 for (
index_t k = 0; k < 8; ++k) {
966 index_t dx_v = (k & 1) > 0 ? 1 : 0;
967 index_t dy_v = (k & 2) > 0 ? 1 : 0;
968 index_t dz_v = (k & 4) > 0 ? 1 : 0;
970 index_t linear_idx_k = GetLinearIdxAtP(
971 x_b, y_b, z_b, x_v_floor + dx_v, y_v_floor + dy_v,
972 z_v_floor + dz_v, block_buf_idx, cache);
974 if (linear_idx_k >= 0 && weight_base_ptr[linear_idx_k] > 0) {
975 float rx = dx_v * (ratio_x) + (1 - dx_v) * (1 - ratio_x);
976 float ry = dy_v * (ratio_y) + (1 - dy_v) * (1 - ratio_y);
977 float rz = dz_v * (ratio_z) + (1 - dz_v) * (1 - ratio_z);
978 float r = rx * ry * rz;
980 if (interp_ratio_ptr) {
981 interp_ratio_ptr[k] = r;
987 index_ptr[k] = linear_idx_k;
990 float tsdf_k = tsdf_base_ptr[linear_idx_k];
991 float interp_ratio_dx = ry * rz * (2 * dx_v - 1);
992 float interp_ratio_dy = rx * rz * (2 * dy_v - 1);
993 float interp_ratio_dz = rx * ry * (2 * dz_v - 1);
995 if (interp_ratio_dx_ptr) {
996 interp_ratio_dx_ptr[k] = interp_ratio_dx;
998 if (interp_ratio_dy_ptr) {
999 interp_ratio_dy_ptr[k] = interp_ratio_dy;
1001 if (interp_ratio_dz_ptr) {
1002 interp_ratio_dz_ptr[k] = interp_ratio_dz;
1006 normal_ptr[0] += interp_ratio_dx * tsdf_k;
1007 normal_ptr[1] += interp_ratio_dy * tsdf_k;
1008 normal_ptr[2] += interp_ratio_dz * tsdf_k;
1012 index_t color_linear_idx = linear_idx_k * 3;
1014 r * color_base_ptr[color_linear_idx + 0];
1016 r * color_base_ptr[color_linear_idx + 1];
1018 r * color_base_ptr[color_linear_idx + 2];
1028 color_ptr[0] /= sum_r;
1029 color_ptr[1] /= sum_r;
1030 color_ptr[2] /= sum_r;
1034 constexpr
float EPSILON = 1e-5f;
1035 float norm = sqrt(normal_ptr[0] * normal_ptr[0] +
1036 normal_ptr[1] * normal_ptr[1] +
1037 normal_ptr[2] * normal_ptr[2]);
1038 norm = std::max(norm, EPSILON);
1039 w2c_transform_indexer.
Rotate(
1040 -normal_ptr[0] / norm, -normal_ptr[1] / norm,
1041 -normal_ptr[2] / norm, normal_ptr + 0,
1042 normal_ptr + 1, normal_ptr + 2);
1048 #if defined(__CUDACC__) 1053 template <
typename tsdf_t,
typename weight_t,
typename color_t>
1054 #if defined(__CUDACC__) 1055 void ExtractPointCloudCUDA
1069 float weight_threshold,
1074 index_t resolution2 = resolution * resolution;
1075 index_t resolution3 = resolution2 * resolution;
1078 ArrayIndexer voxel_indexer({resolution, resolution, resolution});
1088 if (!block_value_map.Contains(
"tsdf") ||
1089 !block_value_map.Contains(
"weight")) {
1091 "TSDF and/or weight not allocated in blocks, please implement " 1092 "customized integration.");
1094 const tsdf_t* tsdf_base_ptr =
1095 block_value_map.at(
"tsdf").GetDataPtr<tsdf_t>();
1096 const weight_t* weight_base_ptr =
1097 block_value_map.at(
"weight").GetDataPtr<weight_t>();
1098 const color_t* color_base_ptr =
nullptr;
1099 if (block_value_map.Contains(
"color")) {
1100 color_base_ptr = block_value_map.at(
"color").GetDataPtr<color_t>();
1104 index_t n = n_blocks * resolution3;
1107 #if defined(__CUDACC__) 1109 block_keys.GetDevice());
1112 std::atomic<index_t> count_atomic(0);
1113 std::atomic<index_t>* count_ptr = &count_atomic;
1116 if (valid_size < 0) {
1118 "No estimated max point cloud size provided, using a 2-pass " 1119 "estimation. Surface extraction could be slow.");
1127 resolution, nb_block_masks_indexer,
1128 nb_block_indices_indexer);
1133 index_t workload_block_idx = workload_idx / resolution3;
1135 index_t voxel_idx = workload_idx % resolution3;
1139 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1141 index_t linear_idx = block_idx * resolution3 + voxel_idx;
1142 float tsdf_o = tsdf_base_ptr[linear_idx];
1143 float weight_o = weight_base_ptr[linear_idx];
1144 if (weight_o <= weight_threshold)
return;
1147 for (
index_t i = 0; i < 3; ++i) {
1149 GetLinearIdx(xv + (i == 0), yv + (i == 1),
1150 zv + (i == 2), workload_block_idx);
1151 if (linear_idx_i < 0)
continue;
1153 float tsdf_i = tsdf_base_ptr[linear_idx_i];
1154 float weight_i = weight_base_ptr[linear_idx_i];
1155 if (weight_i > weight_threshold && tsdf_i * tsdf_o < 0) {
1161 #if defined(__CUDACC__) 1165 valid_size = (*count_ptr).load();
1170 if (points.GetLength() == 0) {
1184 if (color_base_ptr) {
1194 nb_block_masks_indexer,
1195 nb_block_indices_indexer);
1199 index_t curr_block_idx,
float* n) {
1200 return DeviceGetNormal<tsdf_t>(
1201 tsdf_base_ptr, xo, yo, zo, curr_block_idx, n, resolution,
1202 nb_block_masks_indexer, nb_block_indices_indexer);
1206 index_t workload_block_idx = workload_idx / resolution3;
1208 index_t voxel_idx = workload_idx % resolution3;
1214 index_t xb = block_key_ptr[0];
1215 index_t yb = block_key_ptr[1];
1216 index_t zb = block_key_ptr[2];
1220 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1222 index_t linear_idx = block_idx * resolution3 + voxel_idx;
1223 float tsdf_o = tsdf_base_ptr[linear_idx];
1224 float weight_o = weight_base_ptr[linear_idx];
1225 if (weight_o <= weight_threshold)
return;
1227 float no[3] = {0}, ne[3] = {0};
1230 GetNormal(xv, yv, zv, workload_block_idx, no);
1237 for (
index_t i = 0; i < 3; ++i) {
1239 GetLinearIdx(xv + (i == 0), yv + (i == 1), zv + (i == 2),
1240 workload_block_idx);
1241 if (linear_idx_i < 0)
continue;
1243 float tsdf_i = tsdf_base_ptr[linear_idx_i];
1244 float weight_i = weight_base_ptr[linear_idx_i];
1245 if (weight_i > weight_threshold && tsdf_i * tsdf_o < 0) {
1246 float ratio = (0 - tsdf_o) / (tsdf_i - tsdf_o);
1249 if (idx >= valid_size) {
1250 printf(
"Point cloud size larger than " 1251 "estimated, please increase the " 1256 float* point_ptr = point_indexer.GetDataPtr<
float>(idx);
1257 point_ptr[0] = voxel_size * (x + ratio *
int(i == 0));
1258 point_ptr[1] = voxel_size * (y + ratio *
int(i == 1));
1259 point_ptr[2] = voxel_size * (z + ratio *
int(i == 2));
1262 float* normal_ptr = normal_indexer.
GetDataPtr<
float>(idx);
1263 GetNormal(xv + (i == 0), yv + (i == 1), zv + (i == 2),
1264 workload_block_idx, ne);
1265 float nx = (1 - ratio) * no[0] + ratio * ne[0];
1266 float ny = (1 - ratio) * no[1] + ratio * ne[1];
1267 float nz = (1 - ratio) * no[2] + ratio * ne[2];
1268 float norm =
static_cast<float>(
1269 sqrt(nx * nx + ny * ny + nz * nz) + 1e-5);
1270 normal_ptr[0] = nx / norm;
1271 normal_ptr[1] = ny / norm;
1272 normal_ptr[2] = nz / norm;
1274 if (color_base_ptr) {
1275 float* color_ptr = color_indexer.
GetDataPtr<
float>(idx);
1276 const color_t* color_o_ptr =
1277 color_base_ptr + 3 * linear_idx;
1278 float r_o = color_o_ptr[0];
1279 float g_o = color_o_ptr[1];
1280 float b_o = color_o_ptr[2];
1282 const color_t* color_i_ptr =
1283 color_base_ptr + 3 * linear_idx_i;
1284 float r_i = color_i_ptr[0];
1285 float g_i = color_i_ptr[1];
1286 float b_i = color_i_ptr[2];
1288 color_ptr[0] = ((1 - ratio) * r_o + ratio * r_i) / 255.0f;
1289 color_ptr[1] = ((1 - ratio) * g_o + ratio * g_i) / 255.0f;
1290 color_ptr[2] = ((1 - ratio) * b_o + ratio * b_i) / 255.0f;
1296 #if defined(__CUDACC__) 1299 index_t total_count = (*count_ptr).load();
1303 valid_size = total_count;
1305 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 1310 template <
typename tsdf_t,
typename weight_t,
typename color_t>
1311 #if defined(__CUDACC__) 1312 void ExtractTriangleMeshCUDA
1328 float weight_threshold,
1332 index_t resolution = block_resolution;
1333 index_t resolution3 = resolution * resolution * resolution;
1336 ArrayIndexer voxel_indexer({resolution, resolution, resolution});
1345 {n_blocks, resolution, resolution, resolution, 4},
core::Int32,
1347 }
catch (
const std::runtime_error&) {
1349 "Unable to allocate assistance mesh structure for Marching " 1350 "Cubes with {} active voxel blocks. Please consider using a " 1351 "larger voxel size (currently {}) for TSDF integration, or " 1352 "using tsdf_volume.cpu() to perform mesh extraction on CPU.",
1353 n_blocks, voxel_size);
1357 ArrayIndexer mesh_structure_indexer(mesh_structure, 4);
1358 ArrayIndexer nb_block_masks_indexer(nb_block_masks, 2);
1359 ArrayIndexer nb_block_indices_indexer(nb_block_indices, 2);
1363 const index_t* inv_indices_ptr = inv_block_indices.GetDataPtr<
index_t>();
1365 if (!block_value_map.Contains(
"tsdf") ||
1366 !block_value_map.Contains(
"weight")) {
1368 "TSDF and/or weight not allocated in blocks, please implement " 1369 "customized integration.");
1371 const tsdf_t* tsdf_base_ptr =
1372 block_value_map.at(
"tsdf").GetDataPtr<tsdf_t>();
1373 const weight_t* weight_base_ptr =
1374 block_value_map.at(
"weight").GetDataPtr<weight_t>();
1375 const color_t* color_base_ptr =
nullptr;
1376 if (block_value_map.Contains(
"color")) {
1377 color_base_ptr = block_value_map.at(
"color").GetDataPtr<color_t>();
1380 index_t n = n_blocks * resolution3;
1389 static_cast<index_t>(resolution),
1390 nb_block_masks_indexer,
1391 nb_block_indices_indexer);
1395 index_t workload_block_idx = widx / resolution3;
1396 index_t voxel_idx = widx % resolution3;
1400 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1405 for (
index_t i = 0; i < 8; ++i) {
1407 GetLinearIdx(xv + vtx_shifts[i][0], yv + vtx_shifts[i][1],
1408 zv + vtx_shifts[i][2], workload_block_idx);
1409 if (linear_idx_i < 0)
return;
1411 float tsdf_i = tsdf_base_ptr[linear_idx_i];
1412 float weight_i = weight_base_ptr[linear_idx_i];
1413 if (weight_i <= weight_threshold)
return;
1415 table_idx |= ((tsdf_i < 0) ? (1 << i) : 0);
1419 xv, yv, zv, workload_block_idx);
1420 mesh_struct_ptr[3] = table_idx;
1422 if (table_idx == 0 || table_idx == 255)
return;
1425 index_t edges_with_vertices = edge_table[table_idx];
1426 for (
index_t i = 0; i < 12; ++i) {
1427 if (edges_with_vertices & (1 << i)) {
1428 index_t xv_i = xv + edge_shifts[i][0];
1429 index_t yv_i = yv + edge_shifts[i][1];
1430 index_t zv_i = zv + edge_shifts[i][2];
1431 index_t edge_i = edge_shifts[i][3];
1433 index_t dxb = xv_i / resolution;
1434 index_t dyb = yv_i / resolution;
1435 index_t dzb = zv_i / resolution;
1437 index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
1441 workload_block_idx, nb_idx);
1444 xv_i - dxb * resolution,
1445 yv_i - dyb * resolution,
1446 zv_i - dzb * resolution,
1447 inv_indices_ptr[block_idx_i]);
1450 mesh_ptr_i[edge_i] = -1;
1456 #if defined(__CUDACC__) 1461 std::atomic<index_t> count_atomic(0);
1462 std::atomic<index_t>* count_ptr = &count_atomic;
1465 if (vertex_count < 0) {
1468 index_t workload_block_idx = widx / resolution3;
1469 index_t voxel_idx = widx % resolution3;
1473 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1478 xv, yv, zv, workload_block_idx);
1481 if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
1482 mesh_struct_ptr[2] != -1) {
1487 for (
index_t e = 0; e < 3; ++e) {
1488 index_t vertex_idx = mesh_struct_ptr[e];
1489 if (vertex_idx != -1)
continue;
1495 #if defined(__CUDACC__) 1498 vertex_count = (*count_ptr).load();
1509 if (color_base_ptr) {
1517 #if defined(__CUDACC__) 1531 nb_block_masks_indexer,
1532 nb_block_indices_indexer);
1536 index_t curr_block_idx,
float* n) {
1537 return DeviceGetNormal<tsdf_t>(
1538 tsdf_base_ptr, xo, yo, zo, curr_block_idx, n, resolution,
1539 nb_block_masks_indexer, nb_block_indices_indexer);
1543 index_t workload_block_idx = widx / resolution3;
1545 index_t voxel_idx = widx % resolution3;
1550 index_t xb = block_key_ptr[0];
1551 index_t yb = block_key_ptr[1];
1552 index_t zb = block_key_ptr[2];
1556 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1565 xv, yv, zv, workload_block_idx);
1568 if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
1569 mesh_struct_ptr[2] != -1) {
1574 index_t linear_idx = resolution3 * block_idx + voxel_idx;
1575 float tsdf_o = tsdf_base_ptr[linear_idx];
1577 float no[3] = {0}, ne[3] = {0};
1580 GetNormal(xv, yv, zv, workload_block_idx, no);
1583 for (
index_t e = 0; e < 3; ++e) {
1584 index_t vertex_idx = mesh_struct_ptr[e];
1585 if (vertex_idx != -1)
continue;
1588 GetLinearIdx(xv + (e == 0), yv + (e == 1), zv + (e == 2),
1589 workload_block_idx);
1591 "Internal error: GetVoxelAt returns nullptr.");
1592 float tsdf_e = tsdf_base_ptr[linear_idx_e];
1593 float ratio = (0 - tsdf_o) / (tsdf_e - tsdf_o);
1596 mesh_struct_ptr[e] = idx;
1598 float ratio_x = ratio *
index_t(e == 0);
1599 float ratio_y = ratio *
index_t(e == 1);
1600 float ratio_z = ratio *
index_t(e == 2);
1602 float* vertex_ptr = vertex_indexer.
GetDataPtr<
float>(idx);
1603 vertex_ptr[0] = voxel_size * (x + ratio_x);
1604 vertex_ptr[1] = voxel_size * (y + ratio_y);
1605 vertex_ptr[2] = voxel_size * (z + ratio_z);
1608 float* normal_ptr = normal_indexer.GetDataPtr<
float>(idx);
1609 GetNormal(xv + (e == 0), yv + (e == 1), zv + (e == 2),
1610 workload_block_idx, ne);
1611 float nx = (1 - ratio) * no[0] + ratio * ne[0];
1612 float ny = (1 - ratio) * no[1] + ratio * ne[1];
1613 float nz = (1 - ratio) * no[2] + ratio * ne[2];
1614 float norm =
static_cast<float>(sqrt(nx * nx + ny * ny + nz * nz) +
1616 normal_ptr[0] = nx / norm;
1617 normal_ptr[1] = ny / norm;
1618 normal_ptr[2] = nz / norm;
1620 if (color_base_ptr) {
1621 float* color_ptr = color_indexer.
GetDataPtr<
float>(idx);
1622 float r_o = color_base_ptr[linear_idx * 3 + 0];
1623 float g_o = color_base_ptr[linear_idx * 3 + 1];
1624 float b_o = color_base_ptr[linear_idx * 3 + 2];
1626 float r_e = color_base_ptr[linear_idx_e * 3 + 0];
1627 float g_e = color_base_ptr[linear_idx_e * 3 + 1];
1628 float b_e = color_base_ptr[linear_idx_e * 3 + 2];
1630 color_ptr[0] = ((1 - ratio) * r_o + ratio * r_e) / 255.0f;
1631 color_ptr[1] = ((1 - ratio) * g_o + ratio * g_e) / 255.0f;
1632 color_ptr[2] = ((1 - ratio) * b_o + ratio * b_e) / 255.0f;
1638 index_t triangle_count = vertex_count * 3;
1642 #if defined(__CUDACC__) 1650 index_t workload_block_idx = widx / resolution3;
1651 index_t voxel_idx = widx % resolution3;
1655 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
1659 xv, yv, zv, workload_block_idx);
1661 index_t table_idx = mesh_struct_ptr[3];
1662 if (tri_count[table_idx] == 0)
return;
1664 for (
index_t tri = 0; tri < 16; tri += 3) {
1665 if (tri_table[table_idx][tri] == -1)
return;
1669 for (
index_t vertex = 0; vertex < 3; ++vertex) {
1670 index_t edge = tri_table[table_idx][tri + vertex];
1672 index_t xv_i = xv + edge_shifts[edge][0];
1673 index_t yv_i = yv + edge_shifts[edge][1];
1674 index_t zv_i = zv + edge_shifts[edge][2];
1675 index_t edge_i = edge_shifts[edge][3];
1677 index_t dxb = xv_i / resolution;
1678 index_t dyb = yv_i / resolution;
1679 index_t dzb = zv_i / resolution;
1681 index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
1685 workload_block_idx, nb_idx);
1688 xv_i - dxb * resolution,
1689 yv_i - dyb * resolution,
1690 zv_i - dzb * resolution,
1691 inv_indices_ptr[block_idx_i]);
1694 triangle_indexer.GetDataPtr<
index_t>(tri_idx);
1695 triangle_ptr[2 - vertex] = mesh_struct_ptr_i[edge_i];
1700 #if defined(__CUDACC__) 1703 triangle_count = (*count_ptr).load();
1706 triangles = triangles.Slice(0, 0, triangle_count);
Definition: StdGPUHashBackend.h:134
Definition: GeometryIndexer.h:180
void RayCastCPU(std::shared_ptr< core::HashMap > &hashmap, const TensorMap &block_value_map, const core::Tensor &range_map, TensorMap &renderings_map, const core::Tensor &intrinsic, const core::Tensor &extrinsic, index_t h, index_t w, index_t block_resolution, float voxel_size, float depth_scale, float depth_min, float depth_max, float weight_threshold, float trunc_voxel_multiplier, int range_map_down_factor)
Definition: VoxelBlockGridImpl.h:560
TArrayIndexer< index_t > ArrayIndexer
Definition: VoxelBlockGridImpl.h:50
OPEN3D_DEVICE void DeviceGetNormal(const tsdf_t *tsdf_base_ptr, index_t xo, index_t yo, index_t zo, index_t curr_block_idx, float *n, index_t resolution, const ArrayIndexer &nb_block_masks_indexer, const ArrayIndexer &nb_block_indices_indexer)
Definition: VoxelBlockGridImpl.h:131
OPEN3D_HOST_DEVICE void * GetDataPtr() const
Definition: GeometryIndexer.h:334
OPEN3D_HOST_DEVICE int Sign(int x)
Definition: GeometryMacros.h:96
void ExtractTriangleMeshCPU(const core::Tensor &block_indices, const core::Tensor &inv_block_indices, const core::Tensor &nb_block_indices, const core::Tensor &nb_block_masks, const core::Tensor &block_keys, const TensorMap &block_value_map, core::Tensor &vertices, core::Tensor &triangles, core::Tensor &vertex_normals, core::Tensor &vertex_colors, index_t block_resolution, float voxel_size, float weight_threshold, index_t &vertex_count)
Definition: VoxelBlockGridImpl.h:1316
Definition: Dispatch.h:129
void OPEN3D_DEVICE Update(index_t xin, index_t yin, index_t zin, index_t block_idx_in)
Definition: VoxelBlockGridImpl.h:543
void ParallelFor(const Device &device, int64_t n, const func_t &func)
Definition: ParallelFor.h:122
const Dtype Float32
Definition: Dtype.cpp:61
uint32_t buf_index_t
Definition: HashBackendBuffer.h:63
OPEN3D_DEVICE index_t DeviceGetLinearIdx(index_t xo, index_t yo, index_t zo, index_t curr_block_idx, index_t resolution, const ArrayIndexer &nb_block_masks_indexer, const ArrayIndexer &nb_block_indices_indexer)
Definition: VoxelBlockGridImpl.h:102
Device GetDevice() const override
Definition: Tensor.cpp:1384
#define OPEN3D_DEVICE
Definition: CUDAUtils.h:64
void ExtractPointCloudCPU(const core::Tensor &block_indices, const core::Tensor &nb_block_indices, const core::Tensor &nb_block_masks, const core::Tensor &block_keys, const TensorMap &block_value_map, core::Tensor &points, core::Tensor &normals, core::Tensor &colors, index_t block_resolution, float voxel_size, float weight_threshold, index_t &valid_size)
Definition: VoxelBlockGridImpl.h:1059
#define OPEN3D_ATOMIC_ADD(X, Y)
Definition: GeometryMacros.h:58
#define LogWarning(...)
Definition: Logging.h:79
void Synchronize()
Definition: CUDAUtils.cpp:77
Definition: Dispatch.h:113
void IntegrateCPU(const core::Tensor &depth, const core::Tensor &color, const core::Tensor &block_indices, const core::Tensor &block_keys, TensorMap &block_value_map, const core::Tensor &depth_intrinsic, const core::Tensor &color_intrinsic, const core::Tensor &extrinsic, index_t resolution, float voxel_size, float sdf_trunc, float depth_scale, float depth_max)
Definition: VoxelBlockGridImpl.h:168
index_t OPEN3D_DEVICE Check(index_t xin, index_t yin, index_t zin)
Definition: VoxelBlockGridImpl.h:539
#define LogDebug(...)
Definition: Logging.h:98
math::float4 color
Definition: LineSetBuffers.cpp:64
core::Tensor InverseTransformation(const core::Tensor &T)
TODO(wei): find a proper place for such functionalities.
Definition: Utility.h:96
const Dtype Int32
Definition: Dtype.cpp:65
void GetVoxelCoordinatesAndFlattenedIndicesCPU(const core::Tensor &buf_indices, const core::Tensor &block_keys, core::Tensor &voxel_coords, core::Tensor &flattened_indices, index_t block_resolution, float voxel_size)
Definition: VoxelBlockGridImpl.h:57
Definition: TBBHashBackend.h:41
int index_t
Definition: VoxelBlockGrid.h:41
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c int
Definition: K4aPlugin.cpp:489
static Tensor Zeros(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor filled with zeros.
Definition: Tensor.cpp:392
Definition: VoxelBlockGridImpl.h:533
Definition: PinholeCameraIntrinsic.cpp:35
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:475
T * GetDataPtr()
Definition: Tensor.h:1149
#define OPEN3D_ASSERT(...)
Definition: Macro.h:67
static Tensor Eye(int64_t n, Dtype dtype, const Device &device)
Create an identity matrix of size n x n.
Definition: Tensor.cpp:404
index_t x
Definition: VoxelBlockGridImpl.h:534
index_t z
Definition: VoxelBlockGridImpl.h:536
index_t y
Definition: VoxelBlockGridImpl.h:535
int64_t GetLength() const
Definition: Tensor.h:1130
OPEN3D_HOST_DEVICE bool InBoundary(float x, float y) const
Definition: GeometryIndexer.h:313
index_t block_idx
Definition: VoxelBlockGridImpl.h:537
#define LogInfo(...)
Definition: Logging.h:89
static const Dtype Float64
Definition: Dtype.h:43
#define LogError(...)
Definition: Logging.h:67
void EstimateRangeCPU(const core::Tensor &block_keys, core::Tensor &range_minmax_map, const core::Tensor &intrinsics, const core::Tensor &extrinsics, int h, int w, int down_factor, int64_t block_resolution, float voxel_size, float depth_min, float depth_max, core::Tensor &fragment_buffer)
Definition: VoxelBlockGridImpl.h:320
Definition: TensorMap.h:50