49 #if defined(__CUDACC__) 69 int64_t resolution3 = resolution * resolution * resolution;
72 NDArrayIndexer voxel_indexer({resolution, resolution, resolution});
82 bool integrate_color =
false;
83 if (color.NumElements() != 0) {
85 integrate_color =
true;
89 const int* indices_ptr = indices.GetDataPtr<
int>();
91 int64_t n = indices.GetLength() * resolution3;
93 #if defined(__CUDACC__) 94 core::kernel::CUDALauncher launcher;
102 int64_t workload_idx) {
104 int block_idx = indices_ptr[workload_idx / resolution3];
105 int voxel_idx = workload_idx % resolution3;
110 block_keys_indexer.
GetDataPtr<
int>(block_idx);
111 int64_t xb =
static_cast<int64_t
>(block_key_ptr[0]);
112 int64_t yb =
static_cast<int64_t
>(block_key_ptr[1]);
113 int64_t zb =
static_cast<int64_t
>(block_key_ptr[2]);
117 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
120 int64_t x = (xb * resolution + xv);
121 int64_t y = (yb * resolution + yv);
122 int64_t z = (zb * resolution + zv);
125 float xc, yc, zc, u, v;
126 transform_indexer.RigidTransform(
127 static_cast<float>(x), static_cast<float>(y),
128 static_cast<float>(z), &xc, &yc, &zc);
131 transform_indexer.Project(xc, yc, zc, &u, &v);
137 float depth = *depth_indexer.
GetDataPtr<
float>(
138 static_cast<int64_t
>(u),
139 static_cast<int64_t>(v)) /
142 float sdf = (depth - zc);
143 if (depth <= 0 || depth > depth_max || zc <= 0 ||
147 sdf = sdf < sdf_trunc ? sdf : sdf_trunc;
152 voxel_block_buffer_indexer.
GetDataPtr<voxel_t>(
153 xv, yv, zv, block_idx);
155 if (integrate_color) {
156 float* color_ptr = color_indexer.
GetDataPtr<
float>(
157 static_cast<int64_t
>(u),
158 static_cast<int64_t>(v));
160 voxel_ptr->Integrate(sdf, color_ptr[0], color_ptr[1],
163 voxel_ptr->Integrate(sdf);
167 #if defined(__CUDACC__) 172 #if defined(__CUDACC__) 173 void ExtractSurfacePointsCUDA
187 float weight_threshold,
190 int64_t resolution3 = resolution * resolution * resolution;
193 NDArrayIndexer voxel_indexer({resolution, resolution, resolution});
202 const int64_t* indices_ptr = indices.
GetDataPtr<int64_t>();
205 int64_t n = n_blocks * resolution3;
208 #if defined(__CUDACC__) 210 block_values.GetDevice());
211 int* count_ptr =
count.GetDataPtr<
int>();
213 std::atomic<int> count_atomic(0);
214 std::atomic<int>* count_ptr = &count_atomic;
217 #if defined(__CUDACC__) 218 core::kernel::CUDALauncher launcher;
222 if (valid_size < 0) {
224 "No estimated max point cloud size provided, using a 2-pass " 225 "estimation. Surface extraction could be slow.");
228 voxel_block_buffer_indexer.ElementByteSize(), [&]() {
236 return DeviceGetVoxelAt<voxel_t>(
237 xo, yo, zo, curr_block_idx,
238 static_cast<int>(resolution),
239 nb_block_masks_indexer,
240 nb_block_indices_indexer,
241 voxel_block_buffer_indexer);
246 int64_t workload_block_idx =
247 workload_idx / resolution3;
249 indices_ptr[workload_block_idx];
250 int64_t voxel_idx = workload_idx % resolution3;
254 voxel_indexer.WorkloadToCoord(voxel_idx, &xv,
258 voxel_block_buffer_indexer
259 .GetDataPtr<voxel_t>(xv, yv, zv,
261 float tsdf_o = voxel_ptr->GetTSDF();
262 float weight_o = voxel_ptr->GetWeight();
263 if (weight_o <= weight_threshold)
return;
266 for (
int i = 0; i < 3; ++i) {
267 voxel_t* ptr = GetVoxelAt(
268 static_cast<int>(xv) + (i == 0),
269 static_cast<int>(yv) + (i == 1),
270 static_cast<int>(zv) + (i == 2),
272 workload_block_idx));
273 if (ptr ==
nullptr)
continue;
275 float tsdf_i = ptr->GetTSDF();
276 float weight_i = ptr->GetWeight();
278 if (weight_i > weight_threshold &&
279 tsdf_i * tsdf_o < 0) {
286 #if defined(__CUDACC__) 287 valid_size =
count[0].Item<
int>();
290 valid_size = (*count_ptr).load();
295 int max_count = valid_size;
296 if (points.GetLength() == 0) {
298 block_values.GetDevice());
303 bool extract_normal =
false;
305 if (normals.has_value()) {
306 extract_normal =
true;
307 if (normals.value().get().GetLength() == 0) {
308 normals.value().get() =
310 block_values.GetDevice());
317 voxel_block_buffer_indexer.ElementByteSize(), [&]() {
319 bool extract_color =
false;
321 if (voxel_t::HasColor() && colors.has_value()) {
322 extract_color =
true;
323 if (colors.value().get().GetLength() == 0) {
326 block_values.GetDevice());
332 int64_t workload_idx) {
334 int xo,
int yo,
int zo,
335 int curr_block_idx) -> voxel_t* {
336 return DeviceGetVoxelAt<voxel_t>(
337 xo, yo, zo, curr_block_idx,
338 static_cast<int>(resolution),
339 nb_block_masks_indexer,
340 nb_block_indices_indexer,
341 voxel_block_buffer_indexer);
346 return DeviceGetNormalAt<voxel_t>(
347 xo, yo, zo, curr_block_idx, n,
348 static_cast<int>(resolution), voxel_size,
349 nb_block_masks_indexer,
350 nb_block_indices_indexer,
351 voxel_block_buffer_indexer);
355 int64_t workload_block_idx = workload_idx / resolution3;
356 int64_t block_idx = indices_ptr[workload_block_idx];
357 int64_t voxel_idx = workload_idx % resolution3;
362 block_keys_indexer.
GetDataPtr<
int>(block_idx);
363 int64_t xb =
static_cast<int64_t
>(block_key_ptr[0]);
364 int64_t yb =
static_cast<int64_t
>(block_key_ptr[1]);
365 int64_t zb =
static_cast<int64_t
>(block_key_ptr[2]);
369 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
372 voxel_block_buffer_indexer.GetDataPtr<voxel_t>(
373 xv, yv, zv, block_idx);
374 float tsdf_o = voxel_ptr->GetTSDF();
375 float weight_o = voxel_ptr->GetWeight();
377 if (weight_o <= weight_threshold)
return;
379 int64_t x = xb * resolution + xv;
380 int64_t y = yb * resolution + yv;
381 int64_t z = zb * resolution + zv;
383 float no[3] = {0}, ni[3] = {0};
384 if (extract_normal) {
385 GetNormalAt(static_cast<int>(xv), static_cast<int>(yv),
386 static_cast<int>(zv),
387 static_cast<int>(workload_block_idx), no);
391 for (
int i = 0; i < 3; ++i) {
392 voxel_t* ptr = GetVoxelAt(
393 static_cast<int>(xv) + (i == 0),
394 static_cast<int>(yv) + (i == 1),
395 static_cast<int>(zv) + (i == 2),
396 static_cast<int>(workload_block_idx));
397 if (ptr ==
nullptr)
continue;
399 float tsdf_i = ptr->GetTSDF();
400 float weight_i = ptr->GetWeight();
402 if (weight_i > weight_threshold &&
403 tsdf_i * tsdf_o < 0) {
404 float ratio = (0 - tsdf_o) / (tsdf_i - tsdf_o);
407 if (idx >= valid_size) {
408 printf(
"Point cloud size larger than " 409 "estimated, please increase the " 415 point_indexer.GetDataPtr<
float>(idx);
417 voxel_size * (x + ratio *
int(i == 0));
419 voxel_size * (y + ratio *
int(i == 1));
421 voxel_size * (z + ratio *
int(i == 2));
427 float r_o = voxel_ptr->GetR();
428 float g_o = voxel_ptr->GetG();
429 float b_o = voxel_ptr->GetB();
431 float r_i = ptr->GetR();
432 float g_i = ptr->GetG();
433 float b_i = ptr->GetB();
436 ((1 - ratio) * r_o + ratio * r_i) /
439 ((1 - ratio) * g_o + ratio * g_i) /
442 ((1 - ratio) * b_o + ratio * b_i) /
446 if (extract_normal) {
448 static_cast<int>(xv) + (i == 0),
449 static_cast<int>(yv) + (i == 1),
450 static_cast<int>(zv) + (i == 2),
451 static_cast<int>(workload_block_idx),
456 float nx = (1 - ratio) * no[0] + ratio * ni[0];
457 float ny = (1 - ratio) * no[1] + ratio * ni[1];
458 float nz = (1 - ratio) * no[2] + ratio * ni[2];
459 float norm =
static_cast<float>(
460 sqrt(nx * nx + ny * ny + nz * nz) +
462 normal_ptr[0] = nx / norm;
463 normal_ptr[1] = ny / norm;
464 normal_ptr[2] = nz / norm;
470 #if defined(__CUDACC__) 471 int total_count =
count.Item<
int>();
473 int total_count = (*count_ptr).load();
477 valid_size = total_count;
479 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 484 #if defined(__CUDACC__) 485 void ExtractSurfaceMeshCUDA
501 float weight_threshold,
504 int64_t resolution3 = resolution * resolution * resolution;
507 NDArrayIndexer voxel_indexer({resolution, resolution, resolution});
508 int n_blocks =
static_cast<int>(indices.
GetLength());
510 #if defined(__CUDACC__) 519 {n_blocks, resolution, resolution, resolution, 4},
521 }
catch (
const std::runtime_error&) {
523 "[MeshExtractionKernel] Unable to allocate assistance mesh " 524 "structure for Marching " 525 "Cubes with {} active voxel blocks. Please consider using a " 526 "larger voxel size (currently {}) for TSDF " 527 "integration, or using tsdf_volume.cpu() to perform mesh " 528 "extraction on CPU.",
529 n_blocks, voxel_size);
539 const int64_t* indices_ptr = indices.
GetDataPtr<int64_t>();
540 const int64_t* inv_indices_ptr = inv_indices.GetDataPtr<int64_t>();
541 int64_t n = n_blocks * resolution3;
543 #if defined(__CUDACC__) 544 core::kernel::CUDALauncher launcher;
554 int xo,
int yo,
int zo,
555 int curr_block_idx) -> voxel_t* {
556 return DeviceGetVoxelAt<voxel_t>(
557 xo, yo, zo, curr_block_idx,
558 static_cast<int>(resolution), nb_block_masks_indexer,
559 nb_block_indices_indexer, voxel_block_buffer_indexer);
563 int64_t workload_block_idx = widx / resolution3;
564 int64_t voxel_idx = widx % resolution3;
568 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
573 for (
int i = 0; i < 8; ++i) {
574 voxel_t* voxel_ptr_i =
575 GetVoxelAt(static_cast<int>(xv) + vtx_shifts[i][0],
576 static_cast<int>(yv) + vtx_shifts[i][1],
577 static_cast<int>(zv) + vtx_shifts[i][2],
578 static_cast<int>(workload_block_idx));
579 if (voxel_ptr_i ==
nullptr)
return;
581 float tsdf_i = voxel_ptr_i->GetTSDF();
582 float weight_i = voxel_ptr_i->GetWeight();
583 if (weight_i <= weight_threshold)
return;
585 table_idx |= ((tsdf_i < 0) ? (1 << i) : 0);
588 int* mesh_struct_ptr = mesh_structure_indexer.
GetDataPtr<
int>(
589 xv, yv, zv, workload_block_idx);
590 mesh_struct_ptr[3] = table_idx;
592 if (table_idx == 0 || table_idx == 255)
return;
595 int edges_with_vertices = edge_table[table_idx];
596 for (
int i = 0; i < 12; ++i) {
597 if (edges_with_vertices & (1 << i)) {
598 int64_t xv_i = xv + edge_shifts[i][0];
599 int64_t yv_i = yv + edge_shifts[i][1];
600 int64_t zv_i = zv + edge_shifts[i][2];
601 int edge_i = edge_shifts[i][3];
603 int dxb =
static_cast<int>(xv_i / resolution);
604 int dyb =
static_cast<int>(yv_i / resolution);
605 int dzb =
static_cast<int>(zv_i / resolution);
607 int nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
609 int64_t block_idx_i =
610 *nb_block_indices_indexer.
GetDataPtr<int64_t>(
611 workload_block_idx, nb_idx);
612 int* mesh_ptr_i = mesh_structure_indexer.
GetDataPtr<
int>(
613 xv_i - dxb * resolution, yv_i - dyb * resolution,
614 zv_i - dzb * resolution,
615 inv_indices_ptr[block_idx_i]);
618 mesh_ptr_i[edge_i] = -1;
625 #if defined(__CUDACC__) 627 block_values.GetDevice());
629 int* count_ptr =
count.GetDataPtr<
int>();
631 std::atomic<int> count_atomic(0);
632 std::atomic<int>* count_ptr = &count_atomic;
635 if (vertex_count < 0) {
638 int64_t workload_block_idx = widx / resolution3;
639 int64_t voxel_idx = widx % resolution3;
643 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
646 int* mesh_struct_ptr = mesh_structure_indexer.
GetDataPtr<
int>(
647 xv, yv, zv, workload_block_idx);
650 if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
651 mesh_struct_ptr[2] != -1) {
656 for (
int e = 0; e < 3; ++e) {
657 int vertex_idx = mesh_struct_ptr[e];
658 if (vertex_idx != -1)
continue;
664 #if defined(__CUDACC__) 665 vertex_count =
count.Item<
int>();
667 vertex_count = (*count_ptr).load();
673 block_values.GetDevice());
675 bool extract_normal =
false;
677 if (normals.has_value()) {
678 extract_normal =
true;
679 normals.value().get() =
681 block_values.GetDevice());
688 #if defined(__CUDACC__) 690 block_values.GetDevice());
691 count_ptr =
count.GetDataPtr<
int>();
698 bool extract_color =
false;
700 if (voxel_t::HasColor() && colors.has_value()) {
701 extract_color =
true;
702 colors.value().get() =
704 block_values.GetDevice());
710 int xo,
int yo,
int zo,
711 int curr_block_idx) -> voxel_t* {
712 return DeviceGetVoxelAt<voxel_t>(
713 xo, yo, zo, curr_block_idx,
714 static_cast<int>(resolution), nb_block_masks_indexer,
715 nb_block_indices_indexer, voxel_block_buffer_indexer);
719 int curr_block_idx,
float* n) {
720 return DeviceGetNormalAt<voxel_t>(
721 xo, yo, zo, curr_block_idx, n,
722 static_cast<int>(resolution), voxel_size,
723 nb_block_masks_indexer, nb_block_indices_indexer,
724 voxel_block_buffer_indexer);
728 int64_t workload_block_idx = widx / resolution3;
729 int64_t block_idx = indices_ptr[workload_block_idx];
730 int64_t voxel_idx = widx % resolution3;
733 int* block_key_ptr = block_keys_indexer.GetDataPtr<
int>(block_idx);
734 int64_t xb =
static_cast<int64_t
>(block_key_ptr[0]);
735 int64_t yb =
static_cast<int64_t
>(block_key_ptr[1]);
736 int64_t zb =
static_cast<int64_t
>(block_key_ptr[2]);
740 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
743 int64_t x = xb * resolution + xv;
744 int64_t y = yb * resolution + yv;
745 int64_t z = zb * resolution + zv;
748 int* mesh_struct_ptr = mesh_structure_indexer.
GetDataPtr<
int>(
749 xv, yv, zv, workload_block_idx);
752 if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
753 mesh_struct_ptr[2] != -1) {
758 voxel_t* voxel_ptr = voxel_block_buffer_indexer.
GetDataPtr<voxel_t>(
759 xv, yv, zv, block_idx);
760 float tsdf_o = voxel_ptr->GetTSDF();
761 float no[3] = {0}, ne[3] = {0};
763 if (extract_normal) {
764 GetNormalAt(static_cast<int>(xv), static_cast<int>(yv),
765 static_cast<int>(zv),
766 static_cast<int>(workload_block_idx), no);
770 for (
int e = 0; e < 3; ++e) {
771 int vertex_idx = mesh_struct_ptr[e];
772 if (vertex_idx != -1)
continue;
774 voxel_t* voxel_ptr_e =
775 GetVoxelAt(static_cast<int>(xv) + (e == 0),
776 static_cast<int>(yv) + (e == 1),
777 static_cast<int>(zv) + (e == 2),
778 static_cast<int>(workload_block_idx));
779 float tsdf_e = voxel_ptr_e->GetTSDF();
780 float ratio = (0 - tsdf_o) / (tsdf_e - tsdf_o);
783 mesh_struct_ptr[e] = idx;
785 float ratio_x = ratio *
int(e == 0);
786 float ratio_y = ratio *
int(e == 1);
787 float ratio_z = ratio *
int(e == 2);
789 float* vertex_ptr = vertex_indexer.
GetDataPtr<
float>(idx);
790 vertex_ptr[0] = voxel_size * (x + ratio_x);
791 vertex_ptr[1] = voxel_size * (y + ratio_y);
792 vertex_ptr[2] = voxel_size * (z + ratio_z);
794 if (extract_normal) {
795 float* normal_ptr = normal_indexer.
GetDataPtr<
float>(idx);
796 GetNormalAt(static_cast<int>(xv) + (e == 0),
797 static_cast<int>(yv) + (e == 1),
798 static_cast<int>(zv) + (e == 2),
799 static_cast<int>(workload_block_idx), ne);
800 float nx = (1 - ratio) * no[0] + ratio * ne[0];
801 float ny = (1 - ratio) * no[1] + ratio * ne[1];
802 float nz = (1 - ratio) * no[2] + ratio * ne[2];
803 float norm =
static_cast<float>(
804 sqrt(nx * nx + ny * ny + nz * nz) + 1e-5);
805 normal_ptr[0] = nx / norm;
806 normal_ptr[1] = ny / norm;
807 normal_ptr[2] = nz / norm;
811 float* color_ptr = color_indexer.
GetDataPtr<
float>(idx);
812 float r_o = voxel_ptr->GetR();
813 float g_o = voxel_ptr->GetG();
814 float b_o = voxel_ptr->GetB();
816 float r_e = voxel_ptr_e->GetR();
817 float g_e = voxel_ptr_e->GetG();
818 float b_e = voxel_ptr_e->GetB();
819 color_ptr[0] = ((1 - ratio) * r_o + ratio * r_e) / 255.0f;
820 color_ptr[1] = ((1 - ratio) * g_o + ratio * g_e) / 255.0f;
821 color_ptr[2] = ((1 - ratio) * b_o + ratio * b_e) / 255.0f;
828 int triangle_count = vertex_count * 3;
830 block_values.GetDevice());
833 #if defined(__CUDACC__) 835 block_values.GetDevice());
836 count_ptr =
count.GetDataPtr<
int>();
842 int64_t workload_block_idx = widx / resolution3;
843 int64_t voxel_idx = widx % resolution3;
847 voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);
850 int* mesh_struct_ptr = mesh_structure_indexer.
GetDataPtr<
int>(
851 xv, yv, zv, workload_block_idx);
853 int table_idx = mesh_struct_ptr[3];
854 if (tri_count[table_idx] == 0)
return;
856 for (
size_t tri = 0; tri < 16; tri += 3) {
857 if (tri_table[table_idx][tri] == -1)
return;
861 for (
size_t vertex = 0; vertex < 3; ++vertex) {
862 int edge = tri_table[table_idx][tri + vertex];
864 int64_t xv_i = xv + edge_shifts[edge][0];
865 int64_t yv_i = yv + edge_shifts[edge][1];
866 int64_t zv_i = zv + edge_shifts[edge][2];
867 int64_t edge_i = edge_shifts[edge][3];
869 int dxb =
static_cast<int>(xv_i / resolution);
870 int dyb =
static_cast<int>(yv_i / resolution);
871 int dzb =
static_cast<int>(zv_i / resolution);
873 int nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;
875 int64_t block_idx_i =
876 *nb_block_indices_indexer.
GetDataPtr<int64_t>(
877 workload_block_idx, nb_idx);
878 int* mesh_struct_ptr_i = mesh_structure_indexer.
GetDataPtr<
int>(
879 xv_i - dxb * resolution, yv_i - dyb * resolution,
880 zv_i - dzb * resolution, inv_indices_ptr[block_idx_i]);
882 int64_t* triangle_ptr =
883 triangle_indexer.GetDataPtr<int64_t>(tri_idx);
884 triangle_ptr[2 - vertex] = mesh_struct_ptr_i[edge_i];
889 #if defined(__CUDACC__) 890 triangle_count =
count.Item<
int>();
892 triangle_count = (*count_ptr).load();
895 triangles = triangles.Slice(0, 0, triangle_count);
898 #if defined(__CUDACC__) 899 void EstimateRangeCUDA
910 int64_t block_resolution,
918 int h_down = h / down_factor;
919 int w_down = w / down_factor;
925 const int fragment_size = 16;
926 const int frag_buffer_size = 65535;
936 #if defined(__CUDACC__) 939 int* count_ptr =
count.GetDataPtr<
int>();
941 std::atomic<int> count_atomic(0);
942 std::atomic<int>* count_ptr = &count_atomic;
944 #if defined(__CUDACC__) 945 core::kernel::CUDALauncher launcher;
955 int* key = block_keys_indexer.
GetDataPtr<
int>(workload_idx);
957 int u_min = w_down - 1, v_min = h_down - 1, u_max = 0,
959 float z_min = depth_max, z_max = depth_min;
961 float xc, yc, zc, u, v;
964 for (
int i = 0; i < 8; ++i) {
965 float xw = (key[0] + ((i & 1) > 0)) * block_resolution *
967 float yw = (key[1] + ((i & 2) > 0)) * block_resolution *
969 float zw = (key[2] + ((i & 4) > 0)) * block_resolution *
974 if (zc <= 0)
continue;
977 w2c_transform_indexer.
Project(xc, yc, zc, &u, &v);
981 v_min = min(static_cast<int>(floorf(v)), v_min);
982 v_max =
max(static_cast<int>(ceilf(v)), v_max);
984 u_min = min(static_cast<int>(floorf(u)), u_min);
985 u_max =
max(static_cast<int>(ceilf(u)), u_max);
987 z_min = min(z_min, zc);
988 z_max =
max(z_max, zc);
991 v_min =
max(0, v_min);
992 v_max = min(h_down - 1, v_max);
994 u_min =
max(0, u_min);
995 u_max = min(w_down - 1, u_max);
997 if (v_min >= v_max || u_min >= u_max || z_min >= z_max)
return;
1001 ceilf(
float(v_max - v_min + 1) /
float(fragment_size));
1003 ceilf(
float(u_max - u_min + 1) /
float(fragment_size));
1005 int frag_count = frag_v_count * frag_u_count;
1007 int frag_count_end = frag_count_start + frag_count;
1008 if (frag_count_end >= frag_buffer_size) {
1009 printf(
"Fragment count exceeding buffer size, abort!\n");
1013 for (
int frag_v = 0; frag_v < frag_v_count; ++frag_v) {
1014 for (
int frag_u = 0; frag_u < frag_u_count;
1016 float* frag_ptr = frag_buffer_indexer.GetDataPtr<
float>(
1017 frag_count_start +
offset);
1019 frag_ptr[0] = z_min;
1020 frag_ptr[1] = z_max;
1023 frag_ptr[2] = v_min + frag_v * fragment_size;
1024 frag_ptr[3] = u_min + frag_u * fragment_size;
1027 frag_ptr[4] = min(frag_ptr[2] + fragment_size - 1,
1028 static_cast<float>(v_max));
1029 frag_ptr[5] = min(frag_ptr[3] + fragment_size - 1,
1030 static_cast<float>(u_max));
1034 #if defined(__CUDACC__) 1035 int frag_count =
count[0].Item<
int>();
1037 int frag_count = (*count_ptr).load();
1043 int v = workload_idx / w_down;
1044 int u = workload_idx % w_down;
1045 float* range_ptr = range_map_indexer.GetDataPtr<
float>(u, v);
1046 range_ptr[0] = depth_max;
1047 range_ptr[1] = depth_min;
1052 frag_count * fragment_size * fragment_size,
1054 int frag_idx = workload_idx / (fragment_size * fragment_size);
1055 int local_idx = workload_idx % (fragment_size * fragment_size);
1056 int dv = local_idx / fragment_size;
1057 int du = local_idx % fragment_size;
1060 frag_buffer_indexer.GetDataPtr<
float>(frag_idx);
1061 int v_min =
static_cast<int>(frag_ptr[2]);
1062 int u_min =
static_cast<int>(frag_ptr[3]);
1063 int v_max =
static_cast<int>(frag_ptr[4]);
1064 int u_max =
static_cast<int>(frag_ptr[5]);
1068 if (v > v_max || u > u_max)
return;
1070 float z_min = frag_ptr[0];
1071 float z_max = frag_ptr[1];
1072 float* range_ptr = range_map_indexer.GetDataPtr<
float>(u, v);
1074 atomicMinf(&(range_ptr[0]), z_min);
1075 atomicMaxf(&(range_ptr[1]), z_max);
1077 #pragma omp critical 1079 range_ptr[0] = min(z_min, range_ptr[0]);
1080 range_ptr[1] =
max(z_max, range_ptr[1]);
1084 #if defined(__CUDACC__) 1096 return (xin == x && yin == y && zin == z) ?
block_idx : -1;
1106 block_idx = block_idx_in;
1110 #if defined(__CUDACC__) 1115 (std::shared_ptr<core::DeviceHashmap>& hashmap,
1126 int64_t block_resolution,
1132 float weight_threshold) {
1136 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 1139 if (cuda_hashmap ==
nullptr) {
1141 "Unsupported backend: CUDA raycasting only supports STDGPU.");
1143 auto hashmap_impl = cuda_hashmap->GetImpl();
1147 auto hashmap_impl = *cpu_hashmap->
GetImpl();
1158 bool enable_vertex = (vertex_map.GetLength() != 0);
1159 bool enable_depth = (depth_map.GetLength() != 0);
1160 bool enable_color = (color_map.GetLength() != 0);
1161 bool enable_normal = (normal_map.GetLength() != 0);
1162 if (!enable_vertex && !enable_depth && !enable_color && !enable_normal) {
1167 if (enable_vertex) {
1176 if (enable_normal) {
1187 float block_size = voxel_size * block_resolution;
1188 #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) 1189 core::kernel::CUDALauncher launcher;
1199 int x_b,
int y_b,
int z_b,
1200 int x_v,
int y_v,
int z_v,
1203 int x_vn = (x_v + block_resolution) % block_resolution;
1204 int y_vn = (y_v + block_resolution) % block_resolution;
1205 int z_vn = (z_v + block_resolution) % block_resolution;
1207 int dx_b =
Sign(x_v - x_vn);
1208 int dy_b =
Sign(y_v - y_vn);
1209 int dz_b =
Sign(z_v - z_vn);
1211 if (dx_b == 0 && dy_b == 0 && dz_b == 0) {
1212 return voxel_block_buffer_indexer
1217 key.Set(0, x_b + dx_b);
1218 key.Set(1, y_b + dy_b);
1219 key.Set(2, z_b + dz_b);
1221 int block_addr = cache.
Check(key.Get(0), key.Get(1),
1223 if (block_addr < 0) {
1224 auto iter = hashmap_impl.find(key);
1225 if (iter == hashmap_impl.end())
return nullptr;
1226 block_addr = iter->second;
1227 cache.
Update(key.Get(0), key.Get(1), key.Get(2),
1231 return voxel_block_buffer_indexer
1238 float x_o,
float y_o,
float z_o,
1239 float x_d,
float y_d,
float z_d,
1242 float x_g = x_o + t * x_d;
1243 float y_g = y_o + t * y_d;
1244 float z_g = z_o + t * z_d;
1247 int x_b =
static_cast<int>(floorf(x_g / block_size));
1248 int y_b =
static_cast<int>(floorf(y_g / block_size));
1249 int z_b =
static_cast<int>(floorf(z_g / block_size));
1256 int block_addr = cache.
Check(x_b, y_b, z_b);
1257 if (block_addr < 0) {
1258 auto iter = hashmap_impl.find(key);
1259 if (iter == hashmap_impl.end())
return nullptr;
1260 block_addr = iter->second;
1261 cache.
Update(x_b, y_b, z_b, block_addr);
1265 int x_v =
int((x_g - x_b * block_size) / voxel_size);
1266 int y_v =
int((y_g - y_b * block_size) / voxel_size);
1267 int z_v =
int((z_g - z_b * block_size) / voxel_size);
1268 return voxel_block_buffer_indexer.
GetDataPtr<voxel_t>(
1269 x_v, y_v, z_v, block_addr);
1272 int64_t
y = workload_idx / cols;
1273 int64_t
x = workload_idx % cols;
1275 float *depth_ptr =
nullptr, *vertex_ptr =
nullptr,
1276 *normal_ptr =
nullptr, *color_ptr =
nullptr;
1281 if (enable_vertex) {
1282 vertex_ptr = vertex_map_indexer.
GetDataPtr<
float>(
x,
y);
1293 if (enable_normal) {
1294 normal_ptr = normal_map_indexer.
GetDataPtr<
float>(
x,
y);
1300 const float* range =
1301 range_map_indexer.
GetDataPtr<
float>(x / 8, y / 8);
1303 const float t_max = range[1];
1304 if (t >= t_max)
return;
1307 float x_c = 0, y_c = 0, z_c = 0;
1308 float x_g = 0, y_g = 0, z_g = 0;
1309 float x_o = 0, y_o = 0, z_o = 0;
1314 float tsdf_prev = -1.0f;
1323 c2w_transform_indexer.
Unproject(static_cast<float>(x),
1324 static_cast<float>(y), 1.0f,
1328 float x_d = (x_g - x_o);
1329 float y_d = (y_g - y_o);
1330 float z_d = (z_g - z_o);
1333 bool surface_found =
false;
1335 voxel_t* voxel_ptr = GetVoxelAtT(x_o, y_o, z_o, x_d,
1336 y_d, z_d, t, cache);
1343 tsdf = voxel_ptr->GetTSDF();
1344 w = voxel_ptr->GetWeight();
1345 if (tsdf_prev > 0 && w >= weight_threshold &&
1347 surface_found =
true;
1351 float delta = tsdf * sdf_trunc;
1352 t += delta < voxel_size ? voxel_size : delta;
1356 if (surface_found) {
1357 float t_intersect = (t * tsdf_prev - t_prev * tsdf) /
1359 x_g = x_o + t_intersect * x_d;
1360 y_g = y_o + t_intersect * y_d;
1361 z_g = z_o + t_intersect * z_d;
1365 *depth_ptr = t_intersect * depth_scale;
1367 if (enable_vertex) {
1369 x_g, y_g, z_g, vertex_ptr + 0,
1370 vertex_ptr + 1, vertex_ptr + 2);
1376 if (enable_color || enable_normal) {
1378 static_cast<int>(floorf(x_g / block_size));
1380 static_cast<int>(floorf(y_g / block_size));
1382 static_cast<int>(floorf(z_g / block_size));
1383 float x_v = (x_g -
float(x_b) * block_size) /
1385 float y_v = (y_g -
float(y_b) * block_size) /
1387 float z_v = (z_g -
float(z_b) * block_size) /
1395 int block_addr = cache.
Check(x_b, y_b, z_b);
1396 if (block_addr < 0) {
1397 auto iter = hashmap_impl.find(key);
1398 if (iter == hashmap_impl.end())
return;
1399 block_addr = iter->second;
1400 cache.
Update(x_b, y_b, z_b, block_addr);
1403 int x_v_floor =
static_cast<int>(floorf(x_v));
1404 int y_v_floor =
static_cast<int>(floorf(y_v));
1405 int z_v_floor =
static_cast<int>(floorf(z_v));
1407 float ratio_x = x_v -
float(x_v_floor);
1408 float ratio_y = y_v -
float(y_v_floor);
1409 float ratio_z = z_v -
float(z_v_floor);
1411 float sum_weight_color = 0.0;
1412 float sum_weight_normal = 0.0;
1413 for (
int k = 0; k < 8; ++k) {
1414 int dx_v = (k & 1) > 0 ? 1 : 0;
1415 int dy_v = (k & 2) > 0 ? 1 : 0;
1416 int dz_v = (k & 4) > 0 ? 1 : 0;
1417 float ratio = (dx_v * (ratio_x) +
1418 (1 - dx_v) * (1 - ratio_x)) *
1420 (1 - dy_v) * (1 - ratio_y)) *
1422 (1 - dz_v) * (1 - ratio_z));
1424 voxel_t* voxel_ptr_k = GetVoxelAtP(
1425 x_b, y_b, z_b, x_v_floor + dx_v,
1426 y_v_floor + dy_v, z_v_floor + dz_v,
1429 if (enable_color && voxel_ptr_k &&
1430 voxel_ptr_k->GetWeight() > 0) {
1431 sum_weight_color += ratio;
1432 color_ptr[0] += ratio * voxel_ptr_k->GetR();
1433 color_ptr[1] += ratio * voxel_ptr_k->GetG();
1434 color_ptr[2] += ratio * voxel_ptr_k->GetB();
1437 if (enable_normal) {
1438 for (
int dim = 0; dim < 3; ++dim) {
1439 voxel_t* voxel_ptr_k_plus = GetVoxelAtP(
1441 x_v_floor + dx_v + (dim == 0),
1442 y_v_floor + dy_v + (dim == 1),
1443 z_v_floor + dz_v + (dim == 2),
1445 voxel_t* voxel_ptr_k_minus =
1446 GetVoxelAtP(x_b, y_b, z_b,
1456 if (voxel_ptr_k_plus &&
1457 voxel_ptr_k_plus->GetWeight() > 0) {
1466 if (voxel_ptr_k_minus &&
1467 voxel_ptr_k_minus->GetWeight() >
1476 sum_weight_normal += valid ? ratio : 0;
1481 if (enable_color && sum_weight_color > 0) {
1482 sum_weight_color *= 255.0;
1483 color_ptr[0] /= sum_weight_color;
1484 color_ptr[1] /= sum_weight_color;
1485 color_ptr[2] /= sum_weight_color;
1487 if (enable_normal && sum_weight_normal > 0) {
1488 normal_ptr[0] /= sum_weight_normal;
1489 normal_ptr[1] /= sum_weight_normal;
1490 normal_ptr[2] /= sum_weight_normal;
1492 sqrt(normal_ptr[0] * normal_ptr[0] +
1493 normal_ptr[1] * normal_ptr[1] +
1494 normal_ptr[2] * normal_ptr[2]);
1495 w2c_transform_indexer.
Rotate(
1496 normal_ptr[0] / norm,
1497 normal_ptr[1] / norm,
1498 normal_ptr[2] / norm, normal_ptr + 0,
1499 normal_ptr + 1, normal_ptr + 2);
1506 #if defined(__CUDACC__)
Definition: TSDFVoxelGridImpl.h:1089
void ReleaseCache()
Definition: CUDAUtils.cpp:56
Definition: GeometryIndexer.h:168
Definition: CPULauncher.h:42
void ExtractSurfaceMeshCPU(const core::Tensor &block_indices, const core::Tensor &inv_block_indices, const core::Tensor &nb_block_indices, const core::Tensor &nb_block_masks, const core::Tensor &block_keys, const core::Tensor &block_values, core::Tensor &vertices, core::Tensor &triangles, utility::optional< std::reference_wrapper< core::Tensor >> vertex_normals, utility::optional< std::reference_wrapper< core::Tensor >> vertex_colors, int64_t block_resolution, float voxel_size, float weight_threshold, int &vertex_count)
Definition: TSDFVoxelGridImpl.h:489
OPEN3D_HOST_DEVICE int Sign(int x)
Definition: GeometryMacros.h:62
#define LogWarning(...)
Definition: Console.h:95
int block_idx
Definition: TSDFVoxelGridImpl.h:1093
int offset
Definition: FilePCD.cpp:64
#define OPEN3D_CUDA_CHECK(err)
Definition: CUDAUtils.h:59
std::shared_ptr< tbb::concurrent_unordered_map< Key, addr_t, Hash > > GetImpl() const
Definition: TBBHashmap.h:79
Definition: Dispatch.h:115
void EstimateRangeCPU(const core::Tensor &block_keys, core::Tensor &range_minmax_map, const core::Tensor &intrinsics, const core::Tensor &extrinsics, int h, int w, int down_factor, int64_t block_resolution, float voxel_size, float depth_min, float depth_max)
Definition: TSDFVoxelGridImpl.h:903
Device GetDevice() const
Definition: Tensor.cpp:1098
Definition: StdGPUHashmap.h:46
#define LogError(...)
Definition: Console.h:79
void OPEN3D_DEVICE Update(int xin, int yin, int zin, int block_idx_in)
Definition: TSDFVoxelGridImpl.h:1099
#define OPEN3D_DEVICE
Definition: CUDAUtils.h:57
int x
Definition: TSDFVoxelGridImpl.h:1090
#define OPEN3D_ATOMIC_ADD(X, Y)
Definition: GeometryMacros.h:37
static const Dtype Int32
Definition: Dtype.h:46
OPEN3D_HOST_DEVICE T * GetDataPtr(int64_t x) const
Definition: GeometryIndexer.h:324
math::float4 color
Definition: LineSetBuffers.cpp:64
int y
Definition: TSDFVoxelGridImpl.h:1091
core::Tensor InverseTransformation(const core::Tensor &T)
TODO(wei): find a proper place for such functionalities.
Definition: Utility.h:36
Definition: Dispatch.h:72
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c int
Definition: K4aPlugin.cpp:479
static Tensor Zeros(const SizeVector &shape, Dtype dtype, const Device &device=Device("CPU:0"))
Create a tensor filled with zeros.
Definition: Tensor.cpp:240
#define LogInfo(...)
Definition: Console.h:108
int count
Definition: FilePCD.cpp:61
static const Dtype Float32
Definition: Dtype.h:42
void ExtractSurfacePointsCPU(const core::Tensor &block_indices, const core::Tensor &nb_block_indices, const core::Tensor &nb_block_masks, const core::Tensor &block_keys, const core::Tensor &block_values, core::Tensor &points, utility::optional< std::reference_wrapper< core::Tensor >> normals, utility::optional< std::reference_wrapper< core::Tensor >> colors, int64_t block_resolution, float voxel_size, float weight_threshold, int &valid_size)
Definition: TSDFVoxelGridImpl.h:177
Definition: Optional.h:54
static void LaunchGeneralKernel(int64_t n, func_t element_kernel)
General kernels with non-conventional indexers.
Definition: CPULauncher.h:176
int OPEN3D_DEVICE Check(int xin, int yin, int zin)
Definition: TSDFVoxelGridImpl.h:1095
int points
Definition: FilePCD.cpp:73
static const Dtype Int64
Definition: Dtype.h:47
#define DISPATCH_BYTESIZE_TO_VOXEL(BYTESIZE,...)
Definition: TSDFVoxel.h:33
Definition: PinholeCameraIntrinsic.cpp:35
#define LogDebug(...)
Definition: Console.h:121
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:465
uint32_t addr_t
Definition: HashmapBuffer.h:58
OPEN3D_HOST_DEVICE int64_t ElementByteSize()
Definition: GeometryIndexer.h:234
void RayCastCPU(std::shared_ptr< core::DeviceHashmap > &hashmap, const core::Tensor &block_values, const core::Tensor &range_map, core::Tensor &vertex_map, core::Tensor &depth_map, core::Tensor &color_map, core::Tensor &normal_map, const core::Tensor &intrinsics, const core::Tensor &extrinsics, int h, int w, int64_t block_resolution, float voxel_size, float sdf_trunc, float depth_scale, float depth_min, float depth_max, float weight_threshold)
Definition: TSDFVoxelGridImpl.h:1115
T * GetDataPtr()
Definition: Tensor.h:1005
void IntegrateCPU(const core::Tensor &depth, const core::Tensor &color, const core::Tensor &block_indices, const core::Tensor &block_keys, core::Tensor &block_values, const core::Tensor &intrinsics, const core::Tensor &extrinsics, int64_t resolution, float voxel_size, float sdf_trunc, float depth_scale, float depth_max)
Definition: TSDFVoxelGridImpl.h:54
OPEN3D_HOST_DEVICE bool InBoundary(float x, float y) const
Definition: GeometryIndexer.h:302
Definition: TBBHashmap.h:40
int64_t GetLength() const
Definition: Tensor.h:986
int z
Definition: TSDFVoxelGridImpl.h:1092
#define max(x, y)
Definition: SVD3x3CPU.h:38