Open3D (C++ API)  0.19.0
PointCloudImpl.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.open3d.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
8 #include <atomic>
9 #include <vector>
10 
11 #include "open3d/core/CUDAUtils.h"
12 #include "open3d/core/Dispatch.h"
13 #include "open3d/core/Dtype.h"
16 #include "open3d/core/SizeVector.h"
17 #include "open3d/core/Tensor.h"
25 #include "open3d/utility/Logging.h"
26 
27 namespace open3d {
28 namespace t {
29 namespace geometry {
30 namespace kernel {
31 namespace pointcloud {
32 
33 #ifndef __CUDACC__
34 using std::abs;
35 using std::max;
36 using std::min;
37 using std::sqrt;
38 #endif
39 
40 #if defined(__CUDACC__)
41 void UnprojectCUDA
42 #else
44 #endif
45  (const core::Tensor& depth,
46  std::optional<std::reference_wrapper<const core::Tensor>> image_colors,
48  std::optional<std::reference_wrapper<core::Tensor>> colors,
49  const core::Tensor& intrinsics,
50  const core::Tensor& extrinsics,
51  float depth_scale,
52  float depth_max,
53  int64_t stride) {
54 
55  const bool have_colors = image_colors.has_value();
56  NDArrayIndexer depth_indexer(depth, 2);
57  NDArrayIndexer image_colors_indexer;
58 
60  TransformIndexer ti(intrinsics, pose, 1.0f);
61 
62  // Output
63  int64_t rows_strided = depth_indexer.GetShape(0) / stride;
64  int64_t cols_strided = depth_indexer.GetShape(1) / stride;
65 
66  points = core::Tensor({rows_strided * cols_strided, 3}, core::Float32,
67  depth.GetDevice());
68  NDArrayIndexer point_indexer(points, 1);
69  NDArrayIndexer colors_indexer;
70  if (have_colors) {
71  const auto& imcol = image_colors.value().get();
72  image_colors_indexer = NDArrayIndexer{imcol, 2};
73  colors.value().get() = core::Tensor({rows_strided * cols_strided, 3},
74  core::Float32, imcol.GetDevice());
75  colors_indexer = NDArrayIndexer(colors.value().get(), 1);
76  }
77 
78  // Counter
79 #if defined(__CUDACC__)
80  core::Tensor count(std::vector<int>{0}, {}, core::Int32, depth.GetDevice());
81  int* count_ptr = count.GetDataPtr<int>();
82 #else
83  std::atomic<int> count_atomic(0);
84  std::atomic<int>* count_ptr = &count_atomic;
85 #endif
86 
87  int64_t n = rows_strided * cols_strided;
88 
89  DISPATCH_DTYPE_TO_TEMPLATE(depth.GetDtype(), [&]() {
90  core::ParallelFor(
91  depth.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
92  int64_t y = (workload_idx / cols_strided) * stride;
93  int64_t x = (workload_idx % cols_strided) * stride;
94 
95  float d = *depth_indexer.GetDataPtr<scalar_t>(x, y) /
96  depth_scale;
97  if (d > 0 && d < depth_max) {
98  int idx = OPEN3D_ATOMIC_ADD(count_ptr, 1);
99 
100  float x_c = 0, y_c = 0, z_c = 0;
101  ti.Unproject(static_cast<float>(x),
102  static_cast<float>(y), d, &x_c, &y_c,
103  &z_c);
104 
105  float* vertex = point_indexer.GetDataPtr<float>(idx);
106  ti.RigidTransform(x_c, y_c, z_c, vertex + 0, vertex + 1,
107  vertex + 2);
108  if (have_colors) {
109  float* pcd_pixel =
110  colors_indexer.GetDataPtr<float>(idx);
111  float* image_pixel =
112  image_colors_indexer.GetDataPtr<float>(x,
113  y);
114  *pcd_pixel = *image_pixel;
115  *(pcd_pixel + 1) = *(image_pixel + 1);
116  *(pcd_pixel + 2) = *(image_pixel + 2);
117  }
118  }
119  });
120  });
121 #if defined(__CUDACC__)
122  int total_pts_count = count.Item<int>();
123 #else
124  int total_pts_count = (*count_ptr).load();
125 #endif
126 
127 #ifdef __CUDACC__
129 #endif
130  points = points.Slice(0, 0, total_pts_count);
131  if (have_colors) {
132  colors.value().get() =
133  colors.value().get().Slice(0, 0, total_pts_count);
134  }
135 }
136 
137 #if defined(__CUDACC__)
138 void GetPointMaskWithinAABBCUDA
139 #else
141 #endif
142  (const core::Tensor& points,
143  const core::Tensor& min_bound,
144  const core::Tensor& max_bound,
145  core::Tensor& mask) {
146 
147  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
148  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
149  const int64_t n = points.GetLength();
150  const scalar_t* min_bound_ptr = min_bound.GetDataPtr<scalar_t>();
151  const scalar_t* max_bound_ptr = max_bound.GetDataPtr<scalar_t>();
152  bool* mask_ptr = mask.GetDataPtr<bool>();
153 
154  core::ParallelFor(
155  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
156  const scalar_t x = points_ptr[3 * workload_idx + 0];
157  const scalar_t y = points_ptr[3 * workload_idx + 1];
158  const scalar_t z = points_ptr[3 * workload_idx + 2];
159 
160  if (x >= min_bound_ptr[0] && x <= max_bound_ptr[0] &&
161  y >= min_bound_ptr[1] && y <= max_bound_ptr[1] &&
162  z >= min_bound_ptr[2] && z <= max_bound_ptr[2]) {
163  mask_ptr[workload_idx] = true;
164  } else {
165  mask_ptr[workload_idx] = false;
166  }
167  });
168  });
169 }
170 
171 #if defined(__CUDACC__)
172 void GetPointMaskWithinOBBCUDA
173 #else
175 #endif
176  (const core::Tensor& points,
177  const core::Tensor& center,
178  const core::Tensor& rotation,
179  const core::Tensor& extent,
180  core::Tensor& mask) {
181  const core::Tensor half_extent = extent.Div(2);
182  // Since we will extract 3 rotation axis from matrix and use it inside
183  // kernel, the transpose is needed.
184  const core::Tensor rotation_t = rotation.Transpose(0, 1).Contiguous();
185  const core::Tensor pd = points - center;
186  const int64_t n = points.GetLength();
187 
188  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
189  const scalar_t* pd_ptr = pd.GetDataPtr<scalar_t>();
190  // const scalar_t* center_ptr = center.GetDataPtr<scalar_t>();
191  const scalar_t* rotation_ptr = rotation_t.GetDataPtr<scalar_t>();
192  const scalar_t* half_extent_ptr = half_extent.GetDataPtr<scalar_t>();
193  bool* mask_ptr = mask.GetDataPtr<bool>();
194 
195  core::ParallelFor(points.GetDevice(), n,
196  [=] OPEN3D_DEVICE(int64_t workload_idx) {
197  int64_t idx = 3 * workload_idx;
198  if (abs(core::linalg::kernel::dot_3x1(
199  pd_ptr + idx, rotation_ptr)) <=
200  half_extent_ptr[0] &&
201  abs(core::linalg::kernel::dot_3x1(
202  pd_ptr + idx, rotation_ptr + 3)) <=
203  half_extent_ptr[1] &&
204  abs(core::linalg::kernel::dot_3x1(
205  pd_ptr + idx, rotation_ptr + 6)) <=
206  half_extent_ptr[2]) {
207  mask_ptr[workload_idx] = true;
208  } else {
209  mask_ptr[workload_idx] = false;
210  }
211  });
212  });
213 }
214 
215 #if defined(__CUDACC__)
216 void NormalizeNormalsCUDA
217 #else
219 #endif
220  (core::Tensor& normals) {
221  const core::Dtype dtype = normals.GetDtype();
222  const int64_t n = normals.GetLength();
223 
224  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
225  scalar_t* ptr = normals.GetDataPtr<scalar_t>();
226 
227  core::ParallelFor(normals.GetDevice(), n,
228  [=] OPEN3D_DEVICE(int64_t workload_idx) {
229  int64_t idx = 3 * workload_idx;
230  scalar_t x = ptr[idx];
231  scalar_t y = ptr[idx + 1];
232  scalar_t z = ptr[idx + 2];
233  scalar_t norm = sqrt(x * x + y * y + z * z);
234  if (norm > 0) {
235  x /= norm;
236  y /= norm;
237  z /= norm;
238  }
239  ptr[idx] = x;
240  ptr[idx + 1] = y;
241  ptr[idx + 2] = z;
242  });
243  });
244 }
245 
246 #if defined(__CUDACC__)
247 void OrientNormalsToAlignWithDirectionCUDA
248 #else
250 #endif
251  (core::Tensor& normals, const core::Tensor& direction) {
252  const core::Dtype dtype = normals.GetDtype();
253  const int64_t n = normals.GetLength();
254 
255  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
256  scalar_t* ptr = normals.GetDataPtr<scalar_t>();
257  const scalar_t* direction_ptr = direction.GetDataPtr<scalar_t>();
258 
259  core::ParallelFor(normals.GetDevice(), n,
260  [=] OPEN3D_DEVICE(int64_t workload_idx) {
261  int64_t idx = 3 * workload_idx;
262  scalar_t* normal = ptr + idx;
263  const scalar_t norm = sqrt(normal[0] * normal[0] +
264  normal[1] * normal[1] +
265  normal[2] * normal[2]);
266  if (norm == 0.0) {
267  normal[0] = direction_ptr[0];
268  normal[1] = direction_ptr[1];
269  normal[2] = direction_ptr[2];
271  normal, direction_ptr) < 0) {
272  normal[0] *= -1;
273  normal[1] *= -1;
274  normal[2] *= -1;
275  }
276  });
277  });
278 }
279 
280 #if defined(__CUDACC__)
281 void OrientNormalsTowardsCameraLocationCUDA
282 #else
284 #endif
285  (const core::Tensor& points,
286  core::Tensor& normals,
287  const core::Tensor& camera) {
288  const core::Dtype dtype = points.GetDtype();
289  const int64_t n = normals.GetLength();
290 
291  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
292  scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
293  const scalar_t* camera_ptr = camera.GetDataPtr<scalar_t>();
294  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
295 
297  normals.GetDevice(), n,
298  [=] OPEN3D_DEVICE(int64_t workload_idx) {
299  int64_t idx = 3 * workload_idx;
300  scalar_t* normal = normals_ptr + idx;
301  const scalar_t* point = points_ptr + idx;
302  const scalar_t reference[3] = {camera_ptr[0] - point[0],
303  camera_ptr[1] - point[1],
304  camera_ptr[2] - point[2]};
305  const scalar_t norm =
306  sqrt(normal[0] * normal[0] + normal[1] * normal[1] +
307  normal[2] * normal[2]);
308  if (norm == 0.0) {
309  normal[0] = reference[0];
310  normal[1] = reference[1];
311  normal[2] = reference[2];
312  const scalar_t norm_new = sqrt(normal[0] * normal[0] +
313  normal[1] * normal[1] +
314  normal[2] * normal[2]);
315  if (norm_new == 0.0) {
316  normal[0] = 0.0;
317  normal[1] = 0.0;
318  normal[2] = 1.0;
319  } else {
320  normal[0] /= norm_new;
321  normal[1] /= norm_new;
322  normal[2] /= norm_new;
323  }
324  } else if (core::linalg::kernel::dot_3x1(normal,
325  reference) < 0) {
326  normal[0] *= -1;
327  normal[1] *= -1;
328  normal[2] *= -1;
329  }
330  });
331  });
332 }
333 
334 template <typename scalar_t>
336  scalar_t* u,
337  scalar_t* v) {
338  // Unless the x and y coords are both close to zero, we can simply take
339  // ( -y, x, 0 ) and normalize it. If both x and y are close to zero,
340  // then the vector is close to the z-axis, so it's far from colinear to
341  // the x-axis for instance. So we take the crossed product with (1,0,0)
342  // and normalize it.
343  if (!(abs(query[0] - query[2]) < 1e-6) ||
344  !(abs(query[1] - query[2]) < 1e-6)) {
345  const scalar_t norm2_inv =
346  1.0 / sqrt(query[0] * query[0] + query[1] * query[1]);
347  v[0] = -1 * query[1] * norm2_inv;
348  v[1] = query[0] * norm2_inv;
349  v[2] = 0;
350  } else {
351  const scalar_t norm2_inv =
352  1.0 / sqrt(query[1] * query[1] + query[2] * query[2]);
353  v[0] = 0;
354  v[1] = -1 * query[2] * norm2_inv;
355  v[2] = query[1] * norm2_inv;
356  }
357 
358  core::linalg::kernel::cross_3x1(query, v, u);
359 }
360 
361 template <typename scalar_t>
362 inline OPEN3D_HOST_DEVICE void Swap(scalar_t* x, scalar_t* y) {
363  scalar_t tmp = *x;
364  *x = *y;
365  *y = tmp;
366 }
367 
368 template <typename scalar_t>
369 inline OPEN3D_HOST_DEVICE void Heapify(scalar_t* arr, int n, int root) {
370  int largest = root;
371  int l = 2 * root + 1;
372  int r = 2 * root + 2;
373 
374  if (l < n && arr[l] > arr[largest]) {
375  largest = l;
376  }
377  if (r < n && arr[r] > arr[largest]) {
378  largest = r;
379  }
380  if (largest != root) {
381  Swap<scalar_t>(&arr[root], &arr[largest]);
382  Heapify<scalar_t>(arr, n, largest);
383  }
384 }
385 
386 template <typename scalar_t>
387 OPEN3D_HOST_DEVICE void HeapSort(scalar_t* arr, int n) {
388  for (int i = n / 2 - 1; i >= 0; i--) Heapify(arr, n, i);
389 
390  for (int i = n - 1; i > 0; i--) {
391  Swap<scalar_t>(&arr[0], &arr[i]);
392  Heapify<scalar_t>(arr, i, 0);
393  }
394 }
395 
396 template <typename scalar_t>
397 OPEN3D_HOST_DEVICE bool IsBoundaryPoints(const scalar_t* angles,
398  int counts,
399  double angle_threshold) {
400  scalar_t diff;
401  scalar_t max_diff = 0;
402  // Compute the maximal angle difference between two consecutive angles.
403  for (int i = 0; i < counts - 1; i++) {
404  diff = angles[i + 1] - angles[i];
405  max_diff = max(max_diff, diff);
406  }
407 
408  // Get the angle difference between the last and the first.
409  diff = 2 * M_PI - angles[counts - 1] + angles[0];
410  max_diff = max(max_diff, diff);
411 
412  return max_diff > angle_threshold * M_PI / 180.0 ? true : false;
413 }
414 
415 #if defined(__CUDACC__)
416 void ComputeBoundaryPointsCUDA
417 #else
419 #endif
420  (const core::Tensor& points,
421  const core::Tensor& normals,
422  const core::Tensor& indices,
423  const core::Tensor& counts,
424  core::Tensor& mask,
425  double angle_threshold) {
426  const int nn_size = indices.GetShape()[1];
427 
428  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(points.GetDtype(), [&]() {
429  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
430  const scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
431  const int64_t n = points.GetLength();
432  const int32_t* indices_ptr = indices.GetDataPtr<int32_t>();
433  const int32_t* counts_ptr = counts.GetDataPtr<int32_t>();
434  bool* mask_ptr = mask.GetDataPtr<bool>();
435 
436  core::Tensor angles = core::Tensor::Full(
437  indices.GetShape(), -10, points.GetDtype(), points.GetDevice());
438  scalar_t* angles_ptr = angles.GetDataPtr<scalar_t>();
439 
440  core::ParallelFor(
441  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
442  scalar_t u[3], v[3];
443  GetCoordinateSystemOnPlane(normals_ptr + 3 * workload_idx,
444  u, v);
445 
446  // Ignore the point itself.
447  int indices_size = counts_ptr[workload_idx] - 1;
448  if (indices_size > 0) {
449  const scalar_t* query = points_ptr + 3 * workload_idx;
450  for (int i = 1; i < indices_size + 1; i++) {
451  const int idx = workload_idx * nn_size + i;
452 
453  const scalar_t* point_ref =
454  points_ptr + 3 * indices_ptr[idx];
455  const scalar_t delta[3] = {point_ref[0] - query[0],
456  point_ref[1] - query[1],
457  point_ref[2] - query[2]};
458  const scalar_t angle = atan2(
459  core::linalg::kernel::dot_3x1(v, delta),
460  core::linalg::kernel::dot_3x1(u, delta));
461 
462  angles_ptr[idx] = angle;
463  }
464 
465  // Sort the angles in ascending order.
466  HeapSort<scalar_t>(
467  angles_ptr + workload_idx * nn_size + 1,
468  indices_size);
469 
470  mask_ptr[workload_idx] = IsBoundaryPoints<scalar_t>(
471  angles_ptr + workload_idx * nn_size + 1,
472  indices_size, angle_threshold);
473  }
474  });
475  });
476 }
477 
478 // This is a `two-pass` estimate method for covariance which is numerically more
479 // robust than the `textbook` method generally used for covariance computation.
480 template <typename scalar_t>
482  const scalar_t* points_ptr,
483  const int32_t* indices_ptr,
484  const int32_t& indices_count,
485  scalar_t* covariance_ptr) {
486  if (indices_count < 3) {
487  covariance_ptr[0] = 1.0;
488  covariance_ptr[1] = 0.0;
489  covariance_ptr[2] = 0.0;
490  covariance_ptr[3] = 0.0;
491  covariance_ptr[4] = 1.0;
492  covariance_ptr[5] = 0.0;
493  covariance_ptr[6] = 0.0;
494  covariance_ptr[7] = 0.0;
495  covariance_ptr[8] = 1.0;
496  return;
497  }
498 
499  double centroid[3] = {0};
500  for (int32_t i = 0; i < indices_count; ++i) {
501  int32_t idx = 3 * indices_ptr[i];
502  centroid[0] += points_ptr[idx];
503  centroid[1] += points_ptr[idx + 1];
504  centroid[2] += points_ptr[idx + 2];
505  }
506 
507  centroid[0] /= indices_count;
508  centroid[1] /= indices_count;
509  centroid[2] /= indices_count;
510 
511  // cumulants must always be Float64 to ensure precision.
512  double cumulants[6] = {0};
513  for (int32_t i = 0; i < indices_count; ++i) {
514  int32_t idx = 3 * indices_ptr[i];
515  const double x = static_cast<double>(points_ptr[idx]) - centroid[0];
516  const double y = static_cast<double>(points_ptr[idx + 1]) - centroid[1];
517  const double z = static_cast<double>(points_ptr[idx + 2]) - centroid[2];
518 
519  cumulants[0] += x * x;
520  cumulants[1] += y * y;
521  cumulants[2] += z * z;
522 
523  cumulants[3] += x * y;
524  cumulants[4] += x * z;
525  cumulants[5] += y * z;
526  }
527 
528  // Using Bessel's correction (dividing by (n - 1) instead of n).
529  // Refer:
530  // https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
531  const double normalization_factor = static_cast<double>(indices_count - 1);
532  for (int i = 0; i < 6; ++i) {
533  cumulants[i] /= normalization_factor;
534  }
535 
536  // Covariances(0, 0)
537  covariance_ptr[0] = static_cast<scalar_t>(cumulants[0]);
538  // Covariances(1, 1)
539  covariance_ptr[4] = static_cast<scalar_t>(cumulants[1]);
540  // Covariances(2, 2)
541  covariance_ptr[8] = static_cast<scalar_t>(cumulants[2]);
542 
543  // Covariances(0, 1) = Covariances(1, 0)
544  covariance_ptr[1] = static_cast<scalar_t>(cumulants[3]);
545  covariance_ptr[3] = covariance_ptr[1];
546 
547  // Covariances(0, 2) = Covariances(2, 0)
548  covariance_ptr[2] = static_cast<scalar_t>(cumulants[4]);
549  covariance_ptr[6] = covariance_ptr[2];
550 
551  // Covariances(1, 2) = Covariances(2, 1)
552  covariance_ptr[5] = static_cast<scalar_t>(cumulants[5]);
553  covariance_ptr[7] = covariance_ptr[5];
554 }
555 
556 #if defined(__CUDACC__)
557 void EstimateCovariancesUsingHybridSearchCUDA
558 #else
560 #endif
561  (const core::Tensor& points,
562  core::Tensor& covariances,
563  const double& radius,
564  const int64_t& max_nn) {
565  core::Dtype dtype = points.GetDtype();
566  int64_t n = points.GetLength();
567 
569  bool check = tree.HybridIndex(radius);
570  if (!check) {
571  utility::LogError("Building FixedRadiusIndex failed.");
572  }
573 
574  core::Tensor indices, distance, counts;
575  std::tie(indices, distance, counts) =
576  tree.HybridSearch(points, radius, max_nn);
577 
578  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
579  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
580  int32_t* neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
581  int32_t* neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
582  scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();
583 
585  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
586  // NNS [Hybrid Search].
587  const int32_t neighbour_offset = max_nn * workload_idx;
588  // Count of valid correspondences per point.
589  const int32_t neighbour_count =
590  neighbour_counts_ptr[workload_idx];
591  // Covariance is of shape {3, 3}, so it has an
592  // offset factor of 9 x workload_idx.
593  const int32_t covariances_offset = 9 * workload_idx;
594 
596  points_ptr,
597  neighbour_indices_ptr + neighbour_offset,
598  neighbour_count,
599  covariances_ptr + covariances_offset);
600  });
601  });
602 
603  core::cuda::Synchronize(points.GetDevice());
604 }
605 
606 #if defined(__CUDACC__)
607 void EstimateCovariancesUsingRadiusSearchCUDA
608 #else
610 #endif
611  (const core::Tensor& points,
612  core::Tensor& covariances,
613  const double& radius) {
614  core::Dtype dtype = points.GetDtype();
615  int64_t n = points.GetLength();
616 
618  bool check = tree.FixedRadiusIndex(radius);
619  if (!check) {
620  utility::LogError("Building Radius-Index failed.");
621  }
622 
623  core::Tensor indices, distance, counts;
624  std::tie(indices, distance, counts) =
625  tree.FixedRadiusSearch(points, radius);
626 
627  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
628  const scalar_t* points_ptr = points.GetDataPtr<scalar_t>();
629  const int32_t* neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
630  const int32_t* neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
631  scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();
632 
634  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
635  const int32_t neighbour_offset =
636  neighbour_counts_ptr[workload_idx];
637  const int32_t neighbour_count =
638  (neighbour_counts_ptr[workload_idx + 1] -
639  neighbour_counts_ptr[workload_idx]);
640  // Covariance is of shape {3, 3}, so it has an offset
641  // factor of 9 x workload_idx.
642  const int32_t covariances_offset = 9 * workload_idx;
643 
645  points_ptr,
646  neighbour_indices_ptr + neighbour_offset,
647  neighbour_count,
648  covariances_ptr + covariances_offset);
649  });
650  });
651 
652  core::cuda::Synchronize(points.GetDevice());
653 }
654 
655 #if defined(__CUDACC__)
656 void EstimateCovariancesUsingKNNSearchCUDA
657 #else
659 #endif
660  (const core::Tensor& points,
661  core::Tensor& covariances,
662  const int64_t& max_nn) {
663  core::Dtype dtype = points.GetDtype();
664  int64_t n = points.GetLength();
665 
667  bool check = tree.KnnIndex();
668  if (!check) {
669  utility::LogError("Building KNN-Index failed.");
670  }
671 
672  core::Tensor indices, distance;
673  std::tie(indices, distance) = tree.KnnSearch(points, max_nn);
674 
675  indices = indices.Contiguous();
676  int32_t nn_count = static_cast<int32_t>(indices.GetShape()[1]);
677 
678  if (nn_count < 3) {
680  "Not enough neighbors to compute Covariances / Normals. "
681  "Try "
682  "increasing the max_nn parameter.");
683  }
684 
685  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
686  auto points_ptr = points.GetDataPtr<scalar_t>();
687  auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
688  auto covariances_ptr = covariances.GetDataPtr<scalar_t>();
689 
691  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
692  // NNS [KNN Search].
693  const int32_t neighbour_offset = nn_count * workload_idx;
694  // Covariance is of shape {3, 3}, so it has an offset
695  // factor of 9 x workload_idx.
696  const int32_t covariances_offset = 9 * workload_idx;
697 
699  points_ptr,
700  neighbour_indices_ptr + neighbour_offset, nn_count,
701  covariances_ptr + covariances_offset);
702  });
703  });
704 
705  core::cuda::Synchronize(points.GetDevice());
706 }
707 
708 template <typename scalar_t>
710  const scalar_t eval0,
711  scalar_t* eigen_vector0) {
712  scalar_t row0[3] = {A[0] - eval0, A[1], A[2]};
713  scalar_t row1[3] = {A[1], A[4] - eval0, A[5]};
714  scalar_t row2[3] = {A[2], A[5], A[8] - eval0};
715 
716  scalar_t r0xr1[3], r0xr2[3], r1xr2[3];
717 
718  core::linalg::kernel::cross_3x1(row0, row1, r0xr1);
719  core::linalg::kernel::cross_3x1(row0, row2, r0xr2);
720  core::linalg::kernel::cross_3x1(row1, row2, r1xr2);
721 
722  scalar_t d0 = core::linalg::kernel::dot_3x1(r0xr1, r0xr1);
723  scalar_t d1 = core::linalg::kernel::dot_3x1(r0xr2, r0xr2);
724  scalar_t d2 = core::linalg::kernel::dot_3x1(r1xr2, r1xr2);
725 
726  scalar_t dmax = d0;
727  int imax = 0;
728  if (d1 > dmax) {
729  dmax = d1;
730  imax = 1;
731  }
732  if (d2 > dmax) {
733  imax = 2;
734  }
735 
736  if (imax == 0) {
737  scalar_t sqrt_d = sqrt(d0);
738  eigen_vector0[0] = r0xr1[0] / sqrt_d;
739  eigen_vector0[1] = r0xr1[1] / sqrt_d;
740  eigen_vector0[2] = r0xr1[2] / sqrt_d;
741  return;
742  } else if (imax == 1) {
743  scalar_t sqrt_d = sqrt(d1);
744  eigen_vector0[0] = r0xr2[0] / sqrt_d;
745  eigen_vector0[1] = r0xr2[1] / sqrt_d;
746  eigen_vector0[2] = r0xr2[2] / sqrt_d;
747  return;
748  } else {
749  scalar_t sqrt_d = sqrt(d2);
750  eigen_vector0[0] = r1xr2[0] / sqrt_d;
751  eigen_vector0[1] = r1xr2[1] / sqrt_d;
752  eigen_vector0[2] = r1xr2[2] / sqrt_d;
753  return;
754  }
755 }
756 
757 template <typename scalar_t>
759  const scalar_t* evec0,
760  const scalar_t eval1,
761  scalar_t* eigen_vector1) {
762  scalar_t U[3];
763  if (abs(evec0[0]) > abs(evec0[1])) {
764  scalar_t inv_length =
765  1.0 / sqrt(evec0[0] * evec0[0] + evec0[2] * evec0[2]);
766  U[0] = -evec0[2] * inv_length;
767  U[1] = 0.0;
768  U[2] = evec0[0] * inv_length;
769  } else {
770  scalar_t inv_length =
771  1.0 / sqrt(evec0[1] * evec0[1] + evec0[2] * evec0[2]);
772  U[0] = 0.0;
773  U[1] = evec0[2] * inv_length;
774  U[2] = -evec0[1] * inv_length;
775  }
776  scalar_t V[3], AU[3], AV[3];
777  core::linalg::kernel::cross_3x1(evec0, U, V);
778  core::linalg::kernel::matmul3x3_3x1(A, U, AU);
779  core::linalg::kernel::matmul3x3_3x1(A, V, AV);
780 
781  scalar_t m00 = core::linalg::kernel::dot_3x1(U, AU) - eval1;
782  scalar_t m01 = core::linalg::kernel::dot_3x1(U, AV);
783  scalar_t m11 = core::linalg::kernel::dot_3x1(V, AV) - eval1;
784 
785  scalar_t absM00 = abs(m00);
786  scalar_t absM01 = abs(m01);
787  scalar_t absM11 = abs(m11);
788  scalar_t max_abs_comp;
789 
790  if (absM00 >= absM11) {
791  max_abs_comp = max(absM00, absM01);
792  if (max_abs_comp > 0) {
793  if (absM00 >= absM01) {
794  m01 /= m00;
795  m00 = 1 / sqrt(1 + m01 * m01);
796  m01 *= m00;
797  } else {
798  m00 /= m01;
799  m01 = 1 / sqrt(1 + m00 * m00);
800  m00 *= m01;
801  }
802  eigen_vector1[0] = m01 * U[0] - m00 * V[0];
803  eigen_vector1[1] = m01 * U[1] - m00 * V[1];
804  eigen_vector1[2] = m01 * U[2] - m00 * V[2];
805  return;
806  } else {
807  eigen_vector1[0] = U[0];
808  eigen_vector1[1] = U[1];
809  eigen_vector1[2] = U[2];
810  return;
811  }
812  } else {
813  max_abs_comp = max(absM11, absM01);
814  if (max_abs_comp > 0) {
815  if (absM11 >= absM01) {
816  m01 /= m11;
817  m11 = 1 / sqrt(1 + m01 * m01);
818  m01 *= m11;
819  } else {
820  m11 /= m01;
821  m01 = 1 / sqrt(1 + m11 * m11);
822  m11 *= m01;
823  }
824  eigen_vector1[0] = m11 * U[0] - m01 * V[0];
825  eigen_vector1[1] = m11 * U[1] - m01 * V[1];
826  eigen_vector1[2] = m11 * U[2] - m01 * V[2];
827  return;
828  } else {
829  eigen_vector1[0] = U[0];
830  eigen_vector1[1] = U[1];
831  eigen_vector1[2] = U[2];
832  return;
833  }
834  }
835 }
836 
837 template <typename scalar_t>
839  const scalar_t* covariance_ptr, scalar_t* normals_ptr) {
840  // Based on:
841  // https://www.geometrictools.com/Documentation/RobustEigenSymmetric3x3.pdf
842  // which handles edge cases like points on a plane.
843  scalar_t max_coeff = covariance_ptr[0];
844 
845  for (int i = 1; i < 9; ++i) {
846  if (max_coeff < covariance_ptr[i]) {
847  max_coeff = covariance_ptr[i];
848  }
849  }
850 
851  if (max_coeff == 0) {
852  normals_ptr[0] = 0.0;
853  normals_ptr[1] = 0.0;
854  normals_ptr[2] = 0.0;
855  return;
856  }
857 
858  scalar_t A[9] = {0};
859 
860  for (int i = 0; i < 9; ++i) {
861  A[i] = covariance_ptr[i] / max_coeff;
862  }
863 
864  scalar_t norm = A[1] * A[1] + A[2] * A[2] + A[5] * A[5];
865 
866  if (norm > 0) {
867  scalar_t eval[3];
868  scalar_t evec0[3];
869  scalar_t evec1[3];
870  scalar_t evec2[3];
871 
872  scalar_t q = (A[0] + A[4] + A[8]) / 3.0;
873 
874  scalar_t b00 = A[0] - q;
875  scalar_t b11 = A[4] - q;
876  scalar_t b22 = A[8] - q;
877 
878  scalar_t p =
879  sqrt((b00 * b00 + b11 * b11 + b22 * b22 + norm * 2.0) / 6.0);
880 
881  scalar_t c00 = b11 * b22 - A[5] * A[5];
882  scalar_t c01 = A[1] * b22 - A[5] * A[2];
883  scalar_t c02 = A[1] * A[5] - b11 * A[2];
884  scalar_t det = (b00 * c00 - A[1] * c01 + A[2] * c02) / (p * p * p);
885 
886  scalar_t half_det = det * 0.5;
887  half_det = min(max(half_det, static_cast<scalar_t>(-1.0)),
888  static_cast<scalar_t>(1.0));
889 
890  scalar_t angle = acos(half_det) / 3.0;
891  const scalar_t two_thrids_pi = 2.09439510239319549;
892 
893  scalar_t beta2 = cos(angle) * 2.0;
894  scalar_t beta0 = cos(angle + two_thrids_pi) * 2.0;
895  scalar_t beta1 = -(beta0 + beta2);
896 
897  eval[0] = q + p * beta0;
898  eval[1] = q + p * beta1;
899  eval[2] = q + p * beta2;
900 
901  if (half_det >= 0) {
902  ComputeEigenvector0<scalar_t>(A, eval[2], evec2);
903 
904  if (eval[2] < eval[0] && eval[2] < eval[1]) {
905  normals_ptr[0] = evec2[0];
906  normals_ptr[1] = evec2[1];
907  normals_ptr[2] = evec2[2];
908 
909  return;
910  }
911 
912  ComputeEigenvector1<scalar_t>(A, evec2, eval[1], evec1);
913 
914  if (eval[1] < eval[0] && eval[1] < eval[2]) {
915  normals_ptr[0] = evec1[0];
916  normals_ptr[1] = evec1[1];
917  normals_ptr[2] = evec1[2];
918 
919  return;
920  }
921 
922  normals_ptr[0] = evec1[1] * evec2[2] - evec1[2] * evec2[1];
923  normals_ptr[1] = evec1[2] * evec2[0] - evec1[0] * evec2[2];
924  normals_ptr[2] = evec1[0] * evec2[1] - evec1[1] * evec2[0];
925 
926  return;
927  } else {
928  ComputeEigenvector0<scalar_t>(A, eval[0], evec0);
929 
930  if (eval[0] < eval[1] && eval[0] < eval[2]) {
931  normals_ptr[0] = evec0[0];
932  normals_ptr[1] = evec0[1];
933  normals_ptr[2] = evec0[2];
934  return;
935  }
936 
937  ComputeEigenvector1<scalar_t>(A, evec0, eval[1], evec1);
938 
939  if (eval[1] < eval[0] && eval[1] < eval[2]) {
940  normals_ptr[0] = evec1[0];
941  normals_ptr[1] = evec1[1];
942  normals_ptr[2] = evec1[2];
943  return;
944  }
945 
946  normals_ptr[0] = evec0[1] * evec1[2] - evec0[2] * evec1[1];
947  normals_ptr[1] = evec0[2] * evec1[0] - evec0[0] * evec1[2];
948  normals_ptr[2] = evec0[0] * evec1[1] - evec0[1] * evec1[0];
949  return;
950  }
951  } else {
952  if (covariance_ptr[0] < covariance_ptr[4] &&
953  covariance_ptr[0] < covariance_ptr[8]) {
954  normals_ptr[0] = 1.0;
955  normals_ptr[1] = 0.0;
956  normals_ptr[2] = 0.0;
957  return;
958  } else if (covariance_ptr[4] < covariance_ptr[0] &&
959  covariance_ptr[4] < covariance_ptr[8]) {
960  normals_ptr[0] = 0.0;
961  normals_ptr[1] = 1.0;
962  normals_ptr[2] = 0.0;
963  return;
964  } else {
965  normals_ptr[0] = 0.0;
966  normals_ptr[1] = 0.0;
967  normals_ptr[2] = 1.0;
968  return;
969  }
970  }
971 }
972 
973 #if defined(__CUDACC__)
974 void EstimateNormalsFromCovariancesCUDA
975 #else
977 #endif
978  (const core::Tensor& covariances,
979  core::Tensor& normals,
980  const bool has_normals) {
981  core::Dtype dtype = covariances.GetDtype();
982  int64_t n = covariances.GetLength();
983 
984  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
985  const scalar_t* covariances_ptr = covariances.GetDataPtr<scalar_t>();
986  scalar_t* normals_ptr = normals.GetDataPtr<scalar_t>();
987 
989  covariances.GetDevice(), n,
990  [=] OPEN3D_DEVICE(int64_t workload_idx) {
991  int32_t covariances_offset = 9 * workload_idx;
992  int32_t normals_offset = 3 * workload_idx;
993  scalar_t normals_output[3] = {0};
994  EstimatePointWiseNormalsWithFastEigen3x3<scalar_t>(
995  covariances_ptr + covariances_offset,
996  normals_output);
997 
998  if ((normals_output[0] * normals_output[0] +
999  normals_output[1] * normals_output[1] +
1000  normals_output[2] * normals_output[2]) == 0.0 &&
1001  !has_normals) {
1002  normals_output[0] = 0.0;
1003  normals_output[1] = 0.0;
1004  normals_output[2] = 1.0;
1005  }
1006  if (has_normals) {
1007  if ((normals_ptr[normals_offset] * normals_output[0] +
1008  normals_ptr[normals_offset + 1] *
1009  normals_output[1] +
1010  normals_ptr[normals_offset + 2] *
1011  normals_output[2]) < 0.0) {
1012  normals_output[0] *= -1;
1013  normals_output[1] *= -1;
1014  normals_output[2] *= -1;
1015  }
1016  }
1017 
1018  normals_ptr[normals_offset] = normals_output[0];
1019  normals_ptr[normals_offset + 1] = normals_output[1];
1020  normals_ptr[normals_offset + 2] = normals_output[2];
1021  });
1022  });
1023 
1024  core::cuda::Synchronize(covariances.GetDevice());
1025 }
1026 
1027 template <typename scalar_t>
1029  const scalar_t* points_ptr,
1030  const scalar_t* normals_ptr,
1031  const scalar_t* colors_ptr,
1032  const int32_t& idx_offset,
1033  const int32_t* indices_ptr,
1034  const int32_t& indices_count,
1035  scalar_t* color_gradients_ptr) {
1036  if (indices_count < 4) {
1037  color_gradients_ptr[idx_offset] = 0;
1038  color_gradients_ptr[idx_offset + 1] = 0;
1039  color_gradients_ptr[idx_offset + 2] = 0;
1040  } else {
1041  scalar_t vt[3] = {points_ptr[idx_offset], points_ptr[idx_offset + 1],
1042  points_ptr[idx_offset + 2]};
1043 
1044  scalar_t nt[3] = {normals_ptr[idx_offset], normals_ptr[idx_offset + 1],
1045  normals_ptr[idx_offset + 2]};
1046 
1047  scalar_t it = (colors_ptr[idx_offset] + colors_ptr[idx_offset + 1] +
1048  colors_ptr[idx_offset + 2]) /
1049  3.0;
1050 
1051  scalar_t AtA[9] = {0};
1052  scalar_t Atb[3] = {0};
1053 
1054  // approximate image gradient of vt's tangential plane
1055  // projection (p') of a point p on a plane defined by
1056  // normal n, where o is the closest point to p on the
1057  // plane, is given by:
1058  // p' = p - [(p - o).dot(n)] * n p'
1059  // => p - [(p.dot(n) - s)] * n [where s = o.dot(n)]
1060 
1061  // Computing the scalar s.
1062  scalar_t s = vt[0] * nt[0] + vt[1] * nt[1] + vt[2] * nt[2];
1063 
1064  int i = 1;
1065  for (; i < indices_count; i++) {
1066  int64_t neighbour_idx_offset = 3 * indices_ptr[i];
1067 
1068  if (neighbour_idx_offset == -1) {
1069  break;
1070  }
1071 
1072  scalar_t vt_adj[3] = {points_ptr[neighbour_idx_offset],
1073  points_ptr[neighbour_idx_offset + 1],
1074  points_ptr[neighbour_idx_offset + 2]};
1075 
1076  // p' = p - d * n [where d = p.dot(n) - s]
1077  // Computing the scalar d.
1078  scalar_t d = vt_adj[0] * nt[0] + vt_adj[1] * nt[1] +
1079  vt_adj[2] * nt[2] - s;
1080 
1081  // Computing the p' (projection of the point).
1082  scalar_t vt_proj[3] = {vt_adj[0] - d * nt[0], vt_adj[1] - d * nt[1],
1083  vt_adj[2] - d * nt[2]};
1084 
1085  scalar_t it_adj = (colors_ptr[neighbour_idx_offset + 0] +
1086  colors_ptr[neighbour_idx_offset + 1] +
1087  colors_ptr[neighbour_idx_offset + 2]) /
1088  3.0;
1089 
1090  scalar_t A[3] = {vt_proj[0] - vt[0], vt_proj[1] - vt[1],
1091  vt_proj[2] - vt[2]};
1092 
1093  AtA[0] += A[0] * A[0];
1094  AtA[1] += A[1] * A[0];
1095  AtA[2] += A[2] * A[0];
1096  AtA[4] += A[1] * A[1];
1097  AtA[5] += A[2] * A[1];
1098  AtA[8] += A[2] * A[2];
1099 
1100  scalar_t b = it_adj - it;
1101 
1102  Atb[0] += A[0] * b;
1103  Atb[1] += A[1] * b;
1104  Atb[2] += A[2] * b;
1105  }
1106 
1107  // Orthogonal constraint.
1108  scalar_t A[3] = {(i - 1) * nt[0], (i - 1) * nt[1], (i - 1) * nt[2]};
1109 
1110  AtA[0] += A[0] * A[0];
1111  AtA[1] += A[0] * A[1];
1112  AtA[2] += A[0] * A[2];
1113  AtA[4] += A[1] * A[1];
1114  AtA[5] += A[1] * A[2];
1115  AtA[8] += A[2] * A[2];
1116 
1117  // Symmetry.
1118  AtA[3] = AtA[1];
1119  AtA[6] = AtA[2];
1120  AtA[7] = AtA[5];
1121 
1123  color_gradients_ptr + idx_offset);
1124  }
1125 }
1126 
1127 #if defined(__CUDACC__)
1128 void EstimateColorGradientsUsingHybridSearchCUDA
1129 #else
1131 #endif
1132  (const core::Tensor& points,
1133  const core::Tensor& normals,
1134  const core::Tensor& colors,
1135  core::Tensor& color_gradients,
1136  const double& radius,
1137  const int64_t& max_nn) {
1138  core::Dtype dtype = points.GetDtype();
1139  int64_t n = points.GetLength();
1140 
1142 
1143  bool check = tree.HybridIndex(radius);
1144  if (!check) {
1145  utility::LogError("NearestNeighborSearch::HybridIndex is not set.");
1146  }
1147 
1148  core::Tensor indices, distance, counts;
1149  std::tie(indices, distance, counts) =
1150  tree.HybridSearch(points, radius, max_nn);
1151 
1152  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
1153  auto points_ptr = points.GetDataPtr<scalar_t>();
1154  auto normals_ptr = normals.GetDataPtr<scalar_t>();
1155  auto colors_ptr = colors.GetDataPtr<scalar_t>();
1156  auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
1157  auto neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
1158  auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();
1159 
1161  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
1162  // NNS [Hybrid Search].
1163  int32_t neighbour_offset = max_nn * workload_idx;
1164  // Count of valid correspondences per point.
1165  int32_t neighbour_count =
1166  neighbour_counts_ptr[workload_idx];
1167  int32_t idx_offset = 3 * workload_idx;
1168 
1170  points_ptr, normals_ptr, colors_ptr, idx_offset,
1171  neighbour_indices_ptr + neighbour_offset,
1172  neighbour_count, color_gradients_ptr);
1173  });
1174  });
1175 
1176  core::cuda::Synchronize(points.GetDevice());
1177 }
1178 
1179 #if defined(__CUDACC__)
1180 void EstimateColorGradientsUsingKNNSearchCUDA
1181 #else
1183 #endif
1184  (const core::Tensor& points,
1185  const core::Tensor& normals,
1186  const core::Tensor& colors,
1187  core::Tensor& color_gradients,
1188  const int64_t& max_nn) {
1189  core::Dtype dtype = points.GetDtype();
1190  int64_t n = points.GetLength();
1191 
1193 
1194  bool check = tree.KnnIndex();
1195  if (!check) {
1196  utility::LogError("KnnIndex is not set.");
1197  }
1198 
1199  core::Tensor indices, distance;
1200  std::tie(indices, distance) = tree.KnnSearch(points, max_nn);
1201 
1202  indices = indices.To(core::Int32).Contiguous();
1203  int64_t nn_count = indices.GetShape()[1];
1204 
1205  if (nn_count < 4) {
1207  "Not enough neighbors to compute Covariances / Normals. "
1208  "Try "
1209  "changing the search parameter.");
1210  }
1211 
1212  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
1213  auto points_ptr = points.GetDataPtr<scalar_t>();
1214  auto normals_ptr = normals.GetDataPtr<scalar_t>();
1215  auto colors_ptr = colors.GetDataPtr<scalar_t>();
1216  auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
1217  auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();
1218 
1220  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
1221  int32_t neighbour_offset = max_nn * workload_idx;
1222  int32_t idx_offset = 3 * workload_idx;
1223 
1225  points_ptr, normals_ptr, colors_ptr, idx_offset,
1226  neighbour_indices_ptr + neighbour_offset, nn_count,
1227  color_gradients_ptr);
1228  });
1229  });
1230 
1231  core::cuda::Synchronize(points.GetDevice());
1232 }
1233 
1234 #if defined(__CUDACC__)
1235 void EstimateColorGradientsUsingRadiusSearchCUDA
1236 #else
1238 #endif
1239  (const core::Tensor& points,
1240  const core::Tensor& normals,
1241  const core::Tensor& colors,
1242  core::Tensor& color_gradients,
1243  const double& radius) {
1244  core::Dtype dtype = points.GetDtype();
1245  int64_t n = points.GetLength();
1246 
1248 
1249  bool check = tree.FixedRadiusIndex(radius);
1250  if (!check) {
1251  utility::LogError("RadiusIndex is not set.");
1252  }
1253 
1254  core::Tensor indices, distance, counts;
1255  std::tie(indices, distance, counts) =
1256  tree.FixedRadiusSearch(points, radius);
1257 
1258  indices = indices.To(core::Int32).Contiguous();
1259  counts = counts.Contiguous();
1260 
1261  DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(dtype, [&]() {
1262  auto points_ptr = points.GetDataPtr<scalar_t>();
1263  auto normals_ptr = normals.GetDataPtr<scalar_t>();
1264  auto colors_ptr = colors.GetDataPtr<scalar_t>();
1265  auto neighbour_indices_ptr = indices.GetDataPtr<int32_t>();
1266  auto neighbour_counts_ptr = counts.GetDataPtr<int32_t>();
1267  auto color_gradients_ptr = color_gradients.GetDataPtr<scalar_t>();
1268 
1270  points.GetDevice(), n, [=] OPEN3D_DEVICE(int64_t workload_idx) {
1271  int32_t neighbour_offset =
1272  neighbour_counts_ptr[workload_idx];
1273  // Count of valid correspondences per point.
1274  const int32_t neighbour_count =
1275  (neighbour_counts_ptr[workload_idx + 1] -
1276  neighbour_counts_ptr[workload_idx]);
1277  int32_t idx_offset = 3 * workload_idx;
1278 
1280  points_ptr, normals_ptr, colors_ptr, idx_offset,
1281  neighbour_indices_ptr + neighbour_offset,
1282  neighbour_count, color_gradients_ptr);
1283  });
1284  });
1285 
1286  core::cuda::Synchronize(points.GetDevice());
1287 }
1288 
1289 } // namespace pointcloud
1290 } // namespace kernel
1291 } // namespace geometry
1292 } // namespace t
1293 } // namespace open3d
Common CUDA utilities.
#define OPEN3D_HOST_DEVICE
Definition: CUDAUtils.h:43
#define OPEN3D_DEVICE
Definition: CUDAUtils.h:44
#define DISPATCH_DTYPE_TO_TEMPLATE(DTYPE,...)
Definition: Dispatch.h:30
#define DISPATCH_FLOAT_DTYPE_TO_TEMPLATE(DTYPE,...)
Definition: Dispatch.h:77
#define LogError(...)
Definition: Logging.h:51
double t
Definition: SurfaceReconstructionPoisson.cpp:172
Point< Real, 3 > point
Definition: SurfaceReconstructionPoisson.cpp:163
FEMTree< Dim, Real > & tree
Definition: SurfaceReconstructionPoisson.cpp:171
size_t stride
Definition: TriangleMeshBuffers.cpp:165
Definition: Dtype.h:20
Definition: Tensor.h:32
T * GetDataPtr()
Definition: Tensor.h:1143
SizeVector GetShape() const
Definition: Tensor.h:1126
Tensor Div(const Tensor &value) const
Divides a tensor and returns the resulting tensor.
Definition: Tensor.cpp:1204
Tensor Contiguous() const
Definition: Tensor.cpp:771
Tensor Transpose(int64_t dim0, int64_t dim1) const
Transpose a Tensor by swapping dimension dim0 and dim1.
Definition: Tensor.cpp:1067
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:738
A Class for nearest neighbor search.
Definition: NearestNeighborSearch.h:25
Definition: GeometryIndexer.h:161
OPEN3D_HOST_DEVICE index_t GetShape(int i) const
Definition: GeometryIndexer.h:311
Helper class for converting coordinates/indices between 3D/3D, 3D/2D, 2D/3D.
Definition: GeometryIndexer.h:25
bool has_normals
Definition: FilePCD.cpp:61
int count
Definition: FilePCD.cpp:42
int points
Definition: FilePCD.cpp:54
void Synchronize()
Definition: CUDAUtils.cpp:58
OPEN3D_HOST_DEVICE OPEN3D_FORCE_INLINE void cross_3x1(const scalar_t *A_3x1_input, const scalar_t *B_3x1_input, scalar_t *C_3x1_output)
Definition: Matrix.h:63
OPEN3D_DEVICE OPEN3D_FORCE_INLINE void solve_svd3x3(const scalar_t *A_3x3, const scalar_t *B_3x1, scalar_t *X_3x1)
Definition: SVD3x3.h:2171
OPEN3D_HOST_DEVICE OPEN3D_FORCE_INLINE scalar_t dot_3x1(const scalar_t *A_3x1_input, const scalar_t *B_3x1_input)
Definition: Matrix.h:89
const Dtype Int32
Definition: Dtype.cpp:46
void ParallelFor(const Device &device, int64_t n, const func_t &func)
Definition: ParallelFor.h:108
const Dtype Float32
Definition: Dtype.cpp:42
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t int32_t
Definition: K4aPlugin.cpp:395
void EstimateCovariancesUsingHybridSearchCPU(const core::Tensor &points, core::Tensor &covariances, const double &radius, const int64_t &max_nn)
Definition: PointCloudImpl.h:561
void EstimateCovariancesUsingRadiusSearchCPU(const core::Tensor &points, core::Tensor &covariances, const double &radius)
Definition: PointCloudImpl.h:611
OPEN3D_HOST_DEVICE void GetCoordinateSystemOnPlane(const scalar_t *query, scalar_t *u, scalar_t *v)
Definition: PointCloudImpl.h:335
void EstimateNormalsFromCovariancesCPU(const core::Tensor &covariances, core::Tensor &normals, const bool has_normals)
Definition: PointCloudImpl.h:978
OPEN3D_HOST_DEVICE void ComputeEigenvector0(const scalar_t *A, const scalar_t eval0, scalar_t *eigen_vector0)
Definition: PointCloudImpl.h:709
void OrientNormalsTowardsCameraLocationCPU(const core::Tensor &points, core::Tensor &normals, const core::Tensor &camera)
Definition: PointCloudImpl.h:285
void UnprojectCPU(const core::Tensor &depth, std::optional< std::reference_wrapper< const core::Tensor >> image_colors, core::Tensor &points, std::optional< std::reference_wrapper< core::Tensor >> colors, const core::Tensor &intrinsics, const core::Tensor &extrinsics, float depth_scale, float depth_max, int64_t stride)
Definition: PointCloudImpl.h:45
OPEN3D_HOST_DEVICE void EstimatePointWiseRobustNormalizedCovarianceKernel(const scalar_t *points_ptr, const int32_t *indices_ptr, const int32_t &indices_count, scalar_t *covariance_ptr)
Definition: PointCloudImpl.h:481
void GetPointMaskWithinAABBCPU(const core::Tensor &points, const core::Tensor &min_bound, const core::Tensor &max_bound, core::Tensor &mask)
Definition: PointCloudImpl.h:142
OPEN3D_HOST_DEVICE void Swap(scalar_t *x, scalar_t *y)
Definition: PointCloudImpl.h:362
OPEN3D_HOST_DEVICE bool IsBoundaryPoints(const scalar_t *angles, int counts, double angle_threshold)
Definition: PointCloudImpl.h:397
void ComputeBoundaryPointsCPU(const core::Tensor &points, const core::Tensor &normals, const core::Tensor &indices, const core::Tensor &counts, core::Tensor &mask, double angle_threshold)
Definition: PointCloudImpl.h:420
void EstimateColorGradientsUsingKNNSearchCPU(const core::Tensor &points, const core::Tensor &normals, const core::Tensor &colors, core::Tensor &color_gradient, const int64_t &max_nn)
Definition: PointCloudImpl.h:1184
void NormalizeNormalsCPU(core::Tensor &normals)
Definition: PointCloudImpl.h:220
OPEN3D_HOST_DEVICE void ComputeEigenvector1(const scalar_t *A, const scalar_t *evec0, const scalar_t eval1, scalar_t *eigen_vector1)
Definition: PointCloudImpl.h:758
OPEN3D_HOST_DEVICE void EstimatePointWiseColorGradientKernel(const scalar_t *points_ptr, const scalar_t *normals_ptr, const scalar_t *colors_ptr, const int32_t &idx_offset, const int32_t *indices_ptr, const int32_t &indices_count, scalar_t *color_gradients_ptr)
Definition: PointCloudImpl.h:1028
void EstimateColorGradientsUsingRadiusSearchCPU(const core::Tensor &points, const core::Tensor &normals, const core::Tensor &colors, core::Tensor &color_gradient, const double &radius)
Definition: PointCloudImpl.h:1239
void GetPointMaskWithinOBBCPU(const core::Tensor &points, const core::Tensor &center, const core::Tensor &rotation, const core::Tensor &extent, core::Tensor &mask)
Definition: PointCloudImpl.h:176
void EstimateColorGradientsUsingHybridSearchCPU(const core::Tensor &points, const core::Tensor &normals, const core::Tensor &colors, core::Tensor &color_gradient, const double &radius, const int64_t &max_nn)
Definition: PointCloudImpl.h:1132
OPEN3D_HOST_DEVICE void EstimatePointWiseNormalsWithFastEigen3x3(const scalar_t *covariance_ptr, scalar_t *normals_ptr)
Definition: PointCloudImpl.h:838
OPEN3D_HOST_DEVICE void Heapify(scalar_t *arr, int n, int root)
Definition: PointCloudImpl.h:369
void OrientNormalsToAlignWithDirectionCPU(core::Tensor &normals, const core::Tensor &direction)
Definition: PointCloudImpl.h:251
void EstimateCovariancesUsingKNNSearchCPU(const core::Tensor &points, core::Tensor &covariances, const int64_t &max_nn)
Definition: PointCloudImpl.h:660
OPEN3D_HOST_DEVICE void HeapSort(scalar_t *arr, int n)
Definition: PointCloudImpl.h:387
TArrayIndexer< int64_t > NDArrayIndexer
Definition: GeometryIndexer.h:360
core::Tensor InverseTransformation(const core::Tensor &T)
TODO(wei): find a proper place for such functionalities.
Definition: Utility.h:77
Definition: PinholeCameraIntrinsic.cpp:16