Open3D (C++ API)  0.14.1
Indexer.h
// ----------------------------------------------------------------------------
// -                        Open3D: www.open3d.org                            -
// ----------------------------------------------------------------------------
// The MIT License (MIT)
//
// Copyright (c) 2018-2021 www.open3d.org
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// ----------------------------------------------------------------------------

#pragma once

#include <sstream>

#include "open3d/core/CUDAUtils.h"
#include "open3d/core/Dtype.h"
#include "open3d/core/ShapeUtil.h"
#include "open3d/core/SizeVector.h"
#include "open3d/core/Tensor.h"
#include "open3d/utility/Logging.h"
#include "open3d/utility/MiniVec.h"

// The generated "Indexer_ispc.h" header will not be available outside the
// library. Therefore, forward declare all exported ISPC classes.
#ifdef BUILD_ISPC_MODULE
namespace ispc {
struct TensorRef;
struct Indexer;
}  // namespace ispc
#endif

namespace open3d {
namespace core {

class Indexer;

class IndexerIterator;

// Maximum number of dimensions of TensorRef.
static constexpr int64_t MAX_DIMS = 10;

// Maximum number of inputs of an op.
// MAX_INPUTS shall be >= MAX_DIMS to support advanced indexing.
static constexpr int64_t MAX_INPUTS = 10;

// Maximum number of outputs of an op. This number can be increased when
// necessary.
static constexpr int64_t MAX_OUTPUTS = 2;

template <int NARGS, typename index_t = uint32_t>
struct OffsetCalculator {
    OffsetCalculator(int dims,
                     const int64_t* sizes,
                     const int64_t* const* strides)
        : dims_(dims) {
        if (dims_ > MAX_DIMS) {
            utility::LogError("tensor has too many (>{}) dims_", MAX_DIMS);
        }

        for (int i = 0; i < MAX_DIMS; ++i) {
            if (i < dims_) {
                sizes_[i] = sizes[i];
            } else {
                sizes_[i] = 1;
            }
            for (int arg = 0; arg < NARGS; arg++) {
                strides_[i][arg] = i < dims_ ? strides[arg][i] : 0;
            }
        }
    }

    OPEN3D_HOST_DEVICE utility::MiniVec<index_t, NARGS> get(
            index_t linear_idx) const {
        utility::MiniVec<index_t, NARGS> offsets;
#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
        for (int arg = 0; arg < NARGS; arg++) {
            offsets[arg] = 0;
        }

#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
        for (int dim = 0; dim < MAX_DIMS; ++dim) {
            if (dim == dims_) {
                break;
            }
            index_t mod = linear_idx % sizes_[dim];
            linear_idx = linear_idx / sizes_[dim];

#if defined(__CUDA_ARCH__)
#pragma unroll
#endif
            for (int arg = 0; arg < NARGS; arg++) {
                offsets[arg] += mod * strides_[dim][arg];
            }
        }
        return offsets;
    }

    int dims_;
    index_t sizes_[MAX_DIMS];
    index_t strides_[MAX_DIMS][NARGS];
};

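// Usage sketch (illustrative only, not part of this header). get() peels off
// sizes_[0] first, so dimension 0 is treated as the fastest-varying dimension
// of the linear index. For a contiguous 2 x 3 tensor whose dimensions are
// listed innermost-first, linear index 4 maps to element offset 4; the
// variable names below are assumptions made for the example.
//
//     int64_t sizes[2] = {3, 2};             // innermost dimension first
//     int64_t elem_strides[2] = {1, 3};      // element strides, same order
//     const int64_t* stride_ptrs[1] = {elem_strides};
//     OffsetCalculator<1> calc(/*dims=*/2, sizes, stride_ptrs);
//     auto offsets = calc.get(4);
//     // offsets[0] == (4 % 3) * 1 + (4 / 3) * 3 == 4
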
/// A minimalistic class that references a Tensor.
struct TensorRef {
    // The default copy constructor works on __device__ as well so we don't
    // define it explicitly. shape_[MAX_DIMS] and strides[MAX_DIMS] will be
    // copied fully.
    TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}

    TensorRef(const Tensor& t) {
        if (t.NumDims() > MAX_DIMS) {
            utility::LogError("Tensor has too many dimensions {} > {}.",
                              t.NumDims(), MAX_DIMS);
        }
        data_ptr_ = const_cast<void*>(t.GetDataPtr());
        ndims_ = t.NumDims();
        dtype_byte_size_ = t.GetDtype().ByteSize();
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = t.GetShape(i);
            byte_strides_[i] = t.GetStride(i) * dtype_byte_size_;
        }
    }

    /// Permute (dimension shuffle) the reference to a Tensor.
    void Permute(const SizeVector& dims) {
        // Check dims are a permutation of [0, 1, 2, ..., n-1].
        if (static_cast<int64_t>(dims.size()) != ndims_) {
            utility::LogError("Number of dimensions mismatch {} != {}.",
                              dims.size(), ndims_);
        }
        std::vector<bool> seen_dims(ndims_, false);
        for (const int64_t& dim : dims) {
            seen_dims[dim] = true;
        }
        if (!std::all_of(seen_dims.begin(), seen_dims.end(),
                         [](bool seen) { return seen; })) {
            utility::LogError(
                    "Permute dims must be a permutation from 0 to {}.",
                    dims.size() - 1);
        }

        // Map to new shape and strides.
        SizeVector new_shape(ndims_);
        SizeVector new_byte_strides(ndims_);
        for (int64_t i = 0; i < ndims_; ++i) {
            int64_t old_dim = shape_util::WrapDim(dims[i], ndims_);
            new_shape[i] = shape_[old_dim];
            new_byte_strides[i] = byte_strides_[old_dim];
        }
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = new_shape[i];
            byte_strides_[i] = new_byte_strides[i];
        }
    }

    /// Returns true if the underlying memory buffer is contiguous.
    inline bool IsContiguous() const {
        SizeVector shape(ndims_);
        SizeVector strides(ndims_);
        for (int64_t i = 0; i < ndims_; ++i) {
            shape[i] = shape_[i];
            strides[i] = byte_strides_[i] / dtype_byte_size_;
        }
        return shape_util::DefaultStrides(shape) == strides;
    }

    bool operator==(const TensorRef& other) const {
        bool rc = true;
        rc = rc && (data_ptr_ == other.data_ptr_);
        rc = rc && (ndims_ == other.ndims_);
        rc = rc && (dtype_byte_size_ == other.dtype_byte_size_);
        for (int64_t i = 0; i < ndims_; ++i) {
            rc = rc && (shape_[i] == other.shape_[i]);
            rc = rc && (byte_strides_[i] == other.byte_strides_[i]);
        }
        return rc;
    }

    bool operator!=(const TensorRef& other) const { return !(*this == other); }

#ifdef BUILD_ISPC_MODULE
    /// Exports this TensorRef to the corresponding ISPC-compatible struct.
    ispc::TensorRef ToISPC() const;
#endif

    void* data_ptr_;
    int64_t ndims_ = 0;
    int64_t dtype_byte_size_ = 0;
    int64_t shape_[MAX_DIMS];
    int64_t byte_strides_[MAX_DIMS];
};

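// Usage sketch (illustrative only, not part of this header). Wrapping a
// 2 x 3 Float32 tensor records its byte strides, and Permute({1, 0}) swaps
// the two dimensions of the reference without modifying the Tensor itself.
// The tensor name is an assumption made for the example.
//
//     Tensor t = Tensor::Zeros({2, 3}, Dtype::Float32);
//     TensorRef ref(t);      // shape_ = {2, 3}, byte_strides_ = {12, 4}
//     ref.Permute({1, 0});   // shape_ = {3, 2}, byte_strides_ = {4, 12}
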
enum class DtypePolicy {
    NONE,        // Do not check. Expects the kernel to handle the conversion.
                 // E.g. in Copy kernel with type casting.
    ALL_SAME,    // All inputs and outputs have the same dtype.
    INPUT_SAME,  // All inputs have the same dtype.
    INPUT_SAME_OUTPUT_BOOL  // All inputs have the same dtype. Outputs
                            // have bool dtype.
};

/// Iterator over a single Tensor: maps a linear workload index to the data
/// pointer of the corresponding element.
class TensorIterator {
public:
    TensorIterator(const Tensor& tensor)
        : input_(TensorRef(tensor)), ndims_(tensor.NumDims()) {}

    OPEN3D_HOST_DEVICE int64_t NumWorkloads() const {
        int64_t num_workloads = 1;
        for (int64_t i = 0; i < ndims_; ++i) {
            num_workloads *= input_.shape_[i];
        }
        return num_workloads;
    }

    OPEN3D_HOST_DEVICE void* GetPtr(int64_t workload_idx) const {
        if (workload_idx < 0 || workload_idx >= NumWorkloads()) {
            return nullptr;
        }
        int64_t offset = 0;
        workload_idx = workload_idx * input_.dtype_byte_size_;
        for (int64_t i = 0; i < ndims_; ++i) {
            offset += workload_idx / input_.byte_strides_[i] *
                      input_.byte_strides_[i];
            workload_idx = workload_idx % input_.byte_strides_[i];
        }
        return static_cast<void*>(static_cast<char*>(input_.data_ptr_) +
                                  offset);
    }

protected:
    TensorRef input_;
    int64_t ndims_;
};

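// Usage sketch (illustrative only, not part of this header): set every
// element of a Float32 tensor through a TensorIterator. The tensor name is
// an assumption made for the example.
//
//     Tensor a = Tensor::Zeros({5}, Dtype::Float32);
//     TensorIterator iter(a);
//     for (int64_t i = 0; i < iter.NumWorkloads(); ++i) {
//         *static_cast<float*>(iter.GetPtr(i)) = 100.0f;
//     }
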
/// Indexing engine for element-wise ops and reductions with broadcasting
/// support.
class Indexer {
public:
    Indexer() {}
    Indexer(const Indexer&) = default;
    Indexer& operator=(const Indexer&) = default;

    /// Constructs an Indexer with a single output Tensor.
    Indexer(const std::vector<Tensor>& input_tensors,
            const Tensor& output_tensor,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    /// Constructs an Indexer with multiple output Tensors.
    Indexer(const std::vector<Tensor>& input_tensors,
            const std::vector<Tensor>& output_tensors,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    /// Returns true if all tensors can be addressed with 32-bit offsets.
    bool CanUse32BitIndexing() const;

    /// Returns an IndexerIterator that yields sub-Indexers, each of which can
    /// be indexed in 32 bits.
    IndexerIterator SplitTo32BitIndexing() const;

    /// Splits this Indexer in two along its largest dimension. Returns one
    /// half as a new Indexer and shrinks this Indexer to the other half.
    std::unique_ptr<Indexer> SplitLargestDim();

    /// Returns a sub-Indexer that iterates over all inputs contributing to
    /// the output element at \p output_idx.
    Indexer GetPerOutputIndexer(int64_t output_idx) const;

    bool ShouldAccumulate() const { return accumulate_; }

    bool IsFinalOutput() const { return final_output_; }

    /// Shrinks iteration over dimension \p dim to the range
    /// [\p start, \p start + \p size).
    void ShrinkDim(int64_t dim, int64_t start, int64_t size);

    /// Returns the number of reduction dimensions.
    int64_t NumReductionDims() const;

    /// Returns the number of dimensions of the Indexer.
    int64_t NumDims() const { return ndims_; }

    /// Returns the Indexer's master shape, which can be used to iterate over
    /// all workloads.
    const int64_t* GetMasterShape() const { return master_shape_; }
    int64_t* GetMasterShape() { return master_shape_; }

    /// Returns the default (contiguous) strides of the master shape.
    const int64_t* GetMasterStrides() const { return master_strides_; }

    /// Returns the total number of workloads (elements to process).
    int64_t NumWorkloads() const;

    /// Returns the number of output elements.
    int64_t NumOutputElements() const;

    /// Number of input Tensors.
    int64_t NumInputs() const { return num_inputs_; }

    /// Number of output Tensors.
    int64_t NumOutputs() const { return num_outputs_; }

    /// Returns input TensorRef.
    TensorRef& GetInput(int64_t i) {
        if (i >= num_inputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_inputs_, i);
        }
        return inputs_[i];
    }
    const TensorRef& GetInput(int64_t i) const {
        if (i >= num_inputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_inputs_, i);
        }
        return inputs_[i];
    }

    /// Returns output TensorRef.
    TensorRef& GetOutput(int64_t i) {
        if (i >= num_outputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_outputs_, i);
        }
        return outputs_[i];
    }
    const TensorRef& GetOutput(int64_t i) const {
        if (i >= num_outputs_ || i < 0) {
            utility::LogError("0 <= i < {} required, however, i = {}.",
                              num_outputs_, i);
        }
        return outputs_[i];
    }

    /// Returns the output TensorRef. Only valid when there is a single
    /// output; equivalent to GetOutput(0).
    TensorRef& GetOutput() {
        if (num_outputs_ > 1) {
            utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                              num_outputs_);
        }
        return GetOutput(0);
    }
    const TensorRef& GetOutput() const {
        if (num_outputs_ > 1) {
            utility::LogError("num_outputs_ == {} > 1, use GetOutput(i)",
                              num_outputs_);
        }
        return GetOutput(0);
    }

    /// Returns true if the \p dim -th dimension is reduced.
    bool IsReductionDim(int64_t dim) const {
        // All outputs have the same shape and reduction dims. Even if they
        // don't have the same initial strides, the reduced strides are always
        // set to 0. Thus it is okay to use outputs_[0].
        return outputs_[0].byte_strides_[dim] == 0 && master_shape_[dim] > 1;
    }

    /// Returns the untyped (char*) data pointer of input \p input_idx at
    /// workload \p workload_idx, or nullptr if \p input_idx is out of range.
    OPEN3D_HOST_DEVICE char* GetInputPtr(int64_t input_idx,
                                         int64_t workload_idx) const {
        if (input_idx < 0 || input_idx >= num_inputs_) {
            return nullptr;
        }
        return GetWorkloadDataPtr(inputs_[input_idx],
                                  inputs_contiguous_[input_idx], workload_idx);
    }

    /// Typed variant of GetInputPtr. The caller is responsible for ensuring
    /// that sizeof(T) matches the input's dtype byte size; this is not
    /// checked.
    template <typename T>
    OPEN3D_HOST_DEVICE T* GetInputPtr(int64_t input_idx,
                                      int64_t workload_idx) const {
        if (input_idx < 0 || input_idx >= num_inputs_) {
            return nullptr;
        }
        return GetWorkloadDataPtr<T>(inputs_[input_idx],
                                     inputs_contiguous_[input_idx],
                                     workload_idx);
    }

    /// Returns the untyped (char*) data pointer of the first output at
    /// workload \p workload_idx.
    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[0], outputs_contiguous_[0],
                                  workload_idx);
    }

    /// Typed variant of GetOutputPtr for the first output.
    template <typename T>
    OPEN3D_HOST_DEVICE T* GetOutputPtr(int64_t workload_idx) const {
        return GetWorkloadDataPtr<T>(outputs_[0], outputs_contiguous_[0],
                                     workload_idx);
    }

    /// Returns the untyped (char*) data pointer of output \p output_idx at
    /// workload \p workload_idx.
    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t output_idx,
                                          int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[output_idx],
                                  outputs_contiguous_[output_idx],
                                  workload_idx);
    }

    /// Typed variant of GetOutputPtr for output \p output_idx.
    template <typename T>
    OPEN3D_HOST_DEVICE T* GetOutputPtr(int64_t output_idx,
                                       int64_t workload_idx) const {
        return GetWorkloadDataPtr<T>(outputs_[output_idx],
                                     outputs_contiguous_[output_idx],
                                     workload_idx);
    }

#ifdef BUILD_ISPC_MODULE
    /// Exports this Indexer to the corresponding ISPC-compatible struct.
    ispc::Indexer ToISPC() const;
#endif

protected:
    /// Merge adjacent dimensions when doing so does not change the memory
    /// access pattern, to reduce the number of dimensions to iterate over.
    void CoalesceDimensions();

    // Permute reduction dimensions to front.
    // TODO: Sort the dimensions based on strides in ascending order to
    // improve thread coalescing.
    void ReorderDimensions(const SizeVector& reduction_dims);

    /// Update master_strides_ based on master_shape_.
    void UpdateMasterStrides();

    /// Update inputs_contiguous_ and outputs_contiguous_.
    void UpdateContiguousFlags();

    /// Broadcast \p src to the destination shape by aligning dimensions and
    /// setting the byte stride of broadcast dimensions to 0.
    static void BroadcastRestride(TensorRef& src,
                                  int64_t dst_ndims,
                                  const int64_t* dst_shape);

    /// Symmetrical to BroadcastRestride: set the byte stride of the reduced
    /// dimensions of \p dst to 0.
    static void ReductionRestride(TensorRef& dst,
                                  int64_t src_ndims,
                                  const int64_t* src_shape,
                                  const SizeVector& reduction_dims);

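    // Illustration (added for clarity, not part of the original comments):
    // broadcasting an input of shape {3} against an output of shape {2, 3}
    // turns the input's TensorRef into shape {1, 3} with byte strides {0, 4}
    // for Float32, so every output row reads the same input row. Similarly,
    // when reducing a {2, 3} input over dim 0, ReductionRestride sets the
    // output's byte stride along dim 0 to 0, which is exactly the condition
    // IsReductionDim() tests for.
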
    /// Returns the untyped (char*) data pointer of TensorRef \p tr at
    /// workload \p workload_idx.
    OPEN3D_HOST_DEVICE char* GetWorkloadDataPtr(const TensorRef& tr,
                                                bool tr_contiguous,
                                                int64_t workload_idx) const {
        // For 0-sized input reduction op, the output Tensor
        // workload_idx == 1 > NumWorkloads() == 0.
        if (workload_idx < 0) {
            return nullptr;
        }
        if (tr_contiguous) {
            return static_cast<char*>(tr.data_ptr_) +
                   workload_idx * tr.dtype_byte_size_;
        } else {
            int64_t offset = 0;
            for (int64_t i = 0; i < ndims_; ++i) {
                offset += workload_idx / master_strides_[i] *
                          tr.byte_strides_[i];
                workload_idx = workload_idx % master_strides_[i];
            }
            return static_cast<char*>(tr.data_ptr_) + offset;
        }
    }

    /// Typed variant of GetWorkloadDataPtr. The caller is responsible for
    /// ensuring that sizeof(T) matches \p tr 's dtype byte size; this is not
    /// checked.
    template <typename T>
    OPEN3D_HOST_DEVICE T* GetWorkloadDataPtr(const TensorRef& tr,
                                             bool tr_contiguous,
                                             int64_t workload_idx) const {
        // For 0-sized input reduction op, the output Tensor
        // workload_idx == 1 > NumWorkloads() == 0.
        if (workload_idx < 0) {
            return nullptr;
        }
        if (tr_contiguous) {
            return static_cast<T*>(tr.data_ptr_) + workload_idx;
        } else {
            int64_t offset = 0;
            for (int64_t i = 0; i < ndims_; ++i) {
                offset += workload_idx / master_strides_[i] *
                          tr.byte_strides_[i];
                workload_idx = workload_idx % master_strides_[i];
            }
            return static_cast<T*>(static_cast<void*>(
                    static_cast<char*>(tr.data_ptr_) + offset));
        }
    }

    /// Number of input Tensors.
    int64_t num_inputs_ = 0;
    /// Number of output Tensors.
    int64_t num_outputs_ = 0;

    /// Array of input TensorRefs.
    TensorRef inputs_[MAX_INPUTS];

    /// Array of output TensorRefs.
    TensorRef outputs_[MAX_OUTPUTS];

    /// Contiguity flag for each input TensorRef.
    bool inputs_contiguous_[MAX_INPUTS];

    /// Contiguity flag for each output TensorRef.
    bool outputs_contiguous_[MAX_OUTPUTS];

    /// The Indexer's global (master) shape; its number of elements equals
    /// NumWorkloads().
    int64_t master_shape_[MAX_DIMS];

    /// Default (contiguous) strides of master_shape_, used to convert a
    /// workload index into per-dimension indices.
    int64_t master_strides_[MAX_DIMS];

    /// The Indexer's global number of dimensions.
    int64_t ndims_ = 0;

    /// Whether this Indexer produces the final output, as opposed to an
    /// intermediate result that will be accumulated further.
    bool final_output_ = true;

    /// Whether the kernel should accumulate into the output rather than
    /// overwrite it.
    bool accumulate_ = false;
};

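// Usage sketch (illustrative only, not part of this header): a single-threaded
// element-wise kernel that doubles a Float32 tensor through an Indexer. The
// tensor names and the use of float are assumptions made for the example.
//
//     Tensor src = Tensor::Ones({2, 3}, Dtype::Float32);
//     Tensor dst = Tensor::Empty({2, 3}, Dtype::Float32);
//     Indexer indexer({src}, dst, DtypePolicy::ALL_SAME);
//     for (int64_t w = 0; w < indexer.NumWorkloads(); ++w) {
//         const float* in = indexer.GetInputPtr<float>(0, w);
//         float* out = indexer.GetOutputPtr<float>(w);
//         *out = 2.0f * (*in);
//     }
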
/// Iterates over the sub-Indexers of an Indexer (see
/// Indexer::SplitTo32BitIndexing()).
class IndexerIterator {
public:
    struct Iterator {
        Iterator() {}
        Iterator(const Indexer& indexer);
        Iterator(Iterator&& other) = default;

        Indexer& operator*() const;
        Iterator& operator++();
        bool operator==(const Iterator& other) const;
        bool operator!=(const Iterator& other) const;

        std::vector<std::unique_ptr<Indexer>> vec_;
    };

    IndexerIterator(const Indexer& indexer);

    Iterator begin() const;
    Iterator end() const;

private:
    const Indexer& indexer_;
};

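// Usage sketch (illustrative only, not part of this header): range-for over
// the 32-bit-indexable sub-Indexers of an existing Indexer. The indexer
// variable is an assumption made for the example.
//
//     for (const Indexer& sub : indexer.SplitTo32BitIndexing()) {
//         // Launch a kernel over sub.NumWorkloads() workloads using 32-bit
//         // indices.
//     }
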
}  // namespace core
}  // namespace open3d