#ifdef BUILD_ISPC_MODULE
// ... (ISPC-specific declarations)
#endif

class IndexerIterator;
/// Maximum number of dimensions of a TensorRef.
static constexpr int64_t MAX_DIMS = 10;

/// Maximum number of input Tensors of an op.
static constexpr int64_t MAX_INPUTS = 10;

/// Maximum number of output Tensors of an op.
static constexpr int64_t MAX_OUTPUTS = 2;
/// Converts a linear workload index into per-argument offsets, given each
/// argument's sizes and strides.
template <int NARGS, typename index_t = uint32_t>
struct OffsetCalculator {
    OffsetCalculator(int dims,
                     const int64_t* sizes,
                     const int64_t* const* strides)
        : dims_(dims) {
        if (dims_ > MAX_DIMS) {
            utility::LogError("tensor has too many (>{}) dims", MAX_DIMS);
        }
        for (int i = 0; i < MAX_DIMS; ++i) {
            // Dimensions beyond dims_ are padded with size 1 and stride 0.
            sizes_[i] = i < dims_ ? sizes[i] : 1;
            for (int arg = 0; arg < NARGS; arg++) {
                strides_[i][arg] = i < dims_ ? strides[arg][i] : 0;
            }
        }
    }

    // The offset getter (signature elided in this excerpt) decomposes the
    // linear index into per-dimension coordinates and accumulates the
    // per-argument offsets:
    //
    //     for (int arg = 0; arg < NARGS; arg++) {
    //         offsets[arg] = 0;
    //     }
    //     for (int dim = 0; dim < MAX_DIMS; ++dim) {
    //         index_t mod = linear_idx % sizes_[dim];
    //         linear_idx = linear_idx / sizes_[dim];
    //         for (int arg = 0; arg < NARGS; arg++) {
    //             offsets[arg] += mod * strides_[dim][arg];
    //         }
    //     }
    //
    // On CUDA, each of these loops is preceded by `#pragma unroll` guarded by
    // `#if defined(__CUDA_ARCH__)`.

    int dims_;
    index_t sizes_[MAX_DIMS];
    index_t strides_[MAX_DIMS][NARGS];
};
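For intuition, here is a minimal standalone sketch of the same index arithmetic. The shape, byte strides, and innermost-first dimension ordering are illustrative assumptions, not taken from the header: it decomposes linear index 4 of a row-major 2 x 3 float32 array into a byte offset.

#include <cstdint>
#include <cstdio>

int main() {
    // Innermost dimension first: sizes {3, 2}, float32 byte strides {4, 12}.
    const int64_t sizes[2] = {3, 2};
    const int64_t byte_strides[2] = {4, 12};

    int64_t linear_idx = 4;  // element (row 1, col 1) in row-major order
    int64_t offset = 0;
    for (int dim = 0; dim < 2; ++dim) {
        const int64_t mod = linear_idx % sizes[dim];  // coordinate in this dim
        linear_idx = linear_idx / sizes[dim];
        offset += mod * byte_strides[dim];
    }
    std::printf("byte offset = %lld\n", static_cast<long long>(offset));  // 16
    return 0;
}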
/// A minimalistic class that references a Tensor.
struct TensorRef {
    TensorRef() : data_ptr_(nullptr), ndims_(0), dtype_byte_size_(0) {}

    TensorRef(const Tensor& t) {
        // ... (checks that t.NumDims() <= MAX_DIMS)
        data_ptr_ = const_cast<void*>(t.GetDataPtr());
        ndims_ = t.NumDims();
        dtype_byte_size_ = t.GetDtype().ByteSize();
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = t.GetShape(i);
            byte_strides_[i] = t.GetStride(i) * dtype_byte_size_;
        }
    }
    /// Permute (dimension shuffle) the reference to a Tensor.
    void Permute(const SizeVector& dims) {
        // Check that dims is a permutation of {0, 1, ..., ndims_ - 1}.
        if (static_cast<int64_t>(dims.size()) != ndims_) {
            utility::LogError("Number of dimensions mismatch {} != {}.",
                              dims.size(), ndims_);
        }
        std::vector<bool> seen_dims(ndims_, false);
        for (const int64_t& dim : dims) {
            seen_dims[dim] = true;
        }
        if (!std::all_of(seen_dims.begin(), seen_dims.end(),
                         [](bool seen) { return seen; })) {
            utility::LogError(
                    "Permute dims must be a permutation from 0 to {}.",
                    dims.size() - 1);
        }
        SizeVector new_shape(ndims_);
        SizeVector new_byte_strides(ndims_);
        for (int64_t i = 0; i < ndims_; ++i) {
            int64_t old_dim = shape_util::WrapDim(dims[i], ndims_);
            new_shape[i] = shape_[old_dim];
            new_byte_strides[i] = byte_strides_[old_dim];
        }
        for (int64_t i = 0; i < ndims_; ++i) {
            shape_[i] = new_shape[i];
            byte_strides_[i] = new_byte_strides[i];
        }
    }
    /// Returns True if the underlying memory buffer is contiguous.
    bool IsContiguous() const {
        SizeVector shape(ndims_);
        SizeVector strides(ndims_);
        for (int64_t i = 0; i < ndims_; ++i) {
            shape[i] = shape_[i];
            strides[i] = byte_strides_[i] / dtype_byte_size_;
        }
        return shape_util::DefaultStrides(shape) == strides;
    }
    bool operator==(const TensorRef& other) const {
        bool rc = true;
        rc = rc && (data_ptr_ == other.data_ptr_);
        rc = rc && (ndims_ == other.ndims_);
        rc = rc && (dtype_byte_size_ == other.dtype_byte_size_);
        for (int64_t i = 0; i < ndims_; ++i) {
            rc = rc && (shape_[i] == other.shape_[i]);
            rc = rc && (byte_strides_[i] == other.byte_strides_[i]);
        }
        return rc;
    }

    bool operator!=(const TensorRef& other) const { return !(*this == other); }
#ifdef BUILD_ISPC_MODULE
    /// Convert TensorRef to ispc::TensorRef.
    ispc::TensorRef ToISPC() const;
#endif

    void* data_ptr_ = nullptr;
    int64_t ndims_ = 0;
    int64_t dtype_byte_size_ = 0;
    int64_t shape_[MAX_DIMS];
    int64_t byte_strides_[MAX_DIMS];
};

/// Policy for checking input/output dtype compatibility in the Indexer.
enum class DtypePolicy {
    NONE,      // No dtype check; the kernel handles conversion itself.
    ALL_SAME,  // All inputs and outputs must share the same dtype.
    // ... (additional policies)
};
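To make the reference semantics concrete, a small sketch (tensor shape, dtype, and the Dtype::Float32 spelling are illustrative assumptions about the open3d::core API): a TensorRef stores only pointer/shape/stride metadata, so permuting it reinterprets the same buffer without modifying the Tensor.

#include "open3d/core/Indexer.h"
#include "open3d/core/SizeVector.h"
#include "open3d/core/Tensor.h"

using namespace open3d::core;

int main() {
    Tensor t = Tensor::Zeros({2, 3}, Dtype::Float32);

    TensorRef ref(t);
    ref.Permute(SizeVector{1, 0});  // view the same buffer as 3 x 2
    // The permuted view is no longer contiguous, but t itself is unchanged.
    bool contiguous = ref.IsContiguous();  // false
    (void)contiguous;
    return 0;
}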
/// Indexer to one Tensor.
class TensorIterator {
public:
    TensorIterator(const Tensor& tensor)
        : input_(TensorRef(tensor)), ndims_(tensor.NumDims()) {}

    OPEN3D_HOST_DEVICE int64_t NumWorkloads() const {
        int64_t num_workloads = 1;
        for (int64_t i = 0; i < ndims_; ++i) {
            num_workloads *= input_.shape_[i];
        }
        return num_workloads;
    }

    OPEN3D_HOST_DEVICE void* GetPtr(int64_t workload_idx) const {
        if (workload_idx < 0 || workload_idx >= NumWorkloads()) {
            return nullptr;
        }
        int64_t offset = 0;
        workload_idx = workload_idx * input_.dtype_byte_size_;
        for (int64_t i = 0; i < ndims_; ++i) {
            offset += workload_idx / input_.byte_strides_[i] *
                      input_.byte_strides_[i];
            workload_idx = workload_idx % input_.byte_strides_[i];
        }
        return static_cast<void*>(static_cast<char*>(input_.data_ptr_) +
                                  offset);
    }

protected:
    TensorRef input_;
    int64_t ndims_;
};
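A minimal usage sketch (values, shape, and dtype chosen purely for illustration): create a tensor, then walk every element through the iterator and overwrite it.

#include <vector>

#include "open3d/core/Indexer.h"
#include "open3d/core/Tensor.h"

using namespace open3d::core;

int main() {
    std::vector<float> vals{0, 1, 2, 3, 4};
    Tensor a(vals, SizeVector{5}, Dtype::Float32);

    TensorIterator iter(a);
    for (int64_t i = 0; i < iter.NumWorkloads(); ++i) {
        // GetPtr() returns a void* to the i-th element's storage.
        *static_cast<float*>(iter.GetPtr(i)) = 100.f;
    }
    return 0;
}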
/// Indexing engine for element-wise and reduction ops, with broadcasting
/// support.
class Indexer {
public:
    Indexer() {}

    /// Single-output constructor.
    Indexer(const std::vector<Tensor>& input_tensors,
            const Tensor& output_tensor,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    /// Multi-output constructor.
    Indexer(const std::vector<Tensor>& input_tensors,
            const std::vector<Tensor>& output_tensors,
            DtypePolicy dtype_policy = DtypePolicy::ALL_SAME,
            const SizeVector& reduction_dims = {});

    /// Returns true iff all byte offsets fit into 32-bit indexing.
    bool CanUse32BitIndexing() const;

    /// Splits the largest dimension in two; the returned Indexer iterates the
    /// first half while this Indexer keeps the second half.
    std::unique_ptr<Indexer> SplitLargestDim();

    /// Returns a sub-Indexer that loops over all inputs contributing to a
    /// single output element.
    Indexer GetPerOutputIndexer(int64_t output_idx) const;

    /// Shrinks iteration to the range [start, start + size) in dimension dim.
    void ShrinkDim(int64_t dim, int64_t start, int64_t size);

    /// Returns the number of reduction dimensions.
    int64_t NumReductionDims() const;

    /// Returns the total number of workloads (per-element computations)
    /// needed for the op.
    int64_t NumWorkloads() const;

    /// Returns the number of output elements.
    int64_t NumOutputElements() const;
    /// Input / output TensorRef accessors. The indexed variants log an error
    /// unless 0 <= i < NumInputs() (resp. NumOutputs()); the no-argument
    /// GetOutput() requires a single output.
    TensorRef& GetInput(int64_t i);
    const TensorRef& GetInput(int64_t i) const;
    TensorRef& GetOutput(int64_t i);
    const TensorRef& GetOutput(int64_t i) const;
    TensorRef& GetOutput();
    const TensorRef& GetOutput() const;

    /// Returns true if the dim-th dimension is reduced. All outputs share the
    /// same reduction dims and reduced strides are set to 0, so checking
    /// outputs_[0] is sufficient.
    bool IsReductionDim(int64_t dim) const {
        return outputs_[0].byte_strides_[dim] == 0 && master_shape_[dim] > 1;
    }

    /// Get the input Tensor data pointer for a given workload index.
    OPEN3D_HOST_DEVICE char* GetInputPtr(int64_t input_idx,
                                         int64_t workload_idx) const {
        if (input_idx < 0 || input_idx >= num_inputs_) {
            return nullptr;
        }
        return GetWorkloadDataPtr(inputs_[input_idx],
                                  inputs_contiguous_[input_idx], workload_idx);
    }
    /// Typed variant of GetInputPtr(). The caller is responsible for ensuring
    /// that sizeof(T) matches the input's dtype size.
    template <typename T>
    OPEN3D_HOST_DEVICE T* GetInputPtr(int64_t input_idx,
                                      int64_t workload_idx) const {
        if (input_idx < 0 || input_idx >= num_inputs_) {
            return nullptr;
        }
        return GetWorkloadDataPtr<T>(inputs_[input_idx],
                                     inputs_contiguous_[input_idx],
                                     workload_idx);
    }

    /// Get the output Tensor data pointer for workload_idx (single output).
    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[0], outputs_contiguous_[0],
                                  workload_idx);
    }

    /// Typed variant of GetOutputPtr() for the single-output case.
    template <typename T>
    OPEN3D_HOST_DEVICE T* GetOutputPtr(int64_t workload_idx) const {
        return GetWorkloadDataPtr<T>(outputs_[0], outputs_contiguous_[0],
                                     workload_idx);
    }

    /// Get the output_idx-th output Tensor data pointer for workload_idx.
    OPEN3D_HOST_DEVICE char* GetOutputPtr(int64_t output_idx,
                                          int64_t workload_idx) const {
        return GetWorkloadDataPtr(outputs_[output_idx],
                                  outputs_contiguous_[output_idx],
                                  workload_idx);
    }

    /// Typed variant of the multi-output GetOutputPtr().
    template <typename T>
    OPEN3D_HOST_DEVICE T* GetOutputPtr(int64_t output_idx,
                                       int64_t workload_idx) const {
        return GetWorkloadDataPtr<T>(outputs_[output_idx],
                                     outputs_contiguous_[output_idx],
                                     workload_idx);
    }
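These accessors are what a per-workload kernel body typically uses. Below is a minimal serial CPU sketch of c = a + b through the Indexer (shapes and dtype are illustrative assumptions; real launchers dispatch this loop over threads or CUDA blocks).

#include "open3d/core/Indexer.h"
#include "open3d/core/Tensor.h"

using namespace open3d::core;

int main() {
    Tensor a = Tensor::Ones({2, 3}, Dtype::Float32);
    Tensor b = Tensor::Ones({1, 3}, Dtype::Float32);  // broadcast over dim 0
    Tensor c = Tensor::Empty({2, 3}, Dtype::Float32);

    Indexer indexer({a, b}, c, DtypePolicy::ALL_SAME);
    for (int64_t w = 0; w < indexer.NumWorkloads(); ++w) {
        // Each workload maps to one output element and the corresponding
        // (possibly broadcast) input elements.
        const float* pa = indexer.GetInputPtr<float>(0, w);
        const float* pb = indexer.GetInputPtr<float>(1, w);
        float* pc = indexer.GetOutputPtr<float>(w);
        *pc = *pa + *pb;
    }
    return 0;
}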
#ifdef BUILD_ISPC_MODULE
    /// Convert Indexer to ispc::Indexer.
    ispc::Indexer ToISPC() const;
#endif

    /// Merge adjacent dimensions whenever the memory layout allows, to reduce
    /// loop depth.
    void CoalesceDimensions();

    /// Reorder dimensions so that reduction dimensions come first.
    void ReorderDimensions(const SizeVector& reduction_dims);

    /// Recompute master_strides_ from master_shape_.
    void UpdateMasterStrides();

    /// Recompute the per-tensor contiguity flags.
    void UpdateContiguousFlags();

    /// Broadcast src to the destination shape by setting the byte stride of
    /// broadcast dimensions to 0.
    static void BroadcastRestride(TensorRef& src,
                                  int64_t dst_ndims,
                                  const int64_t* dst_shape);

    /// Set the byte stride of reduced dimensions in dst to 0.
    static void ReductionRestride(TensorRef& dst,
                                  int64_t src_ndims,
                                  const int64_t* src_shape,
                                  const SizeVector& reduction_dims);
    /// Get the data pointer of tr for a given workload index, using the
    /// master strides to translate the linear index into per-dimension
    /// coordinates.
    OPEN3D_HOST_DEVICE char* GetWorkloadDataPtr(const TensorRef& tr,
                                                bool tr_contiguous,
                                                int64_t workload_idx) const {
        if (workload_idx < 0) {
            return nullptr;
        }
        if (tr_contiguous) {
            // Fast path: linear byte offset.
            return static_cast<char*>(tr.data_ptr_) +
                   workload_idx * tr.dtype_byte_size_;
        } else {
            int64_t offset = 0;
            for (int64_t i = 0; i < ndims_; ++i) {
                offset += workload_idx / master_strides_[i] *
                          tr.byte_strides_[i];
                workload_idx = workload_idx % master_strides_[i];
            }
            return static_cast<char*>(tr.data_ptr_) + offset;
        }
    }

    /// Typed variant of GetWorkloadDataPtr(); T must match tr's dtype size.
    template <typename T>
    OPEN3D_HOST_DEVICE T* GetWorkloadDataPtr(const TensorRef& tr,
                                             bool tr_contiguous,
                                             int64_t workload_idx) const {
        if (workload_idx < 0) {
            return nullptr;
        }
        if (tr_contiguous) {
            return static_cast<T*>(tr.data_ptr_) + workload_idx;
        } else {
            int64_t offset = 0;
            for (int64_t i = 0; i < ndims_; ++i) {
                offset += workload_idx / master_strides_[i] *
                          tr.byte_strides_[i];
                workload_idx = workload_idx % master_strides_[i];
            }
            return static_cast<T*>(static_cast<void*>(
                    static_cast<char*>(tr.data_ptr_) + offset));
        }
    }
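To see how a zero stride implements broadcasting in this address computation, here is a standalone sketch of the strided branch (master shape, strides, and the broadcast input are illustrative assumptions): with master shape {2, 3} and master strides {3, 1}, an input broadcast along dim 0 carries byte strides {0, 4}, so every row maps to the same storage.

#include <cstdint>
#include <cstdio>

int main() {
    const int64_t master_strides[2] = {3, 1};      // default strides of {2, 3}
    const int64_t input_byte_strides[2] = {0, 4};  // float32, broadcast dim 0

    int64_t workload_idx = 4;  // output element (row 1, col 1)
    int64_t offset = 0;
    for (int i = 0; i < 2; ++i) {
        offset += workload_idx / master_strides[i] * input_byte_strides[i];
        workload_idx = workload_idx % master_strides[i];
    }
    // The row coordinate is multiplied by stride 0, so rows 0 and 1 read the
    // same input bytes.
    std::printf("input byte offset = %lld\n",
                static_cast<long long>(offset));  // 4
    return 0;
}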
    int64_t num_inputs_ = 0;
    int64_t num_outputs_ = 0;

    /// Input and output TensorRefs with their contiguity flags.
    TensorRef inputs_[MAX_INPUTS];
    TensorRef outputs_[MAX_OUTPUTS];
    bool inputs_contiguous_[MAX_INPUTS];
    bool outputs_contiguous_[MAX_OUTPUTS];

    /// The master shape the Indexer iterates over, and its default strides.
    int64_t master_shape_[MAX_DIMS];
    int64_t master_strides_[MAX_DIMS];
    int64_t ndims_ = 0;

    /// Whether this Indexer writes the final reduction output, and whether
    /// results shall be accumulated into the output.
    bool final_output_ = true;
    bool accumulate_ = false;
};

/// Iterates over the sub-Indexers produced when an Indexer is split for
/// 32-bit indexing.
class IndexerIterator {
public:
    struct Iterator {
        Iterator() {}
        // ...
        std::vector<std::unique_ptr<Indexer>> vec_;
    };
    // ...
};