#include <Indexer.h>

Public Member Functions
	Indexer ()

	Indexer (const Indexer &)=default

Indexer &	operator= (const Indexer &)=default

	Indexer (const std::vector< Tensor > &input_tensors, const Tensor &output_tensor, DtypePolicy dtype_policy=DtypePolicy::ALL_SAME, const SizeVector &reduction_dims={})

	Indexer (const std::vector< Tensor > &input_tensors, const std::vector< Tensor > &output_tensors, DtypePolicy dtype_policy=DtypePolicy::ALL_SAME, const SizeVector &reduction_dims={})

bool	CanUse32BitIndexing () const
	Returns true iff the maximum_offsets in bytes are smaller than 2^31 - 1. More...

IndexerIterator	SplitTo32BitIndexing () const

std::unique_ptr< Indexer >	SplitLargestDim ()

Indexer	GetPerOutputIndexer (int64_t output_idx) const

bool	ShouldAccumulate () const

bool	IsFinalOutput () const

void	ShrinkDim (int64_t dim, int64_t start, int64_t size)

int64_t	NumReductionDims () const
	Returns the number of reduction dimensions. More...

int64_t	NumDims () const
	Returns number of dimensions of the Indexer. More...

const int64_t *	GetPrimaryShape () const

int64_t *	GetPrimaryShape ()

const int64_t *	GetPrimaryStrides () const

int64_t	NumWorkloads () const

int64_t	NumOutputElements () const
	Returns the number of output elements. More...

int64_t	NumInputs () const
	Number of input Tensors. More...

int64_t	NumOutputs () const
	Number of output Tensors. More...

TensorRef &	GetInput (int64_t i)
	Returns input TensorRef. More...

const TensorRef &	GetInput (int64_t i) const

TensorRef &	GetOutput (int64_t i)
	Returns output TensorRef. More...

const TensorRef &	GetOutput (int64_t i) const

TensorRef &	GetOutput ()

const TensorRef &	GetOutput () const

bool	IsReductionDim (int64_t dim) const
	Returns true if the `dim` -th dimension is reduced. More...

OPEN3D_HOST_DEVICE char *	GetInputPtr (int64_t input_idx, int64_t workload_idx) const

template<typename T >
OPEN3D_HOST_DEVICE T *	GetInputPtr (int64_t input_idx, int64_t workload_idx) const

OPEN3D_HOST_DEVICE char *	GetOutputPtr (int64_t workload_idx) const

template<typename T >
OPEN3D_HOST_DEVICE T *	GetOutputPtr (int64_t workload_idx) const

OPEN3D_HOST_DEVICE char *	GetOutputPtr (int64_t output_idx, int64_t workload_idx) const

template<typename T >
OPEN3D_HOST_DEVICE T *	GetOutputPtr (int64_t output_idx, int64_t workload_idx) const

Protected Member Functions
void	CoalesceDimensions ()

void	ReorderDimensions (const SizeVector &reduction_dims)

void	UpdatePrimaryStrides ()
	Update primary_strides_ based on primary_shape_. More...

void	UpdateContiguousFlags ()
	Update inputs_contiguous_ and outputs_contiguous_. More...

OPEN3D_HOST_DEVICE char *	GetWorkloadDataPtr (const TensorRef &tr, bool tr_contiguous, int64_t workload_idx) const

template<typename T >
OPEN3D_HOST_DEVICE T *	GetWorkloadDataPtr (const TensorRef &tr, bool tr_contiguous, int64_t workload_idx) const

Static Protected Member Functions
static void	BroadcastRestride (TensorRef &src, int64_t dst_ndims, const int64_t *dst_shape)

static void	ReductionRestride (TensorRef &dst, int64_t src_ndims, const int64_t *src_shape, const SizeVector &reduction_dims)

Protected Attributes
int64_t	num_inputs_ = 0
	Number of input and output Tensors. More...

int64_t	num_outputs_ = 0

TensorRef	inputs_ [MAX_INPUTS]
	Array of input TensorRefs. More...

TensorRef	outputs_ [MAX_OUTPUTS]
	Array of output TensorRefs. More...

bool	inputs_contiguous_ [MAX_INPUTS]
	Array of contiguous flags for all input TensorRefs. More...

bool	outputs_contiguous_ [MAX_OUTPUTS]
	Array of contiguous flags for all output TensorRefs. More...

int64_t	primary_shape_ [MAX_DIMS]

int64_t	primary_strides_ [MAX_DIMS]

int64_t	ndims_ = 0
	Indexer's global number of dimensions. More...

bool	final_output_ = true

bool	accumulate_ = false

Detailed Description

Indexing engine for elementwise ops with broadcasting support.

Fancy indexing is supported by restriding the input tensor and treating the operation as an elementwise op.

After constructing Indexer on the host, the indexing methods can be used from both host and device.

Constructor & Destructor Documentation

◆ Indexer() [1/4]

open3d::core::Indexer::Indexer ( )

inline

◆ Indexer() [2/4]

open3d::core::Indexer::Indexer ( const Indexer & )

default

◆ Indexer() [3/4]

open3d::core::Indexer::Indexer	(	const std::vector< Tensor > &	input_tensors,
		const Tensor &	output_tensor,
		DtypePolicy	dtype_policy = `DtypePolicy::ALL_SAME`,
		const SizeVector &	reduction_dims = `{}`
	)

Only a single output is supported for simplicity. To extend this function to support multiple outputs, one may check for shape compatibility of all outputs.

◆ Indexer() [4/4]

open3d::core::Indexer::Indexer	(	const std::vector< Tensor > &	input_tensors,
		const std::vector< Tensor > &	output_tensors,
		DtypePolicy	dtype_policy = `DtypePolicy::ALL_SAME`,
		const SizeVector &	reduction_dims = `{}`
	)

Member Function Documentation

◆ BroadcastRestride()

void open3d::core::Indexer::BroadcastRestride	(	TensorRef &	src,
		int64_t	dst_ndims,
		const int64_t *	dst_shape
	)

static protected

Broadcast src to dst by setting shape 1 to omitted dimensions and setting stride 0 to broadcast dimensions.

Note that other approaches may also work. E.g. one could set src's shape to exactly the same as dst's shape. In general, if a dimension is of size 1, the stride has no effect in computing offsets; likewise, if a dimension has stride 0, the shape has no effect in computing offsets.

Example, [After] restriding:
src.shape_:   [1, 2, 1, 1, 3]
src.strides_: [0, 3, 0, 3, 1]

Parameters

src	The source TensorRef to be broadcasted.
dst_ndims	Number of dimensions to be broadcasted to.
dst_shape	Shape to be broadcasted to.

◆ CanUse32BitIndexing()

bool open3d::core::Indexer::CanUse32BitIndexing ( ) const

Returns true iff the maximum_offsets in bytes are smaller than 2^31 - 1.

◆ CoalesceDimensions()

void open3d::core::Indexer::CoalesceDimensions ( )

protected

Merge adjacent dimensions if either dim is 1 or if: shape[n] * stride[n] == stride[n + 1]

◆ GetInput() [1/2]

TensorRef& open3d::core::Indexer::GetInput ( int64_t i )

inline

Returns input TensorRef.

◆ GetInput() [2/2]

const TensorRef& open3d::core::Indexer::GetInput ( int64_t i ) const

inline

◆ GetInputPtr() [1/2]

OPEN3D_HOST_DEVICE char* open3d::core::Indexer::GetInputPtr	(	int64_t	input_idx,
		int64_t	workload_idx
	)		const

inline

Get input Tensor data pointer based on workload_idx.

Parameters

input_idx	Input tensor index.
workload_idx	The index of the compute workload, similar to thread_id, if a thread only processes one workload.

◆ GetInputPtr() [2/2]

template<typename T >

OPEN3D_HOST_DEVICE T* open3d::core::Indexer::GetInputPtr	(	int64_t	input_idx,
		int64_t	workload_idx
	)		const

inline

Get input Tensor data pointer based on workload_idx.

Parameters

input_idx	Input tensor index.
workload_idx	The index of the compute workload, similar to thread_id, if a thread only processes one workload.

Note: Assumes that sizeof(T) matches the input's dtype size, but does not check this constraint for performance reasons.

◆ GetOutput() [1/4]

TensorRef& open3d::core::Indexer::GetOutput ( )

inline

Returns output TensorRef. Only works if there's only one output. Equivalent to GetOutput(0).

◆ GetOutput() [2/4]

const TensorRef& open3d::core::Indexer::GetOutput ( ) const

inline

◆ GetOutput() [3/4]

TensorRef& open3d::core::Indexer::GetOutput ( int64_t i )

inline

Returns output TensorRef.

◆ GetOutput() [4/4]

const TensorRef& open3d::core::Indexer::GetOutput ( int64_t i ) const

inline

◆ GetOutputPtr() [1/4]

OPEN3D_HOST_DEVICE char* open3d::core::Indexer::GetOutputPtr	(	int64_t	output_idx,
		int64_t	workload_idx
	)		const

inline

Get output Tensor data pointer based on workload_idx.

Parameters

output_idx	Output tensor index.
workload_idx	The index of the compute workload, similar to thread_id, if a thread only processes one workload.

◆ GetOutputPtr() [2/4]

template<typename T >

OPEN3D_HOST_DEVICE T* open3d::core::Indexer::GetOutputPtr	(	int64_t	output_idx,
		int64_t	workload_idx
	)		const

inline

Get output Tensor data pointer based on workload_idx.

Parameters

output_idx	Output tensor index.
workload_idx	The index of the compute workload, similar to thread_id, if a thread only processes one workload.

◆ GetOutputPtr() [3/4]

OPEN3D_HOST_DEVICE char* open3d::core::Indexer::GetOutputPtr ( int64_t workload_idx ) const

inline

Get output Tensor data pointer based on workload_idx.

Parameters

workload_idx The index of the compute workload, similar to thread_id, if a thread only processes one workload.

◆ GetOutputPtr() [4/4]

template<typename T >

OPEN3D_HOST_DEVICE T* open3d::core::Indexer::GetOutputPtr ( int64_t workload_idx ) const

inline

Get output Tensor data pointer based on workload_idx.

Parameters

workload_idx The index of the compute workload, similar to thread_id, if a thread only processes one workload.

Note: Assumes that sizeof(T) matches the output's dtype size, but does not check this constraint for performance reasons.

◆ GetPerOutputIndexer()

Indexer open3d::core::Indexer::GetPerOutputIndexer ( int64_t output_idx ) const

Get a sub-indexer that loops through all inputs corresponding to a single output.

◆ GetPrimaryShape() [1/2]

int64_t* open3d::core::Indexer::GetPrimaryShape ( )

inline

◆ GetPrimaryShape() [2/2]

const int64_t* open3d::core::Indexer::GetPrimaryShape ( ) const

inline

Returns the Indexer's primary shape; one can iterate the Indexer with this shape.

◆ GetPrimaryStrides()

const int64_t* open3d::core::Indexer::GetPrimaryStrides ( ) const

inline

Returns the Indexer's primary strides; one can iterate the Indexer with these strides. It is always set to be the default strides from primary_shape_.

◆ GetWorkloadDataPtr() [1/2]

OPEN3D_HOST_DEVICE char* open3d::core::Indexer::GetWorkloadDataPtr	(	const TensorRef &	tr,
		bool	tr_contiguous,
		int64_t	workload_idx
	)		const

inline protected

Get data pointer from a TensorRef with workload_idx. Note: can be optimized by computing all input ptrs and output ptr together.

◆ GetWorkloadDataPtr() [2/2]

template<typename T >

OPEN3D_HOST_DEVICE T* open3d::core::Indexer::GetWorkloadDataPtr	(	const TensorRef &	tr,
		bool	tr_contiguous,
		int64_t	workload_idx
	)		const

inline protected

Get data pointer from a TensorRef with workload_idx. Note: can be optimized by computing all input ptrs and output ptr together.

Note: Assumes that sizeof(T) matches the data's dtype size, but does not check this constraint for performance reasons.

◆ IsFinalOutput()

bool open3d::core::Indexer::IsFinalOutput ( ) const

inline

◆ IsReductionDim()

bool open3d::core::Indexer::IsReductionDim ( int64_t dim ) const

inline

Returns true if the dim -th dimension is reduced.

◆ NumDims()

int64_t open3d::core::Indexer::NumDims ( ) const

inline

Returns number of dimensions of the Indexer.

◆ NumInputs()

int64_t open3d::core::Indexer::NumInputs ( ) const

inline

Number of input Tensors.

◆ NumOutputElements()

int64_t open3d::core::Indexer::NumOutputElements ( ) const

Returns the number of output elements.

◆ NumOutputs()

int64_t open3d::core::Indexer::NumOutputs ( ) const

inline

Number of output Tensors.

◆ NumReductionDims()

int64_t open3d::core::Indexer::NumReductionDims ( ) const

Returns the number of reduction dimensions.

◆ NumWorkloads()

int64_t open3d::core::Indexer::NumWorkloads ( ) const

Returns the total number of workloads (e.g. computations) needed for the op. The scheduler schedules these workloads to run on parallel threads.

For non-reduction ops, NumWorkloads() is the same as number of output elements (e.g. for broadcasting ops).

For reduction ops, NumWorkloads() is the same as the number of input elements. Currently we don't allow mixing broadcasting and reduction in one op kernel.

◆ operator=()

Indexer& open3d::core::Indexer::operator= ( const Indexer & )

default

◆ ReductionRestride()

void open3d::core::Indexer::ReductionRestride	(	TensorRef &	dst,
		int64_t	src_ndims,
		const int64_t *	src_shape,
		const SizeVector &	reduction_dims
	)

static protected

Symmetrical to BroadcastRestride. Set the reduced dimensions' stride to 0 at output. Currently only support the keepdim=true case.

◆ ReorderDimensions()

void open3d::core::Indexer::ReorderDimensions ( const SizeVector & reduction_dims )

protected

◆ ShouldAccumulate()

bool open3d::core::Indexer::ShouldAccumulate ( ) const

inline

◆ ShrinkDim()

void open3d::core::Indexer::ShrinkDim	(	int64_t	dim,
		int64_t	start,
		int64_t	size
	)

Shrink iteration to a specific range in a specific dimension.

Parameters

dim	The dimension to be shrunk.
start	Starting index (inclusive) for dimension `dim`. No dimension wrapping is available.
size	The size to iterate in dimension `dim`.

◆ SplitLargestDim()

std::unique_ptr< Indexer > open3d::core::Indexer::SplitLargestDim ( )

Split the indexer such that the largest-span-dimension is split into two halves. The returned new indexer iterates the first half while the current indexer iterates the second half.

◆ SplitTo32BitIndexing()

IndexerIterator open3d::core::Indexer::SplitTo32BitIndexing ( ) const

Returns an iterator of Indexers, each of which can be indexed in 32 bits.

◆ UpdateContiguousFlags()

void open3d::core::Indexer::UpdateContiguousFlags ( )

protected

Update inputs_contiguous_ and outputs_contiguous_.

◆ UpdatePrimaryStrides()

void open3d::core::Indexer::UpdatePrimaryStrides ( )

protected

Update primary_strides_ based on primary_shape_.

Field Documentation

◆ accumulate_

bool open3d::core::Indexer::accumulate_ = false

protected

If the kernel should accumulate into the output. Only relevant for CUDA reductions.

◆ final_output_

bool open3d::core::Indexer::final_output_ = true

protected

Whether this iterator produces the actual output, as opposed to something that will be accumulated further. Only relevant for CUDA reductions.

◆ inputs_

TensorRef open3d::core::Indexer::inputs_[MAX_INPUTS]

protected

Array of input TensorRefs.

◆ inputs_contiguous_

bool open3d::core::Indexer::inputs_contiguous_[MAX_INPUTS]

protected

Array of contiguous flags for all input TensorRefs.

◆ ndims_

int64_t open3d::core::Indexer::ndims_ = 0

protected

Indexer's global number of dimensions.

◆ num_inputs_

int64_t open3d::core::Indexer::num_inputs_ = 0

protected

Number of input and output Tensors.

◆ num_outputs_

int64_t open3d::core::Indexer::num_outputs_ = 0

protected

◆ outputs_

TensorRef open3d::core::Indexer::outputs_[MAX_OUTPUTS]

protected

Array of output TensorRefs.

◆ outputs_contiguous_

bool open3d::core::Indexer::outputs_contiguous_[MAX_OUTPUTS]

protected

Array of contiguous flags for all output TensorRefs.

◆ primary_shape_

int64_t open3d::core::Indexer::primary_shape_[MAX_DIMS]

protected

Indexer's global shape. The shape's number of elements is the same as NumWorkloads() for the Indexer.

For broadcasting, primary_shape_ is the same as the output shape.
For reduction, primary_shape_ is the same as the input shape.
Currently we don't allow broadcasting mixed with reduction. But if broadcasting mixed with reduction is allowed, primary_shape_ is a mix of input shape and output shape. First, fill in all omitted dimensions (in inputs for broadcasting) and reduction dimensions (as if keepdim=true always) with size 1. For each axis, the primary dimension is the non-1 dimension (if both are 1, then the primary dimension is 1 in that axis).

◆ primary_strides_

int64_t open3d::core::Indexer::primary_strides_[MAX_DIMS]

protected

The default strides for primary_shape_ for internal use only. Used to compute the actual strides and ultimately the index offsets.

The documentation for this class was generated from the following files:

/home/runner/work/Open3D/Open3D/cpp/open3d/core/Indexer.h (5c982c7 (Thu Apr 18 12:15:13 2024 -0700))
/home/runner/work/Open3D/Open3D/cpp/open3d/core/Indexer.cpp (5c982c7 (Thu Apr 18 12:15:13 2024 -0700))

Public Member Functions

Protected Member Functions

Static Protected Member Functions

Protected Attributes

Detailed Description

Constructor & Destructor Documentation

◆ Indexer() [1/4]

◆ Indexer() [2/4]

◆ Indexer() [3/4]

◆ Indexer() [4/4]

Member Function Documentation

◆ BroadcastRestride()

◆ CanUse32BitIndexing()

◆ CoalesceDimensions()

◆ GetInput() [1/2]

◆ GetInput() [2/2]

◆ GetInputPtr() [1/2]

◆ GetInputPtr() [2/2]

◆ GetOutput() [1/4]

◆ GetOutput() [2/4]

◆ GetOutput() [3/4]

◆ GetOutput() [4/4]

◆ GetOutputPtr() [1/4]

◆ GetOutputPtr() [2/4]

◆ GetOutputPtr() [3/4]

◆ GetOutputPtr() [4/4]

◆ GetPerOutputIndexer()

◆ GetPrimaryShape() [1/2]

◆ GetPrimaryShape() [2/2]

◆ GetPrimaryStrides()

◆ GetWorkloadDataPtr() [1/2]

◆ GetWorkloadDataPtr() [2/2]

◆ IsFinalOutput()

◆ IsReductionDim()

◆ NumDims()

◆ NumInputs()

◆ NumOutputElements()

◆ NumOutputs()

◆ NumReductionDims()

◆ NumWorkloads()

◆ operator=()

◆ ReductionRestride()

◆ ReorderDimensions()

◆ ShouldAccumulate()

◆ ShrinkDim()

◆ SplitLargestDim()

◆ SplitTo32BitIndexing()

◆ UpdateContiguousFlags()

◆ UpdatePrimaryStrides()

Field Documentation

◆ accumulate_

◆ final_output_

◆ inputs_

◆ inputs_contiguous_

◆ ndims_

◆ num_inputs_

◆ num_outputs_

◆ outputs_

◆ outputs_contiguous_

◆ primary_shape_

◆ primary_strides_