latest/cpp_api/_slab_hash_backend_8h_source.html

 // ----------------------------------------------------------------------------

 // -                        Open3D: www.open3d.org                            -

 // ----------------------------------------------------------------------------

 // Copyright (c) 2018-2024 www.open3d.org

 // SPDX-License-Identifier: MIT

 // ----------------------------------------------------------------------------


 #pragma once


 #include <memory>


 #include "open3d/core/CUDAUtils.h"

 #include "open3d/core/hashmap/CUDA/SlabHashBackendImpl.h"

 #include "open3d/core/hashmap/DeviceHashBackend.h"

 #include "open3d/core/hashmap/Dispatch.h"


 namespace open3d {

 namespace core {

 // CUDA hash map backend built on SlabHashBackendImpl. Host-side entry points
 // select the owning device, launch the corresponding kernels and synchronize
 // before returning.
 //
 // Template parameters:
 //   Key  - key type forwarded to SlabHashBackendImpl and the kernels.
 //   Hash - hash functor type forwarded to SlabHashBackendImpl.
 //   Eq   - key equality functor type forwarded to SlabHashBackendImpl.
 template <typename Key, typename Hash, typename Eq>

 class SlabHashBackend : public DeviceHashBackend {

 public:

     // Forwards all arguments to DeviceHashBackend and then calls
     // Allocate(init_capacity) on the given device.
     SlabHashBackend(int64_t init_capacity,

                     int64_t key_dsize,

                     const std::vector<int64_t>& value_dsizes,

                     const Device& device);


     // Calls Free() on the owning device.
     ~SlabHashBackend();


     // Currently only activates the device; no storage is resized here.
     void Reserve(int64_t capacity) override;


     // Parallel insert contiguous arrays of keys and values.
     // input_values_soa holds one pointer per value array; it is copied to
     // the device before being handed to the kernel.
     // output_buf_indices and output_masks are written by the kernels.
     // Returns immediately when count == 0.
     void Insert(const void* input_keys,

                 const std::vector<const void*>& input_values_soa,

                 buf_index_t* output_buf_indices,

                 bool* output_masks,

                 int64_t count) override;


     // Parallel find a contiguous array of keys.
     // output_masks is zero-filled before the kernel runs.
     // Returns immediately when count == 0.
     void Find(const void* input_keys,

               buf_index_t* output_buf_indices,

               bool* output_masks,

               int64_t count) override;


     // Parallel erase a contiguous array of keys.
     // output_masks is zero-filled before the two erase passes run.
     // Returns immediately when count == 0.
     void Erase(const void* input_keys,

                bool* output_masks,

                int64_t count) override;


     // Parallel collect all iterators in the hash table.
     // Returns the value of the device-side counter filled by the kernel.
     int64_t GetActiveIndices(buf_index_t* output_indices) override;

     // Clear stored map without reallocating memory.
     void Clear() override;


     // Get the size (number of valid entries) of the hash map.
     int64_t Size() const override;

     // Get the number of buckets of the hash map.
     int64_t GetBucketCount() const override;

     // Get the number of entries per bucket.
     std::vector<int64_t> BucketSizes() const override;

     // Get the current load factor, defined as size / bucket count.
     float LoadFactor() const override;


     // Returns a copy of the device-side implementation object.
     SlabHashBackendImpl<Key, Hash, Eq> GetImpl() { return impl_; }


     // Creates the key/value buffer, the slab node manager and the bucket
     // list heads for `capacity` entries (bucket count is capacity * 2).
     void Allocate(int64_t capacity) override;

     // Shuts down the buffer accessor and releases the bucket list heads.
     void Free() override;


 protected:

     // Implementation object that is passed by value to every kernel.
     SlabHashBackendImpl<Key, Hash, Eq> impl_;


     // Accessor set up from the key/value buffer in Allocate().
     CUDAHashBackendBufferAccessor buffer_accessor_;

     // Manager for the linked list nodes; reset in Clear().
     std::shared_ptr<SlabNodeManager> node_mgr_;


     // Number of buckets; assigned in Allocate().
     int64_t bucket_count_;

 };


 // Builds the backend: the base class records the configuration, then the
 // device storage for `init_capacity` entries is created on `device`.
 template <typename Key, typename Hash, typename Eq>
 SlabHashBackend<Key, Hash, Eq>::SlabHashBackend(
         int64_t init_capacity,
         int64_t key_dsize,
         const std::vector<int64_t>& value_dsizes,
         const Device& device)
     : DeviceHashBackend(init_capacity, key_dsize, value_dsizes, device) {
     // Keep the target device active for the duration of the allocation.
     CUDAScopedDevice device_guard(this->device_);
     this->Allocate(init_capacity);
 }


 // Releases the device storage owned by this backend.
 template <typename Key, typename Hash, typename Eq>
 SlabHashBackend<Key, Hash, Eq>::~SlabHashBackend() {
     // Keep the owning device active while the storage is released.
     CUDAScopedDevice device_guard(this->device_);
     this->Free();
 }


 // Activates the owning device and does nothing else; `capacity` is unused.
 // NOTE(review): presumably resizing is driven by the caller through
 // Free()/Allocate() -- confirm against the call sites.
 template <typename Key, typename Hash, typename Eq>
 void SlabHashBackend<Key, Hash, Eq>::Reserve(int64_t capacity) {
     CUDAScopedDevice device_guard(this->device_);
     (void)capacity;
 }


 // Looks up `count` keys on the device.
 //
 // input_keys         - keys to search for, passed through to FindKernel.
 // output_buf_indices - filled by FindKernel.
 // output_masks       - zero-filled here, then updated by FindKernel.
 // count              - number of keys; an empty batch is a no-op.
 template <typename Key, typename Hash, typename Eq>
 void SlabHashBackend<Key, Hash, Eq>::Find(const void* input_keys,
                                           buf_index_t* output_buf_indices,
                                           bool* output_masks,
                                           int64_t count) {
     CUDAScopedDevice device_guard(this->device_);
     if (count == 0) {
         return;
     }

     // Every mask starts out cleared before the kernel runs.
     const size_t mask_bytes = sizeof(bool) * count;
     OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, mask_bytes));
     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());

     // Enough blocks of kThreadsPerBlock threads to cover `count` entries.
     const int64_t grid_size =
             (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
     FindKernel<<<grid_size, kThreadsPerBlock, 0, core::cuda::GetStream()>>>(
             impl_, input_keys, output_buf_indices, output_masks, count);

     // Wait for completion and surface any launch error.
     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());
 }


 // Erases `count` keys on the device in two kernel passes.
 //
 // input_keys   - keys to erase, passed to the first pass.
 // output_masks - zero-filled here, then updated by the erase kernels.
 // count        - number of keys; an empty batch is a no-op.
 template <typename Key, typename Hash, typename Eq>
 void SlabHashBackend<Key, Hash, Eq>::Erase(const void* input_keys,
                                            bool* output_masks,
                                            int64_t count) {
     CUDAScopedDevice device_guard(this->device_);
     if (count == 0) {
         return;
     }

     // Every mask starts out cleared before the kernels run.
     const size_t mask_bytes = sizeof(bool) * count;
     OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, mask_bytes));
     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());

     // Temporary device array handed from pass 0 to pass 1.
     void* scratch_raw =
             MemoryManager::Malloc(sizeof(buf_index_t) * count, this->device_);
     auto scratch_indices = static_cast<buf_index_t*>(scratch_raw);

     // Enough blocks of kThreadsPerBlock threads to cover `count` entries.
     const int64_t grid_size =
             (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
     EraseKernelPass0<<<grid_size, kThreadsPerBlock, 0,
                        core::cuda::GetStream()>>>(
             impl_, input_keys, scratch_indices, output_masks, count);
     EraseKernelPass1<<<grid_size, kThreadsPerBlock, 0,
                        core::cuda::GetStream()>>>(
             impl_, scratch_indices, output_masks, count);

     // Wait for both passes and surface any launch error.
     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());

     MemoryManager::Free(scratch_indices, this->device_);
 }


 // Collects the buffer indices of all active entries.
 //
 // output_buf_indices - device array filled by GetActiveIndicesKernel.
 // Returns the value of the device-side counter written by the kernel, or 0
 // without launching anything when the table has no buckets.
 template <typename Key, typename Hash, typename Eq>
 int64_t SlabHashBackend<Key, Hash, Eq>::GetActiveIndices(
         buf_index_t* output_buf_indices) {
     CUDAScopedDevice scoped_device(this->device_);

     // kWarpSize threads per bucket, rounded up to whole blocks.
     const int64_t num_blocks =
             (impl_.bucket_count_ * kWarpSize + kThreadsPerBlock - 1) /
             kThreadsPerBlock;

     // A table without buckets would request a zero-sized grid, which is an
     // invalid launch configuration. Nothing can be active in that case, so
     // report 0 directly (mirrors the `count == 0` guards in Find/Erase/Insert).
     if (num_blocks <= 0) {
         return 0;
     }

     // Single device-side counter, zeroed before the kernel runs.
     uint32_t* count = static_cast<uint32_t*>(
             MemoryManager::Malloc(sizeof(uint32_t), this->device_));
     OPEN3D_CUDA_CHECK(cudaMemset(count, 0, sizeof(uint32_t)));

     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());

     GetActiveIndicesKernel<<<num_blocks, kThreadsPerBlock, 0,
                              core::cuda::GetStream()>>>(
             impl_, output_buf_indices, count);
     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());

     // Bring the counter back to the host and release the device copy.
     uint32_t ret;
     MemoryManager::MemcpyToHost(&ret, count, this->device_, sizeof(uint32_t));
     MemoryManager::Free(count, this->device_);

     return static_cast<int64_t>(ret);
 }


 // Empties the map while keeping all allocations in place.
 template <typename Key, typename Hash, typename Eq>
 void SlabHashBackend<Key, Hash, Eq>::Clear() {
     CUDAScopedDevice device_guard(this->device_);

     // Step 1: reset the heap of the key/value buffer.
     this->buffer_->ResetHeap();

     // Step 2: overwrite every bucket list head with 0xFF bytes, the same
     // fill pattern used when the heads are first allocated.
     const size_t head_bytes = sizeof(Slab) * this->bucket_count_;
     OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF, head_bytes));
     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());

     // Step 3: reset the linked list nodes.
     node_mgr_->Reset();
 }


 // Returns the current heap top index of the key/value buffer, which this
 // backend reports as the number of stored entries.
 template <typename Key, typename Hash, typename Eq>
 int64_t SlabHashBackend<Key, Hash, Eq>::Size() const {
     CUDAScopedDevice device_guard(this->device_);
     const int64_t heap_top = this->buffer_->GetHeapTopIndex();
     return heap_top;
 }


 // Returns the number of buckets chosen by the last Allocate() call.
 template <typename Key, typename Hash, typename Eq>
 int64_t SlabHashBackend<Key, Hash, Eq>::GetBucketCount() const {
     CUDAScopedDevice device_guard(this->device_);
     return this->bucket_count_;
 }


 // Counts the entries stored in each bucket.
 //
 // Returns a host vector with one element per bucket. When the buffer has no
 // capacity, the zero-filled vector is returned without launching a kernel.
 template <typename Key, typename Hash, typename Eq>
 std::vector<int64_t> SlabHashBackend<Key, Hash, Eq>::BucketSizes() const {
     CUDAScopedDevice scoped_device(this->device_);

     // Enough blocks of kThreadsPerBlock threads to cover the buffer capacity.
     const int64_t num_blocks =
             (impl_.buffer_accessor_.capacity_ + kThreadsPerBlock - 1) /
             kThreadsPerBlock;

     // A zero-capacity buffer would request a zero-sized grid, which is an
     // invalid launch configuration. No bucket can hold anything in that
     // case, so hand back the value-initialized (all-zero) result directly.
     if (num_blocks <= 0) {
         return std::vector<int64_t>(impl_.bucket_count_);
     }

     // Per-bucket counters on the device, zeroed before the kernel runs.
     thrust::device_vector<int64_t> elems_per_bucket(impl_.bucket_count_);
     thrust::fill(elems_per_bucket.begin(), elems_per_bucket.end(), 0);

     CountElemsPerBucketKernel<<<num_blocks, kThreadsPerBlock, 0,
                                 core::cuda::GetStream()>>>(
             impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());

     // Copy the counters back to the host.
     std::vector<int64_t> result(impl_.bucket_count_);
     thrust::copy(elems_per_bucket.begin(), elems_per_bucket.end(),
                  result.begin());
     return result;
 }


 // Returns the load factor, defined as Size() / bucket count.
 // A table without buckets reports 0 instead of dividing by zero (which
 // would yield NaN for the 0 / 0 case).
 template <typename Key, typename Hash, typename Eq>
 float SlabHashBackend<Key, Hash, Eq>::LoadFactor() const {
     CUDAScopedDevice scoped_device(this->device_);
     if (this->bucket_count_ <= 0) {
         return 0.0f;
     }
     return static_cast<float>(Size()) /
            static_cast<float>(this->bucket_count_);
 }


 // Inserts `count` keys with their values in three kernel passes.
 //
 // input_keys         - keys to insert, passed to passes 0 and 1.
 // input_values_soa   - one host-held pointer per value array; the pointer
 //                      list is copied to the device for pass 2.
 // output_buf_indices - written/read by all three passes.
 // output_masks       - passed to passes 1 and 2.
 // count              - number of keys; an empty batch is a no-op.
 template <typename Key, typename Hash, typename Eq>
 void SlabHashBackend<Key, Hash, Eq>::Insert(
         const void* input_keys,
         const std::vector<const void*>& input_values_soa,
         buf_index_t* output_buf_indices,
         bool* output_masks,
         int64_t count) {
     CUDAScopedDevice device_guard(this->device_);
     if (count == 0) {
         return;
     }

     // Move the device-side heap top forward by `count` before any kernel
     // runs; pass 0 is given the previous top.
     const int heap_top_before = this->buffer_->GetHeapTopIndex();
     thrust::device_ptr<int> heap_top_ptr(impl_.buffer_accessor_.heap_top_);
     *heap_top_ptr = static_cast<int>(heap_top_before + count);

     // Enough blocks of kThreadsPerBlock threads to cover `count` entries.
     const int64_t grid_size =
             (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
     InsertKernelPass0<<<grid_size, kThreadsPerBlock, 0,
                         core::cuda::GetStream()>>>(
             impl_, input_keys, output_buf_indices, heap_top_before, count);
     InsertKernelPass1<<<grid_size, kThreadsPerBlock, 0,
                         core::cuda::GetStream()>>>(
             impl_, input_keys, output_buf_indices, output_masks, count);

     // The value pointer list lives on the host; mirror it on the device so
     // the kernel can dereference it.
     thrust::device_vector<const void*> device_value_ptrs(
             input_values_soa.begin(), input_values_soa.end());
     const void* const* value_ptrs_raw =
             thrust::raw_pointer_cast(device_value_ptrs.data());
     const int64_t value_array_count =
             static_cast<int64_t>(input_values_soa.size());

     // Pass 2 is instantiated for the block type selected from the buffer's
     // common block size.
     DISPATCH_DIVISOR_SIZE_TO_BLOCK_T(
             impl_.buffer_accessor_.common_block_size_, [&]() {
                 InsertKernelPass2<Key, Hash, Eq, block_t>
                         <<<grid_size, kThreadsPerBlock, 0,
                            core::cuda::GetStream()>>>(
                                 impl_, value_ptrs_raw, output_buf_indices,
                                 output_masks, count, value_array_count);
             });

     // Wait for all passes and surface any launch error.
     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());
 }


 // Creates all device storage for a map holding `capacity` entries:
 // the key/value buffer, the slab node manager and the bucket list heads.
 template <typename Key, typename Hash, typename Eq>
 void SlabHashBackend<Key, Hash, Eq>::Allocate(int64_t capacity) {
     CUDAScopedDevice device_guard(this->device_);

     // Two buckets are provisioned per entry of capacity.
     this->capacity_ = capacity;
     this->bucket_count_ = capacity * 2;

     // Key/value buffer, plus the accessor that is handed to the kernels.
     this->buffer_ = std::make_shared<HashBackendBuffer>(
             this->capacity_, this->key_dsize_, this->value_dsizes_,
             this->device_);
     buffer_accessor_.Setup(*this->buffer_);

     // Storage manager for the linked list nodes.
     node_mgr_ = std::make_shared<SlabNodeManager>(this->device_);

     // Bucket list heads, initialised to all-0xFF bytes.
     const size_t head_bytes = sizeof(Slab) * this->bucket_count_;
     void* head_raw = MemoryManager::Malloc(head_bytes, this->device_);
     impl_.bucket_list_head_ = static_cast<Slab*>(head_raw);
     OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF, head_bytes));
     cuda::Synchronize();
     OPEN3D_CUDA_CHECK(cudaGetLastError());

     // Wire everything into the object that is passed to the kernels.
     impl_.Setup(this->bucket_count_, node_mgr_->impl_, buffer_accessor_);
 }


 // Shuts down the buffer accessor and releases the bucket list heads.
 //
 // The head pointer is cleared after release so that a second call (for
 // example the destructor running after an explicit Free() that was not
 // followed by a successful Allocate()) does not free the same block twice.
 template <typename Key, typename Hash, typename Eq>
 void SlabHashBackend<Key, Hash, Eq>::Free() {
     CUDAScopedDevice scoped_device(this->device_);
     buffer_accessor_.Shutdown(this->device_);
     if (impl_.bucket_list_head_ != nullptr) {
         MemoryManager::Free(impl_.bucket_list_head_, this->device_);
         impl_.bucket_list_head_ = nullptr;
     }
 }

 }  // namespace core

 }  // namespace open3d

CUDAUtils.h
Common CUDA utilities.

OPEN3D_CUDA_CHECK
#define OPEN3D_CUDA_CHECK(err)
Definition: CUDAUtils.h:46

DeviceHashBackend.h

SlabHashBackendImpl.h

result
core::Tensor result
Definition: VtkUtils.cpp:76

copy
bool copy
Definition: VtkUtils.cpp:74

open3d::core::CUDAHashBackendBufferAccessor
Definition: CUDAHashBackendBufferAccessor.h:24

open3d::core::CUDAScopedDevice
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:213

open3d::core::DeviceHashBackend
Definition: DeviceHashBackend.h:20

open3d::core::DeviceHashBackend::device_
Device device_
Definition: DeviceHashBackend.h:100

open3d::core::Device
Definition: Device.h:18

open3d::core::MemoryManager::MemcpyToHost
static void MemcpyToHost(void *host_ptr, const void *src_ptr, const Device &src_device, size_t num_bytes)
Same as Memcpy, but with host (CPU:0) as default dst_device.
Definition: MemoryManager.cpp:85

open3d::core::MemoryManager::Malloc
static void * Malloc(size_t byte_size, const Device &device)
Definition: MemoryManager.cpp:22

open3d::core::MemoryManager::Free
static void Free(void *ptr, const Device &device)
Frees previously allocated memory at address ptr on device device.
Definition: MemoryManager.cpp:28

open3d::core::SlabHashBackend
Definition: SlabHashBackend.h:20

open3d::core::SlabHashBackend::SlabHashBackend
SlabHashBackend(int64_t init_capacity, int64_t key_dsize, const std::vector< int64_t > &value_dsizes, const Device &device)
Definition: SlabHashBackend.h:71

open3d::core::SlabHashBackend::Free
void Free() override
Definition: SlabHashBackend.h:291

open3d::core::SlabHashBackend::buffer_accessor_
CUDAHashBackendBufferAccessor buffer_accessor_
Definition: SlabHashBackend.h:64

open3d::core::SlabHashBackend::Allocate
void Allocate(int64_t capacity) override
Definition: SlabHashBackend.h:265

open3d::core::SlabHashBackend::~SlabHashBackend
~SlabHashBackend()
Definition: SlabHashBackend.h:82

open3d::core::SlabHashBackend::LoadFactor
float LoadFactor() const override
Get the current load factor, defined as size / bucket count.
Definition: SlabHashBackend.h:216

open3d::core::SlabHashBackend::node_mgr_
std::shared_ptr< SlabNodeManager > node_mgr_
Definition: SlabHashBackend.h:65

open3d::core::SlabHashBackend::GetActiveIndices
int64_t GetActiveIndices(buf_index_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition: SlabHashBackend.h:140

open3d::core::SlabHashBackend::impl_
SlabHashBackendImpl< Key, Hash, Eq > impl_
Definition: SlabHashBackend.h:62

open3d::core::SlabHashBackend::Insert
void Insert(const void *input_keys, const std::vector< const void * > &input_values_soa, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition: SlabHashBackend.h:222

open3d::core::SlabHashBackend::GetImpl
SlabHashBackendImpl< Key, Hash, Eq > GetImpl()
Definition: SlabHashBackend.h:54

open3d::core::SlabHashBackend::bucket_count_
int64_t bucket_count_
Definition: SlabHashBackend.h:67

open3d::core::SlabHashBackend::Size
int64_t Size() const override
Get the size (number of valid entries) of the hash map.
Definition: SlabHashBackend.h:183

open3d::core::SlabHashBackend::GetBucketCount
int64_t GetBucketCount() const override
Get the number of buckets of the hash map.
Definition: SlabHashBackend.h:189

open3d::core::SlabHashBackend::Reserve
void Reserve(int64_t capacity) override
Definition: SlabHashBackend.h:88

open3d::core::SlabHashBackend::Clear
void Clear() override
Clear stored map without reallocating memory.
Definition: SlabHashBackend.h:167

open3d::core::SlabHashBackend::BucketSizes
std::vector< int64_t > BucketSizes() const override
Get the number of entries per bucket.
Definition: SlabHashBackend.h:195

open3d::core::SlabHashBackend::Find
void Find(const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition: SlabHashBackend.h:93

open3d::core::SlabHashBackend::Erase
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition: SlabHashBackend.h:113

open3d::core::SlabHashBackendImpl
Definition: SlabHashBackendImpl.h:45

open3d::core::Slab
Definition: SlabNodeManager.h:39

Dispatch.h

count
int count
Definition: FilePCD.cpp:42

open3d::core::cuda::Synchronize
void Synchronize()
Definition: CUDAUtils.cpp:58

open3d::core::InsertKernelPass1
__global__ void InsertKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:493

open3d::core::InsertKernelPass0
__global__ void InsertKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, int heap_counter_prev, int64_t count)
Kernels.
Definition: SlabHashBackendImpl.h:474

open3d::core::buf_index_t
uint32_t buf_index_t
Definition: HashBackendBuffer.h:44

open3d::core::EraseKernelPass1
__global__ void EraseKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:636

open3d::core::FindKernel
__global__ void FindKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:562

open3d::core::EraseKernelPass0
__global__ void EraseKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:601

open3d::core::GetActiveIndicesKernel
__global__ void GetActiveIndicesKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, uint32_t *output_count)
Definition: SlabHashBackendImpl.h:647

open3d::core::CountElemsPerBucketKernel
__global__ void CountElemsPerBucketKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, int64_t *bucket_elem_counts)
Definition: SlabHashBackendImpl.h:687

open3d::io::k4a_plugin::uint32_t
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:548

open3d::io::k4a_plugin::float
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:460

open3d
Definition: PinholeCameraIntrinsic.cpp:16