38 template <
typename Key,
typename Hash,
typename Eq>
43 const std::vector<int64_t>& value_dsizes,
48 void Reserve(int64_t capacity)
override;
50 void Insert(
const void* input_keys,
51 const std::vector<const void*>& input_values_soa,
54 int64_t
count)
override;
56 void Find(
const void* input_keys,
59 int64_t count)
override;
61 void Erase(
const void* input_keys,
63 int64_t count)
override;
66 void Clear()
override;
68 int64_t
Size()
const override;
75 void Allocate(int64_t capacity)
override;
89 template <
typename Key,
typename Hash,
typename Eq>
91 int64_t init_capacity,
93 const std::vector<int64_t>& value_dsizes,
99 template <
typename Key,
typename Hash,
typename Eq>
104 template <
typename Key,
typename Hash,
typename Eq>
107 template <
typename Key,
typename Hash,
typename Eq>
112 if (count == 0)
return;
118 const int64_t num_blocks =
119 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
120 FindKernel<<<num_blocks, kThreadsPerBlock, 0, core::cuda::GetStream()>>>(
121 impl_, input_keys, output_buf_indices, output_masks,
count);
126 template <
typename Key,
typename Hash,
typename Eq>
130 if (count == 0)
return;
138 const int64_t num_blocks =
139 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
141 core::cuda::GetStream()>>>(
142 impl_, input_keys, buf_indices, output_masks,
count);
144 core::cuda::GetStream()>>>(
impl_, buf_indices,
145 output_masks,
count);
152 template <
typename Key,
typename Hash,
typename Eq>
162 const int64_t num_blocks =
163 (
impl_.bucket_count_ * kWarpSize + kThreadsPerBlock - 1) /
166 core::cuda::GetStream()>>>(
175 return static_cast<int64_t
>(ret);
178 template <
typename Key,
typename Hash,
typename Eq>
185 sizeof(
Slab) * this->bucket_count_));
193 template <
typename Key,
typename Hash,
typename Eq>
195 return this->
buffer_->GetHeapTopIndex();
198 template <
typename Key,
typename Hash,
typename Eq>
203 template <
typename Key,
typename Hash,
typename Eq>
205 thrust::device_vector<int64_t> elems_per_bucket(
impl_.bucket_count_);
206 thrust::fill(elems_per_bucket.begin(), elems_per_bucket.end(), 0);
208 const int64_t num_blocks =
209 (
impl_.buffer_accessor_.capacity_ + kThreadsPerBlock - 1) /
212 core::cuda::GetStream()>>>(
213 impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
217 std::vector<int64_t>
result(impl_.bucket_count_);
218 thrust::copy(elems_per_bucket.begin(), elems_per_bucket.end(),
223 template <
typename Key,
typename Hash,
typename Eq>
228 template <
typename Key,
typename Hash,
typename Eq>
230 const void* input_keys,
231 const std::vector<const void*>& input_values_soa,
235 if (count == 0)
return;
239 int prev_heap_top = this->
buffer_->GetHeapTopIndex();
240 *thrust::device_ptr<int>(
impl_.buffer_accessor_.heap_top_) =
241 prev_heap_top + count;
243 const int64_t num_blocks =
244 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
246 core::cuda::GetStream()>>>(
247 impl_, input_keys, output_buf_indices, prev_heap_top,
count);
249 core::cuda::GetStream()>>>(
250 impl_, input_keys, output_buf_indices, output_masks,
count);
252 thrust::device_vector<const void*> input_values_soa_device(
253 input_values_soa.begin(), input_values_soa.end());
255 int64_t n_values = input_values_soa.size();
256 const void*
const* ptr_input_values_soa =
257 thrust::raw_pointer_cast(input_values_soa_device.data());
258 DISPATCH_DIVISOR_SIZE_TO_BLOCK_T(
259 impl_.buffer_accessor_.common_block_size_, [&]() {
260 InsertKernelPass2<Key, Hash, Eq, block_t>
261 <<<num_blocks, kThreadsPerBlock, 0,
262 core::cuda::GetStream()>>>(
263 impl_, ptr_input_values_soa, output_buf_indices,
264 output_masks, count, n_values);
270 template <
typename Key,
typename Hash,
typename Eq>
276 this->
buffer_ = std::make_shared<HashBackendBuffer>(
288 sizeof(
Slab) * this->bucket_count_));
295 template <
typename Key,
typename Hash,
typename Eq>
std::shared_ptr< HashBackendBuffer > buffer_
Definition: DeviceHashBackend.h:121
Definition: CUDAHashBackendBufferAccessor.h:43
void Allocate(int64_t capacity) override
Definition: SlabHashBackend.h:271
std::shared_ptr< SlabNodeManager > node_mgr_
Definition: SlabHashBackend.h:84
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:567
__global__ void GetActiveIndicesKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, uint32_t *output_count)
Definition: SlabHashBackendImpl.h:666
std::vector< int64_t > value_dsizes_
Definition: DeviceHashBackend.h:117
SlabHashBackendImpl< Key, Hash, Eq > impl_
Definition: SlabHashBackend.h:81
static void Free(void *ptr, const Device &device)
Frees previously allocated memory at address ptr on device device.
Definition: MemoryManager.cpp:47
#define OPEN3D_CUDA_CHECK(err)
Definition: CUDAUtils.h:66
__global__ void InsertKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, int heap_counter_prev, int64_t count)
Kernels.
Definition: SlabHashBackendImpl.h:493
static void MemcpyToHost(void *host_ptr, const void *src_ptr, const Device &src_device, size_t num_bytes)
Same as Memcpy, but with host (CPU:0) as default dst_device.
Definition: MemoryManager.cpp:104
static void * Malloc(size_t byte_size, const Device &device)
Definition: MemoryManager.cpp:41
Device device_
Definition: DeviceHashBackend.h:119
int64_t bucket_count_
Definition: SlabHashBackend.h:86
void Clear() override
Clear stored map without reallocating memory.
Definition: SlabHashBackend.h:179
uint32_t buf_index_t
Definition: HashBackendBuffer.h:63
int64_t key_dsize_
Definition: DeviceHashBackend.h:116
int64_t capacity_
Definition: DeviceHashBackend.h:114
void Synchronize()
Definition: CUDAUtils.cpp:77
int64_t GetActiveIndices(buf_index_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition: SlabHashBackend.h:153
__host__ void Setup(HashBackendBuffer &hashmap_buffer)
Definition: CUDAHashBackendBufferAccessor.h:45
void Find(const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition: SlabHashBackend.h:108
__host__ void Shutdown(const Device &device)
Definition: CUDAHashBackendBufferAccessor.h:92
core::Tensor result
Definition: VtkUtils.cpp:91
void Reserve(int64_t capacity) override
Definition: SlabHashBackend.h:105
__global__ void EraseKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:620
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition: SlabHashBackend.h:127
Definition: SlabHashBackendImpl.h:64
SlabHashBackendImpl< Key, Hash, Eq > GetImpl()
Definition: SlabHashBackend.h:73
__global__ void EraseKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:655
Definition: PinholeCameraIntrinsic.cpp:35
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:475
std::vector< int64_t > BucketSizes() const override
Get the number of entries per bucket.
Definition: SlabHashBackend.h:204
Definition: DeviceHashBackend.h:39
CUDAHashBackendBufferAccessor buffer_accessor_
Definition: SlabHashBackend.h:83
float LoadFactor() const override
Get the current load factor, defined as size / bucket count.
Definition: SlabHashBackend.h:224
__global__ void CountElemsPerBucketKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, int64_t *bucket_elem_counts)
Definition: SlabHashBackendImpl.h:706
void Insert(const void *input_keys, const std::vector< const void *> &input_values_soa, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition: SlabHashBackend.h:229
~SlabHashBackend()
Definition: SlabHashBackend.h:100
Definition: SlabNodeManager.h:58
bool copy
Definition: VtkUtils.cpp:89
void Free() override
Definition: SlabHashBackend.h:296
int64_t Size() const override
Get the size (number of valid entries) of the hash map.
Definition: SlabHashBackend.h:194
SlabHashBackend(int64_t init_capacity, int64_t key_dsize, const std::vector< int64_t > &value_dsizes, const Device &device)
Definition: SlabHashBackend.h:90
int64_t GetBucketCount() const override
Get the number of buckets of the hash map.
Definition: SlabHashBackend.h:199
__global__ void InsertKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:512
Definition: SlabHashBackend.h:39