37 template <
typename Key,
typename Hash>
47 void Rehash(int64_t buckets)
override;
49 void Insert(
const void* input_keys,
50 const void* input_values,
53 int64_t
count)
override;
55 void Activate(
const void* input_keys,
58 int64_t count)
override;
60 void Find(
const void* input_keys,
63 int64_t count)
override;
65 void Erase(
const void* input_keys,
67 int64_t count)
override;
70 void Clear()
override;
72 int64_t
Size()
const override;
90 const void* input_values,
95 void Allocate(int64_t bucket_count, int64_t capacity);
101 template <
typename Key,
typename Hash>
106 :
DeviceHashmap(init_capacity, dsize_key, dsize_value, device) {
107 int64_t init_buckets = init_capacity * 2;
108 Allocate(init_buckets, init_capacity);
111 template <
typename Key,
typename Hash>
116 template <
typename Key,
typename Hash>
118 int64_t iterator_count =
Size();
123 if (iterator_count > 0) {
129 active_keys = this->
buffer_->GetKeyBuffer().IndexGet({active_indices});
131 this->
buffer_->GetValueBuffer().IndexGet({active_indices});
134 float avg_capacity_per_bucket =
140 std::max(int64_t(
std::ceil(buckets * avg_capacity_per_bucket)),
143 if (iterator_count > 0) {
148 static_cast<addr_t*
>(output_addrs.GetDataPtr()),
149 output_masks.GetDataPtr<
bool>(), iterator_count);
153 template <
typename Key,
typename Hash>
155 const void* input_values,
161 float avg_capacity_per_bucket =
163 int64_t expected_buckets = std::max(
165 int64_t(
std::ceil(new_size / avg_capacity_per_bucket)));
169 InsertImpl(input_keys, input_values, output_addrs, output_masks, count);
172 template <
typename Key,
typename Hash>
177 Insert(input_keys,
nullptr, output_addrs, output_masks, count);
180 template <
typename Key,
typename Hash>
185 if (count == 0)
return;
191 const int64_t num_blocks =
192 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
193 FindKernel<<<num_blocks, kThreadsPerBlock, 0, core::cuda::GetStream()>>>(
194 impl_, input_keys, output_addrs, output_masks,
count);
199 template <
typename Key,
typename Hash>
203 if (count == 0)
return;
208 auto iterator_addrs =
static_cast<addr_t*
>(
211 const int64_t num_blocks =
212 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
214 core::cuda::GetStream()>>>(
215 impl_, input_keys, iterator_addrs, output_masks,
count);
217 core::cuda::GetStream()>>>(
impl_, iterator_addrs,
218 output_masks,
count);
225 template <
typename Key,
typename Hash>
233 const int64_t num_blocks =
234 (
impl_.bucket_count_ * kWarpSize + kThreadsPerBlock - 1) /
237 core::cuda::GetStream()>>>(
impl_, output_addrs,
247 return static_cast<int64_t
>(ret);
250 template <
typename Key,
typename Hash>
257 sizeof(
Slab) * this->bucket_count_));
265 template <
typename Key,
typename Hash>
270 template <
typename Key,
typename Hash>
275 template <
typename Key,
typename Hash>
277 thrust::device_vector<int64_t> elems_per_bucket(
impl_.bucket_count_);
278 thrust::fill(elems_per_bucket.begin(), elems_per_bucket.end(), 0);
280 const int64_t num_blocks =
281 (
impl_.capacity_ + kThreadsPerBlock - 1) / kThreadsPerBlock;
283 core::cuda::GetStream()>>>(
284 impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
288 std::vector<int64_t> result(impl_.bucket_count_);
289 thrust::copy(elems_per_bucket.begin(), elems_per_bucket.end(),
294 template <
typename Key,
typename Hash>
299 template <
typename Key,
typename Hash>
301 const void* input_values,
305 if (count == 0)
return;
310 *thrust::device_ptr<int>(
impl_.buffer_accessor_.heap_counter_) =
311 prev_heap_counter + count;
313 const int64_t num_blocks =
314 (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
316 core::cuda::GetStream()>>>(
317 impl_, input_keys, output_addrs, prev_heap_counter,
count);
319 core::cuda::GetStream()>>>(
320 impl_, input_keys, output_addrs, output_masks,
count);
322 core::cuda::GetStream()>>>(
323 impl_, input_values, output_addrs, output_masks,
count);
328 template <
typename Key,
typename Hash>
339 this->dsize_value_, this->
buffer_->GetKeyBuffer(),
340 this->
buffer_->GetValueBuffer(),
351 sizeof(
Slab) * this->bucket_count_));
355 impl_.Setup(this->bucket_count_, this->capacity_, this->dsize_key_,
359 template <
typename Key,
typename Hash>
void Clear() override
Clear stored map without reallocating memory.
Definition: SlabHashmap.h:251
void Find(const void *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition: SlabHashmap.h:181
CUDAHashmapBufferAccessor buffer_accessor_
Definition: SlabHashmap.h:84
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition: SlabHashmap.h:200
const Dtype Bool
Definition: Dtype.cpp:72
const Dtype Int64
Definition: Dtype.cpp:67
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:557
float LoadFactor() const override
Definition: SlabHashmap.h:295
static void Free(void *ptr, const Device &device)
Frees previously allocated memory at address `ptr` on device `device`.
Definition: MemoryManager.cpp:47
void Rehash(int64_t buckets) override
Definition: SlabHashmap.h:117
#define OPEN3D_CUDA_CHECK(err)
Definition: CUDAUtils.h:66
__global__ void InsertKernelPass2(SlabHashmapImpl< Key, Hash > impl, const void *input_values, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: SlabHashmapImpl.h:561
__host__ void Setup(int64_t capacity, int64_t dsize_key, int64_t dsize_value, Tensor &keys, Tensor &values, Tensor &heap)
Definition: CUDAHashmapBufferAccessor.h:50
void Activate(const void *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count) override
Definition: SlabHashmap.h:173
Definition: SlabHashmap.h:38
__host__ void HostAllocate(const Device &device)
Definition: CUDAHashmapBufferAccessor.h:80
static void MemcpyToHost(void *host_ptr, const void *src_ptr, const Device &src_device, size_t num_bytes)
Same as `Memcpy`, but with the host (CPU:0) as the default `dst_device`.
Definition: MemoryManager.cpp:94
static void * Malloc(size_t byte_size, const Device &device)
Definition: MemoryManager.cpp:41
__global__ void CountElemsPerBucketKernel(SlabHashmapImpl< Key, Hash > impl, int64_t *bucket_elem_counts)
Definition: SlabHashmapImpl.h:715
FN_SPECIFIERS MiniVec< float, N > ceil(const MiniVec< float, N > &a)
Definition: MiniVec.h:108
Definition: DeviceHashmap.h:39
int64_t GetActiveIndices(addr_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition: SlabHashmap.h:226
int64_t Size() const override
Definition: SlabHashmap.h:266
SlabHashmapImpl< Key, Hash > GetImpl()
Definition: SlabHashmap.h:77
void Synchronize()
Definition: CUDAUtils.cpp:72
Definition: CUDAHashmapBufferAccessor.h:48
__global__ void InsertKernelPass1(SlabHashmapImpl< Key, Hash > impl, const void *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: SlabHashmapImpl.h:523
const Dtype Int32
Definition: Dtype.cpp:66
~SlabHashmap()
Definition: SlabHashmap.h:112
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:541
Device device_
Definition: DeviceHashmap.h:113
std::shared_ptr< SlabNodeManager > node_mgr_
Definition: SlabHashmap.h:85
int count
Definition: FilePCD.cpp:61
void Free()
Definition: SlabHashmap.h:360
Definition: PinholeCameraIntrinsic.cpp:35
__global__ void GetActiveIndicesKernel(SlabHashmapImpl< Key, Hash > impl, addr_t *output_addrs, uint32_t *output_iterator_count)
Definition: SlabHashmapImpl.h:674
__global__ void EraseKernelPass0(SlabHashmapImpl< Key, Hash > impl, const void *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: SlabHashmapImpl.h:628
void Allocate(int64_t bucket_count, int64_t capacity)
Definition: SlabHashmap.h:329
__host__ int HeapCounter(const Device &device) const
Definition: CUDAHashmapBufferAccessor.h:102
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:465
int64_t capacity_
Definition: DeviceHashmap.h:109
SlabHashmap(int64_t init_capacity, int64_t dsize_key, int64_t dsize_value, const Device &device)
Definition: SlabHashmap.h:102
SlabHashmapImpl< Key, Hash > impl_
Definition: SlabHashmap.h:82
uint32_t addr_t
Definition: HashmapBuffer.h:58
int64_t bucket_count_
Definition: SlabHashmap.h:98
int64_t dsize_key_
Definition: DeviceHashmap.h:110
T * GetDataPtr()
Definition: Tensor.h:1004
void Insert(const void *input_keys, const void *input_values, addr_t *output_addrs, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition: SlabHashmap.h:154
__host__ void HostFree(const Device &device)
Definition: CUDAHashmapBufferAccessor.h:85
__host__ void Reset(const Device &device)
Definition: CUDAHashmapBufferAccessor.h:67
Definition: SlabNodeManager.h:58
__global__ void EraseKernelPass1(SlabHashmapImpl< Key, Hash > impl, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: SlabHashmapImpl.h:663
int64_t dsize_value_
Definition: DeviceHashmap.h:111
int64_t GetLength() const
Definition: Tensor.h:985
int64_t GetBucketCount() const override
Definition: SlabHashmap.h:271
Definition: SlabHashmapImpl.h:54
__global__ void InsertKernelPass0(SlabHashmapImpl< Key, Hash > impl, const void *input_keys, addr_t *output_addrs, int heap_counter_prev, int64_t count)
Kernels.
Definition: SlabHashmapImpl.h:502
void InsertImpl(const void *input_keys, const void *input_values, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: SlabHashmap.h:300
std::shared_ptr< HashmapBuffer > buffer_
Definition: DeviceHashmap.h:115
std::vector< int64_t > BucketSizes() const override
Definition: SlabHashmap.h:276