29 #include <stdgpu/iterator.h> 30 #include <stdgpu/memory.h> 31 #include <stdgpu/platform.h> 32 #include <thrust/for_each.h> 33 #include <thrust/transform.h> 36 #include <stdgpu/unordered_map.cuh> 37 #include <unordered_map> 45 template <
typename Key,
typename Hash>
54 void Rehash(int64_t buckets)
override;
56 void Insert(
const void* input_keys,
57 const void* input_values,
60 int64_t
count)
override;
62 void Activate(
const void* input_keys,
65 int64_t count)
override;
67 void Find(
const void* input_keys,
70 int64_t count)
override;
72 void Erase(
const void* input_keys,
74 int64_t count)
override;
78 void Clear()
override;
80 int64_t
Size()
const override;
86 stdgpu::unordered_map<Key, addr_t, Hash>
GetImpl()
const {
return impl_; }
91 stdgpu::unordered_map<Key, addr_t, Hash>
impl_;
96 const void* input_values,
105 template <
typename Key,
typename Hash>
110 :
DeviceHashmap(init_capacity, dsize_key, dsize_value, device) {
114 template <
typename Key,
typename Hash>
119 template <
typename Key,
typename Hash>
124 template <
typename Key,
typename Hash>
126 const void* input_values,
133 float avg_capacity_per_bucket =
135 int64_t expected_buckets =
std::max(
137 int64_t(
std::ceil(new_size / avg_capacity_per_bucket)));
140 InsertImpl(input_keys, input_values, output_addrs, output_masks, count);
143 template <
typename Key,
typename Hash>
148 Insert(input_keys,
nullptr, output_addrs, output_masks, count);
152 template <
typename Key,
typename Hash>
155 const Key* input_keys,
159 uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
160 if (tid >= count)
return;
162 Key key = input_keys[tid];
163 auto iter = map.find(key);
164 bool flag = (iter != map.end());
165 output_masks[tid] = flag;
166 output_addrs[tid] = flag ? iter->second : 0;
169 template <
typename Key,
typename Hash>
175 uint32_t blocks = (count + threads - 1) / threads;
178 static_cast<const Key*
>(input_keys),
179 output_addrs, output_masks, count);
184 template <
typename Key,
typename Hash>
187 const Key* input_keys,
191 uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
192 if (tid >= count)
return;
194 Key key = input_keys[tid];
195 auto iter = map.find(key);
196 bool flag = (iter != map.end());
197 output_masks[tid] = flag;
198 output_addrs[tid] = flag ? iter->second : 0;
200 if (output_masks[tid]) {
201 output_masks[tid] = map.erase(key);
202 if (output_masks[tid]) {
203 buffer_accessor.
DeviceFree(output_addrs[tid]);
208 template <
typename Key,
typename Hash>
213 uint32_t blocks = (count + threads - 1) / threads;
220 static_cast<const Key*
>(input_keys),
221 output_addrs, output_masks, count);
225 template <
typename Key>
233 template <
typename Key,
typename Hash>
235 auto range =
impl_.device_range();
237 thrust::transform(range.begin(), range.end(), output_indices,
243 template <
typename Key,
typename Hash>
249 template <
typename Key,
typename Hash>
251 int64_t iterator_count =
Size();
256 if (iterator_count > 0) {
265 float avg_capacity_per_bucket =
269 int64_t new_capacity =
270 int64_t(
std::ceil(buckets * avg_capacity_per_bucket));
273 if (iterator_count > 0) {
278 static_cast<addr_t*
>(output_addrs.GetDataPtr()),
279 output_masks.GetDataPtr<
bool>(), iterator_count);
283 template <
typename Key,
typename Hash>
285 return impl_.bucket_count();
288 template <
typename Key,
typename Hash>
293 template <
typename Key,
typename Hash>
295 return impl_.load_factor();
299 template <
typename Key,
typename Hash>
302 const Key* input_keys,
303 const void* input_values,
308 uint32_t tid = threadIdx.x + blockIdx.x * blockDim.x;
309 if (tid >= count)
return;
311 Key key = input_keys[tid];
312 output_addrs[tid] = 0;
313 output_masks[tid] =
false;
316 auto res = map.emplace(key, 0);
325 *
static_cast<Key*
>(dst_kv_iter.first) = key;
328 uint8_t* dst_value =
static_cast<uint8_t*
>(dst_kv_iter.second);
329 if (input_values !=
nullptr) {
330 const uint8_t* src_value =
331 static_cast<const uint8_t*
>(input_values) +
333 for (
int byte = 0; byte < dsize_value; ++byte) {
334 dst_value[byte] = src_value[byte];
339 res.
first->second = dst_kv_addr;
342 output_addrs[tid] = dst_kv_addr;
343 output_masks[tid] =
true;
347 template <
typename Key,
typename Hash>
349 const void* input_values,
354 uint32_t blocks = (count + threads - 1) / threads;
357 static_cast<const Key*
>(input_keys),
359 output_addrs, output_masks, count);
363 template <
typename Key,
typename Hash>
374 this->dsize_value_, this->
buffer_->GetKeyBuffer(),
375 this->
buffer_->GetValueBuffer(),
379 impl_ = stdgpu::unordered_map<Key, addr_t, Hash>::createDeviceObject(
384 template <
typename Key,
typename Hash>
390 stdgpu::unordered_map<Key, addr_t, Hash>::destroyDeviceObject(
impl_);
__global__ void STDGPUInsertKernel(stdgpu::unordered_map< Key, addr_t, Hash > map, CUDAHashmapBufferAccessor buffer_accessor, const Key *input_keys, const void *input_values, int64_t dsize_value, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: StdGPUHashmap.h:300
void * first
Definition: SlabTraits.h:54
void Clear() override
Clear stored map without reallocating memory.
Definition: StdGPUHashmap.h:244
void Insert(const void *input_keys, const void *input_values, addr_t *output_addrs, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition: StdGPUHashmap.h:125
void Free()
Definition: StdGPUHashmap.h:385
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:557
StdGPUHashmap(int64_t init_capacity, int64_t dsize_key, int64_t dsize_value, const Device &device)
Definition: StdGPUHashmap.h:106
~StdGPUHashmap()
Definition: StdGPUHashmap.h:115
#define OPEN3D_CUDA_CHECK(err)
Definition: CUDAUtils.h:59
stdgpu::unordered_map< Key, addr_t, Hash > impl_
Definition: StdGPUHashmap.h:91
void Allocate(int64_t capacity)
Definition: StdGPUHashmap.h:364
stdgpu::unordered_map< Key, addr_t, Hash > GetImpl() const
Definition: StdGPUHashmap.h:86
__host__ void Setup(int64_t capacity, int64_t dsize_key, int64_t dsize_value, Tensor &keys, Tensor &values, Tensor &heap)
Definition: CUDAHashmapBufferAccessor.h:50
__host__ void HostAllocate(const Device &device)
Definition: CUDAHashmapBufferAccessor.h:80
Definition: StdGPUHashmap.h:46
#define LogError(...)
Definition: Console.h:79
FN_SPECIFIERS MiniVec< float, N > ceil(const MiniVec< float, N > &a)
Definition: MiniVec.h:108
Definition: DeviceHashmap.h:39
__device__ addr_t DeviceAllocate()
Definition: CUDAHashmapBufferAccessor.h:92
static const Dtype Int32
Definition: Dtype.h:46
Tensor IndexGet(const std::vector< Tensor > &index_tensors) const
Advanced indexing getter.
Definition: Tensor.cpp:704
Definition: CUDAHashmapBufferAccessor.h:48
#define OPEN3D_HOST_DEVICE
Definition: CUDAUtils.h:56
void Rehash(int64_t buckets) override
Definition: StdGPUHashmap.h:250
__global__ void STDGPUEraseKernel(stdgpu::unordered_map< Key, addr_t, Hash > map, CUDAHashmapBufferAccessor buffer_accessor, const Key *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: StdGPUHashmap.h:185
__device__ void DeviceFree(addr_t ptr)
Definition: CUDAHashmapBufferAccessor.h:97
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:540
__device__ iterator_t ExtractIterator(addr_t ptr)
Definition: CUDAHashmapBufferAccessor.h:109
int64_t GetActiveIndices(addr_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition: StdGPUHashmap.h:234
Device device_
Definition: DeviceHashmap.h:113
void Activate(const void *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count) override
Definition: StdGPUHashmap.h:144
int64_t GetBucketCount() const override
Definition: StdGPUHashmap.h:284
__global__ void STDGPUFindKernel(stdgpu::unordered_map< Key, addr_t, Hash > map, CUDAHashmapBufferAccessor buffer_accessor, const Key *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: StdGPUHashmap.h:153
int count
Definition: FilePCD.cpp:61
float LoadFactor() const override
Definition: StdGPUHashmap.h:294
int64_t Size() const override
Definition: StdGPUHashmap.h:120
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition: StdGPUHashmap.h:209
static const Dtype Int64
Definition: Dtype.h:47
Definition: PinholeCameraIntrinsic.cpp:35
CUDAHashmapBufferAccessor buffer_accessor_
Definition: StdGPUHashmap.h:93
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:465
int64_t capacity_
Definition: DeviceHashmap.h:109
void InsertImpl(const void *input_keys, const void *input_values, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: StdGPUHashmap.h:348
Tensor & GetKeyBuffer()
Definition: DeviceHashmap.h:101
uint32_t addr_t
Definition: HashmapBuffer.h:58
int64_t dsize_key_
Definition: DeviceHashmap.h:110
T * GetDataPtr()
Definition: Tensor.h:1005
__host__ void HostFree(const Device &device)
Definition: CUDAHashmapBufferAccessor.h:85
__host__ void Reset(const Device &device)
Definition: CUDAHashmapBufferAccessor.h:67
static const Dtype Bool
Definition: Dtype.h:52
int64_t dsize_value_
Definition: DeviceHashmap.h:111
Tensor & GetValueBuffer()
Definition: DeviceHashmap.h:102
std::shared_ptr< HashmapBuffer > buffer_
Definition: DeviceHashmap.h:115
void Find(const void *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition: StdGPUHashmap.h:170
#define max(x, y)
Definition: SVD3x3CPU.h:38
std::vector< int64_t > BucketSizes() const override
Definition: StdGPUHashmap.h:289