Open3D (C++ API)  0.13.0+8be9a53
SlabHashmap.h
// ----------------------------------------------------------------------------
// -                        Open3D: www.open3d.org                            -
// ----------------------------------------------------------------------------
// The MIT License (MIT)
//
// Copyright (c) 2018-2021 www.open3d.org
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// ----------------------------------------------------------------------------

#pragma once

#include <memory>

#include "open3d/core/CUDAUtils.h"
#include "open3d/core/hashmap/CUDA/SlabHashmapImpl.h"
#include "open3d/core/hashmap/DeviceHashmap.h"

namespace open3d {
namespace core {
template <typename Key, typename Hash>
class SlabHashmap : public DeviceHashmap {
public:
    SlabHashmap(int64_t init_capacity,
                int64_t dsize_key,
                int64_t dsize_value,
                const Device& device);

    ~SlabHashmap();

    void Rehash(int64_t buckets) override;

    void Insert(const void* input_keys,
                const void* input_values,
                addr_t* output_addrs,
                bool* output_masks,
                int64_t count) override;

    void Activate(const void* input_keys,
                  addr_t* output_addrs,
                  bool* output_masks,
                  int64_t count) override;

    void Find(const void* input_keys,
              addr_t* output_addrs,
              bool* output_masks,
              int64_t count) override;

    void Erase(const void* input_keys,
               bool* output_masks,
               int64_t count) override;

    int64_t GetActiveIndices(addr_t* output_indices) override;
    void Clear() override;

    int64_t Size() const override;
    int64_t GetBucketCount() const override;
    std::vector<int64_t> BucketSizes() const override;
    float LoadFactor() const override;

    SlabHashmapImpl<Key, Hash> GetImpl() { return impl_; }

protected:
    // Device-side hash map struct; it is passed to the kernels by value.
    SlabHashmapImpl<Key, Hash> impl_;

    CUDAHashmapBufferAccessor buffer_accessor_;
    std::shared_ptr<SlabNodeManager> node_mgr_;

    void InsertImpl(const void* input_keys,
                    const void* input_values,
                    addr_t* output_addrs,
                    bool* output_masks,
                    int64_t count);

    void Allocate(int64_t bucket_count, int64_t capacity);
    void Free();

    int64_t bucket_count_;
};

template <typename Key, typename Hash>
SlabHashmap<Key, Hash>::SlabHashmap(int64_t init_capacity,
                                    int64_t dsize_key,
                                    int64_t dsize_value,
                                    const Device& device)
    : DeviceHashmap(init_capacity, dsize_key, dsize_value, device) {
    // Start with twice as many buckets as the element capacity.
    int64_t init_buckets = init_capacity * 2;
    Allocate(init_buckets, init_capacity);
}

template <typename Key, typename Hash>
SlabHashmap<Key, Hash>::~SlabHashmap() {
    Free();
}

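// Rehash gathers all active keys/values into temporary tensors, rebuilds the
// bucket array and the key/value buffers at the new size, and then re-inserts
// the saved entries.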
template <typename Key, typename Hash>
void SlabHashmap<Key, Hash>::Rehash(int64_t buckets) {
    int64_t iterator_count = Size();

    Tensor active_keys;
    Tensor active_values;

    if (iterator_count > 0) {
        Tensor active_addrs =
                Tensor({iterator_count}, core::Int32, this->device_);
        GetActiveIndices(static_cast<addr_t*>(active_addrs.GetDataPtr()));

        Tensor active_indices = active_addrs.To(core::Int64);
        active_keys = this->buffer_->GetKeyBuffer().IndexGet({active_indices});
        active_values =
                this->buffer_->GetValueBuffer().IndexGet({active_indices});
    }

    float avg_capacity_per_bucket =
            float(this->capacity_) / float(this->bucket_count_);

    Free();

    Allocate(buckets,
             std::max(int64_t(std::ceil(buckets * avg_capacity_per_bucket)),
                      active_keys.GetLength()));

    if (iterator_count > 0) {
        Tensor output_addrs({iterator_count}, core::Int32, this->device_);
        Tensor output_masks({iterator_count}, core::Bool, this->device_);

        InsertImpl(active_keys.GetDataPtr(), active_values.GetDataPtr(),
                   static_cast<addr_t*>(output_addrs.GetDataPtr()),
                   output_masks.GetDataPtr<bool>(), iterator_count);
    }
}

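// Insert grows the table first if the incoming batch would exceed the current
// capacity: the bucket count is at least doubled, or scaled up so that the
// average number of elements per bucket stays roughly constant.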
template <typename Key, typename Hash>
void SlabHashmap<Key, Hash>::Insert(const void* input_keys,
                                    const void* input_values,
                                    addr_t* output_addrs,
                                    bool* output_masks,
                                    int64_t count) {
    int64_t new_size = Size() + count;
    if (new_size > this->capacity_) {
        float avg_capacity_per_bucket =
                float(this->capacity_) / float(this->bucket_count_);
        int64_t expected_buckets = std::max(
                int64_t(this->bucket_count_ * 2),
                int64_t(std::ceil(new_size / avg_capacity_per_bucket)));
        Rehash(expected_buckets);
    }

    InsertImpl(input_keys, input_values, output_addrs, output_masks, count);
}

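// Activate claims buffer entries for the given keys without copying any
// values; it is simply Insert with a null value pointer.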
template <typename Key, typename Hash>
void SlabHashmap<Key, Hash>::Activate(const void* input_keys,
                                      addr_t* output_addrs,
                                      bool* output_masks,
                                      int64_t count) {
    Insert(input_keys, nullptr, output_addrs, output_masks, count);
}

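// Find launches one thread per query key: masks are zeroed first, then
// FindKernel writes the buffer address of every key that is present and sets
// the corresponding output mask.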
template <typename Key, typename Hash>
void SlabHashmap<Key, Hash>::Find(const void* input_keys,
                                  addr_t* output_addrs,
                                  bool* output_masks,
                                  int64_t count) {
    if (count == 0) return;

    OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());

    const int64_t num_blocks =
            (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
    FindKernel<<<num_blocks, kThreadsPerBlock, 0, core::cuda::GetStream()>>>(
            impl_, input_keys, output_addrs, output_masks, count);
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());
}

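// Erase runs in two passes: EraseKernelPass0 removes matching keys from the
// slab lists and records the buffer address of each erased entry;
// EraseKernelPass1 then returns those buffer slots to the heap for reuse.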
template <typename Key, typename Hash>
void SlabHashmap<Key, Hash>::Erase(const void* input_keys,
                                   bool* output_masks,
                                   int64_t count) {
    if (count == 0) return;

    OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());
    auto iterator_addrs = static_cast<addr_t*>(
            MemoryManager::Malloc(sizeof(addr_t) * count, this->device_));

    const int64_t num_blocks =
            (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
    EraseKernelPass0<<<num_blocks, kThreadsPerBlock, 0,
                       core::cuda::GetStream()>>>(
            impl_, input_keys, iterator_addrs, output_masks, count);
    EraseKernelPass1<<<num_blocks, kThreadsPerBlock, 0,
                       core::cuda::GetStream()>>>(impl_, iterator_addrs,
                                                  output_masks, count);
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());

    MemoryManager::Free(iterator_addrs, this->device_);
}

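// GetActiveIndices traverses every bucket with a full warp
// (bucket_count_ * kWarpSize threads in total), appends the buffer address of
// each active entry to output_addrs, and returns how many were found.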
template <typename Key, typename Hash>
int64_t SlabHashmap<Key, Hash>::GetActiveIndices(addr_t* output_addrs) {
    uint32_t* iterator_count = static_cast<uint32_t*>(
            MemoryManager::Malloc(sizeof(uint32_t), this->device_));
    OPEN3D_CUDA_CHECK(cudaMemset(iterator_count, 0, sizeof(uint32_t)));
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());

    const int64_t num_blocks =
            (impl_.bucket_count_ * kWarpSize + kThreadsPerBlock - 1) /
            kThreadsPerBlock;
    GetActiveIndicesKernel<<<num_blocks, kThreadsPerBlock, 0,
                             core::cuda::GetStream()>>>(impl_, output_addrs,
                                                        iterator_count);
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());

    uint32_t ret;
    MemoryManager::MemcpyToHost(&ret, iterator_count, this->device_,
                                sizeof(uint32_t));
    MemoryManager::Free(iterator_count, this->device_);

    return static_cast<int64_t>(ret);
}

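// Clear resets the table without releasing its memory: the key/value heap is
// reset, every bucket head is filled with 0xFF (the "empty" sentinel), and the
// slab node pool is reset.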
template <typename Key, typename Hash>
void SlabHashmap<Key, Hash>::Clear() {
    // Clear the heap
    buffer_accessor_.Reset(this->device_);

    // Clear the linked list heads
    OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
                                 sizeof(Slab) * this->bucket_count_));
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());

    // Clear the linked list nodes
    node_mgr_->Reset();
}

template <typename Key, typename Hash>
int64_t SlabHashmap<Key, Hash>::Size() const {
    return buffer_accessor_.HeapCounter(this->device_);
}

template <typename Key, typename Hash>
int64_t SlabHashmap<Key, Hash>::GetBucketCount() const {
    return bucket_count_;
}

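// BucketSizes counts the elements stored in each bucket on the device and
// copies the per-bucket counts back to a host-side vector.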
template <typename Key, typename Hash>
std::vector<int64_t> SlabHashmap<Key, Hash>::BucketSizes() const {
    thrust::device_vector<int64_t> elems_per_bucket(impl_.bucket_count_);
    thrust::fill(elems_per_bucket.begin(), elems_per_bucket.end(), 0);

    const int64_t num_blocks =
            (impl_.capacity_ + kThreadsPerBlock - 1) / kThreadsPerBlock;
    CountElemsPerBucketKernel<<<num_blocks, kThreadsPerBlock, 0,
                                core::cuda::GetStream()>>>(
            impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());

    std::vector<int64_t> result(impl_.bucket_count_);
    thrust::copy(elems_per_bucket.begin(), elems_per_bucket.end(),
                 result.begin());
    return result;
}

template <typename Key, typename Hash>
float SlabHashmap<Key, Hash>::LoadFactor() const {
    return float(Size()) / float(this->bucket_count_);
}

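// InsertImpl performs the raw insertion without any capacity check. The
// device-side heap counter is bumped by `count` up front so the kernels can
// draw buffer addresses from a pre-reserved range instead of doing
// per-element atomic updates: Pass0 assigns each input key an address from
// that range, Pass1 inserts the keys into the slab lists and fills
// output_masks, and Pass2 copies the values of the inserted entries.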
template <typename Key, typename Hash>
void SlabHashmap<Key, Hash>::InsertImpl(const void* input_keys,
                                        const void* input_values,
                                        addr_t* output_addrs,
                                        bool* output_masks,
                                        int64_t count) {
    if (count == 0) return;

    // Pre-reserve `count` heap entries for this batch.
    int prev_heap_counter = buffer_accessor_.HeapCounter(this->device_);
    *thrust::device_ptr<int>(impl_.buffer_accessor_.heap_counter_) =
            prev_heap_counter + count;

    const int64_t num_blocks =
            (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
    InsertKernelPass0<<<num_blocks, kThreadsPerBlock, 0,
                        core::cuda::GetStream()>>>(
            impl_, input_keys, output_addrs, prev_heap_counter, count);
    InsertKernelPass1<<<num_blocks, kThreadsPerBlock, 0,
                        core::cuda::GetStream()>>>(
            impl_, input_keys, output_addrs, output_masks, count);
    InsertKernelPass2<<<num_blocks, kThreadsPerBlock, 0,
                        core::cuda::GetStream()>>>(
            impl_, input_values, output_addrs, output_masks, count);
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());
}

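// Allocate sets up all device-side state for a given bucket count and
// capacity: the key/value buffer and its accessor, the slab node pool, and
// the bucket head array (memset to 0xFF, the all-ones "empty slab" sentinel).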
template <typename Key, typename Hash>
void SlabHashmap<Key, Hash>::Allocate(int64_t bucket_count, int64_t capacity) {
    this->bucket_count_ = bucket_count;
    this->capacity_ = capacity;

    // Allocate buffer for key values.
    this->buffer_ =
            std::make_shared<HashmapBuffer>(this->capacity_, this->dsize_key_,
                                            this->dsize_value_, this->device_);
    buffer_accessor_.HostAllocate(this->device_);
    buffer_accessor_.Setup(this->capacity_, this->dsize_key_,
                           this->dsize_value_, this->buffer_->GetKeyBuffer(),
                           this->buffer_->GetValueBuffer(),
                           this->buffer_->GetHeap());
    buffer_accessor_.Reset(this->device_);

    // Allocate buffer for linked list nodes.
    node_mgr_ = std::make_shared<SlabNodeManager>(this->device_);

    // Allocate linked list heads.
    impl_.bucket_list_head_ = static_cast<Slab*>(MemoryManager::Malloc(
            sizeof(Slab) * this->bucket_count_, this->device_));
    OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
                                 sizeof(Slab) * this->bucket_count_));
    cuda::Synchronize();
    OPEN3D_CUDA_CHECK(cudaGetLastError());

    impl_.Setup(this->bucket_count_, this->capacity_, this->dsize_key_,
                this->dsize_value_, node_mgr_->impl_, buffer_accessor_);
}

template <typename Key, typename Hash>
void SlabHashmap<Key, Hash>::Free() {
    buffer_accessor_.HostFree(this->device_);
    MemoryManager::Free(impl_.bucket_list_head_, this->device_);
}
}  // namespace core
}  // namespace open3d