Open3D (C++ API)  0.13.0
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
SlabHashmap.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // The MIT License (MIT)
5 //
6 // Copyright (c) 2018 www.open3d.org
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
17 //
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 // IN THE SOFTWARE.
25 // ----------------------------------------------------------------------------
26 
27 #pragma once
28 
29 #include <cassert>
30 #include <memory>
31 
32 #include "open3d/core/CUDAUtils.h"
35 
36 namespace open3d {
37 namespace core {
38 template <typename Key, typename Hash>
39 class SlabHashmap : public DeviceHashmap {
40 public:
41  SlabHashmap(int64_t init_capacity,
42  int64_t dsize_key,
43  int64_t dsize_value,
44  const Device& device);
45 
46  ~SlabHashmap();
47 
48  void Rehash(int64_t buckets) override;
49 
50  void Insert(const void* input_keys,
51  const void* input_values,
52  addr_t* output_addrs,
53  bool* output_masks,
54  int64_t count) override;
55 
56  void Activate(const void* input_keys,
57  addr_t* output_addrs,
58  bool* output_masks,
59  int64_t count) override;
60 
61  void Find(const void* input_keys,
62  addr_t* output_addrs,
63  bool* output_masks,
64  int64_t count) override;
65 
66  void Erase(const void* input_keys,
67  bool* output_masks,
68  int64_t count) override;
69 
70  int64_t GetActiveIndices(addr_t* output_indices) override;
71  void Clear() override;
72 
73  int64_t Size() const override;
74  int64_t GetBucketCount() const override;
75  std::vector<int64_t> BucketSizes() const override;
76  float LoadFactor() const override;
77 
79 
80 protected:
84 
86  std::shared_ptr<SlabNodeManager> node_mgr_;
87 
90  void InsertImpl(const void* input_keys,
91  const void* input_values,
92  addr_t* output_addrs,
93  bool* output_masks,
94  int64_t count);
95 
96  void Allocate(int64_t bucket_count, int64_t capacity);
97  void Free();
98 
99  int64_t bucket_count_;
100 };
101 
102 template <typename Key, typename Hash>
104  int64_t dsize_key,
105  int64_t dsize_value,
106  const Device& device)
107  : DeviceHashmap(init_capacity, dsize_key, dsize_value, device) {
108  int64_t init_buckets = init_capacity * 2;
109  Allocate(init_buckets, init_capacity);
110 }
111 
112 template <typename Key, typename Hash>
114  Free();
115 }
116 
117 template <typename Key, typename Hash>
118 void SlabHashmap<Key, Hash>::Rehash(int64_t buckets) {
119  int64_t iterator_count = Size();
120 
121  Tensor active_keys;
122  Tensor active_values;
123 
124  if (iterator_count > 0) {
125  Tensor active_addrs =
126  Tensor({iterator_count}, Dtype::Int32, this->device_);
127  GetActiveIndices(static_cast<addr_t*>(active_addrs.GetDataPtr()));
128 
129  Tensor active_indices = active_addrs.To(Dtype::Int64);
130  active_keys = this->buffer_->GetKeyBuffer().IndexGet({active_indices});
131  active_values =
132  this->buffer_->GetValueBuffer().IndexGet({active_indices});
133  }
134 
135  float avg_capacity_per_bucket =
136  float(this->capacity_) / float(this->bucket_count_);
137 
138  Free();
140 
141  Allocate(buckets,
142  std::max(int64_t(std::ceil(buckets * avg_capacity_per_bucket)),
143  active_keys.GetLength()));
144 
145  if (iterator_count > 0) {
146  Tensor output_addrs({iterator_count}, Dtype::Int32, this->device_);
147  Tensor output_masks({iterator_count}, Dtype::Bool, this->device_);
148 
149  InsertImpl(active_keys.GetDataPtr(), active_values.GetDataPtr(),
150  static_cast<addr_t*>(output_addrs.GetDataPtr()),
151  output_masks.GetDataPtr<bool>(), iterator_count);
152  }
154 }
155 
156 template <typename Key, typename Hash>
157 void SlabHashmap<Key, Hash>::Insert(const void* input_keys,
158  const void* input_values,
159  addr_t* output_addrs,
160  bool* output_masks,
161  int64_t count) {
162  int64_t new_size = Size() + count;
163  if (new_size > this->capacity_) {
164  float avg_capacity_per_bucket =
165  float(this->capacity_) / float(this->bucket_count_);
166  int64_t expected_buckets = std::max(
167  int64_t(this->bucket_count_ * 2),
168  int64_t(std::ceil(new_size / avg_capacity_per_bucket)));
169  Rehash(expected_buckets);
170  }
171 
172  InsertImpl(input_keys, input_values, output_addrs, output_masks, count);
173 }
174 
175 template <typename Key, typename Hash>
176 void SlabHashmap<Key, Hash>::Activate(const void* input_keys,
177  addr_t* output_addrs,
178  bool* output_masks,
179  int64_t count) {
180  Insert(input_keys, nullptr, output_addrs, output_masks, count);
181 }
182 
183 template <typename Key, typename Hash>
184 void SlabHashmap<Key, Hash>::Find(const void* input_keys,
185  addr_t* output_addrs,
186  bool* output_masks,
187  int64_t count) {
188  if (count == 0) return;
189 
190  OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
191  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
192  OPEN3D_CUDA_CHECK(cudaGetLastError());
193 
194  const int64_t num_blocks =
195  (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
196  FindKernel<<<num_blocks, kThreadsPerBlock>>>(
197  impl_, input_keys, output_addrs, output_masks, count);
198  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
199  OPEN3D_CUDA_CHECK(cudaGetLastError());
200 }
201 
202 template <typename Key, typename Hash>
203 void SlabHashmap<Key, Hash>::Erase(const void* input_keys,
204  bool* output_masks,
205  int64_t count) {
206  if (count == 0) return;
207 
208  OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
209  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
210  OPEN3D_CUDA_CHECK(cudaGetLastError());
211  auto iterator_addrs = static_cast<addr_t*>(
212  MemoryManager::Malloc(sizeof(addr_t) * count, this->device_));
213 
214  const int64_t num_blocks =
215  (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
216  EraseKernelPass0<<<num_blocks, kThreadsPerBlock>>>(
217  impl_, input_keys, iterator_addrs, output_masks, count);
218  EraseKernelPass1<<<num_blocks, kThreadsPerBlock>>>(impl_, iterator_addrs,
219  output_masks, count);
220  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
221  OPEN3D_CUDA_CHECK(cudaGetLastError());
222 
223  MemoryManager::Free(iterator_addrs, this->device_);
224 }
225 
226 template <typename Key, typename Hash>
228  uint32_t* iterator_count = static_cast<uint32_t*>(
229  MemoryManager::Malloc(sizeof(uint32_t), this->device_));
230  OPEN3D_CUDA_CHECK(cudaMemset(iterator_count, 0, sizeof(uint32_t)));
231  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
232  OPEN3D_CUDA_CHECK(cudaGetLastError());
233 
234  const int64_t num_blocks =
235  (impl_.bucket_count_ * kWarpSize + kThreadsPerBlock - 1) /
236  kThreadsPerBlock;
237  GetActiveIndicesKernel<<<num_blocks, kThreadsPerBlock>>>(
238  impl_, output_addrs, iterator_count);
239  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
240  OPEN3D_CUDA_CHECK(cudaGetLastError());
241 
242  uint32_t ret;
243  MemoryManager::MemcpyToHost(&ret, iterator_count, this->device_,
244  sizeof(uint32_t));
245  MemoryManager::Free(iterator_count, this->device_);
246 
247  return static_cast<int64_t>(ret);
248 }
249 
250 template <typename Key, typename Hash>
252  // Clear the heap
254 
255  // Clear the linked list heads
256  OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
257  sizeof(Slab) * this->bucket_count_));
258  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
259  OPEN3D_CUDA_CHECK(cudaGetLastError());
260 
261  // Clear the linked list nodes
262  node_mgr_->Reset();
263 }
264 
265 template <typename Key, typename Hash>
267  return buffer_accessor_.HeapCounter(this->device_);
268 }
269 
270 template <typename Key, typename Hash>
272  return bucket_count_;
273 }
274 
275 template <typename Key, typename Hash>
276 std::vector<int64_t> SlabHashmap<Key, Hash>::BucketSizes() const {
277  thrust::device_vector<int64_t> elems_per_bucket(impl_.bucket_count_);
278  thrust::fill(elems_per_bucket.begin(), elems_per_bucket.end(), 0);
279 
280  const int64_t num_blocks =
281  (impl_.capacity_ + kThreadsPerBlock - 1) / kThreadsPerBlock;
282  CountElemsPerBucketKernel<<<num_blocks, kThreadsPerBlock>>>(
283  impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
284  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
285  OPEN3D_CUDA_CHECK(cudaGetLastError());
286 
287  std::vector<int64_t> result(impl_.bucket_count_);
288  thrust::copy(elems_per_bucket.begin(), elems_per_bucket.end(),
289  result.begin());
290  return result;
291 }
292 
293 template <typename Key, typename Hash>
295  return float(Size()) / float(this->bucket_count_);
296 }
297 
298 template <typename Key, typename Hash>
299 void SlabHashmap<Key, Hash>::InsertImpl(const void* input_keys,
300  const void* input_values,
301  addr_t* output_addrs,
302  bool* output_masks,
303  int64_t count) {
304  if (count == 0) return;
305 
308  int prev_heap_counter = buffer_accessor_.HeapCounter(this->device_);
309  *thrust::device_ptr<int>(impl_.buffer_accessor_.heap_counter_) =
310  prev_heap_counter + count;
311 
312  const int64_t num_blocks =
313  (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
314  InsertKernelPass0<<<num_blocks, kThreadsPerBlock>>>(
315  impl_, input_keys, output_addrs, prev_heap_counter, count);
316  InsertKernelPass1<<<num_blocks, kThreadsPerBlock>>>(
317  impl_, input_keys, output_addrs, output_masks, count);
318  InsertKernelPass2<<<num_blocks, kThreadsPerBlock>>>(
319  impl_, input_values, output_addrs, output_masks, count);
320  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
321  OPEN3D_CUDA_CHECK(cudaGetLastError());
322 }
323 
324 template <typename Key, typename Hash>
325 void SlabHashmap<Key, Hash>::Allocate(int64_t bucket_count, int64_t capacity) {
326  this->bucket_count_ = bucket_count;
327  this->capacity_ = capacity;
328 
329  // Allocate buffer for key values.
330  this->buffer_ =
331  std::make_shared<HashmapBuffer>(this->capacity_, this->dsize_key_,
332  this->dsize_value_, this->device_);
334  buffer_accessor_.Setup(this->capacity_, this->dsize_key_,
335  this->dsize_value_, this->buffer_->GetKeyBuffer(),
336  this->buffer_->GetValueBuffer(),
337  this->buffer_->GetHeap());
339 
340  // Allocate buffer for linked list nodes.
341  node_mgr_ = std::make_shared<SlabNodeManager>(this->device_);
342 
343  // Allocate linked list heads.
344  impl_.bucket_list_head_ = static_cast<Slab*>(MemoryManager::Malloc(
345  sizeof(Slab) * this->bucket_count_, this->device_));
346  OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
347  sizeof(Slab) * this->bucket_count_));
348  OPEN3D_CUDA_CHECK(cudaDeviceSynchronize());
349  OPEN3D_CUDA_CHECK(cudaGetLastError());
350 
351  impl_.Setup(this->bucket_count_, this->capacity_, this->dsize_key_,
352  this->dsize_value_, node_mgr_->impl_, buffer_accessor_);
353 }
354 
355 template <typename Key, typename Hash>
358  MemoryManager::Free(impl_.bucket_list_head_, this->device_);
359 }
360 } // namespace core
361 } // namespace open3d
void Clear() override
Clear stored map without reallocating memory.
Definition: SlabHashmap.h:251
void Find(const void *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition: SlabHashmap.h:184
CUDAHashmapBufferAccessor buffer_accessor_
Definition: SlabHashmap.h:85
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition: SlabHashmap.h:203
void ReleaseCache()
Definition: CUDAUtils.cpp:56
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:557
float LoadFactor() const override
Definition: SlabHashmap.h:294
static void Free(void *ptr, const Device &device)
Definition: MemoryManager.cpp:44
void Rehash(int64_t buckets) override
Definition: SlabHashmap.h:118
#define OPEN3D_CUDA_CHECK(err)
Definition: CUDAUtils.h:59
__host__ void Setup(int64_t capacity, int64_t dsize_key, int64_t dsize_value, Tensor &keys, Tensor &values, Tensor &heap)
Definition: CUDAHashmapBufferAccessor.h:50
void Activate(const void *input_keys, addr_t *output_addrs, bool *output_masks, int64_t count) override
Definition: SlabHashmap.h:176
Definition: SlabHashmap.h:39
__host__ void HostAllocate(const Device &device)
Definition: CUDAHashmapBufferAccessor.h:80
static void MemcpyToHost(void *host_ptr, const void *src_ptr, const Device &src_device, size_t num_bytes)
Same as Memcpy, but with host (CPU:0) as default dst_device.
Definition: MemoryManager.cpp:88
static void * Malloc(size_t byte_size, const Device &device)
Definition: MemoryManager.cpp:40
FN_SPECIFIERS MiniVec< float, N > ceil(const MiniVec< float, N > &a)
Definition: MiniVec.h:108
Definition: DeviceHashmap.h:39
int64_t GetActiveIndices(addr_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition: SlabHashmap.h:227
int64_t Size() const override
Definition: SlabHashmap.h:266
static const Dtype Int32
Definition: Dtype.h:46
SlabHashmapImpl< Key, Hash > GetImpl()
Definition: SlabHashmap.h:78
Definition: CUDAHashmapBufferAccessor.h:48
~SlabHashmap()
Definition: SlabHashmap.h:113
Tensor To(Dtype dtype, bool copy=false) const
Definition: Tensor.cpp:540
Device device_
Definition: DeviceHashmap.h:113
Definition: Device.h:39
std::shared_ptr< SlabNodeManager > node_mgr_
Definition: SlabHashmap.h:86
int count
Definition: FilePCD.cpp:61
void Free()
Definition: SlabHashmap.h:356
static const Dtype Int64
Definition: Dtype.h:47
Definition: PinholeCameraIntrinsic.cpp:35
void Allocate(int64_t bucket_count, int64_t capacity)
Definition: SlabHashmap.h:325
Definition: Tensor.h:50
__host__ int HeapCounter(const Device &device) const
Definition: CUDAHashmapBufferAccessor.h:102
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:465
int64_t capacity_
Definition: DeviceHashmap.h:109
SlabHashmap(int64_t init_capacity, int64_t dsize_key, int64_t dsize_value, const Device &device)
Definition: SlabHashmap.h:103
SlabHashmapImpl< Key, Hash > impl_
Definition: SlabHashmap.h:83
uint32_t addr_t
Definition: HashmapBuffer.h:58
int64_t bucket_count_
Definition: SlabHashmap.h:99
int64_t dsize_key_
Definition: DeviceHashmap.h:110
T * GetDataPtr()
Definition: Tensor.h:1005
void Insert(const void *input_keys, const void *input_values, addr_t *output_addrs, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition: SlabHashmap.h:157
__host__ void HostFree(const Device &device)
Definition: CUDAHashmapBufferAccessor.h:85
__host__ void Reset(const Device &device)
Definition: CUDAHashmapBufferAccessor.h:67
Definition: SlabNodeManager.h:59
static const Dtype Bool
Definition: Dtype.h:52
int64_t dsize_value_
Definition: DeviceHashmap.h:111
Common CUDA utilities.
int64_t GetLength() const
Definition: Tensor.h:986
int64_t GetBucketCount() const override
Definition: SlabHashmap.h:271
Definition: SlabHashmapImpl.h:54
void InsertImpl(const void *input_keys, const void *input_values, addr_t *output_addrs, bool *output_masks, int64_t count)
Definition: SlabHashmap.h:299
std::shared_ptr< HashmapBuffer > buffer_
Definition: DeviceHashmap.h:115
#define max(x, y)
Definition: SVD3x3CPU.h:38
std::vector< int64_t > BucketSizes() const override
Definition: SlabHashmap.h:276