Loading [MathJax]/extensions/TeX/AMSsymbols.js
Open3D (C++ API)  0.16.0
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
SlabHashBackend.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // The MIT License (MIT)
5 //
6 // Copyright (c) 2018-2021 www.open3d.org
7 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to deal
10 // in the Software without restriction, including without limitation the rights
11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 // copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
15 // The above copyright notice and this permission notice shall be included in
16 // all copies or substantial portions of the Software.
17 //
18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 // IN THE SOFTWARE.
25 // ----------------------------------------------------------------------------
26 
27 #pragma once
28 
29 #include <memory>
30 
31 #include "open3d/core/CUDAUtils.h"
35 
36 namespace open3d {
37 namespace core {
38 template <typename Key, typename Hash, typename Eq>
40 public:
41  SlabHashBackend(int64_t init_capacity,
42  int64_t key_dsize,
43  const std::vector<int64_t>& value_dsizes,
44  const Device& device);
45 
47 
48  void Reserve(int64_t capacity) override;
49 
50  void Insert(const void* input_keys,
51  const std::vector<const void*>& input_values_soa,
52  buf_index_t* output_buf_indices,
53  bool* output_masks,
54  int64_t count) override;
55 
56  void Find(const void* input_keys,
57  buf_index_t* output_buf_indices,
58  bool* output_masks,
59  int64_t count) override;
60 
61  void Erase(const void* input_keys,
62  bool* output_masks,
63  int64_t count) override;
64 
65  int64_t GetActiveIndices(buf_index_t* output_indices) override;
66  void Clear() override;
67 
68  int64_t Size() const override;
69  int64_t GetBucketCount() const override;
70  std::vector<int64_t> BucketSizes() const override;
71  float LoadFactor() const override;
72 
74 
75  void Allocate(int64_t capacity) override;
76  void Free() override;
77 
78 protected:
82 
84  std::shared_ptr<SlabNodeManager> node_mgr_;
85 
86  int64_t bucket_count_;
87 };
88 
89 template <typename Key, typename Hash, typename Eq>
91  int64_t init_capacity,
92  int64_t key_dsize,
93  const std::vector<int64_t>& value_dsizes,
94  const Device& device)
95  : DeviceHashBackend(init_capacity, key_dsize, value_dsizes, device) {
96  Allocate(init_capacity);
97 }
98 
99 template <typename Key, typename Hash, typename Eq>
101  Free();
102 }
103 
104 template <typename Key, typename Hash, typename Eq>
106 
107 template <typename Key, typename Hash, typename Eq>
108 void SlabHashBackend<Key, Hash, Eq>::Find(const void* input_keys,
109  buf_index_t* output_buf_indices,
110  bool* output_masks,
111  int64_t count) {
112  if (count == 0) return;
113 
114  OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
116  OPEN3D_CUDA_CHECK(cudaGetLastError());
117 
118  const int64_t num_blocks =
119  (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
120  FindKernel<<<num_blocks, kThreadsPerBlock, 0, core::cuda::GetStream()>>>(
121  impl_, input_keys, output_buf_indices, output_masks, count);
123  OPEN3D_CUDA_CHECK(cudaGetLastError());
124 }
125 
126 template <typename Key, typename Hash, typename Eq>
127 void SlabHashBackend<Key, Hash, Eq>::Erase(const void* input_keys,
128  bool* output_masks,
129  int64_t count) {
130  if (count == 0) return;
131 
132  OPEN3D_CUDA_CHECK(cudaMemset(output_masks, 0, sizeof(bool) * count));
134  OPEN3D_CUDA_CHECK(cudaGetLastError());
135  auto buf_indices = static_cast<buf_index_t*>(
136  MemoryManager::Malloc(sizeof(buf_index_t) * count, this->device_));
137 
138  const int64_t num_blocks =
139  (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
140  EraseKernelPass0<<<num_blocks, kThreadsPerBlock, 0,
141  core::cuda::GetStream()>>>(
142  impl_, input_keys, buf_indices, output_masks, count);
143  EraseKernelPass1<<<num_blocks, kThreadsPerBlock, 0,
144  core::cuda::GetStream()>>>(impl_, buf_indices,
145  output_masks, count);
147  OPEN3D_CUDA_CHECK(cudaGetLastError());
148 
149  MemoryManager::Free(buf_indices, this->device_);
150 }
151 
152 template <typename Key, typename Hash, typename Eq>
154  buf_index_t* output_buf_indices) {
155  uint32_t* count = static_cast<uint32_t*>(
156  MemoryManager::Malloc(sizeof(uint32_t), this->device_));
157  OPEN3D_CUDA_CHECK(cudaMemset(count, 0, sizeof(uint32_t)));
158 
160  OPEN3D_CUDA_CHECK(cudaGetLastError());
161 
162  const int64_t num_blocks =
163  (impl_.bucket_count_ * kWarpSize + kThreadsPerBlock - 1) /
164  kThreadsPerBlock;
165  GetActiveIndicesKernel<<<num_blocks, kThreadsPerBlock, 0,
166  core::cuda::GetStream()>>>(
167  impl_, output_buf_indices, count);
169  OPEN3D_CUDA_CHECK(cudaGetLastError());
170 
171  uint32_t ret;
172  MemoryManager::MemcpyToHost(&ret, count, this->device_, sizeof(uint32_t));
173  MemoryManager::Free(count, this->device_);
174 
175  return static_cast<int64_t>(ret);
176 }
177 
178 template <typename Key, typename Hash, typename Eq>
180  // Clear the heap
181  this->buffer_->ResetHeap();
182 
183  // Clear the linked list heads
184  OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
185  sizeof(Slab) * this->bucket_count_));
187  OPEN3D_CUDA_CHECK(cudaGetLastError());
188 
189  // Clear the linked list nodes
190  node_mgr_->Reset();
191 }
192 
193 template <typename Key, typename Hash, typename Eq>
195  return this->buffer_->GetHeapTopIndex();
196 }
197 
198 template <typename Key, typename Hash, typename Eq>
200  return bucket_count_;
201 }
202 
203 template <typename Key, typename Hash, typename Eq>
204 std::vector<int64_t> SlabHashBackend<Key, Hash, Eq>::BucketSizes() const {
205  thrust::device_vector<int64_t> elems_per_bucket(impl_.bucket_count_);
206  thrust::fill(elems_per_bucket.begin(), elems_per_bucket.end(), 0);
207 
208  const int64_t num_blocks =
209  (impl_.buffer_accessor_.capacity_ + kThreadsPerBlock - 1) /
210  kThreadsPerBlock;
211  CountElemsPerBucketKernel<<<num_blocks, kThreadsPerBlock, 0,
212  core::cuda::GetStream()>>>(
213  impl_, thrust::raw_pointer_cast(elems_per_bucket.data()));
215  OPEN3D_CUDA_CHECK(cudaGetLastError());
216 
217  std::vector<int64_t> result(impl_.bucket_count_);
218  thrust::copy(elems_per_bucket.begin(), elems_per_bucket.end(),
219  result.begin());
220  return result;
221 }
222 
223 template <typename Key, typename Hash, typename Eq>
225  return float(Size()) / float(this->bucket_count_);
226 }
227 
228 template <typename Key, typename Hash, typename Eq>
230  const void* input_keys,
231  const std::vector<const void*>& input_values_soa,
232  buf_index_t* output_buf_indices,
233  bool* output_masks,
234  int64_t count) {
235  if (count == 0) return;
236 
239  int prev_heap_top = this->buffer_->GetHeapTopIndex();
240  *thrust::device_ptr<int>(impl_.buffer_accessor_.heap_top_) =
241  prev_heap_top + count;
242 
243  const int64_t num_blocks =
244  (count + kThreadsPerBlock - 1) / kThreadsPerBlock;
245  InsertKernelPass0<<<num_blocks, kThreadsPerBlock, 0,
246  core::cuda::GetStream()>>>(
247  impl_, input_keys, output_buf_indices, prev_heap_top, count);
248  InsertKernelPass1<<<num_blocks, kThreadsPerBlock, 0,
249  core::cuda::GetStream()>>>(
250  impl_, input_keys, output_buf_indices, output_masks, count);
251 
252  thrust::device_vector<const void*> input_values_soa_device(
253  input_values_soa.begin(), input_values_soa.end());
254 
255  int64_t n_values = input_values_soa.size();
256  const void* const* ptr_input_values_soa =
257  thrust::raw_pointer_cast(input_values_soa_device.data());
258  DISPATCH_DIVISOR_SIZE_TO_BLOCK_T(
259  impl_.buffer_accessor_.common_block_size_, [&]() {
260  InsertKernelPass2<Key, Hash, Eq, block_t>
261  <<<num_blocks, kThreadsPerBlock, 0,
262  core::cuda::GetStream()>>>(
263  impl_, ptr_input_values_soa, output_buf_indices,
264  output_masks, count, n_values);
265  });
267  OPEN3D_CUDA_CHECK(cudaGetLastError());
268 }
269 
270 template <typename Key, typename Hash, typename Eq>
272  this->bucket_count_ = capacity * 2;
273  this->capacity_ = capacity;
274 
275  // Allocate buffer for key values.
276  this->buffer_ = std::make_shared<HashBackendBuffer>(
277  this->capacity_, this->key_dsize_, this->value_dsizes_,
278  this->device_);
280 
281  // Allocate buffer for linked list nodes.
282  node_mgr_ = std::make_shared<SlabNodeManager>(this->device_);
283 
284  // Allocate linked list heads.
285  impl_.bucket_list_head_ = static_cast<Slab*>(MemoryManager::Malloc(
286  sizeof(Slab) * this->bucket_count_, this->device_));
287  OPEN3D_CUDA_CHECK(cudaMemset(impl_.bucket_list_head_, 0xFF,
288  sizeof(Slab) * this->bucket_count_));
290  OPEN3D_CUDA_CHECK(cudaGetLastError());
291 
292  impl_.Setup(this->bucket_count_, node_mgr_->impl_, buffer_accessor_);
293 }
294 
295 template <typename Key, typename Hash, typename Eq>
298  MemoryManager::Free(impl_.bucket_list_head_, this->device_);
299 }
300 } // namespace core
301 } // namespace open3d
std::shared_ptr< HashBackendBuffer > buffer_
Definition: DeviceHashBackend.h:121
Definition: CUDAHashBackendBufferAccessor.h:43
void Allocate(int64_t capacity) override
Definition: SlabHashBackend.h:271
std::shared_ptr< SlabNodeManager > node_mgr_
Definition: SlabHashBackend.h:84
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:567
__global__ void GetActiveIndicesKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, uint32_t *output_count)
Definition: SlabHashBackendImpl.h:666
std::vector< int64_t > value_dsizes_
Definition: DeviceHashBackend.h:117
SlabHashBackendImpl< Key, Hash, Eq > impl_
Definition: SlabHashBackend.h:81
static void Free(void *ptr, const Device &device)
Frees previously allocated memory at address ptr on device device.
Definition: MemoryManager.cpp:47
#define OPEN3D_CUDA_CHECK(err)
Definition: CUDAUtils.h:66
__global__ void InsertKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, int heap_counter_prev, int64_t count)
Kernels.
Definition: SlabHashBackendImpl.h:493
static void MemcpyToHost(void *host_ptr, const void *src_ptr, const Device &src_device, size_t num_bytes)
Same as Memcpy, but with host (CPU:0) as default dst_device.
Definition: MemoryManager.cpp:104
static void * Malloc(size_t byte_size, const Device &device)
Definition: MemoryManager.cpp:41
Device device_
Definition: DeviceHashBackend.h:119
int64_t bucket_count_
Definition: SlabHashBackend.h:86
void Clear() override
Clear stored map without reallocating memory.
Definition: SlabHashBackend.h:179
uint32_t buf_index_t
Definition: HashBackendBuffer.h:63
int64_t key_dsize_
Definition: DeviceHashBackend.h:116
int count
Definition: FilePCD.cpp:61
int64_t capacity_
Definition: DeviceHashBackend.h:114
void Synchronize()
Definition: CUDAUtils.cpp:77
int64_t GetActiveIndices(buf_index_t *output_indices) override
Parallel collect all iterators in the hash table.
Definition: SlabHashBackend.h:153
__host__ void Setup(HashBackendBuffer &hashmap_buffer)
Definition: CUDAHashBackendBufferAccessor.h:45
void Find(const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel find a contiguous array of keys.
Definition: SlabHashBackend.h:108
__host__ void Shutdown(const Device &device)
Definition: CUDAHashBackendBufferAccessor.h:92
Definition: Device.h:37
core::Tensor result
Definition: VtkUtils.cpp:91
void Reserve(int64_t capacity) override
Definition: SlabHashBackend.h:105
__global__ void EraseKernelPass0(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:620
void Erase(const void *input_keys, bool *output_masks, int64_t count) override
Parallel erase a contiguous array of keys.
Definition: SlabHashBackend.h:127
Definition: SlabHashBackendImpl.h:64
SlabHashBackendImpl< Key, Hash, Eq > GetImpl()
Definition: SlabHashBackend.h:73
__global__ void EraseKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:655
Definition: PinholeCameraIntrinsic.cpp:35
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle float
Definition: K4aPlugin.cpp:475
std::vector< int64_t > BucketSizes() const override
Get the number of entries per bucket.
Definition: SlabHashBackend.h:204
Definition: DeviceHashBackend.h:39
CUDAHashBackendBufferAccessor buffer_accessor_
Definition: SlabHashBackend.h:83
float LoadFactor() const override
Get the current load factor, defined as size / bucket count.
Definition: SlabHashBackend.h:224
__global__ void CountElemsPerBucketKernel(SlabHashBackendImpl< Key, Hash, Eq > impl, int64_t *bucket_elem_counts)
Definition: SlabHashBackendImpl.h:706
void Insert(const void *input_keys, const std::vector< const void *> &input_values_soa, buf_index_t *output_buf_indices, bool *output_masks, int64_t count) override
Parallel insert contiguous arrays of keys and values.
Definition: SlabHashBackend.h:229
~SlabHashBackend()
Definition: SlabHashBackend.h:100
Definition: SlabNodeManager.h:58
bool copy
Definition: VtkUtils.cpp:89
Common CUDA utilities.
void Free() override
Definition: SlabHashBackend.h:296
int64_t Size() const override
Get the size (number of valid entries) of the hash map.
Definition: SlabHashBackend.h:194
SlabHashBackend(int64_t init_capacity, int64_t key_dsize, const std::vector< int64_t > &value_dsizes, const Device &device)
Definition: SlabHashBackend.h:90
int64_t GetBucketCount() const override
Get the number of buckets of the hash map.
Definition: SlabHashBackend.h:199
__global__ void InsertKernelPass1(SlabHashBackendImpl< Key, Hash, Eq > impl, const void *input_keys, buf_index_t *output_buf_indices, bool *output_masks, int64_t count)
Definition: SlabHashBackendImpl.h:512
Definition: SlabHashBackend.h:39