Open3D (C++ API)  0.19.0
CUDAUtils.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.open3d.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 
13 
14 #pragma once
15 
16 #include "open3d/core/Device.h"
17 #include "open3d/utility/Logging.h"
18 
19 #ifdef BUILD_CUDA_MODULE
20 
21 #include <cuda.h>
22 #include <cuda_runtime.h>
23 
24 #include <memory>
25 #include <optional>
26 #include <vector>
27 
// Function qualifiers used when this header is compiled with CUDA support.
#define OPEN3D_FORCE_INLINE __forceinline__
#define OPEN3D_HOST_DEVICE __host__ __device__
#define OPEN3D_DEVICE __device__

// Compile-time check that `type` is the closure type of an extended
// __host__ __device__ lambda; fails with "<type> must be a __host__ __device__
// lambda" otherwise.
#define OPEN3D_ASSERT_HOST_DEVICE_LAMBDA(type)                            \
    static_assert(__nv_is_extended_host_device_lambda_closure_type(type), \
                  #type " must be a __host__ __device__ lambda")

// Forwards `err` together with the call site (__FILE__, __LINE__) to
// open3d::core::__OPEN3D_CUDA_CHECK.
#define OPEN3D_CUDA_CHECK(err) \
    open3d::core::__OPEN3D_CUDA_CHECK(err, __FILE__, __LINE__)

// Forwards `message` together with the call site (__FILE__, __LINE__) to
// open3d::core::__OPEN3D_GET_LAST_CUDA_ERROR. The callee is fully qualified,
// exactly like in OPEN3D_CUDA_CHECK, so that the macro also resolves when it
// is expanded outside of namespace open3d::core.
#define OPEN3D_GET_LAST_CUDA_ERROR(message) \
    open3d::core::__OPEN3D_GET_LAST_CUDA_ERROR(message, __FILE__, __LINE__)

// Calls `cuda_function` with the remaining macro arguments.
// NOTE: the expansion already ends with a semicolon.
#define CUDA_CALL(cuda_function, ...) cuda_function(__VA_ARGS__);
39 
40 #else // #ifdef BUILD_CUDA_MODULE
41 
// Fallbacks used when the library is built without the CUDA module.

// The device/host qualifiers expand to nothing; forced inlining degrades to a
// plain `inline`.
#define OPEN3D_DEVICE
#define OPEN3D_HOST_DEVICE
#define OPEN3D_FORCE_INLINE inline

// The checking macros expand to nothing, so their arguments are not evaluated.
#define OPEN3D_CUDA_CHECK(err)
#define OPEN3D_GET_LAST_CUDA_ERROR(message)
#define OPEN3D_ASSERT_HOST_DEVICE_LAMBDA(type)

// Instead of calling `cuda_function`, reports through
// open3d::utility::LogError that the function cannot be called. The arguments
// after `cuda_function` are discarded. The expansion ends with a semicolon.
#define CUDA_CALL(cuda_function, ...) \
    open3d::utility::LogError(        \
            "Not built with CUDA, cannot call " #cuda_function);
51 
52 #endif // #ifdef BUILD_CUDA_MODULE
53 
54 namespace open3d {
55 namespace core {
56 
57 #ifdef BUILD_CUDA_MODULE
58 
82 class CUDAScopedDevice {
83 public:
84  explicit CUDAScopedDevice(int device_id);
85 
86  explicit CUDAScopedDevice(const Device& device);
87 
89 
90  CUDAScopedDevice(const CUDAScopedDevice&) = delete;
92 
93 private:
94  int prev_device_id_;
95 };
96 
136 class CUDAScopedStream {
137 private:
138  struct CreateNewStreamTag {
139  CreateNewStreamTag(const CreateNewStreamTag&) = delete;
140  CreateNewStreamTag& operator=(const CreateNewStreamTag&) = delete;
141  CreateNewStreamTag(CreateNewStreamTag&&) = delete;
142  CreateNewStreamTag& operator=(CreateNewStreamTag&&) = delete;
143  };
144 
145 public:
146  constexpr static CreateNewStreamTag CreateNewStream = {};
147 
148  explicit CUDAScopedStream(const CreateNewStreamTag&);
149 
150  explicit CUDAScopedStream(cudaStream_t stream);
151 
152  ~CUDAScopedStream();
153 
154  CUDAScopedStream(const CUDAScopedStream&) = delete;
155  CUDAScopedStream& operator=(const CUDAScopedStream&) = delete;
156 
157 private:
158  cudaStream_t prev_stream_;
159  cudaStream_t new_stream_;
160  bool owns_new_stream_ = false;
161 };
162 
176 class CUDAState {
177 public:
178  static CUDAState& GetInstance();
179 
180  CUDAState(const CUDAState&) = delete;
181  CUDAState& operator=(const CUDAState&) = delete;
182 
185  bool IsP2PEnabled(int src_id, int tar_id) const;
186 
189  bool IsP2PEnabled(const Device& src, const Device& tar) const;
190 
193  void ForceDisableP2PForTesting();
194 
195 private:
196  CUDAState();
197 
198  std::vector<std::vector<bool>> p2p_enabled_;
199 };
200 
// Queries about the current CUDA device. Only declared here; the definitions
// live outside this header.
// NOTE(review): units and exact semantics of the returned values are not
// visible from this header -- confirm against the implementation.

/// Returns the total memory size of the current CUDA device.
size_t GetCUDACurrentTotalMemSize();

/// Returns the texture alignment of the current CUDA device.
int GetCUDACurrentDeviceTextureAlignment();

/// Returns the warp size of the current CUDA device.
int GetCUDACurrentWarpSize();
209 
210 #else
211 
214 public:
215  explicit CUDAScopedDevice(int device_id) {}
216  explicit CUDAScopedDevice(const Device& device) {}
220 };
221 
222 #endif
223 
224 namespace cuda {
225 
228 int DeviceCount();
229 
232 bool IsAvailable();
233 
235 void ReleaseCache();
236 
239 void Synchronize();
240 
245 void Synchronize(const Device& device);
246 
250 void AssertCUDADeviceAvailable(int device_id);
251 
255 void AssertCUDADeviceAvailable(const Device& device);
256 
262 bool SupportsMemoryPools(const Device& device);
263 
264 #ifdef BUILD_CUDA_MODULE
265 
266 int GetDevice();
267 cudaStream_t GetStream();
268 cudaStream_t GetDefaultStream();
269 
270 #endif
271 
272 } // namespace cuda
273 } // namespace core
274 } // namespace open3d
275 
276 // Exposed as implementation detail of the macros defined at the top of this file.
277 #ifdef BUILD_CUDA_MODULE
278 
279 namespace open3d {
280 namespace core {
281 
282 void __OPEN3D_CUDA_CHECK(cudaError_t err, const char* file, const int line);
283 
284 void __OPEN3D_GET_LAST_CUDA_ERROR(const char* message,
285  const char* file,
286  const int line);
287 
288 } // namespace core
289 } // namespace open3d
290 
291 #endif
When CUDA is not enabled, this is a dummy class.
Definition: CUDAUtils.h:213
CUDAScopedDevice(const Device &device)
Definition: CUDAUtils.h:216
CUDAScopedDevice(const CUDAScopedDevice &)=delete
CUDAScopedDevice(int device_id)
Definition: CUDAUtils.h:215
~CUDAScopedDevice()
Definition: CUDAUtils.h:217
CUDAScopedDevice & operator=(const CUDAScopedDevice &)=delete
Definition: Device.h:18
void ReleaseCache()
Releases CUDA memory manager cache. This is typically used for debugging.
Definition: CUDAUtils.cpp:40
bool IsAvailable()
Definition: CUDAUtils.cpp:38
bool SupportsMemoryPools(const Device &device)
Definition: CUDAUtils.cpp:111
int DeviceCount()
Definition: CUDAUtils.cpp:21
void Synchronize()
Definition: CUDAUtils.cpp:58
void AssertCUDADeviceAvailable(int device_id)
Definition: CUDAUtils.cpp:75
Definition: PinholeCameraIntrinsic.cpp:16