Open3D (C++ API)  0.19.0
ComputeGPU.h
Go to the documentation of this file.
1 // ----------------------------------------------------------------------------
2 // - Open3D: www.open3d.org -
3 // ----------------------------------------------------------------------------
4 // Copyright (c) 2018-2024 www.open3d.org
5 // SPDX-License-Identifier: MIT
6 // ----------------------------------------------------------------------------
7 //
8 // Generic GPU compute abstraction used by the Gaussian splatting pipeline.
9 // One header covers all platforms (OpenGL on Linux/Windows, Metal on macOS).
10 // Runtime shader resources are loaded from resources/gaussian_splat/.
11 //
12 // Typical usage:
13 //
14 // // One GpuComputeFrame per geometry or composite stage (RAII).
15 // GpuComputeFrame frame(ctx, GpuComputeFrame::kGeometry);
16 //
17 // // Each dispatch: one temporary GpuComputePass expression.
18 // GpuComputePass(ctx, ComputeProgramId::kGsProject, "gs_project")
19 // .UBO(0, view_params_buf)
20 // .SSBO(1, positions_buf)
21 // .Dispatch(groups_x, 1, 1);
22 // ctx.FullBarrier();
23 
24 #pragma once
25 
26 #include <cstddef>
27 #include <cstdint>
28 #include <vector>
29 
30 namespace open3d {
31 namespace visualization {
32 namespace rendering {
33 
34 // ---------------------------------------------------------------------------
35 // Enums
36 // ---------------------------------------------------------------------------
37 
40 enum class ComputeProgramId : int {
41  kGsProject = 0,
42  kGsComposite = 1,
44  kGsRadixScatter = 3,
45  kGsDispatchArgs = 4,
48  kGsDepthMerge = 5,
49  kCount = 6,
50 };
51 
53 enum class ImageFormat { kRGBA16F, kR32F, kR16UI };
54 
// Table of six shader name strings. The static_assert that follows this
// array ties its element count to ComputeProgramId::kCount, so the entries
// are presumably looked up by ComputeProgramId value -- TODO confirm at the
// lookup site, which is not part of this header.
// NOTE(review): slot 2 ("gaussian_radix_sort_histograms") has no enumerator
// with value 2 visible in this listing; the listing skips a numbered line in
// the enum, so verify against the real header.
57 constexpr const char* kGsShaderNames[] = {
58  "gaussian_project",
59  "gaussian_composite",
60  "gaussian_radix_sort_histograms",
61  "gaussian_radix_sort_scatter",
62  "gaussian_compute_dispatch_args",
63  "gaussian_depth_merge",
64 };
65 static_assert(std::size(kGsShaderNames) ==
67  "kGsShaderNames must match ComputeProgramId::kCount");
68 
69 // ---------------------------------------------------------------------------
70 // GPU data layout structs
71 // ---------------------------------------------------------------------------
72 
82 };
83 static_assert(sizeof(RadixSortParams) == 16,
84  "RadixSortParams must be 16 bytes to match GLSL layout");
85 
88  std::uintptr_t view_params_buf = 0;
89  std::uintptr_t positions_buf = 0;
90  std::uintptr_t scales_buf = 0;
91  std::uintptr_t rotations_buf = 0;
92  std::uintptr_t dc_opacity_buf = 0;
93  std::uintptr_t sh_buf = 0;
96  std::uintptr_t projected_composite_buf = 0;
99  std::uintptr_t tile_counts_buf = 0;
101  std::uintptr_t counters_buf = 0;
102  std::uintptr_t dispatch_args_buf = 0;
105  std::uintptr_t sort_keys_buf[2] = {0, 0};
106  std::uintptr_t sort_values_buf[2] = {0, 0};
107  std::uintptr_t histogram_buf = 0;
108  std::uintptr_t radix_params_buf = 0;
110  std::uintptr_t mask_buf = 0;
112  std::uintptr_t composite_depth_tex = 0;
115  std::uintptr_t merged_depth_u16_tex = 0;
118  int final_sort_src = 0;
122 };
123 
124 // ---------------------------------------------------------------------------
125 // GaussianSplatGpuContext — abstract GPU backend interface
126 // ---------------------------------------------------------------------------
127 
131 public:
// Virtual, defaulted destructor. The Metal factory declared at the end of
// this header returns std::unique_ptr<GaussianSplatGpuContext>, so concrete
// backends are destroyed through this base type.
132  virtual ~GaussianSplatGpuContext() = default;
133 
// Load all compute programs (lazy, idempotent).
// GpuComputePass calls this in its constructor and stores the result; when
// it is false the pass skips every bind and dispatch call.
135  virtual bool EnsureProgramsLoaded() = 0;
136 
137  // --- Buffer management ------------------------------------------------
// Pure virtual: the backend creates a buffer and returns its handle as a
// std::uintptr_t. `label` is optional (defaults to nullptr).
// NOTE(review): `size` is presumably a byte count and `label` a debug name;
// neither is established by this header -- confirm against a backend.
138  virtual std::uintptr_t CreateBuffer(std::size_t size,
139  const char* label = nullptr) = 0;
// Pure virtual: the backend releases the buffer identified by `buf`.
// NOTE(review): handling of a zero or unknown handle is backend-defined and
// not visible here -- confirm before passing unchecked handles.
140  virtual void DestroyBuffer(std::uintptr_t buf) = 0;
// Pure virtual: resize `buf` to `new_size`.
// Returns a valid handle, which may replace `buf` when the API reallocates,
// so callers must keep the returned value rather than the one passed in.
// NOTE(review): whether existing contents survive the resize is not stated
// in this header -- confirm per backend.
142  virtual std::uintptr_t ResizeBuffer(std::uintptr_t buf,
143  std::size_t new_size,
144  const char* label = nullptr) = 0;
145 
// Overridable variant of CreateBuffer. The default implementation simply
// forwards `size` and `label` to CreateBuffer and returns its handle.
// NOTE(review): "private" presumably means GPU-only storage on backends
// that distinguish it (e.g. Metal) -- confirm against the overrides.
148  virtual std::uintptr_t CreatePrivateBuffer(std::size_t size,
149  const char* label = nullptr) {
150  return CreateBuffer(size, label);
151  }
// Overridable variant of ResizeBuffer. The default implementation forwards
// all three arguments to ResizeBuffer and returns the handle it produces,
// which may differ from `buf`.
152  virtual std::uintptr_t ResizePrivateBuffer(std::uintptr_t buf,
153  std::size_t new_size,
154  const char* label = nullptr) {
155  return ResizeBuffer(buf, new_size, label);
156  }
157  virtual void UploadBuffer(std::uintptr_t buf,
158  const void* data,
160  std::size_t offset) = 0;
161  virtual bool DownloadBuffer(std::uintptr_t buf,
162  void* dst,
165  (void)buf;
166  (void)dst;
167  (void)size;
168  (void)offset;
169  return false;
170  }
// Pure virtual: clear the buffer `buf`.
// NOTE(review): the name suggests the whole buffer is filled with 32-bit
// zeros; the cleared range is not specified in this header -- confirm.
171  virtual void ClearBufferUInt32Zero(std::uintptr_t buf) = 0;
172 
173  // --- Bindings ---------------------------------------------------------
// Pure virtual: bind buffer `buf` as a storage buffer at index `binding`.
// GpuComputePass::SSBO forwards to this when the pass is ok.
174  virtual void BindSSBO(std::uint32_t binding, std::uintptr_t buf) = 0;
// Pure virtual: bind buffer `buf` as a uniform buffer at index `binding`.
// GpuComputePass::UBO forwards to this when the pass is ok.
175  virtual void BindUBO(std::uint32_t binding, std::uintptr_t buf) = 0;
176  virtual void BindUBORange(std::uint32_t binding,
177  std::uintptr_t buf,
179  std::size_t range_size) = 0;
180 
181  // --- Dispatch ---------------------------------------------------------
// Pure virtual: select the compute program identified by `id`.
// GpuComputePass calls this from its constructor, only after
// EnsureProgramsLoaded() has returned true.
182  virtual void UseProgram(ComputeProgramId id) = 0;
// Pure virtual: issue a compute dispatch with the given workgroup counts
// along x, y and z. GpuComputePass::Dispatch forwards here when the pass
// is ok.
// NOTE(review): no range check is visible in this header; see
// GetMaxComputeWorkGroupCount() for the limit callers presumably respect.
183  virtual void Dispatch(std::uint32_t groups_x,
184  std::uint32_t groups_y,
185  std::uint32_t groups_z) = 0;
// Pure virtual: issue a compute dispatch whose arguments are read from
// `indirect_buf` starting at `byte_offset`.
// NOTE(review): the argument layout is presumably three consecutive
// uint32 group counts, as in the usual indirect-dispatch convention --
// confirm against the shader that fills the dispatch-args buffer.
186  virtual void DispatchIndirect(std::uintptr_t indirect_buf,
187  std::size_t byte_offset) = 0;
// Pure virtual barrier. The usage example in the file header calls it
// right after a dispatch.
// NOTE(review): which memory accesses it orders is backend-defined and not
// described in this header -- confirm before relaxing or removing calls.
188  virtual void FullBarrier() = 0;
189 
// Upper bound on the workgroup count; the default implementation returns
// 65535 and backends may override it.
// NOTE(review): 65535 matches the minimum OpenGL guarantees for
// GL_MAX_COMPUTE_WORK_GROUP_COUNT, presumably the reason for this default;
// whether the limit is per dimension is not stated here -- confirm.
194  virtual std::uint32_t GetMaxComputeWorkGroupCount() const { return 65535u; }
195 
196  // --- Textures / images ------------------------------------------------
197  virtual std::uintptr_t CreateTexture2DR32F(std::uint32_t width,
199  const char* label = nullptr) = 0;
// Pure virtual: the backend releases the texture identified by `tex`.
// NOTE(review): handling of a zero or unknown handle is not visible here.
200  virtual void DestroyTexture(std::uintptr_t tex) = 0;
201  virtual std::uintptr_t ResizeTexture2DR32F(std::uintptr_t tex,
204  const char* label = nullptr) = 0;
206  virtual std::uintptr_t ResizeTexture2DR16UI(
207  std::uintptr_t tex,
210  const char* label = nullptr) = 0;
211 
215  virtual bool DownloadTextureR32F(std::uintptr_t tex,
218  std::vector<float>& out) {
219  (void)tex;
220  (void)width;
221  (void)height;
222  (void)out;
223  return false;
224  }
225 
229  virtual bool DownloadTextureR16UI(std::uintptr_t tex,
232  std::vector<std::uint16_t>& out) {
233  (void)tex;
234  (void)width;
235  (void)height;
236  (void)out;
237  return false;
238  }
239 
241  virtual void BindImage(std::uint32_t binding,
242  std::uintptr_t tex,
245  ImageFormat fmt) = 0;
246 
248  std::uintptr_t tex,
250  std::uint32_t height) = 0;
251 
252  // --- Frame sync -------------------------------------------------------
// Pure virtual frame-sync entry point.
// NOTE(review): presumably blocks until previously submitted GPU work has
// completed; the exact guarantee is backend-defined -- confirm.
255  virtual void FinishGpuWork() = 0;
256 
// Returns whether the most recently submitted GPU work succeeded.
// The default implementation always reports success; backends that can
// detect submission failures override it.
258  virtual bool WasLastSubmitSuccessful() const { return true; }
259 
// Optional per-stage hooks; every default implementation is empty.
// GpuComputeFrame calls the matching Begin* hook from its constructor and
// the matching End* hook from End(), chosen by the frame's kind.
262  virtual void BeginGeometryPass() {}
263  virtual void EndGeometryPass() {}
264  virtual void BeginCompositePass() {}
265  virtual void EndCompositePass() {}
266 
// Optional hook; the default implementation does nothing.
// NOTE(review): presumably lets a backend wait for geometry-stage work
// before dependent work starts -- the contract is not shown in this
// listing, confirm against the overrides.
272  virtual void WaitForGeometryPass() {}
273 
// Optional debug-group hooks; both defaults are empty, and the default
// push ignores its label argument.
// GpuComputePass pushes its label (when non-null and the pass is ok) in the
// constructor and issues the matching pop under the same condition.
276  virtual void PushDebugGroup(const char* /*label*/) {}
277  virtual void PopDebugGroup() {}
278 };
279 
280 // ---------------------------------------------------------------------------
281 // GpuComputeFrame — RAII for Begin/EndGeometryPass or Begin/EndCompositePass
282 // ---------------------------------------------------------------------------
283 
287 public:
289 
291  : ctx_(ctx), kind_(kind) {
292  if (kind_ == kGeometry) {
293  ctx_.BeginGeometryPass();
294  } else {
295  ctx_.BeginCompositePass();
296  }
297  }
299 
302 
// Explicitly end the frame early (dtor becomes a no-op).
// The first call invokes EndGeometryPass() or EndCompositePass() according
// to kind_; later calls do nothing because ended_ is already set.
304  void End() {
305  if (!ended_) {
// Mark the frame as ended before invoking the backend hook.
306  ended_ = true;
307  if (kind_ == kGeometry) {
308  ctx_.EndGeometryPass();
309  } else {
310  ctx_.EndCompositePass();
311  }
312  }
313  }
314 
315 private:
317  Kind kind_;
318  bool ended_ = false;
319 };
320 
321 // ---------------------------------------------------------------------------
322 // GpuComputePass — RAII + builder for a single compute dispatch
323 // ---------------------------------------------------------------------------
324 
334 public:
336  ComputeProgramId pid,
337  const char* label = nullptr)
338  : ctx_(ctx), label_(label) {
339  ok_ = ctx_.EnsureProgramsLoaded();
340  if (ok_) {
341  ctx_.UseProgram(pid);
342  if (label_) ctx_.PushDebugGroup(label_);
343  }
344  }
345 
347  if (ok_ && label_) ctx_.PopDebugGroup();
348  }
349 
// Copy construction is disabled: the pass holds a reference to the context
// and pairs one PushDebugGroup with one PopDebugGroup, which a copy would
// duplicate.
350  GpuComputePass(const GpuComputePass&) = delete;
352 
// Returns false only when EnsureProgramsLoaded() failed (device error).
// While it is false, every binding and dispatch call on this pass is
// skipped.
354  [[nodiscard]] bool ok() const { return ok_; }
355 
356  // --- Resource binding (fluent builder) --------------------------------
357 
// Bind `buf` as a uniform buffer at `binding` via the context; skipped when
// the pass is not ok. Returns *this so calls can be chained, as in the
// usage example in the file header.
358  GpuComputePass& UBO(std::uint32_t binding, std::uintptr_t buf) {
359  if (ok_) ctx_.BindUBO(binding, buf);
360  return *this;
361  }
362 
364  std::uintptr_t buf,
366  std::size_t size) {
367  if (ok_) ctx_.BindUBORange(binding, buf, offset, size);
368  return *this;
369  }
370 
// Bind `buf` as a storage buffer at `binding` via the context; skipped when
// the pass is not ok. Returns *this so calls can be chained.
371  GpuComputePass& SSBO(std::uint32_t binding, std::uintptr_t buf) {
372  if (ok_) ctx_.BindSSBO(binding, buf);
373  return *this;
374  }
375 
377  std::uintptr_t tex,
378  std::uint32_t w,
379  std::uint32_t h,
380  ImageFormat fmt) {
381  if (ok_) ctx_.BindImage(binding, tex, w, h, fmt);
382  return *this;
383  }
384 
386  std::uintptr_t tex,
387  std::uint32_t w,
388  std::uint32_t h) {
389  if (ok_) ctx_.BindSamplerTexture(unit, tex, w, h);
390  return *this;
391  }
392 
393  // --- Dispatch ---------------------------------------------------------
394 
396  if (ok_) ctx_.Dispatch(gx, gy, gz);
397  }
398 
// Issue an indirect dispatch through the context, reading its arguments
// from `buf` at `byte_offset`; skipped when the pass is not ok.
// Returns void, so it terminates a chain of binding calls.
399  void DispatchIndirect(std::uintptr_t buf, std::size_t byte_offset) {
400  if (ok_) ctx_.DispatchIndirect(buf, byte_offset);
401  }
402 
403 private:
405  const char* label_;
406  bool ok_;
407 };
408 
409 // ---------------------------------------------------------------------------
410 // Factory functions
411 // ---------------------------------------------------------------------------
412 
// Factory declarations, selected by platform macro.
// Non-Apple builds: the first guarded region holds only a comment, so this
// header declares no factory on those platforms.
// NOTE(review): "Vulkan-only: no GL compute factory" reads at odds with the
// file header, which names OpenGL for Linux/Windows -- confirm which
// statement is current.
// Apple builds: declares CreateComputeGpuContextMetal, which takes two
// integer handles and returns an owning pointer to the abstract context.
// The handles are presumably an MTLDevice and an MTLCommandQueue -- confirm
// against the Metal implementation.
// NOTE(review): std::unique_ptr requires <memory>, but the include list
// visible in this header names only <cstddef>, <cstdint> and <vector> --
// confirm <memory> is included rather than relied on transitively.
413 #if !defined(__APPLE__)
414 // Vulkan-only: no GL compute factory.
415 #endif
416 #if defined(__APPLE__)
417 [[nodiscard]] std::unique_ptr<GaussianSplatGpuContext>
418 CreateComputeGpuContextMetal(std::uintptr_t device_handle,
419  std::uintptr_t command_queue_handle);
420 #endif
421 
422 } // namespace rendering
423 } // namespace visualization
424 } // namespace open3d
virtual void DestroyBuffer(std::uintptr_t buf)=0
virtual void EndCompositePass()
Definition: ComputeGPU.h:265
virtual std::uintptr_t ResizeBuffer(std::uintptr_t buf, std::size_t new_size, const char *label=nullptr)=0
Returns a valid handle (may replace buf when the API reallocates).
virtual void DestroyTexture(std::uintptr_t tex)=0
virtual void BeginCompositePass()
Definition: ComputeGPU.h:264
virtual void BindUBORange(std::uint32_t binding, std::uintptr_t buf, std::size_t offset, std::size_t range_size)=0
virtual std::uintptr_t ResizeTexture2DR32F(std::uintptr_t tex, std::uint32_t width, std::uint32_t height, const char *label=nullptr)=0
virtual void PopDebugGroup()
Definition: ComputeGPU.h:277
virtual bool DownloadBuffer(std::uintptr_t buf, void *dst, std::size_t size, std::size_t offset)
Definition: ComputeGPU.h:161
virtual void BindSamplerTexture(std::uint32_t unit, std::uintptr_t tex, std::uint32_t width, std::uint32_t height)=0
virtual void UploadBuffer(std::uintptr_t buf, const void *data, std::size_t size, std::size_t offset)=0
virtual void BindSSBO(std::uint32_t binding, std::uintptr_t buf)=0
virtual void Dispatch(std::uint32_t groups_x, std::uint32_t groups_y, std::uint32_t groups_z)=0
virtual std::uintptr_t ResizePrivateBuffer(std::uintptr_t buf, std::size_t new_size, const char *label=nullptr)
Definition: ComputeGPU.h:152
virtual void ClearBufferUInt32Zero(std::uintptr_t buf)=0
virtual bool DownloadTextureR32F(std::uintptr_t tex, std::uint32_t width, std::uint32_t height, std::vector< float > &out)
Definition: ComputeGPU.h:215
virtual std::uintptr_t CreateTexture2DR32F(std::uint32_t width, std::uint32_t height, const char *label=nullptr)=0
virtual void BindUBO(std::uint32_t binding, std::uintptr_t buf)=0
virtual void WaitForGeometryPass()
Definition: ComputeGPU.h:272
virtual bool EnsureProgramsLoaded()=0
Load all compute programs (lazy, idempotent).
virtual void DispatchIndirect(std::uintptr_t indirect_buf, std::size_t byte_offset)=0
virtual std::uintptr_t CreateBuffer(std::size_t size, const char *label=nullptr)=0
virtual std::uint32_t GetMaxComputeWorkGroupCount() const
Definition: ComputeGPU.h:194
virtual void BindImage(std::uint32_t binding, std::uintptr_t tex, std::uint32_t width, std::uint32_t height, ImageFormat fmt)=0
Bind a write image at the given unit with the specified format.
virtual void BeginGeometryPass()
Definition: ComputeGPU.h:262
virtual void EndGeometryPass()
Definition: ComputeGPU.h:263
virtual std::uintptr_t ResizeTexture2DR16UI(std::uintptr_t tex, std::uint32_t width, std::uint32_t height, const char *label=nullptr)=0
Create or resize an R16UI texture for merged-depth CPU readback.
virtual bool DownloadTextureR16UI(std::uintptr_t tex, std::uint32_t width, std::uint32_t height, std::vector< std::uint16_t > &out)
Definition: ComputeGPU.h:229
virtual void UseProgram(ComputeProgramId id)=0
virtual std::uintptr_t CreatePrivateBuffer(std::size_t size, const char *label=nullptr)
Definition: ComputeGPU.h:148
virtual void PushDebugGroup(const char *)
Definition: ComputeGPU.h:276
virtual bool WasLastSubmitSuccessful() const
Returns whether the most recently submitted GPU work succeeded.
Definition: ComputeGPU.h:258
GpuComputeFrame(const GpuComputeFrame &)=delete
~GpuComputeFrame()
Definition: ComputeGPU.h:298
GpuComputeFrame(GaussianSplatGpuContext &ctx, Kind kind)
Definition: ComputeGPU.h:290
GpuComputeFrame & operator=(const GpuComputeFrame &)=delete
void End()
Explicitly end the frame early (dtor becomes a no-op).
Definition: ComputeGPU.h:304
GpuComputePass(const GpuComputePass &)=delete
GpuComputePass & operator=(const GpuComputePass &)=delete
GpuComputePass & UBORange(std::uint32_t binding, std::uintptr_t buf, std::size_t offset, std::size_t size)
Definition: ComputeGPU.h:363
GpuComputePass & Image(std::uint32_t binding, std::uintptr_t tex, std::uint32_t w, std::uint32_t h, ImageFormat fmt)
Definition: ComputeGPU.h:376
void DispatchIndirect(std::uintptr_t buf, std::size_t byte_offset)
Definition: ComputeGPU.h:399
~GpuComputePass()
Definition: ComputeGPU.h:346
GpuComputePass(GaussianSplatGpuContext &ctx, ComputeProgramId pid, const char *label=nullptr)
Definition: ComputeGPU.h:335
void Dispatch(std::uint32_t gx, std::uint32_t gy, std::uint32_t gz)
Definition: ComputeGPU.h:395
bool ok() const
Returns false only when EnsureProgramsLoaded() failed (device error).
Definition: ComputeGPU.h:354
GpuComputePass & SSBO(std::uint32_t binding, std::uintptr_t buf)
Definition: ComputeGPU.h:371
GpuComputePass & UBO(std::uint32_t binding, std::uintptr_t buf)
Definition: ComputeGPU.h:358
GpuComputePass & Sampler(std::uint32_t unit, std::uintptr_t tex, std::uint32_t w, std::uint32_t h)
Definition: ComputeGPU.h:385
int width
Definition: FilePCD.cpp:52
int size
Definition: FilePCD.cpp:40
int height
Definition: FilePCD.cpp:53
int offset
Definition: FilePCD.cpp:45
Definition: DLPack.h:678
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle uint32_t
Definition: K4aPlugin.cpp:548
const char const char value recording_handle imu_sample void
Definition: K4aPlugin.cpp:250
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle device_handle
Definition: K4aPlugin.cpp:392
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c k4a_image_t image_handle uint8_t image_handle image_handle image_handle image_handle image_handle timestamp_usec white_balance image_handle k4a_device_configuration_t config device_handle char size_t serial_number_size bool int32_t int32_t int32_t int32_t k4a_color_control_mode_t default_mode value const const k4a_calibration_t calibration char size_t
Definition: K4aPlugin.cpp:719
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample playback_handle k4a_logging_message_cb_t void min_level device_handle k4a_imu_sample_t timeout_in_ms capture_handle capture_handle capture_handle image_handle temperature_c int
Definition: K4aPlugin.cpp:474
const char const char value recording_handle imu_sample recording_handle uint8_t data
Definition: K4aPlugin.cpp:269
const char const char value recording_handle imu_sample recording_handle uint8_t size_t data_size k4a_record_configuration_t config target_format k4a_capture_t capture_handle k4a_imu_sample_t imu_sample uint64_t
Definition: K4aPlugin.cpp:343
ImageFormat
Format selector for GaussianSplatGpuContext::BindImage().
Definition: ComputeGPU.h:53
ComputeProgramId
Definition: ComputeGPU.h:40
constexpr const char * kGsShaderNames[]
Definition: ComputeGPU.h:57
Definition: PinholeCameraIntrinsic.cpp:16
Per-view GPU resource handles (opaque: GL name or MTLBuffer/MTLTexture).
Definition: ComputeGPU.h:87
std::uintptr_t sh_buf
Definition: ComputeGPU.h:93
std::uintptr_t projected_composite_buf
Definition: ComputeGPU.h:96
std::uint32_t cached_splat_count
Definition: ComputeGPU.h:120
std::uintptr_t scales_buf
Definition: ComputeGPU.h:90
std::uintptr_t dc_opacity_buf
Definition: ComputeGPU.h:92
std::uintptr_t dispatch_args_buf
Definition: ComputeGPU.h:102
std::uintptr_t sort_keys_buf[2]
Definition: ComputeGPU.h:105
std::uintptr_t composite_depth_tex
GS composite depth output (image binding 1); not the shared scene depth.
Definition: ComputeGPU.h:112
std::uintptr_t tile_counts_buf
Definition: ComputeGPU.h:99
std::uintptr_t mask_buf
Bit-packed per-splat visibility mask. Bound at binding 15.
Definition: ComputeGPU.h:110
std::uintptr_t rotations_buf
Definition: ComputeGPU.h:91
std::uint64_t cached_scene_id
Definition: ComputeGPU.h:119
std::uintptr_t positions_buf
Definition: ComputeGPU.h:89
std::uintptr_t radix_params_buf
Definition: ComputeGPU.h:108
std::uint32_t warned_gpu_error_flags
Definition: ComputeGPU.h:121
std::uintptr_t merged_depth_u16_tex
Definition: ComputeGPU.h:115
std::uintptr_t sort_values_buf[2]
Definition: ComputeGPU.h:106
std::uintptr_t view_params_buf
Definition: ComputeGPU.h:88
std::uintptr_t counters_buf
GPU error/diagnostic counters (total_entries, error_flags, ...).
Definition: ComputeGPU.h:101
std::uintptr_t histogram_buf
Definition: ComputeGPU.h:107
std::uint32_t g_num_elements
Definition: ComputeGPU.h:78
std::uint32_t g_shift
Definition: ComputeGPU.h:79
std::uint32_t g_num_workgroups
Definition: ComputeGPU.h:80
std::uint32_t g_num_blocks_per_workgroup
Definition: ComputeGPU.h:81