0.14.1/cpp_api/_parallel_for_8h_source.html

 // ----------------------------------------------------------------------------
 // -                        Open3D: www.open3d.org                            -
 // ----------------------------------------------------------------------------
 // The MIT License (MIT)
 //
 // Copyright (c) 2018-2021 www.open3d.org
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 // IN THE SOFTWARE.
 // ----------------------------------------------------------------------------

 #pragma once

 #include <algorithm>
 #include <cstdint>
 #include <type_traits>
 #include <typeinfo>

 #include "open3d/core/Device.h"
 #include "open3d/utility/Logging.h"
 #include "open3d/utility/Overload.h"
 #include "open3d/utility/Parallel.h"
 #include "open3d/utility/Preprocessor.h"

 #ifdef __CUDACC__
 #include <cuda.h>
 #include <cuda_runtime.h>

 #include "open3d/core/CUDAUtils.h"
 #endif

 namespace open3d {
 namespace core {

 #ifdef __CUDACC__

 // Number of threads per block used by ParallelForCUDA_ when launching
 // ElementWiseKernel_ (passed both as the kernel's block_size template argument
 // and as the block dimension of the launch).
 static constexpr int64_t OPEN3D_PARFOR_BLOCK = 128;
 // Maximum number of indices handled by a single thread of ElementWiseKernel_
 // (passed as the kernel's thread_size template argument, which bounds its
 // per-thread loop).
 static constexpr int64_t OPEN3D_PARFOR_THREAD = 4;

 /// Kernel that applies \p f to the indices in [0, n) covered by this block.
 ///
 /// Every block owns a contiguous window of block_size * thread_size indices.
 /// Within that window a thread starts at its own threadIdx.x and visits up to
 /// thread_size indices, advancing by block_size each time. The indexing
 /// therefore expects a 1D launch with block_size threads per block.
 ///
 /// \tparam block_size Number of threads per block the launch is made with.
 /// \tparam thread_size Maximum number of indices processed by one thread.
 /// \param n Total number of indices; indices >= n are never passed to \p f.
 /// \param f Callable invoked on the device as f(int64_t index).
 template <int64_t block_size, int64_t thread_size, typename func_t>
 __global__ void ElementWiseKernel_(int64_t n, func_t f) {
     // The multiplication is carried out in 64 bits because the template
     // constants are int64_t, so the block offset cannot wrap at 2^32.
     const int64_t block_offset = blockIdx.x * (block_size * thread_size);
     int64_t item = block_offset + threadIdx.x;
 #pragma unroll
     for (int64_t step = 0; step < thread_size; ++step) {
         // Indices only grow, so once one is out of range all later ones are.
         if (item >= n) {
             break;
         }
         f(item);
         item += block_size;
     }
 }

 /// Run a function in parallel on a CUDA device.
 ///
 /// Launches ElementWiseKernel_ on the stream returned by
 /// core::cuda::GetStream(), with OPEN3D_PARFOR_BLOCK threads per block and up
 /// to OPEN3D_PARFOR_THREAD indices per thread. The call only checks for launch
 /// errors; it does not synchronize with the device itself.
 ///
 /// \param device The device the kernel is launched on. Must be a CUDA device,
 /// otherwise an error is logged.
 /// \param n Number of indices; \p func is invoked for every index in [0, n).
 /// Nothing is launched when n == 0.
 /// \param func Callable copied to the device and invoked as func(int64_t).
 template <typename func_t>
 void ParallelForCUDA_(const Device& device, int64_t n, const func_t& func) {
     if (device.GetType() != Device::DeviceType::CUDA) {
         utility::LogError("ParallelFor for CUDA cannot run on device {}.",
                           device.ToString());
     }
     if (n == 0) {
         return;
     }

     CUDAScopedDevice scoped_device(device);
     const int64_t items_per_block = OPEN3D_PARFOR_BLOCK * OPEN3D_PARFOR_THREAD;
     const int64_t grid_size = (n + items_per_block - 1) / items_per_block;

     // The launch configuration stores the grid dimension as a 32-bit unsigned
     // value and the x-dimension of a grid is limited to 2^31 - 1 blocks.
     // Without this check an oversized int64_t grid_size would be narrowed
     // implicitly, which can silently launch fewer blocks than required.
     // NOTE(review): like the device check above, this relies on LogError not
     // returning normally -- confirm.
     constexpr int64_t max_grid_size = 2147483647;  // 2^31 - 1
     if (grid_size > max_grid_size) {
         utility::LogError(
                 "ParallelFor for CUDA cannot process {} items: {} blocks "
                 "required, but at most {} blocks can be launched.",
                 n, grid_size, max_grid_size);
     }

     ElementWiseKernel_<OPEN3D_PARFOR_BLOCK, OPEN3D_PARFOR_THREAD>
             <<<grid_size, OPEN3D_PARFOR_BLOCK, 0, core::cuda::GetStream()>>>(
                     n, func);
     // Kernel launches do not return an error code; query it explicitly.
     OPEN3D_GET_LAST_CUDA_ERROR("ParallelFor failed.");
 }

 #else

 /// Run a function in parallel on CPU.
 ///
 /// The index range [0, n) is distributed over an OpenMP parallel-for region
 /// whose thread count is given by utility::EstimateMaxThreads().
 ///
 /// \param device The device to run on. Must be a CPU device, otherwise an
 /// error is logged.
 /// \param n Number of indices; \p func is invoked once for every index in
 /// [0, n). Nothing is executed when n == 0.
 /// \param func Callable invoked as func(int64_t index), possibly from several
 /// threads at the same time.
 template <typename func_t>
 void ParallelForCPU_(const Device& device, int64_t n, const func_t& func) {
     const bool runs_on_cpu = device.GetType() == Device::DeviceType::CPU;
     if (!runs_on_cpu) {
         utility::LogError("ParallelFor for CPU cannot run on device {}.",
                           device.ToString());
     }

     // Empty workload: skip setting up the parallel region altogether.
     if (n == 0) return;

 #pragma omp parallel for num_threads(utility::EstimateMaxThreads())
     for (int64_t index = 0; index < n; ++index) {
         func(index);
     }
 }

 #endif

 /// Run a function in parallel on CPU or CUDA.
 ///
 /// The backend is selected at compile time: translation units compiled with
 /// a CUDA compiler (__CUDACC__ defined) forward to ParallelForCUDA_, all
 /// others forward to ParallelForCPU_. The selected backend validates that
 /// \p device has the matching device type.
 ///
 /// \param device The device for the parallel for loop to run on.
 /// \param n Number of indices; \p func is invoked for every index in [0, n).
 /// \param func Callable invoked as func(int64_t index).
 template <typename func_t>
 void ParallelFor(const Device& device, int64_t n, const func_t& func) {
 #if defined(__CUDACC__)
     ParallelForCUDA_(device, n, func);
 #else
     ParallelForCPU_(device, n, func);
 #endif
 }

 /// Run a potentially vectorized function in parallel on CPU or CUDA.
 ///
 /// The backend is selected at compile time:
 /// - With a CUDA compiler (__CUDACC__ defined), \p func is run through
 ///   ParallelForCUDA_ and \p vec_func is ignored.
 /// - Otherwise, if BUILD_ISPC_MODULE is defined, [0, n) is split into one
 ///   contiguous sub-range per thread (utility::EstimateMaxThreads() of them)
 ///   and \p vec_func is called once for every non-empty sub-range; \p func is
 ///   ignored.
 /// - Otherwise \p func is run through ParallelForCPU_ and \p vec_func is
 ///   ignored.
 ///
 /// \param device The device for the parallel for loop to run on.
 /// \param n Number of indices to process.
 /// \param func Callable invoked as func(int64_t index).
 /// \param vec_func Callable invoked as vec_func(int64_t start, int64_t end)
 /// on the half-open range [start, end).
 template <typename vec_func_t, typename func_t>
 void ParallelFor(const Device& device,
                  int64_t n,
                  const func_t& func,
                  const vec_func_t& vec_func) {
 #if defined(__CUDACC__)
     ParallelForCUDA_(device, n, func);
 #elif defined(BUILD_ISPC_MODULE)
     const int num_threads = utility::EstimateMaxThreads();
     // One work item per thread; each item handles a contiguous slice of
     // [0, n). ParallelForCPU_ still performs the device type check.
     ParallelForCPU_(device, num_threads, [&](int64_t i) {
         const int64_t start = n * i / num_threads;
         const int64_t end = std::min<int64_t>(n * (i + 1) / num_threads, n);
         // Slices are empty when n == 0 or n < num_threads (and inverted for a
         // negative n). Skip them so that the vectorized kernel is only ever
         // called with a valid, non-empty range, matching the scalar paths
         // which perform no work in these cases.
         if (start < end) {
             vec_func(start, end);
         }
     });
 #else
     ParallelForCPU_(device, n, func);
 #endif
 }

 #ifdef BUILD_ISPC_MODULE

 // Internal helper macro.
 // ISPC-enabled variant: expands to a using-directive for namespace ispc
 // followed by the call ISPCKernel(start, end, <extra arguments>).
 // The expansion consists of two statements and introduces a using-directive,
 // so it is meant to be expanded inside its own scope (the lambdas below do
 // exactly that). At least one extra argument must be supplied, because
 // __VA_ARGS__ is preceded by a comma in the call.
 #define OPEN3D_CALL_ISPC_KERNEL_(ISPCKernel, start, end, ...) \
     using namespace ispc;                                     \
     ISPCKernel(start, end, __VA_ARGS__);

 #else

 // Internal helper macro.
 // ISPC-disabled variant: the kernel is not called. Instead an error naming
 // the kernel is reported through utility::LogError; start, end and the extra
 // arguments are not used by the expansion.
 #define OPEN3D_CALL_ISPC_KERNEL_(ISPCKernel, start, end, ...)            \
     utility::LogError(                                                   \
             "ISPC module disabled. Unable to call vectorized kernel {}", \
             OPEN3D_STRINGIFY(ISPCKernel));

 #endif

 // Internal helper macro.
 // Expands to a by-reference capturing lambda with the signature
 // (T, int64_t start, int64_t end). The first parameter is unnamed: only its
 // type matters, so that several such lambdas can be told apart by argument
 // type. The lambda body forwards start, end and the extra arguments to the
 // kernel whose name is built from ISPCKernel, "_" and T via OPEN3D_CONCAT
 // (presumably plain token pasting, e.g. Kernel_float -- see Preprocessor.h).
 #define OPEN3D_OVERLOADED_LAMBDA_(T, ISPCKernel, ...)                       \
     [&](T, int64_t start, int64_t end) {                                    \
         OPEN3D_CALL_ISPC_KERNEL_(                                           \
                 OPEN3D_CONCAT(ISPCKernel, OPEN3D_CONCAT(_, T)), start, end, \
                 __VA_ARGS__);                                               \
     }

 // Expands to a by-reference capturing lambda with the signature
 // (int64_t start, int64_t end), i.e. the shape expected for the vec_func
 // argument of the two-functor ParallelFor overload. The lambda calls
 // ISPCKernel(start, end, <extra arguments>) when BUILD_ISPC_MODULE is defined
 // and reports an error through utility::LogError otherwise.
 // Because everything is captured by reference, the extra arguments must
 // outlive every invocation of the lambda.
 #define OPEN3D_VECTORIZED(ISPCKernel, ...)                             \
     [&](int64_t start, int64_t end) {                                  \
         OPEN3D_CALL_ISPC_KERNEL_(ISPCKernel, start, end, __VA_ARGS__); \
     }

 // Type-dispatching counterpart of OPEN3D_VECTORIZED.
 // Expands to a by-reference capturing lambda with the signature
 // (int64_t start, int64_t end) which:
 //   1. statically asserts that T is an arithmetic type,
 //   2. combines one OPEN3D_OVERLOADED_LAMBDA_ per listed type (bool, the
 //      8/16/32/64-bit signed and unsigned integers, float, double) plus a
 //      generic fallback lambda through utility::Overload, and
 //   3. invokes the combined callable with a value-initialized T{} as the
 //      type tag, followed by start and end.
 // Each per-type lambda calls the kernel named ISPCKernel_<type>; the generic
 // fallback reports the unsupported type (via typeid) and the kernel name
 // through utility::LogError.
 // NOTE(review): presumably utility::Overload performs ordinary overload
 // resolution across the lambdas, so the exact-type lambda wins over the
 // generic one -- confirm against Overload.h.
 // The use of typeid requires <typeinfo> to be included before this macro is
 // expanded.
 #define OPEN3D_TEMPLATE_VECTORIZED(T, ISPCKernel, ...)                        \
     [&](int64_t start, int64_t end) {                                         \
         static_assert(std::is_arithmetic<T>::value,                           \
                       "Data type is not an arithmetic type");                 \
         utility::Overload(                                                    \
                 OPEN3D_OVERLOADED_LAMBDA_(bool, ISPCKernel, __VA_ARGS__),     \
                 OPEN3D_OVERLOADED_LAMBDA_(uint8_t, ISPCKernel, __VA_ARGS__),  \
                 OPEN3D_OVERLOADED_LAMBDA_(int8_t, ISPCKernel, __VA_ARGS__),   \
                 OPEN3D_OVERLOADED_LAMBDA_(uint16_t, ISPCKernel, __VA_ARGS__), \
                 OPEN3D_OVERLOADED_LAMBDA_(int16_t, ISPCKernel, __VA_ARGS__),  \
                 OPEN3D_OVERLOADED_LAMBDA_(uint32_t, ISPCKernel, __VA_ARGS__), \
                 OPEN3D_OVERLOADED_LAMBDA_(int32_t, ISPCKernel, __VA_ARGS__),  \
                 OPEN3D_OVERLOADED_LAMBDA_(uint64_t, ISPCKernel, __VA_ARGS__), \
                 OPEN3D_OVERLOADED_LAMBDA_(int64_t, ISPCKernel, __VA_ARGS__),  \
                 OPEN3D_OVERLOADED_LAMBDA_(float, ISPCKernel, __VA_ARGS__),    \
                 OPEN3D_OVERLOADED_LAMBDA_(double, ISPCKernel, __VA_ARGS__),   \
                 [&](auto&& generic, int64_t start, int64_t end) {             \
                     utility::LogError(                                        \
                             "Unsupported data type {} for calling "           \
                             "vectorized kernel {}",                           \
                             typeid(generic).name(),                           \
                             OPEN3D_STRINGIFY(ISPCKernel));                    \
                 })(T{}, start, end);                                          \
     }

 }  // namespace core
 }  // namespace open3d
Device.h

OPEN3D_GET_LAST_CUDA_ERROR
#define OPEN3D_GET_LAST_CUDA_ERROR(message)
Definition: CUDAUtils.h:67

core

open3d::core::ParallelFor
void ParallelFor(const Device &device, int64_t n, const func_t &func)
Definition: ParallelFor.h:122

open3d::core::Device::GetType
DeviceType GetType() const
Definition: Device.h:91

Preprocessor.h

Overload.h

open3d::utility::EstimateMaxThreads
int EstimateMaxThreads()
Estimate the maximum number of threads to be used in a parallel region.
Definition: Parallel.cpp:50

Parallel.h

open3d::core::Device::DeviceType::CUDA

open3d::core::ParallelForCPU_
void ParallelForCPU_(const Device &device, int64_t n, const func_t &func)
Run a function in parallel on CPU.
Definition: ParallelFor.h:92

open3d::core::Device
Definition: Device.h:39

open3d::core::Device::DeviceType::CPU

open3d
Definition: PinholeCameraIntrinsic.cpp:35

Logging.h

CUDAUtils.h
Common CUDA utilities.

open3d::core::Device::ToString
std::string ToString() const
Definition: Device.h:75

LogError
#define LogError(...)
Definition: Logging.h:72