19#ifndef OPM_GPU_THREAD_UTILS_HPP
20#define OPM_GPU_THREAD_UTILS_HPP
23#include <cuda_runtime.h>
24#include <opm/simulators/linalg/gpuistl/detail/gpu_safe_call.hpp>
31constexpr inline size_t
32getThreads([[maybe_unused]]
size_t numberOfRows)
38getBlocks(
size_t numberOfRows)
40 const auto threads = getThreads(numberOfRows);
41 return (numberOfRows + threads - 1) / threads;
45template <
class Kernel>
47getCudaRecomendedThreadBlockSize(Kernel k,
int suggestedThrBlockSize = -1)
49 if (suggestedThrBlockSize != -1) {
50 return suggestedThrBlockSize;
54 OPM_GPU_SAFE_CALL(cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0));
59getNumberOfBlocks(
int wantedThreads,
int threadBlockSize)
61 return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
Contains wrappers to make the CuBLAS library behave as a modern C++ library with function overloading.
Definition autotuner.hpp:29