    using typename Dune::IterativeSolver<X, X>::domain_type;
    using typename Dune::IterativeSolver<X, X>::range_type;
    using typename Dune::IterativeSolver<X, X>::field_type;
    using typename Dune::IterativeSolver<X, X>::real_type;
    using typename Dune::IterativeSolver<X, X>::scalar_real_type;
    static constexpr auto block_size = domain_type::block_type::dimension;
    using XGPU = Opm::gpuistl::GpuVector<real_type>;
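
    // Constructor: the Dune base class keeps the CPU-side operator, scalar
    // product and preconditioner, while m_matrix and m_underlyingSolver hold
    // their GPU counterparts. The matrix copy (GpuSparseMatrix::fromMatrix)
    // and the well-operator check (detail::is_a_well_operator) are assumed
    // Opm::gpuistl helpers; the maxit, verbose and comm parameters follow
    // from their use in the initializer list below.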
    template <class Comm>
    SolverAdapter(Operator& op,
                  Dune::ScalarProduct<X>& sp,
                  std::shared_ptr<Dune::Preconditioner<X, X>> prec,
                  scalar_real_type reduction,
                  int maxit,
                  int verbose,
                  const Comm& comm)
        : Dune::IterativeSolver<X, X>(op, sp, *prec, reduction, maxit, verbose)
        , m_opOnCPUWithMatrix(op)
        , m_matrix(GpuSparseMatrix<real_type>::fromMatrix(op.getmat()))
        , m_underlyingSolver(constructSolver(prec, reduction, maxit, verbose, comm))
    {
        OPM_ERROR_IF(detail::is_a_well_operator<Operator>::value,
                     "Currently we only support operators of type MatrixAdapter in the CUDA/HIP solver. "
                     "Use --matrix-add-well-contributions=true. "
                     "Using WellModelMatrixAdapter with SolverAdapter is not well-defined, so it will not work well, or at all.");
    }
    virtual void apply(X& x, X& b, double reduction, Dune::InverseOperatorResult& res) override
    {
        if (!m_inputBuffer) {
            m_inputBuffer.reset(new XGPU(b.dim()));
            m_outputBuffer.reset(new XGPU(x.dim()));
        }

        m_inputBuffer->copyFromHost(b);
        m_outputBuffer->copyFromHost(x);

        m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, reduction, res);

        m_inputBuffer->copyToHost(b);
        m_outputBuffer->copyToHost(x);
    }
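
    // Same as above, but without an explicit reduction factor; the reduction
    // given at construction time is used instead.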
    virtual void apply(X& x, X& b, Dune::InverseOperatorResult& res) override
    {
        if (!m_inputBuffer) {
            m_inputBuffer.reset(new XGPU(b.dim()));
            m_outputBuffer.reset(new XGPU(x.dim()));
        }

        m_inputBuffer->copyFromHost(b);
        m_outputBuffer->copyFromHost(x);

        m_underlyingSolver.apply(*m_outputBuffer, *m_inputBuffer, res);

        m_inputBuffer->copyToHost(b);
        m_outputBuffer->copyToHost(x);
    }

private:
    Operator& m_opOnCPUWithMatrix;
    GpuSparseMatrix<real_type> m_matrix;

    UnderlyingSolver<XGPU> m_underlyingSolver;
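
    // constructSolver has three overloads: an MPI-parallel one (templated on
    // the communication type), one for Dune::Amg::SequentialInformation that
    // forwards to the serial case, and a serial one.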
    template <class Comm>
    UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
                                           scalar_real_type reduction,
                                           int maxit,
                                           int verbose,
                                           const Comm& communication)
    {
        auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<X, X>>(prec);
        if (!precAsHolder) {
            OPM_THROW(std::invalid_argument,
                      "The preconditioner needs to be a CUDA preconditioner (e.g. GPUDILU) wrapped in an "
                      "Opm::gpuistl::PreconditionerAdapter wrapped in an "
                      "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set the "
                      "preconditioner to 'GPUDILU'.");
        }

        auto preconditionerAdapter = precAsHolder->getUnderlyingPreconditioner();
        auto preconditionerAdapterAsHolder
            = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(preconditionerAdapter);
        if (!preconditionerAdapterAsHolder) {
            OPM_THROW(std::invalid_argument,
                      "The preconditioner needs to be a CUDA preconditioner (e.g. GPUDILU) wrapped in an "
                      "Opm::gpuistl::PreconditionerAdapter wrapped in an "
                      "Opm::gpuistl::GpuBlockPreconditioner. If you are unsure what this means, set the "
                      "preconditioner to 'GPUDILU'.");
        }

        auto preconditionerReallyOnGPU = preconditionerAdapterAsHolder->getUnderlyingPreconditioner();
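
        // Check whether the MPI library advertises CUDA-/GPU-aware support,
        // both at compile time (the MPIX_CUDA_AWARE_SUPPORT macro) and at run
        // time (MPIX_Query_cuda_support()); both must hold before we let MPI
        // touch device pointers directly.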
        bool mpiSupportsCudaAwareAtCompileTime = false;
        bool mpiSupportsCudaAwareAtRunTime = false;

#if defined(MPIX_CUDA_AWARE_SUPPORT) && MPIX_CUDA_AWARE_SUPPORT
        mpiSupportsCudaAwareAtCompileTime = true;
#endif

#if defined(MPIX_CUDA_AWARE_SUPPORT)
        if (1 == MPIX_Query_cuda_support()) {
            mpiSupportsCudaAwareAtRunTime = true;
        }
#endif
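
        // Pick the communication strategy: with GPU-aware MPI, device
        // pointers can be handed straight to MPI; otherwise the data is
        // staged through host memory. GPUAwareMPISender and
        // GPUObliviousMPISender are the two GPUSender implementations
        // assumed to exist in Opm::gpuistl.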
        std::shared_ptr<Opm::gpuistl::GPUSender<real_type, Comm>> gpuComm;
        if (mpiSupportsCudaAwareAtCompileTime && mpiSupportsCudaAwareAtRunTime) {
            gpuComm = std::make_shared<
                Opm::gpuistl::GPUAwareMPISender<real_type, block_size, Comm>>(communication);
        } else {
            gpuComm = std::make_shared<
                Opm::gpuistl::GPUObliviousMPISender<real_type, block_size, Comm>>(communication);
        }

        using CudaCommunication = GpuOwnerOverlapCopy<real_type, block_size, Comm>;
        using SchwarzOperator
            = Dune::OverlappingSchwarzOperator<GpuSparseMatrix<real_type>, XGPU, XGPU, CudaCommunication>;
        auto cudaCommunication = std::make_shared<CudaCommunication>(gpuComm);

        auto mpiPreconditioner = std::make_shared<GpuBlockPreconditioner<XGPU, XGPU, CudaCommunication>>(
            preconditionerReallyOnGPU, cudaCommunication);

        auto scalarProduct = std::make_shared<Dune::ParallelScalarProduct<XGPU, CudaCommunication>>(
            cudaCommunication, m_opOnCPUWithMatrix.category());
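
        // cudaCommunication is co-owned by this scope and by mpiPreconditioner
        // (and may be passed on from there), so a use count below two means
        // the ownership chain has been broken somewhere.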
        OPM_ERROR_IF(cudaCommunication.use_count() < 2, "Internal error. Shared pointer not owned properly.");
        auto overlappingCudaOperator = std::make_shared<SchwarzOperator>(m_matrix, *cudaCommunication);

        return UnderlyingSolver<XGPU>(
            overlappingCudaOperator, scalarProduct, mpiPreconditioner, reduction, maxit, verbose);
    }

    UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
                                           scalar_real_type reduction,
                                           int maxit,
                                           int verbose,
                                           [[maybe_unused]] const Dune::Amg::SequentialInformation& communication)
    {
        // A sequential run needs no communication; forward to the serial overload.
        return constructSolver(prec, reduction, maxit, verbose);
    }
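
    // Serial construction: here prec is expected to wrap the GPU
    // preconditioner directly, without the GpuBlockPreconditioner layer used
    // in the MPI-parallel case.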
    UnderlyingSolver<XGPU> constructSolver(std::shared_ptr<Dune::Preconditioner<X, X>> prec,
                                           scalar_real_type reduction,
                                           int maxit,
                                           int verbose)
    {
        auto precAsHolder = std::dynamic_pointer_cast<PreconditionerHolder<XGPU, XGPU>>(prec);
        if (!precAsHolder) {
            OPM_THROW(std::invalid_argument,
                      "The preconditioner needs to be a CUDA preconditioner wrapped in an "
                      "Opm::gpuistl::PreconditionerHolder (e.g. GPUDILU).");
        }
        auto preconditionerOnGPU = precAsHolder->getUnderlyingPreconditioner();

        auto matrixOperator = std::make_shared<Dune::MatrixAdapter<GpuSparseMatrix<real_type>, XGPU, XGPU>>(m_matrix);
        auto scalarProduct = std::make_shared<Dune::SeqScalarProduct<XGPU>>();
        return UnderlyingSolver<XGPU>(matrixOperator, scalarProduct, preconditionerOnGPU, reduction, maxit, verbose);
    }
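
    // Device-side staging buffers for the right-hand side and the solution;
    // allocated lazily on the first call to apply().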
    std::unique_ptr<XGPU> m_inputBuffer;
    std::unique_ptr<XGPU> m_outputBuffer;
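
    // A minimal usage sketch, assuming the class template is
    // SolverAdapter<Operator, UnderlyingSolver, X> with UnderlyingSolver a
    // Dune solver template such as Dune::BiCGSTABSolver, and with prec a
    // suitably wrapped GPU preconditioner as required by constructSolver:
    //
    //   using X  = Dune::BlockVector<Dune::FieldVector<double, 3>>;
    //   using M  = Dune::BCRSMatrix<Dune::FieldMatrix<double, 3, 3>>;
    //   using Op = Dune::MatrixAdapter<M, X, X>;
    //   Op op(matrix);
    //   Dune::SeqScalarProduct<X> sp;
    //   SolverAdapter<Op, Dune::BiCGSTABSolver, X> solver(
    //       op, sp, prec, /*reduction=*/1e-3, /*maxit=*/200, /*verbose=*/1,
    //       Dune::Amg::SequentialInformation());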