escript Revision_
BlockOps.h
Go to the documentation of this file.
1
2/*****************************************************************************
3*
4* Copyright (c) 2003-2020 by The University of Queensland
5* http://www.uq.edu.au
6*
7* Primary Business: Queensland, Australia
8* Licensed under the Apache License, version 2.0
9* http://www.apache.org/licenses/LICENSE-2.0
10*
11* Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12* Development 2012-2013 by School of Earth Sciences
13* Development from 2014-2017 by Centre for Geoscience Computing (GeoComp)
14* Development from 2019 by School of Earth and Environmental Sciences
15**
16*****************************************************************************/
17
18#ifndef __PASO_BLOCKOPS_H__
19#define __PASO_BLOCKOPS_H__
20
21#include "Paso.h"
22#include "PasoException.h"
23
24#include <cstring> // memcpy
25
26#ifdef ESYS_HAVE_LAPACK
27 #ifdef ESYS_MKL_LAPACK
28 #include <mkl_lapack.h>
29 #include <mkl_cblas.h>
30 #else
31 extern "C" {
32 #include <clapack.h>
33 #include <cblas.h>
34 }
35 #endif
36#endif
37
38namespace paso {
39
40inline void BlockOps_Cpy_N(dim_t N, double* R, const double* V)
41{
42 memcpy((void*)R, (void*)V, N*sizeof(double));
43}
44
46inline void BlockOps_SMV_2(double* R, const double* mat, const double* V)
47{
48 const double S1 = V[0];
49 const double S2 = V[1];
50 const double A11 = mat[0];
51 const double A12 = mat[2];
52 const double A21 = mat[1];
53 const double A22 = mat[3];
54 R[0] -= A11 * S1 + A12 * S2;
55 R[1] -= A21 * S1 + A22 * S2;
56}
57
59inline void BlockOps_SMV_3(double* R, const double* mat, const double* V)
60{
61 const double S1 = V[0];
62 const double S2 = V[1];
63 const double S3 = V[2];
64 const double A11 = mat[0];
65 const double A21 = mat[1];
66 const double A31 = mat[2];
67 const double A12 = mat[3];
68 const double A22 = mat[4];
69 const double A32 = mat[5];
70 const double A13 = mat[6];
71 const double A23 = mat[7];
72 const double A33 = mat[8];
73 R[0] -= A11 * S1 + A12 * S2 + A13 * S3;
74 R[1] -= A21 * S1 + A22 * S2 + A23 * S3;
75 R[2] -= A31 * S1 + A32 * S2 + A33 * S3;
76}
77
78#define PASO_MISSING_CLAPACK throw PasoException("You need to install a LAPACK version to enable operations on block sizes > 3.")
79
81inline void BlockOps_SMV_N(dim_t N, double* R, const double* mat, const double* V)
82{
83#ifdef ESYS_HAVE_LAPACK
84 cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, -1., mat, N, V, 1, 1., R, 1);
85#else
87#endif
88}
89
90inline void BlockOps_MV_N(dim_t N, double* R, const double* mat, const double* V)
91{
92#ifdef ESYS_HAVE_LAPACK
93 cblas_dgemv(CblasColMajor,CblasNoTrans, N, N, 1., mat, N, V, 1, 0., R, 1);
94#else
96#endif
97}
98
99inline void BlockOps_invM_2(double* invA, const double* A, int* failed)
100{
101 const double A11 = A[0];
102 const double A12 = A[2];
103 const double A21 = A[1];
104 const double A22 = A[3];
105 double D = A11*A22-A12*A21;
106 if (std::abs(D) > 0) {
107 D = 1./D;
108 invA[0] = A22*D;
109 invA[1] = -A21*D;
110 invA[2] = -A12*D;
111 invA[3] = A11*D;
112 } else {
113 *failed = 1;
114 }
115}
116
117inline void BlockOps_invM_3(double* invA, const double* A, int* failed)
118{
119 const double A11 = A[0];
120 const double A21 = A[1];
121 const double A31 = A[2];
122 const double A12 = A[3];
123 const double A22 = A[4];
124 const double A32 = A[5];
125 const double A13 = A[6];
126 const double A23 = A[7];
127 const double A33 = A[8];
128 double D = A11*(A22*A33-A23*A32) +
129 A12*(A31*A23-A21*A33) +
130 A13*(A21*A32-A31*A22);
131 if (std::abs(D) > 0) {
132 D = 1./D;
133 invA[0] = (A22*A33-A23*A32)*D;
134 invA[1] = (A31*A23-A21*A33)*D;
135 invA[2] = (A21*A32-A31*A22)*D;
136 invA[3] = (A13*A32-A12*A33)*D;
137 invA[4] = (A11*A33-A31*A13)*D;
138 invA[5] = (A12*A31-A11*A32)*D;
139 invA[6] = (A12*A23-A13*A22)*D;
140 invA[7] = (A13*A21-A11*A23)*D;
141 invA[8] = (A11*A22-A12*A21)*D;
142 } else {
143 *failed = 1;
144 }
145}
146
148inline void BlockOps_invM_N(dim_t N, double* mat, index_t* pivot, int* failed)
149{
150#ifdef ESYS_HAVE_LAPACK
151#ifdef ESYS_MKL_LAPACK
152 int res = 0;
153 dgetrf(&N, &N, mat, &N, pivot, &res);
154 if (res != 0)
155 *failed = 1;
156#else
157 int res = clapack_dgetrf(CblasColMajor, N, N, mat, N, pivot);
158 if (res != 0)
159 *failed = 1;
160#endif // ESYS_MKL_LAPACK
161#else
163#endif
164}
165
167inline void BlockOps_solve_N(dim_t N, double* X, double* mat, index_t* pivot, int* failed)
168{
169#ifdef ESYS_HAVE_LAPACK
170#ifdef ESYS_MKL_LAPACK
171 int res = 0;
172 int ONE = 1;
173 dgetrs("N", &N, &ONE, mat, &N, pivot, X, &N, &res);
174 if (res != 0)
175 *failed = 1;
176#else
177 int res = clapack_dgetrs(CblasColMajor, CblasNoTrans, N, 1, mat, N, pivot, X, N);
178 if (res != 0)
179 *failed = 1;
180#endif // ESYS_MKL_LAPACK
181#else
183#endif
184}
185
187inline void BlockOps_MViP_2(const double* mat, double* V)
188{
189 const double S1 = V[0];
190 const double S2 = V[1];
191 const double A11 = mat[0];
192 const double A12 = mat[2];
193 const double A21 = mat[1];
194 const double A22 = mat[3];
195 V[0] = A11 * S1 + A12 * S2;
196 V[1] = A21 * S1 + A22 * S2;
197}
198
200inline void BlockOps_MViP_3(const double* mat, double* V)
201{
202 const double S1 = V[0];
203 const double S2 = V[1];
204 const double S3 = V[2];
205 const double A11 = mat[0];
206 const double A21 = mat[1];
207 const double A31 = mat[2];
208 const double A12 = mat[3];
209 const double A22 = mat[4];
210 const double A32 = mat[5];
211 const double A13 = mat[6];
212 const double A23 = mat[7];
213 const double A33 = mat[8];
214 V[0] = A11 * S1 + A12 * S2 + A13 * S3;
215 V[1] = A21 * S1 + A22 * S2 + A23 * S3;
216 V[2] = A31 * S1 + A32 * S2 + A33 * S3;
217}
218
219inline void BlockOps_solveAll(dim_t n_block, dim_t n, double* D,
220 index_t* pivot, double* x)
221{
222 if (n_block == 1) {
223#pragma omp parallel for
224 for (dim_t i=0; i<n; ++i)
225 x[i] *= D[i];
226 } else if (n_block == 2) {
227#pragma omp parallel for
228 for (dim_t i=0; i<n; ++i)
229 BlockOps_MViP_2(&D[4*i], &x[2*i]);
230 } else if (n_block == 3) {
231#pragma omp parallel for
232 for (dim_t i=0; i<n; ++i)
233 BlockOps_MViP_3(&D[9*i], &x[3*i]);
234 } else {
235 int failed = 0;
236#pragma omp parallel for
237 for (dim_t i=0; i<n; ++i) {
238 const dim_t block_size = n_block*n_block;
239 BlockOps_solve_N(n_block, &x[n_block*i], &D[block_size*i], &pivot[n_block*i], &failed);
240 }
241 if (failed > 0) {
242 throw PasoException("BlockOps_solveAll: solution failed.");
243 }
244 }
245}
246
247} // namespace paso
248
249#endif // __PASO_BLOCKOPS_H__
250
#define PASO_MISSING_CLAPACK
Definition BlockOps.h:78
#define V(_K_, _I_)
Definition ShapeFunctions.cpp:121
PasoException exception class.
Definition PasoException.h:34
index_t dim_t
Definition DataTypes.h:66
int index_t
type for array/matrix indices used both globally and on each rank
Definition DataTypes.h:61
Definition BiCGStab.cpp:25
void BlockOps_solve_N(dim_t N, double *X, double *mat, index_t *pivot, int *failed)
solves system of linear equations A*X=B
Definition BlockOps.h:167
void BlockOps_invM_N(dim_t N, double *mat, index_t *pivot, int *failed)
LU factorization of NxN matrix mat with partial pivoting.
Definition BlockOps.h:148
static dim_t N
Definition SparseMatrix_saveHB.cpp:37
void BlockOps_Cpy_N(dim_t N, double *R, const double *V)
Definition BlockOps.h:40
void BlockOps_SMV_3(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 3x3
Definition BlockOps.h:59
void BlockOps_solveAll(dim_t n_block, dim_t n, double *D, index_t *pivot, double *x)
Definition BlockOps.h:219
void BlockOps_SMV_2(double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - 2x2
Definition BlockOps.h:46
void BlockOps_invM_3(double *invA, const double *A, int *failed)
Definition BlockOps.h:117
void BlockOps_MViP_2(const double *mat, double *V)
inplace matrix vector product - order 2
Definition BlockOps.h:187
void BlockOps_MV_N(dim_t N, double *R, const double *mat, const double *V)
Definition BlockOps.h:90
void BlockOps_invM_2(double *invA, const double *A, int *failed)
Definition BlockOps.h:99
void BlockOps_MViP_3(const double *mat, double *V)
inplace matrix vector product - order 3
Definition BlockOps.h:200
void BlockOps_SMV_N(dim_t N, double *R, const double *mat, const double *V)
performs operation R=R-mat*V (V and R are not overlapping) - NxN
Definition BlockOps.h:81