-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdgsrfs.c
262 lines (228 loc) · 7.99 KB
/
pdgsrfs.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)
All rights reserved.
The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
* \brief Improves the computed solution to a system of linear equations and provides error bounds and backward error estimates
*
* <pre>
* -- Distributed SuperLU routine (version 4.3) --
* Lawrence Berkeley National Lab, Univ. of California Berkeley.
* March 15, 2003
*
* Last modified:
* December 31, 2015
* </pre>
*/
#include <math.h>
#include "superlu_ddefs.h"
/*! \brief
*
* <pre>
* Purpose
* =======
*
* PDGSRFS improves the computed solution to a system of linear
* equations and provides error bounds and backward error estimates
* for the solution.
*
* Arguments
* =========
*
* n (input) int (global)
* The order of the system of linear equations.
*
* A (input) SuperMatrix*
* The original matrix A, or the scaled A if equilibration was done.
* A is also permuted into diag(R)*A*diag(C)*Pc'. The type of A can be:
* Stype = SLU_NR_loc; Dtype = SLU_D; Mtype = SLU_GE.
*
* anorm (input) double
* The norm of the original matrix A, or the scaled A if
* equilibration was done.
*
* LUstruct (input) LUstruct_t*
* The distributed data structures storing L and U factors.
* The L and U factors are obtained from pdgstrf for
* the possibly scaled and permuted matrix A.
* See superlu_ddefs.h for the definition of 'LUstruct_t'.
*
* ScalePermstruct (input) ScalePermstruct_t* (global)
* The data structure to store the scaling and permutation vectors
* describing the transformations performed to the matrix A.
*
* grid (input) gridinfo_t*
* The 2D process mesh. It contains the MPI communicator, the number
* of process rows (NPROW), the number of process columns (NPCOL),
* and my process rank. It is an input argument to all the
* parallel routines.
* Grid can be initialized by subroutine SUPERLU_GRIDINIT.
* See superlu_defs.h for the definition of 'gridinfo_t'.
*
* B (input) double* (local)
* The m_loc-by-NRHS right-hand side matrix of the possibly
* equilibrated system. That is, B may be overwritten by diag(R)*B.
*
* ldb (input) int (local)
* Leading dimension of matrix B.
*
* X (input/output) double* (local)
* On entry, the solution matrix Y, as computed by PDGSTRS, of the
* transformed system A1*Y = Pc*Pr*B. where
* A1 = Pc*Pr*diag(R)*A*diag(C)*Pc' and Y = Pc*diag(C)^(-1)*X.
* On exit, the improved solution matrix Y.
*
* In order to obtain the solution X to the original system,
* Y should be permutated by Pc^T, and premultiplied by diag(C)
* if DiagScale = COL or BOTH.
* This must be done after this routine is called.
*
* ldx (input) int (local)
* Leading dimension of matrix X.
*
* nrhs (input) int
* Number of right-hand sides.
*
* SOLVEstruct (output) SOLVEstruct_t* (global)
* Contains the information for the communication during the
* solution phase.
*
* berr (output) double*, dimension (nrhs)
* The componentwise relative backward error of each solution
* vector X(j) (i.e., the smallest relative change in
* any element of A or B that makes X(j) an exact solution).
*
* stat (output) SuperLUStat_t*
* Record the statistics about the refinement steps.
* See util.h for the definition of SuperLUStat_t.
*
* info (output) int*
* = 0: successful exit
* < 0: if info = -i, the i-th argument had an illegal value
*
* Internal Parameters
* ===================
*
* ITMAX is the maximum number of steps of iterative refinement.
* </pre>
*/
void
pdgsrfs(int_t n, SuperMatrix *A, double anorm, LUstruct_t *LUstruct,
ScalePermstruct_t *ScalePermstruct, gridinfo_t *grid,
double *B, int_t ldb, double *X, int_t ldx, int nrhs,
SOLVEstruct_t *SOLVEstruct,
double *berr, SuperLUStat_t *stat, int *info)
{
#define ITMAX 20
Glu_persist_t *Glu_persist = LUstruct->Glu_persist;
LocalLU_t *Llu = LUstruct->Llu;
double *ax, *R, *dx, *temp, *work, *B_col, *X_col;
int_t count, i, j, lwork, nz;
int iam;
double eps, lstres;
double s, safmin, safe1, safe2;
/* Data structures used by matrix-vector multiply routine. */
pdgsmv_comm_t *gsmv_comm = SOLVEstruct->gsmv_comm;
NRformat_loc *Astore;
int_t m_loc, fst_row;
/* Initialization. */
Astore = (NRformat_loc *) A->Store;
m_loc = Astore->m_loc;
fst_row = Astore->fst_row;
iam = grid->iam;
/* Test the input parameters. */
*info = 0;
if ( n < 0 ) *info = -1;
else if ( A->nrow != A->ncol || A->nrow < 0 || A->Stype != SLU_NR_loc
|| A->Dtype != SLU_D || A->Mtype != SLU_GE )
*info = -2;
else if ( ldb < SUPERLU_MAX(0, m_loc) ) *info = -10;
else if ( ldx < SUPERLU_MAX(0, m_loc) ) *info = -12;
else if ( nrhs < 0 ) *info = -13;
if (*info != 0) {
i = -(*info);
pxerr_dist("PDGSRFS", grid, i);
return;
}
/* Quick return if possible. */
if ( n == 0 || nrhs == 0 ) {
return;
}
#if ( DEBUGlevel>=1 )
CHECK_MALLOC(iam, "Enter pdgsrfs()");
#endif
lwork = 2 * m_loc; /* For ax/R/dx and temp */
if ( !(work = doubleMalloc_dist(lwork)) )
ABORT("Malloc fails for work[]");
ax = R = dx = work;
temp = ax + m_loc;
/* NZ = maximum number of nonzero elements in each row of A, plus 1 */
nz = A->ncol + 1;
eps = dmach_dist("Epsilon");
safmin = dmach_dist("Safe minimum");
/* Set SAFE1 essentially to be the underflow threshold times the
number of additions in each row. */
safe1 = nz * safmin;
safe2 = safe1 / eps;
#if ( DEBUGlevel>=1 )
if ( !iam ) printf(".. eps = %e\tanorm = %e\tsafe1 = %e\tsafe2 = %e\n",
eps, anorm, safe1, safe2);
#endif
/* Do for each right-hand side ... */
for (j = 0; j < nrhs; ++j) {
count = 0;
lstres = 3.;
B_col = &B[j*ldb];
X_col = &X[j*ldx];
while (1) { /* Loop until stopping criterion is satisfied. */
/* Compute residual R = B - op(A) * X,
where op(A) = A, A**T, or A**H, depending on TRANS. */
/* Matrix-vector multiply. */
pdgsmv(0, A, grid, gsmv_comm, X_col, ax);
/* Compute residual, stored in R[]. */
for (i = 0; i < m_loc; ++i) R[i] = B_col[i] - ax[i];
/* Compute abs(op(A))*abs(X) + abs(B), stored in temp[]. */
pdgsmv(1, A, grid, gsmv_comm, X_col, temp);
for (i = 0; i < m_loc; ++i) temp[i] += fabs(B_col[i]);
s = 0.0;
for (i = 0; i < m_loc; ++i) {
if ( temp[i] > safe2 ) {
s = SUPERLU_MAX(s, fabs(R[i]) / temp[i]);
} else if ( temp[i] != 0.0 ) {
/* Adding SAFE1 to the numerator guards against
spuriously zero residuals (underflow). */
s = SUPERLU_MAX(s, (safe1 + fabs(R[i])) /temp[i]);
}
/* If temp[i] is exactly 0.0 (computed by PxGSMV), then
we know the true residual also must be exactly 0.0. */
}
MPI_Allreduce( &s, &berr[j], 1, MPI_DOUBLE, MPI_MAX, grid->comm );
#if ( PRNTlevel>= 1 )
if ( !iam )
printf("(%2d) .. Step " IFMT ": berr[j] = %e\n", iam, count, berr[j]);
#endif
if ( berr[j] > eps && berr[j] * 2 <= lstres && count < ITMAX ) {
/* Compute new dx. */
pdgstrs(n, LUstruct, ScalePermstruct, grid,
dx, m_loc, fst_row, m_loc, 1,
SOLVEstruct, stat, info);
/* Update solution. */
for (i = 0; i < m_loc; ++i) X_col[i] += dx[i];
lstres = berr[j];
++count;
} else {
break;
}
} /* end while */
stat->RefineSteps = count;
} /* for j ... */
/* Deallocate storage. */
SUPERLU_FREE(work);
#if ( DEBUGlevel>=1 )
CHECK_MALLOC(iam, "Exit pdgsrfs()");
#endif
} /* PDGSRFS */