-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdgsmv_AXglobal.c
324 lines (285 loc) · 8.82 KB
/
pdgsmv_AXglobal.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
/*! \file
Copyright (c) 2003, The Regents of the University of California, through
Lawrence Berkeley National Laboratory (subject to receipt of any required
approvals from U.S. Dept. of Energy)
All rights reserved.
The source code is distributed under BSD license, see the file License.txt
at the top-level directory.
*/
/*! @file
* \brief Performs sparse matrix-vector multiplication
*
* <pre>
* -- Distributed SuperLU routine (version 1.0) --
* Lawrence Berkeley National Lab, Univ. of California Berkeley.
* September 1, 1999
* </pre>
*/
#include <math.h>
#include "superlu_ddefs.h"
static void dcreate_msr_matrix(SuperMatrix *, int_t [], int_t,
double **, int_t **);
static void dPrintMSRmatrix(int, double [], int_t [], gridinfo_t *);
int pdgsmv_AXglobal_setup
(
SuperMatrix *A, /* Matrix A permuted by columns (input).
The type of A can be:
Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. */
Glu_persist_t *Glu_persist, /* input */
gridinfo_t *grid, /* input */
int_t *m, /* output */
int_t *update[], /* output */
double *val[], /* output */
int_t *bindx[], /* output */
int_t *mv_sup_to_proc /* output */
)
{
int n;
int input_option;
int N_update; /* Number of variables updated on this process (output) */
int iam = grid->iam;
int nprocs = grid->nprow * grid->npcol;
int_t *xsup = Glu_persist->xsup;
int_t *supno = Glu_persist->supno;
int_t nsupers;
int i, nsup, p, t1, t2, t3;
/* Initialize the list of global indices.
* NOTE: the list of global indices must be in ascending order.
*/
n = A->nrow;
input_option = SUPER_LINEAR;
nsupers = supno[n-1] + 1;
#if ( DEBUGlevel>=2 )
if ( !iam ) {
PrintInt10("xsup", supno[n-1]+1, xsup);
PrintInt10("supno", n, supno);
}
#endif
if ( input_option == SUPER_LINEAR ) { /* Block partitioning based on
individual rows. */
/* Figure out mv_sup_to_proc[] on all processes. */
for (p = 0; p < nprocs; ++p) {
t1 = n / nprocs; /* Number of rows */
t2 = n - t1 * nprocs; /* left-over, which will be assigned
to the first t2 processes. */
if ( p >= t2 ) t2 += (p * t1); /* Starting row number */
else { /* First t2 processes will get one more row. */
++t1; /* Number of rows. */
t2 = p * t1; /* Starting row. */
}
/* Make sure the starting and ending rows are at the
supernode boundaries. */
t3 = t2 + t1; /* Ending row. */
nsup = supno[t2];
if ( t2 > xsup[nsup] ) { /* Round up the starting row. */
t1 -= xsup[nsup+1] - t2;
t2 = xsup[nsup+1];
}
nsup = supno[t3];
if ( t3 > xsup[nsup] ) /* Round up the ending row. */
t1 += xsup[nsup+1] - t3;
t3 = t2 + t1 - 1;
if ( t1 ) {
for (i = supno[t2]; i <= supno[t3]; ++i) {
mv_sup_to_proc[i] = p;
#if ( DEBUGlevel>=3 )
if ( mv_sup_to_proc[i] == p-1 ) {
fprintf(stderr,
"mv_sup_to_proc conflicts at supno %d\n", i);
exit(-1);
}
#endif
}
}
if ( iam == p ) {
N_update = t1;
if ( N_update ) {
if ( !(*update = intMalloc_dist(N_update)) )
ABORT("Malloc fails for update[]");
}
for (i = 0; i < N_update; ++i) (*update)[i] = t2 + i;
#if ( DEBUGlevel>=3 )
printf("(%2d) N_update = %4d\t"
"supers %4d to %4d\trows %4d to %4d\n",
iam, N_update, supno[t2], supno[t3], t2, t3);
#endif
}
} /* for p ... */
} else if ( input_option == SUPER_BLOCK ) { /* Block partitioning based on
individual supernodes. */
/* This may cause bad load balance, because the blocks are usually
small in the beginning and large toward the end. */
t1 = nsupers / nprocs;
t2 = nsupers - t1 * nprocs; /* left-over */
if ( iam >= t2 ) t2 += (iam * t1);
else {
++t1; /* Number of blocks. */
t2 = iam * t1; /* Starting block. */
}
N_update = xsup[t2+t1] - xsup[t2];
if ( !(*update = intMalloc_dist(N_update)) )
ABORT("Malloc fails for update[]");
for (i = 0; i < N_update; ++i) (*update)[i] = xsup[t2] + i;
}
/* Create an MSR matrix in val/bindx to be used by pdgsmv(). */
dcreate_msr_matrix(A, *update, N_update, val, bindx);
#if ( DEBUGlevel>=2 )
PrintInt10("mv_sup_to_proc", nsupers, mv_sup_to_proc);
dPrintMSRmatrix(N_update, *val, *bindx, grid);
#endif
*m = N_update;
return 0;
} /* PDGSMV_AXglobal_SETUP */
/*! \brief
*
* <pre>
* Create the distributed modified sparse row (MSR) matrix: bindx/val.
* For a submatrix of size m-by-n, the MSR arrays are as follows:
* bindx[0] = m + 1
* bindx[0..m] = pointer to start of each row
* bindx[ks..ke] = column indices of the off-diagonal nonzeros in row k,
* where, ks = bindx[k], ke = bindx[k+1]-1
* val[k] = A(k,k), k < m, diagonal elements
* val[m] = not used
* val[ki] = A(k, bindx[ki]), where ks <= ki <= ke
* Both arrays are of length nnz + 1.
* </pre>
*/
static void dcreate_msr_matrix
(
SuperMatrix *A, /* Matrix A permuted by columns (input).
The type of A can be:
Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. */
int_t update[], /* input (local) */
int_t N_update, /* input (local) */
double **val, /* output */
int_t **bindx /* output */
)
{
int hi, i, irow, j, k, lo, n, nnz_local, nnz_diag;
NCPformat *Astore;
double *nzval;
int_t *rowcnt;
double zero = 0.0;
if ( !N_update ) return;
n = A->ncol;
Astore = A->Store;
nzval = Astore->nzval;
/* One pass of original matrix A to count nonzeros of each row. */
if ( !(rowcnt = (int_t *) intCalloc_dist(N_update)) )
ABORT("Malloc fails for rowcnt[]");
lo = update[0];
hi = update[N_update-1];
nnz_local = 0;
nnz_diag = 0;
for (j = 0; j < n; ++j) {
for (i = Astore->colbeg[j]; i < Astore->colend[j]; ++i) {
irow = Astore->rowind[i];
if ( irow >= lo && irow <= hi ) {
if ( irow != j ) /* Exclude diagonal */
++rowcnt[irow - lo];
else ++nnz_diag; /* Count nonzero diagonal entries */
++nnz_local;
}
}
}
/* Add room for the logical diagonal zeros which are not counted
in nnz_local. */
nnz_local += (N_update - nnz_diag);
/* Allocate storage for bindx[] and val[]. */
if ( !(*val = (double *) doubleMalloc_dist(nnz_local+1)) )
ABORT("Malloc fails for val[]");
for (i = 0; i < N_update; ++i) (*val)[i] = zero; /* Initialize diagonal */
if ( !(*bindx = (int_t *) SUPERLU_MALLOC((nnz_local+1) * sizeof(int_t))) )
ABORT("Malloc fails for bindx[]");
/* Set up row pointers. */
(*bindx)[0] = N_update + 1;
for (j = 1; j <= N_update; ++j) {
(*bindx)[j] = (*bindx)[j-1] + rowcnt[j-1];
rowcnt[j-1] = (*bindx)[j-1];
}
/* One pass of original matrix A to fill in matrix entries. */
for (j = 0; j < n; ++j) {
for (i = Astore->colbeg[j]; i < Astore->colend[j]; ++i) {
irow = Astore->rowind[i];
if ( irow >= lo && irow <= hi ) {
if ( irow == j ) /* Diagonal */
(*val)[irow - lo] = nzval[i];
else {
irow -= lo;
k = rowcnt[irow];
(*bindx)[k] = j;
(*val)[k] = nzval[i];
++rowcnt[irow];
}
}
}
}
SUPERLU_FREE(rowcnt);
}
/*! \brief
*
* <pre>
* Performs sparse matrix-vector multiplication.
* - val/bindx stores the distributed MSR matrix A
* - X is global
* - ax product is distributed the same way as A
* </pre>
*/
int
pdgsmv_AXglobal(int_t m, int_t update[], double val[], int_t bindx[],
double X[], double ax[])
{
int_t i, j, k;
if ( m <= 0 ) return 0; /* number of rows (local) */
for (i = 0; i < m; ++i) {
ax[i] = 0.0;
for (k = bindx[i]; k < bindx[i+1]; ++k) {
j = bindx[k]; /* column index */
ax[i] += val[k] * X[j];
}
ax[i] += val[i] * X[update[i]]; /* diagonal */
}
return 0;
} /* PDGSMV_AXglobal */
/*
* Performs sparse matrix-vector multiplication.
* - val/bindx stores the distributed MSR matrix A
* - X is global
* - ax product is distributed the same way as A
*/
int
pdgsmv_AXglobal_abs(int_t m, int_t update[], double val[], int_t bindx[],
double X[], double ax[])
{
int_t i, j, k;
if ( m <= 0 ) return 0; /* number of rows (local) */
for (i = 0; i < m; ++i) {
ax[i] = 0.0;
for (k = bindx[i]; k < bindx[i+1]; ++k) {
j = bindx[k]; /* column index */
ax[i] += fabs(val[k]) * fabs(X[j]);
}
ax[i] += fabs(val[i]) * fabs(X[update[i]]); /* diagonal */
}
return 0;
} /* PDGSMV_AXglobal_ABS */
/*
* Print the local MSR matrix
*/
static void dPrintMSRmatrix
(
int m, /* Number of rows of the submatrix. */
double val[],
int_t bindx[],
gridinfo_t *grid
)
{
int iam, nnzp1;
if ( !m ) return;
iam = grid->iam;
nnzp1 = bindx[m];
printf("(%2d) MSR submatrix has %d rows -->\n", iam, m);
PrintDouble5("val", nnzp1, val);
PrintInt10("bindx", nnzp1, bindx);
}