jacobi_mpi.c
/* Code from Pacheco book (see below). Comments added by N. Matloff:
We are solving the system of n linear equations Ax = b for x, in
an iterative manner.
Let x_old denote our last guess (i.e. our last iterate) for x. How
do we get x_new, our new guess? The answer lies in the observation
that if we multiply the i-th row of A times the true solution vector
x, we should get b[i], the i-th component of b. So, what we do is
take x_old, replace its i-th component by a scalar variable u to be
solved for, then multiply the i-th row of A times this modified
x_old, and solve for u. This value of u will now be the i-th
component of x_new. Doing this once for each row of A, we get all the
n components of x_new.
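In formula form, one Jacobi sweep computes, for each i,
   x_new[i] = ( b[i] - sum over j != i of A[i][j]*x_old[j] ) / A[i][i]
which is exactly the update carried out, one block of rows per node, in
Parallel_jacobi() below.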
We iterate until x_new is not very different from x_old. It can be
proved that this scheme does converge, under the right conditions.
Now, let's parallelize this operation for p nodes, where n is an
exact multiple of p. Let n_bar = n/p. For concreteness, suppose
n = 100000 and p = 10, so n_bar = 10000. What we do is have the
first node update the first 10000 components of x (i.e. compute
the first 10000 components of x_new, from A, x_old and b), the
second node update the second 10000 components of x, and so on.
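In other words, with n_bar = n/p, node my_rank updates the components
whose global indices are
   i_global = my_rank*n_bar + i_local,   i_local = 0, 1, ..., n_bar - 1,
which is the index mapping used in the update loop below.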
You can see that node 0 uses MPI_Bcast() to get the initial data
(e.g. n) out to the other nodes. It could instead call MPI_Send()
from within a loop, but MPI_Bcast() is both clearer code and faster;
a good MPI implementation will have MPI_Bcast() very tightly
designed, e.g. to do some latency hiding.
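For comparison, the loop-of-sends alternative would look roughly like
   if (my_rank == 0)
      for (dest = 1; dest < p; dest++)
         MPI_Send(&n, 1, MPI_INT, dest, 0, MPI_COMM_WORLD);
   else
      MPI_Recv(&n, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status);
(a rough sketch only, with dest and status assumed declared; the program
below uses MPI_Bcast() instead).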
Note that the i-th node only needs the i-th set of rows of A.
So node 0 needs to send the first 10000 rows of A to itself,
the second 10000 rows of A to node 1, and so on. Again, this
could be done using MPI_Send() in a loop, but it can be done
much more cleanly and efficiently using MPI_Scatter():
MPI_Scatter(temp, n_bar*MAX_DIM, MPI_FLOAT, A_local,
n_bar*MAX_DIM, MPI_FLOAT, 0, MPI_COMM_WORLD);
Our matrix A is in temp. A set of n_bar of its rows will consist
of n_bar*MAX_DIM elements. The call above says that node 0 will
parcel out temp to the various nodes, with the i-th set of
n_bar*MAX_DIM elements going to the i-th node, to be stored in
A_local at that node. Note carefully that when node 0 executes
this call, MPI will treat it as a send; when any other node executes
the call, MPI will treat it as a receive.
The inverse of MPI_Scatter() is MPI_Gather(), used here to get the
final values back to node 0 for printing at the end of execution
of the program:
MPI_Gather(A_local, n_bar*MAX_DIM, MPI_FLOAT, temp,
n_bar*MAX_DIM, MPI_FLOAT, 0, MPI_COMM_WORLD);
Here the call at node 0 is taken as a receive, and at the other
nodes it's taken to be a send.
After an iteration, each node must somehow get its chunk to all the
other nodes, in preparation for the next iteration. The most
primitive way to do this would be to have a loop in which a node
calls MPI_Send() once for each other node. An improvement on this
would be for the node to call MPI_Bcast(). But the best way, used
below, is to call MPI_Allgather(), which does a gather operation
at all nodes. */
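/* For reference, the call as it appears in Parallel_jacobi() below:
   MPI_Allgather(x_local, n_bar, MPI_FLOAT, x_new,
      n_bar, MPI_FLOAT, MPI_COMM_WORLD);
   Each node contributes its n_bar components of x_new and receives the
   full updated vector. */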
/* parallel_jacobi.c -- parallel implementation of Jacobi's method
* for solving the linear system Ax = b. Uses block distribution
* of vectors and block-row distribution of A.
*
* Input:
* n: order of system
* tol: convergence tolerance
* max_iter: maximum number of iterations
* A: coefficient matrix
* b: right-hand side of system
*
* Output:
* x: the solution if the method converges
* max_iter: if the method fails to converge
*
* Notes:
* 1. A should be strongly diagonally dominant in
* order to ensure convergence.
* 2. A, x, and b are statically allocated.
*
* Taken from Chap 10, Parallel Processing with MPI, by Peter Pacheco
*/
#include <stdio.h>
#include "mpi.h"
#include <math.h>
#define Swap(x,y) {float* temp; temp = x; x = y; y = temp;}
#define MAX_DIM 12
typedef float MATRIX_T[MAX_DIM][MAX_DIM];
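/* Each row of a MATRIX_T is MAX_DIM floats wide regardless of n, so a
   block of n_bar rows occupies n_bar*MAX_DIM consecutive floats; this is
   why the scatter/gather counts below are n_bar*MAX_DIM */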
int Parallel_jacobi(
MATRIX_T A_local /* in */,
float x_local[] /* out */,
float b_local[] /* in */,
int n /* in */,
float tol /* in */,
int max_iter /* in */,
int p /* in */,
int my_rank /* in */);
void Read_matrix(char* prompt, MATRIX_T A_local, int n,
int my_rank, int p);
void Read_vector(char* prompt, float x_local[], int n, int my_rank,
int p);
void Print_matrix(char* title, MATRIX_T A_local, int n,
int my_rank, int p);
void Print_vector(char* title, float x_local[], int n, int my_rank,
int p);
int main(int argc, char* argv[]) {
int p;
int my_rank;
MATRIX_T A_local;
float x_local[MAX_DIM];
float b_local[MAX_DIM];
int n;
float tol;
int max_iter;
int converged;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &p);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
if (my_rank == 0) {
printf("Enter n, tolerance, and max number of iterations\n");
scanf("%d %f %d", &n, &tol, &max_iter);
}
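/* Broadcast the problem parameters read by node 0 to all the other nodes */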
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&tol, 1, MPI_FLOAT, 0, MPI_COMM_WORLD);
MPI_Bcast(&max_iter, 1, MPI_INT, 0, MPI_COMM_WORLD);
Read_matrix("Enter the matrix", A_local, n, my_rank, p);
Read_vector("Enter the right-hand side", b_local, n, my_rank, p);
converged = Parallel_jacobi(A_local, x_local, b_local, n,
tol, max_iter, p, my_rank);
if (converged)
Print_vector("The solution is", x_local, n, my_rank, p);
else
if (my_rank == 0)
printf("Failed to converge in %d iterations\n", max_iter);
MPI_Finalize();
return 0;
} /* main */
/*********************************************************************/
/* Return 1 if iteration converged, 0 otherwise */
/* MATRIX_T is a 2-dimensional array */
int Parallel_jacobi(
MATRIX_T A_local /* in */,
float x_local[] /* out */,
float b_local[] /* in */,
int n /* in */,
float tol /* in */,
int max_iter /* in */,
int p /* in */,
int my_rank /* in */) {
int i_local, i_global, j;
int n_bar;
int iter_num;
float x_temp1[MAX_DIM];
float x_temp2[MAX_DIM];
float* x_old;
float* x_new;
float Distance(float x[], float y[], int n);
n_bar = n/p;
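/* This code assumes p evenly divides n, as noted in the comments above */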
/* Initialize x: take b (gathered from all nodes) as the initial guess,
   so every node starts with the full vector */
MPI_Allgather(b_local, n_bar, MPI_FLOAT, x_temp1,
n_bar, MPI_FLOAT, MPI_COMM_WORLD);
x_new = x_temp1;
x_old = x_temp2;
iter_num = 0;
do {
iter_num++;
/* Interchange x_old and x_new */
Swap(x_old, x_new);
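/* Compute this node's block of x_new:
   x_local[i_local] = ( b_local[i_local]
      - sum over j != i_global of A_local[i_local][j]*x_old[j] )
      / A_local[i_local][i_global] */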
for (i_local = 0; i_local < n_bar; i_local++){
i_global = i_local + my_rank*n_bar;
x_local[i_local] = b_local[i_local];
for (j = 0; j < i_global; j++)
x_local[i_local] = x_local[i_local] -
A_local[i_local][j]*x_old[j];
for (j = i_global+1; j < n; j++)
x_local[i_local] = x_local[i_local] -
A_local[i_local][j]*x_old[j];
x_local[i_local] = x_local[i_local]/
A_local[i_local][i_global];
}
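/* Share this node's updated block with all nodes, so every node holds
   the complete x_new for the convergence test and the next iteration */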
MPI_Allgather(x_local, n_bar, MPI_FLOAT, x_new,
n_bar, MPI_FLOAT, MPI_COMM_WORLD);
} while ((iter_num < max_iter) &&
(Distance(x_new,x_old,n) >= tol));
if (Distance(x_new,x_old,n) < tol)
return 1;
else
return 0;
} /* Jacobi */
/*********************************************************************/
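/* Euclidean distance between the n-vectors x and y */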
float Distance(float x[], float y[], int n) {
int i;
float sum = 0.0;
for (i = 0; i < n; i++) {
sum = sum + (x[i] - y[i])*(x[i] - y[i]);
}
return sqrt(sum);
} /* Distance */
/*********************************************************************/
void Read_matrix(
char* prompt /* in */,
MATRIX_T A_local /* out */,
int n /* in */,
int my_rank /* in */,
int p /* in */) {
int i, j;
MATRIX_T temp;
int n_bar;
n_bar = n/p;
/* Fill dummy entries in temp with zeroes */
for (i = 0; i < n; i++)
for (j = n; j < MAX_DIM; j++)
temp[i][j] = 0.0;
if (my_rank == 0) {
printf("%s\n", prompt);
for (i = 0; i < n; i++)
for (j = 0; j < n; j++)
scanf("%f",&temp[i][j]);
}
MPI_Scatter(temp, n_bar*MAX_DIM, MPI_FLOAT, A_local,
n_bar*MAX_DIM, MPI_FLOAT, 0, MPI_COMM_WORLD);
} /* Read_matrix */
/*********************************************************************/
void Read_vector(
char* prompt /* in */,
float x_local[] /* out */,
int n /* in */,
int my_rank /* in */,
int p /* in */) {
int i;
float temp[MAX_DIM];
int n_bar;
n_bar = n/p;
if (my_rank == 0) {
printf("%s\n", prompt);
for (i = 0; i < n; i++)
scanf("%f", &temp[i]);
}
MPI_Scatter(temp, n_bar, MPI_FLOAT, x_local, n_bar, MPI_FLOAT,
0, MPI_COMM_WORLD);
} /* Read_vector */
/*********************************************************************/
void Print_matrix(char* title, MATRIX_T A_local, int n,
int my_rank, int p);
void Print_matrix(
char* title /* in */,
MATRIX_T A_local /* in */,
int n /* in */,
int my_rank /* in */,
int p /* in */) {
int i, j;
MATRIX_T temp;
int n_bar;
n_bar = n/p;
MPI_Gather(A_local, n_bar*MAX_DIM, MPI_FLOAT, temp,
n_bar*MAX_DIM, MPI_FLOAT, 0, MPI_COMM_WORLD);
if (my_rank == 0) {
printf("%s\n", title);
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++)
printf("%4.1f ", temp[i][j]);
printf("\n");
}
}
} /* Print_matrix */
/*********************************************************************/
void Print_vector(char* title, float x_local[], int n, int my_rank,
int p);
void Print_vector(
char* title /* in */,
float x_local[] /* in */,
int n /* in */,
int my_rank /* in */,
int p /* in */) {
int i;
float temp[MAX_DIM];
int n_bar;
n_bar = n/p;
MPI_Gather(x_local, n_bar, MPI_FLOAT, temp, n_bar, MPI_FLOAT,
0, MPI_COMM_WORLD);
if (my_rank == 0) {
printf("%s\n", title);
for (i = 0; i < n; i++)
printf("%4.1f ", temp[i]);
printf("\n");
}
} /* Print_vector */