// sendrecv_gol.c
#include "utils.h"
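// NOTE: utils.h is the project header; it is assumed to pull in <mpi.h>,
// <stdio.h>, <stdlib.h>, <math.h> and <stdbool.h>, and to define the
// constants and helpers used below (NUMBER_OF_ARGS, ROOT, TAG, INIT_ERR,
// PERFECT_SQUARE_ERR, ROW_DIM, COL_DIM, NEAREST_NBRS, the *_POS and
// TIMESTEP_OFFSET index macros, init_halo_mat(), game_of_life() and
// CLEAR_AND_PRINT_STATE).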
// Main driver function
int main(int argc, char *argv[]) {
  // Communicator size and ranks
  int comm_sz, rank, cart_rank;
  // Input args
  int args[NUMBER_OF_ARGS], N, iters, visualize;
  // Memory pointers
  int *state, *this_state, *temp_state, *next_state;
  double start, stop, time, time_max;
  MPI_Init(&argc, &argv);
  // Size of the default communicator
  MPI_Comm_size(MPI_COMM_WORLD, &comm_sz);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  // Random seed dependent on rank
  srand(rank * 1337);
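  // Note: rank 0 gets seed rank * 1337 == 0; every other rank gets a
  // distinct nonzero seed.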
  // Check input args
  if (rank == ROOT) {
    // Check number of args
    if (argc != NUMBER_OF_ARGS + 1) {
      printf("-----------------------------------------------------------------"
             "---------\n");
      fprintf(stderr,
              "Usage: mpirun -np <NPROCS> %s <SIZE> <ITERS> <VISUALIZE (1 or 0)>\n",
              argv[0]);
      MPI_Abort(MPI_COMM_WORLD, INIT_ERR);
    }
    // Check that comm_sz is a perfect square and N is divisible by its root
    if ((int)sqrt(comm_sz) * (int)sqrt(comm_sz) != comm_sz ||
        atoi(argv[1]) % (int)sqrt(comm_sz)) {
      printf("-----------------------------------------------------------------"
             "---------\n");
      fprintf(stderr, "Either <NPROCS> is not a perfect square or\n<SIZE> is not "
                      "divisible by sqrt(<NPROCS>)\n");
      MPI_Abort(MPI_COMM_WORLD, PERFECT_SQUARE_ERR);
    }
    // Iterate through args with +1 offset
    for (int i = 0; i < NUMBER_OF_ARGS; ++i) {
      args[i] = atoi(argv[i + 1]);
    }
  }
  // Bcast args since MPI does not guarantee argv on all ranks
  MPI_Bcast(args, NUMBER_OF_ARGS, MPI_INT, ROOT, MPI_COMM_WORLD);
  // Unpack args
  N = args[0];
  iters = args[1];
  visualize = args[2];
  // Per-process block dimension: the global N x N grid is split into
  // sqrt(comm_sz) x sqrt(comm_sz) blocks
  const int block_sz = N / (int)sqrt(comm_sz);
  // The halo matrix pads the block with one ghost cell on each side
  const int halo_sz = block_sz + 2;
  // Number of elements in the halo matrix, for convenience
  const int halo_mat_sz = halo_sz * halo_sz;
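  /*
   * Per-rank memory layout (halo_sz x halo_sz), e.g. block_sz = 4:
   *
   *     g g g g g g      g = ghost/halo cells, filled each iteration
   *     g . . . . g          from the four cartesian neighbours
   *     g . . . . g      . = locally owned block_sz x block_sz cells
   *     g . . . . g
   *     g . . . . g
   *     g g g g g g
   */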
  // Row datatype, excluding the corners, in contiguous memory
  MPI_Datatype row_type;
  MPI_Type_contiguous(block_sz, MPI_INT, &row_type);
  MPI_Type_commit(&row_type);
  // Column datatype containing the corners, which eliminates
  // intercardinal communication, i.e. diagonal exchange of corners
  MPI_Datatype col_type;
  MPI_Type_vector(halo_sz, 1, halo_sz, MPI_INT, &col_type);
  MPI_Type_commit(&col_type);
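  // col_type picks one int per row with stride halo_sz, i.e. an entire
  // column of the halo matrix including its top and bottom ghost cells.
  // Exchanging full columns is what carries the corner values to the
  // diagonal neighbours without any extra messages.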
  // Create dims for the perfect-square process grid
  // (dims is fully specified, so MPI_Dims_create only validates it)
  int dims[2] = {(int)sqrt(comm_sz), (int)sqrt(comm_sz)};
  MPI_Dims_create(comm_sz, 2, dims);
  int periods[2] = {true, true};
  int reorder = true;
  int nbrs[4];
  // Create a communicator with a cartesian topology
  MPI_Comm cart_comm;
  MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, reorder, &cart_comm);
  MPI_Comm_rank(cart_comm, &cart_rank);
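  // periods = {true, true} makes the process grid a torus, so the halo
  // exchange below also implements the periodic boundary of the global
  // Game of Life board.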
  // Direction indices into nbrs: the ROW_DIM shift fills WEST/EAST,
  // the COL_DIM shift fills SOUTH/NORTH
  enum DIRECTIONS { WEST, EAST, SOUTH, NORTH };
  // Find nearest neighbours
  MPI_Cart_shift(cart_comm, ROW_DIM, NEAREST_NBRS, &nbrs[WEST], &nbrs[EAST]);
  MPI_Cart_shift(cart_comm, COL_DIM, NEAREST_NBRS, &nbrs[SOUTH], &nbrs[NORTH]);
  // Let MPI allocate row-major memory since it _might_ be faster in RMA.
  // The first half of the region holds 'next_state', the second half 'this_state'.
  MPI_Alloc_mem(2 * halo_mat_sz * sizeof(int), MPI_INFO_NULL, &state);
  this_state = state + halo_mat_sz;
  next_state = state;
  // Initialize the inner block_sz x block_sz cells to random 0/1
  init_halo_mat(this_state, halo_sz);
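  // init_halo_mat() comes from utils.h; it is assumed to randomize only the
  // interior cells and to leave the ghost border to be filled by the first
  // halo exchange.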
  start = MPI_Wtime();
  for (int i = 0; i < iters; ++i) {
    // Exchange halos with the cartesian neighbours.
    // The TIMESTEP_OFFSET added to each receive address alternates the halo
    // destination between the this_state and next_state halves of the
    // allocation as the pointers are swapped below.
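    // The row exchanges run before the column exchanges on purpose: the
    // strided columns span the full halo height, so once the top and bottom
    // ghost rows have arrived they ride along in the column messages, which
    // is how the corners reach the diagonal neighbours. (This assumes
    // TIMESTEP_OFFSET, defined in utils.h, directs the row receives into the
    // same buffer the columns are sent from.)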
    MPI_Sendrecv(&this_state[UP_ROW_POS], 1, row_type, nbrs[SOUTH], TAG,
                 &state[DOWN_HALO_POS + TIMESTEP_OFFSET], 1, row_type, nbrs[NORTH],
                 TAG, cart_comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&this_state[DOWN_ROW_POS], 1, row_type, nbrs[NORTH], TAG,
                 &state[UP_HALO_POS + TIMESTEP_OFFSET], 1, row_type, nbrs[SOUTH],
                 TAG, cart_comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&this_state[LEFT_COL_POS], 1, col_type, nbrs[EAST], TAG,
                 &state[RIGHT_HALO_POS + TIMESTEP_OFFSET], 1, col_type, nbrs[WEST],
                 TAG, cart_comm, MPI_STATUS_IGNORE);
    MPI_Sendrecv(&this_state[RIGHT_COL_POS], 1, col_type, nbrs[WEST], TAG,
                 &state[LEFT_HALO_POS + TIMESTEP_OFFSET], 1, col_type, nbrs[EAST],
                 TAG, cart_comm, MPI_STATUS_IGNORE);
    // Sir Conway's Game of Life: compute next_state from this_state
    game_of_life(halo_sz, this_state, next_state);
    // Swap the buffer pointers
    temp_state = next_state;
    next_state = this_state;
    this_state = temp_state;
    // Prints the state from ROOT; omitted in the performance evaluation
    // to avoid branching
    if (visualize == 1) {
      CLEAR_AND_PRINT_STATE
    }
  }
  stop = MPI_Wtime();
  time = stop - start;
  // Get the maximum runtime among the processes
  MPI_Reduce(&time, &time_max, 1, MPI_DOUBLE, MPI_MAX, ROOT, cart_comm);
  // Print the reduced maximum as a CSV row: iters, N, comm_sz, time_max
  if (cart_rank == ROOT) {
    printf("%d, %d, %d, %lf\n", iters, N, comm_sz, time_max);
  }
  // Free types, communicator, memory and finalize
  MPI_Type_free(&col_type);
  MPI_Type_free(&row_type);
  MPI_Comm_free(&cart_comm);
  MPI_Free_mem(state);
  MPI_Finalize();
  return 0;
}