-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.hpp
217 lines (177 loc) · 7.85 KB
/
utils.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
/*
* Everything in this file is taken from:
* https://github.com/eth-cscs/COSMA
*
* This LICENCE of COSMA is BSD-3 Clause and here is a copy of it:
*
* BSD 3-Clause License
Copyright (c) 2018, ETH Zürich.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
extern "C" {
// Initialization
void Cblacs_pinfo(int* mypnum, int* nprocs);
void Cblacs_setup(int* mypnum, int* nprocs);
void Cblacs_set(int ictxt, int what, int* val);
void Cblacs_get(int ictxt, int what, int* val);
void Cblacs_gridinit(int* ictxt, const char* order, int nprow, int npcol);
void Cblacs_gridmap(int* ictxt, int* usermap, int ldup, int nprow, int npcol);
// Finalization
void Cblacs_freebuff(int ictxt, int wait);
void Cblacs_gridexit(int ictxt);
void Cblacs_exit(int NotDone);
// Abort
void Cblacs_abort(int ictxt, int errno);
// Information
void Cblacs_gridinfo(int ictxt, int* nprow, int* npcol, int* myrow, int* mycol);
int Cblacs_pnum(int ictxt, int prow, int pcol);
void Cblacs_pcoord(int ictxt, int nodenum, int* prow, int* pcol);
// Barrier
void Cblacs_barrier(int ictxt, char* scope);
// MPI communicator <-> Blacs context
MPI_Comm Cblacs2sys_handle(int ictxt);
int Csys2blacs_handle(MPI_Comm mpi_comm);
void Cfree_blacs_system_handle(int i_sys_ctxt);
void descinit_( int* desc, int* m, int* n, int* mb, int* nb,
int* irsrc, int* icsrc, int* ictxt, int* lld, int* info );
void pdgetrf_( int* m, int* n, double* a, int* ia, int* ja, int* desca,
int* ipiv, int* info );
void pdpotrf_(char* uplo, int* n, double* mat, int* ia, int* ja, int* desca, int* info);
}
namespace scalapack {
int leading_dimension(const int* desc) {
return desc[8];
}
// computes the number of rows or columns that the specified rank owns
int numroc(int n, int nb, int proc_coord, int proc_src, int n_procs) {
// Arguments:
/*
- n: global matrix dimension (rows or columns)
- nb: corresponding dimension of a block
- proc_coord: coordinate of the process for which we are querying
- proc_src: process src
- n_procs: total number of processes along this dimension
*/
// number of whole blocks along this dimension
int n_blocks = n / nb;
// the offset of given process to the source process
// make sure it stays positive
int proc_offset = (n_procs + proc_coord - proc_src) % n_procs;
// number of blocks per process (at least)
// Can also be zero.
int n_blocks_per_process = n_blocks/n_procs;
// Number of rows or columns that each process has (at least).
// Can also be zero.
int n_rows_or_cols_per_process = n_blocks_per_process * nb;
// each rank owns at least this base
int n_rows_or_columns_total = n_rows_or_cols_per_process;
// if there is a remainder, then the current
// process might own some additional blocks
int remainder = n_blocks % n_procs;
// possible additional "whole" blocks that
// the current rank owns
n_rows_or_columns_total += proc_offset < remainder ? nb : 0;
// possible additional "partial" blocks that
// the current ranks owns
n_rows_or_columns_total += proc_offset == remainder ? n % nb : 0;
return n_rows_or_columns_total;
}
int local_buffer_size(const int* desc) {
int lld = leading_dimension(desc);
int n_cols = desc[3]; // global matrix size (columns)
int nb_cols = desc[5]; // block size (columns)
int src_proc = desc[7]; // processor src (columns)
int ctxt = desc[1];
int nprow, npcol, myrow, mycol;
Cblacs_gridinfo(ctxt, &nprow, &npcol, &myrow, &mycol);
int P = nprow * npcol;
int n_local_cols = numroc(n_cols, nb_cols, mycol, src_proc, npcol);
return lld * n_local_cols;
}
int min_leading_dimension(int n, int nb, int rank_grid_dim) {
// Arguments:
/*
- n: global matrix dimension (rows or columns)
- nb: corresponding dimension of a block
- rank_grid_dim: total number of processes along this dimension
*/
// number of blocks along this dimension
int n_blocks = n / nb;
// number of blocks per process (at least)
// Can also be zero.
int n_blocks_per_process = n_blocks/rank_grid_dim;
// Number of rows or columns that each process has (at least).
// Can also be zero.
// each rank owns at least this many rows
int min_n_rows_or_cols_per_process = n_blocks_per_process * nb;
return min_n_rows_or_cols_per_process;
}
int max_leading_dimension(int n, int nb, int rank_grid_dim) {
// Arguments:
/*
- n: global matrix dimension (rows or columns)
- nb: corresponding dimension of a block
- rank_grid_dim: total number of processes along this dimension
*/
int lld = min_leading_dimension(n, nb, rank_grid_dim);
int n_blocks = n / nb;
int remainder = n_blocks % rank_grid_dim;
lld += (remainder == 0) ? (n % nb) : nb;
return lld;
}
// queries the grid blacs context to get the communication blacs context
int get_comm_context(const int grid_context) {
int comm_context;
Cblacs_get(grid_context, 10, &comm_context);
return comm_context;
}
// gets MPI_Comm from the grid blacs context
MPI_Comm get_communicator(const int grid_context) {
int comm_context = get_comm_context(grid_context);
MPI_Comm comm = Cblacs2sys_handle(comm_context);
return comm;
}
}
MPI_Comm subcommunicator(int new_P, MPI_Comm comm = MPI_COMM_WORLD) {
// original size
int P;
MPI_Comm_size(comm, &P);
// original group
MPI_Group group;
MPI_Comm_group(comm, &group);
// new comm and new group
MPI_Comm newcomm;
MPI_Group newcomm_group;
// ranks to exclude
std::vector<int> exclude_ranks;
for (int i = new_P; i < P; ++i) {
exclude_ranks.push_back(i);
}
// create reduced group
MPI_Group_excl(group, exclude_ranks.size(), exclude_ranks.data(), &newcomm_group);
// create reduced communicator
MPI_Comm_create_group(comm, newcomm_group, 0, &newcomm);
MPI_Group_free(&group);
MPI_Group_free(&newcomm_group);
return newcomm;
}