-
Notifications
You must be signed in to change notification settings - Fork 34
/
MPIPatternP2P.h
385 lines (329 loc) · 14.7 KB
/
MPIPatternP2P.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
// ---------------------------------------------------------------------
//
// Copyright (c) 2017-2022 The Regents of the University of Michigan and DFT-FE
// authors.
//
// This file is part of the DFT-FE code.
//
// The DFT-FE code is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE at
// the top level of the DFT-FE distribution.
//
// ---------------------------------------------------------------------
//
/**
 * @author Sambit Das, Bikash Kanungo
 */
#ifndef dftfeMPIPatternP2P_h
#define dftfeMPIPatternP2P_h
#include <MemorySpaceType.h>
#include <MemoryStorage.h>
#include <OptimizedIndexSet.h>
#include <set>
#include <utility>
#include <vector>
#include <mpi.h>
namespace dftfe
{
namespace utils
{
namespace mpi
{
/** @brief A class template to store the communication pattern
 * (i.e., which entries/nodes to receive from which processor and
 * which entries/nodes to send to which processor).
 *
 * + <b>Assumptions</b>
 *    1. It assumes a sparse communication pattern. That is, a given
 *       processor only communicates with a few processors.
 *       This object should be avoided if the communication pattern
 *       is dense (e.g., all-to-all communication).
 *    2. It assumes that each processor owns a set of \em contiguous
 *       integers (indices). Further, the ownership is exclusive (i.e.,
 *       no index is owned by more than one processor). In other words,
 *       the different sets of owning indices across all the processors
 *       are disjoint.
 *
 * @tparam memorySpace Defines the MemorySpace (i.e., HOST or
 * DEVICE) in which the various data members of this object must reside.
 */
template <dftfe::utils::MemorySpace memorySpace>
class MPIPatternP2P
{
  ///
  /// typedefs
  ///
public:
  /// Storage for processor-local indices, residing in memorySpace
  using SizeTypeVector = utils::MemoryStorage<size_type, memorySpace>;
  /// Storage for global indices, residing in memorySpace
  using GlobalSizeTypeVector =
    utils::MemoryStorage<global_size_type, memorySpace>;

public:
  virtual ~MPIPatternP2P() = default;

  /**
   * @brief Constructor. This constructor is the typical way of
   * creation of an MPI pattern.
   *
   * @param[in] locallyOwnedRange A pair of non-negative integers
   * \f$(a,b)\f$ which defines a range of indices (contiguous)
   * that are owned by the current processor.
   * @note It is a half-open interval where \f$a\f$ is included,
   * but \f$b\f$ is not included.
   *
   * @param[in] ghostIndices An ordered set of non-negative indices
   * specifying the ghost indices for the current processor.
   * @note The vector must be ordered
   * (i.e., ordered in increasing order and non-repeating).
   *
   * @param[in] mpiComm The MPI communicator object which defines the
   * set of processors for which the MPI pattern needs to be created.
   *
   * @throw Throws exception if \p mpiComm is in an invalid state, if
   * the \p locallyOwnedRange across all the processors are not disjoint,
   * if \p ghostIndices are not ordered (i.e., not strictly
   * increasing), or if some sanity checks with respect to MPI sends and
   * receives fail.
   *
   * @note Care is taken to create a dummy MPIPatternP2P while not linking
   * to an MPI library. This allows the user code to seamlessly link and
   * delink an MPI library.
   */
  MPIPatternP2P(const std::pair<global_size_type, global_size_type>
                  &locallyOwnedRange,
                const std::vector<dftfe::global_size_type> &ghostIndices,
                const MPI_Comm & mpiComm);

  /**
   * @brief Constructor. This constructor creates an MPI Pattern for
   * a serial case. It is provided so that this class can be used
   * seamlessly even for a serial case. Here, all the indices
   * are owned by the current processor.
   *
   * @param[in] size Total number of indices.
   * @note This is an explicitly serial construction (i.e., it uses
   * MPI_COMM_SELF), which is different from the dummy MPIPatternP2P
   * created while not linking to an MPI library. For example,
   * within a parallel run, one might have the need to create a serial
   * MPIPatternP2P. A typical case is the creation of a serial vector as
   * a special case of a distributed vector.
   * @note Similar to the previous
   * constructor, care is taken to create a dummy MPIPatternP2P while not
   * linking to an MPI library.
   */
  MPIPatternP2P(const size_type size);

  // void
  // reinit(){};

  /// Returns the half-open range \f$[a,b)\f$ of indices owned by the
  /// current processor (see d_locallyOwnedRange).
  std::pair<global_size_type, global_size_type>
  getLocallyOwnedRange() const;

  /// Returns the number of indices owned by the current processor.
  size_type
  localOwnedSize() const;

  /// Returns the number of ghost indices of the current processor.
  size_type
  localGhostSize() const;

  /// Returns true if \p globalId lies within the locally owned range of
  /// the current processor.
  bool
  inLocallyOwnedRange(const global_size_type globalId) const;

  /// Returns true if \p globalId is one of the ghost indices of the
  /// current processor.
  bool
  isGhostEntry(const global_size_type globalId) const;

  /// Returns the local index (in the current processor) corresponding
  /// to the global index \p globalId.
  size_type
  globalToLocal(const global_size_type globalId) const;

  /// Returns the global index corresponding to the local index
  /// \p localId of the current processor.
  global_size_type
  localToGlobal(const size_type localId) const;

  /// Returns the ordered set of global ghost indices
  /// (see d_ghostIndices).
  const std::vector<global_size_type> &
  getGhostIndices() const;

  /// Returns the ranks of the ghost processors (see d_ghostProcIds).
  const std::vector<size_type> &
  getGhostProcIds() const;

  /// Returns, for each ghost processor, the number of ghost indices of
  /// the current processor that it owns
  /// (see d_numGhostIndicesInGhostProcs).
  const std::vector<size_type> &
  getNumGhostIndicesInProcs() const;

  /// Returns the number of ghost indices of the current processor that
  /// are owned by processor \p procId.
  size_type
  getNumGhostIndicesInProc(const size_type procId) const;

  /// Returns the local ghost indices that are owned by processor
  /// \p procId (see d_flattenedLocalGhostIndices).
  /// @note Returned by value, not by reference.
  SizeTypeVector
  getGhostLocalIndices(const size_type procId) const;

  /// Returns the ranges of local ghost indices owned by each ghost
  /// processor (see d_localGhostIndicesRanges).
  const std::vector<size_type> &
  getGhostLocalIndicesRanges() const;

  /// Returns the ranks of the target processors (see d_targetProcIds).
  const std::vector<size_type> &
  getTargetProcIds() const;

  /// Returns, for each target processor, the number of locally owned
  /// indices that it needs as ghosts
  /// (see d_numOwnedIndicesForTargetProcs).
  const std::vector<size_type> &
  getNumOwnedIndicesForTargetProcs() const;

  /// Returns the number of locally owned indices needed by the target
  /// processor \p procId.
  size_type
  getNumOwnedIndicesForTargetProc(const size_type procId) const;

  /// Returns the flattened list of locally owned indices to be sent to
  /// the target processors (see d_flattenedLocalTargetIndices).
  const SizeTypeVector &
  getOwnedLocalIndicesForTargetProcs() const;

  /// Returns the locally owned indices needed by the target processor
  /// \p procId.
  /// @note Returned by value, not by reference.
  SizeTypeVector
  getOwnedLocalIndices(const size_type procId) const;

  /// Returns the number of processors in the MPI communicator.
  size_type
  nmpiProcesses() const;

  /// Returns the rank of the current processor.
  size_type
  thisProcessId() const;

  /// Returns the total number of unique indices across all processors.
  global_size_type
  nGlobalIndices() const;

  /// Returns the MPI communicator used by this object.
  const MPI_Comm &
  mpiCommunicator() const;

  /// Returns true if \p rhs describes a communication pattern compatible
  /// with this one.
  /// NOTE(review): the precise notion of compatibility is defined in the
  /// implementation (MPIPatternP2P.t.cc) — confirm there.
  bool
  isCompatible(const MPIPatternP2P<memorySpace> &rhs) const;

private:
  /**
   * A pair \f$(a,b)\f$ which defines a range of indices (contiguous)
   * that are owned by the current processor.
   *
   * @note It is a half-open interval where \f$a\f$ is included,
   * but \f$b\f$ is not included.
   */
  std::pair<global_size_type, global_size_type> d_locallyOwnedRange;

  /**
   * A vector of size 2 times the number of processors to store the
   * locallyOwnedRange of each processor. That is, it stores the list
   * \f$[a_0,b_0,a_1,b_1,\ldots,a_{P-1},b_{P-1}]\f$, where the pair
   * \f$(a_i,b_i)\f$ defines a range of indices (contiguous) that are
   * owned by the \f$i\f$-th processor.
   *
   * @note \f$a\f$ is included but \f$b\f$ is not included.
   */
  std::vector<global_size_type> d_allOwnedRanges;

  /**
   * Number of locally owned indices in the current processor
   */
  size_type d_numLocallyOwnedIndices;

  /**
   * Number of ghost indices in the current processor
   */
  size_type d_numGhostIndices;

  /**
   * Vector to store an ordered set of ghost indices
   * (ordered in increasing order and non-repeating)
   */
  std::vector<global_size_type> d_ghostIndices;

  /**
   * A copy of the above d_ghostIndices stored as an STL set
   */
  std::set<global_size_type> d_ghostIndicesSetSTL;

  /**
   * An OptimizedIndexSet object to store the ghost indices for
   * efficient operations. The OptimizedIndexSet internally creates
   * contiguous sub-ranges within the set of indices and hence can
   * optimize the finding of an index.
   */
  OptimizedIndexSet<global_size_type> d_ghostIndicesOptimizedIndexSet;

  /**
   * Number of ghost processors for the current processor. A ghost
   * processor is one which owns at least one of the ghost indices of
   * this processor.
   */
  size_type d_numGhostProcs;

  /**
   * Vector to store the ghost processor Ids. A ghost processor is
   * one which owns at least one of the ghost indices of this processor.
   */
  std::vector<size_type> d_ghostProcIds;

  /** Vector of size number of ghost processors to store how many ghost
   * indices of this current processor are owned by a ghost processor.
   */
  std::vector<size_type> d_numGhostIndicesInGhostProcs;

  /**
   * A flattened vector of size number of ghosts containing the ghost
   * indices ordered as per the list of ghost processor Ids in
   * d_ghostProcIds. In other words, it stores a concatenation of the
   * lists
   * \f$L_i = \{g^{(k_i)}_1,g^{(k_i)}_2,\ldots,g^{(k_i)}_{N_i}\}\f$, where
   * \f$g\f$'s are the ghost indices, \f$k_i\f$ is the rank of the
   * \f$i\f$-th ghost processor (i.e., d_ghostProcIds[i]) and \f$N_i\f$
   * is the number of ghost indices owned by the \f$i\f$-th
   * ghost processor (i.e., d_numGhostIndicesInGhostProcs[i]).
   * @note \f$L_i\f$ has to be an increasing set.
   * @note We store only the ghost index local to this processor, i.e.,
   * the position of the ghost index in d_ghostIndicesSetSTL or
   * d_ghostIndices.
   * This is done to use size_type, which is unsigned int, instead of
   * global_size_type, which is long unsigned int. This helps in reducing
   * the volume of data transferred during MPI calls.
   * @note In the case that the locally owned ranges across all the
   * processors are ordered as per the processor Id, this vector is
   * redundant and one can only work with d_ghostIndices and
   * d_numGhostIndicesInGhostProcs. By locally owned range being ordered
   * as per the processor Id, we mean that the ranges for processors
   * \f$0, 1,\ldots,P-1\f$ are
   * \f$[N_0,N_1), [N_1, N_2), [N_2, N_3), ..., [N_{P-1},N_P)\f$ with
   * \f$N_0, N_1,\ldots, N_P\f$ being non-decreasing. But in a more
   * general case, where the locally owned ranges are not ordered as per
   * the processor Id, the following array is useful.
   */
  SizeTypeVector d_flattenedLocalGhostIndices;

  /**
   * @brief A vector of size 2 times the number of ghost processors
   * to store the range of local ghost indices that are owned by the
   * ghost processors. In other words, it stores the list
   * \f$L=\{a_1,b_1, a_2, b_2, \ldots, a_G, b_G\}\f$, where
   * \f$a_i\f$ and \f$b_i\f$ are the start local ghost index
   * and one-past-the-last local ghost index of the current processor
   * that is owned by the \f$i\f$-th ghost processor
   * (i.e., d_ghostProcIds[i]). Put differently, \f$[a_i,b_i)\f$ form
   * a half-open interval, where \f$a_i\f$ is included but \f$b_i\f$ is
   * not included.
   *
   * @note Given the fact that the locally owned indices of each processor
   * are contiguous and the global ghost indices (i.e., d_ghostIndices)
   * are ordered, it is sufficient to just store the range of local ghost
   * indices for each ghost procId. The actual global ghost indices
   * belonging to the \f$i\f$-th ghost processor can be fetched from
   * d_ghostIndices (i.e., it is the subset of d_ghostIndices lying
   * between d_ghostIndices[a_i] and d_ghostIndices[b_i]).
   */
  std::vector<size_type> d_localGhostIndicesRanges;

  /**
   * Number of target processors for the current processor. A
   * target processor is one which owns at least one of the locally owned
   * indices of this processor as its ghost index.
   */
  size_type d_numTargetProcs;

  /**
   * Vector to store the target processor Ids. A target processor is
   * one which contains at least one of the locally owned indices of this
   * processor as its ghost index.
   */
  std::vector<size_type> d_targetProcIds;

  /**
   * Vector of size number of target processors to store how many locally
   * owned indices of this current processor are needed as ghost indices
   * in each of the target processors.
   */
  std::vector<size_type> d_numOwnedIndicesForTargetProcs;

  /** Vector of size \f$\sum_i\f$ d_numOwnedIndicesForTargetProcs[i]
   * to store all the locally owned indices
   * which other processors need (i.e., which are ghost indices in other
   * processors). It is stored as a concatenation of lists, where the
   * \f$i\f$-th list is
   * \f$L_i = \{o^{(k_i)}_1,o^{(k_i)}_2,\ldots,o^{(k_i)}_{N_i}\}\f$,
   * where \f$o\f$'s are indices targeted to other processors,
   * \f$k_i\f$ is the rank of the \f$i\f$-th target processor
   * (i.e., d_targetProcIds[i]) and \f$N_i\f$ is the number of
   * indices to be sent to the \f$i\f$-th target processor (i.e.,
   * d_numOwnedIndicesForTargetProcs[i]).
   *
   * @note We store only the indices local to this processor, i.e.,
   * the relative position of the index in the locally owned range of
   * this processor. This is done to use size_type, which is unsigned
   * int, instead of global_size_type, which is long unsigned int. This
   * helps in reducing the volume of data transferred during MPI calls.
   *
   * @note The list \f$L_i\f$ must be ordered.
   */
  SizeTypeVector d_flattenedLocalTargetIndices;

  /// Number of processors in the MPI Communicator.
  int d_nprocs;

  /// Rank of the current processor.
  int d_myRank;

  /**
   * Total number of unique indices across all processors
   */
  global_size_type d_nGlobalIndices;

  /// MPI Communicator object.
  MPI_Comm d_mpiComm;
};
} // end of namespace mpi
} // end of namespace utils
} // end of namespace dftfe
#include "MPIPatternP2P.t.cc"
#endif // dftfeMPIPatternP2P_h