Skip to content

Commit

Permalink
Squashed '3rd_party/nek5000_parRSB/' changes from 6744720..9f39f283
Browse files Browse the repository at this point in the history
9f39f283 Turn off more comm_barriers
31ac4959 Turn off metric_tic and metric_toc
d5666ba6 Undo some changes to avoid compilation failures
47e39427 Import changes from next related to parRSB bugs
92d8c1eb Fix a bug in repair_partitions
b191ac26 Refactor sort dir structure and remove genmap-gslib.h
40061481 Remove the assert statement since it fails for some meshes
bfb318ce Fix MPI hangs due to a break statement (Nek5000#44)
4c108b18 Update README.md (Nek5000#41)
7c70ebe8 Fixes bug disconnected components + load imbalance (Nek5000#39)
b6d60ca4 Fix element redistribution issue (Nek5000#37)
b2a77e4f Enable warnings and a small refactor (Nek5000#35)
9690625b print start+end of parcon info to stdout
4e123a1e Import latest changes from thilinarmtb
4c837302 [gencon] Fix issues in parallel connectivity
363f2645 [gencon] Add a schedule and make segment discovery consistent
bd871e8 [gencon] Fix a bug related to outdated ifSegment values
19b4d15 [gencon] Add redundant runs and return error when face-check fails
63c4445 [gencon] Fix issues related to ranks with zero elements
e923047 [gencon] Make gencon identical to serial version
984e9d5 Import latest changes (Nek5000#26)
4e8af7b print warning if Lanczos did not converge
fabcd44 Import gencon_and_mg_precon branch (Nek5000#24)
0cc6858 Changes to parRCB interface (Nek5000#21)
bb51af9 parRCB (Nek5000#20)
f81875c load balance input data
8ed6e5a Add Histogram sort (Nek5000#15)
7f9403f remove some remaining loop counter declarations
7ec2b9d Declare loop variables outside of loop
49da449 Merge branch 'master' of https://github.com/Nek5000/parRSB
d974576 Add debug info, change from globalId0 --> globalId
f75d1d7 Update to latest gslib
fc8b629 make astyle
6b9f393 Remove a print statement
c488be4 Fix a few issues in parRSB
d399045 Update README.md
4682703 moved quality metrics to example
76d698b Update README.md
d3ba524 Add additional quality metrics
b84d09b Update README.md
baaa27f Update README.md
ea4cba5 Update issue templates
c6885d3 Remove redundant print statements
7f32fb3 Copy the eigenvalues upon return from TQLI
d46ba41 Exit if N elements is less than n mpi ranks
68ebc46 Use MPI_Comm_split instead of GenmapCommSplit
c64a9a2 make astyle
f0cbc23 Replace tests --> example in make astyle
42d7ff1 Do RSB on procs where nel > 0
7f4a53d Use Genmap API to init and finalize crystal router
06f8dde Make legacy version the default
ce144a3 Make Lanczos and eigen routines same
7ce837f Fix a problem in RSB init vector
d4e8532 fix memory leak
8f8aceb fix mem leak
88adca6 Add small ethier data file
231ab49 Add element balance to quality and corrected example
b3cfa5f Set the proc field to correct value
0168ced Sort after return from sarray_transfer
4ad6574 Use a cleaner interface
b87d740 Fix for the error
2c9a3b1 Kind of a fix?
c1d882f Update README.md
f92687a Update README.md
857af45 Update README.md
9bcea8e Use new API in example
39f3bf1 Update issue templates
37c3299 Merge pull request #11 from Nek5000/add-code-of-conduct-1
604ea01 Create CODE_OF_CONDUCT.md
97f5717 Update README.md
53addbc Update README.md
a562253 Merge pull request #10 from Nek5000/remove_readers
fd039a6 Merge branch 'master' into remove_readers
da48834 make astyle
8d76eb4 Update README.md
6514e19 Update README.md
0dc0023 Merge pull request #9 from Nek5000/update_c_api
3d7f556 Adapt the new C API
7ddefef First try at the new C API
85ecb55 Change Rank and Size to GenmapInt
b876836 Move common code from the if branches
786e18c Update the bin sort routine to balance elements
929fa2f Remove the unbalance after global sort by globalId
36feb4c Fix a warning
c637a3c Remove the concept of readers
d19c36d Refactor genmap-gslib.h
6454010 Merge pull request #8 from Nek5000/polishing
4878ba1 Separate routine to split the communicator
798d336 Rename genmap-algo.c --> genmap-rsb.c
0b1edb1 Make the Comm split a separate function
a883699 Add a new function to split by the median
d887019 Split the sort into two parts
50964e4 Simplify RSB algorithm
81e05d4 Added GenmapScan to calculate NEL and start index
3497a62 Use the API to access the properties
fa9d0ac Update interface for GenmapSetNLocalElements
bdb424d Remove MPI timers
37a13bb make astyle
b7ce7f0 Move Lanczos to genmap-lanczos.c
406337f Move the eigenvalue routines to genmap-eigen.c
5b58c8a Split functions in genmap.c to smaller files
56750c5 Add the Binsort function
325aab6 Remove GenmapPrimeFactors
4e583d4 Update parRSB interface
adfb3a8 h->start --> GenmapGet(or Set)LocalStartIndex
c3428f3 Ax --> Laplacian
87a82c6 Get rid of GenmapRead function
ecd998f Refactor C-Helper functions
276e8d9 Get rid of GenmapHeader
7329f6e h->header->nel --> GenmapGetNGlobalElements(h)
e8c3d19 Remove power iteration algorithm
5282d17 Add setters for GenmapHandle
fe8b8b5 h->header->lelt --> GenmapGetNLocalElements(h)
182a2eb Add minor comments
0eea829 h->global --> GenmapGetGlobalComm(h)
0abb112 Further polishing
5191bbf Adding interface functions for header
f2b20c4 Update README.md
eb911ef Update README.md
d82e5f4 Update README.md
f3670a9 Update README.md
f41a7f7 more polishing
4d28ff5 polishing
f38ced9 make astyle
db59799 Add Paul's version of genmap
fcbc48d Take the absolute value of the eigenvalue
516184c Fix an error with TQLI routine
3f33568 Fix a memory bug with GenmapTQLI
e100690 Minor changes
edb3802 A few changes
55180f4 Start implementing TQLI
02f91e3 make astyle
c05417b Fix a bug with ipass value
d0993eb Add Paul's bias to init condition
490e9c3 Switch co2 test file
7e0c0cc Print progress
138c639 Import latest changes
0cb4f0b disable DEBUG by default
eaf7f7b Initialize vector p in LL
d897ec9 Freeing some vectors and add debug info.
c7a760a Add Paul's version of Lanczos
79f2cb2 Fix a typo
3c4b775 Lanczos with restart + make astyle
ed1225f Update README.md
c094235 Init buffer
748c364 Add con-test and fix remaining memory leaks
0192192 Set maxIter to 50
d157d7a Declare loop count outside
a226302 Pickup correct gslib location
684515e Make datatypes consistent
5844437 Update README.md
1234cec Update README.md
a9f17f4 Update README.md
29c525b Update README.md
397c9ab Update README.md
069fb17 Add missing init
3c3faea Update README.md
5c63c4d Update README.md
0ad459a Clean up
9e83f53 delete *.a file
4834ac3 Update README.md
41ca2be initial commit
ece3103 Initial commit
REVERT: 6744720 (split)Merge commit 'f4e850cb8df857a85045cf524d6aa79be4e83905' into next
REVERT: 07c4afe (split)update to latest parRSB 7c70ebe802e83c19ee4d5dada77c46c7159e0283
REVERT: 8d2d88a (split)Import next (Nek5000#259)
REVERT: 3c232a5 (split)Merge commit '318068f4b93b3a51a7ad07a61fcb155642bddf47' as '3rd_party/nek5000_parRSB'

git-subtree-dir: 3rd_party/nek5000_parRSB
git-subtree-split: 9f39f2838b91c45c94ec78d5f35b20c2123a3fc6
  • Loading branch information
MalachiTimothyPhillips committed Aug 24, 2021
1 parent 6744720 commit 3c9c045
Show file tree
Hide file tree
Showing 19 changed files with 312 additions and 348 deletions.
3 changes: 1 addition & 2 deletions src/gencon/gencon.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#ifndef _GENMAP_GENCON_H_
#define _GENMAP_GENCON_H_

#include <genmap-gslib.h>
#include <genmap-types.h>
#include <genmap.h>

/* Upper bound for number of dimensions */
#define GC_MAX_DIM 3
Expand Down
8 changes: 8 additions & 0 deletions src/genmap-chelpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,11 @@ double GenmapGetMaxRss() {
return (double)(r_usage.ru_maxrss * 1024L);
#endif
}

/* Integer base-2 logarithm: returns floor(log2(n)) for n >= 1.
 * Any n <= 1 (including zero and negative values) yields 0. */
int log2ll(long long n) {
  int exponent;

  /* Count how many times n can be halved before it drops to 1 or below. */
  for (exponent = 0; n > 1; exponent++)
    n >>= 1;

  return exponent;
}
225 changes: 94 additions & 131 deletions src/genmap-components.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ struct interface_element {
GenmapScalar fiedler;
};

/* Returns 0 if bin is 0 or 1; else returns 1 and the gc->id == 0 process logs. */
int check_bin_val(int bin, struct comm *gc) {
  const int in_range = (bin == 0 || bin == 1);
  if (!in_range && gc->id == 0) { /* gc is only read when bin is invalid */
    printf("%s:%d bin value out of range: %d\n", __FILE__, __LINE__, bin);
    fflush(stdout);
  }
  return in_range ? 0 : 1;
}

/* Find the number of disconnected components */
sint get_components(sint *component, struct rsb_element *elements,
struct comm *c, buffer *buf, uint nelt, uint nv) {
Expand Down Expand Up @@ -138,31 +150,32 @@ sint get_components(sint *component, struct rsb_element *elements,
return count;
}

void balance_partitions(genmap_handle h, struct comm *lc, int bin,
struct comm *gc) {
assert(bin == 0 || bin == 1);
int balance_partitions(genmap_handle h, struct comm *lc, int bin,
struct comm *gc) {
assert(check_bin_val(bin, gc) == 0);

uint nelt = genmap_get_nel(h);
slong nelgt = nelt;
slong buf;
slong nelgt = nelt;
comm_allreduce(lc, gs_long, gs_add, &nelgt, 1, &buf);

/* Calculate expected # of elements per processor */
slong nglob = nelt;
comm_allreduce(gc, gs_long, gs_add, &nglob, 1, &buf);

slong nelt_ = nglob / gc->np;
sint nrem = nglob - nelt_ * gc->np;

slong nelgt_exp = nelt_ * lc->np;
nelgt_exp += nrem / 2 + (nrem - (nrem / 2) * 2) * (1 - bin);
sint nrem = nglob - nelt_ * gc->np;
nelgt_exp += nrem / 2 + (nrem % 2) * (1 - bin);

uint send_cnt = 0;
slong send_cnt = 0;
if (nelgt - nelgt_exp > 0)
send_cnt = nelgt - nelgt_exp;

// Setup gather-scatter
/* Setup gather-scatter */
int nv = genmap_get_nvertices(h);
uint size = nelt * nv;

slong *ids = NULL;
GenmapMalloc(size, &ids);

Expand All @@ -189,21 +202,23 @@ void balance_partitions(genmap_handle h, struct comm *lc, int bin,
for (e = 0; e < nelt; e++)
elems[e].proc = gc->id;

slong start_id = (send_cnt == 0) ? gc->id : LONG_MAX;
comm_allreduce(gc, gs_long, gs_min, &start_id, 1, &buf);
sint start_id = (send_cnt == 0) ? gc->id : INT_MAX;
comm_allreduce(gc, gs_int, gs_min, &start_id, 1, &buf);

struct crystal cr;
sint balanced = 0;

if (send_cnt > 0) {
int mul = -1.0;
if (start_id == 0) // we are sending to lower fiedler values
mul = 1.0;
int mul = -1;
if (start_id == 0) /* we are sending to lower fiedler values */
mul = 1;

struct array ielems;
array_init(struct interface_element, &ielems, 10);

struct interface_element ielem;
ielem.dest = -1;

for (e = 0; e < nelt; e++) {
for (v = 0; v < nv; v++)
if (input[e * nv + v] > 0) {
Expand All @@ -222,120 +237,87 @@ void balance_partitions(genmap_handle h, struct comm *lc, int bin,
slong out[2][1], bfr[2][1];
comm_scan(out, lc, gs_long, gs_add, &ielems_n, 1, bfr);
slong start = out[0][0];
assert(out[1][0] >= send_cnt);

struct interface_element *ptr = ielems.ptr;
for (e = 0; start + e < send_cnt && e < ielems.n; e++)
ptr[e].dest = start_id;
sint P = gc->np - lc->np;
slong part_size = (send_cnt + P - 1) / P;

crystal_init(&cr, lc);
sarray_transfer(struct interface_element, &ielems, orig, 0, &cr);
crystal_free(&cr);
if (out[1][0] < send_cnt)
balanced = 0;
else {
struct interface_element *ptr = ielems.ptr;
for (e = 0; start + e < send_cnt && e < ielems.n; e++)
ptr[e].dest = start_id + (start + e) / part_size;

crystal_init(&cr, lc);
sarray_transfer(struct interface_element, &ielems, orig, 0, &cr);
crystal_free(&cr);

ptr = ielems.ptr;
for (e = 0; e < ielems.n; e++)
if (ptr[e].dest != -1)
elems[ptr[e].index].proc =
ptr[e]
.dest; // This is redundant and everything is equal to start_id
ptr = ielems.ptr;
for (e = 0; e < ielems.n; e++)
if (ptr[e].dest != -1)
elems[ptr[e].index].proc = ptr[e].dest;
}

array_free(&ielems);
}

crystal_init(&cr, gc);
sarray_transfer(struct rsb_element, h->elements, proc, 1, &cr);
crystal_free(&cr);
comm_allreduce(gc, gs_int, gs_min, &balanced, 1, &buf);
if (balanced == 1) {
crystal_init(&cr, gc);
sarray_transfer(struct rsb_element, h->elements, proc, 0, &cr);
crystal_free(&cr);

// do a load balanced sort in each partition
parallel_sort(struct rsb_element, h->elements, fiedler, gs_double, 0, 1, lc,
&h->buf);
/* Do a load balanced sort in each partition */
parallel_sort(struct rsb_element, h->elements, fiedler, gs_double, 0, 1, lc,
&h->buf);
} else {
/* Forget repair, just do a load balanced partition */
parallel_sort(struct rsb_element, h->elements, fiedler, gs_double, 0, 1, gc,
&h->buf);
}

genmap_comm_scan(h, lc);

GenmapFree(input);
nelt = genmap_get_nel(h);
sint ncomp =
get_components(NULL, genmap_get_elements(h), lc, &h->buf, nelt, nv);
metric_acc(COMPONENTS, ncomp);

GenmapFree(input);
gs_free(gsh);
GenmapFree(ids);
}

sint count_comp_sizes(sint *comp_ids, slong *min_, slong *max_, struct comm *tc,
genmap_handle h) {
struct rsb_element *e = genmap_get_elements(h);
uint nelt = genmap_get_nel(h);
int nv = genmap_get_nvertices(h);

sint ncomp = get_components(comp_ids, e, tc, &h->buf, nelt, nv);

slong *size;
GenmapCalloc(2 * ncomp, &size);

uint i;
for (i = 0; i < nelt; i++)
size[comp_ids[i]]++;

comm_allreduce(tc, gs_long, gs_add, size, ncomp, &size[ncomp]);

slong min = LONG_MAX;
slong max = 0;
for (i = 0; i < ncomp; i++) {
if (size[i] < min)
min = size[i];
if (size[i] > max)
max = size[i];
}

*min_ = min;
*max_ = max;

GenmapFree(size);
int repair_partitions(genmap_handle h, struct comm *tc, struct comm *lc,
int bin, struct comm *gc) {
assert(check_bin_val(bin, gc) == 0);

return ncomp;
}
uint nelt = genmap_get_nel(h);
slong buf;
slong nelg = nelt;
comm_allreduce(lc, gs_long, gs_add, &nelg, 1, &buf);

void split_and_repair_partitions(genmap_handle h, struct comm *lc, int level,
struct comm *gc) {
/* Check for disconnected components */
GenmapInitLaplacianWeighted(h, tc);
sint np = lc->np;
sint id = lc->id;
int bin = 1;
if (id < (np + 1) / 2)
bin = 0;

struct comm tc;
genmap_comm_split(lc, bin, id, &tc);

/* Check for disconnected components */
GenmapInitLaplacianWeighted(h, &tc);

struct rsb_element *e = genmap_get_elements(h);
int nv = genmap_get_nvertices(h);
uint nelt = genmap_get_nel(h);

slong buf;
slong nelg = nelt;
comm_allreduce(lc, gs_long, gs_add, &nelg, 1, &buf);
uint nv = genmap_get_nvertices(h);

sint *comp_ids = NULL;
GenmapMalloc(nelt, &comp_ids);

sint ncomp;
slong min, max;
ncomp = count_comp_sizes(comp_ids, &min, &max, &tc, h);
sint ncomp = get_components(comp_ids, e, tc, &h->buf, nelt, nv);
slong ncompg = ncomp;
comm_allreduce(lc, gs_long, gs_max, &ncompg, 1, &buf);

sint root = (lc->id == 0) * gc->id;
comm_allreduce(lc, gs_int, gs_max, &root, 1, &buf);

if (tc.id == 0 && ncomp > 1) {
printf(
"\tWarning: %d disconnected components in Level = %d (%ld)! (min/max "
"size: %ld %ld) root = %d, np = %d\n",
ncomp, level, nelg, min, max, root, np);
fflush(stdout);
}

int attempt = 0;
int nattempts = 2 * ncompg;
int nattempts = 1;

while (ncompg > 1 && attempt < nattempts) {
slong *comp_count = NULL;
Expand All @@ -348,27 +330,26 @@ void split_and_repair_partitions(genmap_handle h, struct comm *lc, int level,
for (i = 0; i < ncomp; i++)
comp_count[ncomp + i] = comp_count[i];

comm_allreduce(&tc, gs_long, gs_add, &comp_count[ncomp], ncomp,
comm_allreduce(tc, gs_long, gs_add, &comp_count[ncomp], ncomp,
&comp_count[2 * ncomp]);

slong min_count = LONG_MAX;
sint min_id = -1;
for (i = 0; i < ncomp; i++)
for (i = 0; i < ncomp; i++) {
if (comp_count[ncomp + i] < min_count) {
min_count = comp_count[ncomp + i];
min_id = i;
}
}

slong min_count_global = min_count;
comm_allreduce(lc, gs_long, gs_min, &min_count_global, 1, &buf);

// bin is the tie breaker
/* bin is the tie breaker */
sint min_bin = (min_count_global == min_count) ? bin : INT_MAX;
comm_allreduce(lc, gs_int, gs_min, &min_bin, 1, &buf);

struct crystal cr;
crystal_init(&cr, lc);

e = genmap_get_elements(h);
for (i = 0; i < nelt; i++)
e[i].proc = id;

Expand All @@ -381,7 +362,7 @@ void split_and_repair_partitions(genmap_handle h, struct comm *lc, int level,
if (min_count_global == min_count && min_bin == bin) {
slong in = comp_count[min_id];
slong out[2][1], buff[2][1];
comm_scan(out, &tc, gs_long, gs_add, &in, 1, buff);
comm_scan(out, tc, gs_long, gs_add, &in, 1, buff);
slong off = out[0][0];

for (i = 0; i < nelt; i++) {
Expand All @@ -392,47 +373,29 @@ void split_and_repair_partitions(genmap_handle h, struct comm *lc, int level,
}
}

sarray_transfer(struct rsb_element, h->elements, proc, 1, &cr);
struct crystal cr;
crystal_init(&cr, lc);
sarray_transfer(struct rsb_element, h->elements, proc, 0, &cr);
crystal_free(&cr);

attempt++;

// Do a load balanced sort in each partition
parallel_sort(struct rsb_element, h->elements, fiedler, gs_double, 0, 1,
&tc, &h->buf);
genmap_comm_scan(h, &tc);
/* Do a load balanced sort in each partition */
parallel_sort(struct rsb_element, h->elements, fiedler, gs_double, 0, 1, tc,
&h->buf);
genmap_comm_scan(h, tc);
nelt = genmap_get_nel(h);
GenmapInitLaplacianWeighted(h, &tc);
GenmapInitLaplacianWeighted(h, tc);

GenmapRealloc(nelt, &comp_ids);
ncompg = ncomp = count_comp_sizes(comp_ids, &min, &max, &tc, h);
ncompg = ncomp =
get_components(comp_ids, genmap_get_elements(h), tc, &h->buf, nelt, nv);
comm_allreduce(lc, gs_long, gs_max, &ncompg, 1, &buf);
if (tc.id == 0 && ncomp > 1) {
printf("\t\t %ld disconnected components after attempt %d/%d, Level = %d "
"(%ld) "
"(min/max size: %ld %ld) root = %d np = %d\n",
ncomp, attempt, nattempts, level, nelg, min, max, root, np);
fflush(stdout);
}

GenmapFree(comp_count);
}

balance_partitions(h, &tc, bin, lc);

nelt = genmap_get_nel(h);
GenmapRealloc(nelt, &comp_ids);
ncomp = count_comp_sizes(comp_ids, &min, &max, &tc, h);
if (ncomp > 1 && tc.id == 0) {
printf(
"%d disconnected components after balance, Level = %d (%ld) (min/max "
"size: %ld %ld) root = %d np =%d\n",
ncomp, level, nelg, min, max, root, np);
fflush(stdout);
}

GenmapFree(comp_ids);
comm_free(lc);
comm_dup(lc, &tc);
comm_free(&tc);

return 0;
}
Loading

0 comments on commit 3c9c045

Please sign in to comment.