Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Import latest changes #155

Merged
merged 39 commits into from
Oct 11, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
a6f1cd6
fix wrong numStep if restart time is non-zero
stgeke Sep 17, 2020
0923118
unload darshan on summit
stgeke Sep 17, 2020
5fb242f
correct wrong bc type in tgv.par
stgeke Sep 17, 2020
060791f
add more timers
stgeke Sep 17, 2020
e344a73
do not solve for the difference
stgeke Sep 17, 2020
cf32d70
fix wrong scale factor for lowMach
stgeke Sep 17, 2020
ab6335d
fix projection if we do not solve for the solution difference
stgeke Sep 18, 2020
b622215
fix residual factor if Nfields>1
stgeke Sep 18, 2020
7e52f59
add par keys
stgeke Sep 18, 2020
969f8c9
add channel example with varying viscosity (not tested yet)
stgeke Sep 18, 2020
5ecb448
run at least one iteration inside CG
stgeke Sep 18, 2020
ec56bb6
add missing stress things
stgeke Sep 18, 2020
428716a
Allow for nodes to be computed on the fly. (#7)
MalachiTimothyPhillips Sep 18, 2020
0968427
Add in additional levels
MalachiTimothyPhillips Sep 21, 2020
0758e98
add more timers during setup
stgeke Sep 23, 2020
2545553
Merge branch 'compute-nodes' of https://github.com/MalachiTimothyPhil…
stgeke Sep 23, 2020
81cdba6
make cubN a runtime parameter, delete nodes files and fix bug in exte…
stgeke Sep 23, 2020
ce129dc
do load unused kernel
stgeke Sep 23, 2020
29968a5
fix wrong stressFormulation flag
stgeke Sep 23, 2020
bc39a20
add weak derivative matrix
stgeke Sep 25, 2020
cbb1ae3
change default to Chebyshev for BoomerAMG
stgeke Sep 25, 2020
efc2ad1
Remove unnecessary parts of AMGSetup
stgeke Sep 25, 2020
c36d934
correct some things related to weak velocity residual (still not work…
stgeke Sep 25, 2020
de3a5fb
fix incorrect slip BC handling in surface terms for pressure residual
stgeke Sep 25, 2020
dac61f3
fix proj issue when not solving for the solution difference
Sep 25, 2020
3dd6735
fix scalar flux + div by zero if r==0 in cg
Sep 30, 2020
71013c1
remove uninitialized okl parameter
Sep 30, 2020
5f3e7ac
remove matlab code to generate node files
Sep 30, 2020
c217a57
compute weak velocity residual
Oct 2, 2020
84fdac5
Import (#156)
MalachiTimothyPhillips Oct 5, 2020
a23af8f
add non-zero outflow handling for weak residual
Oct 5, 2020
7335e98
fix variable properties for velocity only + fix several issue with ch…
Oct 5, 2020
0d75fcc
make lowMach a plug-in
Oct 5, 2020
2db6bcd
delete incomplete channel example + remove variable prop or visc par key
Oct 5, 2020
fb54c94
update par file settings
Oct 5, 2020
36b4f5c
add missing lowMach files
Oct 8, 2020
784ae8e
add flux testing to ethier
Oct 8, 2020
5b50ce9
add LMM + o_LMM + linAlg
Oct 10, 2020
b7d4a41
add some linAlg stuff to examples
Oct 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
add more timers
  • Loading branch information
stgeke committed Sep 17, 2020
commit 060791fc2c703708d5f82aaa46b3e39b2f780f46
5 changes: 0 additions & 5 deletions 3rd_party/gslib/ogs/ogs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,6 @@ SOFTWARE.
#include "mpi.h"
#include "types.h"

//#define OGS_ENABLE_TIMER
#ifdef OGS_ENABLE_TIMER
#include "timer.hpp"
#endif

#define ogsFloat "float"
#define ogsDouble "double"
#define ogsDfloat dfloatString
Expand Down
85 changes: 34 additions & 51 deletions 3rd_party/gslib/ogs/src/oogs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
#include "ogsInterface.h"
#include <list>

//#define DISABLE_OOGS
//#define OGS_ENABLE_TIMER

#ifdef OGS_ENABLE_TIMER
#include "timer.hpp"
#endif

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -100,11 +107,8 @@ static void pairwiseExchange(int unit_size, oogs_t *gs)
}
}

if(gs->mode == OOGS_HOSTMPI)
gs->o_bufSend.copyTo(gs->bufSend, pwd->comm[send].total*unit_size, 0, "async: true");

{ // pw exchange
if(gs->mode != OOGS_DEVICEMPI) ogs->device.finish(); // waiting for buffers to be ready
if(gs->mode != OOGS_DEVICEMPI) ogs->device.finish(); // waiting for send buffers to be ready

comm_req *req = &pwd->req[pwd->comm[recv].n];
const struct pw_comm_data *c = &pwd->comm[send];
Expand All @@ -119,9 +123,6 @@ static void pairwiseExchange(int unit_size, oogs_t *gs)
}
MPI_Waitall(pwd->comm[send].n + pwd->comm[recv].n,pwd->req,MPI_STATUSES_IGNORE);
}

if(gs->mode == OOGS_HOSTMPI)
gs->o_bufRecv.copyFrom(gs->bufRecv,pwd->comm[recv].total*unit_size, 0, "async: true");
}

oogs_t* oogs::setup(ogs_t *ogs, int nVec, dlong stride, const char *type, std::function<void()> callback, oogs_mode gsMode)
Expand Down Expand Up @@ -230,7 +231,9 @@ oogs_t* oogs::setup(ogs_t *ogs, int nVec, dlong stride, const char *type, std::f
gs->mode = gsMode;
}

//gs->mode = OOGS_DEFAULT;
#ifdef DISABLE_OOGS
gs->mode = OOGS_DEFAULT;
#endif
if(rank == 0) printf("used mode: %d\n", gs->mode);

return gs;
Expand Down Expand Up @@ -333,29 +336,23 @@ void oogs::start(occa::memory o_v, const int k, const dlong stride, const char *

ogs_t *ogs = gs->ogs;

#if 1
if(gs->mode == OOGS_DEFAULT) {
if(gs->mode == OOGS_DEFAULT) {
if(k>1)
ogsGatherScatterManyStart(o_v, k, stride, type, op, ogs);
else
ogsGatherScatterStart(o_v, type, op, ogs);

return;
}
#endif

if (ogs->NhaloGather) {
reallocBuffers(Nbytes*k, gs);

occaGatherMany(ogs->NhaloGather, k, stride, ogs->NhaloGather, ogs->o_haloGatherOffsets, ogs->o_haloGatherIds, type, op, o_v, ogs::o_haloBuf);
if(gs->mode != OOGS_DEFAULT) packBuf(gs, ogs->NhaloGather, k, gs->o_scatterOffsets, gs->o_scatterIds, type, ogs::o_haloBuf, gs->o_bufSend);
ogs->device.finish();
occaGatherMany(ogs->NhaloGather, k, stride, ogs->NhaloGather, ogs->o_haloGatherOffsets,
ogs->o_haloGatherIds, type, op, o_v, ogs::o_haloBuf);

if(gs->mode == OOGS_DEFAULT) {
ogs->device.setStream(ogs::dataStream);
ogs::o_haloBuf.copyTo(ogs::haloBuf, ogs->NhaloGather*Nbytes*k, 0, "async: true");
ogs->device.setStream(ogs::defaultStream);
}
packBuf(gs, ogs->NhaloGather, k, gs->o_scatterOffsets, gs->o_scatterIds, type, ogs::o_haloBuf, gs->o_bufSend);
ogs->device.finish();
}
}

Expand All @@ -369,45 +366,42 @@ void oogs::finish(occa::memory o_v, const int k, const dlong stride, const char

ogs_t *ogs = gs->ogs;

#if 1
if(gs->mode == OOGS_DEFAULT) {
if(gs->mode == OOGS_DEFAULT) {
if(k>1)
ogsGatherScatterManyFinish(o_v, k, stride, type, op, ogs);
else
ogsGatherScatterFinish(o_v, type, op, ogs);

return;
}
#endif

if(ogs->NlocalGather) {
occaGatherScatterMany(ogs->NlocalGather, k, stride, ogs->o_localGatherOffsets, ogs->o_localGatherIds, type, op, o_v);
}
if(ogs->NlocalGather)
occaGatherScatterMany(ogs->NlocalGather, k, stride, ogs->o_localGatherOffsets,
ogs->o_localGatherIds, type, op, o_v);

if (ogs->NhaloGather) {
ogs->device.setStream(ogs::dataStream);

if(gs->mode == OOGS_DEFAULT) {
ogs->device.finish(); // waiting for gs::haloBuf copy to finish
struct gs_data *hgs = (gs_data*) ogs->haloGshSym;
const void* execdata = hgs->r.data;
const struct pw_data *pwd = (pw_data*) execdata;
const unsigned transpose = 0; // hardwired for now
const unsigned recv = 0^transpose, send = 1^transpose;
if(gs->mode == OOGS_HOSTMPI)
gs->o_bufSend.copyTo(gs->bufSend, pwd->comm[send].total*Nbytes*k, 0, "async: true");

void* H[10];
for (int i=0;i<k;i++) H[i] = (char*)ogs::haloBuf + i*ogs->NhaloGather*Nbytes;
ogsHostGatherScatterMany(H, k, type, op, ogs->haloGshSym);
} else {
#ifdef OGS_ENABLE_TIMER
timer::tic("oogsMPI",1);
timer::hostTic("oogsMPI",1);
#endif
pairwiseExchange(Nbytes*k, gs);
pairwiseExchange(Nbytes*k, gs);
#ifdef OGS_ENABLE_TIMER
timer::toc("oogsMPI");
timer::hostToc("oogsMPI");
#endif
}

if(gs->mode == OOGS_HOSTMPI)
gs->o_bufRecv.copyFrom(gs->bufRecv,pwd->comm[recv].total*Nbytes*k, 0, "async: true");

if(gs->mode == OOGS_DEFAULT) {
ogs::o_haloBuf.copyFrom(ogs::haloBuf, ogs->NhaloGather*Nbytes*k, 0, "async: true");
} else {
unpackBuf(gs, ogs->NhaloGather, k, gs->o_gatherOffsets, gs->o_gatherIds, type, op, gs->o_bufRecv, ogs::o_haloBuf);
}
unpackBuf(gs, ogs->NhaloGather, k, gs->o_gatherOffsets, gs->o_gatherIds, type, op, gs->o_bufRecv, ogs::o_haloBuf);
occaScatterMany(ogs->NhaloGather, k, ogs->NhaloGather, stride, ogs->o_haloGatherOffsets,
ogs->o_haloGatherIds, type, op, ogs::o_haloBuf, o_v);

Expand All @@ -422,17 +416,6 @@ void oogs::startFinish(void *v, const int k, const dlong stride, const char *typ
}
void oogs::startFinish(occa::memory o_v, const int k, const dlong stride, const char *type, const char *op, oogs_t *h)
{
#if 1
if(h->mode == OOGS_DEFAULT) {
if(k>1)
ogsGatherScatterMany(o_v, k, stride, type, op, h->ogs);
else
ogsGatherScatter(o_v, type, op, h->ogs);

return;
}
#endif

start(o_v, k, stride, type, op, h);
finish(o_v, k, stride, type, op, h);
}
Expand Down
29 changes: 21 additions & 8 deletions src/core/runTime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,11 @@ void runStep(ins_t* ins, dfloat time, dfloat dt, int tstep)
if(ins->Nscalar)
scalarSolve(ins, time, dt, cds->o_S);

timer::tic("udfProperties", 1);
if(udf.properties)
if(udf.properties) {
timer::tic("udfProperties", 1);
udf.properties(ins, time + dt, ins->o_U, cds->o_S, ins->o_prop, cds->o_prop);
timer::toc("udfProperties");
timer::toc("udfProperties");
}

if(ins->lowMach) {
if(udf.div)
Expand All @@ -72,6 +73,8 @@ void runStep(ins_t* ins, dfloat time, dfloat dt, int tstep)
mesh->device.finish();
MPI_Barrier(mesh->comm);
const double tElapsedStep = MPI_Wtime() - tStart;
tElapsed += tElapsedStep;
timer::set("solve", tElapsed);
if(mesh->rank == 0) {
printf("step= %d t= %.8e dt=%.1e C= %.2f",
tstep, time + dt, dt, cfl);
Expand All @@ -86,7 +89,6 @@ void runStep(ins_t* ins, dfloat time, dfloat dt, int tstep)
for(int is = 0; is < ins->Nscalar; is++)
if(cds->compute[is]) printf(" S: %d", cds->Niter[is]);

tElapsed += tElapsedStep;
printf(" eTime= %.2e, %.5e s\n", tElapsedStep, tElapsed);
}

Expand Down Expand Up @@ -140,7 +142,11 @@ void makeq(ins_t* ins, dfloat time, occa::memory o_FS, occa::memory o_BF)
cds_t* cds = ins->cds;
mesh_t* mesh = cds->mesh;

if(udf.sEqnSource) udf.sEqnSource(ins, time, cds->o_S, o_FS);
if(udf.sEqnSource) {
timer::tic("udfSEqnSource", 1);
udf.sEqnSource(ins, time, cds->o_S, o_FS);
timer::toc("udfSEqnSource");
}

for(int is = 0; is < cds->NSfields; is++) {
if(!cds->compute[is]) continue;
Expand Down Expand Up @@ -266,7 +272,11 @@ void makef(ins_t* ins, dfloat time, occa::memory o_FU, occa::memory o_BF)
{
mesh_t* mesh = ins->mesh;

if(udf.uEqnSource) udf.uEqnSource(ins, time, ins->o_U, o_FU);
if(udf.uEqnSource) {
timer::tic("udfUEqnSource", 1);
udf.uEqnSource(ins, time, ins->o_U, o_FU);
timer::toc("udfUEqnSource");
}

if(ins->options.compareArgs("FILTER STABILIZATION", "RELAXATION"))
ins->filterRTKernel(
Expand Down Expand Up @@ -669,10 +679,13 @@ void qthermal(ins_t* ins, dfloat time, occa::memory o_div)
ins->o_InvM,
cds->o_wrk0);

if(udf.sEqnSource)
if(udf.sEqnSource) {
timer::tic("udfSEqnSource", 1);
udf.sEqnSource(ins, time, cds->o_S, cds->o_wrk3);
else
timer::toc("udfSEqnSource");
} else {
ins->fillKernel(mesh->Nelements * mesh->Np, 0.0, cds->o_wrk3);
}

ins->qtlKernel(
mesh->Nelements,
Expand Down
49 changes: 44 additions & 5 deletions src/core/timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,20 @@ void init(MPI_Comm comm,occa::device device,int ifSync)
comm_ = comm;
}

// Store an externally measured time in the timer entry named `tag`.
//
// tag  - name of the timer entry; looked up in m_ with operator[].
// time - value written to the entry's startTime, hostElapsed and
//        deviceElapsed fields.
//
// Every call also increments the entry's count.
void set(const std::string tag, double time)
{
  // NOTE(review): m_ is assumed to be a standard map, whose operator[]
  // creates a missing entry. Under that assumption a later find() can never
  // fail, so the former "Invalid tag name" abort path was unreachable and has
  // been dropped.
  auto& entry = m_[tag];

  entry.startTime = time;
  entry.hostElapsed = time;
  entry.deviceElapsed = entry.hostElapsed;
  entry.count++;
}

void reset()
{
m_.clear();
Expand Down Expand Up @@ -213,7 +227,7 @@ void printRunStat()
int rank;
MPI_Comm_rank(comm_, &rank);

double dEtime[10];
double dEtime[20];
dEtime[0] = timer::query("makef", "DEVICE:MAX");
dEtime[1] = timer::query("velocitySolve", "DEVICE:MAX");
dEtime[2] = timer::query("pressureSolve", "DEVICE:MAX");
Expand All @@ -222,17 +236,33 @@ void printRunStat()
dEtime[5] = timer::query("preconditioner", "DEVICE:MAX");
dEtime[6] = timer::query("preSolveProjection", "DEVICE:MAX");
dEtime[6]+= timer::query("postSolveProjection", "DEVICE:MAX");
dEtime[7] = timer::query("oogsMPI", "DEVICE:MAX");

dEtime[8] = timer::query("dotp", "DEVICE:MAX");

dEtime[9] = timer::query("solve", "DEVICE:MAX");
dEtime[10] = timer::query("setup", "DEVICE:MAX");
dEtime[11] = timer::query("checkpointing", "DEVICE:MAX");

dEtime[12] = timer::query("udfExecuteStep", "DEVICE:MAX");
dEtime[13] = timer::query("udfUEqnSource", "DEVICE:MAX");
dEtime[14] = timer::query("udfSEqnSource", "DEVICE:MAX");
dEtime[15] = timer::query("udfProperties", "DEVICE:MAX");

double hEtime[10];
hEtime[0] = timer::query("BoomerAMGSolve", "HOST:MAX");
hEtime[1] = timer::query("oogsMPI", "HOST:MAX");

if (rank == 0) {
std::cout.setf ( std::ios::scientific );

std::cout << "runtime statistics\n\n"
<< " makef " << dEtime[0] << " s\n"
<< " setup " << dEtime[10]<< " s\n";

if(dEtime[11] > 0)
std::cout << " checkpointing " << dEtime[11]<< " s\n";

std::cout << " total solve " << dEtime[9] << " s\n"
<< " makef " << dEtime[0] << " s\n"
<< " velocitySolve " << dEtime[1] << " s\n"
<< " pressureSolve " << dEtime[2] << " s\n";

Expand All @@ -247,9 +277,18 @@ void printRunStat()
<< " scalarSolve " << dEtime[4] << " s\n"
<< std::endl;

if(dEtime[12] > 0)
std::cout << " udfExecuteStep " << dEtime[12] << " s\n";
if(dEtime[13] > 0)
std::cout << " udfUEqnSource " << dEtime[13] << " s\n";
if(dEtime[14] > 0)
std::cout << " udfSEqnSource " << dEtime[14] << " s\n";
if(dEtime[15] > 0)
std::cout << " udfProperties " << dEtime[15] << " s\n"
<< std::endl;

if(dEtime[7] > 0)
std::cout << " gsMPI " << dEtime[7] << " s\n";
if(hEtime[1] > 0)
std::cout << " gsMPI " << hEtime[1] << " s (without overlap)\n";
if(dEtime[8] > 0)
std::cout << " dotp " << dEtime[8] << " s\n";

Expand Down
2 changes: 2 additions & 0 deletions src/core/timer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ void deviceTic(const std::string tag);
void deviceTic(const std::string tag,int ifSync);
void deviceToc(const std::string tag);

void set(const std::string tag, double time);

double hostElapsed(const std::string tag);
double deviceElapsed(const std::string tag);
int count(const std::string tag);
Expand Down
4 changes: 3 additions & 1 deletion src/libP/parAlmond/src/parAlmond.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ void AMGSetup(solver_t *MM,
hlong TotalRows = globalRowStarts[M->size];
dlong numLocalRows = (dlong) (globalRowStarts[M->rank+1]-globalRowStarts[M->rank]);

MPI_Barrier(M->comm);
double startTime = MPI_Wtime();
if(rank==0) printf("Setting up AMG...");fflush(stdout);

//populate null space vector
Expand All @@ -70,7 +72,7 @@ void AMGSetup(solver_t *MM,

M->AMGSetup(A);

if(rank==0) printf("done.\n");
if(rank==0) printf("done (%gs)\n", MPI_Wtime()-startTime);
}

void Precon(solver_t *M, occa::memory o_x, occa::memory o_rhs) {
Expand Down
7 changes: 7 additions & 0 deletions src/nekInterface/nekInterfaceAdapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,19 +63,25 @@ void* nek_scPtr(int id)

// Call the routine behind nek_outfld_ptr with a single-space suffix.
// The call is bracketed by the "checkpointing" timer.
void nek_outfld()
{
  const char blankSuffix[] = " ";

  timer::tic("checkpointing", 1);
  // The callee takes a non-const char*, hence the cast.
  (*nek_outfld_ptr)(const_cast<char*>(blankSuffix));
  timer::toc("checkpointing");
}

// Call the routine behind nek_outfld_ptr with the caller-supplied suffix.
// The call is bracketed by the "checkpointing" timer.
//
// suffix - string forwarded to the routine; constness is cast away because
//          the callee takes a non-const char*.
void nek_outfld(const char* suffix)
{
  timer::tic("checkpointing", 1);
  (*nek_outfld_ptr)(const_cast<char*>(suffix));
  timer::toc("checkpointing");
}

void nek_outfld(const char* suffix, dfloat t, int coords,
occa::memory o_u, occa::memory o_p, occa::memory o_s,
int NSfields, int FP64)
{

timer::tic("checkpointing", 1);
mesh_t* mesh = (*ins)->mesh;
cds_t* cds = (*ins)->cds;
dlong Nlocal = mesh->Nelements * mesh->Np;
Expand Down Expand Up @@ -118,6 +124,7 @@ void nek_outfld(const char* suffix, dfloat t, int coords,
(*nek_setio_ptr)(&t, &xo, &vo, &po, &so, &NSfields, &FP64);
(*nek_outfld_ptr)((char*)suffix);
(*nek_resetio_ptr)();
timer::toc("checkpointing");
}

void nek_uic(int ifield)
Expand Down
Loading