Skip to content

Commit

Permalink
Compile over files in parallel. (#165)
Browse files: browse the repository at this point in the history
  • Loading branch information
MalachiTimothyPhillips committed Sep 7, 2021
1 parent 07b4f67 commit 07c2775
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 36 deletions.
6 changes: 1 addition & 5 deletions okl/elliptic/ellipticSerialUpdatePCG.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,8 @@ void FUNC(ellipticBlockUpdatePCG)(const dlong & N,

#ifdef __NEKRS__OMP__
#pragma omp parallel for collapse(2)
<<<<<<< HEAD
for(int fld = 0; fld < p_Nfields; fld++)
=======
#endif
for(int fld = 0; fld < p_eNfields; fld++)
>>>>>>> next
for(int fld = 0; fld < p_Nfields; fld++)
for(int i = 0; i < N; ++i) {
const dlong n = i + fld * offset;
cpu_x[n] += alpha * cpu_p[n];
Expand Down
17 changes: 3 additions & 14 deletions src/core/compileKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1321,7 +1321,7 @@ void compileKernels() {
mangleOCCACacheDir();
}

{ registerLinAlgKernels(); }
registerLinAlgKernels();

{
const bool buildOnly = platform->options.compareArgs("BUILD ONLY", "TRUE");
Expand All @@ -1331,9 +1331,9 @@ void compileKernels() {
platform->device, platform->device.mode(), communicator, buildOnly);
}

{ registerMeshKernels(); }
registerMeshKernels();

{ registerNrsKernels(); }
registerNrsKernels();

{
int Nscalars;
Expand All @@ -1350,17 +1350,6 @@ void compileKernels() {
};
for (auto &&section : sections) {
registerEllipticKernels(section);
}
}

{
std::vector<std::string> sections = {
"pressure",
"velocity",
};
int Nscalar;
platform->options.getArgs("NUMBER OF SCALARS", Nscalar);
for (auto &&section : sections) {
registerEllipticPreconditionerKernels(section);
}
}
Expand Down
43 changes: 26 additions & 17 deletions src/core/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,9 @@ kernelRequestManager_t::add_kernel(kernelRequest_t request, bool checkUnique)
ABORT(1);
}
}

const std::string fileName = request.fileName;
fileNameToRequestMap[fileName].insert(request);
}
occa::kernel
kernelRequestManager_t::get(const std::string& request, bool checkValid) const
Expand Down Expand Up @@ -377,35 +380,41 @@ kernelRequestManager_t::compile()

const int rank = buildNodeLocal ? platformRef.comm.localRank : platformRef.comm.mpiRank;
const int ranksCompiling =
#if 0
std::min(
maxCompilingRanks,
buildNodeLocal ?
platformRef.comm.localCommSize :
platformRef.comm.mpiCommSize
);
#else
1;
#endif

std::vector<kernelRequest_t> kernelRequestVec(kernels.begin(), kernels.end());
std::vector<std::string> kernelFiles(fileNameToRequestMap.size());

unsigned ctr = 0;
for(auto&& fileNameAndRequests : fileNameToRequestMap)
{
kernelFiles[ctr] = fileNameAndRequests.first;
ctr++;
}

const auto& device = platformRef.device;
auto& requestToKernel = requestToKernelMap;
auto compileKernels = [&kernelRequestVec, &requestToKernel, &device, rank, ranksCompiling](){
auto& fileNameToRequest = fileNameToRequestMap;
auto compileKernels = [&kernelFiles, &requestToKernel, &fileNameToRequest, &device, rank, ranksCompiling](){
if(rank >= ranksCompiling) return;
const unsigned nKernels = kernelRequestVec.size();
for(unsigned kernelId = 0; kernelId < nKernels; ++kernelId)
const unsigned nFiles = kernelFiles.size();
for(unsigned fileId = 0; fileId < nFiles; ++fileId)
{
if(kernelId % ranksCompiling == rank){
const auto& kernelRequest = kernelRequestVec.at(kernelId);
const std::string requestName = kernelRequest.requestName;
const std::string fileName = kernelRequest.fileName;
const std::string kernelName = kernelRequest.kernelName;
const std::string suffix = kernelRequest.suffix;
const occa::properties props = kernelRequest.props;
auto kernel = device.buildKernel(fileName, kernelName, props, suffix);
requestToKernel[requestName] = kernel;
if(fileId % ranksCompiling == rank){
const std::string fileName = kernelFiles[fileId];
for(auto && kernelRequest : fileNameToRequest[fileName]){
const std::string requestName = kernelRequest.requestName;
const std::string fileName = kernelRequest.fileName;
const std::string kernelName = kernelRequest.kernelName;
const std::string suffix = kernelRequest.suffix;
const occa::properties props = kernelRequest.props;
auto kernel = device.buildKernel(fileName, kernelName, props, suffix);
requestToKernel[requestName] = kernel;
}
}
}
};
Expand Down
1 change: 1 addition & 0 deletions src/core/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ class kernelRequestManager_t
bool kernelsProcessed;
std::set<kernelRequest_t> kernels;
std::map<std::string, occa::kernel> requestToKernelMap;
std::map<std::string, std::set<kernelRequest_t>> fileNameToRequestMap;

void add_kernel(kernelRequest_t request, bool assertUnique = true);

Expand Down

0 comments on commit 07c2775

Please sign in to comment.