Skip to content

Commit

Permalink
CPU compile fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
knikhil1995 committed Mar 16, 2024
1 parent e017c2f commit cee0cd7
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 11 deletions.
14 changes: 7 additions & 7 deletions include/deviceDirectCCLWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@
# include <mpi.h>
# include <DeviceTypeConfig.h>

# if defined(DFTFE_WITH_CUDA_NCCL)
# include <nccl.h>
# include <DeviceTypeConfig.h>
# elif defined(DFTFE_WITH_HIP_RCCL)
# include <rccl.h>
# include <DeviceTypeConfig.h>
# endif
# if defined(DFTFE_WITH_CUDA_NCCL)
# include <nccl.h>
# include <DeviceTypeConfig.h>
# elif defined(DFTFE_WITH_HIP_RCCL)
# include <rccl.h>
# include <DeviceTypeConfig.h>
# endif

namespace dftfe
{
Expand Down
2 changes: 2 additions & 0 deletions src/dft/dft.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1881,6 +1881,7 @@ namespace dftfe
if (d_kohnShamDFTOperatorsInitialized)
finalizeKohnShamDFTOperator();

#ifdef DFTFE_WITH_DEVICE
if constexpr (dftfe::utils::MemorySpace::DEVICE == memorySpace)
d_kohnShamDFTOperatorPtr = new KohnShamHamiltonianOperator<memorySpace>(
d_BLASWrapperPtr,
Expand All @@ -1895,6 +1896,7 @@ namespace dftfe
d_mpiCommParent,
mpi_communicator);
else
#endif
d_kohnShamDFTOperatorPtr = new KohnShamHamiltonianOperator<memorySpace>(
d_BLASWrapperPtrHost,
d_basisOperationsPtrHost,
Expand Down
40 changes: 36 additions & 4 deletions utils/MPICommunicatorP2P.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ namespace dftfe
d_mpiPatternP2P->getTargetProcIds().size());


#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::mpiHost)
{
Expand All @@ -83,6 +84,7 @@ namespace dftfe
blockSize,
0.0);
}
#endif
}

template <typename ValueType, dftfe::utils::MemorySpace memorySpace>
Expand All @@ -103,6 +105,7 @@ namespace dftfe
d_blockSize,
0.0);

#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::mpiHost)
{
Expand All @@ -116,6 +119,7 @@ namespace dftfe
d_blockSize,
0.0);
}
#endif
}
if (precision == communicationPrecision::single)
{
Expand All @@ -130,6 +134,7 @@ namespace dftfe
d_mpiPatternP2P->localGhostSize() * d_blockSize)
d_ghostDataCopySinglePrec.resize(
d_mpiPatternP2P->localGhostSize() * d_blockSize);
#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::mpiHost)
{
Expand All @@ -145,6 +150,7 @@ namespace dftfe
d_blockSize,
0.0);
}
#endif
}
}

Expand All @@ -171,12 +177,14 @@ namespace dftfe
dataArray.data() +
d_mpiPatternP2P->localOwnedSize() * d_blockSize;

#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
{
if (d_commProtocol == communicationProtocol::mpiHost)
recvArrayStartPtr = d_ghostDataCopyHostPinnedPtr->begin();
dftfe::utils::deviceSynchronize();
}
#endif
if (d_commProtocol != communicationProtocol::nccl)
for (size_type i = 0;
i < (d_mpiPatternP2P->getGhostProcIds()).size();
Expand Down Expand Up @@ -223,6 +231,7 @@ namespace dftfe
// initiate non-blocking sends to target processors
ValueType *sendArrayStartPtr = d_sendRecvBuffer.data();

#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
{
if (d_commProtocol == communicationProtocol::mpiHost)
Expand All @@ -241,7 +250,7 @@ namespace dftfe
else
dftfe::utils::deviceSynchronize();
}
#if defined(DFTFE_WITH_CUDA_NCCL) || defined(DFTFE_WITH_HIP_RCCL)
# if defined(DFTFE_WITH_CUDA_NCCL) || defined(DFTFE_WITH_HIP_RCCL)
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::nccl)
{
Expand Down Expand Up @@ -289,6 +298,7 @@ namespace dftfe
}
ncclGroupEnd();
}
# endif
#endif
if (d_commProtocol != communicationProtocol::nccl)
for (size_type i = 0;
Expand Down Expand Up @@ -326,13 +336,15 @@ namespace dftfe
typename singlePrecType<ValueType>::type *recvArrayStartPtr =
d_ghostDataCopySinglePrec.data();

#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
{
if (d_commProtocol == communicationProtocol::mpiHost)
recvArrayStartPtr =
d_ghostDataCopySinglePrecHostPinnedPtr->begin();
dftfe::utils::deviceSynchronize();
}
#endif
if (d_commProtocol != communicationProtocol::nccl)
for (size_type i = 0;
i < (d_mpiPatternP2P->getGhostProcIds()).size();
Expand Down Expand Up @@ -382,6 +394,7 @@ namespace dftfe
typename singlePrecType<ValueType>::type *sendArrayStartPtr =
d_sendRecvBufferSinglePrec.data();

#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
{
if (d_commProtocol == communicationProtocol::mpiHost)
Expand All @@ -401,7 +414,7 @@ namespace dftfe
else
dftfe::utils::deviceSynchronize();
}
#if defined(DFTFE_WITH_CUDA_NCCL) || defined(DFTFE_WITH_HIP_RCCL)
# if defined(DFTFE_WITH_CUDA_NCCL) || defined(DFTFE_WITH_HIP_RCCL)
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::nccl)
{
Expand Down Expand Up @@ -454,6 +467,7 @@ namespace dftfe
}
ncclGroupEnd();
}
# endif
#endif
if (d_commProtocol != communicationProtocol::nccl)
for (size_type i = 0;
Expand Down Expand Up @@ -516,6 +530,7 @@ namespace dftfe
}
if (d_commPrecision == communicationPrecision::full)
{
#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::mpiHost)
{
Expand All @@ -528,9 +543,11 @@ namespace dftfe
d_blockSize,
d_ghostDataCopyHostPinnedPtr->data());
}
#endif
}
else
{
#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::mpiHost)
{
Expand All @@ -542,15 +559,18 @@ namespace dftfe
d_ghostDataCopySinglePrec.data(),
d_ghostDataCopySinglePrecHostPinnedPtr->data());
}
#endif
MPICommunicatorP2PKernels<ValueType, memorySpace>::
copyValueType1ArrToValueType2Arr(
d_ghostDataCopySinglePrec.size(),
d_ghostDataCopySinglePrec.data(),
dataArray.begin() +
d_mpiPatternP2P->localOwnedSize() * d_blockSize);
}
#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
dftfe::utils::deviceSynchronize();
#endif
}


Expand All @@ -575,13 +595,15 @@ namespace dftfe
{
// initiate non-blocking receives from target processors
ValueType *recvArrayStartPtr = d_sendRecvBuffer.data();
#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
{
if (d_commProtocol == communicationProtocol::mpiHost)
recvArrayStartPtr = d_sendRecvBufferHostPinnedPtr->begin();

dftfe::utils::deviceSynchronize();
}
#endif
if (d_commProtocol != communicationProtocol::nccl)
for (size_type i = 0;
i < (d_mpiPatternP2P->getTargetProcIds()).size();
Expand Down Expand Up @@ -618,6 +640,7 @@ namespace dftfe
dataArray.data() +
d_mpiPatternP2P->localOwnedSize() * d_blockSize;

#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::mpiHost)
{
Expand All @@ -634,7 +657,7 @@ namespace dftfe
}
if constexpr (memorySpace == MemorySpace::DEVICE)
dftfe::utils::deviceSynchronize();
#if defined(DFTFE_WITH_CUDA_NCCL) || defined(DFTFE_WITH_HIP_RCCL)
# if defined(DFTFE_WITH_CUDA_NCCL) || defined(DFTFE_WITH_HIP_RCCL)
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::nccl)
{
Expand Down Expand Up @@ -682,6 +705,7 @@ namespace dftfe
}
ncclGroupEnd();
}
# endif
#endif
if (d_commProtocol != communicationProtocol::nccl)
for (size_type i = 0;
Expand Down Expand Up @@ -723,6 +747,7 @@ namespace dftfe
// initiate non-blocking receives from target processors
typename singlePrecType<ValueType>::type *recvArrayStartPtr =
d_sendRecvBufferSinglePrec.data();
#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
{
if (d_commProtocol == communicationProtocol::mpiHost)
Expand All @@ -731,6 +756,7 @@ namespace dftfe

dftfe::utils::deviceSynchronize();
}
#endif
if (d_commProtocol != communicationProtocol::nccl)
for (size_type i = 0;
i < (d_mpiPatternP2P->getTargetProcIds()).size();
Expand Down Expand Up @@ -773,6 +799,7 @@ namespace dftfe
typename singlePrecType<ValueType>::type *sendArrayStartPtr =
d_ghostDataCopySinglePrec.data();

#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::mpiHost)
{
Expand All @@ -789,7 +816,7 @@ namespace dftfe
}
if constexpr (memorySpace == MemorySpace::DEVICE)
dftfe::utils::deviceSynchronize();
#if defined(DFTFE_WITH_CUDA_NCCL) || defined(DFTFE_WITH_HIP_RCCL)
# if defined(DFTFE_WITH_CUDA_NCCL) || defined(DFTFE_WITH_HIP_RCCL)
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::nccl)
{
Expand Down Expand Up @@ -842,6 +869,7 @@ namespace dftfe
}
ncclGroupEnd();
}
# endif
#endif
if (d_commProtocol != communicationProtocol::nccl)
for (size_type i = 0;
Expand Down Expand Up @@ -910,6 +938,7 @@ namespace dftfe
}
if (d_commPrecision == communicationPrecision::full)
{
#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::mpiHost)
{
Expand All @@ -922,6 +951,7 @@ namespace dftfe
}
if constexpr (memorySpace == MemorySpace::DEVICE)
dftfe::utils::deviceSynchronize();
#endif
// accumulate add into locally owned entries from recv buffer
if ((d_mpiPatternP2P->getOwnedLocalIndicesForTargetProcs().size()) >
0)
Expand All @@ -936,6 +966,7 @@ namespace dftfe
}
else
{
#ifdef DFTFE_WITH_DEVICE
if constexpr (memorySpace == MemorySpace::DEVICE)
if (d_commProtocol == communicationProtocol::mpiHost)
{
Expand All @@ -949,6 +980,7 @@ namespace dftfe
}
if constexpr (memorySpace == MemorySpace::DEVICE)
dftfe::utils::deviceSynchronize();
#endif
// accumulate add into locally owned entries from recv buffer
if ((d_mpiPatternP2P->getOwnedLocalIndicesForTargetProcs().size()) >
0)
Expand Down

0 comments on commit cee0cd7

Please sign in to comment.