Skip to content

Commit

Permalink
remove use of thrust vector everywhere and other refactoring changes
Browse files Browse the repository at this point in the history
  • Loading branch information
dsambit committed Dec 5, 2022
1 parent 9ce2976 commit 40bb155
Show file tree
Hide file tree
Showing 50 changed files with 1,999 additions and 2,503 deletions.
101 changes: 101 additions & 0 deletions include/DataTypeOverloads.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// ---------------------------------------------------------------------
//
// Copyright (c) 2017-2022 The Regents of the University of Michigan and DFT-FE
// authors.
//
// This file is part of the DFT-FE code.
//
// The DFT-FE code is free software; you can use it, redistribute
// it, and/or modify it under the terms of the GNU Lesser General
// Public License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// The full text of the license can be found in the file LICENSE at
// the top level of the DFT-FE distribution.
//
// ---------------------------------------------------------------------


#ifndef dftfeDataTypeOverloads_h
#define dftfeDataTypeOverloads_h

#include <complex>
namespace dftfe
{
  namespace utils
  {
    /**
     * @brief Real part of a real double-precision scalar.
     * @param[in] x input value
     * @returns x itself
     */
    inline double
    realPart(const double x)
    {
      return x;
    }

    /**
     * @brief Real part of a real single-precision scalar.
     * @param[in] x input value
     * @returns x itself
     */
    inline float
    realPart(const float x)
    {
      return x;
    }

    /**
     * @brief Real part of a double-precision complex number.
     * @param[in] x input value
     * @returns x.real()
     */
    inline double
    realPart(const std::complex<double> x)
    {
      return x.real();
    }

    /**
     * @brief Real part of a single-precision complex number.
     * @param[in] x input value
     * @returns x.real()
     */
    inline float
    realPart(const std::complex<float> x)
    {
      return x.real();
    }

    /**
     * @brief Imaginary part of a real double-precision scalar.
     *
     * The argument is intentionally unnamed: its value is never read, and
     * leaving it unnamed avoids unused-parameter warnings.
     *
     * @returns always 0.0
     */
    inline double
    imagPart(const double /*x*/)
    {
      return 0.0;
    }

    /**
     * @brief Imaginary part of a real single-precision scalar.
     *
     * The argument is intentionally unnamed: its value is never read, and
     * leaving it unnamed avoids unused-parameter warnings.
     *
     * @returns always 0.0f
     */
    inline float
    imagPart(const float /*x*/)
    {
      return 0.0f;
    }

    /**
     * @brief Imaginary part of a double-precision complex number.
     * @param[in] x input value
     * @returns x.imag()
     */
    inline double
    imagPart(const std::complex<double> x)
    {
      return x.imag();
    }

    /**
     * @brief Imaginary part of a single-precision complex number.
     * @param[in] x input value
     * @returns x.imag()
     */
    inline float
    imagPart(const std::complex<float> x)
    {
      return x.imag();
    }

    /**
     * @brief Complex conjugate of a real double-precision scalar.
     * @param[in] x input value
     * @returns x itself (a real number is its own conjugate)
     */
    inline double
    complexConj(const double x)
    {
      return x;
    }

    /**
     * @brief Complex conjugate of a real single-precision scalar.
     * @param[in] x input value
     * @returns x itself (a real number is its own conjugate)
     */
    inline float
    complexConj(const float x)
    {
      return x;
    }

    /**
     * @brief Complex conjugate of a double-precision complex number.
     * @param[in] x input value
     * @returns std::conj(x)
     */
    inline std::complex<double>
    complexConj(const std::complex<double> x)
    {
      return std::conj(x);
    }

    /**
     * @brief Complex conjugate of a single-precision complex number.
     * @param[in] x input value
     * @returns std::conj(x)
     */
    inline std::complex<float>
    complexConj(const std::complex<float> x)
    {
      return std::conj(x);
    }
  } // namespace utils
} // namespace dftfe

#endif
48 changes: 48 additions & 0 deletions include/DeviceDataTypeOverloads.cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ namespace dftfe
return a;
}

/**
 * @brief Device-side conj overload for global_size_type.
 * @param[in] a input value
 * @returns a unchanged
 */
__inline__ __device__ global_size_type
conj(global_size_type a)
{
return a;
}

__inline__ __device__ int
conj(int a)
{
Expand Down Expand Up @@ -94,6 +100,12 @@ namespace dftfe
return a * b;
}

/**
 * @brief Device-side mult overload for global_size_type.
 * @param[in] a left operand
 * @param[in] b right operand
 * @returns a * b
 */
__inline__ __device__ global_size_type
mult(global_size_type a, global_size_type b)
{
return a * b;
}

__inline__ __device__ int
mult(int a, int b)
{
Expand Down Expand Up @@ -192,6 +204,12 @@ namespace dftfe
return a + b;
}

/**
 * @brief Device-side add overload for global_size_type.
 * @param[in] a left operand
 * @param[in] b right operand
 * @returns a + b
 */
__inline__ __device__ global_size_type
add(global_size_type a, global_size_type b)
{
return a + b;
}

__inline__ __device__ int
add(int a, int b)
{
Expand Down Expand Up @@ -229,6 +247,12 @@ namespace dftfe
return a - b;
}

/**
 * @brief Device-side sub overload for global_size_type.
 * @param[in] a left operand
 * @param[in] b right operand
 * @returns a - b
 */
__inline__ __device__ global_size_type
sub(global_size_type a, global_size_type b)
{
// NOTE(review): if global_size_type is an unsigned type (its definition is
// not visible here), a < b wraps around rather than going negative -- confirm.
return a - b;
}

__inline__ __device__ int
sub(int a, int b)
{
Expand Down Expand Up @@ -265,6 +289,12 @@ namespace dftfe
return a / b;
}

/**
 * @brief Device-side div overload for global_size_type.
 * @param[in] a numerator
 * @param[in] b denominator
 * @returns a / b
 */
__inline__ __device__ global_size_type
div(global_size_type a, global_size_type b)
{
// No check for b == 0 is performed here; callers must pass a valid
// denominator.
return a / b;
}

__inline__ __device__ int
div(int a, int b)
{
Expand Down Expand Up @@ -378,6 +408,18 @@ namespace dftfe
return a;
}

/**
 * @brief makeDataTypeDeviceCompatible overload for a pointer to
 * global_size_type.
 * @param[in] a input pointer
 * @returns the same pointer, with no conversion applied
 */
inline global_size_type *
makeDataTypeDeviceCompatible(global_size_type *a)
{
return a;
}

/**
 * @brief makeDataTypeDeviceCompatible overload for a pointer to const
 * global_size_type.
 * @param[in] a input pointer
 * @returns the same pointer, with no conversion applied
 */
inline const global_size_type *
makeDataTypeDeviceCompatible(const global_size_type *a)
{
return a;
}

inline double *
makeDataTypeDeviceCompatible(double *a)
{
Expand Down Expand Up @@ -438,6 +480,12 @@ namespace dftfe
return a;
}

/**
 * @brief makeDataTypeDeviceCompatible overload for a global_size_type value.
 * @param[in] a input value
 * @returns a unchanged
 */
inline global_size_type
makeDataTypeDeviceCompatible(global_size_type a)
{
return a;
}

inline double
makeDataTypeDeviceCompatible(double a)
{
Expand Down
2 changes: 2 additions & 0 deletions include/DeviceTypeConfig.cu.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
#ifndef dftfeDeviceTypeConfig_cuh
#define dftfeDeviceTypeConfig_cuh

# include <cuComplex.h>

namespace dftfe
{
namespace utils
{
// Aliases that expose CUDA types under device-prefixed names inside
// dftfe::utils.
// NOTE(review): presumably these let other code avoid naming CUDA types
// directly -- confirm against the callers.
typedef cuDoubleComplex deviceDoubleComplex; // complex double type from cuComplex.h
typedef cudaStream_t deviceStream_t; // CUDA stream handle
typedef cudaEvent_t deviceEvent_t; // CUDA event handle
typedef cudaError_t deviceError_t; // CUDA runtime error code
Expand Down
44 changes: 24 additions & 20 deletions include/MemoryStorage.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ namespace dftfe
*/
~MemoryStorage();

/**
* @brief Clear the storage and set d_data to nullptr
*/
void
clear();


/**
* @brief Set all the entries to a given value
* @param[in] val The value to which the entries are to be set
Expand Down Expand Up @@ -142,26 +149,23 @@ namespace dftfe
MemoryStorage &
operator=(MemoryStorage &&rhs) noexcept;

// // This part does not work for GPU version, will work on this
// until
// // having cleaner solution.
// /**
// * @brief Operator to get a reference to a element of the Vector
// * @param[in] i is the index to the element of the Vector
// * @returns reference to the element of the Vector
// * @throws exception if i >= size of the Vector
// */
// reference
// operator[](size_type i);
//
// /**
// * @brief Operator to get a const reference to a element of the Vector
// * @param[in] i is the index to the element of the Vector
// * @returns const reference to the element of the Vector
// * @throws exception if i >= size of the Vector
// */
// const_reference
// operator[](size_type i) const;
/**
* @brief Operator to get a reference to an element of the Vector
* @param[in] i is the index to the element of the Vector
* @returns reference to the element of the Vector
* @throws exception if i >= size of the Vector
*/
reference
operator[](size_type i);

/**
* @brief Operator to get a const reference to an element of the Vector
* @param[in] i is the index to the element of the Vector
* @returns const reference to the element of the Vector
* @throws exception if i >= size of the Vector
*/
const_reference
operator[](size_type i) const;

void
swap(MemoryStorage &rhs);
Expand Down
18 changes: 9 additions & 9 deletions include/chebyshevOrthogonalizedSubspaceIterationSolverDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ namespace dftfe
double
solve(operatorDFTDeviceClass & operatorMatrix,
elpaScalaManager & elpaScala,
dataTypes::numberDevice *eigenVectorsFlattenedDevice,
dataTypes::numberDevice *eigenVectorsRotFracDensityFlattenedDevice,
dataTypes::number *eigenVectorsFlattenedDevice,
dataTypes::number *eigenVectorsRotFracDensityFlattenedDevice,
const unsigned int flattenedSize,
const unsigned int totalNumberWaveFunctions,
std::vector<double> & eigenValues,
Expand All @@ -80,7 +80,7 @@ namespace dftfe
void
solveNoRR(operatorDFTDeviceClass & operatorMatrix,
elpaScalaManager & elpaScala,
dataTypes::numberDevice *eigenVectorsFlattenedDevice,
dataTypes::number *eigenVectorsFlattenedDevice,
const unsigned int flattenedSize,
const unsigned int totalNumberWaveFunctions,
std::vector<double> & eigenValues,
Expand All @@ -96,7 +96,7 @@ namespace dftfe
void
densityMatrixEigenBasisFirstOrderResponse(
operatorDFTDeviceClass & operatorMatrix,
dataTypes::numberDevice * eigenVectorsFlattenedDevice,
dataTypes::number * eigenVectorsFlattenedDevice,
const unsigned int flattenedSize,
const unsigned int totalNumberWaveFunctions,
const std::vector<double> &eigenValues,
Expand Down Expand Up @@ -137,16 +137,16 @@ namespace dftfe
//
// temporary parallel vectors needed for Chebyshev filtering
//
distributedDeviceVec<dataTypes::numberDevice> d_YArray;
distributedDeviceVec<dataTypes::number> d_YArray;

distributedDeviceVec<dataTypes::numberFP32Device>
distributedDeviceVec<dataTypes::numberFP32>
d_deviceFlattenedFloatArrayBlock;

distributedDeviceVec<dataTypes::numberDevice> d_deviceFlattenedArrayBlock2;
distributedDeviceVec<dataTypes::number> d_deviceFlattenedArrayBlock2;

distributedDeviceVec<dataTypes::numberDevice> d_YArray2;
distributedDeviceVec<dataTypes::number> d_YArray2;

distributedDeviceVec<dataTypes::numberDevice> d_projectorKetTimesVector2;
distributedDeviceVec<dataTypes::number> d_projectorKetTimesVector2;

bool d_isTemporaryParallelVectorsCreated;

Expand Down
33 changes: 16 additions & 17 deletions include/constraintMatrixInfoDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@
# ifndef constraintMatrixInfoDevice_H_
# define constraintMatrixInfoDevice_H_

# include <thrust/device_vector.h>

#include <MemoryStorage.h>
# include <vector>

# include "headers.h"
# include <headers.h>

namespace dftfe
{
Expand Down Expand Up @@ -105,7 +104,7 @@ namespace dftfe

inline void
distribute_slave_to_master(
distributedDeviceVec<cuDoubleComplex> &fieldVector,
distributedDeviceVec<std::complex<double>> &fieldVector,
const unsigned int blockSize) const
{}

Expand All @@ -121,7 +120,7 @@ namespace dftfe
*/
void
distribute_slave_to_master(
distributedDeviceVec<cuDoubleComplex> &fieldVector,
distributedDeviceVec<std::complex<double>> &fieldVector,
double * tempReal,
double * tempImag,
const unsigned int blockSize) const;
Expand All @@ -146,7 +145,7 @@ namespace dftfe
*/
void
distribute_slave_to_master(
distributedDeviceVec<cuFloatComplex> &fieldVector,
distributedDeviceVec<std::complex<float>> &fieldVector,
float * tempReal,
float * tempImag,
const unsigned int blockSize) const;
Expand Down Expand Up @@ -180,13 +179,13 @@ namespace dftfe
std::vector<dealii::types::global_dof_index>
d_localIndexMapUnflattenedToFlattened;

thrust::device_vector<unsigned int> d_rowIdsLocalDevice;
thrust::device_vector<unsigned int> d_columnIdsLocalDevice;
thrust::device_vector<double> d_columnValuesDevice;
thrust::device_vector<double> d_inhomogenitiesDevice;
thrust::device_vector<unsigned int> d_rowSizesDevice;
thrust::device_vector<unsigned int> d_rowSizesAccumulatedDevice;
thrust::device_vector<dealii::types::global_dof_index>
dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_rowIdsLocalDevice;
dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_columnIdsLocalDevice;
dftfe::utils::MemoryStorage<double, dftfe::utils::MemorySpace::DEVICE> d_columnValuesDevice;
dftfe::utils::MemoryStorage<double, dftfe::utils::MemorySpace::DEVICE> d_inhomogenitiesDevice;
dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_rowSizesDevice;
dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_rowSizesAccumulatedDevice;
dftfe::utils::MemoryStorage<dealii::types::global_dof_index, dftfe::utils::MemorySpace::DEVICE>
d_localIndexMapUnflattenedToFlattenedDevice;

std::vector<unsigned int> d_rowIdsLocalBins;
Expand All @@ -196,10 +195,10 @@ namespace dftfe
std::vector<unsigned int> d_binColumnSizes;
std::vector<unsigned int> d_binColumnSizesAccumulated;

thrust::device_vector<unsigned int> d_rowIdsLocalBinsDevice;
thrust::device_vector<unsigned int> d_columnIdsLocalBinsDevice;
thrust::device_vector<unsigned int> d_columnIdToRowIdMapBinsDevice;
thrust::device_vector<double> d_columnValuesBinsDevice;
dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_rowIdsLocalBinsDevice;
dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_columnIdsLocalBinsDevice;
dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_columnIdToRowIdMapBinsDevice;
dftfe::utils::MemoryStorage<double, dftfe::utils::MemorySpace::DEVICE> d_columnValuesBinsDevice;

unsigned int d_numConstrainedDofs;
};
Expand Down
Loading

0 comments on commit 40bb155

Please sign in to comment.