remove use of thrust vector everywhere and other refactoring changes

dftfeDevelopers · Dec 5, 2022 · 40bb155 · 40bb155
1 parent 9ce2976
commit 40bb155
Show file tree

Hide file tree

Showing 50 changed files with 1,999 additions and 2,503 deletions.
diff --git a/include/DataTypeOverloads.h b/include/DataTypeOverloads.h
@@ -0,0 +1,101 @@
+// ---------------------------------------------------------------------
+//
+// Copyright (c) 2017-2022 The Regents of the University of Michigan and DFT-FE
+// authors.
+//
+// This file is part of the DFT-FE code.
+//
+// The DFT-FE code is free software; you can use it, redistribute
+// it, and/or modify it under the terms of the GNU Lesser General
+// Public License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+// The full text of the license can be found in the file LICENSE at
+// the top level of the DFT-FE distribution.
+//
+// ---------------------------------------------------------------------
+
+
+#ifndef dftfeDataTypeOverloads_h
+#define dftfeDataTypeOverloads_h
+
+#include <complex>
+namespace dftfe
+{
+ namespace utils // realPart / imagPart / complexConj overloads for float, double and their std::complex counterparts
+ {
+ inline double // real part of a real double: the value itself
+ realPart(const double x)
+ {
+ return x;
+ }
+
+ inline float // real part of a real float: the value itself
+ realPart(const float x)
+ {
+ return x;
+ }
+
+ inline double // real component of a double-precision complex number
+ realPart(const std::complex<double> x)
+ {
+ return x.real();
+ }
+
+ inline float // real component of a single-precision complex number
+ realPart(const std::complex<float> x)
+ {
+ return x.real();
+ }
+
+ inline double // imaginary part of a real double: always zero, x is not read
+ imagPart(const double x)
+ { 
+ return 0;
+ }
+
+
+ inline float // imaginary part of a real float: always zero, x is not read
+ imagPart(const float x)
+ { 
+ return 0;
+ }
+
+ inline double // imaginary component of a double-precision complex number
+ imagPart(const std::complex<double> x)
+ { 
+ return x.imag();
+ }
+
+ inline float // imaginary component of a single-precision complex number
+ imagPart(const std::complex<float> x)
+ { 
+ return x.imag();
+ }
+
+ inline double // complex conjugate of a real double: the value itself
+ complexConj(const double x)
+ {
+ return x;
+ }
+
+ inline float // complex conjugate of a real float: the value itself
+ complexConj(const float x)
+ {
+ return x;
+ }
+
+ inline std::complex<double> // complex conjugate via std::conj, double precision
+ complexConj(const std::complex<double> x)
+ {
+ return std::conj(x);
+ }
+
+ inline std::complex<float> // complex conjugate via std::conj, single precision
+ complexConj(const std::complex<float> x)
+ {
+ return std::conj(x);
+ }
+ } // namespace utils
+} // namespace dftfe
+
+#endif
diff --git a/include/DeviceDataTypeOverloads.cu.h b/include/DeviceDataTypeOverloads.cu.h
@@ -55,6 +55,12 @@ namespace dftfe
  return a;
  }
 
+ __inline__ __device__ global_size_type // global_size_type overload of conj: returns a unchanged
+ conj(global_size_type a)
+ {
+ return a;
+ }
+
  __inline__ __device__ int
  conj(int a)
  {
@@ -94,6 +100,12 @@ namespace dftfe
  return a * b;
  }
 
+ __inline__ __device__ global_size_type // global_size_type overload of mult: returns the product a * b
+ mult(global_size_type a, global_size_type b)
+ {
+ return a * b;
+ }
+
  __inline__ __device__ int
  mult(int a, int b)
  {
@@ -192,6 +204,12 @@ namespace dftfe
  return a + b;
  }
 
+ __inline__ __device__ global_size_type // global_size_type overload of add: returns the sum a + b
+ add(global_size_type a, global_size_type b)
+ {
+ return a + b;
+ }
+
  __inline__ __device__ int
  add(int a, int b)
  {
@@ -229,6 +247,12 @@ namespace dftfe
  return a - b;
  }
 
+ __inline__ __device__ global_size_type // global_size_type overload of sub: returns the difference a - b
+ sub(global_size_type a, global_size_type b)
+ {
+ return a - b;
+ }
+
  __inline__ __device__ int
  sub(int a, int b)
  {
@@ -265,6 +289,12 @@ namespace dftfe
  return a / b;
  }
 
+ __inline__ __device__ global_size_type // global_size_type overload of div: returns a / b; b is not checked for zero
+ div(global_size_type a, global_size_type b)
+ {
+ return a / b;
+ }
+
  __inline__ __device__ int
  div(int a, int b)
  {
@@ -378,6 +408,18 @@ namespace dftfe
  return a;
  }
 
+ inline global_size_type * // pointer overload for global_size_type: the pointer is returned as-is, no cast
+ makeDataTypeDeviceCompatible(global_size_type *a)
+ {
+ return a;
+ }
+
+ inline const global_size_type * // const-pointer overload for global_size_type: the pointer is returned as-is, no cast
+ makeDataTypeDeviceCompatible(const global_size_type *a)
+ {
+ return a;
+ }
+
  inline double *
  makeDataTypeDeviceCompatible(double *a)
  {
@@ -438,6 +480,12 @@ namespace dftfe
  return a;
  }
 
+ inline global_size_type // by-value overload for global_size_type: the value is returned unchanged
+ makeDataTypeDeviceCompatible(global_size_type a)
+ {
+ return a;
+ }
+
  inline double
  makeDataTypeDeviceCompatible(double a)
  {

diff --git a/include/DeviceTypeConfig.cu.h b/include/DeviceTypeConfig.cu.h
@@ -17,11 +17,13 @@
 #ifndef dftfeDeviceTypeConfig_cuh
 #define dftfeDeviceTypeConfig_cuh
 
+# include <cuComplex.h>
 
 namespace dftfe
 {
  namespace utils
  {
+ typedef cuDoubleComplex deviceDoubleComplex;
  typedef cudaStream_t deviceStream_t;
  typedef cudaEvent_t deviceEvent_t; 
  typedef cudaError_t deviceError_t;

diff --git a/include/MemoryStorage.h b/include/MemoryStorage.h
@@ -81,6 +81,13 @@ namespace dftfe
  */
  ~MemoryStorage();
 
+ /**
+ * @brief Clear and set d_data to nullptr
+ */
+ void
+ clear();
+
+
  /**
  * @brief Set all the entries to a given value
  * @param[in] val The value to which the entries are to be set
@@ -142,26 +149,23 @@ namespace dftfe
  MemoryStorage &
  operator=(MemoryStorage &&rhs) noexcept;
 
- // // This part does not work for GPU version, will work on this
- // until
- // // having cleaner solution.
- // /**
- // * @brief Operator to get a reference to a element of the Vector
- // * @param[in] i is the index to the element of the Vector
- // * @returns reference to the element of the Vector
- // * @throws exception if i >= size of the Vector
- // */
- // reference
- // operator[](size_type i);
- //
- // /**
- // * @brief Operator to get a const reference to a element of the Vector
- // * @param[in] i is the index to the element of the Vector
- // * @returns const reference to the element of the Vector
- // * @throws exception if i >= size of the Vector
- // */
- // const_reference
- // operator[](size_type i) const;
+ /**
+ * @brief Operator to get a reference to an element of the Vector
+ * @param[in] i is the index to the element of the Vector
+ * @returns reference to the element of the Vector
+ * @throws exception if i >= size of the Vector
+ */
+ reference
+ operator[](size_type i);
+
+ /**
+ * @brief Operator to get a const reference to an element of the Vector
+ * @param[in] i is the index to the element of the Vector
+ * @returns const reference to the element of the Vector
+ * @throws exception if i >= size of the Vector
+ */
+ const_reference
+ operator[](size_type i) const;
 
  void
  swap(MemoryStorage &rhs);

diff --git a/include/chebyshevOrthogonalizedSubspaceIterationSolverDevice.h b/include/chebyshevOrthogonalizedSubspaceIterationSolverDevice.h
@@ -60,8 +60,8 @@ namespace dftfe
  double
  solve(operatorDFTDeviceClass & operatorMatrix,
  elpaScalaManager & elpaScala,
- dataTypes::numberDevice *eigenVectorsFlattenedDevice,
- dataTypes::numberDevice *eigenVectorsRotFracDensityFlattenedDevice,
+ dataTypes::number *eigenVectorsFlattenedDevice,
+ dataTypes::number *eigenVectorsRotFracDensityFlattenedDevice,
  const unsigned int flattenedSize,
  const unsigned int totalNumberWaveFunctions,
  std::vector<double> & eigenValues,
@@ -80,7 +80,7 @@ namespace dftfe
  void
  solveNoRR(operatorDFTDeviceClass & operatorMatrix,
  elpaScalaManager & elpaScala,
- dataTypes::numberDevice *eigenVectorsFlattenedDevice,
+ dataTypes::number *eigenVectorsFlattenedDevice,
  const unsigned int flattenedSize,
  const unsigned int totalNumberWaveFunctions,
  std::vector<double> & eigenValues,
@@ -96,7 +96,7 @@ namespace dftfe
  void
  densityMatrixEigenBasisFirstOrderResponse(
  operatorDFTDeviceClass & operatorMatrix,
- dataTypes::numberDevice * eigenVectorsFlattenedDevice,
+ dataTypes::number * eigenVectorsFlattenedDevice,
  const unsigned int flattenedSize,
  const unsigned int totalNumberWaveFunctions,
  const std::vector<double> &eigenValues,
@@ -137,16 +137,16 @@ namespace dftfe
  //
  // temporary parallel vectors needed for Chebyshev filtering
  //
- distributedDeviceVec<dataTypes::numberDevice> d_YArray;
+ distributedDeviceVec<dataTypes::number> d_YArray;
 
- distributedDeviceVec<dataTypes::numberFP32Device>
+ distributedDeviceVec<dataTypes::numberFP32>
  d_deviceFlattenedFloatArrayBlock;
 
- distributedDeviceVec<dataTypes::numberDevice> d_deviceFlattenedArrayBlock2;
+ distributedDeviceVec<dataTypes::number> d_deviceFlattenedArrayBlock2;
 
- distributedDeviceVec<dataTypes::numberDevice> d_YArray2;
+ distributedDeviceVec<dataTypes::number> d_YArray2;
 
- distributedDeviceVec<dataTypes::numberDevice> d_projectorKetTimesVector2;
+ distributedDeviceVec<dataTypes::number> d_projectorKetTimesVector2;
 
  bool d_isTemporaryParallelVectorsCreated;
 

diff --git a/include/constraintMatrixInfoDevice.h b/include/constraintMatrixInfoDevice.h
@@ -19,11 +19,10 @@
 # ifndef constraintMatrixInfoDevice_H_
 # define constraintMatrixInfoDevice_H_
 
-# include <thrust/device_vector.h>
-
+#include <MemoryStorage.h>
 # include <vector>
 
-# include "headers.h"
+# include <headers.h>
 
 namespace dftfe
 {
@@ -105,7 +104,7 @@ namespace dftfe
 
  inline void
  distribute_slave_to_master(
- distributedDeviceVec<cuDoubleComplex> &fieldVector,
+ distributedDeviceVec<std::complex<double>> &fieldVector,
  const unsigned int blockSize) const
  {}
 
@@ -121,7 +120,7 @@ namespace dftfe
  */
  void
  distribute_slave_to_master(
- distributedDeviceVec<cuDoubleComplex> &fieldVector,
+ distributedDeviceVec<std::complex<double>> &fieldVector,
  double * tempReal,
  double * tempImag,
  const unsigned int blockSize) const;
@@ -146,7 +145,7 @@ namespace dftfe
  */
  void
  distribute_slave_to_master(
- distributedDeviceVec<cuFloatComplex> &fieldVector,
+ distributedDeviceVec<std::complex<float>> &fieldVector,
  float * tempReal,
  float * tempImag,
  const unsigned int blockSize) const;
@@ -180,13 +179,13 @@ namespace dftfe
  std::vector<dealii::types::global_dof_index>
  d_localIndexMapUnflattenedToFlattened;
 
- thrust::device_vector<unsigned int> d_rowIdsLocalDevice;
- thrust::device_vector<unsigned int> d_columnIdsLocalDevice;
- thrust::device_vector<double> d_columnValuesDevice;
- thrust::device_vector<double> d_inhomogenitiesDevice;
- thrust::device_vector<unsigned int> d_rowSizesDevice;
- thrust::device_vector<unsigned int> d_rowSizesAccumulatedDevice;
- thrust::device_vector<dealii::types::global_dof_index>
+ dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_rowIdsLocalDevice;
+ dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_columnIdsLocalDevice;
+ dftfe::utils::MemoryStorage<double, dftfe::utils::MemorySpace::DEVICE> d_columnValuesDevice;
+ dftfe::utils::MemoryStorage<double, dftfe::utils::MemorySpace::DEVICE> d_inhomogenitiesDevice;
+ dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_rowSizesDevice;
+ dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_rowSizesAccumulatedDevice;
+ dftfe::utils::MemoryStorage<dealii::types::global_dof_index, dftfe::utils::MemorySpace::DEVICE>
  d_localIndexMapUnflattenedToFlattenedDevice;
 
  std::vector<unsigned int> d_rowIdsLocalBins;
@@ -196,10 +195,10 @@ namespace dftfe
  std::vector<unsigned int> d_binColumnSizes;
  std::vector<unsigned int> d_binColumnSizesAccumulated;
 
- thrust::device_vector<unsigned int> d_rowIdsLocalBinsDevice;
- thrust::device_vector<unsigned int> d_columnIdsLocalBinsDevice;
- thrust::device_vector<unsigned int> d_columnIdToRowIdMapBinsDevice;
- thrust::device_vector<double> d_columnValuesBinsDevice;
+ dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_rowIdsLocalBinsDevice;
+ dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_columnIdsLocalBinsDevice;
+ dftfe::utils::MemoryStorage<unsigned int, dftfe::utils::MemorySpace::DEVICE> d_columnIdToRowIdMapBinsDevice;
+ dftfe::utils::MemoryStorage<double, dftfe::utils::MemorySpace::DEVICE> d_columnValuesBinsDevice;
 
  unsigned int d_numConstrainedDofs;
  };