Skip to content

Commit

Permalink
Merge pull request #164 from YaphetS-jx/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
phanish-suryanarayana authored Mar 31, 2023
2 parents 530ea13 + ddb4976 commit 60616ef
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 17 deletions.
7 changes: 7 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
-Name
-changes

--------------
Mar 31, 2023
Name: Xin Jing
Changes: (initialization.c, eigenSolver.c, eigenSolverKpt.c)
1. Restore the correct memory estimation, which was accidentally deleted earlier
2. Rename the temporary YQ_BLCYC to Xorb_BLCYC / Xorb_BLCYC_kpt to make the naming consistent

--------------
Mar 31, 2023
Name: Boqin Zhang
Expand Down
14 changes: 7 additions & 7 deletions src/eigenSolver.c
Original file line number Diff line number Diff line change
Expand Up @@ -434,20 +434,20 @@ void CheFSI(SPARC_OBJ *pSPARC, double lambda_cutoff, double *x0, int count, int
#ifdef USE_DP_SUBEIG
DP_Subspace_Rotation(pSPARC, pSPARC->Xorb + spn_i*size_s);
#else
double *YQ_BLCYC;
if (pSPARC->npband > 1) {
// find Y * Q, store the result in Xorb (band+domain) and YQ_BLCYC (block cyclic format)
YQ_BLCYC = (double *)malloc(pSPARC->nr_orb_BLCYC * pSPARC->nc_orb_BLCYC * sizeof(double));
assert(YQ_BLCYC != NULL);
// find Y * Q, store the result in Xorb (band+domain) and Xorb_BLCYC (block cyclic format)
pSPARC->Xorb_BLCYC = (double *)malloc(pSPARC->nr_orb_BLCYC * pSPARC->nc_orb_BLCYC * sizeof(double));
assert(pSPARC->Xorb_BLCYC != NULL);
} else {
YQ_BLCYC = pSPARC->Xorb + spn_i*size_s;
pSPARC->Xorb_BLCYC = pSPARC->Xorb + spn_i*size_s;
}

// find Y * Q, store the result in Xorb (band+domain) and Xorb_BLCYC (block cyclic format)
Subspace_Rotation(pSPARC, pSPARC->Yorb_BLCYC, pSPARC->Q,
YQ_BLCYC, pSPARC->Xorb + spn_i*size_s, k, spn_i);
pSPARC->Xorb_BLCYC, pSPARC->Xorb + spn_i*size_s, k, spn_i);
if (pSPARC->npband > 1) {
free(YQ_BLCYC);
free(pSPARC->Xorb_BLCYC);
pSPARC->Xorb_BLCYC = NULL;
free(pSPARC->Yorb_BLCYC);
pSPARC->Yorb_BLCYC = NULL;
}
Expand Down
12 changes: 6 additions & 6 deletions src/eigenSolverKpt.c
Original file line number Diff line number Diff line change
Expand Up @@ -341,18 +341,18 @@ void CheFSI_kpt(SPARC_OBJ *pSPARC, double lambda_cutoff, double _Complex *x0, in
#ifdef USE_DP_SUBEIG
DP_Subspace_Rotation_kpt(pSPARC, pSPARC->Xorb_kpt + kpt*size_k + spn_i*size_s);
#else
double _Complex *YQ_BLCYC;
if (pSPARC->npband > 1) {
YQ_BLCYC = (double _Complex *)malloc(pSPARC->nr_orb_BLCYC * pSPARC->nc_orb_BLCYC * sizeof(double _Complex));
assert(YQ_BLCYC != NULL);
pSPARC->Xorb_BLCYC_kpt = (double _Complex *)malloc(pSPARC->nr_orb_BLCYC * pSPARC->nc_orb_BLCYC * sizeof(double _Complex));
assert(pSPARC->Xorb_BLCYC_kpt != NULL);
} else {
YQ_BLCYC = pSPARC->Xorb_kpt + kpt*size_k + spn_i*size_s;
pSPARC->Xorb_BLCYC_kpt = pSPARC->Xorb_kpt + kpt*size_k + spn_i*size_s;
}
// ScaLAPACK stores the eigenvectors in Q
Subspace_Rotation_kpt(pSPARC, pSPARC->Yorb_BLCYC_kpt, pSPARC->Q_kpt,
YQ_BLCYC, pSPARC->Xorb_kpt + kpt*size_k + spn_i*size_s, kpt, spn_i);
pSPARC->Xorb_BLCYC_kpt, pSPARC->Xorb_kpt + kpt*size_k + spn_i*size_s, kpt, spn_i);
if (pSPARC->npband > 1) {
free(YQ_BLCYC);
free(pSPARC->Xorb_BLCYC_kpt);
pSPARC->Xorb_BLCYC_kpt = NULL;
free(pSPARC->Yorb_BLCYC_kpt);
pSPARC->Yorb_BLCYC_kpt = NULL;
}
Expand Down
39 changes: 35 additions & 4 deletions src/initialization.c
Original file line number Diff line number Diff line change
Expand Up @@ -2439,11 +2439,14 @@ double estimate_memory(const SPARC_OBJ *pSPARC) {
#endif
return memory_usage;
}
int Nd = pSPARC->Nd * pSPARC->Nspinor;
int Nd = pSPARC->Nd;
int Ns = pSPARC->Nstates;
int Nspin = pSPARC->Nspin;
int Nkpts_sym = pSPARC->Nkpts_sym;
int m = pSPARC->MixingHistory;
int npspin = pSPARC->npspin;
int npkpt = pSPARC->npkpt;
int npNd = pSPARC->npNd;

int type_size;
if (pSPARC->isGammaPoint) {
Expand All @@ -2453,15 +2456,39 @@ double estimate_memory(const SPARC_OBJ *pSPARC) {
}

// orbitals (dominant)
int ncpy_orbitals = 6; // copies required during chebyshev filtering
double memory_orbitals = (double) ncpy_orbitals * Nd * Ns * Nspin * Nkpts_sym * type_size;
int ncpy_orbitals; // extra copies required during CheFSI
if (pSPARC->npband > 1) {
// MKL pdgemr2d internally creates ~2.5 copies during pdgemr2d in projection + Yorb, Yorb_BLCYC, HY_BLCYC
ncpy_orbitals = 5.5;
} else {
// when there's no band parallelization (domain only), then pdgemr2d is not needed for projection
// moreover, the block cyclic formats Yorb_BLCYC, HY_BLCYC (also YQ_BLCYC during rotation) are not needed
// so the necessary copies are: Yorb, Ynew (during Chebyshev filtering)
// sometimes dgemm (MKL) also might internally create ~0.5 copy of the orbital, we add 0.5 for safety
ncpy_orbitals = 2.5;
}
#ifdef USE_DP_SUBEIG
ncpy_orbitals = 6; // DP requires 4 extra copies, Yorb, and a temp copy (Chebyshev filtering and Projection)
#endif
double memory_orbitals = (double) Nd * Ns * (ncpy_orbitals*npspin*npkpt + Nspin * Nkpts_sym) * type_size;

// vectors: rho, phi, Veff, mixing history vectors, etc.
int ncpy_vectors = 6 + 4 * Nspin + 2 * m * Nspin + 3 * (2*Nspin-1) + 1;
double memory_vectors = (double) ncpy_vectors * Nd * sizeof(double);

// subspace matrix: Hs, Ms, Q
int ncpy_matrices = 3 * npNd;
#ifdef USE_DP_SUBEIG
ncpy_matrices = 3 * nproc; // DP stores Hp_local,Mp_local,eigvecs in (almost) every process
#endif
double memory_matrices = (double) ncpy_matrices * Ns * Ns * sizeof(double);

// total memory
double memory_usage = memory_orbitals + memory_vectors;
double memory_usage = memory_orbitals + memory_vectors + memory_matrices;

// add some buffer for other memory
const double buf_rat = 0.10; // add 10% more memory
memory_usage *= (1.0 + buf_rat);

// memory for Exact Exchange part
double memory_exx = 0.0;
Expand All @@ -2481,10 +2508,14 @@ double estimate_memory(const SPARC_OBJ *pSPARC) {
printf("orbitals : %s\n", mem_str);
formatBytes(memory_vectors, 32, mem_str);
printf("global sized vectors : %s\n", mem_str);
formatBytes(memory_matrices, 32, mem_str);
printf("subspace matrices : %s\n", mem_str);
if (pSPARC->usefock > 0) {
formatBytes(memory_exx, 32, mem_str);
printf("global exact exchange memory : %s\n", mem_str);
}
formatBytes(memory_usage*buf_rat/(1.0+buf_rat), 32, mem_str);
printf("others : %s\n", mem_str);
printf("----------------------------------------------\n");
formatBytes(memory_usage/nproc,32,mem_str);
printf("Estimated memory usage per processor: %s\n",mem_str);
Expand Down

0 comments on commit 60616ef

Please sign in to comment.