From 2401da2b0a6244b26ff2abba6849a523e61911b9 Mon Sep 17 00:00:00 2001 From: YaphetS-jx Date: Tue, 26 Sep 2023 11:56:22 -0400 Subject: [PATCH] Enable cyclix with DP only and update 1 test --- ChangeLog | 6 ++ src/cyclix/cyclix_tools.c | 110 ++++++++++++++++++++++-------- src/cyclix/include/cyclix_tools.h | 9 +++ src/eigenSolver.c | 45 +++++------- src/eigenSolverKpt.c | 43 +++++------- src/initialization.c | 2 +- tests/SPARC_testing_script.py | 2 +- 7 files changed, 131 insertions(+), 86 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6f4e2578..adbb4a68 100644 --- a/ChangeLog +++ b/ChangeLog @@ -3,6 +3,12 @@ -Name -changes +-------------- +Sep 26, 2023 +Name: Xin Jing +Changes: (eigenSolver.c, cyclix/) +1. Enable cyclix with DP only and update 1 test + -------------- Sep 23, 2023 Name: Xin Jing diff --git a/src/cyclix/cyclix_tools.c b/src/cyclix/cyclix_tools.c index f30e3dfd..b1128c49 100644 --- a/src/cyclix/cyclix_tools.c +++ b/src/cyclix/cyclix_tools.c @@ -15,12 +15,22 @@ #include #include #include +#include + +/* BLAS and LAPACK routines */ +#ifdef USE_MKL + #define MKL_Complex16 double _Complex + #include +#else + #include + #include +#endif + // this is for checking existence of files #include "cyclix_tools.h" #include "isddft.h" - void init_cyclix(SPARC_OBJ *pSPARC) { pSPARC->lambda_sorted = (double *)calloc(pSPARC->Nstates * pSPARC->Nkpts_kptcomm * pSPARC->Nspin_spincomm, sizeof(double)); @@ -47,20 +57,20 @@ void init_cyclix(SPARC_OBJ *pSPARC) pSPARC->Intgwt_phi = (double *) malloc(pSPARC->Nd_d * sizeof(double)); Integration_weights_cyclix(pSPARC, pSPARC->Intgwt_phi, pSPARC->DMVertices[0], pSPARC->Nx_d, pSPARC->Ny_d, pSPARC->Nz_d); - #if defined(USE_MKL) || defined(USE_SCALAPACK) - if (pSPARC->isGammaPoint){ - pSPARC->vl = (double *) calloc(pSPARC->nr_Hp_BLCYC * pSPARC->nc_Hp_BLCYC, sizeof(double)); - pSPARC->vr = (double *) calloc(pSPARC->nr_Hp_BLCYC * pSPARC->nc_Hp_BLCYC, sizeof(double)); - pSPARC->lambda_temp1 = (double *)calloc(pSPARC->Nstates, sizeof(double)); - pSPARC->lambda_temp2 = (double *)calloc(pSPARC->Nstates, sizeof(double)); - pSPARC->lambda_temp3 = (double *)calloc(pSPARC->Nstates, sizeof(double)); - } else{ - pSPARC->vl_kpt = (double _Complex *) calloc(pSPARC->nr_Hp_BLCYC * pSPARC->nc_Hp_BLCYC, sizeof(double _Complex)); - pSPARC->vr_kpt = (double _Complex *) calloc(pSPARC->nr_Hp_BLCYC * pSPARC->nc_Hp_BLCYC, sizeof(double _Complex)); - pSPARC->lambda_temp1_kpt = (double _Complex *)calloc(pSPARC->Nstates, sizeof(double _Complex)); - pSPARC->lambda_temp2_kpt = (double _Complex *)calloc(pSPARC->Nstates, sizeof(double _Complex)); + if (pSPARC->bandcomm_index == 0) { + if (pSPARC->isGammaPoint){ + pSPARC->vl = (double *) calloc(pSPARC->Nstates * pSPARC->Nstates, sizeof(double)); + pSPARC->vr = (double *) calloc(pSPARC->Nstates * pSPARC->Nstates, sizeof(double)); + pSPARC->lambda_temp1 = (double *)calloc(pSPARC->Nstates, sizeof(double)); + pSPARC->lambda_temp2 = (double *)calloc(pSPARC->Nstates, sizeof(double)); + pSPARC->lambda_temp3 = (double *)calloc(pSPARC->Nstates, sizeof(double)); + } else{ + pSPARC->vl_kpt = (double _Complex *) calloc(pSPARC->Nstates * pSPARC->Nstates, sizeof(double _Complex)); + pSPARC->vr_kpt = (double _Complex *) calloc(pSPARC->Nstates * pSPARC->Nstates, sizeof(double _Complex)); + pSPARC->lambda_temp1_kpt = (double _Complex *)calloc(pSPARC->Nstates, sizeof(double _Complex)); + pSPARC->lambda_temp2_kpt = (double _Complex *)calloc(pSPARC->Nstates, sizeof(double _Complex)); + } } - #endif // #if defined(USE_MKL) || defined(USE_SCALAPACK) } void free_cyclix(SPARC_OBJ *pSPARC) @@ -72,20 +82,20 @@ void free_cyclix(SPARC_OBJ *pSPARC) free(pSPARC->Intgwt_psi); free(pSPARC->Intgwt_phi); - #if defined(USE_MKL) || defined(USE_SCALAPACK) - if (pSPARC->isGammaPoint) { - free(pSPARC->lambda_temp1); - free(pSPARC->lambda_temp2); - free(pSPARC->lambda_temp3); - free(pSPARC->vl); - free(pSPARC->vr); - } else { - free(pSPARC->lambda_temp1_kpt); - free(pSPARC->lambda_temp2_kpt); - free(pSPARC->vl_kpt); - free(pSPARC->vr_kpt); - } - #endif // #if defined(USE_MKL) || defined(USE_SCALAPACK) + if (pSPARC->bandcomm_index == 0) { + if (pSPARC->isGammaPoint) { + free(pSPARC->lambda_temp1); + free(pSPARC->lambda_temp2); + free(pSPARC->lambda_temp3); + free(pSPARC->vl); + free(pSPARC->vr); + } else { + free(pSPARC->lambda_temp1_kpt); + free(pSPARC->lambda_temp2_kpt); + free(pSPARC->vl_kpt); + free(pSPARC->vr_kpt); + } + } } /* @@ -348,3 +358,47 @@ void NormalizeEigfunc_kpt_cyclix(SPARC_OBJ *pSPARC, int spn_i, int kpt) { free(intg_psi); } + +/* +@ brief: generalized eigenvalue problem solver for cyclix +*/ +int generalized_eigenvalue_problem_cyclix(SPARC_OBJ *pSPARC, double *Hp_local, double *Mp_local, double *eig_val) +{ + int info = LAPACKE_dggev(LAPACK_COL_MAJOR,'N','V',pSPARC->Nstates, Hp_local, + pSPARC->Nstates, Mp_local, pSPARC->Nstates, + pSPARC->lambda_temp1, pSPARC->lambda_temp2, pSPARC->lambda_temp3, + pSPARC->vl, pSPARC->Nstates, pSPARC->vr, pSPARC->Nstates); + + for(int n = 0; n < pSPARC->Nstates; n++){ + // Warning if lambda_temp3 is almost zero + assert(fabs(pSPARC->lambda_temp3[n]) > 1e-15); + + eig_val[n] = pSPARC->lambda_temp1[n]/pSPARC->lambda_temp3[n]; + for(int m = 0; m < pSPARC->Nstates; m++){ + Hp_local[n*pSPARC->Nstates+m] = pSPARC->vr[n*pSPARC->Nstates+m]; + } + } + return info; +} + +/* +@ brief: generalized eigenvalue problem solver for cyclix complex case +*/ +int generalized_eigenvalue_problem_cyclix_kpt(SPARC_OBJ *pSPARC, double _Complex *Hp_local, double _Complex *Mp_local, double *eig_val) +{ + int info = LAPACKE_zggev(LAPACK_COL_MAJOR,'N','V',pSPARC->Nstates, Hp_local, + pSPARC->Nstates, Mp_local,pSPARC->Nstates, + pSPARC->lambda_temp1_kpt, pSPARC->lambda_temp2_kpt, + pSPARC->vl_kpt, pSPARC->Nstates, pSPARC->vr_kpt, pSPARC->Nstates); + + for(int n = 0; n < pSPARC->Nstates; n++){ + // Warning if lambda_temp2_kpt is almost zero + assert(fabs(creal(pSPARC->lambda_temp2_kpt[n])) > 1e-15); + + eig_val[n] = creal(pSPARC->lambda_temp1_kpt[n])/creal(pSPARC->lambda_temp2_kpt[n]); + for(int m = 0; m < pSPARC->Nstates; m++){ + Hp_local[n*pSPARC->Nstates+m] = pSPARC->vr_kpt[n*pSPARC->Nstates+m]; + } + } + return info; +} \ No newline at end of file diff --git a/src/cyclix/include/cyclix_tools.h b/src/cyclix/include/cyclix_tools.h index 2f09b4b6..327fb1fc 100644 --- a/src/cyclix/include/cyclix_tools.h +++ b/src/cyclix/include/cyclix_tools.h @@ -81,5 +81,14 @@ void NormalizeEigfunc_cyclix(SPARC_OBJ *pSPARC, int spn_i); */ void NormalizeEigfunc_kpt_cyclix(SPARC_OBJ *pSPARC, int spn_i, int kpt); +/* +@ brief: generalized eigenvalue problem solver for cyclix +*/ +int generalized_eigenvalue_problem_cyclix(SPARC_OBJ *pSPARC, double *Hp_local, double *Mp_local, double *eig_val); + +/* +@ brief: generalized eigenvalue problem solver for cyclix complex case +*/ +int generalized_eigenvalue_problem_cyclix_kpt(SPARC_OBJ *pSPARC, double _Complex *Hp_local, double _Complex *Mp_local, double *eig_val); #endif // CYCLIX_TOOLS_H diff --git a/src/eigenSolver.c b/src/eigenSolver.c index 141033e2..378f7232 100644 --- a/src/eigenSolver.c +++ b/src/eigenSolver.c @@ -477,7 +477,7 @@ void Solve_standard_EigenProblem(SPARC_OBJ *pSPARC, int k, int spn_i) #endif #ifdef SPARCX_ACCEL // SPARCX_ACCEL_NOTE - if (pSPARC->useACCEL == 1) { + if (pSPARC->useACCEL == 1 && pSPARC->cell_typ < 20) { int info = 0; t1 = MPI_Wtime(); if (!pSPARC->bandcomm_index) { @@ -1265,7 +1265,7 @@ void DP_Solve_Generalized_EigenProblem(SPARC_OBJ *pSPARC, int spn_i) if (DP_CheFSI == NULL) return; #ifdef SPARCX_ACCEL // SPARCX_ACCEL_NOTE -- ADDS GPU Eigensolver - if (pSPARC->useACCEL == 1) + if (pSPARC->useACCEL == 1 && pSPARC->cell_typ < 20) { int Ns_dp = DP_CheFSI->Ns_dp; int rank_kpt = DP_CheFSI->rank_kpt; @@ -1312,11 +1312,14 @@ void DP_Solve_Generalized_EigenProblem(SPARC_OBJ *pSPARC, int spn_i) double *Hp_local = DP_CheFSI->Hp_local; double *Mp_local = DP_CheFSI->Mp_local; double *eig_val = pSPARC->lambda + spn_i * Ns_dp; - if (pSPARC->StandardEigenFlag == 0) + if (pSPARC->CyclixFlag) { + info = generalized_eigenvalue_problem_cyclix(pSPARC, Hp_local, Mp_local, eig_val); + } else if (pSPARC->StandardEigenFlag == 0) { info = LAPACKE_dsygvd( LAPACK_COL_MAJOR, 1, 'V', 'U', Ns_dp, Hp_local, Ns_dp, Mp_local, Ns_dp, eig_val); - else + } else { info = LAPACKE_dsyevd(LAPACK_COL_MAJOR,'V','U', Ns_dp, Hp_local, Ns_dp, eig_val); + } copy_mat_blk(sizeof(double), Hp_local, Ns_dp, Ns_dp, Ns_dp, eig_vecs, Ns_dp); } @@ -1684,7 +1687,7 @@ void Solve_Generalized_EigenProblem(SPARC_OBJ *pSPARC, int k, int spn_i) #endif #ifdef SPARCX_ACCEL // SPARCX_ACCEL_NOTE - if (pSPARC->useACCEL == 1) { + if (pSPARC->useACCEL == 1 && pSPARC->cell_typ < 20) { int info = 0; t1 = MPI_Wtime(); if (!pSPARC->bandcomm_index) { @@ -1733,32 +1736,16 @@ void Solve_Generalized_EigenProblem(SPARC_OBJ *pSPARC, int k, int spn_i) int info = 0; t1 = MPI_Wtime(); if (!pSPARC->bandcomm_index) { - if (pSPARC->CyclixFlag) { - info = LAPACKE_dggev(LAPACK_COL_MAJOR,'N','V',pSPARC->Nstates,pSPARC->Hp, - pSPARC->Nstates,pSPARC->Mp,pSPARC->Nstates, - pSPARC->lambda_temp1, pSPARC->lambda_temp2, pSPARC->lambda_temp3, - pSPARC->vl, pSPARC->Nstates, pSPARC->vr, pSPARC->Nstates); - int indx0, n, indx, m; - indx0 = spn_i*pSPARC->Nstates; - for(n = 0; n < pSPARC->Nstates; n++){ - indx = indx0 + n; - // Warning if lambda_temp3 is almost zero - assert(fabs(pSPARC->lambda_temp3[n]) > 1e-15); - - pSPARC->lambda[indx] = pSPARC->lambda_temp1[n]/pSPARC->lambda_temp3[n]; - for(m = 0; m < pSPARC->Nstates; m++){ - pSPARC->Hp[n*pSPARC->Nstates+m] = pSPARC->vr[n*pSPARC->Nstates+m]; - } - } + info = generalized_eigenvalue_problem_cyclix(pSPARC, + pSPARC->Hp, pSPARC->Mp, pSPARC->lambda + spn_i*pSPARC->Nstates); + } else if (pSPARC->StandardEigenFlag == 0) { + info = LAPACKE_dsygvd(LAPACK_COL_MAJOR,1,'V','U',pSPARC->Nstates,pSPARC->Hp, + pSPARC->Nstates,pSPARC->Mp,pSPARC->Nstates, + pSPARC->lambda + spn_i*pSPARC->Nstates); } else { - if (pSPARC->StandardEigenFlag == 0) - info = LAPACKE_dsygvd(LAPACK_COL_MAJOR,1,'V','U',pSPARC->Nstates,pSPARC->Hp, - pSPARC->Nstates,pSPARC->Mp,pSPARC->Nstates, - pSPARC->lambda + spn_i*pSPARC->Nstates); - else - info = LAPACKE_dsyevd(LAPACK_COL_MAJOR,'V','U',pSPARC->Nstates,pSPARC->Hp, - pSPARC->Nstates, pSPARC->lambda + spn_i*pSPARC->Nstates); + info = LAPACKE_dsyevd(LAPACK_COL_MAJOR,'V','U',pSPARC->Nstates,pSPARC->Hp, + pSPARC->Nstates, pSPARC->lambda + spn_i*pSPARC->Nstates); } } t2 = MPI_Wtime(); diff --git a/src/eigenSolverKpt.c b/src/eigenSolverKpt.c index 96d51fb3..57f84f5b 100644 --- a/src/eigenSolverKpt.c +++ b/src/eigenSolverKpt.c @@ -233,9 +233,9 @@ void CheFSI_kpt(SPARC_OBJ *pSPARC, double lambda_cutoff, double _Complex *x0, in t1 = MPI_Wtime(); #ifdef SPARCX_ACCEL - if (pSPARC->useACCEL == 1 && pSPARC->cell_typ < 20 && pSPARC->spin_typ <= 1 && pSPARC->usefock <=1 && pSPARC->SOC_Flag == 0 && pSPARC->Nd_d_dmcomm == pSPARC->Nd) - { - ACCEL_ChebyshevFiltering_kpt(pSPARC, pSPARC->DMVertices_dmcomm, pSPARC->Xorb_kpt + kpt*size_k + spn_i*DMnd, DMndsp, + if (pSPARC->useACCEL == 1 && pSPARC->cell_typ < 20 && pSPARC->spin_typ <= 1 && pSPARC->usefock <=1 && pSPARC->SOC_Flag == 0 && pSPARC->Nd_d_dmcomm == pSPARC->Nd) + { + ACCEL_ChebyshevFiltering_kpt(pSPARC, pSPARC->DMVertices_dmcomm, pSPARC->Xorb_kpt + kpt*size_k + spn_i*DMnd, DMndsp, pSPARC->Yorb_kpt + spn_i*DMnd, DMndsp, pSPARC->Nband_bandcomm, pSPARC->ChebDegree, lambda_cutoff, pSPARC->eigmax[spn_i*pSPARC->Nkpts_kptcomm + kpt], pSPARC->eigmin[spn_i*pSPARC->Nkpts_kptcomm + kpt], kpt, spn_i, pSPARC->dmcomm); @@ -561,8 +561,8 @@ void init_DP_CheFSI_kpt(SPARC_OBJ *pSPARC) MPI_Comm_split(pSPARC->kptcomm, proc_active, rank_kpt, &DP_CheFSI_kpt->kpt_comm); if (proc_active == 0) { - free(DP_CheFSI_kpt); MPI_Comm_free(&DP_CheFSI_kpt->kpt_comm); + free(DP_CheFSI_kpt); pSPARC->DP_CheFSI_kpt = NULL; return; } else { @@ -872,22 +872,27 @@ void DP_Solve_Generalized_EigenProblem_kpt(SPARC_OBJ *pSPARC, int kpt, int spn_i int rank_kpt = DP_CheFSI_kpt->rank_kpt; double _Complex *eig_vecs = DP_CheFSI_kpt->eig_vecs; double st = MPI_Wtime(); + int info = 0; if (rank_kpt == 0) { double _Complex *Hp_local = DP_CheFSI_kpt->Hp_local; double _Complex *Mp_local = DP_CheFSI_kpt->Mp_local; double *eig_val = pSPARC->lambda + kpt*pSPARC->Nstates + spn_i*pSPARC->Nkpts_kptcomm*pSPARC->Nstates; - LAPACKE_zhegvd( - LAPACK_COL_MAJOR, 1, 'V', 'U', Ns_dp, - Hp_local, Ns_dp, Mp_local, Ns_dp, eig_val - ); + if (pSPARC->CyclixFlag) { + info = generalized_eigenvalue_problem_cyclix_kpt(pSPARC, Hp_local, Mp_local, eig_val); + } else { + info = LAPACKE_zhegvd( + LAPACK_COL_MAJOR, 1, 'V', 'U', Ns_dp, + Hp_local, Ns_dp, Mp_local, Ns_dp, eig_val + ); + } copy_mat_blk(sizeof(double _Complex), Hp_local, Ns_dp, Ns_dp, Ns_dp, eig_vecs, Ns_dp); } double et0 = MPI_Wtime(); MPI_Bcast(eig_vecs, Ns_dp * Ns_dp, MPI_C_DOUBLE_COMPLEX, 0, DP_CheFSI_kpt->kpt_comm); double et1 = MPI_Wtime(); #ifdef DEBUG - if (rank_kpt == 0) printf("Rank 0, DP_Solve_Generalized_EigenProblem_kpt used %.3lf ms, LAPACKE_zhegvd used %.3lf ms\n", 1000.0 * (et1 - st), 1000.0 * (et0 - st)); + if (rank_kpt == 0) printf("Rank 0, DP_Solve_Generalized_EigenProblem_kpt, info = %d, used %.3lf ms, LAPACKE_zhegvd used %.3lf ms\n", info, 1000.0 * (et1 - st), 1000.0 * (et0 - st)); #endif } else { #if defined(USE_MKL) || defined(USE_SCALAPACK) @@ -1207,24 +1212,8 @@ void Solve_Generalized_EigenProblem_kpt(SPARC_OBJ *pSPARC, int kpt, int spn_i) t1 = MPI_Wtime(); if (!pSPARC->bandcomm_index) { if (pSPARC->CyclixFlag) { - info = LAPACKE_zggev(LAPACK_COL_MAJOR,'N','V',pSPARC->Nstates,pSPARC->Hp_kpt, - pSPARC->Nstates,pSPARC->Mp_kpt,pSPARC->Nstates, - pSPARC->lambda_temp1_kpt, pSPARC->lambda_temp2_kpt, - pSPARC->vl_kpt, pSPARC->Nstates, pSPARC->vr_kpt, pSPARC->Nstates); - int indx0, n, indx, m; - indx0 = spn_i*pSPARC->Nkpts_kptcomm*pSPARC->Nstates + kpt*pSPARC->Nstates; - for(n = 0; n < pSPARC->Nstates; n++){ - indx = indx0 + n; - // Warning if lambda_temp2_kpt is almost zero - assert(fabs(creal(pSPARC->lambda_temp2_kpt[n])) > 1e-15); - - pSPARC->lambda[indx] = creal(pSPARC->lambda_temp1_kpt[n])/creal(pSPARC->lambda_temp2_kpt[n]); - //if(pSPARC->bandcomm_index == 0) - // printf("eigenvalues %.15f\n",pSPARC->lambda[indx]); - for(m = 0; m < pSPARC->Nstates; m++){ - pSPARC->Hp_kpt[n*pSPARC->Nstates+m] = pSPARC->vr_kpt[n*pSPARC->Nstates+m]; - } - } + info = generalized_eigenvalue_problem_cyclix_kpt(pSPARC, + pSPARC->Hp_kpt, pSPARC->Mp_kpt, pSPARC->lambda + kpt*pSPARC->Nstates + spn_i*pSPARC->Nkpts_kptcomm*pSPARC->Nstates); } else { info = LAPACKE_zhegvd(LAPACK_COL_MAJOR,1,'V','U',pSPARC->Nstates,pSPARC->Hp_kpt, pSPARC->Nstates,pSPARC->Mp_kpt,pSPARC->Nstates, diff --git a/src/initialization.c b/src/initialization.c index 8070b3a6..e4cba9c7 100644 --- a/src/initialization.c +++ b/src/initialization.c @@ -3338,7 +3338,7 @@ void write_output_init(SPARC_OBJ *pSPARC) { } fprintf(output_fp,"***************************************************************************\n"); - fprintf(output_fp,"* SPARC (version Sep 23, 2023) *\n"); + fprintf(output_fp,"* SPARC (version Sep 26, 2023) *\n"); fprintf(output_fp,"* Copyright (c) 2020 Material Physics & Mechanics Group, Georgia Tech *\n"); fprintf(output_fp,"* Distributed under GNU General Public License 3 (GPL) *\n"); fprintf(output_fp,"* Start time: %s *\n",c_time_str); diff --git a/tests/SPARC_testing_script.py b/tests/SPARC_testing_script.py index f7ff2069..a28ee752 100644 --- a/tests/SPARC_testing_script.py +++ b/tests/SPARC_testing_script.py @@ -363,7 +363,7 @@ ################################################################################################################## SYSTEMS["systemname"].append('WS2_cyclix_SOC') SYSTEMS["Tags"].append(['cyclix','SOC']) -SYSTEMS["Tols"].append([tols["E_tol"], tols["F_tol"], tols["stress_tol"]]) # E_tol(Ha/atom), F_tol(Ha/Bohr), stress_tol(%) +SYSTEMS["Tols"].append([tols["E_tol"], 2e-5, tols["stress_tol"]]) # E_tol(Ha/atom), F_tol(Ha/Bohr), stress_tol(%) ################################################################################################################## SYSTEMS["systemname"].append('MoS2_cyclix_SOC') SYSTEMS["Tags"].append(['cyclix','SOC'])