Skip to content

Commit

Permalink
[develop] update Gaea modulefile (ufs-community#836)
Browse files Browse the repository at this point in the history
Updated the build_gaea_intel.lua modulefile to use the new stack location on Gaea C3/C4. The stack was rebuilt after a system upgrade, using the intel-classic-2022.0.2 compiler and cray-mpich/7.7.20.

---------

Co-authored-by: Natalie Perlin <[email protected]>
Co-authored-by: michael.lueken <[email protected]>
  • Loading branch information
3 people committed Aug 14, 2023
1 parent 6b57195 commit 1031a28
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 47 deletions.
14 changes: 5 additions & 9 deletions .cicd/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@ pipeline {
parameters {
// Allow job runner to filter based on platform
// Use the line below to enable all PW clusters
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet-epic', 'orion', 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'], description: 'Specify the platform(s) to use')
// Use the line below to enable the PW AWS cluster
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use')
// Use the line below to re-enable Gaea
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet', 'orion'], description: 'Specify the platform(s) to use')
choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'hera', 'jet', 'orion'], description: 'Specify the platform(s) to use')
// choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet-epic', 'orion', 'pclusternoaav2use1'], description: 'Specify the platform(s) to use')
choice(name: 'SRW_PLATFORM_FILTER', choices: ['all', 'cheyenne', 'gaea', 'hera', 'jet-epic', 'orion'], description: 'Specify the platform(s) to use')
// Allow job runner to filter based on compiler
choice(name: 'SRW_COMPILER_FILTER', choices: ['all', 'gnu', 'intel'], description: 'Specify the compiler(s) to use to build')
// Uncomment the following line to re-enable comprehensive tests
Expand Down Expand Up @@ -78,8 +76,7 @@ pipeline {
axes {
axis {
name 'SRW_PLATFORM'
// values 'cheyenne', 'gaea', 'hera', 'jet', 'orion' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'cheyenne', 'hera', 'jet', 'orion' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'cheyenne', 'gaea', 'hera', 'jet-epic', 'orion' //, 'pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}

axis {
Expand All @@ -93,8 +90,7 @@ pipeline {
exclude {
axis {
name 'SRW_PLATFORM'
// values 'gaea', 'jet', 'orion' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'jet', 'orion' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1'
values 'gaea', 'jet-epic', 'orion' //, 'pclusternoaav2use1' , 'azclusternoaav2eus1', 'gclusternoaav2usc1'
}

axis {
Expand Down
4 changes: 4 additions & 0 deletions .cicd/scripts/srw_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ else
platform="${SRW_PLATFORM}"
fi

if [[ "${SRW_PLATFORM}" = jet-epic ]]; then
platform='jet'
fi

# Build and install
cd ${workspace}/tests
set +e
Expand Down
25 changes: 12 additions & 13 deletions .cicd/scripts/srw_ftest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ else
platform="${SRW_PLATFORM}"
fi

if [[ "${SRW_PLATFORM}" = jet-epic ]]; then
platform='jet'
fi

# Test directories
we2e_experiment_base_dir="${workspace}/expt_dirs"
we2e_test_dir="${workspace}/tests/WE2E"
Expand All @@ -64,7 +68,7 @@ sed "s|^workflow:|workflow:\n EXPT_BASEDIR: ${workspace}/expt_dirs|1" -i ush/co
sed "s|^workflow:|workflow:\n EXEC_SUBDIR: ${workspace}/install_${SRW_COMPILER}/exec|1" -i ush/config.yaml

# DATA_LOCATION differs on each platform ... find it.
export DATA_LOCATION=$(grep TEST_EXTRN_MDL_SOURCE_BASEDIR ${workspace}/ush/machine/${SRW_PLATFORM,,}.yaml | awk '{printf "%s", $2}')
export DATA_LOCATION=$(grep TEST_EXTRN_MDL_SOURCE_BASEDIR ${workspace}/ush/machine/${platform,,}.yaml | awk '{printf "%s", $2}')
echo "DATA_LOCATION=${DATA_LOCATION}"

# Configure a default test ...
Expand Down Expand Up @@ -129,18 +133,13 @@ rm -f ${results_file}
status=0

# Limit to machines that are fully ready
deny_machines=( gaea )
if [[ ${deny_machines[@]} =~ ${platform,,} ]] ; then
echo "# Deny ${platform} - incomplete configuration." | tee -a ${results_file}
else
echo "# Try ${platform} with the first few simple SRW tasks ..." | tee -a ${results_file}
for task in ${TASKS[@]:0:${TASK_DEPTH}} ; do
echo -n "./$task.sh ... "
./$task.sh > $task-log.txt 2>&1 && echo "COMPLETE" || echo "FAIL rc=$(( status+=$? ))"
# stop at the first sign of trouble ...
[[ 0 != ${status} ]] && echo "$task: FAIL" >> ${results_file} && break || echo "$task: COMPLETE" >> ${results_file}
done
fi
echo "# Try ${platform} with the first few simple SRW tasks ..." | tee -a ${results_file}
for task in ${TASKS[@]:0:${TASK_DEPTH}} ; do
echo -n "./$task.sh ... "
./$task.sh > $task-log.txt 2>&1 && echo "COMPLETE" || echo "FAIL rc=$(( status+=$? ))"
# stop at the first sign of trouble ...
[[ 0 != ${status} ]] && echo "$task: FAIL" >> ${results_file} && break || echo "$task: COMPLETE" >> ${results_file}
done

# Set exit code to number of failures
set +e
Expand Down
4 changes: 4 additions & 0 deletions .cicd/scripts/srw_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ else
platform="${SRW_PLATFORM}"
fi

if [[ "${SRW_PLATFORM}" = jet-epic ]]; then
platform='jet'
fi

# Test directories
we2e_experiment_base_dir="${workspace}/expt_dirs"
we2e_test_dir="${workspace}/tests/WE2E"
Expand Down
6 changes: 1 addition & 5 deletions etc/lmod-setup.csh
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@ else if ( "$L_MACHINE" == singularity ) then
module purge

else if ( "$L_MACHINE" == gaea ) then
set ENV="/lustre/f2/dev/role.epic/contrib/apps/lmod/lmod/init/csh"
source $ENV

setenv LMOD_SYSTEM_DEFAULT_MODULES "modules/3.2.11.4"
module --initial_load --no_redirect restore
source /lustre/f2/dev/role.epic/contrib/Lmod_init.csh

else if ( "$L_MACHINE" == odin ) then
module unload modules
Expand Down
20 changes: 13 additions & 7 deletions modulefiles/build_gaea_intel.lua
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
help([[
This module loads libraries for building the UFS SRW App on
the NOAA RDHPC machine Gaea using Intel-2022.1.2
the NOAA RDHPC machine Gaea using Intel-2022.0.2
]])

whatis([===[Loads libraries needed for building the UFS SRW App on Gaea ]===])

load(pathJoin("cmake", os.getenv("cmake_ver") or "3.20.1"))

prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/hpc-stack/intel-2021.3.0_noarch/modulefiles/stack")
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/hpc-stack/intel-classic-2022.0.2/modulefiles/stack")
load(pathJoin("hpc", os.getenv("hpc_ver") or "1.2.0"))
load(pathJoin("intel", os.getenv("intel_ver") or "2021.3.0"))
load(pathJoin("hpc-intel", os.getenv("hpc_intel_ver") or "2021.3.0"))
load(pathJoin("hpc-cray-mpich", os.getenv("hpc_cray_mpich_ver") or "7.7.11"))
load(pathJoin("gcc", os.getenv("gcc_ver") or "8.3.0"))
load(pathJoin("libpng", os.getenv("libpng_ver") or "1.6.37"))
load(pathJoin("hpc-intel-classic", os.getenv("hpc_intel_classic_ver") or "2022.0.2"))
load(pathJoin("hpc-cray-mpich", os.getenv("hpc_cray_mpich_ver") or "7.7.20"))

load("srw_common")
-- Needed at runtime
load("alps")

local MKLROOT="/opt/intel/oneapi/mkl/2022.0.2/"
prepend_path("LD_LIBRARY_PATH",pathJoin(MKLROOT,"lib/intel64"))
pushenv("MKLROOT", MKLROOT)

pushenv("GSI_BINARY_SOURCE_DIR", "/lustre/f2/dev/role.epic/contrib/GSI_data/fix/20230601")
pushenv("CRAYPE_LINK_TYPE","dynamic")

setenv("CC","cc")
setenv("FC","ftn")
Expand Down
2 changes: 1 addition & 1 deletion modulefiles/srw_common.lua
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@ load("sigio/2.3.2")
load("w3nco/2.4.1")
load("wrf_io/1.2.0")

load("ncdiag/1.0.0")
load("ncdiag/1.1.1")
load("ncio/1.1.2")
load("wgrib2/2.0.8")
2 changes: 1 addition & 1 deletion modulefiles/tasks/gaea/plot_allvars.local.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/modulefiles")
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/miniconda3/modulefiles")
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))

setenv("SRW_ENV", "regional_workflow")
2 changes: 1 addition & 1 deletion modulefiles/tasks/gaea/python_srw.lua
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/modulefiles")
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/miniconda3/modulefiles")
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))

setenv("SRW_ENV", "workflow_tools")
8 changes: 6 additions & 2 deletions modulefiles/wflow_gaea.lua
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@ the NOAA RDHPC machine Gaea

whatis([===[Loads libraries needed for running the UFS SRW App on gaea ]===])

unload("python")
load("set_pythonpath")
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/modulefiles")
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/miniconda3/modulefiles")
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))
prepend_path("MODULEPATH","/lustre/f2/dev/role.epic/contrib/rocoto/modulefiles")
load("rocoto")
load("alps")

setenv("PROJ_LIB", "/lustre/f2/dev/role.epic/contrib/miniconda3/4.12.0/envs/regional_workflow/share/proj")
pushenv("MKLROOT", "/opt/intel/oneapi/mkl/2022.0.2/")
pushenv("GSI_BINARY_SOURCE_DIR", "/lustre/f2/dev/role.epic/contrib/GSI_data/fix/20230601")
setenv("PMI_NO_PREINITIALIZE","1")

if mode() == "load" then
LmodMsgRaw([===[Please do the following to activate conda:
Expand Down
30 changes: 22 additions & 8 deletions ush/machine/gaea.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@ platform:
MRMS_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/mrms/proc
NDAS_OBS_DIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/obs_data/ndas/proc
DOMAIN_PREGEN_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen
QUEUE_DEFAULT: normal
QUEUE_FCST: normal
QUEUE_HPSS: normal
QUEUE_DEFAULT: windfall
QUEUE_FCST: windfall
QUEUE_HPSS: windfall
REMOVE_MEMORY: True
RUN_CMD_FCST: srun --export=ALL --mpi=pmi2 -n ${PE_MEMBER01}
RUN_CMD_POST: srun --export=ALL --mpi=pmi2 -n $nprocs
RUN_CMD_PRDGEN: srun --export=ALL --mpi=pmi2 -n $nprocs
PARTITION_HPSS: eslogin
RUN_CMD_FCST: srun --export=ALL -n ${PE_MEMBER01}
RUN_CMD_POST: srun --export=ALL -n $nprocs
RUN_CMD_PRDGEN: srun --export=ALL -n $nprocs
RUN_CMD_SERIAL: time
RUN_CMD_UTILS: srun --export=ALL --mpi=pmi2 -n $nprocs
SCHED_NATIVE_CMD: -M c3 --export=NONE
SCHED_NATIVE_CMD_HPSS: -M es --export=NONE
SCHED_NATIVE_CMD: --clusters=c4 --export=NONE
SCHED_NATIVE_CMD_HPSS: --clusters=es --export=NONE
PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }'
TEST_EXTRN_MDL_SOURCE_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data
TEST_PREGEN_BASEDIR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/FV3LAM_pregen
Expand All @@ -30,8 +31,21 @@ platform:
FIXorg: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_orog
FIXsfc: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/fix/fix_sfc_climo
FIXshp: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/NaturalEarth
EXTRN_MDL_DATA_STORES: aws
data:
ics_lbcs:
FV3GFS:
nemsio: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/nemsio/${yyyymmdd}${hh}
grib2: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/grib2/${yyyymmdd}${hh}
RAP: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh}
HRRR: /lustre/f2/dev/role.epic/contrib/UFS_SRW_data/develop/input_model_data/HRRR/${yyyymmdd}${hh}
rocoto:
tasks:
metatask_run_ensemble:
task_run_fcst_mem#mem#:
cores: '{{ task_run_fcst.PE_MEMBER01 // 1 }}'
native: '--cpus-per-task {{ task_run_fcst.OMP_NUM_THREADS_RUN_FCST|int }} --exclusive {{ platform.SCHED_NATIVE_CMD }}'
nodes:
nnodes:
nodesize:
ppn:

0 comments on commit 1031a28

Please sign in to comment.