Regression Testing Updates: GNU/Intel into one RT. New logs/ dir in tests/ (#1718)

* move logs to logs/ folder, and move rt_gnu tests to rt.conf and delete rt_gnu.conf
* Remove Intel/GNU specific calls for MACHINE_ID or in config scripts
* default_vars machine name updates
* ORT will continue and not exit if a test is unsupported
* Logs will now contain the hashes of the UFSWM and its submodules
* Adjust AutoRT with new BLDIR and no need for the compiler information
* Remove compiler information from machine log files
* Add -a option to opnReqTest
ufs-community · May 31, 2023 · 5d47ea8 · 5d47ea8
1 parent f809067
commit 5d47ea8
Show file tree

Hide file tree

Showing 93 changed files with 11,918 additions and 11,873 deletions.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -3,17 +3,15 @@
 Provide a detailed description of what this PR does. What bug does it fix, or what feature does it add? Is a change of answers expected from this PR? Are any library updates included in this PR (modulefiles etc.)?
 -->
 
-### Top of commit queue on: TBD
-<!-- Please have sub-component Code Managers ready for merging sub-component PR's on the date above and the day after the date above -->
-
 ### Input data additions/changes
 - [ ] No changes are expected to input data.
-- [ ] There will be new input data. <!-- Add "input data change" Label -->
-- [ ] Input data will be updated. <!-- Add "New Input Data Req'd" Label -->
+- [ ] Changes are expected to input data:
+ - [ ] New input data.
+ - [ ] Updated input data.
 
 ### Anticipated changes to regression tests:
-- [ ] No changes are expected to any regression test. <!-- Add "No Baseline Change" Label -->
-- [ ] Changes are expected to the following tests: <!-- Add "Baseline Change" Label -->
+- [ ] No changes are expected to any regression test.
+- [ ] Changes are expected to the following tests:
 <!-- Please insert what RT's change and why you expect them to change -->
 
 ## Subcomponents involved:
@@ -37,19 +35,21 @@ Provide a detailed description of what this PR does. What bug does it fix, or wh
 <!-- 
 Please complete all items in list. Make sure to attach logs from RT testing in comment, not in repository. Once all boxes are checked, please add the label "Ready for Commit Queue".
 -->
-- [ ] Link PR's from all sub-components involved
-- [ ] Confirm reviews completed in sub-component PR's
+- [ ] Link PR's from all sub-components involved in section below
+- [ ] Confirm reviews completed in ALL sub-component PR's
 - [ ] Add all appropriate labels to this PR.
-- [ ] Run full RT suite on either Hera/Cheyenne with both Intel/GNU compilers
+- [ ] Run full RT suite on either Hera/Cheyenne AND attach log to a PR comment.
 - [ ] Add list of any failed regression tests to "Anticipated changes to regression tests" section.
 
 ## Linked PR's and Issues:
 <!--
 Please link dependent pull requests.
-EXAMPLE: Depends on NOAA-EMC/fv3atm/pull/<pullrequest_number>
+EXAMPLE: "- Depends on NOAA-EMC/fv3atm/pull/<pullrequest_number>"
 
 Please link the related issues to be closed with this PR, whether in this repository, or in another repository.
-EXAMPLE: Closes NOAA-EMC/fv3atm/issues/<issue_number>
+EXAMPLE: "- Closes NOAA-EMC/fv3atm/issues/<issue_number>"
+
+PLEASE MAKE SURE TO PUT A "- " (a dash followed by a space) BEFORE "Depends on" OR "Closes", as these list items show up well on GitHub.
 -->
 
 ## Testing Day Checklist:
@@ -61,15 +61,11 @@ Please consult the ufs-weather-model [wiki](https://github.com/ufs-community/ufs
 
 ### Testing Log (for CM's):
 - RDHPCS
- - Intel
- - [ ] Hera
- - [ ] Orion
- - [ ] Jet
- - [ ] Gaea
- - [ ] Cheyenne
- - GNU
- - [ ] Hera
- - [ ] Cheyenne
+ - [ ] Hera
+ - [ ] Orion
+ - [ ] Jet
+ - [ ] Gaea
+ - [ ] Cheyenne
 - WCOSS2
  - [ ] Dogwood/Cactus
  - [ ] Acorn

diff --git a/.gitignore b/.gitignore
@@ -72,5 +72,4 @@ tests/fv3_conf/compile_slurm.IN
 tests/fv3_conf/compile_qsub.IN
 tests/fv3_conf/fv3_slurm.IN
 tests/fv3_conf/fv3_qsub.IN
-log*
 build*.log*
diff --git a/tests/RegressionTests_cheyenne.gnu.log b/tests/RegressionTests_cheyenne.gnu.log
diff --git a/tests/RegressionTests_hera.gnu.log b/tests/RegressionTests_hera.gnu.log
diff --git a/tests/auto-jenkins/jobs/bl.py b/tests/auto-jenkins/jobs/bl.py
@@ -10,7 +10,7 @@ def run(job_obj):
  workdir, rtbldir, blstore = set_directories(job_obj)
  pr_repo_loc, repo_dir_str = clone_pr_repo(job_obj, workdir)
  bldate = get_bl_date(job_obj, pr_repo_loc)
- bldir = f'{blstore}/develop-{bldate}/{job_obj.compiler.upper()}'
+ bldir = f'{blstore}/develop-{bldate}'
  bldirbool = check_for_bl_dir(bldir, job_obj)
  run_regression_test(job_obj, pr_repo_loc)
  post_process(job_obj, pr_repo_loc, repo_dir_str, rtbldir, bldir)
@@ -22,27 +22,27 @@ def set_directories(job_obj):
  workdir = '/scratch1/NCEPDEV/nems/emc.nemspara/autort/pr'
  blstore = '/scratch1/NCEPDEV/nems/emc.nemspara/RT/NEMSfv3gfs'
  rtbldir = '/scratch1/NCEPDEV/stmp4/emc.nemspara/FV3_RT/'\
- f'REGRESSION_TEST_{job_obj.compiler.upper()}'
+ f'REGRESSION_TEST'
  elif job_obj.machine == 'jet':
  workdir = '/lfs4/HFIP/h-nems/emc.nemspara/autort/pr'
  blstore = '/lfs4/HFIP/h-nems/emc.nemspara/RT/NEMSfv3gfs/'
  rtbldir = '/lfs4/HFIP/h-nems/emc.nemspara/RT_BASELINE/'\
- f'emc.nemspara/FV3_RT/REGRESSION_TEST_{job_obj.compiler.upper()}'
+ f'emc.nemspara/FV3_RT/REGRESSION_TEST'
  elif job_obj.machine == 'gaea':
  workdir = '/lustre/f2/pdata/ncep/emc.nemspara/autort/pr'
  blstore = '/lustre/f2/pdata/ncep_shared/emc.nemspara/RT/NEMSfv3gfs'
  rtbldir = '/lustre/f2/scratch/emc.nemspara/FV3_RT/'\
- f'REGRESSION_TEST_{job_obj.compiler.upper()}'
+ f'REGRESSION_TEST'
  elif job_obj.machine == 'orion':
  workdir = '/work/noaa/epic-ps/role-epic-ps/autort/tests/auto/pr'
  blstore = '/work2/noaa/epic-ps/RT/NEMSfv3gfs'
  rtbldir = '/work/noaa/stmp/role-epic-ps/stmp/role-epic-ps/FV3_RT/'\
- f'REGRESSION_TEST_{job_obj.compiler.upper()}'
+ f'REGRESSION_TEST'
  elif job_obj.machine == 'cheyenne':
  workdir = '/glade/scratch/epicufsrt/autort/jenkins/autort/pr'
  blstore = '/glade/scratch/epicufsrt/GMTB/ufs-weather-model/RT/NEMSfv3gfs'
  rtbldir = '/glade/scratch/epicufsrt/FV3_RT/'\
- f'REGRESSION_TEST_{job_obj.compiler.upper()}'
+ f'REGRESSION_TEST'
  else:
  logger.critical(f'Machine {job_obj.machine} is not supported for this job')
  raise KeyError
@@ -99,20 +99,10 @@ def create_bl_dir(bldir, job_obj):
 
 def run_regression_test(job_obj, pr_repo_loc):
  logger = logging.getLogger('RT/RUN_REGRESSION_TEST')
- if job_obj.compiler == 'gnu' and job_obj.machine != 'hera':
- rt_command = [[f'export RT_COMPILER="{job_obj.compiler}" && cd tests '
- '&& /bin/bash --login ./rt.sh -e -c -l rt_gnu.conf',
- pr_repo_loc]]
- elif job_obj.compiler == 'gnu' and job_obj.machine == 'hera':
- rt_command = [[f'export RT_COMPILER="{job_obj.compiler}" && cd tests '
- '&& /bin/bash --login ./rt.sh -r -c -l rt_gnu.conf',
- pr_repo_loc]]
- elif job_obj.compiler == 'intel' and job_obj.machine != 'hera':
- rt_command = [[f'export RT_COMPILER="{job_obj.compiler}" && cd tests '
- '&& /bin/bash --login ./rt.sh -e -c', pr_repo_loc]]
- elif job_obj.compiler == 'intel' and job_obj.machine == 'hera':
- rt_command = [[f'export RT_COMPILER="{job_obj.compiler}" && cd tests '
- '&& /bin/bash --login ./rt.sh -r -c', pr_repo_loc]]
+ if job_obj.machine != 'hera':
+ rt_command = [[f'cd tests && /bin/bash --login ./rt.sh -e -c', pr_repo_loc]]
+ elif job_obj.machine == 'hera':
+ rt_command = [[f'cd tests && /bin/bash --login ./rt.sh -r -c', pr_repo_loc]]
  job_obj.run_commands(logger, rt_command)
 
 
@@ -158,15 +148,15 @@ def clone_pr_repo(job_obj, workdir):
 
 def post_process(job_obj, pr_repo_loc, repo_dir_str, rtbldir, bldir):
  logger = logging.getLogger('BL/MOVE_RT_LOGS')
- rt_log = f'tests/RegressionTests_{job_obj.machine}'\
- f'.{job_obj.compiler}.log'
+ rt_log = f'tests/logs/RegressionTests_{job_obj.machine}.log'
  filepath = f'{pr_repo_loc}/{rt_log}'
  rt_dir, logfile_pass = process_logfile(job_obj, filepath)
  if logfile_pass:
  create_bl_dir(bldir, job_obj)
  move_bl_command = [[f'mv {rtbldir}/* {bldir}/', pr_repo_loc]]
- if job_obj.machine == 'orion':
- move_bl_command.append([f'/bin/bash --login adjust_permissions.sh orion develop-{bldate}', blstore])
+# bldate and blstore are not defined in this function, so the Orion permissions step below would fail; it is disabled for now.
+# if job_obj.machine == 'orion':
+# move_bl_command.append([f'/bin/bash --login adjust_permissions.sh orion develop-{bldate}', blstore])
  job_obj.run_commands(logger, move_bl_command)
  job_obj.comment_text_append('Baseline creation and move successful')
  logger.info('Starting RT Job')
@@ -228,6 +218,5 @@ def process_logfile(job_obj, logfile):
  job_obj.job_failed(logger, f'{job_obj.preq_dict["action"]}')
  else:
  logger.critical(f'Could not find {job_obj.machine}'
- f'.{job_obj.compiler} '
  f'{job_obj.preq_dict["action"]} log')
  raise FileNotFoundError
diff --git a/tests/auto-jenkins/jobs/rt.py b/tests/auto-jenkins/jobs/rt.py
@@ -45,20 +45,10 @@ def set_directories(job_obj):
 
 def run_regression_test(job_obj, pr_repo_loc):
  logger = logging.getLogger('RT/RUN_REGRESSION_TEST')
- if job_obj.compiler == 'gnu' and job_obj.machine != 'hera':
- rt_command = [[f'export RT_COMPILER="{job_obj.compiler}" && cd tests '
- '&& /bin/bash --login ./rt.sh -e -l rt_gnu.conf',
- pr_repo_loc]]
- elif job_obj.compiler == 'gnu' and job_obj.machine == 'hera':
- rt_command = [[f'export RT_COMPILER="{job_obj.compiler}" && cd tests '
- '&& /bin/bash --login ./rt.sh -r -l rt_gnu.conf',
- pr_repo_loc]]
- elif job_obj.compiler == 'intel' and job_obj.machine != 'hera':
- rt_command = [[f'export RT_COMPILER="{job_obj.compiler}" && cd tests '
- '&& /bin/bash --login ./rt.sh -e', pr_repo_loc]]
- elif job_obj.compiler == 'intel' and job_obj.machine == 'hera':
- rt_command = [[f'export RT_COMPILER="{job_obj.compiler}" && cd tests '
- '&& /bin/bash --login ./rt.sh -r', pr_repo_loc]]
+ if job_obj.machine != 'hera':
+ rt_command = [[f'cd tests && /bin/bash --login ./rt.sh -e', pr_repo_loc]]
+ elif job_obj.machine == 'hera':
+ rt_command = [[f'cd tests && /bin/bash --login ./rt.sh -r', pr_repo_loc]]
  job_obj.run_commands(logger, rt_command)
 
 
@@ -114,17 +104,15 @@ def clone_pr_repo(job_obj, workdir):
 def post_process(job_obj, pr_repo_loc, repo_dir_str, branch):
  ''' This is the callback function associated with the "RT" command '''
  logger = logging.getLogger('RT/MOVE_RT_LOGS')
- rt_log = f'tests/RegressionTests_{job_obj.machine}'\
- f'.{job_obj.compiler}.log'
+ rt_log = f'tests/logs/RegressionTests_{job_obj.machine}.log'
  filepath = f'{pr_repo_loc}/{rt_log}'
  rt_dir, logfile_pass = process_logfile(job_obj, filepath)
  if logfile_pass:
  #if job_obj.preq_dict['preq'].maintainer_can_modify:
  move_rt_commands = [
  [f'git pull --ff-only origin {branch}', pr_repo_loc],
  [f'git add {rt_log}', pr_repo_loc],
- [f'git commit -m "[AutoRT] {job_obj.machine}'
- f'.{job_obj.compiler} Job Completed.\n\n\n'
+ [f'git commit -m "[AutoRT] {job_obj.machine} Job Completed.\n\n\n'
  'on-behalf-of @ufs-community <[email protected]>"',
  pr_repo_loc],
  ['sleep 10', pr_repo_loc],
@@ -154,8 +142,7 @@ def process_logfile(job_obj, logfile):
  job_obj.job_failed(logger, f'{job_obj.preq_dict["action"]}')
  else:
  logger.critical(f'Could not find {job_obj.machine}'
- f'.{job_obj.compiler} '
  f'{job_obj.preq_dict["action"]} log')
- print(f'Could not find {job_obj.machine}.{job_obj.compiler} '
+ print(f'Could not find {job_obj.machine}'
  f'{job_obj.preq_dict["action"]} log')
  raise FileNotFoundError
diff --git a/tests/auto-jenkins/rt_auto_jenkins.py b/tests/auto-jenkins/rt_auto_jenkins.py
@@ -53,28 +53,24 @@ def __init__(self):
 
 def set_action_from_label(machine, actions, label):
  ''' Match the label that initiates a job with an action in the dict'''
- # <machine>-<compiler>-<test> i.e. hera-gnu-RT
+ # <machine>-<test> i.e. hera-RT
  logger = logging.getLogger('MATCH_LABEL_WITH_ACTIONS')
  logger.info('Setting action from Label')
  split_label = label.name.split('-')
- # Make sure it has three parts
- if len(split_label) != 3:
- return False, False
+ # Make sure it has two parts
+ if len(split_label) != 2:
+ return False
  # Break the parts into their variables
  label_machine = split_label[0]
- label_compiler = split_label[1]
- label_action = split_label[2]
+ label_action = split_label[1]
  # check machine name matches
  if not re.match(label_machine, machine):
- return False, False
- # Compiler must be intel or gnu
- if not str(label_compiler) in ["intel", "gnu"]:
- return False, False
+ return False
  action_match = next((action for action in actions
  if re.match(action, label_action)), False)
 
- logging.info(f'Compiler: {label_compiler}, Action: {action_match}')
- return label_compiler, action_match
+ logging.info(f'Action: {action_match}')
+ return action_match
 
 def delete_pr_dirs(each_pr, machine):
  if machine == 'hera': 
@@ -166,12 +162,12 @@ def get_preqs_with_actions(repos, machine, ghinterface_obj, actions):
  jobs = []
  # return_preq = []
  for pr_label in preq_labels:
- compiler, match = set_action_from_label(machine, actions,
+ match = set_action_from_label(machine, actions,
  pr_label['label'])
  if match:
  pr_label['action'] = match
  # return_preq.append(pr_label.copy())
- jobs.append(Job(pr_label.copy(), ghinterface_obj, machine, compiler))
+ jobs.append(Job(pr_label.copy(), ghinterface_obj, machine))
 
  return jobs
 
@@ -193,14 +189,13 @@ class Job:
  provided by the bash script
  '''
 
- def __init__(self, preq_dict, ghinterface_obj, machine, compiler):
+ def __init__(self, preq_dict, ghinterface_obj, machine):
  self.logger = logging.getLogger('JOB')
  self.preq_dict = preq_dict
  self.job_mod = importlib.import_module(
  f'jobs.{self.preq_dict["action"].lower()}')
  self.ghinterface_obj = ghinterface_obj
  self.machine = machine
- self.compiler = compiler
  self.comment_text = '***Automated RT Failure Notification***\n'
  self.failed_tests = []
 
@@ -216,7 +211,6 @@ def check_label_before_job_start(self):
  # LETS Check the label still exists before the start of the job in the
  # case of multiple jobs
  label_to_check = f'{self.machine}'\
- f'-{self.compiler}'\
  f'-{self.preq_dict["action"]}'
  labels = self.preq_dict['preq'].get_labels()
  label_match = next((label for label in labels
@@ -250,7 +244,6 @@ def run(self):
  logger = logging.getLogger('JOB/RUN')
  logger.info(f'Starting Job: {self.preq_dict["label"]}')
  self.comment_text_append(newtext=f'Machine: {self.machine}')
- self.comment_text_append(f'Compiler: {self.compiler}')
  self.comment_text_append(f'Job: {self.preq_dict["action"]}')
  if self.check_label_before_job_start():
  try:
@@ -271,7 +264,6 @@ def send_comment_text(self):
  self.comment_text_append('Please make changes and add '
  'the following label back: '
  f'{self.machine}'
- f'-{self.compiler}'
  f'-{self.preq_dict["action"]}')
 
  self.preq_dict['preq'].create_issue_comment(self.comment_text)

diff --git a/tests/bl_date.conf b/tests/bl_date.conf
@@ -0,0 +1 @@
+export BL_DATE=20230525
diff --git a/tests/ci/Dockerfile b/tests/ci/Dockerfile
@@ -15,6 +15,8 @@ ENV CI_TEST=true
 ENV RT_COMPILER=gnu
 ENV RT_MACHINE=linux
 ENV MACHINE_ID=linux
+# ACCNR CAN BE ANYTHING; DOCKER DOES NOT NEED THIS
+ENV ACCNR=nems 
 
 WORKDIR $HOME/ufs-weather-model/tests
 CMD ./opnReqTest -n $test_name -c $run_case
diff --git a/tests/ci/Jenkinsfile b/tests/ci/Jenkinsfile
@@ -77,7 +77,7 @@ pipeline {
 
  stage('Test') {
  steps {
- sh 'docker run --rm --env test_name="${TEST_NAME}" --env run_case="${TEST_CASE}" --workdir /home/builder/ufs-weather-model/tests --volume DataVolume:/home/builder/data/NEMSfv3gfs --interactive --shm-size=1gb "${IMG_NAME}"'
+ sh 'docker run --rm --env test_name="${TEST_NAME}" --env run_case="${TEST_CASE}" --workdir /home/builder/ufs-weather-model/tests --volume DataVolume:/home/builder/data/NEMSfv3gfs --interactive --shm-size=16gb "${IMG_NAME}"'
  }
  }
  }