From 79c323d2a66a227a37278b4231b0aeba02113f4d Mon Sep 17 00:00:00 2001
From: Robin Salkeld <salkeldr@amazon.com>
Date: Wed, 1 May 2024 16:40:38 -0700
Subject: [PATCH] ci: Daily CI to soak flaky tests (#5382)

### Description
Setting up a daily scheduled build to run the LSP tests 5 times on each
platform, in an effort to proactively uncover flaky test failures that
show up at the worst possible time as you're trying to get your
wonderful PR you've been working on for weeks merged or even worse that
break the nightly build for the third day in a row forcing the team to
drop everything to click Retry Failed Jobs two or three times before ANY
PRs can be merged not that I'm frustrated or anything. :)

The PR CI was previously running the LSP tests twice every time for the
same reason. Given this daily job will soak them more deeply, and that
the `osx` unit test job on PRs in particular has been taking almost 45
minutes and becoming the limiting factor, I've opted to just run them
once on PRs now.

Note that, unlike the original double run, it was simpler to use a matrix,
just as we do for integration tests, only in this case iterating the same
test run 5 times in parallel. It's possible that running twice on the same
runner might trigger more failures, but that seems unlikely since the
second run happens in a fresh process and no state leaks between the
runs AFAICT.

### How has this been tested?
Dry-run on my fork:
https://github.com/robin-aws/dafny/actions/runs/8914692810

I've run it a few times without any failures yet, but we'll have to see
what happens after a week or so of scheduled runs.

<small>By submitting this pull request, I confirm that my contribution
is made under the terms of the [MIT
license](https://github.com/dafny-lang/dafny/blob/master/LICENSE.txt).</small>
---
 .github/workflows/daily-soak-test-build.yml | 23 ++++++++++
 .github/workflows/xunit-tests-reusable.yml  | 48 ++++++++++++++++++---
 2 files changed, 65 insertions(+), 6 deletions(-)
 create mode 100644 .github/workflows/daily-soak-test-build.yml
diff --git a/.github/workflows/daily-soak-test-build.yml b/.github/workflows/daily-soak-test-build.yml
new file mode 100644
index 00000000000..98f9025f244
--- /dev/null
+++ b/.github/workflows/daily-soak-test-build.yml
@@ -0,0 +1,23 @@
+# Scheduled daily build
+#
+# The purpose of this build is to run tests that may have non-deterministic failures
+# many times over, in the hopes of more aggressively revealing
+# flaky tests that occasionally slow down unrelated development.
+
+name: Daily soak test workflow
+
+on:
+  schedule:
+    # Chosen to be hopefully outside of business hours for most contributors'
+    # time zones, and not on the hour to avoid heavy scheduled-job times:
+    # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#schedule
+    - cron: "30 3 * * *"
+  workflow_dispatch:
+
+jobs:
+  daily-soak-build-for-master:
+    # Scheduled runs are limited to the dafny-lang org; manual dispatch works anywhere.
+    if: github.repository_owner == 'dafny-lang' || github.event_name == 'workflow_dispatch'
+    uses: ./.github/workflows/xunit-tests-reusable.yml
+    with:
+      soak_test: true
diff --git a/.github/workflows/xunit-tests-reusable.yml b/.github/workflows/xunit-tests-reusable.yml
index b4f7c95c86c..07934ea3bd4 100644
--- a/.github/workflows/xunit-tests-reusable.yml
+++ b/.github/workflows/xunit-tests-reusable.yml
@@ -2,7 +2,17 @@ name: Run XUnit tests
 
 on:
   workflow_dispatch:
+    inputs:
+      soak_test:  # When true, only the DafnyLanguageServer tests run, repeated 5 times per OS.
+        required: false
+        type: boolean
+        default: false
   workflow_call:
+    inputs:
+      soak_test:  # Same switch as the workflow_dispatch input, for workflows that call this one.
+        required: false
+        type: boolean
+        default: false
 
 ## In the matrix:
 ##  os - name of the Github actions runner
@@ -15,13 +25,32 @@ defaults:
     working-directory: dafny
 
 jobs:
+  populate-matrix-dimensions:  # Emits the JSON list of iteration numbers consumed by the build matrix.
+    runs-on: ubuntu-latest
+    steps:
+      - name: Populate iterations (normal mode)
+        id: populate-iterations-normal
+        if: "!inputs.soak_test"
+        working-directory: .  # This job has no checkout step, so override the workflow default ("dafny").
+        run: echo "iterations=[1]" >> "$GITHUB_OUTPUT"
+      - name: Populate iterations (soak test mode)
+        id: populate-iterations-soak
+        if: inputs.soak_test
+        working-directory: .
+        run: echo "iterations=[$(seq -s , 1 5)]" >> "$GITHUB_OUTPUT"
+    outputs:  # Exactly one of the two steps runs; the skipped step's output is empty, so || picks the other.
+      iterations: ${{ steps.populate-iterations-normal.outputs.iterations || steps.populate-iterations-soak.outputs.iterations }}
+
   build:
+    needs: populate-matrix-dimensions  # Supplies the iteration list used in the matrix below.
     runs-on: ${{matrix.os}}
     timeout-minutes: 60
-    name: ${{matrix.suffix}}
+    name: ${{matrix.suffix}} (${{matrix.iteration}})  # NOTE(review): names gain an "(N)" suffix in normal mode too; confirm required status checks still match.
     strategy:
       fail-fast: false
       matrix:
+        os: [ubuntu-20.04, windows-2019, macos-11]
+        iteration: ${{ fromJson(needs.populate-matrix-dimensions.outputs.iterations) }}  # [1] normally, [1,2,3,4,5] when soak_test is set.
         include:
           - os: macos-11
             suffix: osx
@@ -66,27 +95,34 @@ jobs:
     - name: Build
       run: dotnet build --no-restore ${{env.solutionPath}}
     - name: Run DafnyCore Tests
+      if: "!inputs.soak_test"  # Soak mode exercises only the language server tests.
       run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyCore.Test/coverlet.runsettings Source/DafnyCore.Test
-    - name: Run DafnyLanguageServer Tests
+    - name: Run DafnyLanguageServer Tests (soak test - iteration ${{matrix.iteration}})
+      if: inputs.soak_test  # One run per matrix job; repetition comes from the iteration dimension.
       run: |
-        ## Run twice to catch unstable code (Issue #2673)
         dotnet test --no-restore --blame-hang-timeout 360s --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyLanguageServer.Test/coverlet.runsettings Source/DafnyLanguageServer.Test
-        ## On the second run, collect test coverage data
+    - name: Run DafnyLanguageServer Tests
+      if: "!inputs.soak_test"  # Normal mode: a single run that also collects coverage.
+      run: |
         ##  Note that, for some mysterious reason, --collect doesn't work with the DafnyLanguageServer.Test package
         dotnet coverage collect -o DafnyLanguageServer.Test.coverage dotnet test --no-restore --blame-hang-timeout 360s --logger "console;verbosity=normal" --logger trx Source/DafnyLanguageServer.Test
-
     - name: Run DafnyDriver Tests
+      if: "!inputs.soak_test"
       run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyDriver.Test/coverlet.runsettings Source/DafnyDriver.Test
     - name: Run DafnyPipeline Tests
+      if: "!inputs.soak_test"
       run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyPipeline.Test/coverlet.runsettings Source/DafnyPipeline.Test
     - name: Run DafnyTestGeneration Tests
+      if: "!inputs.soak_test"
       run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyTestGeneration.Test/coverlet.runsettings Source/DafnyTestGeneration.Test
     - name: Run AutoExtern Tests
+      if: "!inputs.soak_test"
       run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/AutoExtern.Test/coverlet.runsettings Source/AutoExtern.Test
     - name: Run DafnyRuntime Tests
+      if: "!inputs.soak_test"
       run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyRuntime.Tests/coverlet.runsettings Source/DafnyRuntime.Tests
     - uses: actions/upload-artifact@v4
-      if: always()
+      if: always() && !inputs.soak_test  # Soak runs skip the upload; presumably the per-OS artifact name would collide across iterations (confirm).
       with:
         name: unit-test-results-${{ matrix.os }}
         path: |