From 79c323d2a66a227a37278b4231b0aeba02113f4d Mon Sep 17 00:00:00 2001 From: Robin Salkeld Date: Wed, 1 May 2024 16:40:38 -0700 Subject: [PATCH] ci: Daily CI to soak flaky tests (#5382) ### Description Setting up a daily scheduled build to run the LSP tests 5 times on each platform, in an effort to proactively uncover flaky test failures that show up at the worst possible time, as you're trying to get your wonderful PR you've been working on for weeks merged, or even worse, that break the nightly build for the third day in a row, forcing the team to drop everything to click "Retry Failed Jobs" two or three times before ANY PRs can be merged. Not that I'm frustrated or anything. :) The PR CI was previously running the LSP tests twice every time for the same reason. Given this daily job will soak them more deeply, and that the `osx` unit test job on PRs in particular has been taking almost 45 minutes and becoming the limiting factor, I've opted to just run them once on PRs now. Note that unlike the original double run, it was simpler to use matrices just as we do for integration tests, only in this case just iterating the same test run in parallel 5 times. It's possible that running twice on the same runner might trigger more failures, but it seems unlikely since the second run happens in a fresh process and no state leaks between the runs AFAICT. ### How has this been tested? Dry-run on my fork: https://github.com/robin-aws/dafny/actions/runs/8914692810 I've run it a few times without any failures yet, but we'll have to see what happens after a week or so of scheduled runs. By submitting this pull request, I confirm that my contribution is made under the terms of the [MIT license](https://github.com/dafny-lang/dafny/blob/master/LICENSE.txt). 
--- .github/workflows/daily-soak-test-build.yml | 23 ++++++++++ .github/workflows/xunit-tests-reusable.yml | 48 ++++++++++++++++++--- 2 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/daily-soak-test-build.yml diff --git a/.github/workflows/daily-soak-test-build.yml b/.github/workflows/daily-soak-test-build.yml new file mode 100644 index 00000000000..98f9025f244 --- /dev/null +++ b/.github/workflows/daily-soak-test-build.yml @@ -0,0 +1,23 @@ + +# Scheduled daily build +# +# The purpose of this build is to run tests that may have non-deterministic failures +# many times over, in the hopes of more aggressively revealing +# flaky tests that occasionally slow down unrelated development. + +name: Daily soak test workflow + +on: + schedule: + # Chosen to be hopefully outside of business hours for most contributors' + # time zones, and not on the hour to avoid heavy scheduled-job times: + # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#schedule + - cron: "30 3 * * *" + workflow_dispatch: + +jobs: + daily-soak-build-for-master: + if: github.repository_owner == 'dafny-lang' || github.event_name == 'workflow_dispatch' + uses: ./.github/workflows/xunit-tests-reusable.yml + with: + soak_test: true diff --git a/.github/workflows/xunit-tests-reusable.yml b/.github/workflows/xunit-tests-reusable.yml index b4f7c95c86c..07934ea3bd4 100644 --- a/.github/workflows/xunit-tests-reusable.yml +++ b/.github/workflows/xunit-tests-reusable.yml @@ -2,7 +2,17 @@ name: Run XUnit tests on: workflow_dispatch: + inputs: + soak_test: + required: false + type: boolean + default: false workflow_call: + inputs: + soak_test: + required: false + type: boolean + default: false ## In the matrix: ## os - name of the Github actions runner @@ -15,13 +25,32 @@ defaults: working-directory: dafny jobs: + populate-matrix-dimensions: + runs-on: ubuntu-latest + steps: + - name: Populate iterations (normal mode) + id: 
populate-iterations-normal + if: "!inputs.soak_test" + working-directory: . + run: echo "iterations=[1]" >> $GITHUB_OUTPUT + - name: Populate iterations (soak test mode) + id: populate-iterations-soak + if: inputs.soak_test + working-directory: . + run: echo "iterations=[`seq -s , 1 5`]" >> $GITHUB_OUTPUT + outputs: + iterations: ${{ steps.populate-iterations-normal.outputs.iterations }} ${{ steps.populate-iterations-soak.outputs.iterations }} + build: + needs: populate-matrix-dimensions runs-on: ${{matrix.os}} timeout-minutes: 60 - name: ${{matrix.suffix}} + name: ${{matrix.suffix}} (${{matrix.iteration}}) strategy: fail-fast: false matrix: + os: [ubuntu-20.04, windows-2019, macos-11] + iteration: ${{ fromJson(needs.populate-matrix-dimensions.outputs.iterations) }} include: - os: macos-11 suffix: osx @@ -66,27 +95,34 @@ jobs: - name: Build run: dotnet build --no-restore ${{env.solutionPath}} - name: Run DafnyCore Tests + if: "!inputs.soak_test" run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyCore.Test/coverlet.runsettings Source/DafnyCore.Test - - name: Run DafnyLanguageServer Tests + - name: Run DafnyLanguageServer Tests (soak test - iteration ${{matrix.iteration}}) + if: inputs.soak_test run: | - ## Run twice to catch unstable code (Issue #2673) dotnet test --no-restore --blame-hang-timeout 360s --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyLanguageServer.Test/coverlet.runsettings Source/DafnyLanguageServer.Test - ## On the second run, collect test coverage data + - name: Run DafnyLanguageServer Tests + if: "!inputs.soak_test" + run: | ## Note that, for some mysterious reason, --collect doesn't work with the DafnyLanguageServer.Test package dotnet coverage collect -o DafnyLanguageServer.Test.coverage dotnet test --no-restore --blame-hang-timeout 360s --logger "console;verbosity=normal" --logger trx 
Source/DafnyLanguageServer.Test - - name: Run DafnyDriver Tests + if: "!inputs.soak_test" run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyDriver.Test/coverlet.runsettings Source/DafnyDriver.Test - name: Run DafnyPipeline Tests + if: "!inputs.soak_test" run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyPipeline.Test/coverlet.runsettings Source/DafnyPipeline.Test - name: Run DafnyTestGeneration Tests + if: "!inputs.soak_test" run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyTestGeneration.Test/coverlet.runsettings Source/DafnyTestGeneration.Test - name: Run AutoExtern Tests + if: "!inputs.soak_test" run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/AutoExtern.Test/coverlet.runsettings Source/AutoExtern.Test - name: Run DafnyRuntime Tests + if: "!inputs.soak_test" run: dotnet test --no-restore --logger "console;verbosity=normal" --logger trx --collect:"XPlat Code Coverage" --settings Source/DafnyRuntime.Tests/coverlet.runsettings Source/DafnyRuntime.Tests - uses: actions/upload-artifact@v4 - if: always() + if: always() && !inputs.soak_test with: name: unit-test-results-${{ matrix.os }} path: |