Skip to content

Commit

Permalink
Migrate GitHub Actions to the new respective folders
Browse files Browse the repository at this point in the history
This moves the jenkins/terraform/packer GitHub Actions to the top level
actions folder and changes their triggers and any referenced paths
within
  • Loading branch information
driazati committed May 31, 2022
1 parent 8a05096 commit 831786e
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 33 deletions.
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
name: deploy_jenkins
on:
workflow_dispatch:

concurrency:
group: deploy_jenkins
cancel-in-progress: true

jobs:
deploy_docker:
runs-on: ubuntu-latest
if: github.ref == 'refs/heads/main'
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_ACCESS_KEY_ID: ${{ secrets.JENKINS_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.JENKINS_AWS_SECRET_ACCESS_KEY }}
JENKINS_PRIV_KEY_PROD: "${{ secrets.JENKINS_PRIV_KEY_PROD }}"
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
DOCKERHUB_TOKEN_TLCPACKSTAGING: ${{ secrets.DOCKERHUB_TOKEN_TLCPACKSTAGING }}
defaults:
run:
working-directory: ./jenkins
steps:
-
name: Pull repository
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,21 @@ name: prepare_jenkins
on:
push:
branches:
- 'main'
- main
paths:
- jenkins/**
workflow_dispatch:

concurrency:
group: prepare_jenkins
cancel-in-progress: true

jobs:
build_docker:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./jenkins
steps:
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
Expand All @@ -30,10 +39,13 @@ jobs:
with:
push: true
tags: ${{ secrets.DOCKERHUB_USERNAME }}/jenkins:${{ github.sha }}
context: docker
context: jenkins/docker
prepare_head_node:
needs: build_docker
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./jenkins
env:
OAUTH_CLIENT_ID_PROD: ${{ secrets.OAUTH_CLIENT_ID_PROD }}
OAUTH_CLIENT_SECRET_PROD: ${{ secrets.OAUTH_CLIENT_SECRET_PROD }}
Expand All @@ -44,8 +56,8 @@ jobs:
DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }}
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
DOCKERHUB_TOKEN_TLCPACKSTAGING: ${{ secrets.DOCKERHUB_TOKEN_TLCPACKSTAGING }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_ACCESS_KEY_ID: ${{ secrets.JENKINS_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.JENKINS_AWS_SECRET_ACCESS_KEY }}
TQCHEN_CI_PAT: ${{ secrets.TQCHEN_CI_PAT }}
SCCACHE_BUCKET: ${{ secrets.SCCACHE_BUCKET }}
DOCS_PUSH_TOKEN: ${{ secrets.DOCS_PUSH_TOKEN }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,5 @@ jobs:
VALIDATE_NATURAL_LANGUAGE: false
VALIDATE_ANSIBLE: false
VALIDATE_BASH: false
VALIDATE_TERRAFORM_TERRASCAN: false
VALIDATE_MARKDOWN: false
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
name: packer
on:
push:
paths:
- packer/**
branches:
- 'main'
- main
schedule:
- cron: "0 0 * * 0"


concurrency:
group: packer_build
cancel-in-progress: true

jobs:
build:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./packer
env:
PKR_VAR_aws_access_key: ${{ secrets.AWS_ACCESS_KEY_ID }}
PKR_VAR_aws_secret_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
PKR_VAR_aws_access_key: ${{ secrets.PACKER_AWS_ACCESS_KEY_ID }}
PKR_VAR_aws_secret_key: ${{ secrets.PACKER_AWS_SECRET_ACCESS_KEY }}
steps:
-
name: Pull repository
Expand All @@ -24,7 +34,7 @@ jobs:
PKR_VAR_image_prefix: ci-base
with:
command: build
target: base-images/aws
target: packer/base-images/aws
-
name: Build stock Jenkins agent
uses: hashicorp/packer-github-actions@master
Expand All @@ -34,7 +44,7 @@ jobs:
PKR_VAR_image_prefix: jenkins-stock-agent
with:
command: build
target: jenkins-agents/stock
target: packer/jenkins-agents/stock
-
name: Build GPU Jenkins agent
uses: hashicorp/packer-github-actions@master
Expand All @@ -47,4 +57,4 @@ jobs:
PKR_VAR_nvidia_driver_base_url: "https://us.download.nvidia.com/tesla"
with:
command: build
target: jenkins-agents/gpu
target: packer/jenkins-agents/gpu
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
name: terraform
on:
push:
branches: [main]
branches:
- main
paths:
- terraform/**


concurrency:
group: terraform_apply
cancel-in-progress: true

jobs:
apply:
runs-on: ubuntu-latest
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_ACCESS_KEY_ID: ${{ secrets.TERRAFORM_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.TERRAFORM_AWS_SECRET_ACCESS_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
defaults:
run:
working-directory: ./terraform
steps:
- name: Checkout
uses: actions/checkout@v2
Expand All @@ -18,5 +29,6 @@ jobs:
uses: dflook/terraform-apply@e1c1b7e1d0eed8bda30338e54ff3cd6790a6f35b #v1.25.1
with:
var_file: |
vars/tvm-ci-prod.auto.tfvars
terraform/vars/tvm-ci-prod.auto.tfvars
workspace: tvm-ci-prod
path: ./terraform
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
name: terraform
on:
pull_request:
branches: [main]
branches:
- main
paths:
- terraform/**
pull_request_target:
branches: [main]
branches:
- main
paths:
- terraform/**

jobs:
validate:
Expand All @@ -12,6 +18,9 @@ jobs:
LOGIN: ${{ github.actor }}
PR_REPO_FULL_NAME: ${{ github.event.pull_request.head.repo.full_name }}
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./terraform
outputs:
valid_workflow: ${{ steps.validate_workflow.outputs.valid_workflow }}
steps:
Expand Down Expand Up @@ -40,10 +49,13 @@ jobs:
plan:
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_ACCESS_KEY_ID: ${{ secrets.TERRAFORM_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.TERRAFORM_AWS_SECRET_ACCESS_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./terraform
needs: validate
if: needs.validate.outputs.valid_workflow == 'True'
#These steps run if either the PR is within the same repo or if the PR is on a fork and the committer has deployer access
Expand All @@ -55,13 +67,18 @@ jobs:
id: fmt
uses: dflook/terraform-fmt@f0d4aaacfe91d9319a40cbb2bfeb5bd0ee2f2739 #v1.25.1
continue-on-error: true
with:
path: ./terraform
- name: Terraform Validate
id: validate
uses: dflook/terraform-validate@85bc5b5cab93240dc66bf7b9e744570f12ace9d6 #v1.25.1
with:
path: ./terraform
- name: Terraform Plan
uses: dflook/terraform-plan@7196a67f47a16ef4f7e12ff0b55205e5eb2cee55 #v1.25.1
id: plan
with:
var_file: |
vars/tvm-ci-prod.auto.tfvars
terraform/vars/tvm-ci-prod.auto.tfvars
path: ./terraform
workspace: tvm-ci-prod
52 changes: 52 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# TVM Upstream CI

This repository holds the configuration as code for the CI of the upstream TVM project hosted on [GitHub](https://github.com/apache/tvm). Specifically, this repository currently handles configuration of a public Jenkins instance, as well as a dedicated set of worker nodes--this Jenkins instance is located at [https://ci.tlcpack.ai](https://ci.tlcpack.ai)

* [`jenkins`](./jenkins) - configuration for the Jenkins head node
* [`terraform`](./terraform) - Terraform code to provision CI resources in AWS
* [`packer`](./packer) - Packer configurations for AWS AMIs

## Deploying Jenkins

Restarting Jenkins is an occasional but necessary service interruption. To minimize developer impact when updating TVM's Jenkins, follow these steps:

0. Notify users:
1. message the TVM Discord with a couple hours notice
```
PSA that we'll be restarting Jenkins soon to <insert reason> -- we will need to retrigger in-flight builds as part of this process, so expect CI slowdowns for the next few hours.
```
2. In Jenkins under Manage Jenkins > Configure System > System Message set it to something like

```
<p style="text-align: center; padding: 10px; background-color: #dc5f5f; font-weight: bold; color: white; border-radius: 8px;">Jenkins will restart on 3/22/22 at 10 AM PDT (<a style="color: #c4e9ff" href="https://discuss.tvm.apache.org/t/ci-jenkins-restart-tuesday-3-21-22/12366/2">details</a>)</p>
```
1. Save a list of in-flight jobs (i.e. by saving the webpage at ci.tlcpack.ai to disk)
2. Ensure the latest Terraform definitions have been applied via the [`terraform_apply.yml`](/.github/workflows/terraform_apply.yml) workflow
1. Pull the Terraform output to the head node by running [`prepare.yml`](https://github.com/tlc-pack/ci/actions/workflows/prepare.yml)
2. Trigger a `workflow_dispatch` event for [`deploy.yml`](https://github.com/tlc-pack/ci/actions/workflows/deploy.yml)
3. Wait for Jenkins to come up (5-ish minutes)
4. Cancel any jobs that Jenkins re-queued (due to [this issue](https://issues.jenkins.io/browse/JENKINS-51936) Jenkins may re-schedule old jobs). Restart any jobs that sent webhooks while Jenkins was down. These JavaScript snippets can help:
```javascript
// cancel all jobs from the main Jenkins page at ci.tlcpack.ai
// Intended to be pasted into the browser console while that page is open.
// `cancel` takes an <img> element, reads the href of its parent node, logs it,
// and issues a request to that URL.
// NOTE(review): `Ajax.Request` is presumably the Prototype.js API exposed by the
// Jenkins page -- confirm it is available in the Jenkins version in use.
const cancel = (x) => {
let href = x.parentNode.href;
console.log(href)
new Ajax.Request(href);
}
// Apply `cancel` to every image whose alt text marks it as a cancel/terminate button.
document.querySelectorAll("img[alt=\"cancel this build\"]").forEach(cancel)
document.querySelectorAll("img[alt=\"terminate this build\"]").forEach(cancel)
```

```javascript
// list unique in-flight and queued job URLs from the saved webpage HTML
// Intended to be pasted into the browser console with the saved page open.

// In-flight builds: for each `table[tooltip]` under #executors, take the href
// of the node immediately preceding it.
let builds = Array.from(document.getElementById("executors").querySelectorAll("table[tooltip]")).map(x => x.previousSibling.href)

// Queued builds: append the href of every `a[tooltip]` under #buildQueue.
// (`builds` is declared only once; a second `let builds` in the same scope is a
// SyntaxError and would also throw away the in-flight URLs gathered above.)
document.getElementById("buildQueue").querySelectorAll("a[tooltip]").forEach(a => {
  builds.push(a.href)
})

// De-duplicate while keeping first-seen order, then print one URL per line.
builds = [...new Set(builds)];
for (const b of builds) {
  console.log(b);
}
```
5. Monitor CI for the next day to ensure that autoscaled nodes are being allocated / deallocated as necessary
2 changes: 1 addition & 1 deletion jenkins/ansible/roles/run_jjb/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

- name: Populate JJB Directory
ansible.builtin.copy:
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/jenkins-jobs' }}"
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/jenkins/jenkins-jobs' }}"
dest: "/home/ubuntu"
owner: 1000
group: 1000
Expand Down
8 changes: 4 additions & 4 deletions jenkins/ansible/roles/setup_jenkins_head/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,23 +101,23 @@

- name: Base configuration for Jenkins
ansible.builtin.copy:
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/docker/' + lookup('env', 'ENVIRONMENT') + '/base_config.yaml' }}"
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/jenkins/docker/' + lookup('env', 'ENVIRONMENT') + '/base_config.yaml' }}"
dest: "/var/jenkins/casc/base_config.yaml"
owner: 1000
group: 1000
mode: 0600

- name: Fleet configuration for Jenkins
ansible.builtin.copy:
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/fleet_config.yaml' }}"
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/jenkins/fleet_config.yaml' }}"
dest: "/var/jenkins/casc/fleet_config.yaml"
owner: 1000
group: 1000
mode: 0600

- name: Persistent Agent configuration for Jenkins
ansible.builtin.copy:
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/persistent_agent_config.yaml' }}"
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/jenkins/persistent_agent_config.yaml' }}"
dest: "/var/jenkins/casc/persistent_agent_config.yaml"
owner: 1000
group: 1000
Expand Down Expand Up @@ -162,7 +162,7 @@

- name: Copy over reload script
ansible.builtin.copy:
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/scripts/reload.sh' }}"
src: "{{ lookup('env', 'GITHUB_WORKSPACE') + '/jenkins/scripts/reload.sh' }}"
dest: "/home/ubuntu"
owner: 1000
group: 1000
Expand Down
5 changes: 0 additions & 5 deletions jenkins/readme.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
# TVM Upstream CI

This repository holds the configuration as code for the CI of the upstream TVM project hosted on [GitHub](https://github.com/apache/tvm). Specifically, this repository currently handles configuration of a public Jenkins instance, as well as a dedicated set of worker nodes--this Jenkins instance is located at [https://ci.tlcpack.ai](https://ci.tlcpack.ai)


## Components

### Docker
Expand Down
4 changes: 2 additions & 2 deletions terraform/scripts/validate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import sys
import os

from github import Github


Expand Down Expand Up @@ -31,7 +31,7 @@ def validate(


if __name__ == "__main__":
deployer_path = os.environ["GITHUB_WORKSPACE"] + "/DEPLOYERS.md"
deployer_path = os.environ["GITHUB_WORKSPACE"] + "/terraform/DEPLOYERS.md"
with open(deployer_path) as file:
deployers = [i.strip() for i in file]

Expand Down

0 comments on commit 831786e

Please sign in to comment.