-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This adds a Grafana + Prometheus + Postgres + Loki setup which is deployed via docker-compose and ansible to a remote machine (e.g. an EC2 instance). Two Docker images fill data in Postgres: one gathers data on test cases (only the failed tests are stored in pg) and the other gathers the hierarchy of data for Jenkins builds (jobs (`tvm`) -> builds (`main` or `PR-1234`) -> stages (`build: CPU`) -> steps (`run_a_script.sh`)). Prometheus just scrapes Jenkins.
- Loading branch information
Showing
22 changed files
with
1,367 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
vars.yml | ||
*.tar.gz | ||
*.pem | ||
__pycache__/ | ||
*.env | ||
*.log | ||
*.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
1. Install dependencies | ||
|
||
```bash | ||
pip install ansible | ||
|
||
# For local installs | ||
sudo apt install -y sshpass | ||
``` | ||
|
||
2. Create a file called `vars.yml` that looks like | ||
|
||
```yaml | ||
passwords: | ||
grafana_admin_username: 123 | ||
grafana_admin: 123 | ||
postgres_user: 123 | ||
postgres_password: 123 | ||
``` | ||
|
||
3. Generate keys | ||
|
||
```bash | ||
cd files | ||
openssl req -newkey rsa:2048 -nodes -keyout key.pem -x509 -days 365 -out certificate.pem | ||
``` | ||
|
||
4. Run the Ansible playbook to provision the machine | ||
|
||
```bash | ||
ansible-playbook -i <ssh remote>, install.yml --extra-vars=@vars.yml | ||
|
||
# For local installs | ||
ansible-playbook -i <ssh remote>, install.yml --extra-vars=@vars.yml -kK | ||
``` | ||
|
||
|
||
## Debugging | ||
|
||
```bash | ||
# see why containers aren't up | ||
sudo docker stack ps monitoring --no-trunc | ||
|
||
# see grafana logs | ||
sudo docker service logs monitoring_grafana --raw | ||
|
||
# log into a container | ||
sudo docker ps # get id | ||
sudo docker exec -it <ID> /bin/bash | ||
``` | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
[ssh_connection] | ||
ssh_args = -o ControlMaster=auto -o ControlPersist=60s | ||
pipelining = True | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
version: '3.7' | ||
|
||
services: | ||
grafana: | ||
image: grafana/grafana:8.4.4 | ||
logging: | ||
driver: "json-file" | ||
options: | ||
max-size: "20m" | ||
max-file: "10" | ||
networks: | ||
- monitoring | ||
volumes: | ||
- /etc/tvm/grafana:/var/lib/grafana | ||
- /etc/tvm/grafana-provisioning/:/etc/grafana/provisioning | ||
- /etc/tvm/email_template.html:/usr/share/grafana/public/emails/alert_notification.html | ||
- /etc/tvm/grafana.ini:/etc/grafana/grafana.ini | ||
- /etc/tvm/dashboards:/var/lib/grafana/dashboards | ||
|
||
loki: | ||
image: grafana/loki:main-52f9df4 | ||
logging: | ||
driver: "json-file" | ||
options: | ||
max-size: "20m" | ||
max-file: "10" | ||
networks: | ||
- monitoring | ||
volumes: | ||
- /etc/tvm/loki-config.yml:/etc/loki/loki-config.yaml | ||
- /etc/tvm/loki_data:/data/loki | ||
command: -config.file=/etc/loki/loki-config.yaml | ||
|
||
# Postgres service; credentials come from the `passwords` vars when this template is rendered. | ||
# NOTE(review): POSTGRES_DB is not set here, while the Grafana datasource in this commit uses database `tvm` - confirm something creates that database. | ||
postgres: | ||
image: postgres:12.10 | ||
logging: | ||
driver: "json-file" | ||
options: | ||
max-size: "20m" | ||
max-file: "10" | ||
networks: | ||
- monitoring | ||
environment: | ||
# Quoted so a rendered value containing ' #' or ': ' stays a single YAML string | ||
- "POSTGRES_USER={{ passwords.postgres_user }}" | ||
- "POSTGRES_PASSWORD={{ passwords.postgres_password }}" | ||
volumes: | ||
- /etc/tvm/postgres_data:/var/lib/postgresql/data | ||
|
||
prometheus: | ||
image: prom/prometheus:v2.34.0 | ||
volumes: | ||
- /etc/tvm/prometheus.yml:/etc/prometheus/prometheus.yml | ||
- /etc/tvm/prometheus/:/etc/prometheus/ | ||
- /etc/tvm/prometheus_data:/prometheus | ||
command: | ||
- '--config.file=/etc/prometheus/prometheus.yml' | ||
- '--storage.tsdb.path=/prometheus' | ||
- '--web.console.libraries=/usr/share/prometheus/console_libraries' | ||
- '--web.console.templates=/usr/share/prometheus/consoles' | ||
# ports: | ||
# - 9090:9090 | ||
networks: | ||
- monitoring | ||
|
||
# Runs the jenkins_fetcher image against the postgres and loki services on the monitoring network. | ||
fetcher: | ||
image: jenkins_fetcher:latest | ||
environment: | ||
db_host: postgres:5432 | ||
# Templated credentials are quoted so the rendered values always parse as YAML strings | ||
# (an all-digit value such as 123 would otherwise become an integer). | ||
db_user: "{{ passwords.postgres_user }}" | ||
db_password: "{{ passwords.postgres_password }}" | ||
loki_host: loki:3100 | ||
volumes: | ||
- /etc/tvm/fetcher_data:/opt/fetcher/.httpcache | ||
networks: | ||
- monitoring | ||
depends_on: | ||
- postgres | ||
|
||
# Runs the jenkins_testfetcher image against the postgres and loki services on the monitoring network. | ||
testfetcher: | ||
image: jenkins_testfetcher:latest | ||
environment: | ||
db_host: postgres:5432 | ||
# Templated credentials are quoted so the rendered values always parse as YAML strings | ||
# (an all-digit value such as 123 would otherwise become an integer). | ||
db_user: "{{ passwords.postgres_user }}" | ||
db_password: "{{ passwords.postgres_password }}" | ||
loki_host: loki:3100 | ||
volumes: | ||
- /etc/tvm/fetcher_data:/opt/fetcher/.httpcache | ||
networks: | ||
- monitoring | ||
depends_on: | ||
- postgres | ||
|
||
nginx: | ||
image: nginx:1.21.0 | ||
ports: | ||
- "80:80" | ||
- "443:443" | ||
logging: | ||
driver: "json-file" | ||
options: | ||
max-size: "20m" | ||
max-file: "10" | ||
networks: | ||
- monitoring | ||
volumes: | ||
- "/etc/tvm/http.conf:/etc/nginx/conf.d/default.conf" | ||
- "/etc/tvm/certificate.pem:/etc/nginx/fullchain.pem" | ||
- "/etc/tvm/key.pem:/etc/nginx/privkey.pem" | ||
|
||
networks: | ||
monitoring: | ||
external: false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
{% raw %} | ||
<!DOCTYPE html> | ||
<html xmlns="http://www.w3.org/1999/xhtml"> | ||
<head> | ||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> | ||
<meta name="viewport" content="width=device-width" /> | ||
</head> | ||
|
||
<body> | ||
<p>{{.Name}} [{{.State}}]</p> | ||
<p>{{.Message}}</p> | ||
<p>{{.Error}}</p> | ||
<p>{{.RuleUrl}}</p> | ||
<p>{{.AlertPageUrl}}</p> | ||
<p>Values:</p> | ||
{{range .EvalMatches}} {{.Metric}} = {{.Value}}<br />{{end}} | ||
{{.ImageLink}} | ||
</body> | ||
</html> | ||
{% endraw %} | ||
|
21 changes: 21 additions & 0 deletions
21
monitoring/files/grafana-provisioning/datasources/data.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Grafana datasource provisioning: Loki, Prometheus and Postgres, addressed by their compose service names. | ||
# The {{ passwords.* }} placeholders are filled in when this template is rendered. | ||
apiVersion: 1 | ||
datasources: | ||
- name: Loki | ||
type: loki | ||
# access: proxy | ||
url: http://loki:3100 | ||
version: 1 | ||
- name: Prometheus | ||
type: prometheus | ||
# access: proxy | ||
url: http://prometheus:9090 | ||
version: 1 | ||
- name: Postgres | ||
type: postgres | ||
url: postgres:5432 | ||
database: tvm | ||
# Quoted so the rendered credentials are always YAML strings | ||
user: "{{ passwords.postgres_user }}" | ||
secureJsonData: | ||
password: "{{ passwords.postgres_password }}" | ||
jsonData: | ||
sslmode: disable |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
[security] | ||
admin_user = {{ passwords.grafana_admin_username }} | ||
admin_password = {{ passwords.grafana_admin }} | ||
|
||
[users] | ||
allow_sign_up = false | ||
# New users should be able to use explore and create/edit dashboards | ||
auto_assign_org_role = Editor | ||
|
||
[auth.anonymous] | ||
enabled = false | ||
|
||
[unified_alerting] | ||
enabled = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
server { | ||
listen [::]:80 ipv6only=off; | ||
server_name /; | ||
|
||
location / { | ||
return 301 https://$host$request_uri; | ||
} | ||
} | ||
|
||
# NOTE(review): this upstream group appears unused - the proxy_pass in the 443 server targets | ||
# $grafana_upstream_endpoint (grafana:3000), whereas `server grafana;` names no port. Confirm before removing. | ||
upstream grafana { | ||
server grafana; | ||
} | ||
|
||
server { | ||
listen [::]:443 ipv6only=off ssl; | ||
ssl_certificate /etc/nginx/fullchain.pem; | ||
ssl_certificate_key /etc/nginx/privkey.pem; | ||
|
||
client_max_body_size 500M; | ||
|
||
# Backend address kept in a variable; `location /` passes it to proxy_pass alongside a resolver directive. | ||
set $grafana_upstream_endpoint http://grafana:3000; | ||
|
||
# Adding a workaround for nginx rule https://grafana.com/blog/2021/11/03/grafana-8.2.3-released-with-medium-severity-security-fix-cve-2021-41174-grafana-xss/ | ||
location ~ \{\{ { | ||
deny all; | ||
} | ||
|
||
location / { | ||
resolver 127.0.0.11 valid=30s ipv6=off; | ||
proxy_pass $grafana_upstream_endpoint; | ||
|
||
proxy_set_header Host $host:$server_port; | ||
proxy_set_header X-Real-IP $remote_addr; | ||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; | ||
proxy_set_header X-Forwarded-Host $host; | ||
proxy_set_header X-Forwarded-Port $server_port; | ||
proxy_set_header X-Forwarded-Server $host:$server_port; | ||
proxy_set_header X-Forwarded-Proto $scheme; | ||
proxy_hide_header X-Frame-Options; | ||
|
||
|
||
proxy_set_header Upgrade $http_upgrade; | ||
proxy_set_header Connection "upgrade"; | ||
|
||
proxy_max_temp_file_size 0; | ||
|
||
client_max_body_size 100m; | ||
client_body_buffer_size 128k; | ||
|
||
proxy_connect_timeout 90; | ||
proxy_send_timeout 90; | ||
proxy_read_timeout 90; | ||
|
||
proxy_buffer_size 4k; | ||
proxy_buffers 4 32k; | ||
proxy_busy_buffers_size 64k; | ||
proxy_temp_file_write_size 64k; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
limits_config: | ||
enforce_metric_name: false | ||
reject_old_samples: false | ||
# reject_old_samples_max_age: 168h | ||
max_query_length: 0 | ||
ingestion_rate_mb: 100000 | ||
ingestion_burst_size_mb: 1000000 | ||
max_global_streams_per_user: 0 | ||
cardinality_limit: 10000000 | ||
|
||
auth_enabled: false | ||
|
||
server: | ||
http_listen_port: 3100 | ||
grpc_listen_port: 9096 | ||
|
||
# Shared Loki settings: single replica, in-memory ring, filesystem storage. | ||
# NOTE(review): the paths below live under /tmp/loki, but the compose file in this commit mounts | ||
# the host data directory at /data/loki - presumably chunks/rules do not land on that volume. Confirm. | ||
common: | ||
path_prefix: /tmp/loki | ||
storage: | ||
filesystem: | ||
chunks_directory: /tmp/loki/chunks | ||
rules_directory: /tmp/loki/rules | ||
replication_factor: 1 | ||
ring: | ||
instance_addr: 127.0.0.1 | ||
kvstore: | ||
store: inmemory | ||
|
||
schema_config: | ||
configs: | ||
- from: 2020-10-24 | ||
store: boltdb-shipper | ||
object_store: filesystem | ||
schema: v11 | ||
index: | ||
prefix: index_ | ||
period: 24h |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Global Prometheus settings | ||
global: | ||
scrape_interval: 30s # Set the scrape interval to every 30 seconds. Default is every 1 minute. | ||
evaluation_interval: 30s # Evaluate rules every 30 seconds. The default is every 1 minute. | ||
# scrape_timeout is set to the global default (10s). | ||
|
||
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. | ||
rule_files: | ||
# - "first_rules.yml" | ||
# - "second_rules.yml" | ||
|
||
# A scrape configuration containing exactly one endpoint to scrape: | ||
# the Jenkins host's /prometheus metrics path. | ||
# NOTE(review): no `scheme` is set for this job - confirm the target serves metrics over the scheme Prometheus defaults to. | ||
scrape_configs: | ||
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. | ||
- job_name: "jenkins" | ||
metrics_path: /prometheus | ||
static_configs: | ||
- targets: ["ci.tlcpack.ai"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
FROM python:3.9.12 | ||
|
||
WORKDIR /opt/fetcher | ||
COPY *.py requirements.txt ./ | ||
|
||
RUN python3 -m pip install -r requirements.txt | ||
|
||
CMD python3 forward.py --forever --wait-minutes 15 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
FROM python:3.9.12 | ||
|
||
WORKDIR /opt/fetcher | ||
COPY *.py requirements.txt ./ | ||
|
||
RUN python3 -m pip install -r requirements.txt | ||
|
||
CMD python3 tests_fetcher.py --forever --wait-minutes 15 |
Empty file.
Oops, something went wrong.