Skip to content

Commit

Permalink
feat: Add HTTP Probe trace time metrics (#150)
Browse files Browse the repository at this point in the history
* feat: add http probe trace time metrics

* change the version number to v1.7.0

* refine the debug information

* add the prometheus metrics into README.md

* bug-fix: the regex should match the whole string

* add more test cases for prometheus module
  • Loading branch information
haoel committed Jun 30, 2022
1 parent 2f049ee commit db50a78
Show file tree
Hide file tree
Showing 9 changed files with 526 additions and 12 deletions.
42 changes: 38 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ EaseProbe is a simple, standalone, and lightWeight tool that can do health/statu
- [1.3 Report](#13-report)
- [1.4 Channel](#14-channel)
- [1.5 Administration](#15-administration)
- [1.6 Prometheus Metrics](#16-prometheus-metrics)
- [1.6 Prometheus Metrics Exporter](#16-prometheus-metrics-exporter)
- [2. Getting Started](#2-getting-started)
- [2.1 Build](#21-build)
- [2.2 Configure](#22-configure)
Expand Down Expand Up @@ -375,15 +375,49 @@ There are some administration configuration options:

EaseProbe accepts the `HUP` signal to rotate the log.

### 1.6 Prometheus Metrics
### 1.6 Prometheus Metrics Exporter

EaseProbe supports a Prometheus metrics exporter. The Prometheus endpoint is `http://localhost:8181/metrics` by default.

Currently, all of the Probers support the following metrics:

- `total`: the total number of probes
- `duration`: Probe duration in milliseconds
- `status`: Probe status
- `SLA`: Probe SLA percentage

And for the different Probers, the following metrics are available:

- HTTP Probe
- `status_code`: HTTP status code
- `content_len`: HTTP content length
- `dns_duration`: DNS duration in milliseconds
- `connect_duration`: TCP connection duration in milliseconds
- `tls_duration`: TLS handshake duration in milliseconds
- `send_duration`: HTTP send duration in milliseconds
- `wait_duration`: HTTP wait duration in milliseconds
- `transfer_duration`: HTTP transfer duration in milliseconds
- `total_duration`: HTTP total duration in milliseconds

- TLS Probe
- `earliest_cert_expiry`: earliest TLS cert expiry in Unix time
- `last_chain_expiry_timestamp_seconds`: last TLS chain expiry in timestamp seconds

- Shell & SSH Probe
- `exit_code`: exit code of the command
- `output_len`: length of the output

- Host Probe
- `cpu`: CPU usage in percentage
- `memory`: memory usage in percentage
- `disk`: disk usage in percentage

EaseProbe supports Prometheus metrics. The Prometheus endpoint is `http://localhost:8181/metrics` by default.

The following snapshot is the Grafana panel for host CPU metrics

![](./docs/grafana.demo.png)

Refer to the [Global Setting Configuration](#38-global-setting-configuration) for further details on how to configure the HTTP server.
Refer to the [Global Setting Configuration](#39-global-setting-configuration) for further details on how to configure the HTTP server.

## 2. Getting Started

Expand Down
2 changes: 1 addition & 1 deletion global/global.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const (
// DefaultProg is the program name
DefaultProg = "EaseProbe"
// Ver is the program version
Ver = "v1.6.0"
Ver = "v1.7.0"

//OrgProg combine organization and program
OrgProg = Org + " " + DefaultProg
Expand Down
6 changes: 2 additions & 4 deletions metric/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ var (
)

var (
validMetric = regexp.MustCompile(`[a-zA-Z_:][a-zA-Z0-9_:]*`)
validLabel = regexp.MustCompile(`[a-zA-Z_][a-zA-Z0-9_]*`)
validMetric = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`)
validLabel = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*$`)
)

// Counter get the counter metric by key
Expand Down Expand Up @@ -112,13 +112,11 @@ func NewGauge(namespace, subsystem, name, metric string,
func getAndValid(namespace, subsystem, name, metric string, labels []string) (string, error) {
metricName := GetName(namespace, subsystem, name, metric)
if ValidMetricName(metricName) == false {

return "", fmt.Errorf("Invalid metric name: %s", metricName)
}

for _, l := range labels {
if ValidLabelName(l) == false {

return "", fmt.Errorf("Invalid label name: %s", l)
}
}
Expand Down
69 changes: 69 additions & 0 deletions metric/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package metric
import (
"testing"

"bou.ke/monkey"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -73,3 +74,71 @@ func TestNewMetrics(t *testing.T) {
assert.NotNil(t, GetName("namespace_subsystem_gauge_metric"))
assert.NotNil(t, Gauge("namespace_subsystem_gauge_metric"))
}

// TestName checks ValidMetricName and ValidLabelName against a table of
// names that must be rejected and names that must be accepted.
func TestName(t *testing.T) {
	// Names both validators must reject: empty, whitespace-only, starting
	// with a digit, or containing characters such as '%', '-' and '@'.
	rejected := []string{
		"",
		" ",
		"\n",
		"5name",
		"name%",
		"hello-world",
		"hello-world@",
	}

	// Metric names that must be accepted; ':' is expected to be valid here.
	acceptedMetrics := []string{
		"name5",
		":name",
		"hello_world:name",
		"_hello_world:name",
		":_hello_world:name",
		"namespace_name_metric",
	}

	// Label names that must be accepted.
	acceptedLabels := []string{
		"_name5",
		"name_",
		"name5",
		"hello_world",
		"_hello_world_",
		"_hello_world_1_",
	}

	for _, name := range rejected {
		assert.False(t, ValidMetricName(name), "metric name %q", name)
	}
	for _, name := range acceptedMetrics {
		assert.True(t, ValidMetricName(name), "metric name %q", name)
	}

	for _, name := range rejected {
		assert.False(t, ValidLabelName(name), "label name %q", name)
	}
	for _, name := range acceptedLabels {
		assert.True(t, ValidLabelName(name), "label name %q", name)
	}
}

// TestDuplicateName asserts that calling NewCounter (and likewise NewGauge)
// twice with identical arguments yields equal results.
func TestDuplicateName(t *testing.T) {
	noLabels := []string{}

	// Same counter requested twice.
	firstCounter := NewCounter("namespace", "subsystem", "counter", "metric", "help", noLabels)
	secondCounter := NewCounter("namespace", "subsystem", "counter", "metric", "help", noLabels)
	assert.Equal(t, firstCounter, secondCounter)

	// Same gauge requested twice.
	firstGauge := NewGauge("namespace", "subsystem", "gauge", "metric", "help", noLabels)
	secondGauge := NewGauge("namespace", "subsystem", "gauge", "metric", "help", noLabels)
	assert.Equal(t, firstGauge, secondGauge)
}

// TestInvalidName asserts that NewCounter and NewGauge return nil when given
// label names the test expects to be rejected, and that NewCounter returns
// nil when ValidMetricName reports the metric name as invalid.
func TestInvalidName(t *testing.T) {
	// Label errors: "label-1" and "label:2" are passed as label names.
	counter := NewCounter("namespace", "subsystem", "counter", "metric",
		"help", []string{"label-1", "label:2"})
	assert.Nil(t, counter)

	gauge := NewGauge("namespace", "subsystem", "gauge", "metric",
		"help", []string{"label-1", "label:2"})
	assert.Nil(t, gauge)

	// Metric name error: force ValidMetricName to reject every name.
	monkey.Patch(ValidMetricName, func(name string) bool {
		return false
	})
	// Deferred right after patching so the patch is removed even if the code
	// below panics or exits early; otherwise it would leak into other tests.
	defer monkey.UnpatchAll()

	counter = NewCounter("namespace", "subsystem", "counter", "metric",
		"help", []string{})
	assert.Nil(t, counter)
}
48 changes: 48 additions & 0 deletions probe/http/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"fmt"
"io/ioutil"
"net/http"
"net/http/httptrace"
"net/url"
"strconv"
"strings"
Expand Down Expand Up @@ -57,6 +58,8 @@ type HTTP struct {

client *http.Client `yaml:"-"`

traceStats *TraceStats `yaml:"-"`

metrics *metrics `yaml:"-"`
}

Expand Down Expand Up @@ -97,6 +100,7 @@ func (h *HTTP) Config(gConf global.ProbeSettings) error {
TLSClientConfig: tls,
},
}

if !checkHTTPMethod(h.Method) {
h.Method = "GET"
}
Expand Down Expand Up @@ -140,7 +144,16 @@ func (h *HTTP) DoProbe() (bool, string) {
req.Close = true

req.Header.Set("User-Agent", global.OrgProgVer)

// Tracing HTTP request
// set the http client trace
h.traceStats = NewTraceStats(h.ProbeKind, "TRACE", h.ProbeName)
clientTraceCtx := httptrace.WithClientTrace(req.Context(), h.traceStats.clientTrace)
req = req.WithContext(clientTraceCtx)

resp, err := h.client.Do(req)
h.traceStats.Done()

h.ExportMetrics(resp)
if err != nil {
log.Errorf("error making get request: %v", err)
Expand Down Expand Up @@ -192,4 +205,39 @@ func (h *HTTP) ExportMetrics(resp *http.Response) {
"name": h.ProbeName,
"status": fmt.Sprintf("%d", code),
}).Set(float64(len))

h.metrics.DNSDuration.With(prometheus.Labels{
"name": h.ProbeName,
"status": fmt.Sprintf("%d", code),
}).Set(toMS(h.traceStats.dnsTook))

h.metrics.ConnectDuration.With(prometheus.Labels{
"name": h.ProbeName,
"status": fmt.Sprintf("%d", code),
}).Set(toMS(h.traceStats.connTook))

h.metrics.TLSDuration.With(prometheus.Labels{
"name": h.ProbeName,
"status": fmt.Sprintf("%d", code),
}).Set(toMS(h.traceStats.tlsTook))

h.metrics.SendDuration.With(prometheus.Labels{
"name": h.ProbeName,
"status": fmt.Sprintf("%d", code),
}).Set(toMS(h.traceStats.sendTook))

h.metrics.WaitDuration.With(prometheus.Labels{
"name": h.ProbeName,
"status": fmt.Sprintf("%d", code),
}).Set(toMS(h.traceStats.waitTook))

h.metrics.TransferDuration.With(prometheus.Labels{
"name": h.ProbeName,
"status": fmt.Sprintf("%d", code),
}).Set(toMS(h.traceStats.transferTook))

h.metrics.TotalDuration.With(prometheus.Labels{
"name": h.ProbeName,
"status": fmt.Sprintf("%d", code),
}).Set(toMS(h.traceStats.totalTook))
}
25 changes: 23 additions & 2 deletions probe/http/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,15 @@ import (

// metrics is the metrics for http probe
type metrics struct {
StatusCode *prometheus.CounterVec
ContentLen *prometheus.GaugeVec
StatusCode *prometheus.CounterVec
ContentLen *prometheus.GaugeVec
DNSDuration *prometheus.GaugeVec
ConnectDuration *prometheus.GaugeVec
TLSDuration *prometheus.GaugeVec
SendDuration *prometheus.GaugeVec
WaitDuration *prometheus.GaugeVec
TransferDuration *prometheus.GaugeVec
TotalDuration *prometheus.GaugeVec
}

// newMetrics create the HTTP metrics
Expand All @@ -37,5 +44,19 @@ func newMetrics(subsystem, name string) *metrics {
"HTTP Status Code", []string{"name", "status"}),
ContentLen: metric.NewGauge(namespace, subsystem, name, "content_len",
"HTTP Content Length", []string{"name", "status"}),
DNSDuration: metric.NewGauge(namespace, subsystem, name, "dns_duration",
"DNS Duration", []string{"name", "status"}),
ConnectDuration: metric.NewGauge(namespace, subsystem, name, "connect_duration",
"TCP Connection Duration", []string{"name", "status"}),
TLSDuration: metric.NewGauge(namespace, subsystem, name, "tls_duration",
"TLS Duration", []string{"name", "status"}),
SendDuration: metric.NewGauge(namespace, subsystem, name, "send_duration",
"Send Duration", []string{"name", "status"}),
WaitDuration: metric.NewGauge(namespace, subsystem, name, "wait_duration",
"Wait Duration", []string{"name", "status"}),
TransferDuration: metric.NewGauge(namespace, subsystem, name, "transfer_duration",
"Transfer Duration", []string{"name", "status"}),
TotalDuration: metric.NewGauge(namespace, subsystem, name, "total_duration",
"Total Duration", []string{"name", "status"}),
}
}
Loading

0 comments on commit db50a78

Please sign in to comment.