From 1c0774010347f5658d91fe32257bb79a004d7224 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Tue, 28 Jun 2022 16:57:01 +0800 Subject: [PATCH 1/6] feat: add http probe trace time metrics --- probe/http/http.go | 46 +++++++++++ probe/http/metrics.go | 25 +++++- probe/http/trace.go | 164 +++++++++++++++++++++++++++++++++++++++ probe/http/trace_test.go | 137 ++++++++++++++++++++++++++++++++ 4 files changed, 370 insertions(+), 2 deletions(-) create mode 100644 probe/http/trace.go create mode 100644 probe/http/trace_test.go diff --git a/probe/http/http.go b/probe/http/http.go index acc1486b..69909d13 100644 --- a/probe/http/http.go +++ b/probe/http/http.go @@ -22,6 +22,7 @@ import ( "fmt" "io/ioutil" "net/http" + "net/http/httptrace" "net/url" "strconv" "strings" @@ -57,6 +58,8 @@ type HTTP struct { client *http.Client `yaml:"-"` + traceStatus *TraceStats `yaml:"-"` + metrics *metrics `yaml:"-"` } @@ -97,6 +100,9 @@ func (h *HTTP) Config(gConf global.ProbeSettings) error { TLSClientConfig: tls, }, } + // set the http client trace + h.traceStatus = NewTraceStats(kind, "TRACE", name) + if !checkHTTPMethod(h.Method) { h.Method = "GET" } @@ -140,6 +146,11 @@ func (h *HTTP) DoProbe() (bool, string) { req.Close = true req.Header.Set("User-Agent", global.OrgProgVer) + + // Tracing HTTP request + clientTraceCtx := httptrace.WithClientTrace(req.Context(), h.traceStatus.clientTrace) + req = req.WithContext(clientTraceCtx) + resp, err := h.client.Do(req) h.ExportMetrics(resp) if err != nil { @@ -192,4 +203,39 @@ func (h *HTTP) ExportMetrics(resp *http.Response) { "name": h.ProbeName, "status": fmt.Sprintf("%d", code), }).Set(float64(len)) + + h.metrics.DNSDuration.With(prometheus.Labels{ + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + }).Set(toMS(h.traceStatus.dnsTook)) + + h.metrics.ConnectDuration.With(prometheus.Labels{ + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + }).Set(toMS(h.traceStatus.connTook)) + + h.metrics.TLSDuration.With(prometheus.Labels{ + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + }).Set(toMS(h.traceStatus.tlsTook)) + + h.metrics.SendDuration.With(prometheus.Labels{ + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + }).Set(toMS(h.traceStatus.sendTook)) + + h.metrics.WaitDuration.With(prometheus.Labels{ + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + }).Set(toMS(h.traceStatus.waitTook)) + + h.metrics.TransferDuration.With(prometheus.Labels{ + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + }).Set(toMS(h.traceStatus.transferTook)) + + h.metrics.TotalDuration.With(prometheus.Labels{ + "name": h.ProbeName, + "status": fmt.Sprintf("%d", code), + }).Set(toMS(h.traceStatus.totalTook)) } diff --git a/probe/http/metrics.go b/probe/http/metrics.go index 9e9f74fe..a7e45309 100644 --- a/probe/http/metrics.go +++ b/probe/http/metrics.go @@ -25,8 +25,15 @@ import ( // metrics is the metrics for http probe type metrics struct { - StatusCode *prometheus.CounterVec - ContentLen *prometheus.GaugeVec + StatusCode *prometheus.CounterVec + ContentLen *prometheus.GaugeVec + DNSDuration *prometheus.GaugeVec + ConnectDuration *prometheus.GaugeVec + TLSDuration *prometheus.GaugeVec + SendDuration *prometheus.GaugeVec + WaitDuration *prometheus.GaugeVec + TransferDuration *prometheus.GaugeVec + TotalDuration *prometheus.GaugeVec } // newMetrics create the HTTP metrics @@ -37,5 +44,19 @@ func newMetrics(subsystem, name string) *metrics { "HTTP Status Code", []string{"name", "status"}), ContentLen: metric.NewGauge(namespace, subsystem, name, "content_len", "HTTP Content Length", []string{"name", "status"}), + DNSDuration: metric.NewGauge(namespace, subsystem, name, "dns_duration", + "DNS Duration", []string{"name", "status"}), + ConnectDuration: metric.NewGauge(namespace, subsystem, name, "connect_duration", + "Connect Duration", []string{"name", "status"}), + TLSDuration: metric.NewGauge(namespace, subsystem, name, "tls_duration", + "TLS Duration", []string{"name", "status"}), + SendDuration: metric.NewGauge(namespace, subsystem, name, "send_duration", + "Send Duration", []string{"name", "status"}), + WaitDuration: metric.NewGauge(namespace, subsystem, name, "wait_duration", + "Wait Duration", []string{"name", "status"}), + TransferDuration: metric.NewGauge(namespace, subsystem, name, "transfer_duration", + "Transfer Duration", []string{"name", "status"}), + TotalDuration: metric.NewGauge(namespace, subsystem, name, "total_duration", + "Total Duration", []string{"name", "status"}), } } diff --git a/probe/http/trace.go b/probe/http/trace.go new file mode 100644 index 00000000..24d98d4e --- /dev/null +++ b/probe/http/trace.go @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2022, MegaEase + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package http + +import ( + "crypto/tls" + "net/http/httptrace" + "time" + + log "github.com/sirupsen/logrus" +) + +// TraceStats is the stats for a http request +type TraceStats struct { + kind string + tag string + name string + + connStartAt time.Time + connTook time.Duration + dnsStartAt time.Time + dnsTook time.Duration + sendStartAt time.Time + sendTook time.Duration + tlsStartAt time.Time + tlsTook time.Duration + totalStartAt time.Time + totalTook time.Duration + transferStartAt time.Time + transferTook time.Duration + waitStartAt time.Time + waitTook time.Duration + + clientTrace *httptrace.ClientTrace +} + +func (s *TraceStats) getConn(hostPort string) { + s.totalStartAt = time.Now() +} + +func (s *TraceStats) dnsStart(info httptrace.DNSStartInfo) { + s.dnsStartAt = time.Now() +} + +func (s *TraceStats) dnsDone(info httptrace.DNSDoneInfo) { + s.dnsTook = time.Since(s.dnsStartAt) + if info.Err != nil { + return + } + log.Debugf("[%s %s %s] - DNS resolve time %.3fms", s.kind, s.tag, s.name, toMS(s.dnsTook)) + for _, addr := range info.Addrs { + log.Debugf("[%s %s %s] %s\n", s.kind, s.tag, s.name, &addr.IP) + } +} + +func (s *TraceStats) connectStart(network, addr string) { + s.connStartAt = time.Now() +} + +func (s *TraceStats) connectDone(network, addr string, err error) { + s.connTook = time.Since(s.connStartAt) + if err != nil { + return + } + log.Debugf("[%s %s %s] - %s connection created to %s. time: %.3fms\n", + s.kind, s.tag, s.name, network, addr, toMS(s.connTook)) +} + +func (s *TraceStats) tlsStart() { + s.tlsStartAt = time.Now() +} + +func (s *TraceStats) tlsDone(cs tls.ConnectionState, err error) { + s.tlsTook = time.Since(s.tlsStartAt) + if err != nil { + return + } + log.Debugf("[%s %s %s] - tls negotiated to %q, time: %.3fms", + s.kind, s.tag, s.name, cs.ServerName, toMS(s.tlsTook)) +} + +func (s TraceStats) gotConn(info httptrace.GotConnInfo) { + log.Debugf("[%s %s %s] - connection established. reused: %t idle: %t idle time: %dms\n", + s.kind, s.tag, s.name, info.Reused, info.WasIdle, info.IdleTime.Milliseconds()) +} + +func (s *TraceStats) wroteHeaderField(key string, value []string) { + s.sendStartAt = time.Now() +} + +func (s *TraceStats) wroteHeaders() { + s.sendTook = time.Since(s.sendStartAt) + log.Debugf("[%s %s %s] - headers written, time: %.3fms", + s.kind, s.tag, s.name, toMS(s.sendTook)) +} + +func (s *TraceStats) wroteRequest(info httptrace.WroteRequestInfo) { + s.waitStartAt = time.Now() + if info.Err != nil { + return + } +} + +func (s *TraceStats) gotFirstResponseByte() { + s.waitTook = time.Since(s.waitStartAt) + s.transferStartAt = time.Now() + log.Debugf("[%s %s %s] - got first response byte, time: %.3fms", + s.kind, s.tag, s.name, toMS(s.waitTook)) +} + +func (s *TraceStats) putIdleConn(err error) { + s.totalTook = time.Since(s.totalStartAt) + s.transferTook = time.Since(s.transferStartAt) + if err != nil { + return + } + log.Debugf("[%s %s %s] - put conn idle, transfer: %.3f, total: %.3fms", + s.kind, s.tag, s.name, toMS(s.transferTook), toMS(s.totalTook)) +} + +func toMS(t time.Duration) float64 { + return float64(t.Nanoseconds()) / 1000000.0 +} + +// NewTraceStats returns a new traceSTats. +func NewTraceStats(kind, tag, name string) *TraceStats { + s := &TraceStats{ + kind: kind, + name: name, + tag: tag, + } + + s.clientTrace = &httptrace.ClientTrace{ + GetConn: s.getConn, + DNSStart: s.dnsStart, + DNSDone: s.dnsDone, + ConnectStart: s.connectStart, + ConnectDone: s.connectDone, + TLSHandshakeStart: s.tlsStart, + TLSHandshakeDone: s.tlsDone, + GotConn: s.gotConn, + WroteHeaderField: s.wroteHeaderField, + WroteHeaders: s.wroteHeaders, + WroteRequest: s.wroteRequest, + GotFirstResponseByte: s.gotFirstResponseByte, + PutIdleConn: s.putIdleConn, + } + return s +} diff --git a/probe/http/trace_test.go b/probe/http/trace_test.go new file mode 100644 index 00000000..715d33fd --- /dev/null +++ b/probe/http/trace_test.go @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2022, MegaEase + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package http + +import ( + "crypto/tls" + "errors" + "net" + "net/http/httptrace" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func testTimeStart(t *testing.T, start *time.Time, fn func()) { + var newTime time.Time + assert.Equal(t, newTime, *start) + fn() + assert.Less(t, time.Since(*start), time.Minute) +} + +func testTimeDone(t *testing.T, took *time.Duration, fns ...func()) { + assert.Equal(t, time.Duration(0), *took) + for _, fn := range fns { + fn() + assert.Less(t, *took, time.Minute) + } +} + +func TestTraceStart(t *testing.T) { + // Create a new trace + s := NewTraceStats("http", "tag", "test") + assert.Equal(t, "http", s.kind) + assert.Equal(t, "test", s.name) + assert.Equal(t, "tag", s.tag) + + testTimeStart(t, &s.totalStartAt, func() { s.getConn("8080") }) + testTimeStart(t, &s.dnsStartAt, func() { s.dnsStart(httptrace.DNSStartInfo{}) }) + testTimeStart(t, &s.connStartAt, func() { s.connectStart("tcp", "8080") }) + testTimeStart(t, &s.sendStartAt, func() { s.wroteHeaderField("key", []string{"value"}) }) + testTimeStart(t, &s.tlsStartAt, func() { s.tlsStart() }) + testTimeStart(t, &s.transferStartAt, func() { s.gotFirstResponseByte() }) + testTimeStart(t, &s.waitStartAt, func() { s.wroteRequest(httptrace.WroteRequestInfo{}) }) +} + +func TestTraceDone(t *testing.T) { + s := NewTraceStats("http", "tag", "test") + + s.connStartAt = time.Now() + s.dnsStartAt = time.Now() + s.sendStartAt = time.Now() + s.tlsStartAt = time.Now() + s.transferStartAt = time.Now() + s.waitStartAt = time.Now() + s.totalStartAt = time.Now() + + // Test DNS + testTimeDone(t, &s.dnsTook, + func() { + s.dnsDone(httptrace.DNSDoneInfo{ + Addrs: []net.IPAddr{ + {IP: net.IPv4(1, 2, 3, 4)}, + }}) + }, + func() { + s.dnsDone(httptrace.DNSDoneInfo{Err: &net.DNSError{}}) + }, + ) + + // Test connection + testTimeDone(t, &s.connTook, + func() { + s.connectDone("tcp", "8080", nil) + }, + func() { + s.connectDone("tcp", "8080", &net.OpError{}) + }, + ) + + // Test TLS + testTimeDone(t, &s.tlsTook, + func() { + s.tlsDone(tls.ConnectionState{}, nil) + }, + func() { + s.tlsDone(tls.ConnectionState{}, errors.New("test error")) + }, + ) + + // Test send + testTimeDone(t, &s.sendTook, + func() { + s.wroteHeaders() + }, + ) + + // Test Wait + testTimeDone(t, &s.waitTook, + func() { + s.wroteRequest(httptrace.WroteRequestInfo{}) + }, + func() { + s.wroteRequest(httptrace.WroteRequestInfo{Err: errors.New("test error")}) + }, + ) + + // Test transfer + testTimeDone(t, &s.transferTook, + func() { + s.putIdleConn(nil) + }, + func() { + s.putIdleConn(errors.New("test error")) + }, + ) + + took := s.connTook + s.gotConn(httptrace.GotConnInfo{}) + assert.Equal(t, took, s.connTook) + +} From 8e74627dc02af29444ed8e038943fcc8e14c7723 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Tue, 28 Jun 2022 17:02:33 +0800 Subject: [PATCH 2/6] change the version number to v1.7.0 --- global/global.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/global/global.go b/global/global.go index 679c2c30..807c99a7 100644 --- a/global/global.go +++ b/global/global.go @@ -37,7 +37,7 @@ const ( // DefaultProg is the program name DefaultProg = "EaseProbe" // Ver is the program version - Ver = "v1.6.0" + Ver = "v1.7.0" //OrgProg combine organization and program OrgProg = Org + " " + DefaultProg From af815e8d72742d1e4a43d6ab106e7913208f4100 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Tue, 28 Jun 2022 20:27:32 +0800 Subject: [PATCH 3/6] refine the debug information --- probe/http/http.go | 24 ++++++++------- probe/http/trace.go | 73 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 71 insertions(+), 26 deletions(-) diff --git a/probe/http/http.go b/probe/http/http.go index 69909d13..0881927d 100644 --- a/probe/http/http.go +++ b/probe/http/http.go @@ -58,7 +58,7 @@ type HTTP struct { client *http.Client `yaml:"-"` - traceStatus *TraceStats `yaml:"-"` + traceStats *TraceStats `yaml:"-"` metrics *metrics `yaml:"-"` } @@ -100,8 +100,6 @@ func (h *HTTP) Config(gConf global.ProbeSettings) error { TLSClientConfig: tls, }, } - // set the http client trace - h.traceStatus = NewTraceStats(kind, "TRACE", name) if !checkHTTPMethod(h.Method) { h.Method = "GET" @@ -148,10 +146,14 @@ func (h *HTTP) DoProbe() (bool, string) { req.Header.Set("User-Agent", global.OrgProgVer) // Tracing HTTP request - clientTraceCtx := httptrace.WithClientTrace(req.Context(), h.traceStatus.clientTrace) + // set the http client trace + h.traceStats = NewTraceStats(h.ProbeKind, "TRACE", h.ProbeName) + clientTraceCtx := httptrace.WithClientTrace(req.Context(), h.traceStats.clientTrace) req = req.WithContext(clientTraceCtx) resp, err := h.client.Do(req) + h.traceStats.Done() + h.ExportMetrics(resp) if err != nil { log.Errorf("error making get request: %v", err) @@ -207,35 +209,35 @@ func (h *HTTP) ExportMetrics(resp *http.Response) { h.metrics.DNSDuration.With(prometheus.Labels{ "name": h.ProbeName, "status": fmt.Sprintf("%d", code), - }).Set(toMS(h.traceStatus.dnsTook)) + }).Set(toMS(h.traceStats.dnsTook)) h.metrics.ConnectDuration.With(prometheus.Labels{ "name": h.ProbeName, "status": fmt.Sprintf("%d", code), - }).Set(toMS(h.traceStatus.connTook)) + }).Set(toMS(h.traceStats.connTook)) h.metrics.TLSDuration.With(prometheus.Labels{ "name": h.ProbeName, "status": fmt.Sprintf("%d", code), - }).Set(toMS(h.traceStatus.tlsTook)) + }).Set(toMS(h.traceStats.tlsTook)) h.metrics.SendDuration.With(prometheus.Labels{ "name": h.ProbeName, "status": fmt.Sprintf("%d", code), - }).Set(toMS(h.traceStatus.sendTook)) + }).Set(toMS(h.traceStats.sendTook)) h.metrics.WaitDuration.With(prometheus.Labels{ "name": h.ProbeName, "status": fmt.Sprintf("%d", code), - }).Set(toMS(h.traceStatus.waitTook)) + }).Set(toMS(h.traceStats.waitTook)) h.metrics.TransferDuration.With(prometheus.Labels{ "name": h.ProbeName, "status": fmt.Sprintf("%d", code), - }).Set(toMS(h.traceStatus.transferTook)) + }).Set(toMS(h.traceStats.transferTook)) h.metrics.TotalDuration.With(prometheus.Labels{ "name": h.ProbeName, "status": fmt.Sprintf("%d", code), - }).Set(toMS(h.traceStatus.totalTook)) + }).Set(toMS(h.traceStats.totalTook)) } diff --git a/probe/http/trace.go b/probe/http/trace.go index 24d98d4e..459a4b6f 100644 --- a/probe/http/trace.go +++ b/probe/http/trace.go @@ -51,10 +51,14 @@ type TraceStats struct { func (s *TraceStats) getConn(hostPort string) { s.totalStartAt = time.Now() + log.Debugf("[%s %s %s] - total - start get connection: %d", + s.kind, s.tag, s.name, s.totalStartAt.UnixNano()) } func (s *TraceStats) dnsStart(info httptrace.DNSStartInfo) { s.dnsStartAt = time.Now() + log.Debugf("[%s %s %s] - dns - start resolve %s: %d", + s.kind, s.tag, s.name, info.Host, s.dnsStartAt.UnixMicro()) } func (s *TraceStats) dnsDone(info httptrace.DNSDoneInfo) { @@ -62,14 +66,17 @@ func (s *TraceStats) dnsDone(info httptrace.DNSDoneInfo) { if info.Err != nil { return } - log.Debugf("[%s %s %s] - DNS resolve time %.3fms", s.kind, s.tag, s.name, toMS(s.dnsTook)) - for _, addr := range info.Addrs { - log.Debugf("[%s %s %s] %s\n", s.kind, s.tag, s.name, &addr.IP) - } + log.Debugf("[%s %s %s] - dns - resolve ip %s, time %.3fms", + s.kind, s.tag, s.name, info.Addrs, toMS(s.dnsTook)) } func (s *TraceStats) connectStart(network, addr string) { - s.connStartAt = time.Now() + var nilTime time.Time + if s.connStartAt == nilTime { + s.connStartAt = time.Now() + } + log.Debugf("[%s %s %s] - conn - start %s connect to %s: %d", + s.kind, s.tag, s.name, network, addr, s.connStartAt.UnixMicro()) } func (s *TraceStats) connectDone(network, addr string, err error) { @@ -77,12 +84,14 @@ func (s *TraceStats) connectDone(network, addr string, err error) { if err != nil { return } - log.Debugf("[%s %s %s] - %s connection created to %s. time: %.3fms\n", + log.Debugf("[%s %s %s] - conn - %s connection created to %s. time: %.3fms\n", s.kind, s.tag, s.name, network, addr, toMS(s.connTook)) } func (s *TraceStats) tlsStart() { s.tlsStartAt = time.Now() + log.Debugf("[%s %s %s] - tls - start negotiation: %d", + s.kind, s.tag, s.name, s.tlsStartAt.UnixMicro()) } func (s *TraceStats) tlsDone(cs tls.ConnectionState, err error) { @@ -90,7 +99,7 @@ func (s *TraceStats) tlsDone(cs tls.ConnectionState, err error) { if err != nil { return } - log.Debugf("[%s %s %s] - tls negotiated to %q, time: %.3fms", + log.Debugf("[%s %s %s] - tls - negotiated to %q, time: %.3fms", s.kind, s.tag, s.name, cs.ServerName, toMS(s.tlsTook)) } @@ -100,17 +109,25 @@ func (s TraceStats) gotConn(info httptrace.GotConnInfo) { } func (s *TraceStats) wroteHeaderField(key string, value []string) { - s.sendStartAt = time.Now() + var nilTime time.Time + if s.sendStartAt == nilTime { + s.sendStartAt = time.Now() + } + log.Debugf("[%s %s %s] - send - start write header field %s %s : %d", + s.kind, s.tag, s.name, key, value, s.sendStartAt.UnixMicro()) } func (s *TraceStats) wroteHeaders() { s.sendTook = time.Since(s.sendStartAt) - log.Debugf("[%s %s %s] - headers written, time: %.3fms", + log.Debugf("[%s %s %s] - send - headers written, time: %.3fms", s.kind, s.tag, s.name, toMS(s.sendTook)) } func (s *TraceStats) wroteRequest(info httptrace.WroteRequestInfo) { s.waitStartAt = time.Now() + log.Debugf("[%s %s %s] - wait - start write request: %d", + s.kind, s.tag, s.name, s.waitStartAt.UnixMicro()) + if info.Err != nil { return } @@ -119,18 +136,44 @@ func (s *TraceStats) wroteRequest(info httptrace.WroteRequestInfo) { func (s *TraceStats) gotFirstResponseByte() { s.waitTook = time.Since(s.waitStartAt) s.transferStartAt = time.Now() - log.Debugf("[%s %s %s] - got first response byte, time: %.3fms", + log.Debugf("[%s %s %s] - transfer - start transfer the response: %d", + s.kind, s.tag, s.name, s.transferStartAt.UnixMicro()) + log.Debugf("[%s %s %s] - wait - got first response byte, time: %.3fms", s.kind, s.tag, s.name, toMS(s.waitTook)) } func (s *TraceStats) putIdleConn(err error) { + s.Done() +} + +// Done function is used to finish the trace manually +func (s *TraceStats) Done() { s.totalTook = time.Since(s.totalStartAt) s.transferTook = time.Since(s.transferStartAt) - if err != nil { - return - } - log.Debugf("[%s %s %s] - put conn idle, transfer: %.3f, total: %.3fms", - s.kind, s.tag, s.name, toMS(s.transferTook), toMS(s.totalTook)) + log.Debugf("[%s %s %s] - transfer - done, time: %.3fms", + s.kind, s.tag, s.name, toMS(s.transferTook)) + log.Debugf("[%s %s %s] - total - done , time: %.3fms", + s.kind, s.tag, s.name, toMS(s.totalTook)) + + s.Report() +} + +// Report show the trace stats +func (s *TraceStats) Report() { + log.Debugf("[%s %s %s] ======================== Trace Stats ======================", s.kind, s.tag, s.name) + log.Debugf("[%s %s %s] DNS\tConnect\tTLS\tSend\tWait\tTrans\tTotal", + s.kind, s.tag, s.name) + log.Debugf("[%s %s %s] %.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\n", + s.kind, s.tag, s.name, + float64(s.dnsTook.Nanoseconds())/1000000.0, + float64(s.connTook.Nanoseconds())/1000000.0, + float64(s.tlsTook.Nanoseconds())/1000000.0, + float64(s.sendTook.Nanoseconds())/1000000.0, + float64(s.waitTook.Nanoseconds())/1000000.0, + float64(s.transferTook.Nanoseconds())/1000000.0, + float64(s.totalTook.Nanoseconds())/1000000.0, + ) + log.Debugf("[%s %s %s] ===========================================================", s.kind, s.tag, s.name) } func toMS(t time.Duration) float64 { From fede3c2775987345d5ac571bd96971b4375e43cc Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 30 Jun 2022 19:27:49 +0800 Subject: [PATCH 4/6] add the prometheus metrics into README.md --- README.md | 42 ++++++++++++++++++++++++++++++++++++++---- probe/http/metrics.go | 2 +- probe/tls/metrics.go | 2 +- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2de2e595..21951baf 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ EaseProbe is a simple, standalone, and lightWeight tool that can do health/statu - [1.3 Report](#13-report) - [1.4 Channel](#14-channel) - [1.5 Administration](#15-administration) - - [1.6 Prometheus Metrics](#16-prometheus-metrics) + - [1.6 Prometheus Metrics Exporter](#16-prometheus-metrics-exporter) - [2. Getting Started](#2-getting-started) - [2.1 Build](#21-build) - [2.2 Configure](#22-configure) @@ -375,15 +375,49 @@ There are some administration configuration options: EaseProbe accepts the `HUP` signal to rotate the log. -### 1.6 Prometheus Metrics +### 1.6 Prometheus Metrics Exporter + +EaseProbe supports Prometheus metrics exporter. The Prometheus endpoint is `http://localhost:8181/metrics` by default. + +Currently, All of the Probers support the following metrics: + + - `total`: the total number of probes + - `duration`: Probe duration in milliseconds + - `status`: Probe status + - `SLA`: Probe SLA percentage + +And for the different Probers, the following metrics are available: + +- HTTP Probe + - `status_code`: HTTP status code + - `content_len`: HTTP content length + - `dns_duration`: DNS duration in milliseconds + - `connect_duration`: TCP connection duration in milliseconds + - `tls_duration`: TLS handshake duration in milliseconds + - `send_duration`: HTTP send duration in milliseconds + - `wait_duration`: HTTP wait duration in milliseconds + - `transfer_duration`: HTTP transfer duration in milliseconds + - `total_duration`: HTTP total duration in milliseconds + +- TLS Probe + - `earliest_cert_expiry`: last TLS chain expiry in timestamp seconds + - `last_chain_expiry_timestamp_seconds`: earliest TLS cert expiry in Unix time + +- Shell & SSH Probe + - `exit_code`: exit code of the command + - `output_len`: length of the output + +- Host Probe + - `cpu`: CPU usage in percentage + - `memory`: memory usage in percentage + - `disk`: disk usage in percentage -EaseProbe supports Prometheus metrics. The Prometheus endpoint is `http://localhost:8181/metrics` by default. The following snapshot is the Grafana panel for host CPU metrics ![](./docs/grafana.demo.png) -Refer to the [Global Setting Configuration](#38-global-setting-configuration) for further details on how to configure the HTTP server. +Refer to the [Global Setting Configuration](#39-global-setting-configuration) for further details on how to configure the HTTP server. ## 2. Getting Started diff --git a/probe/http/metrics.go b/probe/http/metrics.go index a7e45309..bc65f8a5 100644 --- a/probe/http/metrics.go +++ b/probe/http/metrics.go @@ -47,7 +47,7 @@ func newMetrics(subsystem, name string) *metrics { DNSDuration: metric.NewGauge(namespace, subsystem, name, "dns_duration", "DNS Duration", []string{"name", "status"}), ConnectDuration: metric.NewGauge(namespace, subsystem, name, "connect_duration", - "Connect Duration", []string{"name", "status"}), + "TCP Connection Duration", []string{"name", "status"}), TLSDuration: metric.NewGauge(namespace, subsystem, name, "tls_duration", "TLS Duration", []string{"name", "status"}), SendDuration: metric.NewGauge(namespace, subsystem, name, "send_duration", diff --git a/probe/tls/metrics.go b/probe/tls/metrics.go index 85b78e21..d980a22c 100644 --- a/probe/tls/metrics.go +++ b/probe/tls/metrics.go @@ -39,7 +39,7 @@ func newMetrics(subsystem, name string) *metrics { EarliestCertExpiry: metric.NewGauge(namespace, subsystem, name, "earliest_cert_expiry", "last TLS chain expiry in timestamp seconds", []string{}), LastChainExpiryTimestampSeconds: metric.NewGauge(namespace, subsystem, name, "last_chain_expiry_timestamp_seconds", - "earliest TLS cert expiry in unixtime", []string{}), + "earliest TLS cert expiry in unix time", []string{}), } } From 8e6e6b1bdd895e21ad49b29c6e3fe510f1589260 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 30 Jun 2022 20:07:38 +0800 Subject: [PATCH 5/6] bug-fix: the regex sould match the whole string --- metric/prometheus.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/metric/prometheus.go b/metric/prometheus.go index 25983f20..fa527652 100644 --- a/metric/prometheus.go +++ b/metric/prometheus.go @@ -40,8 +40,8 @@ var ( ) var ( - validMetric = regexp.MustCompile(`[a-zA-Z_:][a-zA-Z0-9_:]*`) - validLabel = regexp.MustCompile(`[a-zA-Z_][a-zA-Z0-9_]*`) + validMetric = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`) + validLabel = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*$`) ) // Counter get the counter metric by key @@ -112,13 +112,11 @@ func NewGauge(namespace, subsystem, name, metric string, func getAndValid(namespace, subsystem, name, metric string, labels []string) (string, error) { metricName := GetName(namespace, subsystem, name, metric) if ValidMetricName(metricName) == false { - return "", fmt.Errorf("Invalid metric name: %s", metricName) } for _, l := range labels { if ValidLabelName(l) == false { - return "", fmt.Errorf("Invalid label name: %s", l) } } From 6df25948877849da0bae99eab59d58dc8d5cd0d2 Mon Sep 17 00:00:00 2001 From: Hao Chen Date: Thu, 30 Jun 2022 20:08:29 +0800 Subject: [PATCH 6/6] add more test cases for prometheus module --- metric/prometheus_test.go | 69 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/metric/prometheus_test.go b/metric/prometheus_test.go index 7b241903..721577bb 100644 --- a/metric/prometheus_test.go +++ b/metric/prometheus_test.go @@ -20,6 +20,7 @@ package metric import ( "testing" + "bou.ke/monkey" "github.com/stretchr/testify/assert" ) @@ -73,3 +74,71 @@ func TestNewMetrics(t *testing.T) { assert.NotNil(t, GetName("namespace_subsystem_gauge_metric")) assert.NotNil(t, Gauge("namespace_subsystem_gauge_metric")) } + +func TestName(t *testing.T) { + + assert.False(t, ValidMetricName("")) + assert.False(t, ValidMetricName(" ")) + assert.False(t, ValidMetricName("\n")) + assert.False(t, ValidMetricName("5name")) + assert.False(t, ValidMetricName("name%")) + assert.False(t, ValidMetricName("hello-world")) + assert.False(t, ValidMetricName("hello-world@")) + + assert.True(t, ValidMetricName("name5")) + assert.True(t, ValidMetricName(":name")) + assert.True(t, ValidMetricName("hello_world:name")) + assert.True(t, ValidMetricName("_hello_world:name")) + assert.True(t, ValidMetricName(":_hello_world:name")) + assert.True(t, ValidMetricName("namespace_name_metric")) + + assert.False(t, ValidLabelName("")) + assert.False(t, ValidLabelName(" ")) + assert.False(t, ValidLabelName("\n")) + assert.False(t, ValidLabelName("5name")) + assert.False(t, ValidLabelName("name%")) + assert.False(t, ValidLabelName("hello-world")) + assert.False(t, ValidLabelName("hello-world@")) + + assert.True(t, ValidLabelName("_name5")) + assert.True(t, ValidLabelName("name_")) + assert.True(t, ValidLabelName("name5")) + assert.True(t, ValidLabelName("hello_world")) + assert.True(t, ValidLabelName("_hello_world_")) + assert.True(t, ValidLabelName("_hello_world_1_")) +} + +func TestDuplicateName(t *testing.T) { + counter1 := NewCounter("namespace", "subsystem", "counter", "metric", + "help", []string{}) + counter2 := NewCounter("namespace", "subsystem", "counter", "metric", + "help", []string{}) + assert.Equal(t, counter1, counter2) + + gauge1 := NewGauge("namespace", "subsystem", "gauge", "metric", + "help", []string{}) + gauge2 := NewGauge("namespace", "subsystem", "gauge", "metric", + "help", []string{}) + assert.Equal(t, gauge1, gauge2) +} + +func TestInvalidName(t *testing.T) { + + //label errors + counter := NewCounter("namespace", "subsystem", "counter", "metric", + "help", []string{"label-1", "label:2"}) + assert.Nil(t, counter) + + gauge := NewGauge("namespace", "subsystem", "gauge", "metric", + "help", []string{"label-1", "label:2"}) + assert.Nil(t, gauge) + + monkey.Patch(ValidMetricName, func(name string) bool { + return false + }) + counter = NewCounter("namespace", "subsystem", "counter", "metric", + "help", []string{}) + assert.Nil(t, counter) + + monkey.UnpatchAll() +}