forked from open-telemetry/opentelemetry-collector-contrib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
regex_parser.go
233 lines (201 loc) · 7.2 KB
/
regex_parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
// Copyright 2019, OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package protocol // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/carbonreceiver/protocol"
import (
"errors"
"fmt"
"regexp"
"sort"
"strings"
metricspb "github.com/census-instrumentation/opencensus-proto/gen-go/metrics/v1"
)
const (
// metricNameCapturePrefix is the prefix of the named captures whose matched
// values are used to build the metric name.
metricNameCapturePrefix = "name_"
// keyCapturePrefix is the prefix of the named captures that become metric
// labels; the label key is the capture name with this prefix removed.
keyCapturePrefix = "key_"
)
// RegexParserConfig has the configuration for a parser that can break down a
// Carbon "metric path" and transform it into the corresponding metric labels
// according to a series of regular expression rules (see below for details).
//
// This is typically used to extract labels from a "naming hierarchy", see
// https://graphite.readthedocs.io/en/latest/feeding-carbon.html#step-1-plan-a-naming-hierarchy
//
// Examples:
//
// 1. Rule:
// - regexp: "(?P<key_svc>[^.]+)\.(?P<key_host>[^.]+)\.cpu\.seconds"
// name_prefix: cpu_seconds
// labels:
// k: v
// Metric path: "service_name.host00.cpu.seconds"
// Resulting metric:
// name: cpu_seconds
// label keys: {"svc", "host", "k"}
// label values: {"service_name", "host00", "v"}
//
// 2. Rule (no name_prefix, and an empty name_separator on the parser):
// - regexp: "^(?P<key_svc>[^.]+)\.(?P<key_host>[^.]+)\.(?P<name_0>[^.]+)\.(?P<name_1>[^.]+)$"
// Metric path: "svc_02.host02.avg.duration"
// Resulting metric:
// name: avgduration
// label keys: {"svc", "host"}
// label values: {"svc_02", "host02"}
//
type RegexParserConfig struct {
// Rules contains the regular expression rules to be used by the parser.
// Rules are tried in order and only the first rule that matches is applied,
// performing the transformations configured in the respective RegexRule
// struct. If no rule matches, the metric is then processed by the
// "plaintext" parser.
Rules []*RegexRule `mapstructure:"rules"`
// MetricNameSeparator is used when joining the name prefix of each individual
// rule and the respective named captures that start with the prefix
// "name_" (see RegexRule for more information).
MetricNameSeparator string `mapstructure:"name_separator"`
}
// RegexRule describes how parts of the name of a metric are going to be mapped
// to metric labels. The rule is only applied if the name matches the given
// regular expression.
type RegexRule struct {
	// Regexp is the regular expression from which named matches are used to
	// extract label keys and values from Carbon metric paths. Every named
	// capture must start with either the "key_" prefix (the capture becomes a
	// label) or the "name_" prefix (the capture becomes part of the metric
	// name).
	Regexp string `mapstructure:"regexp"`

	// NamePrefix is the prefix added to the metric name after extracting the
	// parts that will form labels and final metric name.
	NamePrefix string `mapstructure:"name_prefix"`

	// Labels are key-value pairs added as labels to the metrics that match this
	// rule.
	Labels map[string]string `mapstructure:"labels"`

	// MetricType selects the type of metric to be generated, supported values are
	// "gauge" (the default) and "cumulative".
	MetricType string `mapstructure:"type"`

	// Some fields cached after the compilation of the regular expression.

	// compRegexp is the compiled form of Regexp.
	compRegexp *regexp.Regexp
	// metricNameParts holds the names of the "name_" captures, sorted
	// lexicographically; that order is the order in which their values are
	// joined to form the metric name.
	metricNameParts []string
}
// Compile-time check that *RegexParserConfig implements ParserConfig.
var _ ParserConfig = (*RegexParserConfig)(nil)
// BuildParser builds the respective parser of the configuration instance.
func (rpc *RegexParserConfig) BuildParser() (Parser, error) {
if rpc == nil {
return nil, errors.New("nil receiver on RegexParserConfig.BuildParser")
}
if err := compileRegexRules(rpc.Rules); err != nil {
return nil, err
}
rpp := ®exPathParser{
rules: rpc.Rules,
metricNameSeparator: rpc.MetricNameSeparator,
}
return NewParser(rpp)
}
// compileRegexRules validates each rule and caches on it the compiled regular
// expression and the sorted list of its "name_" capture names.
//
// It returns an error when no rule is given, when a rule is nil, when a
// regular expression does not compile, when the metric type is unknown, or
// when a named capture starts with neither the "name_" nor the "key_" prefix.
// Rules preceding the failing one have already been updated at that point.
func compileRegexRules(rules []*RegexRule) error {
	if len(rules) == 0 {
		return errors.New(`no expression rule was specified`)
	}

	for i, r := range rules {
		// A nil entry would otherwise cause a nil-pointer dereference below.
		if r == nil {
			return fmt.Errorf("error on %d-th rule: rule is nil", i)
		}

		regex, err := regexp.Compile(r.Regexp)
		if err != nil {
			return fmt.Errorf("error compiling %d-th rule: %w", i, err)
		}

		switch TargetMetricType(r.MetricType) {
		case DefaultMetricType, GaugeMetricType, CumulativeMetricType:
		default:
			return fmt.Errorf(
				`error on %d-th rule: unknown metric type %q valid choices are: %q or %q`,
				i,
				r.MetricType,
				GaugeMetricType,
				CumulativeMetricType)
		}

		r.compRegexp = regex

		var metricNameParts []string
		for _, n := range regex.SubexpNames() {
			switch {
			case n == "":
				// Whole-match entry (index 0) or an unnamed capture group.
			case strings.HasPrefix(n, metricNameCapturePrefix):
				metricNameParts = append(metricNameParts, n)
			case strings.HasPrefix(n, keyCapturePrefix):
				// Correctly prefixed, nothing else to do.
			default:
				return fmt.Errorf(
					"capture %q on %d-th rule has an unknown prefix", n, i)
			}
		}

		// The sort is lexicographic, so e.g. "name_10" is placed before
		// "name_2"; this fixes the order of the parts in the metric name.
		sort.Strings(metricNameParts)
		r.metricNameParts = metricNameParts
	}

	return nil
}
// regexPathParser parses Carbon metric paths using a list of regular
// expression rules, delegating to the plaintext parser when no rule matches.
type regexPathParser struct {
// rules are tried in order by ParsePath; the first rule whose compiled
// regular expression matches the path is the one applied.
rules []*RegexRule
// metricNameSeparator is written before each "name_" capture value when the
// metric name is assembled.
metricNameSeparator string
// plaintextPathParser is used if no rule matches a given metric.
plaintextPathParser PlaintextPathParser
}
// ParsePath converts the <metric_path> of a Carbon line (see PathParserHelper
// for a full description of the line format) according to the
// RegexParserConfig settings.
//
// Rules are tried in order and the first one matching path fills parsedPath:
//   - captures prefixed with "key_" become labels (prefix removed from the key);
//   - the static labels of the rule are appended after them;
//   - the metric name is the rule's name prefix followed by, for each "name_"
//     capture in sorted capture-name order, the separator and the captured
//     value; if that results in an empty name the full path is used instead.
//
// Unnamed capture groups are ignored. If no rule matches, the path is handed
// to the plaintext parser and its result is returned.
func (rpp *regexPathParser) ParsePath(path string, parsedPath *ParsedPath) error {
	for _, rule := range rpp.rules {
		// A single evaluation of the expression both tests for a match (nil
		// means no match) and yields the captured values.
		ms := rule.compRegexp.FindStringSubmatch(path)
		if ms == nil {
			continue
		}
		nms := rule.compRegexp.SubexpNames() // regexp pre-computes this slice.

		metricNameLookup := make(map[string]string, len(rule.metricNameParts))
		keys := make([]*metricspb.LabelKey, 0, len(nms)+len(rule.Labels))
		values := make([]*metricspb.LabelValue, 0, len(nms)+len(rule.Labels))

		// Index 0 is the whole match, captures start at 1.
		for i := 1; i < len(ms); i++ {
			switch name := nms[i]; {
			case strings.HasPrefix(name, metricNameCapturePrefix):
				metricNameLookup[name] = ms[i]
			case strings.HasPrefix(name, keyCapturePrefix):
				keys = append(keys, &metricspb.LabelKey{Key: name[len(keyCapturePrefix):]})
				values = append(values, &metricspb.LabelValue{
					Value:    ms[i],
					HasValue: true,
				})
			default:
				// Unnamed capture group: its name is empty, so slicing off the
				// key prefix would panic. It carries no label or name part.
			}
		}

		// Map iteration order is unspecified in Go, so the relative order of
		// these static labels may differ between calls.
		for k, v := range rule.Labels {
			keys = append(keys, &metricspb.LabelKey{Key: k})
			values = append(values, &metricspb.LabelValue{
				Value:    v,
				HasValue: true,
			})
		}

		actualMetricName := rule.NamePrefix
		if len(rule.metricNameParts) > 0 {
			var sb strings.Builder
			sb.WriteString(rule.NamePrefix)
			// The separator is written before every part, including the first.
			for _, mnp := range rule.metricNameParts {
				sb.WriteString(rpp.metricNameSeparator)
				sb.WriteString(metricNameLookup[mnp])
			}
			actualMetricName = sb.String()
		}
		if actualMetricName == "" {
			actualMetricName = path
		}

		parsedPath.MetricName = actualMetricName
		parsedPath.LabelKeys = keys
		parsedPath.LabelValues = values
		parsedPath.MetricType = TargetMetricType(rule.MetricType)
		return nil
	}

	return rpp.plaintextPathParser.ParsePath(path, parsedPath)
}
// regexDefaultConfig returns a pointer to a zero-valued RegexParserConfig,
// i.e. one with no rules and an empty metric name separator.
func regexDefaultConfig() ParserConfig {
	return new(RegexParserConfig)
}