Skip to content

Commit

Permalink
[pkg/ottl] Add ParseKeyValue function (open-telemetry#31035)
Browse files Browse the repository at this point in the history
**Description:** <Describe what has changed.>
Adds a `ParseKeyValue` converter function that parses key-value
pairs into a `pcommon.Map`. It takes a `StringGetter` target argument
and two optional arguments: the pair delimiter and the key-value delimiter.
This is an adaptation of the Stanza Key Value Parser operator to provide
feature parity.

Given the following input string `"k1=v1 k2=v2 k3=v3"`, the function
would return the following map:
```
{ "k1": "v1", "k2": "v2", "k3": "v3" }
```

**Link to tracking Issue:** <Issue number if applicable>
Closes open-telemetry#30998 

**Testing:** <Describe what testing was performed and which tests were
added.>
Added unit tests and e2e test.

**Documentation:** <Describe the documentation added.>
Added function documentation.
  • Loading branch information
dpaasman00 committed Feb 15, 2024
1 parent 16db125 commit d0c0e97
Show file tree
Hide file tree
Showing 10 changed files with 897 additions and 0 deletions.
27 changes: 27 additions & 0 deletions .chloggen/pkg-ottl-add-parse-key-value-function.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/ottl

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add `ParseKeyValue` function for parsing key value pairs from a target string

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [30998]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
4 changes: 4 additions & 0 deletions internal/coreinternal/parseutils/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

// Package parseutils provides string-parsing helpers: splitting a string on a
// delimiter while respecting quotes, and turning "key<delimiter>value" strings
// into a map.
package parseutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/parseutils"
14 changes: 14 additions & 0 deletions internal/coreinternal/parseutils/package_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package parseutils

import (
"testing"

"go.uber.org/goleak"
)

// TestMain is the entry point for this package's tests; it delegates the whole
// run to goleak.VerifyTestMain.
// NOTE(review): presumably this fails the run if goroutines are still alive
// once the tests finish — confirm against the goleak documentation.
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
}
73 changes: 73 additions & 0 deletions internal/coreinternal/parseutils/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package parseutils // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal/parseutils"

import (
"fmt"
"strings"

"go.uber.org/multierr"
)

// SplitString splits input on delimiter while respecting quotes and returns the
// resulting pieces. Use it in place of strings.Split when quoted sections must
// be kept together.
//
// Behavior:
//   - Text enclosed in single or double quotes is never split, and the
//     enclosing quote characters are stripped. A quote character of the other
//     kind that appears inside a quoted section is kept verbatim.
//   - Leading, trailing and consecutive delimiters do not produce empty
//     elements; neither does an empty quoted value that stands on its own.
//   - Bytes are copied through unchanged, so multi-byte UTF-8 text is preserved.
//
// It returns an error if delimiter is empty or if a quote is opened but never
// closed. On error the returned slice is nil. When nothing remains after
// splitting (for example, empty input) the returned slice is nil as well.
func SplitString(input, delimiter string) ([]string, error) {
	// An empty delimiter matches at every position without consuming input,
	// which would keep the scan below from ever advancing.
	if delimiter == "" {
		return nil, fmt.Errorf("delimiter must not be empty")
	}

	var (
		result  []string
		current strings.Builder
		inQuote bool
		quote   byte // the quote character that opened the current quoted section
	)

	for i := 0; i < len(input); i++ {
		c := input[i]
		switch {
		case !inQuote && strings.HasPrefix(input[i:], delimiter):
			// Only emit non-empty pieces so that leading, trailing and repeated
			// delimiters are ignored.
			if current.Len() > 0 {
				result = append(result, current.String())
				current.Reset()
			}
			// Skip the rest of the delimiter; the loop's i++ consumes its first byte.
			i += len(delimiter) - 1
		case !inQuote && (c == '"' || c == '\''):
			inQuote, quote = true, c
		case inQuote && c == quote:
			inQuote = false
		default:
			// Write the raw byte: converting a byte to a string would re-encode
			// values >= 0x80 as separate code points and corrupt UTF-8 input.
			current.WriteByte(c)
		}
	}

	if inQuote {
		return nil, fmt.Errorf("never reached the end of a quoted value")
	}
	if current.Len() > 0 { // avoid adding an empty value because of a trailing delimiter
		result = append(result, current.String())
	}

	return result, nil
}

// ParseKeyValuePairs builds a map from a list of "key<delimiter>value" strings.
//
// Each entry of pairs is split on the first occurrence of delimiter; the key
// and the value are trimmed of surrounding whitespace before being stored, and
// values are stored as strings. An entry that cannot be split into exactly two
// parts is skipped and reported: all such failures are combined into the
// returned error, and the returned map still holds the entries that did parse.
func ParseKeyValuePairs(pairs []string, delimiter string) (map[string]any, error) {
	var errs error
	out := make(map[string]any)

	for _, entry := range pairs {
		kv := strings.SplitN(entry, delimiter, 2)
		if len(kv) == 2 {
			out[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
			continue
		}
		errs = multierr.Append(errs, fmt.Errorf("cannot split %q into 2 items, got %d item(s)", entry, len(kv)))
	}

	return out, errs
}
276 changes: 276 additions & 0 deletions internal/coreinternal/parseutils/parser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package parseutils

import (
"fmt"
"testing"

"github.com/stretchr/testify/assert"
)

// Test_SplitString is a table-driven test for SplitString covering plain
// splitting, quote handling, multi-character delimiters, delimiter edge cases
// and the unclosed-quote error.
//
// Inputs that rely on tabs, newlines or trailing spaces are written with
// explicit escape sequences rather than raw string literals, so the whitespace
// under test is visible and cannot be lost to editors or formatters.
func Test_SplitString(t *testing.T) {
	testCases := []struct {
		name        string
		input       string
		delimiter   string
		expected    []string
		expectedErr error
	}{
		{
			name:      "simple",
			input:     "a b c",
			delimiter: " ",
			expected: []string{
				"a",
				"b",
				"c",
			},
		},
		{
			name:      "single quotes",
			input:     "a 'b c d'",
			delimiter: " ",
			expected: []string{
				"a",
				"b c d",
			},
		},
		{
			name:      "double quotes",
			input:     `a " b c " d`,
			delimiter: " ",
			expected: []string{
				"a",
				" b c ",
				"d",
			},
		},
		{
			name:      "multi-char delimiter",
			input:     "abc!@! def !@! g",
			delimiter: "!@!",
			expected: []string{
				"abc",
				" def ",
				" g",
			},
		},
		{
			name:      "leading and trailing delimiters",
			input:     " name=ottl func=key_value hello=world ",
			delimiter: " ",
			expected: []string{
				"name=ottl",
				"func=key_value",
				"hello=world",
			},
		},
		{
			name:      "embedded double quotes in single quoted value",
			input:     `ab c='this is a "co ol" value'`,
			delimiter: " ",
			expected: []string{
				"ab",
				`c=this is a "co ol" value`,
			},
		},
		{
			name:      "embedded double quotes end single quoted value",
			input:     `ab c='this is a "co ol"'`,
			delimiter: " ",
			expected: []string{
				"ab",
				`c=this is a "co ol"`,
			},
		},
		{
			name:      "quoted values include whitespace",
			input:     `name=" ottl " func=" key_ value"`,
			delimiter: " ",
			expected: []string{
				"name= ottl ",
				"func= key_ value",
			},
		},
		{
			name:      "delimiter longer than input",
			input:     "abc",
			delimiter: "aaaa",
			expected: []string{
				"abc",
			},
		},
		{
			name:      "delimiter not found",
			input:     "a b c",
			delimiter: "!",
			expected: []string{
				"a b c",
			},
		},
		{
			name:      "newlines in input",
			input:     "a\nb\nc",
			delimiter: " ",
			expected: []string{
				"a\nb\nc",
			},
		},
		{
			name: "newline delimiter",
			// The third line deliberately ends with a space before the newline.
			input:     "a b c\nd e f\ng \nh",
			delimiter: "\n",
			expected: []string{
				"a b c",
				"d e f",
				"g ",
				"h",
			},
		},
		{
			name:      "empty input",
			input:     "",
			delimiter: " ",
			expected:  nil,
		},
		{
			name:      "equal input and delimiter",
			input:     "abc",
			delimiter: "abc",
			expected:  nil,
		},
		{
			name:        "unclosed quotes",
			input:       "a 'b c",
			delimiter:   " ",
			expectedErr: fmt.Errorf("never reached the end of a quoted value"),
		},
		{
			name:        "mismatched quotes",
			input:       `a 'b c' "d '`,
			delimiter:   " ",
			expectedErr: fmt.Errorf("never reached the end of a quoted value"),
		},
		{
			name:      "tab delimiters",
			input:     "a\tb\tc",
			delimiter: "\t",
			expected: []string{
				"a",
				"b",
				"c",
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			result, err := SplitString(tc.input, tc.delimiter)

			if tc.expectedErr == nil {
				assert.NoError(t, err)
				assert.Equal(t, tc.expected, result)
			} else {
				// On failure the error text must match and no partial result is returned.
				assert.EqualError(t, err, tc.expectedErr.Error())
				assert.Nil(t, result)
			}
		})
	}
}

// Test_ParseKeyValuePairs is a table-driven test for ParseKeyValuePairs. Each
// scenario supplies the pair strings and the delimiter, and states either the
// map that must come back or the exact error text that must be reported.
func Test_ParseKeyValuePairs(t *testing.T) {
	type scenario struct {
		name        string
		pairs       []string
		delimiter   string
		expected    map[string]any
		expectedErr error
	}

	scenarios := []scenario{
		{
			// Only the first delimiter splits; later ones stay in the value.
			name:      "multiple delimiters",
			pairs:     []string{"a==b", "c=d=", "e=f"},
			delimiter: "=",
			expected: map[string]any{
				"a": "=b",
				"c": "d=",
				"e": "f",
			},
		},
		{
			name:        "no delimiter found",
			pairs:       []string{"ab"},
			delimiter:   "=",
			expectedErr: fmt.Errorf("cannot split \"ab\" into 2 items, got 1 item(s)"),
		},
		{
			// Every failing pair is reported in the combined error.
			name:        "no delimiter found 2x",
			pairs:       []string{"ab", "cd"},
			delimiter:   "=",
			expectedErr: fmt.Errorf("cannot split \"ab\" into 2 items, got 1 item(s); cannot split \"cd\" into 2 items, got 1 item(s)"),
		},
		{
			name:      "empty pairs",
			pairs:     []string{},
			delimiter: "=",
			expected:  map[string]any{},
		},
		{
			name:        "empty pair string",
			pairs:       []string{""},
			delimiter:   "=",
			expectedErr: fmt.Errorf("cannot split \"\" into 2 items, got 1 item(s)"),
		},
		{
			name:      "empty delimiter",
			pairs:     []string{"a=b", "c=d"},
			delimiter: "",
			expected: map[string]any{
				"a": "=b",
				"c": "=d",
			},
		},
		{
			name:      "empty pairs & delimiter",
			pairs:     []string{},
			delimiter: "",
			expected:  map[string]any{},
		},
		{
			// A pair that starts with the delimiter yields an empty key.
			name:      "early delimiter",
			pairs:     []string{"=a=b"},
			delimiter: "=",
			expected: map[string]any{
				"": "a=b",
			},
		},
		{
			// Whitespace around keys and values is trimmed.
			name:      "weird spacing",
			pairs:     []string{" a= b ", " c = d "},
			delimiter: "=",
			expected: map[string]any{
				"a": "b",
				"c": "d",
			},
		},
	}

	for _, sc := range scenarios {
		t.Run(sc.name, func(t *testing.T) {
			got, err := ParseKeyValuePairs(sc.pairs, sc.delimiter)

			if sc.expectedErr != nil {
				assert.EqualError(t, err, sc.expectedErr.Error())
				return
			}

			assert.NoError(t, err)
			assert.Equal(t, sc.expected, got)
		})
	}
}
Loading

0 comments on commit d0c0e97

Please sign in to comment.