Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pkg/stanza] Add header_delimiter option to the csv_parser #18929

Merged
merged 2 commits into from
Feb 28, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
Add header_delimiter option to the csv_parser.
  • Loading branch information
BinaryFissionGames committed Feb 27, 2023
commit 544e8aac0371153041fd0608a52f6e02179aeed5
11 changes: 11 additions & 0 deletions .chloggen/csv-header-delimiter.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: pkg/stanza

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add `header_delimiter` option to the `csv_parser`.

# One or more tracking issues related to the change
issues: [18198]
1 change: 1 addition & 0 deletions pkg/stanza/docs/operators/csv_parser.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ The `csv_parser` operator parses the string-type field selected by `parse_from`
| `header` | required when `header_attribute` not set | A string of delimited field names |
| `header_attribute` | required when `header` not set | An attribute name to read the header field from, to support dynamic field names |
| `delimiter` | `,` | A character that will be used as a delimiter. Values `\r` and `\n` cannot be used as a delimiter. |
| `header_delimiter` | value of `delimiter` | A character that will be used to split the header string (the `header` value, or the value read from `header_attribute`) into field names. Values `\r` and `\n` cannot be used as a delimiter. |
| `lazy_quotes` | `false` | If true, a quote may appear in an unquoted field and a non-doubled quote may appear in a quoted field. Cannot be true if `ignore_quotes` is true. |
| `ignore_quotes` | `false` | If true, all quotes are ignored, and fields are simply split on the delimiter. Cannot be true if `lazy_quotes` is true. |
| `parse_from` | `body` | The [field](../types/field.md) from which the value will be parsed. |
Expand Down
9 changes: 9 additions & 0 deletions pkg/stanza/operator/parser/csv/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ func TestConfig(t *testing.T) {
return p
}(),
},
{
Name: "header_delimiter",
Expect: func() *Config {
p := NewConfig()
p.Header = "id\tseverity\tmessage"
p.HeaderDelimiter = "\t"
return p
}(),
},
{
Name: "header_attribute",
Expect: func() *Config {
Expand Down
18 changes: 15 additions & 3 deletions pkg/stanza/operator/parser/csv/csv.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ type Config struct {
helper.ParserConfig `mapstructure:",squash"`

Header string `mapstructure:"header"`
HeaderDelimiter string `mapstructure:"header_delimiter"`
HeaderAttribute string `mapstructure:"header_attribute"`
FieldDelimiter string `mapstructure:"delimiter"`
LazyQuotes bool `mapstructure:"lazy_quotes"`
Expand All @@ -68,33 +69,43 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
c.FieldDelimiter = ","
}

if c.HeaderDelimiter == "" {
c.HeaderDelimiter = c.FieldDelimiter
}

if c.IgnoreQuotes && c.LazyQuotes {
return nil, errors.New("only one of 'ignore_quotes' or 'lazy_quotes' can be true")
}

fieldDelimiter := []rune(c.FieldDelimiter)[0]
headerDelimiter := []rune(c.HeaderDelimiter)[0]

if len([]rune(c.FieldDelimiter)) != 1 {
return nil, fmt.Errorf("invalid 'delimiter': '%s'", c.FieldDelimiter)
}

if len([]rune(c.HeaderDelimiter)) != 1 {
return nil, fmt.Errorf("invalid 'header_delimiter': '%s'", c.HeaderDelimiter)
}

var headers []string
switch {
case c.Header == "" && c.HeaderAttribute == "":
return nil, errors.New("missing required field 'header' or 'header_attribute'")
case c.Header != "" && c.HeaderAttribute != "":
return nil, errors.New("only one header parameter can be set: 'header' or 'header_attribute'")
case c.Header != "" && !strings.Contains(c.Header, c.FieldDelimiter):
case c.Header != "" && !strings.Contains(c.Header, c.HeaderDelimiter):
return nil, errors.New("missing field delimiter in header")
case c.Header != "":
headers = strings.Split(c.Header, c.FieldDelimiter)
headers = strings.Split(c.Header, c.HeaderDelimiter)
}

return &Parser{
ParserOperator: parserOperator,
header: headers,
headerAttribute: c.HeaderAttribute,
fieldDelimiter: fieldDelimiter,
headerDelimiter: headerDelimiter,
lazyQuotes: c.LazyQuotes,
ignoreQuotes: c.IgnoreQuotes,
parse: generateParseFunc(headers, fieldDelimiter, c.LazyQuotes, c.IgnoreQuotes),
Expand All @@ -105,6 +116,7 @@ func (c Config) Build(logger *zap.SugaredLogger) (operator.Operator, error) {
type Parser struct {
helper.ParserOperator
fieldDelimiter rune
headerDelimiter rune
header []string
headerAttribute string
lazyQuotes bool
Expand Down Expand Up @@ -132,7 +144,7 @@ func (r *Parser) Process(ctx context.Context, e *entry.Entry) error {
r.Error(err)
return err
}
headers := strings.Split(headerString, string([]rune{r.fieldDelimiter}))
headers := strings.Split(headerString, string([]rune{r.headerDelimiter}))
parse = generateParseFunc(headers, r.fieldDelimiter, r.lazyQuotes, r.ignoreQuotes)
}

Expand Down
51 changes: 51 additions & 0 deletions pkg/stanza/operator/parser/csv/csv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,31 @@ func TestParserCSV(t *testing.T) {
false,
false,
},
{
"basic-different-delimiters",
func(p *Config) {
p.Header = testHeader
p.HeaderDelimiter = ","
p.FieldDelimiter = "|"
},
[]entry.Entry{
{
Body: "stanza|INFO|started agent",
},
},
[]entry.Entry{
{
Body: "stanza|INFO|started agent",
Attributes: map[string]interface{}{
"name": "stanza",
"sev": "INFO",
"msg": "started agent",
},
},
},
false,
false,
},
{
"basic-multiple-static-bodies",
func(p *Config) {
Expand Down Expand Up @@ -562,6 +587,32 @@ func TestParserCSV(t *testing.T) {
true,
false,
},
{
"invalid-header-delimiter",
func(p *Config) {
// expect []rune of length 1
p.Header = "name,,age,,height,,number"
p.HeaderDelimiter = ",,"
},
[]entry.Entry{
{
Body: "stanza,1,400,555-555-5555",
},
},
[]entry.Entry{
{
Attributes: map[string]interface{}{
"name": "stanza",
"age": "1",
"height": "400",
"number": "555-555-5555",
},
Body: "stanza,1,400,555-555-5555",
},
},
true,
false,
},
{
"parse-failure-num-fields-mismatch",
func(p *Config) {
Expand Down
4 changes: 4 additions & 0 deletions pkg/stanza/operator/parser/csv/testdata/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ delimiter:
parse_from: body.message
header: id,severity,message
delimiter: "\t"
header_delimiter:
type: csv_parser
header: "id\tseverity\tmessage"
header_delimiter: "\t"
header_attribute:
type: csv_parser
parse_from: body.message
Expand Down