Skip to content

Commit

Permalink
Moved the groupbytrace processor to contrib (open-telemetry#1179)
Browse files Browse the repository at this point in the history
Signed-off-by: Juraci Paixão Kröhling <[email protected]>

Co-authored-by: Bogdan Drutu <[email protected]>
  • Loading branch information
jpkrohling and bogdandrutu committed Oct 20, 2020
1 parent 1f4a194 commit 244967e
Show file tree
Hide file tree
Showing 24 changed files with 4,224 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ Learn more about roles in the [community repository](https://github.com/open-tel

| Processor | Reviewer(s) |
| ------------------------------ | ------------------------- |
| groupbytrace | @jpkrohling |
| routing | @jpkrohling |

### Extensions
Expand Down
2 changes: 2 additions & 0 deletions cmd/otelcontribcol/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/open-telemetry/opentelemetry-collector-contrib/extension/httpforwarder"
"github.com/open-telemetry/opentelemetry-collector-contrib/extension/observer/hostobserver"
"github.com/open-telemetry/opentelemetry-collector-contrib/extension/observer/k8sobserver"
"github.com/open-telemetry/opentelemetry-collector-contrib/processor/groupbytraceprocessor"
"github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sprocessor"
"github.com/open-telemetry/opentelemetry-collector-contrib/processor/metricstransformprocessor"
"github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor"
Expand Down Expand Up @@ -133,6 +134,7 @@ func components() (component.Factories, error) {
}

processors := []component.ProcessorFactory{
groupbytraceprocessor.NewFactory(),
k8sprocessor.NewFactory(),
metricstransformprocessor.NewFactory(),
resourcedetectionprocessor.NewFactory(),
Expand Down
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ require (
github.com/open-telemetry/opentelemetry-collector-contrib/extension/httpforwarder v0.0.0-00010101000000-000000000000
github.com/open-telemetry/opentelemetry-collector-contrib/extension/observer/hostobserver v0.0.0-00010101000000-000000000000
github.com/open-telemetry/opentelemetry-collector-contrib/extension/observer/k8sobserver v0.0.0-00010101000000-000000000000
github.com/open-telemetry/opentelemetry-collector-contrib/processor/groupbytraceprocessor v0.0.0-00010101000000-000000000000
github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sprocessor v0.0.0-00010101000000-000000000000
github.com/open-telemetry/opentelemetry-collector-contrib/processor/metricstransformprocessor v0.0.0-00010101000000-000000000000
github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor v0.0.0-00010101000000-000000000000
Expand Down Expand Up @@ -134,6 +135,8 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/receiver/docke

replace github.com/open-telemetry/opentelemetry-collector-contrib/receiver/windowsperfcountersreceiver => ./receiver/windowsperfcountersreceiver

replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/groupbytraceprocessor => ./processor/groupbytraceprocessor

replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/k8sprocessor => ./processor/k8sprocessor/

replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourcedetectionprocessor => ./processor/resourcedetectionprocessor/
Expand Down
5 changes: 5 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ github.com/go-openapi/validate v0.19.8/go.mod h1:8DJv2CVJQ6kGNpFW6eV9N3JviE1C85n
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8=
github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
github.com/go-playground/validator/v10 v10.3.0/go.mod h1:uOYAAleCW8F/7oMFd6aG0GOhaH6EGOAJShg8Id5JGkI=
github.com/go-playground/validator/v10 v10.4.0/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4=
github.com/go-redis/redis/v7 v7.4.0 h1:7obg6wUoj05T0EpY0o8B59S9w5yeMWql7sw2kwNW1x4=
github.com/go-redis/redis/v7 v7.4.0/go.mod h1:JDNMw23GTyLNC4GZu9njt15ctBQVn7xjRfnwdHj/Dcg=
Expand Down Expand Up @@ -662,6 +663,7 @@ github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t
github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=
github.com/grpc-ecosystem/grpc-gateway v1.14.5/go.mod h1:UJ0EZAp832vCd54Wev9N1BMKEyvcZ5+IM0AwDrnlkEc=
github.com/grpc-ecosystem/grpc-gateway v1.14.6/go.mod h1:zdiPV4Yse/1gnckTHtghG4GkDEdKCRJduHpTxT3/jcw=
github.com/grpc-ecosystem/grpc-gateway v1.15.0/go.mod h1:vO11I9oWA+KsxmfFQPhLnnIb1VDE24M+pdxZFiuZcA8=
github.com/grpc-ecosystem/grpc-gateway v1.15.2 h1:HC+hWRWf+v5zTMPyoaYTKIJih+4sd4XRWmj0qlG87Co=
github.com/grpc-ecosystem/grpc-gateway v1.15.2/go.mod h1:vO11I9oWA+KsxmfFQPhLnnIb1VDE24M+pdxZFiuZcA8=
github.com/grpc-ecosystem/grpc-opentracing v0.0.0-20180507213350-8e809c8a8645/go.mod h1:6iZfnjpejD4L/4DwD7NryNaJyCQdzwWwH2MWhCA90Kw=
Expand Down Expand Up @@ -755,6 +757,7 @@ github.com/iris-contrib/go.uuid v2.0.0+incompatible/go.mod h1:iz2lgM/1UnEf1kP0L/
github.com/iris-contrib/i18n v0.0.0-20171121225848-987a633949d0/go.mod h1:pMCz62A0xJL6I+umB2YTlFRwWXaDFA0jy+5HzGiJjqI=
github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw=
github.com/jaegertracing/jaeger v1.15.1/go.mod h1:LUWPSnzNPGRubM8pk0inANGitpiMOOxihXx0+53llXI=
github.com/jaegertracing/jaeger v1.19.2/go.mod h1:2GVHuF9OIfRw2N6ZMoEgRGL+GJxvDLVtALDWxOINqDk=
github.com/jaegertracing/jaeger v1.20.0 h1:rnwhl7COrEj1/vYfumL84CoiwOEy2MLFJFcW1bqjxnA=
github.com/jaegertracing/jaeger v1.20.0/go.mod h1:EFO94eQMRMI5KM4RIWcnl3rocmGEVt232TIG4Ua/4T0=
github.com/jcmturner/gofork v0.0.0-20190328161633-dc7c13fece03/go.mod h1:MK8+TM0La+2rjBD4jE12Kj1pCCxK7d2LK/UM3ncEo0o=
Expand Down Expand Up @@ -1023,6 +1026,7 @@ github.com/openzipkin/zipkin-go v0.2.1/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnh
github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnhQw8ySjnjRyN4=
github.com/openzipkin/zipkin-go v0.2.4-0.20200818204336-dc18516bbb4c h1:r2TNN46RxW319R/erz4ZBF2R0U2hbGQejvSMo8vecAw=
github.com/openzipkin/zipkin-go v0.2.4-0.20200818204336-dc18516bbb4c/go.mod h1:KpXfKdgRDnnhsxw4pNIH9Md5lyFqKUa4YDFlwRYAMyE=
github.com/orijtech/prometheus-go-metrics-exporter v0.0.5/go.mod h1:BiTx/ugZex8LheBk3j53tktWaRdFjV5FCfT2o0P7msE=
github.com/orijtech/prometheus-go-metrics-exporter v0.0.6 h1:ExkpQsyDDcyp0U3zhoNUQaCQ/o0Ovq7e1jRCL9lQ/4o=
github.com/orijtech/prometheus-go-metrics-exporter v0.0.6/go.mod h1:BiTx/ugZex8LheBk3j53tktWaRdFjV5FCfT2o0P7msE=
github.com/ory/go-acc v0.2.6 h1:YfI+L9dxI7QCtWn2RbawqO0vXhiThdXu/RgizJBbaq0=
Expand Down Expand Up @@ -1805,6 +1809,7 @@ google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQ
google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.28.0-pre/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
Expand Down
1 change: 1 addition & 0 deletions processor/groupbytraceprocessor/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include ../../Makefile.Common
60 changes: 60 additions & 0 deletions processor/groupbytraceprocessor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Group by Trace processor

Supported pipeline types: traces
Status: in development

This processor collects all the spans from the same trace, waiting a
pre-determined amount of time before releasing the trace to the next processor.
The expectation is that, generally, traces will be complete after the given time.

This processor should be used whenever a processor requires grouped traces to make decisions,
such as a tail-based sampler or a per-trace metrics processor.

The batch processor shouldn't be used before this processor, as this one will
probably undo part (or much) of the work that the batch processor performs. It's
fine to have the batch processor to run right after this one, and every entry in the
batch will be a complete trace.

Please refer to [config.go](./config.go) for the config spec.

Examples:

```yaml
processors:
groupbytrace:
groupbytrace/2:
wait_duration: 10s
num_traces: 1000
```

## Configuration

Refer to [config.yaml](./testdata/config.yaml) for detailed examples on using the processor.

The `num_traces` property tells the processor what's the maximum number of traces to keep in the internal storage. A higher `num_traces` might incur in a higher memory usage.

The `wait_duration` property tells the processor for how long it should keep traces in the internal storage. Once a trace is kept for this duration, it's then released to the next consumer and removed from the internal storage. Spans from a trace that has been released will be kept for the entire duration again.

## Metrics

The following metrics are recorded by this processor:

* `otelcol_processor_groupbytrace_conf_num_traces` represents the maximum number of traces that can be kept by the internal storage. This value comes from the processor's configuration and will never change over the lifecycle of the processor.
* `otelcol_processor_groupbytrace_event_latency_bucket`, with the following `event` tag values:
* `onBatchReceived` represents the number of batches the processor has received from the previous components
* `onTraceExpired` represents the number of traces that finished waiting in memory for spans to arrive
* `onTraceReleased` represents the number of traces that have been marked as released to the next component
* `onTraceRemoved` represents the number of traces that have been marked for removal from the internal storage
* `otelcol_processor_groupbytrace_num_events_in_queue` representing the state of the internal queue. Ideally, this number would be close to zero, but might have temporary spikes if the storage is slow.
* `otelcol_processor_groupbytrace_num_traces_in_memory` representing the state of the internal trace storage, waiting for spans to arrive. It's common to have items in memory all the time if the processor has a continuous flow of data. The longer the `wait_duration`, the higher the amount of traces in memory should be, given enough traffic.
* `otelcol_processor_groupbytrace_spans_released` and `otelcol_processor_groupbytrace_traces_released` represent the number of spans and traces effectively released to the next component.
* `otelcol_processor_groupbytrace_traces_evicted` represents the number of traces that have been evicted from the internal storage due to capacity problems. Ideally, this should be zero, or very close to zero at all times. If you keep getting items evicted, increase the `num_traces`.
* `otelcol_processor_groupbytrace_incomplete_releases` represents the traces that have been marked as expired, but had been previously been removed. This might be the case when a span from a trace has been received in a batch while the trace existed in the in-memory storage, but has since been released/removed before the span could be added to the trace. This should always be very close to 0, and a high value might indicate a software bug.

A healthy system would have the same value for the metric `otelcol_processor_groupbytrace_spans_released` and for three events under `otelcol_processor_groupbytrace_event_latency_bucket`: `onTraceExpired`, `onTraceRemoved` and `onTraceReleased`.

The metric `otelcol_processor_groupbytrace_event_latency_bucket` is a bucket and shows how long each event took to be processed in miliseconds. In most cases, it should take less than 5ms for an event to be processed, but it might be the case where an event could take 10ms. Higher latencies are possible, but it should never really reach the last item, representing 1s. Events taking more than 1s are killed automatically, and if you have multiple items in this bucket, it might indicate a bug in the software.

Most metrics are updated when the events occur, except for the following ones, which are updated periodically:
* `otelcol_processor_groupbytrace_num_events_in_queue`
* `otelcol_processor_groupbytrace_num_traces_in_memory`
46 changes: 46 additions & 0 deletions processor/groupbytraceprocessor/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http:https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package groupbytraceprocessor

import (
"time"

"go.opentelemetry.io/collector/config/configmodels"
)

// Config is the configuration for the processor.
type Config struct {
configmodels.ProcessorSettings `mapstructure:",squash"`

// NumTraces is the max number of traces to keep in memory waiting for the duration.
// Default: 1_000_000.
NumTraces int `mapstructure:"num_traces"`

// WaitDuration tells the processor to wait for the specified duration for the trace to be complete.
// Default: 1s.
WaitDuration time.Duration `mapstructure:"wait_duration"`

// DiscardOrphans instructs the processor to discard traces without the root span.
// This typically indicates that the trace is incomplete.
// Default: false.
// Not yet implemented, and an error will be returned when this option is used.
DiscardOrphans bool `mapstructure:"discard_orphans"`

// StoreOnDisk tells the processor to keep only the trace ID in memory, serializing the trace spans to disk.
// Useful when the duration to wait for traces to complete is high.
// Default: false.
// Not yet implemented, and an error will be returned when this option is used.
StoreOnDisk bool `mapstructure:"store_on_disk"`
}
15 changes: 15 additions & 0 deletions processor/groupbytraceprocessor/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http:https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package groupbytraceprocessor
Loading

0 comments on commit 244967e

Please sign in to comment.