Skip to content

Commit

Permalink
[receiver/prometheusreceiver] Fix retrieval of aggregated metrics wit…
Browse files Browse the repository at this point in the history
…h no job/instance label (open-telemetry#33565)

**Description:** This PR fixes the retrieval of metrics where either the
`job` or `instance` label is missing, and `honor_labels` is set to
`true`. This can be the case for aggregated metrics coming from a
federate endpoint. This PR introduces a fallback to using the
`job`/`instance` labels from the scrape config for such metrics.

**Link to tracking Issue:** Fixes open-telemetry#32555

**Testing:** 
- Added a unit test
- Verified using the following config:

```
receivers:
  prometheus:
    config:
      scrape_configs:
        - job_name: 'federate'
          scrape_interval: 10s
          honor_labels: true
          params:
            'match[]':
              - '{__name__="cluster:node_cpu:sum_rate5m"}'
          metrics_path: '/federate'
          static_configs:
            - targets:
                - "localhost:9090"
exporters:
  debug:
    verbosity: detailed
  otlphttp:
    endpoint: ${env:OTLP_ENDPOINT}

service:
  pipelines:
    metrics:
      receivers: [otlp,prometheus]
      exporters: [otlphttp, debug]
```

This was tested on a `kind` K8s cluster running the prometheus operator,
with a port forward for the `prometheus-k8s` service created by the
prometheus operator (therefore the `localhost:9090` address in the
target).

---------

Signed-off-by: Florian Bacher <[email protected]>
Co-authored-by: David Ashpole <[email protected]>
  • Loading branch information
bacherfl and dashpole committed Jun 18, 2024
1 parent ef36268 commit 744141a
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 3 deletions.
27 changes: 27 additions & 0 deletions .chloggen/fix-prometheusreceiver-federate-endpoint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: prometheusreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fall back to scrape config job/instance labels for aggregated metrics without instance/job labels

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [32555]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
32 changes: 29 additions & 3 deletions receiver/prometheusreceiver/internal/transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,15 +365,41 @@ func (t *transaction) initTransaction(labels labels.Labels) error {
return errors.New("unable to find MetricMetadataStore in context")
}

job, instance := labels.Get(model.JobLabel), labels.Get(model.InstanceLabel)
if job == "" || instance == "" {
return errNoJobInstance
job, instance, err := t.getJobAndInstance(labels)
if err != nil {
return err
}
t.nodeResource = CreateResource(job, instance, target.DiscoveredLabels())
t.isNew = false
return nil
}

// getJobAndInstance resolves the job and instance names for the transaction.
//
// Values present in the supplied label set take precedence. Whichever of the
// two is empty there is then looked up on the scrape target stored in t.ctx.
// This covers, e.g., aggregated metrics coming from a federate endpoint that
// represent the whole cluster rather than an individual workload. See
// https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/32555
// for reference.
//
// errNoJobInstance is returned when the context carries no scrape target, or
// when either value is still empty after consulting the target.
func (t *transaction) getJobAndInstance(labels labels.Labels) (string, string, error) {
	jobName := labels.Get(model.JobLabel)
	instanceName := labels.Get(model.InstanceLabel)

	if jobName == "" || instanceName == "" {
		// At least one value is missing from the sample labels: fill the gaps
		// from the scrape target, leaving any value we already have untouched.
		scrapeTarget, found := scrape.TargetFromContext(t.ctx)
		if !found {
			return "", "", errNoJobInstance
		}
		if jobName == "" {
			jobName = scrapeTarget.GetValue(model.JobLabel)
		}
		if instanceName == "" {
			instanceName = scrapeTarget.GetValue(model.InstanceLabel)
		}
		if jobName == "" || instanceName == "" {
			return "", "", errNoJobInstance
		}
	}

	return jobName, instanceName, nil
}

func (t *transaction) Commit() error {
if t.isNew {
return nil
Expand Down
36 changes: 36 additions & 0 deletions receiver/prometheusreceiver/internal/transaction_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,42 @@ func testTransactionAppendValidAndInvalid(t *testing.T, enableNativeHistograms b
require.Equal(t, 1, mds[0].MetricCount())
}

// TestTransactionAppendWithEmptyLabelArrayFallbackToTargetLabels runs the
// target-label fallback scenario as two subtests, first with
// enableNativeHistograms set to true and then with it set to false.
func TestTransactionAppendWithEmptyLabelArrayFallbackToTargetLabels(t *testing.T) {
	modes := []bool{true, false}
	for i := range modes {
		nativeHistograms := modes[i]
		subtestName := fmt.Sprintf("enableNativeHistograms=%v", nativeHistograms)
		t.Run(subtestName, func(t *testing.T) {
			testTransactionAppendWithEmptyLabelArrayFallbackToTargetLabels(t, nativeHistograms)
		})
	}
}

func testTransactionAppendWithEmptyLabelArrayFallbackToTargetLabels(t *testing.T, enableNativeHistograms bool) {
sink := new(consumertest.MetricsSink)

scrapeTarget := scrape.NewTarget(
// processedLabels contain label values after processing (e.g. relabeling)
labels.FromMap(map[string]string{
model.InstanceLabel: "localhost:8080",
model.JobLabel: "federate",
}),
// discoveredLabels contain labels prior to any processing
labels.FromMap(map[string]string{
model.AddressLabel: "address:8080",
model.SchemeLabel: "http",
}),
nil)

ctx := scrape.ContextWithMetricMetadataStore(
scrape.ContextWithTarget(context.Background(), scrapeTarget),
testMetadataStore(testMetadata))

tr := newTransaction(ctx, &startTimeAdjuster{startTime: startTimestamp}, sink, labels.EmptyLabels(), receivertest.NewNopSettings(), nopObsRecv(t), false, enableNativeHistograms)

_, err := tr.Append(0, labels.FromMap(map[string]string{
model.MetricNameLabel: "counter_test",
}), time.Now().Unix()*1000, 1.0)
assert.NoError(t, err)
}

func TestAppendExemplarWithNoMetricName(t *testing.T) {
for _, enableNativeHistograms := range []bool{true, false} {
t.Run(fmt.Sprintf("enableNativeHistograms=%v", enableNativeHistograms), func(t *testing.T) {
Expand Down

0 comments on commit 744141a

Please sign in to comment.