Skip to content

Commit

Permalink
[service] fix: use ipv6-aware host and port concatenation function (#…
Browse files Browse the repository at this point in the history
…10343)

#### Description
Fixing the bug: the latest version of otel-collector failed to start
with ipv6 metrics endpoint service telemetry.

This problem began to occur after
#9037 with
the feature gate flag enabled was merged. This problem is probably an
implementation omission because the enabled codepath, which was
originally added by
#7871, is
marked as WIP.

You can reproduce the issue with the config and the environment variable
(`MY_POD_IP=::1`).
```yaml
service:
  telemetry:
    logs:
      encoding: json
    metrics:
      address: '[${env:MY_POD_IP}]:8888'
```

#### Link to tracking issue
Fixes
#10011

---------

Co-authored-by: Tyler Helmuth <[email protected]>
  • Loading branch information
terakoya76 and TylerHelmuth committed Jun 28, 2024
1 parent 5309d60 commit ee4eb85
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 14 deletions.
20 changes: 20 additions & 0 deletions .chloggen/fix_ipv6_too_many_colon.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: service

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fixed a bug that caused otel-collector to fail to start with ipv6 metrics endpoint service telemetry.

# One or more tracking issues or pull requests related to the change
issues: [10011]

# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
59 changes: 53 additions & 6 deletions internal/testutil/testutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package testutil // import "go.opentelemetry.io/collector/internal/testutil"

import (
"fmt"
"net"
"os/exec"
"runtime"
Expand Down Expand Up @@ -31,15 +32,44 @@ func GetAvailableLocalAddress(t testing.TB) string {
// which do not show up under the "netstat -ano" but can only be found by
// "netsh interface ipv4 show excludedportrange protocol=tcp". We'll use []exclusions to hold those ranges and
// retry if the port returned by GetAvailableLocalAddress falls in one of those them.
network := "tcp4"
var exclusions []portpair
portFound := false
if runtime.GOOS == "windows" {
exclusions = getExclusionsList(t)
exclusions = getExclusionsList(network, t)
}

var endpoint string
for !portFound {
endpoint = findAvailableAddress(t)
endpoint = findAvailableAddress(network, t)
_, port, err := net.SplitHostPort(endpoint)
require.NoError(t, err)
portFound = true
if runtime.GOOS == "windows" {
for _, pair := range exclusions {
if port >= pair.first && port <= pair.last {
portFound = false
break
}
}
}
}

return endpoint
}

// GetAvailableLocalIPv6Address is IPv6 version of GetAvailableLocalAddress.
func GetAvailableLocalIPv6Address(t testing.TB) string {
network := "tcp6"
var exclusions []portpair
portFound := false
if runtime.GOOS == "windows" {
exclusions = getExclusionsList(network, t)
}

var endpoint string
for !portFound {
endpoint = findAvailableAddress(network, t)
_, port, err := net.SplitHostPort(endpoint)
require.NoError(t, err)
portFound = true
Expand Down Expand Up @@ -72,8 +102,17 @@ func GetAvailableLocalAddressPrometheus(t testing.TB) *config.Prometheus {
}
}

func findAvailableAddress(t testing.TB) string {
ln, err := net.Listen("tcp", "localhost:0")
func findAvailableAddress(network string, t testing.TB) string {
var host string
switch network {
case "tcp", "tcp4":
host = "localhost"
case "tcp6":
host = "[::1]"
}
require.NotZero(t, host, "network must be either of tcp, tcp4 or tcp6")

ln, err := net.Listen("tcp", fmt.Sprintf("%s:0", host))
require.NoError(t, err, "Failed to get a free local port")
// There is a possible race if something else takes this same port before
// the test uses it, however, that is unlikely in practice.
Expand All @@ -84,8 +123,16 @@ func findAvailableAddress(t testing.TB) string {
}

// Get excluded ports on Windows from the command: netsh interface ipv4 show excludedportrange protocol=tcp
func getExclusionsList(t testing.TB) []portpair {
cmdTCP := exec.Command("netsh", "interface", "ipv4", "show", "excludedportrange", "protocol=tcp")
func getExclusionsList(network string, t testing.TB) []portpair {
var cmdTCP *exec.Cmd
switch network {
case "tcp", "tcp4":
cmdTCP = exec.Command("netsh", "interface", "ipv4", "show", "excludedportrange", "protocol=tcp")
case "tcp6":
cmdTCP = exec.Command("netsh", "interface", "ipv6", "show", "excludedportrange", "protocol=tcp")
}
require.NotZero(t, cmdTCP, "network must be either of tcp, tcp4 or tcp6")

outputTCP, errTCP := cmdTCP.CombinedOutput()
require.NoError(t, errTCP)
exclusions := createExclusionsList(string(outputTCP), t)
Expand Down
22 changes: 20 additions & 2 deletions internal/testutil/testutil_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,32 @@ func TestGetAvailableLocalAddress(t *testing.T) {
require.Nil(t, ln1)
}

func TestGetAvailableLocalIpv6Address(t *testing.T) {
endpoint := GetAvailableLocalIPv6Address(t)

// Endpoint should be free.
ln0, err := net.Listen("tcp", endpoint)
require.NoError(t, err)
require.NotNil(t, ln0)
t.Cleanup(func() {
assert.NoError(t, ln0.Close())
})

// Ensure that the endpoint wasn't something like ":0" by checking that a
// second listener will fail.
ln1, err := net.Listen("tcp", endpoint)
require.Error(t, err)
require.Nil(t, ln1)
}

func TestCreateExclusionsList(t *testing.T) {
// Test two examples of typical output from "netsh interface ipv4 show excludedportrange protocol=tcp"
emptyExclusionsText := `
Protocol tcp Port Exclusion Ranges
Start Port End Port
---------- --------
Start Port End Port
---------- --------
* - Administered port exclusions.`

Expand Down
3 changes: 2 additions & 1 deletion service/internal/proctelemetry/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"encoding/json"
"errors"
"fmt"
"net"
"net/http"
"net/url"
"os"
Expand Down Expand Up @@ -175,7 +176,7 @@ func initPrometheusExporter(prometheusConfig *config.Prometheus, asyncErrorChann
return nil, nil, fmt.Errorf("error creating otel prometheus exporter: %w", err)
}

return exporter, InitPrometheusServer(promRegistry, fmt.Sprintf("%s:%d", *prometheusConfig.Host, *prometheusConfig.Port), asyncErrorChannel), nil
return exporter, InitPrometheusServer(promRegistry, net.JoinHostPort(*prometheusConfig.Host, fmt.Sprintf("%d", *prometheusConfig.Port)), asyncErrorChannel), nil
}

func initPullExporter(exporter config.MetricExporter, asyncErrorChannel chan error) (sdkmetric.Reader, *http.Server, error) {
Expand Down
26 changes: 21 additions & 5 deletions service/service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"bufio"
"context"
"errors"
"fmt"
"net/http"
"strings"
"sync"
Expand Down Expand Up @@ -253,13 +254,16 @@ func TestServiceTelemetryCleanupOnError(t *testing.T) {

func TestServiceTelemetry(t *testing.T) {
for _, tc := range ownMetricsTestCases() {
t.Run(tc.name, func(t *testing.T) {
testCollectorStartHelper(t, tc)
t.Run(fmt.Sprintf("ipv4_%s", tc.name), func(t *testing.T) {
testCollectorStartHelper(t, tc, "tcp4")
})
t.Run(fmt.Sprintf("ipv6_%s", tc.name), func(t *testing.T) {
testCollectorStartHelper(t, tc, "tcp6")
})
}
}

func testCollectorStartHelper(t *testing.T, tc ownMetricsTestCase) {
func testCollectorStartHelper(t *testing.T, tc ownMetricsTestCase, network string) {
var once sync.Once
loggingHookCalled := false
hook := func(zapcore.Entry) error {
Expand All @@ -269,8 +273,20 @@ func testCollectorStartHelper(t *testing.T, tc ownMetricsTestCase) {
return nil
}

metricsAddr := testutil.GetAvailableLocalAddress(t)
zpagesAddr := testutil.GetAvailableLocalAddress(t)
var (
metricsAddr string
zpagesAddr string
)
switch network {
case "tcp", "tcp4":
metricsAddr = testutil.GetAvailableLocalAddress(t)
zpagesAddr = testutil.GetAvailableLocalAddress(t)
case "tcp6":
metricsAddr = testutil.GetAvailableLocalIPv6Address(t)
zpagesAddr = testutil.GetAvailableLocalIPv6Address(t)
}
require.NotZero(t, metricsAddr, "network must be either of tcp, tcp4 or tcp6")
require.NotZero(t, zpagesAddr, "network must be either of tcp, tcp4 or tcp6")

set := newNopSettings()
set.BuildInfo = component.BuildInfo{Version: "test version", Command: otelCommand}
Expand Down

0 comments on commit ee4eb85

Please sign in to comment.