From 27cdca2e43b61c4be329dbecf5a63fa04f245b60 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Thu, 18 Apr 2024 13:39:00 -0400 Subject: [PATCH 01/18] WIP monitoring parent process for orphan detection --- extension/opampextension/config.go | 5 ++ extension/opampextension/monitor_ppid.go | 5 ++ .../opampextension/monitor_ppid_others.go | 34 +++++++++++ .../monitor_ppid_others_test.go | 60 +++++++++++++++++++ extension/opampextension/monitor_ppid_test.go | 14 +++++ .../opampextension/monitor_ppid_windows.go | 55 +++++++++++++++++ .../monitor_ppid_windows_test.go | 35 +++++++++++ 7 files changed, 208 insertions(+) create mode 100644 extension/opampextension/monitor_ppid.go create mode 100644 extension/opampextension/monitor_ppid_others.go create mode 100644 extension/opampextension/monitor_ppid_others_test.go create mode 100644 extension/opampextension/monitor_ppid_test.go create mode 100644 extension/opampextension/monitor_ppid_windows.go create mode 100644 extension/opampextension/monitor_ppid_windows_test.go diff --git a/extension/opampextension/config.go b/extension/opampextension/config.go index 877b5f62c874e..cdef794d2978e 100644 --- a/extension/opampextension/config.go +++ b/extension/opampextension/config.go @@ -29,6 +29,11 @@ type Config struct { // Agent descriptions contains options to modify the AgentDescription message AgentDescription AgentDescription `mapstructure:"agent_description"` + + // PPID is the process ID of the parent for the collector. If the PPID is specified, + // the extension will continuously poll for the status of the parent process, and emit a fatal error + // when the parent process is closed. + PPID int `mapstructure:"ppid"` } type AgentDescription struct { diff --git a/extension/opampextension/monitor_ppid.go b/extension/opampextension/monitor_ppid.go new file mode 100644 index 0000000000000..95ed125b90505 --- /dev/null +++ b/extension/opampextension/monitor_ppid.go @@ -0,0 +1,5 @@ +package opampextension + +import "time" + +var orphanPollInterval = 5 * time.Second diff --git a/extension/opampextension/monitor_ppid_others.go b/extension/opampextension/monitor_ppid_others.go new file mode 100644 index 0000000000000..27f71b90053f3 --- /dev/null +++ b/extension/opampextension/monitor_ppid_others.go @@ -0,0 +1,34 @@ +//go:build !windows + +package opampextension + +import ( + "context" + "fmt" + "os" + "time" + + "go.opentelemetry.io/collector/component" +) + +// getppid is a function to get ppid of the process. It is mocked in testing. +var getppid = os.Getppid + +func monitorPPID(ctx context.Context, ppid int, reportStatus func(*component.StatusEvent)) { + // On unix-based systems, when the parent process dies orphaned processes + // are re-parented to be under the init system process (ppid becomes 1). + for { + if getppid() != ppid { + err := fmt.Errorf("collector was orphaned, parent pid is no longer %d", ppid) + status := component.NewFatalErrorEvent(err) + reportStatus(status) + return + } + + select { + case <-time.After(orphanPollInterval): + case <-ctx.Done(): + return + } + } +} diff --git a/extension/opampextension/monitor_ppid_others_test.go b/extension/opampextension/monitor_ppid_others_test.go new file mode 100644 index 0000000000000..f66e7cc0e6187 --- /dev/null +++ b/extension/opampextension/monitor_ppid_others_test.go @@ -0,0 +1,60 @@ +//go:build !windows + +package opampextension + +import ( + "context" + "os" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" +) + +func TestMonitorPPIDOthers(t *testing.T) { + t.Run("Does not trigger if ppid stays as specified", func(t *testing.T) { + statusReportFunc := func(*component.StatusEvent) { + require.FailNow(t, "status report function should not be called") + } + + monitorCtx, monitorCtxCancel := context.WithCancel(context.Background()) + monitorCtxCancel() + + monitorPPID(monitorCtx, os.Getppid(), statusReportFunc) + }) + + t.Run("Emits fatal status if ppid changes", func(t *testing.T) { + numPolls := 0 + setGetPPID(t, func() int { + numPolls++ + if numPolls > 1 { + return 1 + } + return os.Getppid() + }) + + setOrphanPollInterval(t, 10*time.Millisecond) + + var statusEvent *component.StatusEvent + statusReportFunc := func(evt *component.StatusEvent) { + if statusEvent != nil { + require.FailNow(t, "status report function should not be called twice") + } + statusEvent = evt + } + + monitorPPID(context.Background(), os.Getppid(), statusReportFunc) + require.NotNil(t, statusEvent) + require.Equal(t, component.StatusFatalError, statusEvent.Status()) + }) + +} + +func setGetPPID(t *testing.T, newFunc func() int) { + old := getppid + getppid = newFunc + t.Cleanup(func() { + getppid = old + }) +} diff --git a/extension/opampextension/monitor_ppid_test.go b/extension/opampextension/monitor_ppid_test.go new file mode 100644 index 0000000000000..f71da377d292c --- /dev/null +++ b/extension/opampextension/monitor_ppid_test.go @@ -0,0 +1,14 @@ +package opampextension + +import ( + "testing" + "time" +) + +func setOrphanPollInterval(t *testing.T, newInterval time.Duration) { + old := orphanPollInterval + orphanPollInterval = newInterval + t.Cleanup(func() { + orphanPollInterval = old + }) +} diff --git a/extension/opampextension/monitor_ppid_windows.go b/extension/opampextension/monitor_ppid_windows.go new file mode 100644 index 0000000000000..6ca4a3d6384e9 --- /dev/null +++ b/extension/opampextension/monitor_ppid_windows.go @@ -0,0 +1,55 @@ +//go:build windows + +package opampextension + +import ( + "context" + "fmt" + "os" + "time" + + "go.opentelemetry.io/collector/component" +) + +func monitorPPID(ctx context.Context, ppid int, reportStatus func(*component.StatusEvent)) { + // On Windows systems, we can look up and synchronously wait for the process to exit. + // This is not possible on other systems, since Wait doesn't work on most systems unless the + // process is a child of the current one (see doc on process.Wait). + for { + process, err := os.FindProcess(ppid) + if err != nil { + err := fmt.Errorf("collector was orphaned, error finding process %d: %w", ppid, err) + status := component.NewFatalErrorEvent(err) + reportStatus(status) + return + } + + if process == nil { + err := fmt.Errorf("collector was orphaned, process %d does not exist", ppid) + status := component.NewFatalErrorEvent(err) + reportStatus(status) + return + } + + processState, err := process.Wait() + if err != nil { + err := fmt.Errorf("collector was orphaned, error while waiting on process %d to exit: %w", ppid, err) + status := component.NewFatalErrorEvent(err) + reportStatus(status) + return + } + + if processState.Exited() { + err := fmt.Errorf("collector was orphaned, process %d exited: %w", ppid, err) + status := component.NewFatalErrorEvent(err) + reportStatus(status) + return + } + + select { + case <-time.After(orphanPollInterval): + case <-ctx.Done(): + return + } + } +} diff --git a/extension/opampextension/monitor_ppid_windows_test.go b/extension/opampextension/monitor_ppid_windows_test.go new file mode 100644 index 0000000000000..398b819cb59ea --- /dev/null +++ b/extension/opampextension/monitor_ppid_windows_test.go @@ -0,0 +1,35 @@ +//go:build windows + +package opampextension + +import ( + "context" + "os/exec" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" +) + +func TestMonitorPPIDWindows(t *testing.T) { + cmdContext, cmdContextCancel := context.WithCancel(context.Background()) + t.Cleanup(cmdContextCancel) + + cmd := exec.CommandContext(cmdContext, "/bin/sh", "-c", "sleep 1000") + err := cmd.Start() + require.NoError(t, err) + + statusReportFunc := func(*component.StatusEvent) { + require.FailNow(t, "status report function should not be called") + } + + monitorCtx, monitorCtxCancel := context.WithCancel(context.Background()) + + go func() { + time.Sleep(1 * time.Second) + monitorCtxCancel() + }() + + monitorPPID(monitorCtx, cmd.Process.Pid, statusReportFunc) +} From ec199719b39f5fc0bec227b320b20b149374b04e Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Thu, 18 Apr 2024 13:40:07 -0400 Subject: [PATCH 02/18] cut out extraneous logic for windows --- extension/opampextension/monitor_ppid_windows.go | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/extension/opampextension/monitor_ppid_windows.go b/extension/opampextension/monitor_ppid_windows.go index 6ca4a3d6384e9..e534296ec1c75 100644 --- a/extension/opampextension/monitor_ppid_windows.go +++ b/extension/opampextension/monitor_ppid_windows.go @@ -31,21 +31,6 @@ func monitorPPID(ctx context.Context, ppid int, reportStatus func(*component.Sta return } - processState, err := process.Wait() - if err != nil { - err := fmt.Errorf("collector was orphaned, error while waiting on process %d to exit: %w", ppid, err) - status := component.NewFatalErrorEvent(err) - reportStatus(status) - return - } - - if processState.Exited() { - err := fmt.Errorf("collector was orphaned, process %d exited: %w", ppid, err) - status := component.NewFatalErrorEvent(err) - reportStatus(status) - return - } - select { case <-time.After(orphanPollInterval): case <-ctx.Done(): From ed34a0e0c9efba87782795abe1403c0e4da38b7b Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Thu, 18 Apr 2024 13:52:01 -0400 Subject: [PATCH 03/18] start monitoring on extension start --- extension/opampextension/factory.go | 2 +- extension/opampextension/opamp_agent.go | 36 ++++++++++++++------ extension/opampextension/opamp_agent_test.go | 14 ++++---- 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/extension/opampextension/factory.go b/extension/opampextension/factory.go index 0974399752c49..5cc9eabd692ef 100644 --- a/extension/opampextension/factory.go +++ b/extension/opampextension/factory.go @@ -31,5 +31,5 @@ func createDefaultConfig() component.Config { } func createExtension(_ context.Context, set extension.CreateSettings, cfg component.Config) (extension.Extension, error) { - return newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + return newOpampAgent(cfg.(*Config), set) } diff --git a/extension/opampextension/opamp_agent.go b/extension/opampextension/opamp_agent.go index 4f96d8247767b..92963ac19f13b 100644 --- a/extension/opampextension/opamp_agent.go +++ b/extension/opampextension/opamp_agent.go @@ -19,7 +19,7 @@ import ( "github.com/open-telemetry/opamp-go/protobufs" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap" - "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/extension" semconv "go.opentelemetry.io/collector/semconv/v1.18.0" "go.uber.org/zap" "golang.org/x/exp/maps" @@ -38,6 +38,12 @@ type opampAgent struct { eclk sync.RWMutex effectiveConfig *confmap.Conf + // lifetimeCtx is canceled on Stop of the component + lifetimeCtx context.Context + lifetimeCtxCancel context.CancelFunc + + reportFunc func(*component.StatusEvent) + capabilities Capabilities agentDescription *protobufs.AgentDescription @@ -60,6 +66,12 @@ func (o *opampAgent) Start(ctx context.Context, _ component.Host) error { return err } + o.lifetimeCtx, o.lifetimeCtxCancel = context.WithCancel(context.Background()) + + if o.cfg.PPID != 0 { + go monitorPPID(o.lifetimeCtx, o.cfg.PPID, o.reportFunc) + } + settings := types.StartSettings{ Header: header, TLSConfig: tls, @@ -103,6 +115,10 @@ func (o *opampAgent) Start(ctx context.Context, _ component.Host) error { } func (o *opampAgent) Shutdown(ctx context.Context) error { + if o.lifetimeCtxCancel != nil { + o.lifetimeCtxCancel() + } + o.logger.Debug("OpAMP agent shutting down...") if o.opampClient == nil { return nil @@ -136,17 +152,17 @@ func (o *opampAgent) updateEffectiveConfig(conf *confmap.Conf) { o.effectiveConfig = conf } -func newOpampAgent(cfg *Config, logger *zap.Logger, build component.BuildInfo, res pcommon.Resource) (*opampAgent, error) { - agentType := build.Command +func newOpampAgent(cfg *Config, set extension.CreateSettings) (*opampAgent, error) { + agentType := set.BuildInfo.Command - sn, ok := res.Attributes().Get(semconv.AttributeServiceName) + sn, ok := set.Resource.Attributes().Get(semconv.AttributeServiceName) if ok { agentType = sn.AsString() } - agentVersion := build.Version + agentVersion := set.BuildInfo.Version - sv, ok := res.Attributes().Get(semconv.AttributeServiceVersion) + sv, ok := set.Resource.Attributes().Get(semconv.AttributeServiceVersion) if ok { agentVersion = sv.AsString() } @@ -160,7 +176,7 @@ func newOpampAgent(cfg *Config, logger *zap.Logger, build component.BuildInfo, r } uid = puid } else { - sid, ok := res.Attributes().Get(semconv.AttributeServiceInstanceID) + sid, ok := set.Resource.Attributes().Get(semconv.AttributeServiceInstanceID) if ok { parsedUUID, err := uuid.Parse(sid.AsString()) if err != nil { @@ -170,16 +186,16 @@ func newOpampAgent(cfg *Config, logger *zap.Logger, build component.BuildInfo, r } } - opampClient := cfg.Server.GetClient(logger) + opampClient := cfg.Server.GetClient(set.Logger) agent := &opampAgent{ cfg: cfg, - logger: logger, + logger: set.Logger, agentType: agentType, agentVersion: agentVersion, instanceID: uid, capabilities: cfg.Capabilities, opampClient: opampClient, - customCapabilityRegistry: newCustomCapabilityRegistry(logger, opampClient), + customCapabilityRegistry: newCustomCapabilityRegistry(set.Logger, opampClient), } return agent, nil diff --git a/extension/opampextension/opamp_agent_test.go b/extension/opampextension/opamp_agent_test.go index 652826639a983..ceb9f912f7664 100644 --- a/extension/opampextension/opamp_agent_test.go +++ b/extension/opampextension/opamp_agent_test.go @@ -25,7 +25,7 @@ func TestNewOpampAgent(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() set.BuildInfo = component.BuildInfo{Version: "test version", Command: "otelcoltest"} - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) assert.Equal(t, "otelcoltest", o.agentType) assert.Equal(t, "test version", o.agentVersion) @@ -42,7 +42,7 @@ func TestNewOpampAgentAttributes(t *testing.T) { set.Resource.Attributes().PutStr(semconv.AttributeServiceName, "otelcol-distro") set.Resource.Attributes().PutStr(semconv.AttributeServiceVersion, "distro.0") set.Resource.Attributes().PutStr(semconv.AttributeServiceInstanceID, "f8999bc1-4c9b-4619-9bae-7f009d2411ec") - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) assert.Equal(t, "otelcol-distro", o.agentType) assert.Equal(t, "distro.0", o.agentVersion) @@ -136,7 +136,7 @@ func TestCreateAgentDescription(t *testing.T) { set.Resource.Attributes().PutStr(semconv.AttributeServiceVersion, serviceVersion) set.Resource.Attributes().PutStr(semconv.AttributeServiceInstanceID, serviceInstanceUUID) - o, err := newOpampAgent(cfg, set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg, set) require.NoError(t, err) assert.Nil(t, o.agentDescription) @@ -150,7 +150,7 @@ func TestCreateAgentDescription(t *testing.T) { func TestUpdateAgentIdentity(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) olduid := o.instanceID @@ -166,7 +166,7 @@ func TestUpdateAgentIdentity(t *testing.T) { func TestComposeEffectiveConfig(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) assert.Empty(t, o.effectiveConfig) @@ -188,7 +188,7 @@ func TestComposeEffectiveConfig(t *testing.T) { func TestShutdown(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) // Shutdown with no OpAMP client @@ -198,7 +198,7 @@ func TestShutdown(t *testing.T) { func TestStart(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) assert.NoError(t, o.Start(context.TODO(), componenttest.NewNopHost())) From 7c5de68f34dd37733411bbc226c1d24a55b0dc9d Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Thu, 18 Apr 2024 16:02:08 -0400 Subject: [PATCH 04/18] simplify and use gopsutil for checking if process exists --- extension/opampextension/config.go | 2 +- extension/opampextension/go.mod | 7 +- extension/opampextension/go.sum | 11 +++ extension/opampextension/monitor_ppid.go | 36 ++++++++- .../opampextension/monitor_ppid_others.go | 34 --------- .../monitor_ppid_others_test.go | 60 --------------- extension/opampextension/monitor_ppid_test.go | 75 +++++++++++++++++++ .../opampextension/monitor_ppid_windows.go | 40 ---------- .../monitor_ppid_windows_test.go | 35 --------- 9 files changed, 128 insertions(+), 172 deletions(-) delete mode 100644 extension/opampextension/monitor_ppid_others.go delete mode 100644 extension/opampextension/monitor_ppid_others_test.go delete mode 100644 extension/opampextension/monitor_ppid_windows.go delete mode 100644 extension/opampextension/monitor_ppid_windows_test.go diff --git a/extension/opampextension/config.go b/extension/opampextension/config.go index cdef794d2978e..106f1cbe417a7 100644 --- a/extension/opampextension/config.go +++ b/extension/opampextension/config.go @@ -33,7 +33,7 @@ type Config struct { // PPID is the process ID of the parent for the collector. If the PPID is specified, // the extension will continuously poll for the status of the parent process, and emit a fatal error // when the parent process is closed. - PPID int `mapstructure:"ppid"` + PPID int32 `mapstructure:"ppid"` } type AgentDescription struct { diff --git a/extension/opampextension/go.mod b/extension/opampextension/go.mod index 757cccd3b5d31..2ce305aab6d24 100644 --- a/extension/opampextension/go.mod +++ b/extension/opampextension/go.mod @@ -6,13 +6,13 @@ require ( github.com/google/uuid v1.6.0 github.com/oklog/ulid/v2 v2.1.0 github.com/open-telemetry/opamp-go v0.14.0 + github.com/shirou/gopsutil v3.21.11+incompatible github.com/stretchr/testify v1.9.0 go.opentelemetry.io/collector/component v0.99.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/config/configopaque v1.6.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/config/configtls v0.99.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/confmap v0.99.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/extension v0.99.1-0.20240503164040-109173d9cf84 - go.opentelemetry.io/collector/pdata v1.6.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/semconv v0.99.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/otel/metric v1.26.0 go.opentelemetry.io/otel/trace v1.26.0 @@ -30,6 +30,7 @@ require ( github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/gorilla/websocket v1.5.1 // indirect @@ -44,7 +45,11 @@ require ( github.com/prometheus/common v0.53.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/rogpeppe/go-internal v1.11.0 // indirect + github.com/tklauser/go-sysconf v0.3.14 // indirect + github.com/tklauser/numcpus v0.8.0 // indirect + github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opentelemetry.io/collector/config/configtelemetry v0.99.1-0.20240503164040-109173d9cf84 // indirect + go.opentelemetry.io/collector/pdata v1.6.1-0.20240503164040-109173d9cf84 // indirect go.opentelemetry.io/otel v1.26.0 // indirect go.opentelemetry.io/otel/exporters/prometheus v0.48.0 // indirect go.opentelemetry.io/otel/sdk v1.26.0 // indirect diff --git a/extension/opampextension/go.sum b/extension/opampextension/go.sum index b9ebf206e8768..b8d51b02696b0 100644 --- a/extension/opampextension/go.sum +++ b/extension/opampextension/go.sum @@ -13,6 +13,8 @@ github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 h1:TQcrn6Wq+sKGkpyPvppOz99zsMBaUOKXq6HSv655U1c= github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= @@ -56,10 +58,18 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= +github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= +github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= +github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= +github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= +github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/collector/component v0.99.1-0.20240503164040-109173d9cf84 h1:rnux3gC9x2XxyG3tdRwkae2exirkImgC/K0kkWHrLtk= go.opentelemetry.io/collector/component v0.99.1-0.20240503164040-109173d9cf84/go.mod h1:+b56nMIvo3CO5TShFn38RwX4FsXv0lVt2HoGmsaXObo= go.opentelemetry.io/collector/config/configopaque v1.6.1-0.20240503164040-109173d9cf84 h1:pKzJfh+vKDc4jLbwRqxNvYc/8kZNTO4omxaYTy5vHRc= @@ -112,6 +122,7 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= diff --git a/extension/opampextension/monitor_ppid.go b/extension/opampextension/monitor_ppid.go index 95ed125b90505..29a1529638b4c 100644 --- a/extension/opampextension/monitor_ppid.go +++ b/extension/opampextension/monitor_ppid.go @@ -1,5 +1,39 @@ package opampextension -import "time" +import ( + "context" + "fmt" + "time" + + "github.com/shirou/gopsutil/process" + "go.opentelemetry.io/collector/component" +) var orphanPollInterval = 5 * time.Second + +func monitorPPID(ctx context.Context, ppid int32, reportStatus func(*component.StatusEvent)) { + // On unix-based systems, when the parent process dies orphaned processes + // are re-parented to be under the init system process (ppid becomes 1). + for { + exists, err := process.PidExistsWithContext(ctx, ppid) + if err != nil { + statusErr := fmt.Errorf("collector was orphaned, failed to find process with pid %d: %w", ppid, err) + status := component.NewFatalErrorEvent(statusErr) + reportStatus(status) + return + } + + if !exists { + statusErr := fmt.Errorf("collector was orphaned, process with pid %d does not exist", ppid) + status := component.NewFatalErrorEvent(statusErr) + reportStatus(status) + return + } + + select { + case <-time.After(orphanPollInterval): // OK; Poll again to make sure PID exists + case <-ctx.Done(): + return + } + } +} diff --git a/extension/opampextension/monitor_ppid_others.go b/extension/opampextension/monitor_ppid_others.go deleted file mode 100644 index 27f71b90053f3..0000000000000 --- a/extension/opampextension/monitor_ppid_others.go +++ /dev/null @@ -1,34 +0,0 @@ -//go:build !windows - -package opampextension - -import ( - "context" - "fmt" - "os" - "time" - - "go.opentelemetry.io/collector/component" -) - -// getppid is a function to get ppid of the process. It is mocked in testing. -var getppid = os.Getppid - -func monitorPPID(ctx context.Context, ppid int, reportStatus func(*component.StatusEvent)) { - // On unix-based systems, when the parent process dies orphaned processes - // are re-parented to be under the init system process (ppid becomes 1). - for { - if getppid() != ppid { - err := fmt.Errorf("collector was orphaned, parent pid is no longer %d", ppid) - status := component.NewFatalErrorEvent(err) - reportStatus(status) - return - } - - select { - case <-time.After(orphanPollInterval): - case <-ctx.Done(): - return - } - } -} diff --git a/extension/opampextension/monitor_ppid_others_test.go b/extension/opampextension/monitor_ppid_others_test.go deleted file mode 100644 index f66e7cc0e6187..0000000000000 --- a/extension/opampextension/monitor_ppid_others_test.go +++ /dev/null @@ -1,60 +0,0 @@ -//go:build !windows - -package opampextension - -import ( - "context" - "os" - "testing" - "time" - - "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/component" -) - -func TestMonitorPPIDOthers(t *testing.T) { - t.Run("Does not trigger if ppid stays as specified", func(t *testing.T) { - statusReportFunc := func(*component.StatusEvent) { - require.FailNow(t, "status report function should not be called") - } - - monitorCtx, monitorCtxCancel := context.WithCancel(context.Background()) - monitorCtxCancel() - - monitorPPID(monitorCtx, os.Getppid(), statusReportFunc) - }) - - t.Run("Emits fatal status if ppid changes", func(t *testing.T) { - numPolls := 0 - setGetPPID(t, func() int { - numPolls++ - if numPolls > 1 { - return 1 - } - return os.Getppid() - }) - - setOrphanPollInterval(t, 10*time.Millisecond) - - var statusEvent *component.StatusEvent - statusReportFunc := func(evt *component.StatusEvent) { - if statusEvent != nil { - require.FailNow(t, "status report function should not be called twice") - } - statusEvent = evt - } - - monitorPPID(context.Background(), os.Getppid(), statusReportFunc) - require.NotNil(t, statusEvent) - require.Equal(t, component.StatusFatalError, statusEvent.Status()) - }) - -} - -func setGetPPID(t *testing.T, newFunc func() int) { - old := getppid - getppid = newFunc - t.Cleanup(func() { - getppid = old - }) -} diff --git a/extension/opampextension/monitor_ppid_test.go b/extension/opampextension/monitor_ppid_test.go index f71da377d292c..a42fe6b07231b 100644 --- a/extension/opampextension/monitor_ppid_test.go +++ b/extension/opampextension/monitor_ppid_test.go @@ -1,10 +1,85 @@ package opampextension import ( + "context" + "os/exec" + "runtime" "testing" "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" ) +func TestMonitorPPIDOthers(t *testing.T) { + t.Run("Does not trigger if process with ppid never stops", func(t *testing.T) { + cmdContext, cmdCancel := context.WithCancel(context.Background()) + cmd := longRunningComand(cmdContext) + require.NoError(t, cmd.Start()) + + t.Cleanup(func() { + cmdCancel() + _ = cmd.Wait() + }) + + statusReportFunc := func(*component.StatusEvent) { + require.FailNow(t, "status report function should not be called") + } + + monitorCtx, monitorCtxCancel := context.WithCancel(context.Background()) + + setOrphanPollInterval(t, 10*time.Millisecond) + + go func() { + time.Sleep(500 * time.Millisecond) + monitorCtxCancel() + }() + + monitorPPID(monitorCtx, int32(cmd.Process.Pid), statusReportFunc) + }) + + t.Run("Emits fatal status if ppid changes", func(t *testing.T) { + cmdContext, cmdCancel := context.WithCancel(context.Background()) + cmd := longRunningComand(cmdContext) + require.NoError(t, cmd.Start()) + + t.Cleanup(func() { + cmdCancel() + _ = cmd.Wait() + }) + + var status *component.StatusEvent + statusReportFunc := func(evt *component.StatusEvent) { + if status != nil { + require.FailNow(t, "status report function should not be called twice") + } + status = evt + } + + setOrphanPollInterval(t, 10*time.Millisecond) + + go func() { + time.Sleep(500 * time.Millisecond) + cmdCancel() + _ = cmd.Wait() + }() + + monitorPPID(context.Background(), int32(cmd.Process.Pid), statusReportFunc) + require.NotNil(t, status) + require.Equal(t, component.StatusFatalError, status.Status()) + }) + +} + +func longRunningComand(ctx context.Context) *exec.Cmd { + switch runtime.GOOS { + case "windows": + return exec.CommandContext(ctx, "timeout", "/t", "1000") + default: + return exec.CommandContext(ctx, "sleep", "1000") + } +} + func setOrphanPollInterval(t *testing.T, newInterval time.Duration) { old := orphanPollInterval orphanPollInterval = newInterval diff --git a/extension/opampextension/monitor_ppid_windows.go b/extension/opampextension/monitor_ppid_windows.go deleted file mode 100644 index e534296ec1c75..0000000000000 --- a/extension/opampextension/monitor_ppid_windows.go +++ /dev/null @@ -1,40 +0,0 @@ -//go:build windows - -package opampextension - -import ( - "context" - "fmt" - "os" - "time" - - "go.opentelemetry.io/collector/component" -) - -func monitorPPID(ctx context.Context, ppid int, reportStatus func(*component.StatusEvent)) { - // On Windows systems, we can look up and synchronously wait for the process to exit. - // This is not possible on other systems, since Wait doesn't work on most systems unless the - // process is a child of the current one (see doc on process.Wait). - for { - process, err := os.FindProcess(ppid) - if err != nil { - err := fmt.Errorf("collector was orphaned, error finding process %d: %w", ppid, err) - status := component.NewFatalErrorEvent(err) - reportStatus(status) - return - } - - if process == nil { - err := fmt.Errorf("collector was orphaned, process %d does not exist", ppid) - status := component.NewFatalErrorEvent(err) - reportStatus(status) - return - } - - select { - case <-time.After(orphanPollInterval): - case <-ctx.Done(): - return - } - } -} diff --git a/extension/opampextension/monitor_ppid_windows_test.go b/extension/opampextension/monitor_ppid_windows_test.go deleted file mode 100644 index 398b819cb59ea..0000000000000 --- a/extension/opampextension/monitor_ppid_windows_test.go +++ /dev/null @@ -1,35 +0,0 @@ -//go:build windows - -package opampextension - -import ( - "context" - "os/exec" - "testing" - "time" - - "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/component" -) - -func TestMonitorPPIDWindows(t *testing.T) { - cmdContext, cmdContextCancel := context.WithCancel(context.Background()) - t.Cleanup(cmdContextCancel) - - cmd := exec.CommandContext(cmdContext, "/bin/sh", "-c", "sleep 1000") - err := cmd.Start() - require.NoError(t, err) - - statusReportFunc := func(*component.StatusEvent) { - require.FailNow(t, "status report function should not be called") - } - - monitorCtx, monitorCtxCancel := context.WithCancel(context.Background()) - - go func() { - time.Sleep(1 * time.Second) - monitorCtxCancel() - }() - - monitorPPID(monitorCtx, cmd.Process.Pid, statusReportFunc) -} From 3ed771d9843ec301026401bd5f27613b53d2fa64 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 19 Apr 2024 16:00:56 +0000 Subject: [PATCH 05/18] fix test on windows --- extension/opampextension/monitor_ppid_test.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/extension/opampextension/monitor_ppid_test.go b/extension/opampextension/monitor_ppid_test.go index a42fe6b07231b..b7852de504144 100644 --- a/extension/opampextension/monitor_ppid_test.go +++ b/extension/opampextension/monitor_ppid_test.go @@ -2,6 +2,7 @@ package opampextension import ( "context" + "os" "os/exec" "runtime" "testing" @@ -15,6 +16,7 @@ func TestMonitorPPIDOthers(t *testing.T) { t.Run("Does not trigger if process with ppid never stops", func(t *testing.T) { cmdContext, cmdCancel := context.WithCancel(context.Background()) cmd := longRunningComand(cmdContext) + cmd.Stdout = os.Stdout require.NoError(t, cmd.Start()) t.Cleanup(func() { @@ -22,7 +24,8 @@ func TestMonitorPPIDOthers(t *testing.T) { _ = cmd.Wait() }) - statusReportFunc := func(*component.StatusEvent) { + statusReportFunc := func(se *component.StatusEvent) { + t.Logf("Status event error: %s", se.Err()) require.FailNow(t, "status report function should not be called") } @@ -74,7 +77,9 @@ func TestMonitorPPIDOthers(t *testing.T) { func longRunningComand(ctx context.Context) *exec.Cmd { switch runtime.GOOS { case "windows": - return exec.CommandContext(ctx, "timeout", "/t", "1000") + // Would prefer to use timeout.exe here, but it doesn't seem to work in + // a CMD-less context. + return exec.CommandContext(ctx, "ping", "-n", "1000", "localhost") default: return exec.CommandContext(ctx, "sleep", "1000") } From 66b1a9b5fbfb80509a8ce7c5ec05199814a87b1b Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 19 Apr 2024 16:40:25 +0000 Subject: [PATCH 06/18] use v3 gopsutil like other components --- extension/opampextension/go.mod | 5 +++- extension/opampextension/go.sum | 31 ++++++++++++++++++++++-- extension/opampextension/monitor_ppid.go | 2 +- 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/extension/opampextension/go.mod b/extension/opampextension/go.mod index 2ce305aab6d24..09911b621feff 100644 --- a/extension/opampextension/go.mod +++ b/extension/opampextension/go.mod @@ -6,7 +6,7 @@ require ( github.com/google/uuid v1.6.0 github.com/oklog/ulid/v2 v2.1.0 github.com/open-telemetry/opamp-go v0.14.0 - github.com/shirou/gopsutil v3.21.11+incompatible + github.com/shirou/gopsutil/v3 v3.24.3 github.com/stretchr/testify v1.9.0 go.opentelemetry.io/collector/component v0.99.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/config/configopaque v1.6.1-0.20240503164040-109173d9cf84 @@ -37,14 +37,17 @@ require ( github.com/knadh/koanf/maps v0.1.1 // indirect github.com/knadh/koanf/providers/confmap v0.1.0 // indirect github.com/knadh/koanf/v2 v2.1.1 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/client_golang v1.19.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.53.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/rogpeppe/go-internal v1.11.0 // indirect + github.com/shoenig/go-m1cpu v0.1.6 // indirect github.com/tklauser/go-sysconf v0.3.14 // indirect github.com/tklauser/numcpus v0.8.0 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect diff --git a/extension/opampextension/go.sum b/extension/opampextension/go.sum index b8d51b02696b0..c98e67aa6265a 100644 --- a/extension/opampextension/go.sum +++ b/extension/opampextension/go.sum @@ -4,6 +4,8 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= @@ -19,6 +21,8 @@ github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 h1:TQcrn6Wq+sKGkpyPvppOz99zsM github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -37,6 +41,8 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= @@ -46,8 +52,11 @@ github.com/oklog/ulid/v2 v2.1.0/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNs github.com/open-telemetry/opamp-go v0.14.0 h1:KoziIK+wsFojhUXNTkCSTnCPf0eCMqFAaccOs0HrWIY= github.com/open-telemetry/opamp-go v0.14.0/go.mod h1:XOGCigljsLSTZ8FfLwvat0M1QDj3conIIgRa77BWrKs= github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= @@ -58,12 +67,25 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= -github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI= -github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA= +github.com/shirou/gopsutil/v3 v3.24.3 h1:eoUGJSmdfLzJ3mxIhmOAhgKEKgQkeOwKpz1NbhVnuPE= +github.com/shirou/gopsutil/v3 v3.24.3/go.mod h1:JpND7O217xa72ewWz9zN2eIIkPWsDN/3pl0H8Qt0uwg= +github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM= +github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= +github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= +github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= +github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -124,6 +146,10 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -147,5 +173,6 @@ google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHh gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/extension/opampextension/monitor_ppid.go b/extension/opampextension/monitor_ppid.go index 29a1529638b4c..0b706aa296d52 100644 --- a/extension/opampextension/monitor_ppid.go +++ b/extension/opampextension/monitor_ppid.go @@ -5,7 +5,7 @@ import ( "fmt" "time" - "github.com/shirou/gopsutil/process" + "github.com/shirou/gopsutil/v3/process" "go.opentelemetry.io/collector/component" ) From 2383006e7e649e31d95f74ca715f771efbd26ad4 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 19 Apr 2024 13:17:28 -0400 Subject: [PATCH 07/18] update README --- extension/opampextension/README.md | 1 + extension/opampextension/config.go | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/extension/opampextension/README.md b/extension/opampextension/README.md index e1da423682431..bb4fcf54f69cc 100644 --- a/extension/opampextension/README.md +++ b/extension/opampextension/README.md @@ -32,6 +32,7 @@ The following settings are optional: - `reports_effective_config`: Whether to enable the OpAMP ReportsEffectiveConfig capability. Default is `true`. - `agent_description`: Setting that modifies the agent description reported to the OpAMP server. - `non_identifying_attributes`: A map of key value pairs that will be added to the [non-identifying attributes](https://github.com/open-telemetry/opamp-spec/blob/main/specification.md#agentdescriptionnon_identifying_attributes) reported to the OpAMP server. If an attribute collides with the default non-identifying attributes that are automatically added, the ones specified here take precedence. +- `ppid`: An optional process ID to monitor. When this process is no longer running, the extension will emit a fatal error, causing the collector to exit. ### Example diff --git a/extension/opampextension/config.go b/extension/opampextension/config.go index 106f1cbe417a7..0875654b51b6f 100644 --- a/extension/opampextension/config.go +++ b/extension/opampextension/config.go @@ -32,7 +32,8 @@ type Config struct { // PPID is the process ID of the parent for the collector. If the PPID is specified, // the extension will continuously poll for the status of the parent process, and emit a fatal error - // when the parent process is closed. + // when the parent process is no longer running. + // If unspecified, the orphan detection logic does not run. PPID int32 `mapstructure:"ppid"` } From c3664c4df22676b77fe34f97a20662d355ea96c1 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 19 Apr 2024 13:21:23 -0400 Subject: [PATCH 08/18] add chlog entry --- .chloggen/feat_opamp-extension-monitor-ppid.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .chloggen/feat_opamp-extension-monitor-ppid.yaml diff --git a/.chloggen/feat_opamp-extension-monitor-ppid.yaml b/.chloggen/feat_opamp-extension-monitor-ppid.yaml new file mode 100644 index 0000000000000..55755b384c891 --- /dev/null +++ b/.chloggen/feat_opamp-extension-monitor-ppid.yaml @@ -0,0 +1,13 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: opampextension + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add a new `ppid` parameter that can be used to enable orphan detection for the supervisor. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [32189] From b335d07d4d0b09084cd4c327999c75766883ef3a Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 19 Apr 2024 13:31:59 -0400 Subject: [PATCH 09/18] fix comment leftover from separate unix/windows impls --- extension/opampextension/monitor_ppid.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extension/opampextension/monitor_ppid.go b/extension/opampextension/monitor_ppid.go index 0b706aa296d52..a7d695ca564c2 100644 --- a/extension/opampextension/monitor_ppid.go +++ b/extension/opampextension/monitor_ppid.go @@ -11,9 +11,9 @@ import ( var orphanPollInterval = 5 * time.Second +// monitorPPID polls for the existence of ppid. +// If the specified ppid no longer exists, a fatal error event is reported via the passed in reportStatus function. func monitorPPID(ctx context.Context, ppid int32, reportStatus func(*component.StatusEvent)) { - // On unix-based systems, when the parent process dies orphaned processes - // are re-parented to be under the init system process (ppid becomes 1). for { exists, err := process.PidExistsWithContext(ctx, ppid) if err != nil { From 440e6420b8782b4974b03290ef09bbd99b8e2ec5 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 19 Apr 2024 14:05:31 -0400 Subject: [PATCH 10/18] add license header --- extension/opampextension/monitor_ppid.go | 3 +++ extension/opampextension/monitor_ppid_test.go | 3 +++ 2 files changed, 6 insertions(+) diff --git a/extension/opampextension/monitor_ppid.go b/extension/opampextension/monitor_ppid.go index a7d695ca564c2..5b2b8496ba2e0 100644 --- a/extension/opampextension/monitor_ppid.go +++ b/extension/opampextension/monitor_ppid.go @@ -1,3 +1,6 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package opampextension import ( diff --git a/extension/opampextension/monitor_ppid_test.go b/extension/opampextension/monitor_ppid_test.go index b7852de504144..1cbf2a66c228f 100644 --- a/extension/opampextension/monitor_ppid_test.go +++ b/extension/opampextension/monitor_ppid_test.go @@ -1,3 +1,6 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + package opampextension import ( From 8c5bfa28b057564d24f401fd1f385e5369df3283 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 19 Apr 2024 14:33:14 -0400 Subject: [PATCH 11/18] fix race condition in test --- extension/opampextension/monitor_ppid_test.go | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/extension/opampextension/monitor_ppid_test.go b/extension/opampextension/monitor_ppid_test.go index 1cbf2a66c228f..00893f0ec5d44 100644 --- a/extension/opampextension/monitor_ppid_test.go +++ b/extension/opampextension/monitor_ppid_test.go @@ -15,12 +15,13 @@ import ( "go.opentelemetry.io/collector/component" ) -func TestMonitorPPIDOthers(t *testing.T) { +func TestMonitorPPID(t *testing.T) { t.Run("Does not trigger if process with ppid never stops", func(t *testing.T) { cmdContext, cmdCancel := context.WithCancel(context.Background()) cmd := longRunningComand(cmdContext) cmd.Stdout = os.Stdout require.NoError(t, cmd.Start()) + cmdPid := cmd.Process.Pid t.Cleanup(func() { cmdCancel() @@ -41,18 +42,14 @@ func TestMonitorPPIDOthers(t *testing.T) { monitorCtxCancel() }() - monitorPPID(monitorCtx, int32(cmd.Process.Pid), statusReportFunc) + monitorPPID(monitorCtx, int32(cmdPid), statusReportFunc) }) t.Run("Emits fatal status if ppid changes", func(t *testing.T) { cmdContext, cmdCancel := context.WithCancel(context.Background()) cmd := longRunningComand(cmdContext) require.NoError(t, cmd.Start()) - - t.Cleanup(func() { - cmdCancel() - _ = cmd.Wait() - }) + cmdPid := cmd.Process.Pid var status *component.StatusEvent statusReportFunc := func(evt *component.StatusEvent) { @@ -64,15 +61,25 @@ func TestMonitorPPIDOthers(t *testing.T) { setOrphanPollInterval(t, 10*time.Millisecond) + cmdDoneChan := make(chan struct{}) go func() { time.Sleep(500 * time.Millisecond) cmdCancel() _ = cmd.Wait() + close(cmdDoneChan) }() - monitorPPID(context.Background(), int32(cmd.Process.Pid), statusReportFunc) + monitorPPID(context.Background(), int32(cmdPid), statusReportFunc) require.NotNil(t, status) require.Equal(t, component.StatusFatalError, status.Status()) + + // wait for command stop goroutine to actually finish + select { + case <-cmdDoneChan: + case <-time.After(5 * time.Second): + t.Fatalf("Timed out waiting for command to stop") + } + }) } From 650c5e04fdefb73ea0be9ac43fc0ecbdf1396613 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 19 Apr 2024 14:39:39 -0400 Subject: [PATCH 12/18] make goporto --- extension/opampextension/monitor_ppid.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/opampextension/monitor_ppid.go b/extension/opampextension/monitor_ppid.go index 5b2b8496ba2e0..ebcbd2ae1f1a8 100644 --- a/extension/opampextension/monitor_ppid.go +++ b/extension/opampextension/monitor_ppid.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package opampextension +package opampextension // import "github.com/open-telemetry/opentelemetry-collector-contrib/extension/opampextension" import ( "context" From f7636e25d2ecd5c1a19b4b1ac019eabad38d6b37 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Tue, 23 Apr 2024 14:53:38 -0400 Subject: [PATCH 13/18] make poll interval configurable, speed up tests --- extension/opampextension/README.md | 1 + extension/opampextension/config.go | 4 ++++ extension/opampextension/factory.go | 2 ++ extension/opampextension/monitor_ppid.go | 6 ++--- extension/opampextension/monitor_ppid_test.go | 24 +++++++------------ extension/opampextension/opamp_agent.go | 2 +- 6 files changed, 18 insertions(+), 21 deletions(-) diff --git a/extension/opampextension/README.md b/extension/opampextension/README.md index bb4fcf54f69cc..e25bce9ec9de2 100644 --- a/extension/opampextension/README.md +++ b/extension/opampextension/README.md @@ -33,6 +33,7 @@ The following settings are optional: - `agent_description`: Setting that modifies the agent description reported to the OpAMP server. - `non_identifying_attributes`: A map of key value pairs that will be added to the [non-identifying attributes](https://github.com/open-telemetry/opamp-spec/blob/main/specification.md#agentdescriptionnon_identifying_attributes) reported to the OpAMP server. If an attribute collides with the default non-identifying attributes that are automatically added, the ones specified here take precedence. - `ppid`: An optional process ID to monitor. When this process is no longer running, the extension will emit a fatal error, causing the collector to exit. +- `ppid_poll_interval`: The poll interval between check for whether `ppid` is still alive or not. Defaults to 5 seconds. ### Example diff --git a/extension/opampextension/config.go b/extension/opampextension/config.go index 0875654b51b6f..ff141a7ef0249 100644 --- a/extension/opampextension/config.go +++ b/extension/opampextension/config.go @@ -6,6 +6,7 @@ package opampextension // import "github.com/open-telemetry/opentelemetry-collec import ( "errors" "net/url" + "time" "github.com/oklog/ulid/v2" "github.com/open-telemetry/opamp-go/client" @@ -35,6 +36,9 @@ type Config struct { // when the parent process is no longer running. // If unspecified, the orphan detection logic does not run. PPID int32 `mapstructure:"ppid"` + + // PPIDPollInterval is the time between polling for whether PPID is running. + PPIDPollInterval time.Duration `mapstructure:"ppid_poll_interval"` } type AgentDescription struct { diff --git a/extension/opampextension/factory.go b/extension/opampextension/factory.go index 5cc9eabd692ef..3f06c3a4d764b 100644 --- a/extension/opampextension/factory.go +++ b/extension/opampextension/factory.go @@ -5,6 +5,7 @@ package opampextension // import "github.com/open-telemetry/opentelemetry-collec import ( "context" + "time" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/extension" @@ -27,6 +28,7 @@ func createDefaultConfig() component.Config { Capabilities: Capabilities{ ReportsEffectiveConfig: true, }, + PPIDPollInterval: 5 * time.Second, } } diff --git a/extension/opampextension/monitor_ppid.go b/extension/opampextension/monitor_ppid.go index ebcbd2ae1f1a8..40214e0287413 100644 --- a/extension/opampextension/monitor_ppid.go +++ b/extension/opampextension/monitor_ppid.go @@ -12,11 +12,9 @@ import ( "go.opentelemetry.io/collector/component" ) -var orphanPollInterval = 5 * time.Second - // monitorPPID polls for the existence of ppid. // If the specified ppid no longer exists, a fatal error event is reported via the passed in reportStatus function. -func monitorPPID(ctx context.Context, ppid int32, reportStatus func(*component.StatusEvent)) { +func monitorPPID(ctx context.Context, interval time.Duration, ppid int32, reportStatus func(*component.StatusEvent)) { for { exists, err := process.PidExistsWithContext(ctx, ppid) if err != nil { @@ -34,7 +32,7 @@ func monitorPPID(ctx context.Context, ppid int32, reportStatus func(*component.S } select { - case <-time.After(orphanPollInterval): // OK; Poll again to make sure PID exists + case <-time.After(interval): // OK; Poll again to make sure PID exists case <-ctx.Done(): return } diff --git a/extension/opampextension/monitor_ppid_test.go b/extension/opampextension/monitor_ppid_test.go index 00893f0ec5d44..196ff5f916441 100644 --- a/extension/opampextension/monitor_ppid_test.go +++ b/extension/opampextension/monitor_ppid_test.go @@ -17,6 +17,8 @@ import ( func TestMonitorPPID(t *testing.T) { t.Run("Does not trigger if process with ppid never stops", func(t *testing.T) { + t.Parallel() + cmdContext, cmdCancel := context.WithCancel(context.Background()) cmd := longRunningComand(cmdContext) cmd.Stdout = os.Stdout @@ -35,17 +37,17 @@ func TestMonitorPPID(t *testing.T) { monitorCtx, monitorCtxCancel := context.WithCancel(context.Background()) - setOrphanPollInterval(t, 10*time.Millisecond) - go func() { - time.Sleep(500 * time.Millisecond) + time.Sleep(100 * time.Millisecond) monitorCtxCancel() }() - monitorPPID(monitorCtx, int32(cmdPid), statusReportFunc) + monitorPPID(monitorCtx, 1*time.Millisecond, int32(cmdPid), statusReportFunc) }) t.Run("Emits fatal status if ppid changes", func(t *testing.T) { + t.Parallel() + cmdContext, cmdCancel := context.WithCancel(context.Background()) cmd := longRunningComand(cmdContext) require.NoError(t, cmd.Start()) @@ -59,17 +61,15 @@ func TestMonitorPPID(t *testing.T) { status = evt } - setOrphanPollInterval(t, 10*time.Millisecond) - cmdDoneChan := make(chan struct{}) go func() { - time.Sleep(500 * time.Millisecond) + time.Sleep(100 * time.Millisecond) cmdCancel() _ = cmd.Wait() close(cmdDoneChan) }() - monitorPPID(context.Background(), int32(cmdPid), statusReportFunc) + monitorPPID(context.Background(), 1*time.Millisecond, int32(cmdPid), statusReportFunc) require.NotNil(t, status) require.Equal(t, component.StatusFatalError, status.Status()) @@ -94,11 +94,3 @@ func longRunningComand(ctx context.Context) *exec.Cmd { return exec.CommandContext(ctx, "sleep", "1000") } } - -func setOrphanPollInterval(t *testing.T, newInterval time.Duration) { - old := orphanPollInterval - orphanPollInterval = newInterval - t.Cleanup(func() { - orphanPollInterval = old - }) -} diff --git a/extension/opampextension/opamp_agent.go b/extension/opampextension/opamp_agent.go index 92963ac19f13b..0d29cb2be6067 100644 --- a/extension/opampextension/opamp_agent.go +++ b/extension/opampextension/opamp_agent.go @@ -69,7 +69,7 @@ func (o *opampAgent) Start(ctx context.Context, _ component.Host) error { o.lifetimeCtx, o.lifetimeCtxCancel = context.WithCancel(context.Background()) if o.cfg.PPID != 0 { - go monitorPPID(o.lifetimeCtx, o.cfg.PPID, o.reportFunc) + go monitorPPID(o.lifetimeCtx, o.cfg.PPIDPollInterval, o.cfg.PPID, o.reportFunc) } settings := types.StartSettings{ From 318da61ecad5d756e8b0f72996d3a6a27291caf7 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Tue, 23 Apr 2024 14:57:33 -0400 Subject: [PATCH 14/18] trim down sleeps even further --- extension/opampextension/monitor_ppid_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extension/opampextension/monitor_ppid_test.go b/extension/opampextension/monitor_ppid_test.go index 196ff5f916441..c44de75017499 100644 --- a/extension/opampextension/monitor_ppid_test.go +++ b/extension/opampextension/monitor_ppid_test.go @@ -38,7 +38,7 @@ func TestMonitorPPID(t *testing.T) { monitorCtx, monitorCtxCancel := context.WithCancel(context.Background()) go func() { - time.Sleep(100 * time.Millisecond) + time.Sleep(50 * time.Millisecond) monitorCtxCancel() }() @@ -63,7 +63,7 @@ func TestMonitorPPID(t *testing.T) { cmdDoneChan := make(chan struct{}) go func() { - time.Sleep(100 * time.Millisecond) + time.Sleep(50 * time.Millisecond) cmdCancel() _ = cmd.Wait() close(cmdDoneChan) From 8319b85f068c8d3984240f905bed1a5f62c19c13 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Tue, 23 Apr 2024 15:00:05 -0400 Subject: [PATCH 15/18] Specify that the ppid should not be set manually. --- extension/opampextension/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/opampextension/README.md b/extension/opampextension/README.md index e25bce9ec9de2..1f3beea3479db 100644 --- a/extension/opampextension/README.md +++ b/extension/opampextension/README.md @@ -32,7 +32,7 @@ The following settings are optional: - `reports_effective_config`: Whether to enable the OpAMP ReportsEffectiveConfig capability. Default is `true`. - `agent_description`: Setting that modifies the agent description reported to the OpAMP server. - `non_identifying_attributes`: A map of key value pairs that will be added to the [non-identifying attributes](https://github.com/open-telemetry/opamp-spec/blob/main/specification.md#agentdescriptionnon_identifying_attributes) reported to the OpAMP server. If an attribute collides with the default non-identifying attributes that are automatically added, the ones specified here take precedence. -- `ppid`: An optional process ID to monitor. When this process is no longer running, the extension will emit a fatal error, causing the collector to exit. +- `ppid`: An optional process ID to monitor. When this process is no longer running, the extension will emit a fatal error, causing the collector to exit. This is meant to be set by the Supervisor or some other parent process, and should not be configured manually. - `ppid_poll_interval`: The poll interval between check for whether `ppid` is still alive or not. Defaults to 5 seconds. ### Example From b025851614b4e4a0df35957d28f4547afe088d6c Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 3 May 2024 15:13:30 -0400 Subject: [PATCH 16/18] tidy otelcontribcol go.mod --- cmd/otelcontribcol/go.mod | 4 ++-- cmd/otelcontribcol/go.sum | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cmd/otelcontribcol/go.mod b/cmd/otelcontribcol/go.mod index 51e5ec2f08ff3..18c1b799fa102 100644 --- a/cmd/otelcontribcol/go.mod +++ b/cmd/otelcontribcol/go.mod @@ -648,8 +648,8 @@ require ( github.com/tidwall/wal v1.1.7 // indirect github.com/tilinna/clock v1.1.0 // indirect github.com/tinylib/msgp v1.1.9 // indirect - github.com/tklauser/go-sysconf v0.3.12 // indirect - github.com/tklauser/numcpus v0.6.1 // indirect + github.com/tklauser/go-sysconf v0.3.14 // indirect + github.com/tklauser/numcpus v0.8.0 // indirect github.com/valyala/fastjson v1.6.4 // indirect github.com/vincent-petithory/dataurl v1.0.0 // indirect github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852 // indirect diff --git a/cmd/otelcontribcol/go.sum b/cmd/otelcontribcol/go.sum index 63ac655352dd4..2a0d6863dc500 100644 --- a/cmd/otelcontribcol/go.sum +++ b/cmd/otelcontribcol/go.sum @@ -2167,11 +2167,13 @@ github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pv github.com/tjfoc/gmsm v1.3.2 h1:7JVkAn5bvUJ7HtU08iW6UiD+UTmJTIToHCfeFzkcCxM= github.com/tjfoc/gmsm v1.3.2/go.mod h1:HaUcFuY0auTiaHB9MHFGCPx5IaLhTUd2atbCFBQXn9w= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= -github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= +github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= +github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= -github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= +github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= +github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= From 51ec112b858724f3c060596352667ae348e8b149 Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 3 May 2024 15:40:02 -0400 Subject: [PATCH 17/18] fix config unmarshal test --- extension/opampextension/config_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/extension/opampextension/config_test.go b/extension/opampextension/config_test.go index 7a0d0ddd4461c..5702e3fa0f327 100644 --- a/extension/opampextension/config_test.go +++ b/extension/opampextension/config_test.go @@ -6,6 +6,7 @@ package opampextension import ( "path/filepath" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -40,6 +41,7 @@ func TestUnmarshalConfig(t *testing.T) { Capabilities: Capabilities{ ReportsEffectiveConfig: true, }, + PPIDPollInterval: 5 * time.Second, }, cfg) } @@ -60,6 +62,7 @@ func TestUnmarshalHttpConfig(t *testing.T) { Capabilities: Capabilities{ ReportsEffectiveConfig: true, }, + PPIDPollInterval: 5 * time.Second, }, cfg) } From a6e594bd3a81fd409f7bb0adadccc5c0b608989b Mon Sep 17 00:00:00 2001 From: Brandon Johnson Date: Fri, 3 May 2024 17:19:38 -0400 Subject: [PATCH 18/18] set report func (lost in rebase) --- extension/opampextension/opamp_agent.go | 1 + 1 file changed, 1 insertion(+) diff --git a/extension/opampextension/opamp_agent.go b/extension/opampextension/opamp_agent.go index 0d29cb2be6067..4a3c2924ae5ee 100644 --- a/extension/opampextension/opamp_agent.go +++ b/extension/opampextension/opamp_agent.go @@ -196,6 +196,7 @@ func newOpampAgent(cfg *Config, set extension.CreateSettings) (*opampAgent, erro capabilities: cfg.Capabilities, opampClient: opampClient, customCapabilityRegistry: newCustomCapabilityRegistry(set.Logger, opampClient), + reportFunc: set.ReportStatus, } return agent, nil