diff --git a/.chloggen/feat_opamp-extension-monitor-ppid.yaml b/.chloggen/feat_opamp-extension-monitor-ppid.yaml new file mode 100644 index 0000000000000..55755b384c891 --- /dev/null +++ b/.chloggen/feat_opamp-extension-monitor-ppid.yaml @@ -0,0 +1,13 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: opampextension + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add a new `ppid` parameter that can be used to enable orphan detection for the supervisor. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [32189] diff --git a/cmd/otelcontribcol/go.mod b/cmd/otelcontribcol/go.mod index 51e5ec2f08ff3..18c1b799fa102 100644 --- a/cmd/otelcontribcol/go.mod +++ b/cmd/otelcontribcol/go.mod @@ -648,8 +648,8 @@ require ( github.com/tidwall/wal v1.1.7 // indirect github.com/tilinna/clock v1.1.0 // indirect github.com/tinylib/msgp v1.1.9 // indirect - github.com/tklauser/go-sysconf v0.3.12 // indirect - github.com/tklauser/numcpus v0.6.1 // indirect + github.com/tklauser/go-sysconf v0.3.14 // indirect + github.com/tklauser/numcpus v0.8.0 // indirect github.com/valyala/fastjson v1.6.4 // indirect github.com/vincent-petithory/dataurl v1.0.0 // indirect github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852 // indirect diff --git a/cmd/otelcontribcol/go.sum b/cmd/otelcontribcol/go.sum index 63ac655352dd4..2a0d6863dc500 100644 --- a/cmd/otelcontribcol/go.sum +++ b/cmd/otelcontribcol/go.sum @@ -2167,11 +2167,13 @@ github.com/tj/assert v0.0.3/go.mod h1:Ne6X72Q+TB1AteidzQncjw9PabbMp4PBMZ1k+vd1Pv github.com/tjfoc/gmsm v1.3.2 h1:7JVkAn5bvUJ7HtU08iW6UiD+UTmJTIToHCfeFzkcCxM= 
github.com/tjfoc/gmsm v1.3.2/go.mod h1:HaUcFuY0auTiaHB9MHFGCPx5IaLhTUd2atbCFBQXn9w= github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= -github.com/tklauser/go-sysconf v0.3.12 h1:0QaGUFOdQaIVdPgfITYzaTegZvdCjmYO52cSFAEVmqU= github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= +github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= +github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= -github.com/tklauser/numcpus v0.6.1 h1:ng9scYS7az0Bk4OZLvrNXNSAO2Pxr1XXRAPyjhIx+Fk= github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= +github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= +github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= diff --git a/extension/opampextension/README.md b/extension/opampextension/README.md index e1da423682431..1f3beea3479db 100644 --- a/extension/opampextension/README.md +++ b/extension/opampextension/README.md @@ -32,6 +32,8 @@ The following settings are optional: - `reports_effective_config`: Whether to enable the OpAMP ReportsEffectiveConfig capability. Default is `true`. - `agent_description`: Setting that modifies the agent description reported to the OpAMP server. 
- `non_identifying_attributes`: A map of key value pairs that will be added to the [non-identifying attributes](https://github.com/open-telemetry/opamp-spec/blob/main/specification.md#agentdescriptionnon_identifying_attributes) reported to the OpAMP server. If an attribute collides with the default non-identifying attributes that are automatically added, the ones specified here take precedence. +- `ppid`: An optional process ID to monitor. When this process is no longer running, the extension will emit a fatal error, causing the collector to exit. This is meant to be set by the Supervisor or some other parent process, and should not be configured manually. +- `ppid_poll_interval`: The interval between checks for whether the process identified by `ppid` is still running. Defaults to 5 seconds. ### Example diff --git a/extension/opampextension/config.go b/extension/opampextension/config.go index 877b5f62c874e..ff141a7ef0249 100644 --- a/extension/opampextension/config.go +++ b/extension/opampextension/config.go @@ -6,6 +6,7 @@ package opampextension // import "github.com/open-telemetry/opentelemetry-collec import ( "errors" "net/url" + "time" "github.com/oklog/ulid/v2" "github.com/open-telemetry/opamp-go/client" @@ -29,6 +30,15 @@ type Config struct { // Agent descriptions contains options to modify the AgentDescription message AgentDescription AgentDescription `mapstructure:"agent_description"` + + // PPID is the process ID of the parent for the collector. If the PPID is specified, + // the extension will continuously poll for the status of the parent process, and emit a fatal error + // when the parent process is no longer running. + // If unspecified, the orphan detection logic does not run. + PPID int32 `mapstructure:"ppid"` + + // PPIDPollInterval is the time between polling for whether PPID is running. 
+ PPIDPollInterval time.Duration `mapstructure:"ppid_poll_interval"` } type AgentDescription struct { diff --git a/extension/opampextension/config_test.go b/extension/opampextension/config_test.go index 7a0d0ddd4461c..5702e3fa0f327 100644 --- a/extension/opampextension/config_test.go +++ b/extension/opampextension/config_test.go @@ -6,6 +6,7 @@ package opampextension import ( "path/filepath" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -40,6 +41,7 @@ func TestUnmarshalConfig(t *testing.T) { Capabilities: Capabilities{ ReportsEffectiveConfig: true, }, + PPIDPollInterval: 5 * time.Second, }, cfg) } @@ -60,6 +62,7 @@ func TestUnmarshalHttpConfig(t *testing.T) { Capabilities: Capabilities{ ReportsEffectiveConfig: true, }, + PPIDPollInterval: 5 * time.Second, }, cfg) } diff --git a/extension/opampextension/factory.go b/extension/opampextension/factory.go index 0974399752c49..3f06c3a4d764b 100644 --- a/extension/opampextension/factory.go +++ b/extension/opampextension/factory.go @@ -5,6 +5,7 @@ package opampextension // import "github.com/open-telemetry/opentelemetry-collec import ( "context" + "time" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/extension" @@ -27,9 +28,10 @@ func createDefaultConfig() component.Config { Capabilities: Capabilities{ ReportsEffectiveConfig: true, }, + PPIDPollInterval: 5 * time.Second, } } func createExtension(_ context.Context, set extension.CreateSettings, cfg component.Config) (extension.Extension, error) { - return newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + return newOpampAgent(cfg.(*Config), set) } diff --git a/extension/opampextension/go.mod b/extension/opampextension/go.mod index 757cccd3b5d31..09911b621feff 100644 --- a/extension/opampextension/go.mod +++ b/extension/opampextension/go.mod @@ -6,13 +6,13 @@ require ( github.com/google/uuid v1.6.0 github.com/oklog/ulid/v2 v2.1.0 github.com/open-telemetry/opamp-go v0.14.0 + 
github.com/shirou/gopsutil/v3 v3.24.3 github.com/stretchr/testify v1.9.0 go.opentelemetry.io/collector/component v0.99.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/config/configopaque v1.6.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/config/configtls v0.99.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/confmap v0.99.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/extension v0.99.1-0.20240503164040-109173d9cf84 - go.opentelemetry.io/collector/pdata v1.6.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/collector/semconv v0.99.1-0.20240503164040-109173d9cf84 go.opentelemetry.io/otel/metric v1.26.0 go.opentelemetry.io/otel/trace v1.26.0 @@ -30,21 +30,29 @@ require ( github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-logr/logr v1.4.1 // indirect github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/knadh/koanf/maps v0.1.1 // indirect github.com/knadh/koanf/providers/confmap v0.1.0 // indirect github.com/knadh/koanf/v2 v2.1.1 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/prometheus/client_golang v1.19.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.53.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/rogpeppe/go-internal v1.11.0 // indirect + github.com/shoenig/go-m1cpu v0.1.6 // indirect + github.com/tklauser/go-sysconf v0.3.14 // indirect + github.com/tklauser/numcpus v0.8.0 // indirect + 
github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opentelemetry.io/collector/config/configtelemetry v0.99.1-0.20240503164040-109173d9cf84 // indirect + go.opentelemetry.io/collector/pdata v1.6.1-0.20240503164040-109173d9cf84 // indirect go.opentelemetry.io/otel v1.26.0 // indirect go.opentelemetry.io/otel/exporters/prometheus v0.48.0 // indirect go.opentelemetry.io/otel/sdk v1.26.0 // indirect diff --git a/extension/opampextension/go.sum b/extension/opampextension/go.sum index b9ebf206e8768..c98e67aa6265a 100644 --- a/extension/opampextension/go.sum +++ b/extension/opampextension/go.sum @@ -4,6 +4,8 @@ github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqy github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= @@ -13,10 +15,14 @@ github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= 
github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1 h1:TQcrn6Wq+sKGkpyPvppOz99zsMBaUOKXq6HSv655U1c= github.com/go-viper/mapstructure/v2 v2.0.0-alpha.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -35,6 +41,8 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= @@ -44,8 +52,11 @@ github.com/oklog/ulid/v2 v2.1.0/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNs github.com/open-telemetry/opamp-go v0.14.0 h1:KoziIK+wsFojhUXNTkCSTnCPf0eCMqFAaccOs0HrWIY= github.com/open-telemetry/opamp-go v0.14.0/go.mod h1:XOGCigljsLSTZ8FfLwvat0M1QDj3conIIgRa77BWrKs= github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod 
h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= @@ -56,10 +67,31 @@ github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= +github.com/shirou/gopsutil/v3 v3.24.3 h1:eoUGJSmdfLzJ3mxIhmOAhgKEKgQkeOwKpz1NbhVnuPE= +github.com/shirou/gopsutil/v3 v3.24.3/go.mod h1:JpND7O217xa72ewWz9zN2eIIkPWsDN/3pl0H8Qt0uwg= +github.com/shoenig/go-m1cpu v0.1.6 h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM= +github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= +github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= +github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tklauser/go-sysconf v0.3.12/go.mod h1:Ho14jnntGE1fpdOqQEEaiKRpvIavV0hSfmBq8nJbHYI= +github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= +github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= +github.com/tklauser/numcpus v0.6.1/go.mod h1:1XfjsgE2zo8GVw7POkMbHENHzVg3GzmoZ9fESEdAacY= +github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= +github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0= +github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= go.opentelemetry.io/collector/component v0.99.1-0.20240503164040-109173d9cf84 h1:rnux3gC9x2XxyG3tdRwkae2exirkImgC/K0kkWHrLtk= go.opentelemetry.io/collector/component v0.99.1-0.20240503164040-109173d9cf84/go.mod h1:+b56nMIvo3CO5TShFn38RwX4FsXv0lVt2HoGmsaXObo= go.opentelemetry.io/collector/config/configopaque v1.6.1-0.20240503164040-109173d9cf84 h1:pKzJfh+vKDc4jLbwRqxNvYc/8kZNTO4omxaYTy5vHRc= @@ -112,7 +144,12 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod 
h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -136,5 +173,6 @@ google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHh gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/extension/opampextension/monitor_ppid.go b/extension/opampextension/monitor_ppid.go new file mode 100644 index 0000000000000..40214e0287413 --- /dev/null +++ 
b/extension/opampextension/monitor_ppid.go @@ -0,0 +1,40 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package opampextension // import "github.com/open-telemetry/opentelemetry-collector-contrib/extension/opampextension" + +import ( + "context" + "fmt" + "time" + + "github.com/shirou/gopsutil/v3/process" + "go.opentelemetry.io/collector/component" +) + +// monitorPPID polls for the existence of ppid. +// If the specified ppid no longer exists, a fatal error event is reported via the passed in reportStatus function. +func monitorPPID(ctx context.Context, interval time.Duration, ppid int32, reportStatus func(*component.StatusEvent)) { + for { + exists, err := process.PidExistsWithContext(ctx, ppid) + if err != nil { + statusErr := fmt.Errorf("collector was orphaned, failed to find process with pid %d: %w", ppid, err) + status := component.NewFatalErrorEvent(statusErr) + reportStatus(status) + return + } + + if !exists { + statusErr := fmt.Errorf("collector was orphaned, process with pid %d does not exist", ppid) + status := component.NewFatalErrorEvent(statusErr) + reportStatus(status) + return + } + + select { + case <-time.After(interval): // OK; Poll again to make sure PID exists + case <-ctx.Done(): + return + } + } +} diff --git a/extension/opampextension/monitor_ppid_test.go b/extension/opampextension/monitor_ppid_test.go new file mode 100644 index 0000000000000..c44de75017499 --- /dev/null +++ b/extension/opampextension/monitor_ppid_test.go @@ -0,0 +1,96 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package opampextension + +import ( + "context" + "os" + "os/exec" + "runtime" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" +) + +func TestMonitorPPID(t *testing.T) { + t.Run("Does not trigger if process with ppid never stops", func(t *testing.T) { + t.Parallel() + + cmdContext, cmdCancel := 
context.WithCancel(context.Background()) + cmd := longRunningComand(cmdContext) + cmd.Stdout = os.Stdout + require.NoError(t, cmd.Start()) + cmdPid := cmd.Process.Pid + + t.Cleanup(func() { + cmdCancel() + _ = cmd.Wait() + }) + + statusReportFunc := func(se *component.StatusEvent) { + t.Logf("Status event error: %s", se.Err()) + require.FailNow(t, "status report function should not be called") + } + + monitorCtx, monitorCtxCancel := context.WithCancel(context.Background()) + + go func() { + time.Sleep(50 * time.Millisecond) + monitorCtxCancel() + }() + + monitorPPID(monitorCtx, 1*time.Millisecond, int32(cmdPid), statusReportFunc) + }) + + t.Run("Emits fatal status if ppid changes", func(t *testing.T) { + t.Parallel() + + cmdContext, cmdCancel := context.WithCancel(context.Background()) + cmd := longRunningComand(cmdContext) + require.NoError(t, cmd.Start()) + cmdPid := cmd.Process.Pid + + var status *component.StatusEvent + statusReportFunc := func(evt *component.StatusEvent) { + if status != nil { + require.FailNow(t, "status report function should not be called twice") + } + status = evt + } + + cmdDoneChan := make(chan struct{}) + go func() { + time.Sleep(50 * time.Millisecond) + cmdCancel() + _ = cmd.Wait() + close(cmdDoneChan) + }() + + monitorPPID(context.Background(), 1*time.Millisecond, int32(cmdPid), statusReportFunc) + require.NotNil(t, status) + require.Equal(t, component.StatusFatalError, status.Status()) + + // wait for command stop goroutine to actually finish + select { + case <-cmdDoneChan: + case <-time.After(5 * time.Second): + t.Fatalf("Timed out waiting for command to stop") + } + + }) + +} + +func longRunningComand(ctx context.Context) *exec.Cmd { + switch runtime.GOOS { + case "windows": + // Would prefer to use timeout.exe here, but it doesn't seem to work in + // a CMD-less context. 
+ return exec.CommandContext(ctx, "ping", "-n", "1000", "localhost") + default: + return exec.CommandContext(ctx, "sleep", "1000") + } +} diff --git a/extension/opampextension/opamp_agent.go b/extension/opampextension/opamp_agent.go index 4f96d8247767b..4a3c2924ae5ee 100644 --- a/extension/opampextension/opamp_agent.go +++ b/extension/opampextension/opamp_agent.go @@ -19,7 +19,7 @@ import ( "github.com/open-telemetry/opamp-go/protobufs" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/confmap" - "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/extension" semconv "go.opentelemetry.io/collector/semconv/v1.18.0" "go.uber.org/zap" "golang.org/x/exp/maps" @@ -38,6 +38,12 @@ type opampAgent struct { eclk sync.RWMutex effectiveConfig *confmap.Conf + // lifetimeCtx is canceled on Stop of the component + lifetimeCtx context.Context + lifetimeCtxCancel context.CancelFunc + + reportFunc func(*component.StatusEvent) + capabilities Capabilities agentDescription *protobufs.AgentDescription @@ -60,6 +66,12 @@ func (o *opampAgent) Start(ctx context.Context, _ component.Host) error { return err } + o.lifetimeCtx, o.lifetimeCtxCancel = context.WithCancel(context.Background()) + + if o.cfg.PPID != 0 { + go monitorPPID(o.lifetimeCtx, o.cfg.PPIDPollInterval, o.cfg.PPID, o.reportFunc) + } + settings := types.StartSettings{ Header: header, TLSConfig: tls, @@ -103,6 +115,10 @@ func (o *opampAgent) Start(ctx context.Context, _ component.Host) error { } func (o *opampAgent) Shutdown(ctx context.Context) error { + if o.lifetimeCtxCancel != nil { + o.lifetimeCtxCancel() + } + o.logger.Debug("OpAMP agent shutting down...") if o.opampClient == nil { return nil @@ -136,17 +152,17 @@ func (o *opampAgent) updateEffectiveConfig(conf *confmap.Conf) { o.effectiveConfig = conf } -func newOpampAgent(cfg *Config, logger *zap.Logger, build component.BuildInfo, res pcommon.Resource) (*opampAgent, error) { - agentType := build.Command +func 
newOpampAgent(cfg *Config, set extension.CreateSettings) (*opampAgent, error) { + agentType := set.BuildInfo.Command - sn, ok := res.Attributes().Get(semconv.AttributeServiceName) + sn, ok := set.Resource.Attributes().Get(semconv.AttributeServiceName) if ok { agentType = sn.AsString() } - agentVersion := build.Version + agentVersion := set.BuildInfo.Version - sv, ok := res.Attributes().Get(semconv.AttributeServiceVersion) + sv, ok := set.Resource.Attributes().Get(semconv.AttributeServiceVersion) if ok { agentVersion = sv.AsString() } @@ -160,7 +176,7 @@ func newOpampAgent(cfg *Config, logger *zap.Logger, build component.BuildInfo, r } uid = puid } else { - sid, ok := res.Attributes().Get(semconv.AttributeServiceInstanceID) + sid, ok := set.Resource.Attributes().Get(semconv.AttributeServiceInstanceID) if ok { parsedUUID, err := uuid.Parse(sid.AsString()) if err != nil { @@ -170,16 +186,17 @@ func newOpampAgent(cfg *Config, logger *zap.Logger, build component.BuildInfo, r } } - opampClient := cfg.Server.GetClient(logger) + opampClient := cfg.Server.GetClient(set.Logger) agent := &opampAgent{ cfg: cfg, - logger: logger, + logger: set.Logger, agentType: agentType, agentVersion: agentVersion, instanceID: uid, capabilities: cfg.Capabilities, opampClient: opampClient, - customCapabilityRegistry: newCustomCapabilityRegistry(logger, opampClient), + customCapabilityRegistry: newCustomCapabilityRegistry(set.Logger, opampClient), + reportFunc: set.ReportStatus, } return agent, nil diff --git a/extension/opampextension/opamp_agent_test.go b/extension/opampextension/opamp_agent_test.go index 652826639a983..ceb9f912f7664 100644 --- a/extension/opampextension/opamp_agent_test.go +++ b/extension/opampextension/opamp_agent_test.go @@ -25,7 +25,7 @@ func TestNewOpampAgent(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() set.BuildInfo = component.BuildInfo{Version: "test version", Command: "otelcoltest"} - o, err := newOpampAgent(cfg.(*Config), 
set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) assert.Equal(t, "otelcoltest", o.agentType) assert.Equal(t, "test version", o.agentVersion) @@ -42,7 +42,7 @@ func TestNewOpampAgentAttributes(t *testing.T) { set.Resource.Attributes().PutStr(semconv.AttributeServiceName, "otelcol-distro") set.Resource.Attributes().PutStr(semconv.AttributeServiceVersion, "distro.0") set.Resource.Attributes().PutStr(semconv.AttributeServiceInstanceID, "f8999bc1-4c9b-4619-9bae-7f009d2411ec") - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) assert.Equal(t, "otelcol-distro", o.agentType) assert.Equal(t, "distro.0", o.agentVersion) @@ -136,7 +136,7 @@ func TestCreateAgentDescription(t *testing.T) { set.Resource.Attributes().PutStr(semconv.AttributeServiceVersion, serviceVersion) set.Resource.Attributes().PutStr(semconv.AttributeServiceInstanceID, serviceInstanceUUID) - o, err := newOpampAgent(cfg, set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg, set) require.NoError(t, err) assert.Nil(t, o.agentDescription) @@ -150,7 +150,7 @@ func TestCreateAgentDescription(t *testing.T) { func TestUpdateAgentIdentity(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) olduid := o.instanceID @@ -166,7 +166,7 @@ func TestUpdateAgentIdentity(t *testing.T) { func TestComposeEffectiveConfig(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) assert.Empty(t, o.effectiveConfig) @@ -188,7 +188,7 @@ func TestComposeEffectiveConfig(t *testing.T) { func 
TestShutdown(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) // Shutdown with no OpAMP client @@ -198,7 +198,7 @@ func TestShutdown(t *testing.T) { func TestStart(t *testing.T) { cfg := createDefaultConfig() set := extensiontest.NewNopCreateSettings() - o, err := newOpampAgent(cfg.(*Config), set.Logger, set.BuildInfo, set.Resource) + o, err := newOpampAgent(cfg.(*Config), set) assert.NoError(t, err) assert.NoError(t, o.Start(context.TODO(), componenttest.NewNopHost()))