From 54aa89690aa462a15d536067a8683fbc6a66939e Mon Sep 17 00:00:00 2001
From: Feng Kun
Date: Sat, 25 Apr 2020 15:35:14 +0800
Subject: [PATCH] feat: upgrade node-exporter to v0.18.1

Signed-off-by: Feng Kun
---
 .../controller/addon/prometheus/controller.go | 30 ++++++-------------
 .../addon/prometheus/images/images.go         |  2 +-
 .../controller/addon/prometheus/yamls.go      | 22 +++++++++-----
 3 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/pkg/platform/controller/addon/prometheus/controller.go b/pkg/platform/controller/addon/prometheus/controller.go
index 5dba4b115..23eb8be2a 100755
--- a/pkg/platform/controller/addon/prometheus/controller.go
+++ b/pkg/platform/controller/addon/prometheus/controller.go
@@ -1486,21 +1486,20 @@ func createDaemonSetForNodeExporter(components images.Components) *appsv1.Daemon
 							Name:  nodeExporterDaemonSet,
 							Image: components.NodeExporterService.FullName(),
 							Args: []string{
-								"--path.procfs=/host/proc",
-								"--path.sysfs=/host/sys",
+								"--path.rootfs=/host",
 								"--no-collector.arp",
 								"--no-collector.bcache",
 								"--no-collector.bonding",
 								"--no-collector.buddyinfo",
 								"--no-collector.conntrack",
 								"--no-collector.cpu",
+								"--no-collector.cpufreq",
 								"--collector.diskstats",
 								"--no-collector.drbd",
 								"--no-collector.edac",
 								"--no-collector.entropy",
 								"--no-collector.filefd",
 								"--collector.filesystem",
-								"--no-collector.gmond",
 								"--no-collector.hwmon",
 								"--no-collector.infiniband",
 								"--no-collector.interrupts",
@@ -1509,13 +1508,15 @@ func createDaemonSetForNodeExporter(components images.Components) *appsv1.Daemon
 								"--no-collector.loadavg",
 								"--no-collector.logind",
 								"--no-collector.mdadm",
-								"--no-collector.megacli",
 								"--no-collector.meminfo",
 								"--no-collector.meminfo_numa",
 								"--no-collector.mountstats",
 								"--collector.netdev",
 								"--no-collector.netstat",
+								"--no-collector.netclass",
 								"--no-collector.nfs",
+								"--no-collector.nfsd",
+								"--no-collector.pressure",
 								"--no-collector.ntp",
 								"--no-collector.qdisc",
 								"--no-collector.runit",
@@ -1538,13 +1539,8 @@ func createDaemonSetForNodeExporter(components images.Components) *appsv1.Daemon
 							},
 							VolumeMounts: []corev1.VolumeMount{
 								{
-									MountPath: "/host/proc",
-									Name:      "proc",
-									ReadOnly:  true,
-								},
-								{
-									MountPath: "/host/sys",
-									Name:      "sys",
+									MountPath: "/host",
+									Name:      "root",
 									ReadOnly:  true,
 								},
 							},
@@ -1554,18 +1550,10 @@ func createDaemonSetForNodeExporter(components images.Components) *appsv1.Daemon
 					HostPID: true,
 					Volumes: []corev1.Volume{
 						{
-							Name: "proc",
-							VolumeSource: corev1.VolumeSource{
-								HostPath: &corev1.HostPathVolumeSource{
-									Path: "/proc",
-								},
-							},
-						},
-						{
-							Name: "sys",
+							Name: "root",
 							VolumeSource: corev1.VolumeSource{
 								HostPath: &corev1.HostPathVolumeSource{
-									Path: "/sys",
+									Path: "/",
 								},
 							},
 						},
diff --git a/pkg/platform/controller/addon/prometheus/images/images.go b/pkg/platform/controller/addon/prometheus/images/images.go
index 20d786838..6263d5502 100644
--- a/pkg/platform/controller/addon/prometheus/images/images.go
+++ b/pkg/platform/controller/addon/prometheus/images/images.go
@@ -57,7 +57,7 @@ var versionMap = map[string]Components{
 	LatestVersion: {
 		PrometheusService:         containerregistry.Image{Name: "prometheus", Tag: "v2.16.0"},
 		KubeStateService:          containerregistry.Image{Name: "kube-state-metrics", Tag: "v1.9.5"},
-		NodeExporterService:       containerregistry.Image{Name: "node-exporter", Tag: "v0.15.2"},
+		NodeExporterService:       containerregistry.Image{Name: "node-exporter", Tag: "v0.18.1"},
 		AlertManagerService:       containerregistry.Image{Name: "alertmanager", Tag: "v0.18.0"},
 		ConfigMapReloadWorkLoad:   containerregistry.Image{Name: "configmap-reload", Tag: "v0.1"},
 		PrometheusOperatorService: containerregistry.Image{Name: "prometheus-operator", Tag: "v0.31.1"},
diff --git a/pkg/platform/controller/addon/prometheus/yamls.go b/pkg/platform/controller/addon/prometheus/yamls.go
index f5479bc95..25a51598b 100755
--- a/pkg/platform/controller/addon/prometheus/yamls.go
+++ b/pkg/platform/controller/addon/prometheus/yamls.go
@@ -310,7 +310,7 @@ func scrapeConfigForPrometheus() string {
       regex: (.+)
     metric_relabel_configs:
     - source_labels: [ __name__ ]
-      regex: 'scheduler_e2e_scheduling_latency_microseconds_sum|scheduler_e2e_scheduling_latency_microseconds_count|apiserver_request_duration_seconds_(.*)|node_sockstat_TCP_inuse|node_network_transmit_bytes|node_network_receive_bytes|node_filesystem_size|node_filesystem_avail|node_disk_bytes_written|node_disk_bytes_read|node_disk_writes_completed|node_disk_reads_completed'
+      regex: 'scheduler_e2e_scheduling_latency_microseconds_sum|scheduler_e2e_scheduling_latency_microseconds_count|apiserver_request_duration_seconds_(.*)|node_sockstat_TCP_inuse|node_network_transmit_bytes_total|node_network_receive_bytes_total|node_filesystem_size_bytes|node_filesystem_avail_bytes|node_disk_written_bytes_total|node_disk_read_bytes_total|node_disk_writes_completed_total|node_disk_reads_completed_total'
       action: keep
     - regex: "instance|job|pod_name|namespace|scope|subresource"
       action: labeldrop
@@ -643,28 +643,34 @@ groups:
         expr: sum(k8s_pod_gpu_memory_used) without(namespace,pod_name,workload_kind,workload_name) *100 / on(node) group_left() kube_node_status_capacity_gpu_memory
 
       - record: k8s_node_fs_write_bytes
-        expr: (sum by (node) (irate(node_disk_bytes_written[4m]))) *on(node) group_left(node_role) kube_node_labels
+        expr: (sum by (node) (irate(node_disk_written_bytes_total[4m]))) *on(node) group_left(node_role) kube_node_labels
 
       - record: k8s_node_fs_read_bytes
-        expr: (sum by (node) (irate(node_disk_bytes_read[4m])))*on(node) group_left(node_role) kube_node_labels
+        expr: (sum by (node) (irate(node_disk_read_bytes_total[4m])))*on(node) group_left(node_role) kube_node_labels
 
       - record: k8s_node_fs_write_times
-        expr: (sum by (node) (irate(node_disk_writes_completed[4m])))*on(node) group_left(node_role) kube_node_labels
+        expr: (sum by (node) (irate(node_disk_writes_completed_total[4m])))*on(node) group_left(node_role) kube_node_labels
 
      - record: k8s_node_fs_read_times
-        expr: (sum by (node) (irate(node_disk_reads_completed[4m])))*on(node) group_left(node_role) kube_node_labels
+        expr: (sum by (node) (irate(node_disk_reads_completed_total[4m])))*on(node) group_left(node_role) kube_node_labels
 
       - record: k8s_node_pod_num
         expr: count(k8s_pod_status_ready) without (pod_name,workload_kind,workload_name,namespace)
 
       - record: k8s_node_disk_space_rate
-        expr: (100 - sum (node_filesystem_avail{fstype=~"ext3|ext4|xfs"}) by (node) / sum (node_filesystem_size{fstype=~"ext3|ext4|xfs"}) by (node) *100) *on(node) group_left(node_role) kube_node_labels
+        expr: (100 - sum (node_filesystem_avail_bytes{fstype=~"ext3|ext4|xfs"}) by (node) / sum (node_filesystem_size_bytes{fstype=~"ext3|ext4|xfs"}) by (node) *100) *on(node) group_left(node_role) kube_node_labels
+
+      - record: k8s_node_filesystem_avail_bytes
+        expr: node_filesystem_avail_bytes{fstype=~"ext3|ext4|xfs"}
+
+      - record: k8s_node_filesystem_size_bytes
+        expr: node_filesystem_size_bytes{fstype=~"ext3|ext4|xfs"}
 
       - record: k8s_node_network_receive_bytes_bw
-        expr: (sum by (node) (irate(node_network_receive_bytes{device!~"lo|veth(.*)|virb(.*)|docker(.*)|tunl(.*)|v-h(.*)|flannel(.*)"}[5m])))*on(node) group_left(node_role) kube_node_labels
+        expr: (sum by (node) (irate(node_network_receive_bytes_total{device!~"lo|veth(.*)|virb(.*)|docker(.*)|tunl(.*)|v-h(.*)|flannel(.*)"}[5m])))*on(node) group_left(node_role) kube_node_labels
 
       - record: k8s_node_network_transmit_bytes_bw
-        expr: (sum by (node) (irate(node_network_transmit_bytes{device!~"lo|veth(.*)|virb(.*)|docker(.*)|tunl(.*)|v-h(.*)|flannel(.*)"}[5m])))*on(node) group_left(node_role) kube_node_labels
+        expr: (sum by (node) (irate(node_network_transmit_bytes_total{device!~"lo|veth(.*)|virb(.*)|docker(.*)|tunl(.*)|v-h(.*)|flannel(.*)"}[5m])))*on(node) group_left(node_role) kube_node_labels
 
       - record: k8s_workload_abnormal
         expr: |-
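
Reviewer note: the sketch below summarizes the net effect of the controller.go hunks for quick reference. node-exporter v0.18.1 reads host metrics through one read-only hostPath mount of / exposed via --path.rootfs=/host, replacing the separate /proc and /sys mounts, and the collectors introduced after v0.15 (cpufreq, netclass, nfsd, pressure) are disabled explicitly. This is an illustration only: the helper names and the literal "node-exporter" container name are hypothetical, the flag list is abbreviated, and the real wiring lives in createDaemonSetForNodeExporter.

// Illustrative sketch distilled from the hunks above; not the controller code itself.
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// buildNodeExporterContainer (hypothetical name) mirrors the v0.18.1 flag style:
// a single --path.rootfs=/host instead of --path.procfs/--path.sysfs, plus
// explicit no-collector flags for collectors added after v0.15.
func buildNodeExporterContainer(image string) corev1.Container {
	return corev1.Container{
		Name:  "node-exporter",
		Image: image,
		Args: []string{
			"--path.rootfs=/host",
			"--no-collector.cpufreq",
			"--no-collector.netclass",
			"--no-collector.nfsd",
			"--no-collector.pressure",
			// ...remaining --collector/--no-collector flags as in the patch...
		},
		VolumeMounts: []corev1.VolumeMount{
			{MountPath: "/host", Name: "root", ReadOnly: true},
		},
	}
}

// buildNodeExporterVolumes (hypothetical name) mounts the host root filesystem
// once, replacing the former proc and sys hostPath volumes.
func buildNodeExporterVolumes() []corev1.Volume {
	return []corev1.Volume{
		{
			Name: "root",
			VolumeSource: corev1.VolumeSource{
				HostPath: &corev1.HostPathVolumeSource{Path: "/"},
			},
		},
	}
}

func main() {
	c := buildNodeExporterContainer("node-exporter:v0.18.1")
	v := buildNodeExporterVolumes()
	fmt.Println(c.Args[0], v[0].VolumeSource.HostPath.Path)
}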