# kafka-broker.yaml - JMX metric mapping rules for a Kafka broker
---
rules:
# Broker metrics
# Messages received: the bean's `Count` attribute is published as the
# counter `kafka.message.count`.
- bean: kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec
  mapping:
    Count:
      metric: kafka.message.count
      type: counter
      desc: The number of messages received by the broker
      unit: '{messages}'
# Fetch requests: `Count` feeds `kafka.request.count`, the same metric name
# used by the TotalProduceRequestsPerSec rule; the `type: fetch` label keeps
# the two series apart.
- bean: kafka.server:type=BrokerTopicMetrics,name=TotalFetchRequestsPerSec
  label:
    type: fetch
  mapping:
    Count:
      metric: kafka.request.count
      type: counter
      desc: The number of requests received by the broker
      unit: '{requests}'
# Produce requests: `Count` feeds `kafka.request.count`, the same metric name
# used by the TotalFetchRequestsPerSec rule; the `type: produce` label keeps
# the two series apart.
- bean: kafka.server:type=BrokerTopicMetrics,name=TotalProduceRequestsPerSec
  label:
    type: produce
  mapping:
    Count:
      metric: kafka.request.count
      type: counter
      desc: The number of requests received by the broker
      unit: '{requests}'
# Failed fetch requests: `Count` feeds the counter `kafka.request.failed`,
# labelled `type: fetch` to separate it from the failed-produce rule that
# reuses the same metric name.
- bean: kafka.server:type=BrokerTopicMetrics,name=FailedFetchRequestsPerSec
  label:
    type: fetch
  mapping:
    Count:
      metric: kafka.request.failed
      type: counter
      desc: The number of requests to the broker resulting in a failure
      unit: '{requests}'
# Failed produce requests: `Count` feeds the counter `kafka.request.failed`,
# labelled `type: produce` to separate it from the failed-fetch rule that
# reuses the same metric name.
- bean: kafka.server:type=BrokerTopicMetrics,name=FailedProduceRequestsPerSec
  label:
    type: produce
  mapping:
    Count:
      metric: kafka.request.failed
      type: counter
      desc: The number of requests to the broker resulting in a failure
      unit: '{requests}'
# Request service time for Produce, FetchConsumer and FetchFollower requests.
# One rule covers all three beans; `param(request)` presumably fills the `type`
# label from the `request` key of each bean name - confirm against the rule
# engine. `unit: ms` is declared once at rule level and no mapping entry
# overrides it.
# NOTE(review): `kafka.request.time.total` is read from the `Count` attribute,
# yet its description speaks of total time - verify that `Count` really carries
# a duration and not a number of samples.
- beans:
  - kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Produce
  - kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchConsumer
  - kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchFollower
  label:
    type: param(request)
  unit: ms
  mapping:
    Count:
      metric: kafka.request.time.total
      type: counter
      desc: The total time the broker has taken to service requests
    50thPercentile:
      metric: kafka.request.time.50p
      type: gauge
      desc: The 50th percentile time the broker has taken to service requests
    99thPercentile:
      metric: kafka.request.time.99p
      type: gauge
      desc: The 99th percentile time the broker has taken to service requests
# Request queue depth: the bean's `Value` attribute is published as the
# up/down counter `kafka.request.queue`.
- bean: kafka.network:type=RequestChannel,name=RequestQueueSize
  mapping:
    Value:
      metric: kafka.request.queue
      type: updowncounter
      desc: Size of the request queue
      unit: '{requests}'
# Inbound bytes: `Count` feeds the counter `kafka.network.io` with label
# `direction: in`; the BytesOutPerSec rule reports the opposite direction
# under the same metric name.
- bean: kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec
  label:
    direction: in
  mapping:
    Count:
      metric: kafka.network.io
      type: counter
      desc: The bytes received or sent by the broker
      unit: By
# Outbound bytes: `Count` feeds the counter `kafka.network.io` with label
# `direction: out`; the BytesInPerSec rule reports the opposite direction
# under the same metric name.
- bean: kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec
  label:
    direction: out
  mapping:
    Count:
      metric: kafka.network.io
      type: counter
      desc: The bytes received or sent by the broker
      unit: By
# Purgatory size for the Produce and Fetch delayed operations. Both beans
# share this rule; `param(delayedOperation)` presumably fills the `type`
# label from the `delayedOperation` key of each bean name - confirm against
# the rule engine.
- beans:
  - kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=Produce
  - kafka.server:type=DelayedOperationPurgatory,name=PurgatorySize,delayedOperation=Fetch
  label:
    type: param(delayedOperation)
  mapping:
    Value:
      metric: kafka.purgatory.size
      type: updowncounter
      desc: The number of requests waiting in purgatory
      unit: '{requests}'
# Partitions on this broker: `Value` is published as the up/down counter
# `kafka.partition.count`.
- bean: kafka.server:type=ReplicaManager,name=PartitionCount
  mapping:
    Value:
      metric: kafka.partition.count
      type: updowncounter
      desc: The number of partitions on the broker
      unit: '{partitions}'
# Offline partitions: `Value` of the controller bean is published as the
# up/down counter `kafka.partition.offline`.
- bean: kafka.controller:type=KafkaController,name=OfflinePartitionsCount
  mapping:
    Value:
      metric: kafka.partition.offline
      type: updowncounter
      desc: The number of partitions offline
      unit: '{partitions}'
# Under-replicated partitions: `Value` is published as the up/down counter
# `kafka.partition.underReplicated`.
- bean: kafka.server:type=ReplicaManager,name=UnderReplicatedPartitions
  mapping:
    Value:
      metric: kafka.partition.underReplicated
      type: updowncounter
      desc: The number of under replicated partitions
      unit: '{partitions}'
# In-sync replica shrinks: `Count` feeds `kafka.isr.operation.count` with
# label `operation: shrink`; the IsrExpandsPerSec rule reports expansions
# under the same metric name.
# NOTE(review): the other `*PerSec` rules in this file map `Count` to
# `counter`, whereas this one uses `updowncounter` - confirm that is intended.
- bean: kafka.server:type=ReplicaManager,name=IsrShrinksPerSec
  label:
    operation: shrink
  mapping:
    Count:
      metric: kafka.isr.operation.count
      type: updowncounter
      desc: The number of in-sync replica shrink and expand operations
      unit: '{operations}'
# In-sync replica expansions: `Count` feeds `kafka.isr.operation.count` with
# label `operation: expand`; the IsrShrinksPerSec rule reports shrinks under
# the same metric name.
# NOTE(review): the other `*PerSec` rules in this file map `Count` to
# `counter`, whereas this one uses `updowncounter` - confirm that is intended.
- bean: kafka.server:type=ReplicaManager,name=IsrExpandsPerSec
  label:
    operation: expand
  mapping:
    Count:
      metric: kafka.isr.operation.count
      type: updowncounter
      desc: The number of in-sync replica shrink and expand operations
      unit: '{operations}'
# Maximum replica lag: `Value` is published as `kafka.lag.max`.
# No `type` is declared for this entry, so the instrument kind is whatever the
# rule engine uses by default - TODO confirm the default is the intended one.
- bean: kafka.server:type=ReplicaFetcherManager,name=MaxLag,clientId=Replica
  mapping:
    Value:
      metric: kafka.lag.max
      desc: The max lag in messages between follower and leader replicas
      unit: '{messages}'
# Active controllers: `Value` of the controller bean is published as the
# up/down counter `kafka.controller.active.count`.
- bean: kafka.controller:type=KafkaController,name=ActiveControllerCount
  mapping:
    Value:
      metric: kafka.controller.active.count
      type: updowncounter
      desc: The number of controllers active on the broker
      unit: '{controllers}'
# Leader elections: only the `Count` attribute of this bean is mapped, as the
# counter `kafka.leaderElection.count`.
- bean: kafka.controller:type=ControllerStats,name=LeaderElectionRateAndTimeMs
  mapping:
    Count:
      metric: kafka.leaderElection.count
      type: counter
      desc: The leader election count
      unit: '{elections}'
# Unclean leader elections: `Count` is published as the counter
# `kafka.leaderElection.unclean.count`.
- bean: kafka.controller:type=ControllerStats,name=UncleanLeaderElectionsPerSec
  mapping:
    Count:
      metric: kafka.leaderElection.unclean.count
      type: counter
      desc: Unclean leader election count - increasing indicates broker failures
      unit: '{elections}'
# Log metrics
# Log flush statistics. `unit`, `type` and `prefix` are declared once at rule
# level; a mapping entry can replace such a default, as `Count` does for
# `type`. The two percentile entries keep the rule-level `ms` / `gauge`.
- bean: kafka.log:type=LogFlushStats,name=LogFlushRateAndTimeMs
  unit: ms
  type: gauge
  prefix: kafka.logs.flush.
  mapping:
    Count:
      # A flush count is not a duration: override the rule-level `ms` so the
      # counter is not reported in milliseconds. The annotation follows the
      # plural '{...}' style used by the other rules in this file.
      # No `metric` key is given here, so the emitted name is left to the
      # rule engine's default (presumably prefix + attribute name); it is
      # deliberately not changed to avoid renaming an existing series.
      type: counter
      desc: Log flush count
      unit: '{flushes}'
    50thPercentile:
      metric: time.50p
      desc: Log flush time - 50th percentile
    99thPercentile:
      metric: time.99p
      desc: Log flush time - 99th percentile