forked from iovisor/bcc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request iovisor#770 from palmtenor/pmu_api
Add basic support for BPF perf event
- Loading branch information
Showing
8 changed files
with
364 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
.TH llcstat 8 "2015-08-18" "USER COMMANDS" | ||
.SH NAME | ||
llcstat \- Trace cache references and cache misses. Uses Linux eBPF/bcc. | ||
.SH SYNOPSIS | ||
.B llcstat [\-h] [\-c SAMPLE_PERIOD] [duration] | ||
.SH DESCRIPTION | ||
llcstat traces cache references and cache misses system-wide, and summarizes | ||
them by PID and CPU. These events have different meanings on different | ||
architectures. For x86-64, they mean misses and references to the last-level cache (LLC). | ||
This can be useful to locate and debug performance issues | ||
caused by cache hit rate. | ||
|
||
This works by sampling corresponding events defined in uapi/linux/perf_event.h, | ||
namely PERF_COUNT_HW_CACHE_REFERENCES and PERF_COUNT_HW_CACHE_MISSES, using | ||
BPF perf event tracing. Upon each sampled event, the attached BPF program | ||
records the PID and CPU ID on which the event happened, and stores them in a table. | ||
|
||
This makes use of a Linux 4.9 feature (BPF_PROG_TYPE_PERF_EVENT). | ||
|
||
Since this uses BPF, only the root user can use this tool. | ||
.SH REQUIREMENTS | ||
CONFIG_BPF and bcc. | ||
.SH OPTIONS | ||
.TP | ||
\-h | ||
Print usage message. | ||
.TP | ||
\-c SAMPLE_PERIOD | ||
Sample one in this many cache reference and cache miss events. | ||
.TP | ||
duration | ||
Duration to trace, in seconds. | ||
.SH EXAMPLES | ||
.TP | ||
Sample one in 100 events, trace for 20 seconds: | ||
# | ||
.B llcstat -c 100 20 | ||
.SH FIELDS | ||
.TP | ||
PID | ||
Process ID | ||
.TP | ||
NAME | ||
Process name | ||
.TP | ||
CPU | ||
CPU ID | ||
.TP | ||
REFERENCE | ||
Number of cache reference events | ||
.TP | ||
MISS | ||
Number of cache miss events | ||
.TP | ||
HIT% | ||
Cache hit ratio | ||
.SH SOURCE | ||
This is from bcc. | ||
.IP | ||
https://github.com/iovisor/bcc | ||
.PP | ||
Also look in the bcc distribution for a companion _examples.txt file containing | ||
example usage, output, and commentary for this tool. | ||
.SH OS | ||
Linux | ||
.SH STABILITY | ||
Unstable - in development. | ||
.SH AUTHOR | ||
Teng Qin |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
#!/usr/bin/python | ||
# | ||
# llcstat.py Summarize cache references and cache misses by PID. | ||
# Cache reference and cache miss are corresponding events defined in | ||
# uapi/linux/perf_event.h; their exact meaning varies across architectures. | ||
# On x86-64, they mean LLC references and LLC misses. | ||
# | ||
# For Linux, uses BCC, eBPF. Embedded C. | ||
# | ||
# REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). | ||
# | ||
# Copyright (c) 2016 Facebook, Inc. | ||
# Licensed under the Apache License, Version 2.0 (the "License") | ||
# | ||
# 19-Oct-2016 Teng Qin Created this. | ||
|
||
from __future__ import print_function | ||
import argparse | ||
from bcc import BPF, PerfType, PerfHWConfig | ||
import signal | ||
from time import sleep | ||
|
||
# BPF program. One handler is attached per hardware event; each handler adds
# the event's sample period to a hash keyed by (cpu, pid, comm), so the stored
# value approximates the real number of events rather than the sample count.
bpf_text = """
#include <linux/ptrace.h>
#include <uapi/linux/bpf_perf_event.h>

struct key_t {
    int cpu;
    int pid;
    char name[TASK_COMM_LEN];
};

BPF_HASH(ref_count, struct key_t);
BPF_HASH(miss_count, struct key_t);

static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
    key->cpu = bpf_get_smp_processor_id();
    // Upper 32 bits are the TGID (what user space calls the PID); the lower
    // 32 bits are the thread ID, which is not what the PID column reports.
    key->pid = bpf_get_current_pid_tgid() >> 32;
    bpf_get_current_comm(&(key->name), sizeof(key->name));
}

int on_cache_miss(struct bpf_perf_event_data *ctx) {
    struct key_t key = {};
    get_key(&key);

    u64 zero = 0, *val;
    val = miss_count.lookup_or_init(&key, &zero);
    (*val) += ctx->sample_period;

    return 0;
}

int on_cache_ref(struct bpf_perf_event_data *ctx) {
    struct key_t key = {};
    get_key(&key);

    u64 zero = 0, *val;
    val = ref_count.lookup_or_init(&key, &zero);
    (*val) += ctx->sample_period;

    return 0;
}
"""

# Header and row share the same column widths so the titles line up with the
# values printed beneath them.
HEADER_FMT = '{:<8s} {:<16s} {:<4s} {:>12s} {:>12s} {:>7s}'
ROW_FMT = '{:<8d} {:<16s} {:<4d} {:>12d} {:>12d} {:>6.2f}%'


def parse_args(argv=None):
    """Parse command-line options.

    argv: list of argument strings; None means use sys.argv[1:].
    Returns the argparse namespace with `sample_period` and `duration`.
    """
    parser = argparse.ArgumentParser(
        description="Summarize cache references and misses by PID",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        "-c", "--sample_period", type=int, default=100,
        help="Sample one in this many number of cache reference / miss events")
    parser.add_argument(
        "duration", nargs="?", default=10, help="Duration, in seconds, to run")
    return parser.parse_args(argv)


def hit_percent(refs, misses):
    """Return the cache hit rate as a percentage in the range [0, 100].

    Sampling can report more misses than references for a key; that case is
    clamped to 0 instead of going negative. With no references at all the
    rate is reported as 0 rather than dividing by zero.
    """
    if refs <= 0:
        return 0.0
    hits = (refs - misses) if refs >= misses else 0
    return (float(hits) / float(refs)) * 100.0


def comm_to_str(name):
    """Return a process name as text.

    The table key's name field is bytes on Python 3, which str.format()
    refuses to pad with a '{:<16s}' spec; on Python 2 it is already str.
    """
    if isinstance(name, str):
        return name
    return name.decode('utf-8', 'replace')


def main():
    """Attach the perf-event handlers, wait, then print the summary."""
    args = parse_args()

    # load BPF program
    b = BPF(text=bpf_text)
    b.attach_perf_event(
        ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CACHE_MISSES,
        fn_name="on_cache_miss", sample_period=args.sample_period)
    b.attach_perf_event(
        ev_type=PerfType.HARDWARE, ev_config=PerfHWConfig.CACHE_REFERENCES,
        fn_name="on_cache_ref", sample_period=args.sample_period)

    print("Running for {} seconds or hit Ctrl-C to end.".format(args.duration))

    try:
        sleep(float(args.duration))
    except KeyboardInterrupt:
        # From here on a further Ctrl-C only prints a newline, so the report
        # below is not cut short.
        signal.signal(signal.SIGINT, lambda signum, frame: print())

    miss_count = {
        (k.pid, k.cpu, k.name): v.value
        for (k, v) in b.get_table('miss_count').items()
    }

    print(HEADER_FMT.format('PID', 'NAME', 'CPU', 'REFERENCE', 'MISS', 'HIT%'))
    tot_ref = 0
    tot_miss = 0
    for (k, v) in b.get_table('ref_count').items():
        # A key may have references but no sampled misses.
        miss = miss_count.get((k.pid, k.cpu, k.name), 0)
        tot_ref += v.value
        tot_miss += miss
        print(ROW_FMT.format(
            k.pid, comm_to_str(k.name), k.cpu, v.value, miss,
            hit_percent(v.value, miss)))
    print('Total References: {} Total Misses: {} Hit Rate: {:.2f}%'.format(
        tot_ref, tot_miss, hit_percent(tot_ref, tot_miss)))


if __name__ == "__main__":
    main()
Oops, something went wrong.