Skip to content

Commit

Permalink
Make perf ring buffer size configurable
Browse files Browse the repository at this point in the history
As discussed in iovisor#966, this PR makes the size of the ring buffer used to send
data to userspace configurable. It changes the Python, Lua and C++ APIs to
expose this knob.

It also raises the default buffer size to 64 pages per CPU (an 8x increase)
for several tools which produce a lot of output, and makes the size
configurable in `trace` via a `-b` flag.
  • Loading branch information
Mark Drayton committed Feb 27, 2017
1 parent 02884a0 commit 5f5687e
Show file tree
Hide file tree
Showing 31 changed files with 106 additions and 60 deletions.
4 changes: 2 additions & 2 deletions docs/reference_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -865,9 +865,9 @@ These are equivalent.

### 2. open_perf_buffer()

Syntax: ```table.open_perf_buffers(callback)```
Syntax: ```table.open_perf_buffer(callback, page_cnt=N)```

This operates on a table as defined in BPF as BPF_PERF_OUTPUT(), and associates the callback Python function ```callback``` to be called when data is available in the perf ring buffer. This is part of the recommended mechanism for transferring per-event data from kernel to user space.
This operates on a table as defined in BPF as BPF_PERF_OUTPUT(), and associates the callback Python function ```callback``` to be called when data is available in the perf ring buffer. This is part of the recommended mechanism for transferring per-event data from kernel to user space. The size of the perf ring buffer can be specified via the ```page_cnt``` parameter, which is given as a number of pages; it must be a power of two and defaults to 8.

Example:

Expand Down
2 changes: 1 addition & 1 deletion examples/lua/bashreadline.lua
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ return function(BPF)
print("%-9s %-6d %s" % {os.date("%H:%M:%S"), tonumber(event.pid), ffi.string(event.str)})
end

b:get_table("events"):open_perf_buffer(print_readline, "struct { uint64_t pid; char str[80]; }")
b:get_table("events"):open_perf_buffer(print_readline, "struct { uint64_t pid; char str[80]; }", nil)

print("%-9s %-6s %s" % {"TIME", "PID", "COMMAND"})
b:kprobe_poll_loop()
Expand Down
7 changes: 5 additions & 2 deletions src/cc/BPF.cc
Original file line number Diff line number Diff line change
Expand Up @@ -392,11 +392,14 @@ StatusTuple BPF::detach_perf_event(uint32_t ev_type, uint32_t ev_config) {
}

StatusTuple BPF::open_perf_buffer(const std::string& name,
perf_reader_raw_cb cb, void* cb_cookie) {
perf_reader_raw_cb cb, void* cb_cookie,
int page_cnt) {
if (perf_buffers_.find(name) == perf_buffers_.end())
perf_buffers_[name] = new BPFPerfBuffer(bpf_module_.get(), name);
if ((page_cnt & (page_cnt - 1)) != 0)
return StatusTuple(-1, "open_perf_buffer page_cnt must be a power of two");
auto table = perf_buffers_[name];
TRY2(table->open_all_cpu(cb, cb_cookie));
TRY2(table->open_all_cpu(cb, cb_cookie, page_cnt));
return StatusTuple(0);
}

Expand Down
5 changes: 4 additions & 1 deletion src/cc/BPF.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
#include "compat/linux/bpf.h"
#include "libbpf.h"

static const int DEFAULT_PERF_BUFFER_PAGE_CNT = 8;

namespace ebpf {

struct open_probe_t {
Expand Down Expand Up @@ -96,7 +98,8 @@ class BPF {
}

StatusTuple open_perf_buffer(const std::string& name, perf_reader_raw_cb cb,
void* cb_cookie = nullptr);
void* cb_cookie = nullptr,
int page_cnt = DEFAULT_PERF_BUFFER_PAGE_CNT);
StatusTuple close_perf_buffer(const std::string& name);
void poll_perf_buffer(const std::string& name, int timeout = -1);

Expand Down
8 changes: 4 additions & 4 deletions src/cc/BPFTable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ std::vector<std::string> BPFStackTable::get_stack_symbol(int stack_id,
}

StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb, int cpu,
void* cb_cookie) {
void* cb_cookie, int page_cnt) {
if (cpu_readers_.find(cpu) != cpu_readers_.end())
return StatusTuple(-1, "Perf buffer already open on CPU %d", cpu);
auto reader =
static_cast<perf_reader*>(bpf_open_perf_buffer(cb, cb_cookie, -1, cpu));
static_cast<perf_reader*>(bpf_open_perf_buffer(cb, cb_cookie, -1, cpu, page_cnt));
if (reader == nullptr)
return StatusTuple(-1, "Unable to construct perf reader");
int reader_fd = perf_reader_fd(reader);
Expand All @@ -86,12 +86,12 @@ StatusTuple BPFPerfBuffer::open_on_cpu(perf_reader_raw_cb cb, int cpu,
}

StatusTuple BPFPerfBuffer::open_all_cpu(perf_reader_raw_cb cb,
void* cb_cookie) {
void* cb_cookie, int page_cnt) {
if (cpu_readers_.size() != 0 || readers_.size() != 0)
return StatusTuple(-1, "Previously opened perf buffer not cleaned");

for (int i: get_online_cpus()) {
auto res = open_on_cpu(cb, i, cb_cookie);
auto res = open_on_cpu(cb, i, cb_cookie, page_cnt);
if (res.code() != 0) {
TRY2(close_all_cpu());
return res;
Expand Down
6 changes: 4 additions & 2 deletions src/cc/BPFTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,12 +126,14 @@ class BPFPerfBuffer : protected BPFTableBase<int, int> {
: BPFTableBase<int, int>(bpf_module, name) {}
~BPFPerfBuffer();

StatusTuple open_all_cpu(perf_reader_raw_cb cb, void* cb_cookie);
StatusTuple open_all_cpu(perf_reader_raw_cb cb, void* cb_cookie,
int page_cnt);
StatusTuple close_all_cpu();
void poll(int timeout);

private:
StatusTuple open_on_cpu(perf_reader_raw_cb cb, int cpu, void* cb_cookie);
StatusTuple open_on_cpu(perf_reader_raw_cb cb, int cpu, void* cb_cookie,
int page_cnt);
StatusTuple close_on_cpu(int cpu);

std::map<int, perf_reader*> cpu_readers_;
Expand Down
13 changes: 8 additions & 5 deletions src/cc/libbpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@
#define PERF_FLAG_FD_CLOEXEC (1UL << 3)
#endif

static int probe_perf_reader_page_cnt = 8;

static __u64 ptr_to_u64(void *ptr)
{
return (__u64) (unsigned long) ptr;
Expand Down Expand Up @@ -351,7 +353,7 @@ void * bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type, con
int n;

snprintf(new_name, sizeof(new_name), "%s_bcc_%d", ev_name, getpid());
reader = perf_reader_new(cb, NULL, cb_cookie);
reader = perf_reader_new(cb, NULL, cb_cookie, probe_perf_reader_page_cnt);
if (!reader)
goto error;

Expand Down Expand Up @@ -411,7 +413,7 @@ void * bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type, con
int n;

snprintf(new_name, sizeof(new_name), "%s_bcc_%d", ev_name, getpid());
reader = perf_reader_new(cb, NULL, cb_cookie);
reader = perf_reader_new(cb, NULL, cb_cookie, probe_perf_reader_page_cnt);
if (!reader)
goto error;

Expand Down Expand Up @@ -493,7 +495,7 @@ void * bpf_attach_tracepoint(int progfd, const char *tp_category,
char buf[256];
struct perf_reader *reader = NULL;

reader = perf_reader_new(cb, NULL, cb_cookie);
reader = perf_reader_new(cb, NULL, cb_cookie, probe_perf_reader_page_cnt);
if (!reader)
goto error;

Expand All @@ -515,12 +517,13 @@ int bpf_detach_tracepoint(const char *tp_category, const char *tp_name) {
return 0;
}

void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid, int cpu) {
void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid,
int cpu, int page_cnt) {
int pfd;
struct perf_event_attr attr = {};
struct perf_reader *reader = NULL;

reader = perf_reader_new(NULL, raw_cb, cb_cookie);
reader = perf_reader_new(NULL, raw_cb, cb_cookie, page_cnt);
if (!reader)
goto error;

Expand Down
3 changes: 2 additions & 1 deletion src/cc/libbpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ void * bpf_attach_tracepoint(int progfd, const char *tp_category,
int group_fd, perf_reader_cb cb, void *cb_cookie);
int bpf_detach_tracepoint(const char *tp_category, const char *tp_name);

void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid, int cpu);
void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid,
int cpu, int page_cnt);

/* attach the prog identified by progfd to the device specified in dev_name */
int bpf_attach_xdp(const char *dev_name, int progfd);
Expand Down
7 changes: 3 additions & 4 deletions src/cc/perf_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
#include "libbpf.h"
#include "perf_reader.h"

int perf_reader_page_cnt = 8;

struct perf_reader {
perf_reader_cb cb;
perf_reader_raw_cb raw_cb;
Expand All @@ -42,7 +40,8 @@ struct perf_reader {
uint64_t sample_type;
};

struct perf_reader * perf_reader_new(perf_reader_cb cb, perf_reader_raw_cb raw_cb, void *cb_cookie) {
struct perf_reader * perf_reader_new(perf_reader_cb cb,
perf_reader_raw_cb raw_cb, void *cb_cookie, int page_cnt) {
struct perf_reader *reader = calloc(1, sizeof(struct perf_reader));
if (!reader)
return NULL;
Expand All @@ -51,7 +50,7 @@ struct perf_reader * perf_reader_new(perf_reader_cb cb, perf_reader_raw_cb raw_c
reader->cb_cookie = cb_cookie;
reader->fd = -1;
reader->page_size = getpagesize();
reader->page_cnt = perf_reader_page_cnt;
reader->page_cnt = page_cnt;
return reader;
}

Expand Down
3 changes: 2 additions & 1 deletion src/cc/perf_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ extern "C" {

struct perf_reader;

struct perf_reader * perf_reader_new(perf_reader_cb cb, perf_reader_raw_cb raw_cb, void *cb_cookie);
struct perf_reader * perf_reader_new(perf_reader_cb cb,
perf_reader_raw_cb raw_cb, void *cb_cookie, int page_cnt);
void perf_reader_free(void *ptr);
int perf_reader_mmap(struct perf_reader *reader, unsigned type, unsigned long sample_type);
int perf_reader_poll(int num_readers, struct perf_reader **readers, int timeout);
Expand Down
2 changes: 1 addition & 1 deletion src/lua/bcc/libbcc.lua
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ void * bpf_attach_uprobe(int progfd, int attach_type, const char *ev_name,

int bpf_detach_uprobe(const char *ev_name);

void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid, int cpu);
void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb, void *cb_cookie, int pid, int cpu, int page_cnt);
]]

ffi.cdef[[
Expand Down
11 changes: 6 additions & 5 deletions src/lua/bcc/table.lua
Original file line number Diff line number Diff line change
Expand Up @@ -243,13 +243,14 @@ local function _perf_id(id, cpu)
return string.format("bcc:perf_event_array:%d:%d", tonumber(id), cpu or 0)
end

function PerfEventArray:_open_perf_buffer(cpu, callback, ctype)
function PerfEventArray:_open_perf_buffer(cpu, callback, ctype, page_cnt)
local _cb = ffi.cast("perf_reader_raw_cb",
function (cookie, data, size)
callback(cpu, ctype(data)[0])
end)

local reader = libbcc.bpf_open_perf_buffer(_cb, nil, -1, cpu)
-- default to 8 pages per buffer
local reader = libbcc.bpf_open_perf_buffer(_cb, nil, -1, cpu, page_cnt or 8)
assert(reader, "failed to open perf buffer")

local fd = libbcc.perf_reader_fd(reader)
Expand All @@ -258,11 +259,11 @@ function PerfEventArray:_open_perf_buffer(cpu, callback, ctype)
self._callbacks[cpu] = _cb
end

function PerfEventArray:open_perf_buffer(callback, data_type, ...)
function PerfEventArray:open_perf_buffer(callback, data_type, data_params, page_cnt)
assert(data_type, "a data type is needed for callback conversion")
local ctype = ffi.typeof(data_type.."*", ...)
local ctype = ffi.typeof(data_type.."*", unpack(data_params or {}))
for i = 0, Posix.cpu_count() - 1 do
self:_open_perf_buffer(i, callback, ctype)
self:_open_perf_buffer(i, callback, ctype, page_cnt)
end
end

Expand Down
2 changes: 1 addition & 1 deletion src/python/bcc/libbcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
lib.bpf_detach_tracepoint.restype = ct.c_int
lib.bpf_detach_tracepoint.argtypes = [ct.c_char_p, ct.c_char_p]
lib.bpf_open_perf_buffer.restype = ct.c_void_p
lib.bpf_open_perf_buffer.argtypes = [_RAW_CB_TYPE, ct.py_object, ct.c_int, ct.c_int]
lib.bpf_open_perf_buffer.argtypes = [_RAW_CB_TYPE, ct.py_object, ct.c_int, ct.c_int, ct.c_int]
lib.bpf_open_perf_event.restype = ct.c_int
lib.bpf_open_perf_event.argtypes = [ct.c_uint, ct.c_ulonglong, ct.c_int, ct.c_int]
lib.perf_reader_poll.restype = ct.c_int
Expand Down
15 changes: 10 additions & 5 deletions src/python/bcc/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,20 +507,25 @@ def __delitem__(self, key):
super(PerfEventArray, self).__delitem__(key)
self.close_perf_buffer(key)

def open_perf_buffer(self, callback):
def open_perf_buffer(self, callback, page_cnt=8):
"""open_perf_buffers(callback)
Opens a set of per-cpu ring buffers to receive custom perf event
data from the bpf program. The callback will be invoked for each
event submitted from the kernel, up to millions per second.
event submitted from the kernel, up to millions per second. Use
page_cnt to change the size of the per-cpu ring buffer. The value
must be a power of two and defaults to 8.
"""

if page_cnt & (page_cnt - 1) != 0:
raise Exception("Perf buffer page_cnt must be a power of two")

for i in get_online_cpus():
self._open_perf_buffer(i, callback)
self._open_perf_buffer(i, callback, page_cnt)

def _open_perf_buffer(self, cpu, callback):
def _open_perf_buffer(self, cpu, callback, page_cnt):
fn = _RAW_CB_TYPE(lambda _, data, size: callback(cpu, data, size))
reader = lib.bpf_open_perf_buffer(fn, None, -1, cpu)
reader = lib.bpf_open_perf_buffer(fn, None, -1, cpu, page_cnt)
if not reader:
raise Exception("Could not open perf buffer")
fd = lib.perf_reader_fd(reader)
Expand Down
6 changes: 3 additions & 3 deletions tools/biosnoop.lua
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ return function(BPF, utils)
uint64_t sector;
uint64_t len;
uint64_t ts;
char disk_name[%d];
char name[%d];
char disk_name[$];
char name[$];
}
]] % {DISK_NAME_LEN, TASK_COMM_LEN})
]], {DISK_NAME_LEN, TASK_COMM_LEN}, 64)
bpf:kprobe_poll_loop()
end
2 changes: 1 addition & 1 deletion tools/biosnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,6 @@ def print_event(cpu, data, size):
start_ts = 1

# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
b.kprobe_poll()
2 changes: 1 addition & 1 deletion tools/btrfsslower.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,6 @@ def print_event(cpu, data, size):
"BYTES", "OFF_KB", "LAT(ms)", "FILENAME"))

# read events
b["events"].open_perf_buffer(print_event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
b.kprobe_poll()
2 changes: 1 addition & 1 deletion tools/cpuunclaimed.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def print_event(cpu, data, size):
trigger = int(0.8 * (1000000000 / frequency))

# read events
b["events"].open_perf_buffer(print_event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
# allow some buffering by calling sleep(), to reduce the context switch
# rate and lower overhead.
Expand Down
2 changes: 1 addition & 1 deletion tools/dbslower.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def print_event(cpu, data, size):
(', '.join(map(str, args.pids)), args.threshold))
print("%-14s %-6s %8s %s" % ("TIME(s)", "PID", "MS", "QUERY"))

bpf["events"].open_perf_buffer(print_event)
bpf["events"].open_perf_buffer(print_event, page_cnt=64)
while True:
bpf.kprobe_poll()

2 changes: 1 addition & 1 deletion tools/dcsnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,6 @@ def print_event(cpu, data, size):
# header
print("%-11s %-6s %-16s %1s %s" % ("TIME(s)", "PID", "COMM", "T", "FILE"))

b["events"].open_perf_buffer(print_event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
b.kprobe_poll()
2 changes: 1 addition & 1 deletion tools/ext4slower.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,6 @@ def print_event(cpu, data, size):
"BYTES", "OFF_KB", "LAT(ms)", "FILENAME"))

# read events
b["events"].open_perf_buffer(print_event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
b.kprobe_poll()
2 changes: 1 addition & 1 deletion tools/fileslower.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,6 @@ def print_event(cpu, data, size):
time.time() - start_ts, event.comm, event.pid, mode_s[event.mode],
event.sz, ms, name))

b["events"].open_perf_buffer(print_event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
b.kprobe_poll()
2 changes: 1 addition & 1 deletion tools/mysqld_qslower.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,6 @@ def print_event(cpu, data, size):
event.pid, float(event.delta) / 1000000, event.query))

# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
b.kprobe_poll()
2 changes: 1 addition & 1 deletion tools/opensnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,6 @@ def print_event(cpu, data, size):
event.comm, fd_s, err, event.fname))

# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
b.kprobe_poll()
2 changes: 1 addition & 1 deletion tools/stacksnoop.lua
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,6 @@ return function(BPF, utils)

bpf:get_table("events"):open_perf_buffer(print_event,
"struct { uint64_t stack_id; uint32_t pid; char comm[$]; }",
TASK_COMM_LEN)
{TASK_COMM_LEN})
bpf:kprobe_poll_loop()
end
2 changes: 1 addition & 1 deletion tools/statsnoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,6 @@ def print_event(cpu, data, size):
fd_s, err, event.fname))

# loop with callback to print_event
b["events"].open_perf_buffer(print_event)
b["events"].open_perf_buffer(print_event, page_cnt=64)
while 1:
b.kprobe_poll()
4 changes: 2 additions & 2 deletions tools/tcplife.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ def print_ipv6_event(cpu, data, size):
start_ts = 0

# read events
b["ipv4_events"].open_perf_buffer(print_ipv4_event)
b["ipv6_events"].open_perf_buffer(print_ipv6_event)
b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64)
b["ipv6_events"].open_perf_buffer(print_ipv6_event, page_cnt=64)
while 1:
b.kprobe_poll()
Loading

0 comments on commit 5f5687e

Please sign in to comment.