Skip to content

Commit

Permalink
tools/llcstat: Add TID info support
Browse files Browse the repository at this point in the history
  • Loading branch information
yezhengmao1 committed Jun 24, 2022
1 parent 13b5563 commit eb837bc
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 34 deletions.
35 changes: 21 additions & 14 deletions libbpf-tools/llcstat.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,43 @@
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "maps.bpf.h"
#include "llcstat.h"

#define MAX_ENTRIES 10240

const volatile bool targ_per_thread = false;

struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, MAX_ENTRIES);
__type(key, u64);
__type(value, struct info);
__type(key, struct key_info);
__type(value, struct value_info);
} infos SEC(".maps");

static __always_inline
int trace_event(__u64 sample_period, bool miss)
{
u64 pid = bpf_get_current_pid_tgid();
u32 cpu = bpf_get_smp_processor_id();
struct info *infop, info = {};
u64 key = pid << 32 | cpu;

infop = bpf_map_lookup_elem(&infos, &key);
if (!infop) {
bpf_get_current_comm(info.comm, sizeof(info.comm));
infop = &info;
}
struct key_info key = {};
struct value_info *infop, zero = {};

u64 pid_tgid = bpf_get_current_pid_tgid();
key.cpu = bpf_get_smp_processor_id();
key.pid = pid_tgid >> 32;
if (targ_per_thread)
key.tid = (u32)pid_tgid;
else
key.tid = key.pid;

infop = bpf_map_lookup_or_try_init(&infos, &key, &zero);
if (!infop)
return 0;
if (miss)
infop->miss += sample_period;
else
infop->ref += sample_period;
if (infop == &info)
bpf_map_update_elem(&infos, &key, infop, 0);
bpf_get_current_comm(infop->comm, sizeof(infop->comm));

return 0;
}

Expand Down
40 changes: 29 additions & 11 deletions libbpf-tools/llcstat.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//
// Based on llcstat(8) from BCC by Teng Qin.
// 29-Sep-2020 Wenbo Zhang Created this.
// 20-Jun-2022 YeZhengMao Added tid info.
#include <argp.h>
#include <signal.h>
#include <stdio.h>
Expand All @@ -21,6 +22,7 @@ struct env {
int sample_period;
time_t duration;
bool verbose;
bool per_thread;
} env = {
.sample_period = 100,
.duration = 10,
Expand All @@ -40,6 +42,8 @@ static const struct argp_option opts[] = {
{ "sample_period", 'c', "SAMPLE_PERIOD", 0, "Sample one in this many "
"number of cache reference / miss events" },
{ "verbose", 'v', NULL, 0, "Verbose debug output" },
{ "tid", 't', NULL, 0,
"Summarize cache references and misses by PID/TID" },
{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
{},
};
Expand All @@ -55,6 +59,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'v':
env.verbose = true;
break;
case 't':
env.per_thread = true;
break;
case 'c':
errno = 0;
env.sample_period = strtol(arg, NULL, 10);
Expand Down Expand Up @@ -131,10 +138,10 @@ static void sig_handler(int sig)
static void print_map(struct bpf_map *map)
{
__u64 total_ref = 0, total_miss = 0, total_hit, hit;
__u64 lookup_key = -1, next_key;
__u32 pid, cpu, tid;
struct key_info lookup_key = { .cpu = -1 }, next_key;
int err, fd = bpf_map__fd(map);
struct info info;
__u32 pid, cpu;
struct value_info info;

while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) {
err = bpf_map_lookup_elem(fd, &next_key, &info);
Expand All @@ -143,11 +150,16 @@ static void print_map(struct bpf_map *map)
return;
}
hit = info.ref > info.miss ? info.ref - info.miss : 0;
pid = next_key >> 32;
cpu = next_key;
printf("%-8u %-16s %-4u %12llu %12llu %6.2f%%\n", pid, info.comm,
cpu, info.ref, info.miss, info.ref > 0 ?
hit * 1.0 / info.ref * 100 : 0);
cpu = next_key.cpu;
pid = next_key.pid;
tid = next_key.tid;
printf("%-8u ", pid);
if (env.per_thread) {
printf("%-8u ", tid);
}
printf("%-16s %-4u %12llu %12llu %6.2f%%\n",
info.comm, cpu, info.ref, info.miss,
info.ref > 0 ? hit * 1.0 / info.ref * 100 : 0);
total_miss += info.miss;
total_ref += info.ref;
lookup_key = next_key;
Expand All @@ -157,7 +169,7 @@ static void print_map(struct bpf_map *map)
total_ref, total_miss, total_ref > 0 ?
total_hit * 1.0 / total_ref * 100 : 0);

lookup_key = -1;
lookup_key.cpu = -1;
while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) {
err = bpf_map_delete_elem(fd, &next_key);
if (err < 0) {
Expand Down Expand Up @@ -212,6 +224,8 @@ int main(int argc, char **argv)
goto cleanup;
}

obj->rodata->targ_per_thread = env.per_thread;

err = llcstat_bpf__load(obj);
if (err) {
fprintf(stderr, "failed to load BPF object: %d\n", err);
Expand All @@ -233,8 +247,12 @@ int main(int argc, char **argv)

sleep(env.duration);

printf("%-8s %-16s %-4s %12s %12s %7s\n",
"PID", "NAME", "CPU", "REFERENCE", "MISS", "HIT%");
printf("%-8s ", "PID");
if (env.per_thread) {
printf("%-8s ", "TID");
}
printf("%-16s %-4s %12s %12s %7s\n",
"NAME", "CPU", "REFERENCE", "MISS", "HIT%");

print_map(obj->maps.infos);

Expand Down
8 changes: 7 additions & 1 deletion libbpf-tools/llcstat.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@

#define TASK_COMM_LEN 16

struct info {
/* Per-key counters stored as the value type of the infos hash map. */
struct value_info {
/* Sum of sample_period over sampled events not flagged as misses. */
__u64 ref;
/* Sum of sample_period over sampled events flagged as misses. */
__u64 miss;
/* Task name, written by bpf_get_current_comm() when a sample is recorded. */
char comm[TASK_COMM_LEN];
};

/* Hash-map key identifying one (cpu, pid, tid) bucket of samples. */
struct key_info {
/* CPU the sample was taken on, from bpf_get_smp_processor_id(). */
__u32 cpu;
/* Upper 32 bits of bpf_get_current_pid_tgid(). */
__u32 pid;
/*
 * Lower 32 bits of bpf_get_current_pid_tgid() when per-thread mode is
 * enabled; otherwise set equal to pid.
 */
__u32 tid;
};

#endif /* __LLCSTAT_H */
3 changes: 3 additions & 0 deletions man/man8/llcstat.8
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ Print usage message.
\-c SAMPLE_PERIOD
Sample one in this many cache reference and cache miss events.
.TP
\-t
Summarize cache references and misses by PID/TID.
.TP
duration
Duration to trace, in seconds.
.SH EXAMPLES
Expand Down
46 changes: 38 additions & 8 deletions tools/llcstat.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 19-Oct-2016 Teng Qin Created this.
# 20-Jun-2022 YeZhengMao Added tid info.

from __future__ import print_function
import argparse
Expand All @@ -30,6 +31,10 @@
help="Sample one in this many number of cache reference / miss events")
parser.add_argument(
"duration", nargs="?", default=10, help="Duration, in seconds, to run")
parser.add_argument(
"-t", "--tid", action="store_true",
help="Summarize cache references and misses by PID/TID"
)
parser.add_argument("--ebpf", action="store_true",
help=argparse.SUPPRESS)
args = parser.parse_args()
Expand All @@ -41,16 +46,19 @@
struct key_t {
int cpu;
int pid;
u32 pid;
u32 tid;
char name[TASK_COMM_LEN];
};
BPF_HASH(ref_count, struct key_t);
BPF_HASH(miss_count, struct key_t);
static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
u64 pid_tgid = bpf_get_current_pid_tgid();
key->cpu = bpf_get_smp_processor_id();
key->pid = bpf_get_current_pid_tgid() >> 32;
key->pid = pid_tgid >> 32;
key->tid = GET_TID ? (u32)pid_tgid : key->pid;
bpf_get_current_comm(&(key->name), sizeof(key->name));
}
Expand All @@ -73,6 +81,8 @@
}
"""

bpf_text = bpf_text.replace("GET_TID", "1" if args.tid else "0")

if args.ebpf:
print(bpf_text)
exit()
Expand All @@ -98,22 +108,42 @@

miss_count = {}
for (k, v) in b.get_table('miss_count').items():
miss_count[(k.pid, k.cpu, k.name)] = v.value
if args.tid:
miss_count[(k.pid, k.tid, k.cpu, k.name)] = v.value
else:
miss_count[(k.pid, k.cpu, k.name)] = v.value

header_text = 'PID '
format_text = '{:<8d} '
if args.tid:
header_text += 'TID '
format_text += '{:<8d} '

header_text += 'NAME CPU REFERENCE MISS HIT%'
format_text += '{:<16s} {:<4d} {:>12d} {:>12d} {:>6.2f}%'

print('PID NAME CPU REFERENCE MISS HIT%')
print(header_text)
tot_ref = 0
tot_miss = 0
for (k, v) in b.get_table('ref_count').items():
try:
miss = miss_count[(k.pid, k.cpu, k.name)]
if args.tid:
miss = miss_count[(k.pid, k.tid, k.cpu, k.name)]
else:
miss = miss_count[(k.pid, k.cpu, k.name)]
except KeyError:
miss = 0
tot_ref += v.value
tot_miss += miss
# This happens on some PIDs due to missed counts caused by sampling
hit = (v.value - miss) if (v.value >= miss) else 0
print('{:<8d} {:<16s} {:<4d} {:>12d} {:>12d} {:>6.2f}%'.format(
k.pid, k.name.decode('utf-8', 'replace'), k.cpu, v.value, miss,
(float(hit) / float(v.value)) * 100.0))
if args.tid:
print(format_text.format(
k.pid, k.tid, k.name.decode('utf-8', 'replace'), k.cpu, v.value, miss,
(float(hit) / float(v.value)) * 100.0))
else:
print(format_text.format(
k.pid, k.name.decode('utf-8', 'replace'), k.cpu, v.value, miss,
(float(hit) / float(v.value)) * 100.0))
print('Total References: {} Total Misses: {} Hit Rate: {:.2f}%'.format(
tot_ref, tot_miss, (float(tot_ref - tot_miss) / float(tot_ref)) * 100.0))
16 changes: 16 additions & 0 deletions tools/llcstat_example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,21 @@ some degree by chance. Overall it should make sense. But for low counts,
you might find a case where -- by chance -- a process has been tallied with
more misses than references, which would seem impossible.

# ./llcstat.py 10 -t
Running for 10 seconds or hit Ctrl-C to end.
PID TID NAME CPU REFERENCE MISS HIT%
170843 170845 docker 12 2700 1200 55.56%
298670 298670 kworker/15:0 15 500 0 100.00%
170254 170254 kworker/11:1 11 2500 400 84.00%
1046952 1046953 git 0 2600 1100 57.69%
170843 170849 docker 15 1000 400 60.00%
1027373 1027382 node 8 3500 2500 28.57%
0 0 swapper/7 7 173000 4200 97.57%
1028217 1028217 node 14 15600 22400 0.00%
[...]
Total References: 7139900 Total Misses: 1413900 Hit Rate: 80.20%

This shows each TID's cache hit rate during the 10-second run period.

USAGE message:

Expand All @@ -54,3 +69,4 @@ positional arguments:
-c SAMPLE_PERIOD, --sample_period SAMPLE_PERIOD
Sample one in this many number of cache reference
and miss events
-t, --tid Summarize cache references and misses by PID/TID

0 comments on commit eb837bc

Please sign in to comment.