diff --git a/man/man8/offcputime.8 b/man/man8/offcputime.8 index 5d17d3ed228b..440c1dd489d8 100644 --- a/man/man8/offcputime.8 +++ b/man/man8/offcputime.8 @@ -2,7 +2,7 @@ .SH NAME offcputime \- Summarize off-CPU time by kernel stack trace. Uses Linux eBPF/bcc. .SH SYNOPSIS -.B offcputime [\-h] [\-u] [\-p PID] [\-v] [\-f] [duration] +.B offcputime [\-h] [\-p PID | \-t TID | \-u | \-k] [\-U | \-K] [\-d] [\-f] [\-\-stack\-storage\-size STACK_STORAGE_SIZE] [\-m MIN_BLOCK_TIME] [\-M MAX_BLOCK_TIME] [\-\-state STATE] [duration] .SH DESCRIPTION This program shows stack traces and task names that were blocked and "off-CPU", and the total duration they were not running: their "off-CPU time". @@ -64,6 +64,16 @@ Output folded format. \-\-stack-storage-size STACK_STORAGE_SIZE Change the number of unique stack traces that can be stored and displayed. .TP +\-m MIN_BLOCK_TIME +The minimum time in microseconds over which we store traces (default 1) +.TP +\-M MAX_BLOCK_TIME +The maximum time in microseconds under which we store traces (default U64_MAX) +.TP +\-\-state STATE +Filter on this thread state bitmask (e.g., 2 == TASK_UNINTERRUPTIBLE). +See include/linux/sched.h for states. +.TP duration Duration to trace, in seconds. 
.SH EXAMPLES diff --git a/tools/offcputime.py b/tools/offcputime.py index 5727d982870a..f136698a3780 100755 --- a/tools/offcputime.py +++ b/tools/offcputime.py @@ -88,11 +88,16 @@ def positive_nonzero_int(val): type=positive_nonzero_int, help="the amount of time in microseconds under which we " + "store traces (default U64_MAX)") +parser.add_argument("--state", default=999, + type=positive_int, + help="filter on this thread state bitmask (eg, 2 == TASK_UNINTERRUPTIBLE" + + ") see include/linux/sched.h") args = parser.parse_args() if args.pid and args.tgid: parser.error("specify only one of -p and -t") folded = args.folded duration = int(args.duration) +debug = 0 # signal handler def signal_ignore(signal, frame): @@ -123,7 +128,7 @@ def signal_ignore(signal, frame): u64 ts, *tsp; // record previous thread sleep time - if (THREAD_FILTER) { + if ((THREAD_FILTER) && (STATE_FILTER)) { ts = bpf_ktime_get_ns(); start.update(&pid, &ts); } @@ -177,7 +182,15 @@ def signal_ignore(signal, frame): else: thread_context = "all threads" thread_filter = '1' +if args.state == 0: + state_filter = 'prev->state == 0' +elif args.state != 999: + # these states are sometimes bitmask checked + state_filter = 'prev->state & %d' % args.state +else: + state_filter = '1' bpf_text = bpf_text.replace('THREAD_FILTER', thread_filter) +bpf_text = bpf_text.replace('STATE_FILTER', state_filter) # set stack storage size bpf_text = bpf_text.replace('STACK_STORAGE_SIZE', str(args.stack_storage_size)) @@ -210,6 +223,9 @@ def signal_ignore(signal, frame): "doesn't make sense.", file=stderr) exit(1) +if (debug): + print(bpf_text) + # initialize BPF b = BPF(text=bpf_text) b.attach_kprobe(event="finish_task_switch", fn_name="oncpu") diff --git a/tools/offcputime_example.txt b/tools/offcputime_example.txt index f903458e66e9..1f6066d92958 100644 --- a/tools/offcputime_example.txt +++ b/tools/offcputime_example.txt @@ -718,9 +718,10 @@ creating your "off-CPU time flame graphs". 
USAGE message: -# ./offcputime -h +# ./offcputime.py -h usage: offcputime.py [-h] [-p PID | -t TID | -u | -k] [-U | -K] [-d] [-f] [--stack-storage-size STACK_STORAGE_SIZE] + [-m MIN_BLOCK_TIME] [-M MAX_BLOCK_TIME] [--state STATE] [duration] Summarize off-CPU time by stack trace @@ -747,11 +748,21 @@ optional arguments: --stack-storage-size STACK_STORAGE_SIZE the number of unique stack traces that can be stored and displayed (default 1024) + -m MIN_BLOCK_TIME, --min-block-time MIN_BLOCK_TIME + the amount of time in microseconds over which we store + traces (default 1) + -M MAX_BLOCK_TIME, --max-block-time MAX_BLOCK_TIME + the amount of time in microseconds under which we + store traces (default U64_MAX) + --state STATE filter on this thread state bitmask (eg, 2 == + TASK_UNINTERRUPTIBLE) see include/linux/sched.h examples: ./offcputime # trace off-CPU stack time until Ctrl-C ./offcputime 5 # trace for 5 seconds only ./offcputime -f 5 # 5 seconds, and output in folded format + ./offcputime -m 1000 # trace only events that last more than 1000 usec + ./offcputime -M 10000 # trace only events that last less than 10000 usec ./offcputime -p 185 # only trace threads for PID 185 ./offcputime -t 188 # only trace thread 188 ./offcputime -u # only trace user threads (no kernel)