#!/usr/bin/python # # ppchcalls Summarize ppc hcalls stats. # # Initial version migrating perf based tool to ebpf with additional hcalls, # inspired by existing bcc tool for syscalls. # # from time import sleep, strftime import argparse import errno import itertools import sys import signal from bcc import BPF hcall_table = { 4: 'H_REMOVE', 8: 'H_ENTER', 12: 'H_READ', 16: 'H_CLEAR_MOD', 20: 'H_CLEAR_REF', 24: 'H_PROTECT', 28: 'H_GET_TCE', 32: 'H_PUT_TCE', 36: 'H_SET_SPRG0', 40: 'H_SET_DABR', 44: 'H_PAGE_INIT', 48: 'H_SET_ASR', 52: 'H_ASR_ON', 56: 'H_ASR_OFF', 60: 'H_LOGICAL_CI_LOAD', 64: 'H_LOGICAL_CI_STORE', 68: 'H_LOGICAL_CACHE_LOAD', 72: 'H_LOGICAL_CACHE_STORE', 76: 'H_LOGICAL_ICBI', 80: 'H_LOGICAL_DCBF', 84: 'H_GET_TERM_CHAR', 88: 'H_PUT_TERM_CHAR', 92: 'H_REAL_TO_LOGICAL', 96: 'H_HYPERVISOR_DATA', 100: 'H_EOI', 104: 'H_CPPR', 108: 'H_IPI', 112: 'H_IPOLL', 116: 'H_XIRR', 120: 'H_MIGRATE_DMA', 124: 'H_PERFMON', 220: 'H_REGISTER_VPA', 224: 'H_CEDE', 228: 'H_CONFER', 232: 'H_PROD', 236: 'H_GET_PPP', 240: 'H_SET_PPP', 244: 'H_PURR', 248: 'H_PIC', 252: 'H_REG_CRQ', 256: 'H_FREE_CRQ', 260: 'H_VIO_SIGNAL', 264: 'H_SEND_CRQ', 272: 'H_COPY_RDMA', 276: 'H_REGISTER_LOGICAL_LAN', 280: 'H_FREE_LOGICAL_LAN', 284: 'H_ADD_LOGICAL_LAN_BUFFER', 288: 'H_SEND_LOGICAL_LAN', 292: 'H_BULK_REMOVE', 304: 'H_MULTICAST_CTRL', 308: 'H_SET_XDABR', 312: 'H_STUFF_TCE', 316: 'H_PUT_TCE_INDIRECT', 332: 'H_CHANGE_LOGICAL_LAN_MAC', 336: 'H_VTERM_PARTNER_INFO', 340: 'H_REGISTER_VTERM', 344: 'H_FREE_VTERM', 348: 'H_RESET_EVENTS', 352: 'H_ALLOC_RESOURCE', 356: 'H_FREE_RESOURCE', 360: 'H_MODIFY_QP', 364: 'H_QUERY_QP', 368: 'H_REREGISTER_PMR', 372: 'H_REGISTER_SMR', 376: 'H_QUERY_MR', 380: 'H_QUERY_MW', 384: 'H_QUERY_HCA', 388: 'H_QUERY_PORT', 392: 'H_MODIFY_PORT', 396: 'H_DEFINE_AQP1', 400: 'H_GET_TRACE_BUFFER', 404: 'H_DEFINE_AQP0', 408: 'H_RESIZE_MR', 412: 'H_ATTACH_MCQP', 416: 'H_DETACH_MCQP', 420: 'H_CREATE_RPT', 424: 'H_REMOVE_RPT', 428: 'H_REGISTER_RPAGES', 432: 'H_DISABLE_AND_GET', 436: 'H_ERROR_DATA', 440: 'H_GET_HCA_INFO', 444: 'H_GET_PERF_COUNT', 448: 'H_MANAGE_TRACE', 456: 'H_GET_CPU_CHARACTERISTICS', 468: 'H_FREE_LOGICAL_LAN_BUFFER', 472: 'H_POLL_PENDING', 484: 'H_QUERY_INT_STATE', 580: 'H_ILLAN_ATTRIBUTES', 592: 'H_MODIFY_HEA_QP', 596: 'H_QUERY_HEA_QP', 600: 'H_QUERY_HEA', 604: 'H_QUERY_HEA_PORT', 608: 'H_MODIFY_HEA_PORT', 612: 'H_REG_BCMC', 616: 'H_DEREG_BCMC', 620: 'H_REGISTER_HEA_RPAGES', 624: 'H_DISABLE_AND_GET_HEA', 628: 'H_GET_HEA_INFO', 632: 'H_ALLOC_HEA_RESOURCE', 644: 'H_ADD_CONN', 648: 'H_DEL_CONN', 664: 'H_JOIN', 672: 'H_VASI_SIGNAL', 676: 'H_VASI_STATE', 680: 'H_VIOCTL', 688: 'H_ENABLE_CRQ', 696: 'H_GET_EM_PARMS', 720: 'H_SET_MPP', 724: 'H_GET_MPP', 732: 'H_REG_SUB_CRQ', 736: 'H_FREE_SUB_CRQ', 740: 'H_SEND_SUB_CRQ', 744: 'H_SEND_SUB_CRQ_INDIRECT', 748: 'H_HOME_NODE_ASSOCIATIVITY', 756: 'H_BEST_ENERGY', 764: 'H_XIRR_X', 768: 'H_RANDOM', 772: 'H_COP', 788: 'H_GET_MPP_X', 796: 'H_SET_MODE', 808: 'H_BLOCK_REMOVE', 856: 'H_CLEAR_HPT', 864: 'H_REQUEST_VMC', 876: 'H_RESIZE_HPT_PREPARE', 880: 'H_RESIZE_HPT_COMMIT', 892: 'H_REGISTER_PROC_TBL', 896: 'H_SIGNAL_SYS_RESET', 904: 'H_ALLOCATE_VAS_WINDOW', 908: 'H_MODIFY_VAS_WINDOW', 912: 'H_DEALLOCATE_VAS_WINDOW', 916: 'H_QUERY_VAS_WINDOW', 920: 'H_QUERY_VAS_CAPABILITIES', 924: 'H_QUERY_NX_CAPABILITIES', 928: 'H_GET_NX_FAULT', 936: 'H_INT_GET_SOURCE_INFO', 940: 'H_INT_SET_SOURCE_CONFIG', 944: 'H_INT_GET_SOURCE_CONFIG', 948: 'H_INT_GET_QUEUE_INFO', 952: 'H_INT_SET_QUEUE_CONFIG', 956: 'H_INT_GET_QUEUE_CONFIG', 960: 'H_INT_SET_OS_REPORTING_LINE', 964: 'H_INT_GET_OS_REPORTING_LINE', 968: 'H_INT_ESB', 972: 'H_INT_SYNC', 976: 'H_INT_RESET', 996: 'H_SCM_READ_METADATA', 1000: 'H_SCM_WRITE_METADATA', 1004: 'H_SCM_BIND_MEM', 1008: 'H_SCM_UNBIND_MEM', 1012: 'H_SCM_QUERY_BLOCK_MEM_BINDING', 1016: 'H_SCM_QUERY_LOGICAL_MEM_BINDING', 1020: 'H_SCM_UNBIND_ALL', 1024: 'H_SCM_HEALTH', 1048: 'H_SCM_PERFORMANCE_STATS', 1052: 'H_PKS_GET_CONFIG', 1056: 'H_PKS_SET_PASSWORD', 1060: 'H_PKS_GEN_PASSWORD', 1068: 'H_PKS_WRITE_OBJECT', 1072: 'H_PKS_GEN_KEY', 1076: 'H_PKS_READ_OBJECT', 1080: 'H_PKS_REMOVE_OBJECT', 1084: 'H_PKS_CONFIRM_OBJECT_FLUSHED', 1096: 'H_RPT_INVALIDATE', 1100: 'H_SCM_FLUSH', 1104: 'H_GET_ENERGY_SCALE_INFO', 1108: 'H_PKS_SIGNED_UPDATE', 1116: 'H_WATCHDOG', # Platform specific hcalls used by KVM on PowerVM 1120: 'H_GUEST_GET_CAPABILITIES', 1124: 'H_GUEST_SET_CAPABILITIES', 1136: 'H_GUEST_CREATE', 1140: 'H_GUEST_CREATE_VCPU', 1144: 'H_GUEST_GET_STATE', 1148: 'H_GUEST_SET_STATE', 1152: 'H_GUEST_RUN_VCPU', 1156: 'H_GUEST_COPY_MEMORY', 1160: 'H_GUEST_DELETE', # Platform-specific hcalls used by the Ultravisor 61184: 'H_SVM_PAGE_IN', 61188: 'H_SVM_PAGE_OUT', 61192: 'H_SVM_INIT_START', 61196: 'H_SVM_INIT_DONE', 61204: 'H_SVM_INIT_ABORT', # Platform specific hcalls used by KVM 61440: 'H_RTAS', # Platform specific hcalls used by QEMU/SLOF 61441: 'H_LOGICAL_MEMOP', 61442: 'H_CAS', 61443: 'H_UPDATE_DT', # Platform specific hcalls provided by PHYP 61560: 'H_GET_24X7_CATALOG_PAGE', 61564: 'H_GET_24X7_DATA', 61568: 'H_GET_PERF_COUNTER_INFO', # Platform-specific hcalls used for nested HV KVM 63488: 'H_SET_PARTITION_TABLE', 63492: 'H_ENTER_NESTED', 63496: 'H_TLB_INVALIDATE', 63500: 'H_COPY_TOFROM_GUEST', } def hcall_table_lookup(opcode): if (opcode in hcall_table): return hcall_table[opcode] else: return opcode if sys.version_info.major < 3: izip_longest = itertools.izip_longest else: izip_longest = itertools.zip_longest # signal handler def signal_ignore(signal, frame): print() def handle_errno(errstr): try: return abs(int(errstr)) except ValueError: pass try: return getattr(errno, errstr) except AttributeError: raise argparse.ArgumentTypeError("couldn't map %s to an errno" % errstr) parser = argparse.ArgumentParser( description="Summarize ppc hcall counts and latencies.") parser.add_argument("-p", "--pid", type=int, help="trace only this pid") parser.add_argument("-t", "--tid", type=int, help="trace only this tid") parser.add_argument("-i", "--interval", type=int, help="print summary at this interval (seconds)") parser.add_argument("-d", "--duration", type=int, help="total duration of trace, in seconds") parser.add_argument("-T", "--top", type=int, default=10, help="print only the top hcalls by count or latency") parser.add_argument("-x", "--failures", action="store_true", help="trace only failed hcalls (return < 0)") parser.add_argument("-e", "--errno", type=handle_errno, help="trace only hcalls that return this error (numeric or EPERM, etc.)") parser.add_argument("-L", "--latency", action="store_true", help="collect hcall latency") parser.add_argument("-m", "--milliseconds", action="store_true", help="display latency in milliseconds (default: microseconds)") parser.add_argument("-P", "--process", action="store_true", help="count by process and not by hcall") parser.add_argument("-l", "--list", action="store_true", help="print list of recognized hcalls and exit") parser.add_argument("--hcall", type=str, help="trace this hcall only (use option -l to get all recognized hcalls)") parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS) args = parser.parse_args() if args.duration and not args.interval: args.interval = args.duration if not args.interval: args.interval = 99999999 hcall_nr = -1 if args.hcall is not None: for key, value in hcall_table.items(): if args.hcall == value: hcall_nr = key print("hcall %s , hcall_nr =%d" % (args.hcall, hcall_nr)) break if hcall_nr == -1: print("Error: hcall '%s' not found. Exiting." % args.hcall) sys.exit(1) if args.list: for grp in izip_longest(*(iter(sorted(hcall_table.values())),) * 4): print(" ".join(["%-25s" % s for s in grp if s is not None])) sys.exit(0) text = """ #ifdef LATENCY struct data_t { u64 count; u64 min; u64 max; u64 total_ns; }; BPF_HASH(start, u64, u64); BPF_HASH(ppc_data, u32, struct data_t); #else BPF_HASH(ppc_data, u32, u64); #endif #ifdef LATENCY RAW_TRACEPOINT_PROBE(hcall_entry) { // TP_PROTO(unsigned long opcode, unsigned long *args), u64 pid_tgid = bpf_get_current_pid_tgid(); u32 pid = pid_tgid >> 32; u32 tid = (u32)pid_tgid; #ifdef FILTER_HCALL_NR if (ctx->args[0] != FILTER_HCALL_NR) return 0; #endif #ifdef FILTER_PID if (pid != FILTER_PID) return 0; #endif #ifdef FILTER_TID if (tid != FILTER_TID) return 0; #endif u64 t = bpf_ktime_get_ns(); start.update(&pid_tgid, &t); return 0; } #endif RAW_TRACEPOINT_PROBE(hcall_exit) { // TP_PROTO(unsigned long opcode, long retval, unsigned long *retbuf) u64 pid_tgid = bpf_get_current_pid_tgid(); u32 pid = pid_tgid >> 32; u32 tid = (u32)pid_tgid; #ifdef FILTER_HCALL_NR if (ctx->args[0] != FILTER_HCALL_NR) return 0; #endif #ifdef FILTER_PID if (pid != FILTER_PID) return 0; #endif #ifdef FILTER_TID if (tid != FILTER_TID) return 0; #endif #ifdef FILTER_FAILED if (ctx->args[1] >= 0) return 0; #endif #ifdef FILTER_ERRNO if (ctx->args[1] != -FILTER_ERRNO) return 0; #endif #ifdef BY_PROCESS u32 key = pid_tgid >> 32; #else u32 key = (unsigned long) ctx->args[0]; #endif #ifdef LATENCY struct data_t *val, zero = {}; u64 delta = 0; u64 *start_ns = start.lookup(&pid_tgid); if (!start_ns) return 0; val = ppc_data.lookup_or_try_init(&key, &zero); if (val) { val->count++; delta = bpf_ktime_get_ns() - *start_ns; if (val->min) { if(val->min > delta) val->min = delta; } else { val->min = delta; } if (val->max) { if(val->max < delta) val->max = delta; } else { val->max = delta; } val->total_ns += delta; } #else u64 *val, zero = 0; val = ppc_data.lookup_or_try_init(&key, &zero); if (val) { ++(*val); } #endif return 0; } """ if args.pid: text = ("#define FILTER_PID %d\n" % args.pid) + text elif args.tid: text = ("#define FILTER_TID %d\n" % args.tid) + text if args.failures: text = "#define FILTER_FAILED\n" + text if args.errno: text = "#define FILTER_ERRNO %d\n" % abs(args.errno) + text if args.latency: text = "#define LATENCY\n" + text if args.process: text = "#define BY_PROCESS\n" + text if args.hcall is not None: text = ("#define FILTER_HCALL_NR %d\n" % hcall_nr) + text if args.ebpf: print(text) exit() bpf = BPF(text=text) def print_stats(): if args.latency: ppc_print_latency_stats() else: print_ppc_count_stats() ppc_agg_colname = "PID COMM" if args.process else "PPC HCALL" min_time_colname = "MIN (ms)" if args.milliseconds else "MIN (us)" max_time_colname = "MAX (ms)" if args.milliseconds else "MAX (us)" avg_time_colname = "AVG (ms)" if args.milliseconds else "AVG (us)" def comm_for_pid(pid): try: return open("/proc/%d/comm" % pid, "r").read().strip() except Exception: return "[unknown]" def agg_colval(key): if args.process: return "%-6d %-15s" % (key.value, comm_for_pid(key.value)) else: return hcall_table_lookup(key.value) def print_ppc_count_stats(): data = bpf["ppc_data"] print("[%s]" % strftime("%H:%M:%S")) print("%-45s %8s" % (ppc_agg_colname, "COUNT")) for k, v in sorted(data.items(), key=lambda kv: -kv[1].value)[:args.top]: if k.value == 0xFFFFFFFF: continue # happens occasionally, we don't need it print("%-45s %8d" % (agg_colval(k), v.value)) print("") data.clear() def ppc_print_latency_stats(): data = bpf["ppc_data"] print("[%s]" % strftime("%H:%M:%S")) print("%-45s %8s %17s %17s %17s" % (ppc_agg_colname, "COUNT", min_time_colname, max_time_colname, avg_time_colname)) for k, v in sorted(data.items(), key=lambda kv: -kv[1].count)[:args.top]: if k.value == 0xFFFFFFFF: continue # happens occasionally, we don't need it print(("%-45s %8d " + ("%17.6f" if args.milliseconds else "%17.3f ") + ("%17.6f" if args.milliseconds else "%17.3f ") + ("%17.6f" if args.milliseconds else "%17.3f")) % (agg_colval(k), v.count, v.min / (1e6 if args.milliseconds else 1e3), v.max / (1e6 if args.milliseconds else 1e3), (v.total_ns / v.count) / (1e6 if args.milliseconds else 1e3))) print("") data.clear() if args.hcall is not None: print("Tracing %sppc hcall '%s'... Ctrl+C to quit." % ("failed " if args.failures else "", args.hcall)) else: print("Tracing %sppc hcalls, printing top %d... Ctrl+C to quit." % ("failed " if args.failures else "", args.top)) exiting = 0 if args.interval else 1 seconds = 0 while True: try: sleep(args.interval) seconds += args.interval except KeyboardInterrupt: exiting = 1 signal.signal(signal.SIGINT, signal_ignore) if args.duration and seconds >= args.duration: exiting = 1 print_stats() if exiting: print("Detaching...") exit()