From 0c4df02d739fed5ab081b330d67403206dd3967e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 21 Jun 2013 08:51:38 -0700 Subject: [PATCH] x86: Add NMI duration tracepoints This patch has been invaluable in my adventures finding issues in the perf NMI handler. I'm as big a fan of printk() as anybody is, but using printk() in NMIs is deadly when they're happening frequently. Even hacking in trace_printk() ended up eating enough CPU to throw off some of the measurements I was making. Signed-off-by: Dave Hansen Acked-by: Peter Zijlstra Cc: paulus@samba.org Cc: acme@ghostprotocols.net Cc: Dave Hansen Signed-off-by: Ingo Molnar --- Documentation/trace/events-nmi.txt | 43 ++++++++++++++++++++++++++++++ arch/x86/kernel/nmi.c | 9 +++++-- include/trace/events/nmi.h | 37 +++++++++++++++++++++++++ 3 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 Documentation/trace/events-nmi.txt create mode 100644 include/trace/events/nmi.h diff --git a/Documentation/trace/events-nmi.txt b/Documentation/trace/events-nmi.txt new file mode 100644 index 000000000000..c03c8c89f08d --- /dev/null +++ b/Documentation/trace/events-nmi.txt @@ -0,0 +1,43 @@ +NMI Trace Events + +These events normally show up here: + + /sys/kernel/debug/tracing/events/nmi + +-- + +nmi_handler: + +You might want to use this tracepoint if you suspect that your +NMI handlers are hogging large amounts of CPU time. The kernel +will warn if it sees long-running handlers: + + INFO: NMI handler took too long to run: 9.207 msecs + +and this tracepoint will allow you to drill down and get some +more details. + +Let's say you suspect that perf_event_nmi_handler() is causing +you some problems and you only want to trace that handler +specifically. You need to find its address: + + $ grep perf_event_nmi_handler /proc/kallsyms + ffffffff81625600 t perf_event_nmi_handler + +Let's also say you are only interested in when that function is +really hogging a lot of CPU time, like a millisecond at a time. +Note that the kernel's output is in milliseconds, but the input +to the filter is in nanoseconds! You can filter on 'delta_ns': + +cd /sys/kernel/debug/tracing/events/nmi/nmi_handler +echo 'handler==0xffffffff81625600 && delta_ns>1000000' > filter +echo 1 > enable + +Your output would then look like: + +$ cat /sys/kernel/debug/tracing/trace_pipe +-0 [000] d.h3 505.397558: nmi_handler: perf_event_nmi_handler() delta_ns: 3236765 handled: 1 +-0 [000] d.h3 505.805893: nmi_handler: perf_event_nmi_handler() delta_ns: 3174234 handled: 1 +-0 [000] d.h3 506.158206: nmi_handler: perf_event_nmi_handler() delta_ns: 3084642 handled: 1 +-0 [000] d.h3 506.334346: nmi_handler: perf_event_nmi_handler() delta_ns: 3080351 handled: 1 + diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index e9bae4c2f2dd..0920212e6159 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -30,6 +30,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + struct nmi_desc { spinlock_t lock; struct list_head head; @@ -108,11 +111,13 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 */ list_for_each_entry_rcu(a, &desc->head, list) { u64 before, delta, whole_msecs; - int decimal_msecs; + int decimal_msecs, thishandled; before = local_clock(); - handled += a->handler(type, regs); + thishandled = a->handler(type, regs); + handled += thishandled; delta = local_clock() - before; + trace_nmi_handler(a->handler, (int)delta, thishandled); if (delta < nmi_longest_ns) continue; diff --git a/include/trace/events/nmi.h b/include/trace/events/nmi.h new file mode 100644 index 000000000000..da3ee96b8d03 --- /dev/null +++ b/include/trace/events/nmi.h @@ -0,0 +1,37 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nmi + +#if !defined(_TRACE_NMI_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NMI_H + +#include +#include + +TRACE_EVENT(nmi_handler, + + TP_PROTO(void *handler, s64 delta_ns, int handled), + + TP_ARGS(handler, delta_ns, handled), + + TP_STRUCT__entry( + __field( void *, handler ) + __field( s64, delta_ns) + __field( int, handled ) + ), + + TP_fast_assign( + __entry->handler = handler; + __entry->delta_ns = delta_ns; + __entry->handled = handled; + ), + + TP_printk("%ps() delta_ns: %lld handled: %d", + __entry->handler, + __entry->delta_ns, + __entry->handled) +); + +#endif /* _TRACE_NMI_H */ + +/* This part ust be outside protection */ +#include