tracing: rework sched_preempt_disable trace point implementation
The current implementation of the sched_preempt_disable trace point fails to detect the preemption disable time inside spin_lock_bh() and spin_unlock_bh(). This is because __local_bh_disable_ip() directly calls __preempt_count_add(), which skips the preemption disable tracking. Instead of relying on updates to the preempt count, it is better to do the preemption disable tracking directly in the preemptoff tracer. This is similar to how irq disable tracking is done. The current code handles the false positives coming from __schedule() by directly resetting the time stamp. This requires an interface from the scheduler to the preemptoff tracer. To avoid this additional interface, the current patch detects the same condition by comparing the task pid and context switch count. If they do not match between preemption disable and enable, don't track the preemption disable time, as it involved a context switch. Due to this rework, the sched_preempt_disable trace point location is changed to /sys/kernel/debug/tracing/events/preemptirq/sched_preempt_disable/enable Change-Id: I7f58d316b7c54bc7a54102bfeb678404bda010d4 Signed-off-by: Pavankumar Kondeti <pkondeti@codeaurora.org>
This commit is contained in:
parent
5e3f09ce21
commit
fd6298c891
6 changed files with 98 additions and 88 deletions
|
@ -76,7 +76,7 @@ extern int sched_updown_migrate_handler(struct ctl_table *table,
|
|||
size_t *lenp, loff_t *ppos);
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_PREEMPT_TRACER) || defined(CONFIG_DEBUG_PREEMPT)
|
||||
#if defined(CONFIG_PREEMPTIRQ_EVENTS) || defined(CONFIG_PREEMPT_TRACER)
|
||||
extern unsigned int sysctl_preemptoff_tracing_threshold_ns;
|
||||
#endif
|
||||
#if defined(CONFIG_PREEMPTIRQ_EVENTS) && defined(CONFIG_IRQSOFF_TRACER)
|
||||
|
|
|
@ -90,6 +90,38 @@ TRACE_EVENT(irqs_disable,
|
|||
__entry->caddr2, __entry->caddr3)
|
||||
);
|
||||
|
||||
TRACE_EVENT(sched_preempt_disable,
|
||||
|
||||
TP_PROTO(u64 delta, bool irqs_disabled,
|
||||
unsigned long caddr0, unsigned long caddr1,
|
||||
unsigned long caddr2, unsigned long caddr3),
|
||||
|
||||
TP_ARGS(delta, irqs_disabled, caddr0, caddr1, caddr2, caddr3),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, delta)
|
||||
__field(bool, irqs_disabled)
|
||||
__field(void*, caddr0)
|
||||
__field(void*, caddr1)
|
||||
__field(void*, caddr2)
|
||||
__field(void*, caddr3)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->delta = delta;
|
||||
__entry->irqs_disabled = irqs_disabled;
|
||||
__entry->caddr0 = (void *)caddr0;
|
||||
__entry->caddr1 = (void *)caddr1;
|
||||
__entry->caddr2 = (void *)caddr2;
|
||||
__entry->caddr3 = (void *)caddr3;
|
||||
),
|
||||
|
||||
TP_printk("delta=%llu(ns) irqs_d=%d Callers:(%ps<-%ps<-%ps<-%ps)",
|
||||
__entry->delta, __entry->irqs_disabled,
|
||||
__entry->caddr0, __entry->caddr1,
|
||||
__entry->caddr2, __entry->caddr3)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_PREEMPTIRQ_H */
|
||||
|
||||
#include <trace/define_trace.h>
|
||||
|
|
|
@ -1649,38 +1649,6 @@ TRACE_EVENT(sched_isolate,
|
|||
__entry->time, __entry->isolate)
|
||||
);
|
||||
|
||||
/*
 * sched_preempt_disable - reports a long preemption-disabled section.
 *
 * Emitted when preemption is re-enabled after having been disabled for
 * longer than sysctl_preemptoff_tracing_threshold_ns.
 *
 * @delta:         time preemption was disabled, in nanoseconds
 * @irqs_disabled: whether IRQs were also disabled at the disable site
 * @caddr0..3:     return addresses captured at the disable site, printed
 *                 as a caller chain via %ps
 */
TRACE_EVENT(sched_preempt_disable,

	TP_PROTO(u64 delta, bool irqs_disabled,
		unsigned long caddr0, unsigned long caddr1,
		unsigned long caddr2, unsigned long caddr3),

	TP_ARGS(delta, irqs_disabled, caddr0, caddr1, caddr2, caddr3),

	TP_STRUCT__entry(
		__field(u64, delta)
		__field(bool, irqs_disabled)
		__field(void*, caddr0)
		__field(void*, caddr1)
		__field(void*, caddr2)
		__field(void*, caddr3)
	),

	TP_fast_assign(
		__entry->delta = delta;
		__entry->irqs_disabled = irqs_disabled;
		/* store as void * so TP_printk's %ps symbolizes the address */
		__entry->caddr0 = (void *)caddr0;
		__entry->caddr1 = (void *)caddr1;
		__entry->caddr2 = (void *)caddr2;
		__entry->caddr3 = (void *)caddr3;
	),

	TP_printk("delta=%llu(ns) irqs_d=%d Callers:(%ps<-%ps<-%ps<-%ps)",
		__entry->delta, __entry->irqs_disabled,
		__entry->caddr0, __entry->caddr1,
		__entry->caddr2, __entry->caddr3)
);
|
||||
|
||||
#include "walt.h"
|
||||
#endif /* CONFIG_SMP */
|
||||
#endif /* _TRACE_SCHED_H */
|
||||
|
|
|
@ -3353,55 +3353,17 @@ static inline void sched_tick_stop(int cpu) { }
|
|||
|
||||
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
|
||||
defined(CONFIG_TRACE_PREEMPT_TOGGLE))
|
||||
/*
|
||||
* preemptoff stack tracing threshold in ns.
|
||||
* default: 1ms
|
||||
*/
|
||||
unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000UL;
|
||||
|
||||
/* Per-CPU record of the most recent preemption-disable event. */
struct preempt_store {
	u64 ts;			/* sched_clock() at disable time; 0 = no valid sample */
	unsigned long caddr[4];	/* callers captured at the disable site */
	bool irqs_disabled;	/* IRQ state sampled at the disable site */
};
|
||||
|
||||
DEFINE_PER_CPU(struct preempt_store, the_ps);
|
||||
|
||||
/*
|
||||
* This is only called from __schedule() upon context switch.
|
||||
*
|
||||
* schedule() calls __schedule() with preemption disabled.
|
||||
* if we had entered idle and exiting idle now, reset the preemption
|
||||
* tracking otherwise we may think preemption is disabled the whole time
|
||||
* when the non idle task re-enables the preemption in schedule().
|
||||
*/
|
||||
static inline void preempt_latency_reset(void)
|
||||
{
|
||||
if (is_idle_task(this_rq()->curr))
|
||||
this_cpu_ptr(&the_ps)->ts = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the value passed in is equal to the current preempt count
|
||||
* then we just disabled preemption. Start timing the latency.
|
||||
*/
|
||||
/*
 * Start timing a preemption-disabled section.  Only fires when the
 * preempt count just reached @val, i.e. this call actually disabled
 * preemption (nested disables are ignored).
 */
static inline void preempt_latency_start(int val)
{
	int cpu = raw_smp_processor_id();
	struct preempt_store *ps = &per_cpu(the_ps, cpu);

	if (preempt_count() == val) {
		unsigned long ip = get_lock_parent_ip();
#ifdef CONFIG_DEBUG_PREEMPT
		current->preempt_disable_ip = ip;
#endif
		/* snapshot the time and caller chain for later reporting */
		ps->ts = sched_clock();
		ps->caddr[0] = CALLER_ADDR0;
		ps->caddr[1] = CALLER_ADDR1;
		ps->caddr[2] = CALLER_ADDR2;
		ps->caddr[3] = CALLER_ADDR3;
		ps->irqs_disabled = irqs_disabled();

		trace_preempt_off(CALLER_ADDR0, ip);
	}
}
|
||||
|
@ -3435,19 +3397,6 @@ NOKPROBE_SYMBOL(preempt_count_add);
|
|||
/*
 * Stop timing a preemption-disabled section.  Only fires when the
 * preempt count is dropping back to @val, i.e. this call actually
 * re-enables preemption.
 */
static inline void preempt_latency_stop(int val)
{
	if (preempt_count() == val) {
		struct preempt_store *ps = &per_cpu(the_ps,
				raw_smp_processor_id());
		/* ts == 0 means the sample was reset (e.g. by __schedule()) */
		u64 delta = ps->ts ? (sched_clock() - ps->ts) : 0;

		/*
		 * Trace preempt disable stack if preemption
		 * is disabled for more than the threshold.
		 */
		if (delta > sysctl_preemptoff_tracing_threshold_ns)
			trace_sched_preempt_disable(delta, ps->irqs_disabled,
						ps->caddr[0], ps->caddr[1],
						ps->caddr[2], ps->caddr[3]);
		/* invalidate the sample before re-enabling preemption */
		ps->ts = 0;
		trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
	}
}
|
||||
|
@ -3477,7 +3426,6 @@ NOKPROBE_SYMBOL(preempt_count_sub);
|
|||
#else
|
||||
static inline void preempt_latency_start(int val) { }
|
||||
static inline void preempt_latency_stop(int val) { }
|
||||
static inline void preempt_latency_reset(void) { }
|
||||
#endif
|
||||
|
||||
static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
|
||||
|
@ -3701,7 +3649,6 @@ static void __sched notrace __schedule(bool preempt)
|
|||
if (!prev->on_rq)
|
||||
prev->last_sleep_ts = wallclock;
|
||||
|
||||
preempt_latency_reset();
|
||||
update_task_ravg(prev, rq, PUT_PREV_TASK, wallclock, 0);
|
||||
update_task_ravg(next, rq, PICK_NEXT_TASK, wallclock, 0);
|
||||
rq->nr_switches++;
|
||||
|
|
|
@ -336,7 +336,7 @@ static struct ctl_table kern_table[] = {
|
|||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
#if defined(CONFIG_PREEMPT_TRACER) || defined(CONFIG_DEBUG_PREEMPT)
|
||||
#if defined(CONFIG_PREEMPT_TRACER) && defined(CONFIG_PREEMPTIRQ_EVENTS)
|
||||
{
|
||||
.procname = "preemptoff_tracing_threshold_ns",
|
||||
.data = &sysctl_preemptoff_tracing_threshold_ns,
|
||||
|
|
|
@ -620,7 +620,7 @@ struct irqsoff_store {
|
|||
unsigned long caddr[4];
|
||||
};
|
||||
|
||||
DEFINE_PER_CPU(struct irqsoff_store, the_irqsoff);
|
||||
static DEFINE_PER_CPU(struct irqsoff_store, the_irqsoff);
|
||||
#endif /* CONFIG_PREEMPTIRQ_EVENTS */
|
||||
|
||||
/*
|
||||
|
@ -703,9 +703,57 @@ static struct tracer irqsoff_tracer __read_mostly =
|
|||
#endif /* CONFIG_IRQSOFF_TRACER */
|
||||
|
||||
#ifdef CONFIG_PREEMPT_TRACER
|
||||
#ifdef CONFIG_PREEMPTIRQ_EVENTS
|
||||
/*
|
||||
* preemptoff stack tracing threshold in ns.
|
||||
* default: 1ms
|
||||
*/
|
||||
unsigned int sysctl_preemptoff_tracing_threshold_ns = 1000000UL;
|
||||
|
||||
/* Per-CPU record of the most recent preemption-disable event. */
struct preempt_store {
	u64 ts;			/* sched_clock() at disable time; 0 = no valid sample */
	unsigned long caddr[4];	/* callers captured at the disable site */
	bool irqs_disabled;	/* IRQ state sampled at the disable site */
	int pid;		/* pid of the task that disabled preemption */
	unsigned long ncsw;	/* nvcsw + nivcsw snapshot; used to detect a
				 * context switch between disable and enable */
};
|
||||
|
||||
static DEFINE_PER_CPU(struct preempt_store, the_ps);
|
||||
#endif /* CONFIG_PREEMPTIRQ_EVENTS */
|
||||
|
||||
void tracer_preempt_on(unsigned long a0, unsigned long a1)
|
||||
{
|
||||
int pc = preempt_count();
|
||||
#ifdef CONFIG_PREEMPTIRQ_EVENTS
|
||||
struct preempt_store *ps;
|
||||
u64 delta = 0;
|
||||
|
||||
lockdep_off();
|
||||
ps = &per_cpu(the_ps, raw_smp_processor_id());
|
||||
|
||||
/*
|
||||
* schedule() calls __schedule() with preemption disabled.
|
||||
* if we had entered idle and exiting idle now, we think
|
||||
* preemption is disabled the whole time. Detect this by
|
||||
* checking if the preemption is disabled across the same
|
||||
* task. There is a possibility that the same task is scheduled
|
||||
* after idle. To rule out this possibility, compare the
|
||||
* context switch count also.
|
||||
*/
|
||||
if (ps->ts && ps->pid == current->pid && (ps->ncsw ==
|
||||
current->nvcsw + current->nivcsw))
|
||||
delta = sched_clock() - ps->ts;
|
||||
/*
|
||||
* Trace preempt disable stack if preemption
|
||||
* is disabled for more than the threshold.
|
||||
*/
|
||||
if (delta > sysctl_preemptoff_tracing_threshold_ns)
|
||||
trace_sched_preempt_disable(delta, ps->irqs_disabled,
|
||||
ps->caddr[0], ps->caddr[1],
|
||||
ps->caddr[2], ps->caddr[3]);
|
||||
ps->ts = 0;
|
||||
lockdep_on();
|
||||
#endif /* CONFIG_PREEMPTIRQ_EVENTS */
|
||||
|
||||
if (preempt_trace(pc) && !irq_trace())
|
||||
stop_critical_timing(a0, a1, pc);
|
||||
|
@ -714,6 +762,21 @@ void tracer_preempt_on(unsigned long a0, unsigned long a1)
|
|||
void tracer_preempt_off(unsigned long a0, unsigned long a1)
|
||||
{
|
||||
int pc = preempt_count();
|
||||
#ifdef CONFIG_PREEMPTIRQ_EVENTS
|
||||
struct preempt_store *ps;
|
||||
|
||||
lockdep_off();
|
||||
ps = &per_cpu(the_ps, raw_smp_processor_id());
|
||||
ps->ts = sched_clock();
|
||||
ps->caddr[0] = CALLER_ADDR0;
|
||||
ps->caddr[1] = CALLER_ADDR1;
|
||||
ps->caddr[2] = CALLER_ADDR2;
|
||||
ps->caddr[3] = CALLER_ADDR3;
|
||||
ps->irqs_disabled = irqs_disabled();
|
||||
ps->pid = current->pid;
|
||||
ps->ncsw = current->nvcsw + current->nivcsw;
|
||||
lockdep_on();
|
||||
#endif /* CONFIG_PREEMPTIRQ_EVENTS */
|
||||
|
||||
if (preempt_trace(pc) && !irq_trace())
|
||||
start_critical_timing(a0, a1, pc);
|
||||
|
|
Loading…
Reference in a new issue