8e3e076c5a
The generic semaphore rewrite had a huge performance regression on AIM7
(and potentially other BKL-heavy benchmarks) because the generic
semaphores had been rewritten to be simple to understand and fair. The
latter, in particular, turns a semaphore-based BKL implementation into a
mess of scheduling.
The attempt to fix the performance regression failed miserably (see the
previous commit 00b41ec261, 'Revert "semaphore: fix"'), and so for now
the simple and sane approach is to instead just go back to the old
spinlock-based BKL implementation that never had any issues like this.
This patch also has the advantage of being reported to fix the
regression completely according to Yanmin Zhang, unlike the semaphore
hack which still left a couple percentage point regression.
As a spinlock, the BKL obviously has the potential to be a latency
issue, but it's not really any different from any other spinlock in that
respect. We do want to get rid of the BKL asap, but that has been the
plan for several years.
These days, the biggest users are in the tty layer (open/release in
particular) and Alan holds out some hope:
"tty release is probably a few months away from getting cured - I'm
afraid it will almost certainly be the very last user of the BKL in
tty to get fixed as it depends on everything else being sanely locked."
so while we're not there yet, we do have a plan of action.
Tested-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Alexander Viro <viro@ftp.linux.org.uk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
167 lines
4.5 KiB
C
167 lines
4.5 KiB
C
#ifndef LINUX_HARDIRQ_H
#define LINUX_HARDIRQ_H

#include <linux/preempt.h>
#include <linux/smp_lock.h>
#include <linux/lockdep.h>
#include <asm/hardirq.h>
#include <asm/system.h>

/*
 * We put the hardirq and softirq counter into the preemption
 * counter. The bitmask has the following meaning:
 *
 * - bits 0-7 are the preemption count (max preemption depth: 256)
 * - bits 8-15 are the softirq count (max # of softirqs: 256)
 *
 * The hardirq count can be overridden per architecture, the default is:
 *
 * - bits 16-27 are the hardirq count (max # of hardirqs: 4096)
 * - ( bit 28 is the PREEMPT_ACTIVE flag. )
 *
 * PREEMPT_MASK: 0x000000ff
 * SOFTIRQ_MASK: 0x0000ff00
 * HARDIRQ_MASK: 0x0fff0000
 */
#define PREEMPT_BITS	8
#define SOFTIRQ_BITS	8

#ifndef HARDIRQ_BITS
#define HARDIRQ_BITS	12

#ifndef MAX_HARDIRQS_PER_CPU
#define MAX_HARDIRQS_PER_CPU NR_IRQS
#endif

/*
 * The hardirq mask has to be large enough to have space for potentially
 * all IRQ sources in the system nesting on a single CPU.
 */
#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU
# error HARDIRQ_BITS is too low!
#endif
#endif /* !HARDIRQ_BITS */

/* Starting bit of each counter field; the fields are packed back to back. */
#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	(PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT	(SOFTIRQ_SHIFT + SOFTIRQ_BITS)

/* Mask with the low x bits set. */
#define __IRQ_MASK(x)	((1UL << (x))-1)

/* Masks selecting each counter field in place. */
#define PREEMPT_MASK	(__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
#define SOFTIRQ_MASK	(__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
#define HARDIRQ_MASK	(__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)

/* Value of a single increment of each counter field. */
#define PREEMPT_OFFSET	(1UL << PREEMPT_SHIFT)
#define SOFTIRQ_OFFSET	(1UL << SOFTIRQ_SHIFT)
#define HARDIRQ_OFFSET	(1UL << HARDIRQ_SHIFT)

/*
 * PREEMPT_ACTIVE must lie at or above the first bit past the hardirq
 * count field, otherwise the flag would overlap the counters.
 */
#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS))
#error PREEMPT_ACTIVE is too low!
#endif

/*
 * Extract the hardirq, softirq, or combined field of preempt_count().
 * The result is masked in place (not shifted down), so it is only
 * meaningful as zero / non-zero or relative to the *_OFFSET values.
 */
#define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
#define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))

/*
 * Are we doing bottom half or hardware interrupt processing?
 * Are we in a softirq context? Interrupt context?
 */
#define in_irq()		(hardirq_count())
#define in_softirq()		(softirq_count())
#define in_interrupt()		(irq_count())

/*
 * Baselines for the atomic-context checks below.  With CONFIG_PREEMPT,
 * in_atomic() compares against kernel_locked() and
 * in_atomic_preempt_off() allows for one preempt_disable() level;
 * without it both baselines are zero.
 */
#if defined(CONFIG_PREEMPT)
# define PREEMPT_INATOMIC_BASE kernel_locked()
# define PREEMPT_CHECK_OFFSET 1
#else
# define PREEMPT_INATOMIC_BASE 0
# define PREEMPT_CHECK_OFFSET 0
#endif

/*
 * Are we running in atomic context?  WARNING: this macro cannot
 * always detect atomic context; in particular, it cannot know about
 * held spinlocks in non-preemptible kernels.  Thus it should not be
 * used in the general case to determine whether sleeping is possible.
 * Do not use in_atomic() in driver code.
 *
 * The PREEMPT_ACTIVE bit is masked out before the comparison.
 */
#define in_atomic()	((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)

/*
 * Check whether we were atomic before we did preempt_disable():
 * (used by the scheduler, *after* releasing the kernel lock)
 */
#define in_atomic_preempt_off() \
		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)

/*
 * preemptible(): non-zero only on CONFIG_PREEMPT kernels, and only when
 * the preempt count is zero and interrupts are not disabled.
 * IRQ_EXIT_OFFSET: HARDIRQ_OFFSET, reduced by one when CONFIG_PREEMPT
 * is set.
 */
#ifdef CONFIG_PREEMPT
# define preemptible()	(preempt_count() == 0 && !irqs_disabled())
# define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
#else
# define preemptible()	0
# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
#endif

/*
 * synchronize_irq(): an out-of-line function on CONFIG_SMP kernels;
 * otherwise it reduces to barrier() and the irq argument is unused.
 */
#ifdef CONFIG_SMP
extern void synchronize_irq(unsigned int irq);
#else
# define synchronize_irq(irq)	barrier()
#endif

struct task_struct;

#ifndef CONFIG_VIRT_CPU_ACCOUNTING
/* Empty stub used when CONFIG_VIRT_CPU_ACCOUNTING is not set. */
static inline void account_system_vtime(struct task_struct *tsk)
{
}
#endif

/*
 * RCU irq entry/exit hooks: external functions only when both
 * CONFIG_PREEMPT_RCU and CONFIG_NO_HZ are set, otherwise empty
 * statements.
 */
#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ)
extern void rcu_irq_enter(void);
extern void rcu_irq_exit(void);
#else
# define rcu_irq_enter() do { } while (0)
# define rcu_irq_exit() do { } while (0)
#endif /* CONFIG_PREEMPT_RCU && CONFIG_NO_HZ */

/*
 * Low-level hardirq entry: call rcu_irq_enter(), account system time
 * for the current task, add HARDIRQ_OFFSET to the preempt count, then
 * call trace_hardirq_enter().
 *
 * It is safe to do non-atomic ops on ->hardirq_context,
 * because NMI handlers may not preempt and the ops are
 * always balanced, so the interrupted value of ->hardirq_context
 * will always be restored.
 */
#define __irq_enter()					\
	do {						\
		rcu_irq_enter();			\
		account_system_vtime(current);		\
		add_preempt_count(HARDIRQ_OFFSET);	\
		trace_hardirq_enter();			\
	} while (0)

/*
 * Enter irq context (on NO_HZ, update jiffies):
 */
extern void irq_enter(void);

/*
 * Exit irq context without processing softirqs: call
 * trace_hardirq_exit(), account system time for the current task,
 * subtract HARDIRQ_OFFSET from the preempt count, then call
 * rcu_irq_exit().
 */
#define __irq_exit()					\
	do {						\
		trace_hardirq_exit();			\
		account_system_vtime(current);		\
		sub_preempt_count(HARDIRQ_OFFSET);	\
		rcu_irq_exit();				\
	} while (0)

/*
 * Exit irq context and process softirqs if needed:
 */
extern void irq_exit(void);

/*
 * NMI entry turns lockdep off and then performs the low-level irq
 * entry; NMI exit undoes both steps in the reverse order.
 */
#define nmi_enter()		do { lockdep_off(); __irq_enter(); } while (0)
#define nmi_exit()		do { __irq_exit(); lockdep_on(); } while (0)

#endif /* LINUX_HARDIRQ_H */