From f595ec964daf7f99668039d7303ddedd09a75142 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Jun 2008 10:47:56 +0200 Subject: [PATCH 1/3] common implementation of iterative div/mod We have a few instances of the open-coded iterative div/mod loop, used when we don't expcet the dividend to be much bigger than the divisor. Unfortunately modern gcc's have the tendency to strength "reduce" this into a full mod operation, which isn't necessarily any faster, and even if it were, doesn't exist if gcc implements it in libgcc. The workaround is to put a dummy asm statement in the loop to prevent gcc from performing the transformation. This patch creates a single implementation of this loop, and uses it to replace the open-coded versions I know about. Signed-off-by: Jeremy Fitzhardinge Cc: Andrew Morton Cc: john stultz Cc: Segher Boessenkool Cc: Christian Kujau Cc: Robert Hancock Signed-off-by: Ingo Molnar --- arch/x86/xen/time.c | 13 +++---------- include/linux/math64.h | 2 ++ include/linux/time.h | 11 ++--------- lib/div64.c | 23 +++++++++++++++++++++++ 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c39e1a5aa241..52b2e3856980 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -150,11 +151,7 @@ static void do_stolen_accounting(void) if (stolen < 0) stolen = 0; - ticks = 0; - while (stolen >= NS_PER_TICK) { - ticks++; - stolen -= NS_PER_TICK; - } + ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); __get_cpu_var(residual_stolen) = stolen; account_steal_time(NULL, ticks); @@ -166,11 +163,7 @@ static void do_stolen_accounting(void) if (blocked < 0) blocked = 0; - ticks = 0; - while (blocked >= NS_PER_TICK) { - ticks++; - blocked -= NS_PER_TICK; - } + ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); __get_cpu_var(residual_blocked) = blocked; account_steal_time(idle_task(smp_processor_id()), ticks); } diff --git a/include/linux/math64.h b/include/linux/math64.h index c1a5f81501ff..177785e1e4a3 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -81,4 +81,6 @@ static inline s64 div_s64(s64 dividend, s32 divisor) } #endif +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder); + #endif /* _LINUX_MATH64_H */ diff --git a/include/linux/time.h b/include/linux/time.h index d32ef0ad4c0a..05f9517a8ed1 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -6,6 +6,7 @@ #ifdef __KERNEL__ # include # include +# include #endif #ifndef _STRUCT_TIMESPEC @@ -172,15 +173,7 @@ extern struct timeval ns_to_timeval(const s64 nsec); */ static inline void timespec_add_ns(struct timespec *a, u64 ns) { - ns += a->tv_nsec; - while(unlikely(ns >= NSEC_PER_SEC)) { - /* The following asm() prevents the compiler from - * optimising this loop into a modulo operation. */ - asm("" : "+r"(ns)); - - ns -= NSEC_PER_SEC; - a->tv_sec++; - } + a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); a->tv_nsec = ns; } #endif /* __KERNEL__ */ diff --git a/lib/div64.c b/lib/div64.c index bb5bd0c0f030..76c01542d3e1 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -98,3 +98,26 @@ EXPORT_SYMBOL(div64_u64); #endif #endif /* BITS_PER_LONG == 32 */ + +/* + * Iterative div/mod for use when dividend is not expected to be much + * bigger than divisor. + */ +u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) +{ + u32 ret = 0; + + while (dividend >= divisor) { + /* The following asm() prevents the compiler from + optimising this loop into a modulo operation. */ + asm("" : "+rm"(dividend)); + + dividend -= divisor; + ret++; + } + + *remainder = dividend; + + return ret; +} +EXPORT_SYMBOL(iter_div_u64_rem); From d5e181f78ac753893eb930868a52a4488cd3de0a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Jun 2008 10:47:58 +0200 Subject: [PATCH 2/3] add an inlined version of iter_div_u64_rem iter_div_u64_rem is used in the x86-64 vdso, which cannot call other kernel code. For this case, provide the always_inlined version, __iter_div_u64_rem. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/math64.h | 19 +++++++++++++++++++ lib/div64.c | 15 +-------------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/include/linux/math64.h b/include/linux/math64.h index 177785e1e4a3..c87f1528703a 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -83,4 +83,23 @@ static inline s64 div_s64(s64 dividend, s32 divisor) u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder); +static __always_inline u32 +__iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) +{ + u32 ret = 0; + + while (dividend >= divisor) { + /* The following asm() prevents the compiler from + optimising this loop into a modulo operation. */ + asm("" : "+rm"(dividend)); + + dividend -= divisor; + ret++; + } + + *remainder = dividend; + + return ret; +} + #endif /* _LINUX_MATH64_H */ diff --git a/lib/div64.c b/lib/div64.c index 76c01542d3e1..a111eb8de9cf 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -105,19 +105,6 @@ EXPORT_SYMBOL(div64_u64); */ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) { - u32 ret = 0; - - while (dividend >= divisor) { - /* The following asm() prevents the compiler from - optimising this loop into a modulo operation. */ - asm("" : "+rm"(dividend)); - - dividend -= divisor; - ret++; - } - - *remainder = dividend; - - return ret; + return __iter_div_u64_rem(dividend, divisor, remainder); } EXPORT_SYMBOL(iter_div_u64_rem); From 9412e28649d0272df5e4af57bb378926fd4df580 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 12 Jun 2008 10:48:00 +0200 Subject: [PATCH 3/3] always_inline timespec_add_ns timespec_add_ns is used from the x86-64 vdso, which cannot call out to other kernel code. Make sure that timespec_add_ns is always inlined (and only uses always_inlined functions) to make sure there are no unexpected calls. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/time.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/time.h b/include/linux/time.h index 05f9517a8ed1..e15206a7e82e 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -170,10 +170,13 @@ extern struct timeval ns_to_timeval(const s64 nsec); * timespec_add_ns - Adds nanoseconds to a timespec * @a: pointer to timespec to be incremented * @ns: unsigned nanoseconds value to be added + * + * This must always be inlined because its used from the x86-64 vdso, + * which cannot call other kernel functions. */ -static inline void timespec_add_ns(struct timespec *a, u64 ns) +static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) { - a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); + a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); a->tv_nsec = ns; } #endif /* __KERNEL__ */