6bb74df481
This patch resolves the issue found here: http://bugme.osdl.org/show_bug.cgi?id=7426 The basic summary is: Currently we register most of i386/x86_64 clocksources at module_init time. Then we enable clocksource selection at late_initcall time. This causes some problems for drivers that use gettimeofday for init calibration routines (specifically the es1968 driver in this case), where durring module_init, the only clocksource available is the low-res jiffies clocksource. This may cause slight calibration errors, due to the small sampling time used. It should be noted that drivers that require fine grained time may not function on architectures that do not have better then jiffies resolution timekeeping (there are a few). However, this does not discount the reasonable need for such fine-grained timekeeping at init time. Thus the solution here is to register clocksources earlier (ideally when the hardware is being initialized), and then we enable clocksource selection at fs_initcall (before device_initcall). This patch should probably get some testing time in -mm, since clocksource selection is one of the most important issues for correct timekeeping, and I've only been able to test this on a few of my own boxes. Signed-off-by: John Stultz <johnstul@us.ibm.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: "David S. Miller" <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
223 lines
4.9 KiB
C
223 lines
4.9 KiB
C
#include <linux/kernel.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/init.h>
|
|
#include <linux/clocksource.h>
|
|
#include <linux/time.h>
|
|
#include <linux/acpi.h>
|
|
#include <linux/cpufreq.h>
|
|
|
|
#include <asm/timex.h>
|
|
|
|
static int notsc __initdata = 0;
|
|
|
|
unsigned int cpu_khz; /* TSC clocks / usec, not used here */
|
|
EXPORT_SYMBOL(cpu_khz);
|
|
|
|
static unsigned int cyc2ns_scale __read_mostly;
|
|
|
|
void set_cyc2ns_scale(unsigned long khz)
|
|
{
|
|
cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / khz;
|
|
}
|
|
|
|
static unsigned long long cycles_2_ns(unsigned long long cyc)
|
|
{
|
|
return (cyc * cyc2ns_scale) >> NS_SCALE;
|
|
}
|
|
|
|
unsigned long long sched_clock(void)
|
|
{
|
|
unsigned long a = 0;
|
|
|
|
/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
|
|
* which means it is not completely exact and may not be monotonous
|
|
* between CPUs. But the errors should be too small to matter for
|
|
* scheduling purposes.
|
|
*/
|
|
|
|
rdtscll(a);
|
|
return cycles_2_ns(a);
|
|
}
|
|
|
|
static int tsc_unstable;
|
|
|
|
static inline int check_tsc_unstable(void)
|
|
{
|
|
return tsc_unstable;
|
|
}
|
|
#ifdef CONFIG_CPU_FREQ
|
|
|
|
/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
|
|
* changes.
|
|
*
|
|
* RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
|
|
* not that important because current Opteron setups do not support
|
|
* scaling on SMP anyroads.
|
|
*
|
|
* Should fix up last_tsc too. Currently gettimeofday in the
|
|
* first tick after the change will be slightly wrong.
|
|
*/
|
|
|
|
#include <linux/workqueue.h>
|
|
|
|
static unsigned int cpufreq_delayed_issched = 0;
|
|
static unsigned int cpufreq_init = 0;
|
|
static struct work_struct cpufreq_delayed_get_work;
|
|
|
|
static void handle_cpufreq_delayed_get(struct work_struct *v)
|
|
{
|
|
unsigned int cpu;
|
|
for_each_online_cpu(cpu) {
|
|
cpufreq_get(cpu);
|
|
}
|
|
cpufreq_delayed_issched = 0;
|
|
}
|
|
|
|
static unsigned int ref_freq = 0;
|
|
static unsigned long loops_per_jiffy_ref = 0;
|
|
|
|
static unsigned long cpu_khz_ref = 0;
|
|
|
|
static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
|
|
void *data)
|
|
{
|
|
struct cpufreq_freqs *freq = data;
|
|
unsigned long *lpj, dummy;
|
|
|
|
if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC))
|
|
return 0;
|
|
|
|
lpj = &dummy;
|
|
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
|
|
#ifdef CONFIG_SMP
|
|
lpj = &cpu_data[freq->cpu].loops_per_jiffy;
|
|
#else
|
|
lpj = &boot_cpu_data.loops_per_jiffy;
|
|
#endif
|
|
|
|
if (!ref_freq) {
|
|
ref_freq = freq->old;
|
|
loops_per_jiffy_ref = *lpj;
|
|
cpu_khz_ref = cpu_khz;
|
|
}
|
|
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
|
|
(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
|
|
(val == CPUFREQ_RESUMECHANGE)) {
|
|
*lpj =
|
|
cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
|
|
|
|
cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
|
|
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
|
|
mark_tsc_unstable();
|
|
}
|
|
|
|
set_cyc2ns_scale(cpu_khz_ref);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct notifier_block time_cpufreq_notifier_block = {
|
|
.notifier_call = time_cpufreq_notifier
|
|
};
|
|
|
|
static int __init cpufreq_tsc(void)
|
|
{
|
|
INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
|
|
if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
|
|
CPUFREQ_TRANSITION_NOTIFIER))
|
|
cpufreq_init = 1;
|
|
return 0;
|
|
}
|
|
|
|
core_initcall(cpufreq_tsc);
|
|
|
|
#endif
|
|
|
|
static int tsc_unstable = 0;
|
|
|
|
/*
|
|
* Make an educated guess if the TSC is trustworthy and synchronized
|
|
* over all CPUs.
|
|
*/
|
|
__cpuinit int unsynchronized_tsc(void)
|
|
{
|
|
if (tsc_unstable)
|
|
return 1;
|
|
|
|
#ifdef CONFIG_SMP
|
|
if (apic_is_clustered_box())
|
|
return 1;
|
|
#endif
|
|
/* Most intel systems have synchronized TSCs except for
|
|
multi node systems */
|
|
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
|
|
#ifdef CONFIG_ACPI
|
|
/* But TSC doesn't tick in C3 so don't use it there */
|
|
if (acpi_gbl_FADT.header.length > 0 && acpi_gbl_FADT.C3latency < 1000)
|
|
return 1;
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
/* Assume multi socket systems are not synchronized */
|
|
return num_present_cpus() > 1;
|
|
}
|
|
|
|
int __init notsc_setup(char *s)
|
|
{
|
|
notsc = 1;
|
|
return 1;
|
|
}
|
|
|
|
__setup("notsc", notsc_setup);
|
|
|
|
|
|
/* clock source code: */
|
|
static cycle_t read_tsc(void)
|
|
{
|
|
cycle_t ret = (cycle_t)get_cycles_sync();
|
|
return ret;
|
|
}
|
|
|
|
static cycle_t __vsyscall_fn vread_tsc(void)
|
|
{
|
|
cycle_t ret = (cycle_t)get_cycles_sync();
|
|
return ret;
|
|
}
|
|
|
|
static struct clocksource clocksource_tsc = {
|
|
.name = "tsc",
|
|
.rating = 300,
|
|
.read = read_tsc,
|
|
.mask = CLOCKSOURCE_MASK(64),
|
|
.shift = 22,
|
|
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
|
|
CLOCK_SOURCE_MUST_VERIFY,
|
|
.vread = vread_tsc,
|
|
};
|
|
|
|
void mark_tsc_unstable(void)
|
|
{
|
|
if (!tsc_unstable) {
|
|
tsc_unstable = 1;
|
|
/* Change only the rating, when not registered */
|
|
if (clocksource_tsc.mult)
|
|
clocksource_change_rating(&clocksource_tsc, 0);
|
|
else
|
|
clocksource_tsc.rating = 0;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL_GPL(mark_tsc_unstable);
|
|
|
|
void __init init_tsc_clocksource(void)
|
|
{
|
|
if (!notsc) {
|
|
clocksource_tsc.mult = clocksource_khz2mult(cpu_khz,
|
|
clocksource_tsc.shift);
|
|
if (check_tsc_unstable())
|
|
clocksource_tsc.rating = 0;
|
|
|
|
clocksource_register(&clocksource_tsc);
|
|
}
|
|
}
|