Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux

* 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux: (35 commits)
  PM idle: remove global declaration of pm_idle
  unicore32 idle: delete stray pm_idle comment
  openrisc idle: delete pm_idle
  mn10300 idle: delete pm_idle
  microblaze idle: delete pm_idle
  m32r idle: delete pm_idle, and other dead idle code
  ia64 idle: delete pm_idle
  cris idle: delete idle and pm_idle
  ARM64 idle: delete pm_idle
  ARM idle: delete pm_idle
  blackfin idle: delete pm_idle
  sparc idle: rename pm_idle to sparc_idle
  sh idle: rename global pm_idle to static sh_idle
  x86 idle: rename global pm_idle to static x86_idle
  APM idle: register apm_cpu_idle via cpuidle
  tools/power turbostat: display SMI count by default
  intel_idle: export both C1 and C1E
  cpuidle: remove vestige definition of cpuidle_state_usage.driver_data
  x86 idle: remove 32-bit-only "no-hlt" parameter, hlt_works_ok flag
  x86 idle: remove mwait_idle() and "idle=mwait" cmdline param
  ...

Conflicts:
	arch/x86/kernel/process.c (with PM / tracing commit 43720bd)
	drivers/acpi/processor_idle.c (with ACPICA commit 4f84291)
Merged by Rafael J. Wysocki on 2013-02-18 22:34:11 +01:00, commit 10baf04e95.
35 changed files with 366 additions and 559 deletions.

View file

@@ -1039,16 +1039,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Claim all unknown PCI IDE storage controllers.
 
 	idle=		[X86]
-			Format: idle=poll, idle=mwait, idle=halt, idle=nomwait
+			Format: idle=poll, idle=halt, idle=nomwait
 			Poll forces a polling idle loop that can slightly
 			improve the performance of waking up a idle CPU, but
 			will use a lot of power and make the system run hot.
 			Not recommended.
-			idle=mwait: On systems which support MONITOR/MWAIT but
-			the kernel chose to not use it because it doesn't save
-			as much power as a normal idle loop, use the
-			MONITOR/MWAIT idle loop anyways. Performance should be
-			the same as idle=poll.
 			idle=halt: Halt is forced to be used for CPU idle.
 			In such case C2/C3 won't be used again.
 			idle=nomwait: Disable mwait for CPU C-states
@@ -1891,10 +1886,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			wfi(ARM) instruction doesn't work correctly and not to
 			use it. This is also useful when using JTAG debugger.
 
-	no-hlt		[BUGS=X86-32] Tells the kernel that the hlt
-			instruction doesn't work correctly and not to
-			use it.
-
 	no_file_caps	Tells the kernel not to honor file capabilities. The
 			only way then for a file to be executed with privilege
 			is to be setuid root or executed by root.
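
With idle=mwait and no-hlt gone, only poll, halt and nomwait survive as idle overrides. As a hedged illustration (kernel image path and root device are hypothetical), a boot entry forcing the polling loop would still look like:

	linux /boot/vmlinuz root=/dev/sda1 idle=poll

while "idle=mwait" and "no-hlt" are simply no longer recognized after this merge.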

View file

@@ -172,14 +172,9 @@ static void default_idle(void)
 	local_irq_enable();
 }
 
-void (*pm_idle)(void) = default_idle;
-EXPORT_SYMBOL(pm_idle);
-
 /*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way. The only difference
- * is that we always respect 'hlt_counter' to prevent low power idle.
+ * The idle thread.
+ * We always respect 'hlt_counter' to prevent low power idle.
  */
 void cpu_idle(void)
 {
@@ -210,10 +205,10 @@ void cpu_idle(void)
 		} else if (!need_resched()) {
 			stop_critical_timings();
 			if (cpuidle_idle_call())
-				pm_idle();
+				default_idle();
 			start_critical_timings();
 			/*
-			 * pm_idle functions must always
+			 * default_idle functions must always
 			 * return with IRQs enabled.
 			 */
 			WARN_ON(irqs_disabled());
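
This hunk shows the contract every architecture in the series converges on: cpuidle_idle_call() returns nonzero when no cpuidle driver/device is registered, and the architecture then falls back to its own built-in idle routine. A minimal sketch of that shape (arch_idle_once() is a hypothetical name, not kernel code):

	/* Sketch only: the fallback pattern shared by the arm/arm64/sh changes. */
	static void arch_idle_once(void)
	{
		if (cpuidle_idle_call())	/* nonzero: no cpuidle driver took over */
			default_idle();		/* arch fallback; returns with IRQs on */
		WARN_ON(irqs_disabled());	/* both paths must re-enable IRQs */
	}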

View file

@@ -25,53 +25,9 @@
 #define DAVINCI_CPUIDLE_MAX_STATES	2
 
-struct davinci_ops {
-	void (*enter) (u32 flags);
-	void (*exit) (u32 flags);
-	u32 flags;
-};
-
-/* Actual code that puts the SoC in different idle states */
-static int davinci_enter_idle(struct cpuidle_device *dev,
-				struct cpuidle_driver *drv,
-						int index)
-{
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct davinci_ops *ops = cpuidle_get_statedata(state_usage);
-
-	if (ops && ops->enter)
-		ops->enter(ops->flags);
-
-	index = cpuidle_wrap_enter(dev, drv, index,
-				arm_cpuidle_simple_enter);
-
-	if (ops && ops->exit)
-		ops->exit(ops->flags);
-
-	return index;
-}
-
-/* fields in davinci_ops.flags */
-#define DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN	BIT(0)
-
-static struct cpuidle_driver davinci_idle_driver = {
-	.name			= "cpuidle-davinci",
-	.owner			= THIS_MODULE,
-	.en_core_tk_irqen	= 1,
-	.states[0]		= ARM_CPUIDLE_WFI_STATE,
-	.states[1]		= {
-		.enter			= davinci_enter_idle,
-		.exit_latency		= 10,
-		.target_residency	= 100000,
-		.flags			= CPUIDLE_FLAG_TIME_VALID,
-		.name			= "DDR SR",
-		.desc			= "WFI and DDR Self Refresh",
-	},
-	.state_count = DAVINCI_CPUIDLE_MAX_STATES,
-};
-
 static DEFINE_PER_CPU(struct cpuidle_device, davinci_cpuidle_device);
 static void __iomem *ddr2_reg_base;
+static bool ddr2_pdown;
 
 static void davinci_save_ddr_power(int enter, bool pdown)
 {
@@ -92,21 +48,35 @@ static void davinci_save_ddr_power(int enter, bool pdown)
 	__raw_writel(val, ddr2_reg_base + DDR2_SDRCR_OFFSET);
 }
 
-static void davinci_c2state_enter(u32 flags)
+/* Actual code that puts the SoC in different idle states */
+static int davinci_enter_idle(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv,
+						int index)
 {
-	davinci_save_ddr_power(1, !!(flags & DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN));
-}
+	davinci_save_ddr_power(1, ddr2_pdown);
+	index = cpuidle_wrap_enter(dev, drv, index,
+				arm_cpuidle_simple_enter);
+	davinci_save_ddr_power(0, ddr2_pdown);
 
-static void davinci_c2state_exit(u32 flags)
-{
-	davinci_save_ddr_power(0, !!(flags & DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN));
-}
+	return index;
+}
 
-static struct davinci_ops davinci_states[DAVINCI_CPUIDLE_MAX_STATES] = {
-	[1] = {
-		.enter	= davinci_c2state_enter,
-		.exit	= davinci_c2state_exit,
+static struct cpuidle_driver davinci_idle_driver = {
+	.name			= "cpuidle-davinci",
+	.owner			= THIS_MODULE,
+	.en_core_tk_irqen	= 1,
+	.states[0]		= ARM_CPUIDLE_WFI_STATE,
+	.states[1]		= {
+		.enter			= davinci_enter_idle,
+		.exit_latency		= 10,
+		.target_residency	= 100000,
+		.flags			= CPUIDLE_FLAG_TIME_VALID,
+		.name			= "DDR SR",
+		.desc			= "WFI and DDR Self Refresh",
 	},
+	.state_count = DAVINCI_CPUIDLE_MAX_STATES,
 };
 
 static int __init davinci_cpuidle_probe(struct platform_device *pdev)
@@ -124,11 +94,7 @@ static int __init davinci_cpuidle_probe(struct platform_device *pdev)
 
 	ddr2_reg_base = pdata->ddr2_ctlr_base;
 
-	if (pdata->ddr2_pdown)
-		davinci_states[1].flags |= DAVINCI_CPUIDLE_FLAGS_DDR2_PWDN;
-	cpuidle_set_statedata(&device->states_usage[1], &davinci_states[1]);
-
-	device->state_count = DAVINCI_CPUIDLE_MAX_STATES;
+	ddr2_pdown = pdata->ddr2_pdown;
 
 	ret = cpuidle_register_driver(&davinci_idle_driver);
 	if (ret) {
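
With the per-state davinci_ops table gone, the enter path reduces to the common "bracket a library idle call with platform work" shape. Schematically, using the helpers already visible in the diff plus hypothetical platform_pre_idle()/platform_post_idle() stand-ins for davinci_save_ddr_power():

	static int my_enter_idle(struct cpuidle_device *dev,
				 struct cpuidle_driver *drv, int index)
	{
		platform_pre_idle();			/* e.g. put DDR2 into self refresh */
		index = cpuidle_wrap_enter(dev, drv, index,
					   arm_cpuidle_simple_enter);	/* WFI */
		platform_post_idle();			/* undo the power tweak */
		return index;
	}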

View file

@@ -97,14 +97,9 @@ static void default_idle(void)
 	local_irq_enable();
 }
 
-void (*pm_idle)(void) = default_idle;
-EXPORT_SYMBOL_GPL(pm_idle);
-
 /*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way. The only difference
- * is that we always respect 'hlt_counter' to prevent low power idle.
+ * The idle thread.
+ * We always respect 'hlt_counter' to prevent low power idle.
  */
 void cpu_idle(void)
 {
@@ -122,10 +117,10 @@ void cpu_idle(void)
 			local_irq_disable();
 			if (!need_resched()) {
 				stop_critical_timings();
-				pm_idle();
+				default_idle();
 				start_critical_timings();
 				/*
-				 * pm_idle functions should always return
+				 * default_idle functions should always return
 				 * with IRQs enabled.
 				 */
 				WARN_ON(irqs_disabled());

View file

@@ -39,12 +39,6 @@ int nr_l1stack_tasks;
 void *l1_stack_base;
 unsigned long l1_stack_len;
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void) = NULL;
-EXPORT_SYMBOL(pm_idle);
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
@@ -81,7 +75,6 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		void (*idle)(void) = pm_idle;
 
 #ifdef CONFIG_HOTPLUG_CPU
 		if (cpu_is_offline(smp_processor_id()))

View file

@@ -54,11 +54,6 @@ void enable_hlt(void)
 EXPORT_SYMBOL(enable_hlt);
 
-/*
- * The following aren't currently used.
- */
-void (*pm_idle)(void);
-
 extern void default_idle(void);
 
 void (*pm_power_off)(void);
@@ -77,16 +72,12 @@ void cpu_idle (void)
 	while (1) {
 		rcu_idle_enter();
 		while (!need_resched()) {
-			void (*idle)(void);
 			/*
 			 * Mark this as an RCU critical section so that
 			 * synchronize_kernel() in the unload path waits
 			 * for our completion.
 			 */
-			idle = pm_idle;
-			if (!idle)
-				idle = default_idle;
-			idle();
+			default_idle();
 		}
 		rcu_idle_exit();
 		schedule_preempt_disabled();

View file

@@ -57,8 +57,6 @@ void (*ia64_mark_idle)(int);
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
 void (*pm_power_off) (void);
 EXPORT_SYMBOL(pm_power_off);
@@ -301,7 +299,6 @@ cpu_idle (void)
 			if (mark_idle)
 				(*mark_idle)(1);
 
-			idle = pm_idle;
 			if (!idle)
 				idle = default_idle;
 			(*idle)();

View file

@@ -1051,7 +1051,6 @@ cpu_init (void)
 		max_num_phys_stacked = num_phys_stacked;
 	}
 	platform_cpu_init();
-	pm_idle = default_idle;
 }
 
 void __init

View file

@@ -44,35 +44,9 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return tsk->thread.lr;
 }
 
-/*
- * Powermanagement idle function, if any..
- */
-static void (*pm_idle)(void) = NULL;
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
-/*
- * We use this is we don't have any better
- * idle routine..
- */
-static void default_idle(void)
-{
-	/* M32R_FIXME: Please use "cpu_sleep" mode.  */
-	cpu_relax();
-}
-
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle (void)
-{
-	/* M32R_FIXME */
-	cpu_relax();
-}
-
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
@@ -84,14 +58,8 @@ void cpu_idle (void)
 	/* endless idle loop with no priority at all */
 	while (1) {
 		rcu_idle_enter();
-		while (!need_resched()) {
-			void (*idle)(void) = pm_idle;
-
-			if (!idle)
-				idle = default_idle;
-
-			idle();
-		}
+		while (!need_resched())
+			cpu_relax();
 		rcu_idle_exit();
 		schedule_preempt_disabled();
 	}
@@ -120,21 +88,6 @@ void machine_power_off(void)
 	/* M32R_FIXME */
 }
 
-static int __init idle_setup (char *str)
-{
-	if (!strncmp(str, "poll", 4)) {
-		printk("using poll in idle threads.\n");
-		pm_idle = poll_idle;
-	} else if (!strncmp(str, "sleep", 4)) {
-		printk("using sleep in idle threads.\n");
-		pm_idle = default_idle;
-	}
-	return 1;
-}
-
-__setup("idle=", idle_setup);
-
 void show_regs(struct pt_regs * regs)
 {
 	printk("\n");

View file

@@ -41,7 +41,6 @@ void show_regs(struct pt_regs *regs)
 		regs->msr, regs->ear, regs->esr, regs->fsr);
 }
 
-void (*pm_idle)(void);
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
@@ -98,8 +97,6 @@ void cpu_idle(void)
 	/* endless idle loop with no priority at all */
 	while (1) {
-		void (*idle)(void) = pm_idle;
-
 		if (!idle)
 			idle = default_idle;

View file

@@ -36,12 +36,6 @@
 #include <asm/gdb-stub.h>
 #include "internal.h"
 
-/*
- * power management idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
 /*
  * return saved PC of a blocked thread.
  */
@@ -113,7 +107,6 @@ void cpu_idle(void)
 			void (*idle)(void);
 
 			smp_rmb();
-			idle = pm_idle;
 			if (!idle) {
 #if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
 				idle = poll_idle;

View file

@@ -39,11 +39,6 @@
 void (*powersave) (void) = NULL;
 
-static inline void pm_idle(void)
-{
-	barrier();
-}
-
 void cpu_idle(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);

View file

@@ -22,7 +22,7 @@
 #include <asm/smp.h>
 #include <asm/bl_bit.h>
 
-void (*pm_idle)(void);
+static void (*sh_idle)(void);
 
 static int hlt_counter;
@@ -103,9 +103,9 @@ void cpu_idle(void)
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
 			if (cpuidle_idle_call())
-				pm_idle();
+				sh_idle();
 			/*
-			 * Sanity check to ensure that pm_idle() returns
+			 * Sanity check to ensure that sh_idle() returns
 			 * with IRQs enabled
 			 */
 			WARN_ON(irqs_disabled());
@@ -123,13 +123,13 @@ void __init select_idle_routine(void)
 	/*
 	 * If a platform has set its own idle routine, leave it alone.
 	 */
-	if (pm_idle)
+	if (sh_idle)
 		return;
 
 	if (hlt_works())
-		pm_idle = default_idle;
+		sh_idle = default_idle;
 	else
-		pm_idle = poll_idle;
+		sh_idle = poll_idle;
 }
 
 void stop_this_cpu(void *unused)

View file

@@ -118,6 +118,7 @@ extern unsigned long get_wchan(struct task_struct *);
 extern struct task_struct *last_task_used_math;
 
 #define cpu_relax()	barrier()
+extern void (*sparc_idle)(void);
 
 #endif

View file

@@ -20,6 +20,7 @@
 #include <asm/uaccess.h>
 #include <asm/auxio.h>
 #include <asm/apc.h>
+#include <asm/processor.h>
 
 /* Debugging
  *
@@ -158,7 +159,7 @@ static int apc_probe(struct platform_device *op)
 
 	/* Assign power management IDLE handler */
 	if (!apc_no_idle)
-		pm_idle = apc_swift_idle;
+		sparc_idle = apc_swift_idle;
 
 	printk(KERN_INFO "%s: power management initialized%s\n",
 	       APC_DEVNAME, apc_no_idle ? " (CPU idle disabled)" : "");

View file

@@ -9,6 +9,7 @@
 #include <asm/leon_amba.h>
 #include <asm/cpu_type.h>
 #include <asm/leon.h>
+#include <asm/processor.h>
 
 /* List of Systems that need fixup instructions around power-down instruction */
 unsigned int pmc_leon_fixup_ids[] = {
@@ -69,9 +70,9 @@ static int __init leon_pmc_install(void)
 	if (sparc_cpu_model == sparc_leon) {
 		/* Assign power management IDLE handler */
 		if (pmc_leon_need_fixup())
-			pm_idle = pmc_leon_idle_fixup;
+			sparc_idle = pmc_leon_idle_fixup;
 		else
-			pm_idle = pmc_leon_idle;
+			sparc_idle = pmc_leon_idle;
 
 		printk(KERN_INFO "leon: power management initialized\n");
 	}

View file

@@ -17,6 +17,7 @@
 #include <asm/oplib.h>
 #include <asm/uaccess.h>
 #include <asm/auxio.h>
+#include <asm/processor.h>
 
 /* Debug
  *
@@ -63,7 +64,7 @@ static int pmc_probe(struct platform_device *op)
 
 #ifndef PMC_NO_IDLE
 	/* Assign power management IDLE handler */
-	pm_idle = pmc_swift_idle;
+	sparc_idle = pmc_swift_idle;
 #endif
 
 	printk(KERN_INFO "%s: power management initialized\n", PMC_DEVNAME);

View file

@@ -43,8 +43,7 @@
  * Power management idle function
  * Set in pm platform drivers (apc.c and pmc.c)
  */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
+void (*sparc_idle)(void);
 
 /*
  * Power-off handler instantiation for pm.h compliance
@@ -75,8 +74,8 @@ void cpu_idle(void)
 	/* endless idle loop with no priority at all */
 	for (;;) {
 		while (!need_resched()) {
-			if (pm_idle)
-				(*pm_idle)();
+			if (sparc_idle)
+				(*sparc_idle)();
 			else
 				cpu_relax();
 		}

View file

@@ -45,11 +45,6 @@ static const char * const processor_modes[] = {
 	"UK18", "UK19", "UK1A", "EXTN", "UK1C", "UK1D", "UK1E", "SUSR"
 };
 
-/*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way.
- */
 void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */

View file

@@ -1922,6 +1922,7 @@ config APM_DO_ENABLE
 	  this feature.
 
 config APM_CPU_IDLE
+	depends on CPU_IDLE
 	bool "Make CPU Idle calls when idle"
 	---help---
 	  Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.

View file

@@ -4,7 +4,8 @@
 #define MWAIT_SUBSTATE_MASK		0xf
 #define MWAIT_CSTATE_MASK		0xf
 #define MWAIT_SUBSTATE_SIZE		4
-#define MWAIT_MAX_NUM_CSTATES		8
+#define MWAIT_HINT2CSTATE(hint)		(((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
+#define MWAIT_HINT2SUBSTATE(hint)	((hint) & MWAIT_CSTATE_MASK)
 
 #define CPUID_MWAIT_LEAF		5
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED	0x1
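
A quick sanity check of the two new helpers, compiled as plain userspace C (macros copied from above; the main() wrapper is illustration only): the Atom C6 hint 0x52 splits into C-state nibble 5 and sub-state nibble 2.

	#include <stdio.h>

	#define MWAIT_SUBSTATE_MASK	0xf
	#define MWAIT_CSTATE_MASK	0xf
	#define MWAIT_SUBSTATE_SIZE	4
	#define MWAIT_HINT2CSTATE(hint)	(((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
	#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)

	int main(void)
	{
		unsigned int hint = 0x52;	/* the "C6-ATM" MWAIT hint */

		/* prints: hint 0x52 -> cstate 5, substate 2 */
		printf("hint 0x%02x -> cstate %u, substate %u\n", hint,
		       MWAIT_HINT2CSTATE(hint), MWAIT_HINT2SUBSTATE(hint));
		return 0;
	}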

View file

@@ -89,7 +89,6 @@ struct cpuinfo_x86 {
 	char			wp_works_ok;	/* It doesn't on 386's */
 
 	/* Problems on some 486Dx4's and old 386's: */
-	char			hlt_works_ok;
 	char			hard_math;
 	char			rfu;
 	char			fdiv_bug;
@@ -165,15 +164,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 
 extern const struct seq_operations cpuinfo_op;
 
-static inline int hlt_works(int cpu)
-{
-#ifdef CONFIG_X86_32
-	return cpu_data(cpu).hlt_works_ok;
-#else
-	return 1;
-#endif
-}
-
 #define cache_line_size()	(boot_cpu_data.x86_cache_alignment)
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
@@ -725,7 +715,7 @@ extern unsigned long boot_option_idle_override;
 extern bool			amd_e400_c1e_detected;
 
 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
-			 IDLE_POLL, IDLE_FORCE_MWAIT};
+			 IDLE_POLL};
 
 extern void enable_sep_cpu(void);
 extern int sysenter_setup(void);
@@ -998,7 +988,11 @@ extern unsigned long arch_align_stack(unsigned long sp);
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
-bool set_pm_idle_to_default(void);
+#ifdef CONFIG_XEN
+bool xen_set_default_idle(void);
+#else
+#define xen_set_default_idle 0
+#endif
 
 void stop_this_cpu(void *dummy);

View file

@@ -103,6 +103,8 @@
 #define DEBUGCTLMSR_BTS_OFF_USR		(1UL << 10)
 #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI	(1UL << 11)
 
+#define MSR_IA32_POWER_CTL		0x000001fc
+
 #define MSR_IA32_MC0_CTL		0x00000400
 #define MSR_IA32_MC0_STATUS		0x00000401
 #define MSR_IA32_MC0_ADDR		0x00000402
@@ -272,6 +274,7 @@
 #define MSR_IA32_PLATFORM_ID		0x00000017
 #define MSR_IA32_EBL_CR_POWERON		0x0000002a
 #define MSR_EBC_FREQUENCY_ID		0x0000002c
+#define MSR_SMI_COUNT			0x00000034
 #define MSR_IA32_FEATURE_CONTROL	0x0000003a
 #define MSR_IA32_TSC_ADJUST		0x0000003b
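
MSR_SMI_COUNT (0x34) is the counter the turbostat change in this merge now reports by default. A hedged userspace sketch of reading it, assuming the msr driver is loaded and /dev/cpu/0/msr is accessible (pread at the MSR address is the standard convention for that device node):

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	#define MSR_SMI_COUNT 0x00000034

	int main(void)
	{
		uint64_t smi;
		int fd = open("/dev/cpu/0/msr", O_RDONLY);	/* needs CONFIG_X86_MSR */

		if (fd < 0 || pread(fd, &smi, sizeof(smi), MSR_SMI_COUNT) != sizeof(smi)) {
			perror("MSR_SMI_COUNT");
			return 1;
		}
		printf("SMIs since reset: %llu\n", (unsigned long long)smi);
		return 0;
	}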

View file

@@ -232,6 +232,7 @@
 #include <linux/acpi.h>
 #include <linux/syscore_ops.h>
 #include <linux/i8253.h>
+#include <linux/cpuidle.h>
 
 #include <asm/uaccess.h>
 #include <asm/desc.h>
@@ -360,13 +361,35 @@ struct apm_user {
  * idle percentage above which bios idle calls are done
  */
 #ifdef CONFIG_APM_CPU_IDLE
-#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012
 #define DEFAULT_IDLE_THRESHOLD	95
 #else
 #define DEFAULT_IDLE_THRESHOLD	100
 #endif
 #define DEFAULT_IDLE_PERIOD	(100 / 3)
 
+static int apm_cpu_idle(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv, int index);
+
+static struct cpuidle_driver apm_idle_driver = {
+	.name = "apm_idle",
+	.owner = THIS_MODULE,
+	.en_core_tk_irqen = 1,
+	.states = {
+		{ /* entry 0 is for polling */ },
+		{ /* entry 1 is for APM idle */
+			.name = "APM",
+			.desc = "APM idle",
+			.flags = CPUIDLE_FLAG_TIME_VALID,
+			.exit_latency = 250,	/* WAG */
+			.target_residency = 500,	/* WAG */
+			.enter = &apm_cpu_idle
+		},
+	},
+	.state_count = 2,
+};
+
+static struct cpuidle_device apm_cpuidle_device;
+
 /*
  * Local variables
  */
@@ -377,7 +400,6 @@ static struct {
 static int clock_slowed;
 static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
 static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
-static int set_pm_idle;
 static int suspends_pending;
 static int standbys_pending;
 static int ignore_sys_suspend;
@@ -884,8 +906,6 @@ static void apm_do_busy(void)
 #define IDLE_CALC_LIMIT	(HZ * 100)
 #define IDLE_LEAKY_MAX	16
 
-static void (*original_pm_idle)(void) __read_mostly;
-
 /**
  * apm_cpu_idle		-	cpu idling for APM capable Linux
  *
@@ -894,7 +914,8 @@
  * Furthermore it calls the system default idle routine.
  */
 
-static void apm_cpu_idle(void)
+static int apm_cpu_idle(struct cpuidle_device *dev,
+	struct cpuidle_driver *drv, int index)
 {
 	static int use_apm_idle; /* = 0 */
 	static unsigned int last_jiffies; /* = 0 */
@@ -904,7 +925,6 @@
 	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
 	unsigned int bucket;
 
-	WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
 recalc:
 	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
 		use_apm_idle = 0;
@@ -950,9 +970,6 @@ recalc:
 				break;
 			}
 		}
-		if (original_pm_idle)
-			original_pm_idle();
-		else
-			default_idle();
+		default_idle();
 		local_irq_disable();
 		jiffies_since_last_check = jiffies - last_jiffies;
@@ -963,7 +980,7 @@
 	if (apm_idle_done)
 		apm_do_busy();
 
-	local_irq_enable();
+	return index;
 }
 
 /**
@@ -2381,9 +2398,9 @@ static int __init apm_init(void)
 	if (HZ != 100)
 		idle_period = (idle_period * HZ) / 100;
 	if (idle_threshold < 100) {
-		original_pm_idle = pm_idle;
-		pm_idle  = apm_cpu_idle;
-		set_pm_idle = 1;
+		if (!cpuidle_register_driver(&apm_idle_driver))
+			if (cpuidle_register_device(&apm_cpuidle_device))
+				cpuidle_unregister_driver(&apm_idle_driver);
 	}
 
 	return 0;
@@ -2393,15 +2410,9 @@ static void __exit apm_exit(void)
 {
 	int error;
 
-	if (set_pm_idle) {
-		pm_idle = original_pm_idle;
-		/*
-		 * We are about to unload the current idle thread pm callback
-		 * (pm_idle), Wait for all processors to update cached/local
-		 * copies of pm_idle before proceeding.
-		 */
-		kick_all_cpus_sync();
-	}
+	cpuidle_unregister_device(&apm_cpuidle_device);
+	cpuidle_unregister_driver(&apm_idle_driver);
+
 	if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
 	    && (apm_info.connection_version > 0x0100)) {
 		error = apm_engage_power_management(APM_DEVICE_ALL, 0);
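
The init/exit changes above follow the standard cpuidle ordering rules: a driver must be registered before any of its devices and torn down after every one of them. Reduced to a skeleton (my_driver/my_device are hypothetical placeholders, not names from this patch):

	static int __init my_idle_init(void)
	{
		int ret = cpuidle_register_driver(&my_driver);

		if (ret)
			return ret;
		ret = cpuidle_register_device(&my_device);
		if (ret)
			cpuidle_unregister_driver(&my_driver);	/* roll back */
		return ret;
	}

	static void __exit my_idle_exit(void)
	{
		cpuidle_unregister_device(&my_device);	/* devices first... */
		cpuidle_unregister_driver(&my_driver);	/* ...then the driver */
	}

Note that apm_init() deliberately swallows the registration error, since a failure just leaves APM idle unused; the sketch propagates it instead.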

View file

@@ -17,15 +17,6 @@
 #include <asm/paravirt.h>
 #include <asm/alternative.h>
 
-static int __init no_halt(char *s)
-{
-	WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n");
-	boot_cpu_data.hlt_works_ok = 0;
-	return 1;
-}
-
-__setup("no-hlt", no_halt);
-
 static int __init no_387(char *s)
 {
 	boot_cpu_data.hard_math = 0;
@@ -89,23 +80,6 @@ static void __init check_fpu(void)
 		pr_warn("Hmm, FPU with FDIV bug\n");
 }
 
-static void __init check_hlt(void)
-{
-	if (boot_cpu_data.x86 >= 5 || paravirt_enabled())
-		return;
-
-	pr_info("Checking 'hlt' instruction... ");
-	if (!boot_cpu_data.hlt_works_ok) {
-		pr_cont("disabled\n");
-		return;
-	}
-	halt();
-	halt();
-	halt();
-	halt();
-	pr_cont("OK\n");
-}
-
 /*
  * Check whether we are able to run this kernel safely on SMP.
  *
@@ -129,7 +103,6 @@ void __init check_bugs(void)
 	print_cpu_info(&boot_cpu_data);
 #endif
 	check_config();
-	check_hlt();
 	init_utsname()->machine[1] =
 		'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
 	alternative_instructions();

View file

@@ -28,7 +28,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 {
 	seq_printf(m,
 		   "fdiv_bug\t: %s\n"
-		   "hlt_bug\t\t: %s\n"
 		   "f00f_bug\t: %s\n"
 		   "coma_bug\t: %s\n"
 		   "fpu\t\t: %s\n"
@@ -36,7 +35,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
 		   "cpuid level\t: %d\n"
 		   "wp\t\t: %s\n",
 		   c->fdiv_bug ? "yes" : "no",
-		   c->hlt_works_ok ? "no" : "yes",
 		   c->f00f_bug ? "yes" : "no",
 		   c->coma_bug ? "yes" : "no",
 		   c->hard_math ? "yes" : "no",

View file

@@ -268,13 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-#ifdef CONFIG_APM_MODULE
-EXPORT_SYMBOL(pm_idle);
-#endif
+static void (*x86_idle)(void);
 
 #ifndef CONFIG_SMP
 static inline void play_dead(void)
@@ -351,7 +345,7 @@ void cpu_idle(void)
 			rcu_idle_enter();
 
 			if (cpuidle_idle_call())
-				pm_idle();
+				x86_idle();
 
 			rcu_idle_exit();
 			start_critical_timings();
@@ -394,14 +388,16 @@ void default_idle(void)
 EXPORT_SYMBOL(default_idle);
 #endif
 
-bool set_pm_idle_to_default(void)
+#ifdef CONFIG_XEN
+bool xen_set_default_idle(void)
 {
-	bool ret = !!pm_idle;
+	bool ret = !!x86_idle;
 
-	pm_idle = default_idle;
+	x86_idle = default_idle;
 
 	return ret;
 }
+#endif
 
 void stop_this_cpu(void *dummy)
 {
 	local_irq_disable();
@@ -411,30 +407,9 @@ void stop_this_cpu(void *dummy)
 	set_cpu_online(smp_processor_id(), false);
 
 	disable_local_APIC();
 
-	for (;;) {
-		if (hlt_works(smp_processor_id()))
-			halt();
-	}
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
-	if (!need_resched()) {
-		trace_cpu_idle_rcuidle(1, smp_processor_id());
-		if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
-			clflush((void *)&current_thread_info()->flags);
-
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__sti_mwait(0, 0);
-		else
-			local_irq_enable();
-		trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
-	} else
-		local_irq_enable();
-}
+	for (;;)
+		halt();
+}
 
 /*
  * On SMP it's slightly faster (but much more power-consuming!)
@@ -450,53 +425,6 @@ static void poll_idle(void)
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
 
-/*
- * mwait selection logic:
- *
- * It depends on the CPU. For AMD CPUs that support MWAIT this is
- * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
- * then depend on a clock divisor and current Pstate of the core. If
- * all cores of a processor are in halt state (C1) the processor can
- * enter the C1E (C1 enhanced) state. If mwait is used this will never
- * happen.
- *
- * idle=mwait overrides this decision and forces the usage of mwait.
- */
-
-#define MWAIT_INFO			0x05
-#define MWAIT_ECX_EXTENDED_INFO		0x01
-#define MWAIT_EDX_C1			0xf0
-
-int mwait_usable(const struct cpuinfo_x86 *c)
-{
-	u32 eax, ebx, ecx, edx;
-
-	/* Use mwait if idle=mwait boot option is given */
-	if (boot_option_idle_override == IDLE_FORCE_MWAIT)
-		return 1;
-
-	/*
-	 * Any idle= boot option other than idle=mwait means that we must not
-	 * use mwait. Eg: idle=halt or idle=poll or idle=nomwait
-	 */
-	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
-		return 0;
-
-	if (c->cpuid_level < MWAIT_INFO)
-		return 0;
-
-	cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
-	/* Check, whether EDX has extended info about MWAIT */
-	if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
-		return 1;
-
-	/*
-	 * edx enumeratios MONITOR/MWAIT extensions. Check, whether
-	 * C1 supports MWAIT
-	 */
-	return (edx & MWAIT_EDX_C1);
-}
-
 bool amd_e400_c1e_detected;
 EXPORT_SYMBOL(amd_e400_c1e_detected);
@@ -561,31 +489,24 @@ static void amd_e400_idle(void)
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-	if (pm_idle == poll_idle && smp_num_siblings > 1) {
+	if (x86_idle == poll_idle && smp_num_siblings > 1)
 		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
-	}
 #endif
-	if (pm_idle)
+	if (x86_idle)
 		return;
 
-	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
-		/*
-		 * One CPU supports mwait => All CPUs supports mwait
-		 */
-		pr_info("using mwait in idle threads\n");
-		pm_idle = mwait_idle;
-	} else if (cpu_has_amd_erratum(amd_erratum_400)) {
+	if (cpu_has_amd_erratum(amd_erratum_400)) {
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
 		pr_info("using AMD E400 aware idle routine\n");
-		pm_idle = amd_e400_idle;
+		x86_idle = amd_e400_idle;
 	} else
-		pm_idle = default_idle;
+		x86_idle = default_idle;
 }
 
 void __init init_amd_e400_c1e_mask(void)
 {
 	/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
-	if (pm_idle == amd_e400_idle)
+	if (x86_idle == amd_e400_idle)
 		zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
 }
@@ -596,11 +517,8 @@ static int __init idle_setup(char *str)
 	if (!strcmp(str, "poll")) {
 		pr_info("using polling idle threads\n");
-		pm_idle = poll_idle;
+		x86_idle = poll_idle;
 		boot_option_idle_override = IDLE_POLL;
-	} else if (!strcmp(str, "mwait")) {
-		boot_option_idle_override = IDLE_FORCE_MWAIT;
-		WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
 	} else if (!strcmp(str, "halt")) {
 		/*
 		 * When the boot option of idle=halt is added, halt is
@@ -609,7 +527,7 @@
 		 * To continue to load the CPU idle driver, don't touch
 		 * the boot_option_idle_override.
 		 */
-		pm_idle = default_idle;
+		x86_idle = default_idle;
 		boot_option_idle_override = IDLE_HALT;
 	} else if (!strcmp(str, "nomwait")) {
 		/*

View file

@@ -1369,7 +1369,7 @@ static inline void mwait_play_dead(void)
 	void *mwait_ptr;
 	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
 
-	if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
+	if (!this_cpu_has(X86_FEATURE_MWAIT))
 		return;
 	if (!this_cpu_has(X86_FEATURE_CLFLSH))
 		return;

View file

@@ -556,12 +556,9 @@ void __init xen_arch_setup(void)
 	       COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);
 
 	/* Set up idle, making sure it calls safe_halt() pvop */
-#ifdef CONFIG_X86_32
-	boot_cpu_data.hlt_works_ok = 1;
-#endif
 	disable_cpuidle();
 	disable_cpufreq();
-	WARN_ON(set_pm_idle_to_default());
+	WARN_ON(xen_set_default_idle());
 	fiddle_vdso();
 #ifdef CONFIG_NUMA
 	numa_off = 1;

View file

@@ -28,19 +28,12 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
-#include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/init.h>
-#include <linux/cpufreq.h>
-#include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
-#include <linux/moduleparam.h>
 #include <linux/sched.h>	/* need_resched() */
-#include <linux/pm_qos.h>
 #include <linux/clockchips.h>
 #include <linux/cpuidle.h>
-#include <linux/irqflags.h>
 
 /*
  * Include the apic definitions for x86 to have the APIC timer related defines
@@ -52,23 +45,14 @@
 #include <asm/apic.h>
 #endif
 
-#include <asm/io.h>
-#include <asm/uaccess.h>
-
 #include <acpi/acpi_bus.h>
 #include <acpi/processor.h>
-#include <asm/processor.h>
 
 #define PREFIX "ACPI: "
 
 #define ACPI_PROCESSOR_CLASS            "processor"
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
 ACPI_MODULE_NAME("processor_idle");
-#define PM_TIMER_TICK_NS	(1000000000ULL/ACPI_PM_TIMER_FREQUENCY)
-#define C2_OVERHEAD		1	/* 1us */
-#define C3_OVERHEAD		1	/* 1us */
-#define PM_TIMER_TICKS_TO_US(p) \
-	(((p) * 1000)/(ACPI_PM_TIMER_FREQUENCY/1000))
 
 static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER;
 module_param(max_cstate, uint, 0000);
@@ -82,10 +66,11 @@ module_param(latency_factor, uint, 0644);
 
 static DEFINE_PER_CPU(struct cpuidle_device *, acpi_cpuidle_device);
 
+static struct acpi_processor_cx *acpi_cstate[CPUIDLE_STATE_MAX];
+
 static int disabled_by_idle_boot_param(void)
 {
 	return boot_option_idle_override == IDLE_POLL ||
-		boot_option_idle_override == IDLE_FORCE_MWAIT ||
 		boot_option_idle_override == IDLE_HALT;
 }
@@ -737,8 +722,7 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
 		struct cpuidle_driver *drv, int index)
 {
 	struct acpi_processor *pr;
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+	struct acpi_processor_cx *cx = acpi_cstate[index];
 
 	pr = __this_cpu_read(processors);
@@ -761,8 +745,7 @@
  */
 static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
 {
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+	struct acpi_processor_cx *cx = acpi_cstate[index];
 
 	ACPI_FLUSH_CPU_CACHE();
@@ -792,8 +775,7 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
 		struct cpuidle_driver *drv, int index)
 {
 	struct acpi_processor *pr;
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+	struct acpi_processor_cx *cx = acpi_cstate[index];
 
 	pr = __this_cpu_read(processors);
@@ -851,8 +833,7 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
 		struct cpuidle_driver *drv, int index)
 {
 	struct acpi_processor *pr;
-	struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
-	struct acpi_processor_cx *cx = cpuidle_get_statedata(state_usage);
+	struct acpi_processor_cx *cx = acpi_cstate[index];
 
 	pr = __this_cpu_read(processors);
@@ -944,13 +925,13 @@
  * device i.e. per-cpu data
  *
  * @pr: the ACPI processor
+ * @dev : the cpuidle device
  */
-static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
+static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr,
+					   struct cpuidle_device *dev)
 {
 	int i, count = CPUIDLE_DRIVER_STATE_START;
 	struct acpi_processor_cx *cx;
-	struct cpuidle_state_usage *state_usage;
-	struct cpuidle_device *dev = per_cpu(acpi_cpuidle_device, pr->id);
 
 	if (!pr->flags.power_setup_done)
 		return -EINVAL;
@@ -969,7 +950,6 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
 	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
 		cx = &pr->power.states[i];
-		state_usage = &dev->states_usage[count];
 
 		if (!cx->valid)
 			continue;
@@ -980,8 +960,7 @@ static int acpi_processor_setup_cpuidle_cx(struct acpi_processor *pr)
 		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
 			continue;
 #endif
-
-		cpuidle_set_statedata(state_usage, cx);
+		acpi_cstate[count] = cx;
 
 		count++;
 		if (count == CPUIDLE_STATE_MAX)
@@ -1105,7 +1084,7 @@ int acpi_processor_hotplug(struct acpi_processor *pr)
 	cpuidle_disable_device(dev);
 	acpi_processor_get_power_info(pr);
 	if (pr->flags.power) {
-		acpi_processor_setup_cpuidle_cx(pr);
+		acpi_processor_setup_cpuidle_cx(pr, dev);
 		ret = cpuidle_enable_device(dev);
 	}
 	cpuidle_resume_and_unlock();
@@ -1163,8 +1142,8 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
 			continue;
 		acpi_processor_get_power_info(_pr);
 		if (_pr->flags.power) {
-			acpi_processor_setup_cpuidle_cx(_pr);
 			dev = per_cpu(acpi_cpuidle_device, cpu);
+			acpi_processor_setup_cpuidle_cx(_pr, dev);
 			cpuidle_enable_device(dev);
 		}
 	}
@@ -1233,7 +1212,7 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr)
 		return -ENOMEM;
 	per_cpu(acpi_cpuidle_device, pr->id) = dev;
 
-	acpi_processor_setup_cpuidle_cx(pr);
+	acpi_processor_setup_cpuidle_cx(pr, dev);
 
 	/* Register per-cpu cpuidle_device. Cpuidle driver
 	 * must already be registered before registering device
View file

@ -74,7 +74,7 @@ static struct cpuidle_driver intel_idle_driver = {
.en_core_tk_irqen = 1, .en_core_tk_irqen = 1,
}; };
/* intel_idle.max_cstate=0 disables driver */ /* intel_idle.max_cstate=0 disables driver */
static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1; static int max_cstate = CPUIDLE_STATE_MAX - 1;
static unsigned int mwait_substates; static unsigned int mwait_substates;
@ -90,6 +90,7 @@ struct idle_cpu {
* Indicate which enable bits to clear here. * Indicate which enable bits to clear here.
*/ */
unsigned long auto_demotion_disable_flags; unsigned long auto_demotion_disable_flags;
bool disable_promotion_to_c1e;
}; };
static const struct idle_cpu *icpu; static const struct idle_cpu *icpu;
@ -108,162 +109,206 @@ static struct cpuidle_state *cpuidle_state_table;
*/ */
#define CPUIDLE_FLAG_TLB_FLUSHED 0x10000 #define CPUIDLE_FLAG_TLB_FLUSHED 0x10000
/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
* the C-state (top nibble) and sub-state (bottom nibble)
* 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc.
*
* We store the hint at the top of our "flags" for each state.
*/
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
/* /*
* States are indexed by the cstate number, * States are indexed by the cstate number,
* which is also the index into the MWAIT hint array. * which is also the index into the MWAIT hint array.
* Thus C0 is a dummy. * Thus C0 is a dummy.
*/ */
static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { static struct cpuidle_state nehalem_cstates[CPUIDLE_STATE_MAX] = {
{ /* MWAIT C0 */ }, {
{ /* MWAIT C1 */
.name = "C1-NHM", .name = "C1-NHM",
.desc = "MWAIT 0x00", .desc = "MWAIT 0x00",
.flags = CPUIDLE_FLAG_TIME_VALID, .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 3, .exit_latency = 3,
.target_residency = 6, .target_residency = 6,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C2 */ {
.name = "C1E-NHM",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C3-NHM", .name = "C3-NHM",
.desc = "MWAIT 0x10", .desc = "MWAIT 0x10",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 20, .exit_latency = 20,
.target_residency = 80, .target_residency = 80,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C3 */ {
.name = "C6-NHM", .name = "C6-NHM",
.desc = "MWAIT 0x20", .desc = "MWAIT 0x20",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 200, .exit_latency = 200,
.target_residency = 800, .target_residency = 800,
.enter = &intel_idle }, .enter = &intel_idle },
{
.enter = NULL }
}; };
static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = { static struct cpuidle_state snb_cstates[CPUIDLE_STATE_MAX] = {
{ /* MWAIT C0 */ }, {
{ /* MWAIT C1 */
.name = "C1-SNB", .name = "C1-SNB",
.desc = "MWAIT 0x00", .desc = "MWAIT 0x00",
.flags = CPUIDLE_FLAG_TIME_VALID, .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 1, .exit_latency = 2,
.target_residency = 1, .target_residency = 2,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C2 */ {
.name = "C1E-SNB",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C3-SNB", .name = "C3-SNB",
.desc = "MWAIT 0x10", .desc = "MWAIT 0x10",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80, .exit_latency = 80,
.target_residency = 211, .target_residency = 211,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C3 */ {
.name = "C6-SNB", .name = "C6-SNB",
.desc = "MWAIT 0x20", .desc = "MWAIT 0x20",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 104, .exit_latency = 104,
.target_residency = 345, .target_residency = 345,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C4 */ {
.name = "C7-SNB", .name = "C7-SNB",
.desc = "MWAIT 0x30", .desc = "MWAIT 0x30",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 109, .exit_latency = 109,
.target_residency = 345, .target_residency = 345,
.enter = &intel_idle }, .enter = &intel_idle },
{
.enter = NULL }
}; };
static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = { static struct cpuidle_state ivb_cstates[CPUIDLE_STATE_MAX] = {
{ /* MWAIT C0 */ }, {
{ /* MWAIT C1 */
.name = "C1-IVB", .name = "C1-IVB",
.desc = "MWAIT 0x00", .desc = "MWAIT 0x00",
.flags = CPUIDLE_FLAG_TIME_VALID, .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 1, .exit_latency = 1,
.target_residency = 1, .target_residency = 1,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C2 */ {
.name = "C1E-IVB",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C3-IVB", .name = "C3-IVB",
.desc = "MWAIT 0x10", .desc = "MWAIT 0x10",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 59, .exit_latency = 59,
.target_residency = 156, .target_residency = 156,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C3 */ {
.name = "C6-IVB", .name = "C6-IVB",
.desc = "MWAIT 0x20", .desc = "MWAIT 0x20",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 80, .exit_latency = 80,
.target_residency = 300, .target_residency = 300,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C4 */ {
.name = "C7-IVB", .name = "C7-IVB",
.desc = "MWAIT 0x30", .desc = "MWAIT 0x30",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 87, .exit_latency = 87,
.target_residency = 300, .target_residency = 300,
.enter = &intel_idle }, .enter = &intel_idle },
{
.enter = NULL }
}; };
static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = {
{ /* MWAIT C0 */ }, {
{ /* MWAIT C1 */ .name = "C1-HSW",
.name = "C1-ATM",
.desc = "MWAIT 0x00", .desc = "MWAIT 0x00",
.flags = CPUIDLE_FLAG_TIME_VALID, .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 1, .exit_latency = 2,
.target_residency = 4, .target_residency = 2,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C2 */ {
.name = "C1E-HSW",
.desc = "MWAIT 0x01",
.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C3-HSW",
.desc = "MWAIT 0x10",
.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 33,
.target_residency = 100,
.enter = &intel_idle },
{
.name = "C6-HSW",
.desc = "MWAIT 0x20",
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 133,
.target_residency = 400,
.enter = &intel_idle },
{
.name = "C7s-HSW",
.desc = "MWAIT 0x32",
.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 166,
.target_residency = 500,
.enter = &intel_idle },
{
.enter = NULL }
};
static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = {
{
.name = "C1E-ATM",
.desc = "MWAIT 0x00",
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 10,
.target_residency = 20,
.enter = &intel_idle },
{
.name = "C2-ATM", .name = "C2-ATM",
.desc = "MWAIT 0x10", .desc = "MWAIT 0x10",
.flags = CPUIDLE_FLAG_TIME_VALID, .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID,
.exit_latency = 20, .exit_latency = 20,
.target_residency = 80, .target_residency = 80,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C3 */ }, {
{ /* MWAIT C4 */
.name = "C4-ATM", .name = "C4-ATM",
.desc = "MWAIT 0x30", .desc = "MWAIT 0x30",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 100, .exit_latency = 100,
.target_residency = 400, .target_residency = 400,
.enter = &intel_idle }, .enter = &intel_idle },
{ /* MWAIT C5 */ }, {
{ /* MWAIT C6 */
.name = "C6-ATM", .name = "C6-ATM",
.desc = "MWAIT 0x52", .desc = "MWAIT 0x52",
.flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 140, .exit_latency = 140,
.target_residency = 560, .target_residency = 560,
.enter = &intel_idle }, .enter = &intel_idle },
};
static long get_driver_data(int cstate)
{ {
int driver_data; .enter = NULL }
switch (cstate) { };
case 1: /* MWAIT C1 */
driver_data = 0x00;
break;
case 2: /* MWAIT C2 */
driver_data = 0x10;
break;
case 3: /* MWAIT C3 */
driver_data = 0x20;
break;
case 4: /* MWAIT C4 */
driver_data = 0x30;
break;
case 5: /* MWAIT C5 */
driver_data = 0x40;
break;
case 6: /* MWAIT C6 */
driver_data = 0x52;
break;
default:
driver_data = 0x00;
}
return driver_data;
}
/** /**
* intel_idle * intel_idle
@ -278,8 +323,7 @@ static int intel_idle(struct cpuidle_device *dev,
{ {
unsigned long ecx = 1; /* break on interrupt flag */ unsigned long ecx = 1; /* break on interrupt flag */
struct cpuidle_state *state = &drv->states[index]; struct cpuidle_state *state = &drv->states[index];
struct cpuidle_state_usage *state_usage = &dev->states_usage[index]; unsigned long eax = flg2MWAIT(state->flags);
unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage);
unsigned int cstate; unsigned int cstate;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
@ -362,10 +406,19 @@ static void auto_demotion_disable(void *dummy)
msr_bits &= ~(icpu->auto_demotion_disable_flags); msr_bits &= ~(icpu->auto_demotion_disable_flags);
wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits); wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
} }
static void c1e_promotion_disable(void *dummy)
{
unsigned long long msr_bits;
rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
msr_bits &= ~0x2;
wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}
static const struct idle_cpu idle_cpu_nehalem = { static const struct idle_cpu idle_cpu_nehalem = {
.state_table = nehalem_cstates, .state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
.disable_promotion_to_c1e = true,
}; };
static const struct idle_cpu idle_cpu_atom = { static const struct idle_cpu idle_cpu_atom = {
@ -379,10 +432,17 @@ static const struct idle_cpu idle_cpu_lincroft = {
static const struct idle_cpu idle_cpu_snb = { static const struct idle_cpu idle_cpu_snb = {
.state_table = snb_cstates, .state_table = snb_cstates,
.disable_promotion_to_c1e = true,
}; };
static const struct idle_cpu idle_cpu_ivb = { static const struct idle_cpu idle_cpu_ivb = {
.state_table = ivb_cstates, .state_table = ivb_cstates,
.disable_promotion_to_c1e = true,
};
static const struct idle_cpu idle_cpu_hsw = {
.state_table = hsw_cstates,
.disable_promotion_to_c1e = true,
}; };
#define ICPU(model, cpu) \ #define ICPU(model, cpu) \
@ -402,6 +462,9 @@ static const struct x86_cpu_id intel_idle_ids[] = {
ICPU(0x2d, idle_cpu_snb), ICPU(0x2d, idle_cpu_snb),
ICPU(0x3a, idle_cpu_ivb), ICPU(0x3a, idle_cpu_ivb),
ICPU(0x3e, idle_cpu_ivb), ICPU(0x3e, idle_cpu_ivb),
ICPU(0x3c, idle_cpu_hsw),
ICPU(0x3f, idle_cpu_hsw),
ICPU(0x45, idle_cpu_hsw),
{} {}
}; };
MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids); MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -484,32 +547,31 @@ static int intel_idle_cpuidle_driver_init(void)
 	drv->state_count = 1;
 
-	for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
-		int num_substates;
+	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+		int num_substates, mwait_hint, mwait_cstate, mwait_substate;
 
-		if (cstate > max_cstate) {
+		if (cpuidle_state_table[cstate].enter == NULL)
+			break;
+
+		if (cstate + 1 > max_cstate) {
 			printk(PREFIX "max_cstate %d reached\n",
 				max_cstate);
 			break;
 		}
 
-		/* does the state exist in CPUID.MWAIT? */
-		num_substates = (mwait_substates >> ((cstate) * 4))
-					& MWAIT_SUBSTATE_MASK;
-		if (num_substates == 0)
-			continue;
-		/* is the state not enabled? */
-		if (cpuidle_state_table[cstate].enter == NULL) {
-			/* does the driver not know about the state? */
-			if (*cpuidle_state_table[cstate].name == '\0')
-				pr_debug(PREFIX "unaware of model 0x%x"
-					" MWAIT %d please"
-					" contact lenb@kernel.org\n",
-					boot_cpu_data.x86_model, cstate);
-			continue;
-		}
-
-		if ((cstate > 2) &&
+		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
+		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
+		mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
+
+		/* does the state exist in CPUID.MWAIT? */
+		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
+					& MWAIT_SUBSTATE_MASK;
+
+		/* if sub-state in table is not enumerated by CPUID */
+		if ((mwait_substate + 1) > num_substates)
+			continue;
+
+		if (((mwait_cstate + 1) > 2) &&
 			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 			mark_tsc_unstable("TSC halts in idle"
 					" states deeper than C2");
@@ -523,6 +585,9 @@ static int intel_idle_cpuidle_driver_init(void)
 	if (icpu->auto_demotion_disable_flags)
 		on_each_cpu(auto_demotion_disable, NULL, 1);
 
+	if (icpu->disable_promotion_to_c1e)	/* each-cpu is redundant */
+		on_each_cpu(c1e_promotion_disable, NULL, 1);
+
 	return 0;
 }
@@ -541,25 +606,28 @@ static int intel_idle_cpu_init(int cpu)
 	dev->state_count = 1;
 
-	for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
-		int num_substates;
+	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+		int num_substates, mwait_hint, mwait_cstate, mwait_substate;
 
-		if (cstate > max_cstate) {
+		if (cpuidle_state_table[cstate].enter == NULL)
+			continue;
+
+		if (cstate + 1 > max_cstate) {
 			printk(PREFIX "max_cstate %d reached\n", max_cstate);
 			break;
 		}
 
-		/* does the state exist in CPUID.MWAIT? */
-		num_substates = (mwait_substates >> ((cstate) * 4))
-					& MWAIT_SUBSTATE_MASK;
-		if (num_substates == 0)
-			continue;
-		/* is the state not enabled? */
-		if (cpuidle_state_table[cstate].enter == NULL)
-			continue;
-
-		dev->states_usage[dev->state_count].driver_data =
-			(void *)get_driver_data(cstate);
+		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
+		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
+		mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
+
+		/* does the state exist in CPUID.MWAIT? */
+		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
+					& MWAIT_SUBSTATE_MASK;
+
+		/* if sub-state in table is not enumerated by CPUID */
+		if ((mwait_substate + 1) > num_substates)
+			continue;
 
 		dev->state_count += 1;
 	}

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h

@@ -32,8 +32,6 @@ struct cpuidle_driver;
  ****************************/
 
 struct cpuidle_state_usage {
-	void		*driver_data;
-
 	unsigned long long	disable;
 	unsigned long long	usage;
 	unsigned long long	time; /* in US */
@@ -62,26 +60,6 @@ struct cpuidle_state {
 
 #define CPUIDLE_DRIVER_FLAGS_MASK	(0xFFFF0000)
 
-/**
- * cpuidle_get_statedata - retrieves private driver state data
- * @st_usage: the state usage statistics
- */
-static inline void *cpuidle_get_statedata(struct cpuidle_state_usage *st_usage)
-{
-	return st_usage->driver_data;
-}
-
-/**
- * cpuidle_set_statedata - stores private driver state data
- * @st_usage: the state usage statistics
- * @data: the private data
- */
-static inline void
-cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data)
-{
-	st_usage->driver_data = data;
-}
-
 struct cpuidle_device {
 	unsigned int		registered:1;
 	unsigned int		enabled:1;

diff --git a/include/linux/pm.h b/include/linux/pm.h

@@ -31,7 +31,6 @@
 /*
  * Callbacks for platform drivers to implement.
  */
-extern void (*pm_idle)(void);
 extern void (*pm_power_off)(void);
 extern void (*pm_power_off_prepare)(void);

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8

@@ -31,8 +31,6 @@ The \fB-S\fP option limits output to a 1-line System Summary for each interval.
 .PP
 The \fB-v\fP option increases verbosity.
 .PP
-The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34"
-.PP
 The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter.
 .PP
 The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter.
@@ -186,26 +184,24 @@ This is a weighted average, where the weight is %c0. ie. it is the total number
 un-halted cycles elapsed per time divided by the number of CPUs.
 .SH SMI COUNTING EXAMPLE
 On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter.
-Using the -m option, you can display how many SMIs have fired since reset, or if there
-are SMIs during the measurement interval, you can display the delta using the -d option.
+This counter is shown by default under the "SMI" column.
 .nf
-[root@x980 ~]# turbostat -m 0x34
-cor CPU    %c0  GHz  TSC MSR 0x034   %c1    %c3    %c6   %pc3   %pc6
-          1.41 1.82 3.38 0x00000000   8.92  37.82  51.85  17.37   0.55
-  0   0   3.73 2.03 3.38 0x00000055   1.72  48.25  46.31  17.38   0.55
-  0   6   0.14 1.63 3.38 0x00000056   5.30
-  1   2   2.51 1.80 3.38 0x00000056  15.65  29.33  52.52
-  1   8   0.10 1.65 3.38 0x00000056  18.05
-  2   4   1.16 1.68 3.38 0x00000056   5.87  24.47  68.50
-  2  10   0.10 1.63 3.38 0x00000056   6.93
-  8   1   3.84 1.91 3.38 0x00000056   1.36  50.65  44.16
-  8   7   0.08 1.64 3.38 0x00000056   5.12
-  9   3   1.82 1.73 3.38 0x00000056   7.59  24.21  66.38
-  9   9   0.09 1.68 3.38 0x00000056   9.32
- 10   5   1.66 1.65 3.38 0x00000056  15.10  50.00  33.23
- 10  11   1.72 1.65 3.38 0x00000056  15.05
+[root@x980 ~]# turbostat
+cor CPU    %c0  GHz  TSC SMI    %c1    %c3    %c6 CTMP   %pc3   %pc6
+          0.11 1.91 3.38   0   1.84   0.26  97.79   29   0.82  83.87
+  0   0   0.40 1.63 3.38   0  10.27   0.12  89.20   20   0.82  83.88
+  0   6   0.06 1.63 3.38   0  10.61
+  1   2   0.37 2.63 3.38   0   0.02   0.10  99.51   22
+  1   8   0.01 1.62 3.38   0   0.39
+  2   4   0.07 1.62 3.38   0   0.04   0.07  99.82   23
+  2  10   0.02 1.62 3.38   0   0.09
+  8   1   0.23 1.64 3.38   0   0.10   1.07  98.60   24
+  8   7   0.02 1.64 3.38   0   0.31
+  9   3   0.03 1.62 3.38   0   0.03   0.05  99.89   29
+  9   9   0.02 1.62 3.38   0   0.05
+ 10   5   0.07 1.62 3.38   0   0.08   0.12  99.73   27
+ 10  11   0.03 1.62 3.38   0   0.13
 ^C
-[root@x980 ~]#
 .fi
 .SH NOTES

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c

@@ -58,6 +58,7 @@ unsigned int extra_msr_offset32;
 unsigned int extra_msr_offset64;
 unsigned int extra_delta_offset32;
 unsigned int extra_delta_offset64;
+int do_smi;
 double bclk;
 unsigned int show_pkg;
 unsigned int show_core;
@@ -99,6 +100,7 @@ struct thread_data {
 	unsigned long long extra_delta64;
 	unsigned long long extra_msr32;
 	unsigned long long extra_delta32;
+	unsigned int smi_count;
 	unsigned int cpu_id;
 	unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE	0x2
@@ -248,6 +250,8 @@ void print_header(void)
 	if (has_aperf)
 		outp += sprintf(outp, "  GHz");
 	outp += sprintf(outp, "  TSC");
+	if (do_smi)
+		outp += sprintf(outp, " SMI");
 	if (extra_delta_offset32)
 		outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
 	if (extra_delta_offset64)
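
print_header() and format_counters() grow the new column with turbostat's
usual pattern of appending to one output buffer: sprintf() returns the number
of bytes written and outp advances by that amount. A toy illustration of the
idiom (not from the patch):

    #include <stdio.h>

    int main(void)
    {
        char buf[64];
        char *outp = buf;
        int do_smi = 1;                 /* stand-in for the global above */

        outp += sprintf(outp, "  TSC"); /* sprintf returns bytes written */
        if (do_smi)
            outp += sprintf(outp, " SMI");
        puts(buf);                      /* prints "  TSC SMI" */
        return 0;
    }
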
@@ -314,6 +318,8 @@ int dump_counters(struct thread_data *t, struct core_data *c,
 			extra_msr_offset32, t->extra_msr32);
 		fprintf(stderr, "msr0x%x: %016llX\n",
 			extra_msr_offset64, t->extra_msr64);
+		if (do_smi)
+			fprintf(stderr, "SMI: %08X\n", t->smi_count);
 	}
 
 	if (c) {
@@ -352,6 +358,7 @@ int dump_counters(struct thread_data *t, struct core_data *c,
  * RAM_W: %5.2
  * GHz: "GHz" 3 columns %3.2
  * TSC: "TSC" 3 columns %3.2
+ * SMI: "SMI" 4 columns %4d
  * percentage " %pc3" %6.2
  * Perf Status percentage: %5.2
  * "CTMP" 4 columns %4d
@@ -431,6 +438,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	/* TSC */
 	outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
 
+	/* SMI */
+	if (do_smi)
+		outp += sprintf(outp, "%4d", t->smi_count);
+
 	/* delta */
 	if (extra_delta_offset32)
 		outp += sprintf(outp, "  %11llu", t->extra_delta32);
@@ -645,6 +656,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
 	 */
 	old->extra_msr32 = new->extra_msr32;
 	old->extra_msr64 = new->extra_msr64;
+
+	if (do_smi)
+		old->smi_count = new->smi_count - old->smi_count;
 }
 
 int delta_cpu(struct thread_data *t, struct core_data *c,
@@ -672,6 +686,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	t->mperf = 0;
 	t->c1 = 0;
+	t->smi_count = 0;
 	t->extra_delta32 = 0;
 	t->extra_delta64 = 0;
@@ -802,6 +817,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			return -4;
 	}
 
+	if (do_smi) {
+		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
+			return -5;
+		t->smi_count = msr & 0xFFFFFFFF;
+	}
 	if (extra_delta_offset32) {
 		if (get_msr(cpu, extra_delta_offset32, &msr))
 			return -5;
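
MSR 0x34 (MSR_SMI_COUNT) on Nehalem and later counts SMIs since reset in its
low 32 bits; delta_thread() above turns two samples into a per-interval count,
with unsigned arithmetic absorbing any wrap. The same measurement can be
sketched from userspace against the msr driver (assumes root and a loaded msr
module; not part of the patch):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    static uint32_t read_smi_count(int fd)
    {
        uint64_t msr = 0;

        if (pread(fd, &msr, sizeof(msr), 0x34) != sizeof(msr))
            perror("pread MSR_SMI_COUNT");
        return (uint32_t)msr;           /* counter is 32 bits wide */
    }

    int main(void)
    {
        int fd = open("/dev/cpu/0/msr", O_RDONLY);
        uint32_t before, after;

        if (fd < 0) {
            perror("/dev/cpu/0/msr");
            return 1;
        }
        before = read_smi_count(fd);
        sleep(5);                       /* measurement interval */
        after = read_smi_count(fd);
        printf("SMIs on cpu0 in interval: %u\n", after - before);
        close(fd);
        return 0;
    }
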
@@ -908,7 +928,6 @@ void print_verbose_header(void)
 
 	get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
 
-	if (verbose)
-		fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
+	fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
 
 	ratio = (msr >> 40) & 0xFF;
@@ -919,12 +938,15 @@ void print_verbose_header(void)
 	fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
 		ratio, bclk, ratio * bclk);
 
+	get_msr(0, MSR_IA32_POWER_CTL, &msr);
+	fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
+		msr, msr & 0x2 ? "EN" : "DIS");
+
 	if (!do_ivt_turbo_ratio_limit)
 		goto print_nhm_turbo_ratio_limits;
 
 	get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
 
-	if (verbose)
-		fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+	fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
 	ratio = (msr >> 56) & 0xFF;
@@ -1016,7 +1038,6 @@ print_nhm_turbo_ratio_limits:
 
 	get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
 
-	if (verbose)
-		fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+	fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
 	ratio = (msr >> 56) & 0xFF;
@@ -1397,6 +1418,9 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
 	case 0x2D:	/* SNB Xeon */
 	case 0x3A:	/* IVB */
 	case 0x3E:	/* IVB Xeon */
+	case 0x3C:	/* HSW */
+	case 0x3F:	/* HSW */
+	case 0x45:	/* HSW */
 		return 1;
 	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
 	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
@@ -1488,6 +1512,9 @@ void rapl_probe(unsigned int family, unsigned int model)
 	switch (model) {
 	case 0x2A:
 	case 0x3A:
+	case 0x3C:	/* HSW */
+	case 0x3F:	/* HSW */
+	case 0x45:	/* HSW */
 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
 		break;
 	case 0x2D:
@@ -1724,6 +1751,9 @@ int is_snb(unsigned int family, unsigned int model)
 	case 0x2D:
 	case 0x3A:	/* IVB */
 	case 0x3E:	/* IVB Xeon */
+	case 0x3C:	/* HSW */
+	case 0x3F:	/* HSW */
+	case 0x45:	/* HSW */
 		return 1;
 	}
 	return 0;
@@ -1883,6 +1913,7 @@ void check_cpuid()
 
 	do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
 	do_nhm_cstates = genuine_intel;	/* all Intel w/ non-stop TSC have NHM counters */
+	do_smi = do_nhm_cstates;
 	do_snb_cstates = is_snb(family, model);
 	bclk = discover_bclk(family, model);
@@ -2219,9 +2250,6 @@ void cmdline(int argc, char **argv)
 		case 'c':
 			sscanf(optarg, "%x", &extra_delta_offset32);
 			break;
-		case 's':
-			extra_delta_offset32 = 0x34;	/* SMI counter */
-			break;
 		case 'C':
 			sscanf(optarg, "%x", &extra_delta_offset64);
 			break;
@@ -2248,7 +2276,7 @@ int main(int argc, char **argv)
 	cmdline(argc, argv);
 
 	if (verbose)
-		fprintf(stderr, "turbostat v3.0 November 23, 2012"
+		fprintf(stderr, "turbostat v3.2 February 11, 2013"
 			" - Len Brown <lenb@kernel.org>\n");
 
 	turbostat_init();