Merge branches 'idle-remove-statedata', 'pm_idle' and 'idle-hsw-turbostat' into release

Author: Len Brown
Parent commit: 2e7d0f60d8
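
Summary of the merge, as reflected in the diffs below: the global, writable pm_idle() function pointer is deleted from every architecture that still carried it (replaced by direct calls to each arch's default idle routine, or by an arch-local pointer such as sh_idle, sparc_idle or x86_idle); the x86 APM idle hook is converted into a proper cpuidle driver; intel_idle gains Haswell (HSW) C-state tables and C1E-promotion control; and turbostat reports the SMI counter (MSR 0x34) by default instead of via the removed -s option.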

+ 4 - 9
arch/arm/kernel/process.c

@@ -172,14 +172,9 @@ static void default_idle(void)
 	local_irq_enable();
 }
 
-void (*pm_idle)(void) = default_idle;
-EXPORT_SYMBOL(pm_idle);
-
 /*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way.  The only difference
- * is that we always respect 'hlt_counter' to prevent low power idle.
+ * The idle thread.
+ * We always respect 'hlt_counter' to prevent low power idle.
  */
 void cpu_idle(void)
 {
@@ -210,10 +205,10 @@ void cpu_idle(void)
 			} else if (!need_resched()) {
 				stop_critical_timings();
 				if (cpuidle_idle_call())
-					pm_idle();
+					default_idle();
 				start_critical_timings();
 				/*
-				 * pm_idle functions must always
+				 * default_idle functions must always
 				 * return with IRQs enabled.
 				 */
 				WARN_ON(irqs_disabled());
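
The same transformation repeats for arm64, blackfin, cris, ia64, m32r, microblaze, mn10300, openrisc, sh, sparc, unicore32 and x86 below: the writable pm_idle hook disappears and the idle loop calls the architecture's own routine directly. A minimal userspace sketch of the before/after control flow — the kernel functions are stubbed out here purely for illustration:

	#include <stdio.h>

	/* Stubs standing in for the real kernel routines. */
	static void default_idle(void) { puts("default_idle"); }
	static int cpuidle_idle_call(void) { return 1; /* no cpuidle driver */ }

	/* Old pattern (removed by this merge): a writable, exported hook
	 * that any driver could overwrite behind the scheduler's back. */
	static void (*pm_idle)(void);

	static void idle_iteration_old(void)
	{
		if (pm_idle)
			pm_idle();	/* driver-installed routine */
		else
			default_idle();	/* arch fallback */
	}

	/* New pattern: try cpuidle first, fall back to the arch default. */
	static void idle_iteration_new(void)
	{
		if (cpuidle_idle_call())
			default_idle();
	}

	int main(void)
	{
		idle_iteration_old();
		idle_iteration_new();
		return 0;
	}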

+ 4 - 9
arch/arm64/kernel/process.c

@@ -97,14 +97,9 @@ static void default_idle(void)
 	local_irq_enable();
 }
 
-void (*pm_idle)(void) = default_idle;
-EXPORT_SYMBOL_GPL(pm_idle);
-
 /*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way.  The only difference
- * is that we always respect 'hlt_counter' to prevent low power idle.
+ * The idle thread.
+ * We always respect 'hlt_counter' to prevent low power idle.
  */
 void cpu_idle(void)
 {
@@ -122,10 +117,10 @@ void cpu_idle(void)
 			local_irq_disable();
 			if (!need_resched()) {
 				stop_critical_timings();
-				pm_idle();
+				default_idle();
 				start_critical_timings();
 				/*
-				 * pm_idle functions should always return
+				 * default_idle functions should always return
 				 * with IRQs enabled.
 				 */
 				WARN_ON(irqs_disabled());

+ 0 - 7
arch/blackfin/kernel/process.c

@@ -39,12 +39,6 @@ int nr_l1stack_tasks;
 void *l1_stack_base;
 unsigned long l1_stack_len;
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void) = NULL;
-EXPORT_SYMBOL(pm_idle);
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
@@ -81,7 +75,6 @@ void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */
 	while (1) {
-		void (*idle)(void) = pm_idle;
 
 #ifdef CONFIG_HOTPLUG_CPU
 		if (cpu_is_offline(smp_processor_id()))

+ 1 - 10
arch/cris/kernel/process.c

@@ -54,11 +54,6 @@ void enable_hlt(void)
 
 EXPORT_SYMBOL(enable_hlt);
  
-/*
- * The following aren't currently used.
- */
-void (*pm_idle)(void);
-
 extern void default_idle(void);
 
 void (*pm_power_off)(void);
@@ -77,16 +72,12 @@ void cpu_idle (void)
 	while (1) {
 		rcu_idle_enter();
 		while (!need_resched()) {
-			void (*idle)(void);
 			/*
 			 * Mark this as an RCU critical section so that
 			 * synchronize_kernel() in the unload path waits
 			 * for our completion.
 			 */
-			idle = pm_idle;
-			if (!idle)
-				idle = default_idle;
-			idle();
+			default_idle();
 		}
 		rcu_idle_exit();
 		schedule_preempt_disabled();

+ 0 - 3
arch/ia64/kernel/process.c

@@ -57,8 +57,6 @@ void (*ia64_mark_idle)(int);
 
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
-void (*pm_idle) (void);
-EXPORT_SYMBOL(pm_idle);
 void (*pm_power_off) (void);
 EXPORT_SYMBOL(pm_power_off);
 
@@ -301,7 +299,6 @@ cpu_idle (void)
 			if (mark_idle)
 				(*mark_idle)(1);
 
-			idle = pm_idle;
 			if (!idle)
 				idle = default_idle;
 			(*idle)();

+ 0 - 1
arch/ia64/kernel/setup.c

@@ -1051,7 +1051,6 @@ cpu_init (void)
 		max_num_phys_stacked = num_phys_stacked;
 	}
 	platform_cpu_init();
-	pm_idle = default_idle;
 }
 
 void __init

+ 2 - 49
arch/m32r/kernel/process.c

@@ -44,35 +44,9 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return tsk->thread.lr;
 }
 
-/*
- * Powermanagement idle function, if any..
- */
-static void (*pm_idle)(void) = NULL;
-
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
-/*
- * We use this is we don't have any better
- * idle routine..
- */
-static void default_idle(void)
-{
-	/* M32R_FIXME: Please use "cpu_sleep" mode.  */
-	cpu_relax();
-}
-
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle (void)
-{
-	/* M32R_FIXME */
-	cpu_relax();
-}
-
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
@@ -84,14 +58,8 @@ void cpu_idle (void)
 	/* endless idle loop with no priority at all */
 	while (1) {
 		rcu_idle_enter();
-		while (!need_resched()) {
-			void (*idle)(void) = pm_idle;
-
-			if (!idle)
-				idle = default_idle;
-
-			idle();
-		}
+		while (!need_resched())
+			cpu_relax();
 		rcu_idle_exit();
 		schedule_preempt_disabled();
 	}
@@ -120,21 +88,6 @@ void machine_power_off(void)
 	/* M32R_FIXME */
 }
 
-static int __init idle_setup (char *str)
-{
-	if (!strncmp(str, "poll", 4)) {
-		printk("using poll in idle threads.\n");
-		pm_idle = poll_idle;
-	} else if (!strncmp(str, "sleep", 4)) {
-		printk("using sleep in idle threads.\n");
-		pm_idle = default_idle;
-	}
-
-	return 1;
-}
-
-__setup("idle=", idle_setup);
-
 void show_regs(struct pt_regs * regs)
 {
 	printk("\n");

+ 0 - 3
arch/microblaze/kernel/process.c

@@ -41,7 +41,6 @@ void show_regs(struct pt_regs *regs)
 				regs->msr, regs->ear, regs->esr, regs->fsr);
 }
 
-void (*pm_idle)(void);
 void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
@@ -98,8 +97,6 @@ void cpu_idle(void)
 
 	/* endless idle loop with no priority at all */
 	while (1) {
-		void (*idle)(void) = pm_idle;
-
 		if (!idle)
 			idle = default_idle;
 

+ 0 - 7
arch/mn10300/kernel/process.c

@@ -36,12 +36,6 @@
 #include <asm/gdb-stub.h>
 #include "internal.h"
 
-/*
- * power management idle function, if any..
- */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
-
 /*
  * return saved PC of a blocked thread.
  */
@@ -113,7 +107,6 @@ void cpu_idle(void)
 			void (*idle)(void);
 
 			smp_rmb();
-			idle = pm_idle;
 			if (!idle) {
 #if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
 				idle = poll_idle;

+ 0 - 5
arch/openrisc/kernel/idle.c

@@ -39,11 +39,6 @@
 
 void (*powersave) (void) = NULL;
 
-static inline void pm_idle(void)
-{
-	barrier();
-}
-
 void cpu_idle(void)
 {
 	set_thread_flag(TIF_POLLING_NRFLAG);

+ 6 - 6
arch/sh/kernel/idle.c

@@ -22,7 +22,7 @@
 #include <asm/smp.h>
 #include <asm/bl_bit.h>
 
-void (*pm_idle)(void);
+static void (*sh_idle)(void);
 
 static int hlt_counter;
 
@@ -103,9 +103,9 @@ void cpu_idle(void)
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
 			if (cpuidle_idle_call())
-				pm_idle();
+				sh_idle();
 			/*
-			 * Sanity check to ensure that pm_idle() returns
+			 * Sanity check to ensure that sh_idle() returns
 			 * with IRQs enabled
 			 */
 			WARN_ON(irqs_disabled());
@@ -123,13 +123,13 @@ void __init select_idle_routine(void)
 	/*
 	 * If a platform has set its own idle routine, leave it alone.
 	 */
-	if (pm_idle)
+	if (sh_idle)
 		return;
 
 	if (hlt_works())
-		pm_idle = default_idle;
+		sh_idle = default_idle;
 	else
-		pm_idle = poll_idle;
+		sh_idle = poll_idle;
 }
 
 void stop_this_cpu(void *unused)

+ 1 - 0
arch/sparc/include/asm/processor_32.h

@@ -118,6 +118,7 @@ extern unsigned long get_wchan(struct task_struct *);
 extern struct task_struct *last_task_used_math;
 
 #define cpu_relax()	barrier()
+extern void (*sparc_idle)(void);
 
 #endif
 

+ 2 - 1
arch/sparc/kernel/apc.c

@@ -20,6 +20,7 @@
 #include <asm/uaccess.h>
 #include <asm/auxio.h>
 #include <asm/apc.h>
+#include <asm/processor.h>
 
 /* Debugging
  * 
@@ -158,7 +159,7 @@ static int apc_probe(struct platform_device *op)
 
 	/* Assign power management IDLE handler */
 	if (!apc_no_idle)
-		pm_idle = apc_swift_idle;	
+		sparc_idle = apc_swift_idle;
 
 	printk(KERN_INFO "%s: power management initialized%s\n", 
 	       APC_DEVNAME, apc_no_idle ? " (CPU idle disabled)" : "");

+ 3 - 2
arch/sparc/kernel/leon_pmc.c

@@ -9,6 +9,7 @@
 #include <asm/leon_amba.h>
 #include <asm/cpu_type.h>
 #include <asm/leon.h>
+#include <asm/processor.h>
 
 /* List of Systems that need fixup instructions around power-down instruction */
 unsigned int pmc_leon_fixup_ids[] = {
@@ -69,9 +70,9 @@ static int __init leon_pmc_install(void)
 	if (sparc_cpu_model == sparc_leon) {
 		/* Assign power management IDLE handler */
 		if (pmc_leon_need_fixup())
-			pm_idle = pmc_leon_idle_fixup;
+			sparc_idle = pmc_leon_idle_fixup;
 		else
-			pm_idle = pmc_leon_idle;
+			sparc_idle = pmc_leon_idle;
 
 		printk(KERN_INFO "leon: power management initialized\n");
 	}

+ 2 - 1
arch/sparc/kernel/pmc.c

@@ -17,6 +17,7 @@
 #include <asm/oplib.h>
 #include <asm/uaccess.h>
 #include <asm/auxio.h>
+#include <asm/processor.h>
 
 /* Debug
  *
@@ -63,7 +64,7 @@ static int pmc_probe(struct platform_device *op)
 
 #ifndef PMC_NO_IDLE
 	/* Assign power management IDLE handler */
-	pm_idle = pmc_swift_idle;
+	sparc_idle = pmc_swift_idle;
 #endif
 
 	printk(KERN_INFO "%s: power management initialized\n", PMC_DEVNAME);

+ 3 - 4
arch/sparc/kernel/process_32.c

@@ -43,8 +43,7 @@
  * Power management idle function 
  * Set in pm platform drivers (apc.c and pmc.c)
  */
-void (*pm_idle)(void);
-EXPORT_SYMBOL(pm_idle);
+void (*sparc_idle)(void);
 
 /* 
  * Power-off handler instantiation for pm.h compliance
@@ -75,8 +74,8 @@ void cpu_idle(void)
 	/* endless idle loop with no priority at all */
 	for (;;) {
 		while (!need_resched()) {
-			if (pm_idle)
-				(*pm_idle)();
+			if (sparc_idle)
+				(*sparc_idle)();
 			else
 				cpu_relax();
 		}

+ 0 - 5
arch/unicore32/kernel/process.c

@@ -45,11 +45,6 @@ static const char * const processor_modes[] = {
 	"UK18", "UK19", "UK1A", "EXTN", "UK1C", "UK1D", "UK1E", "SUSR"
 };
 
-/*
- * The idle thread, has rather strange semantics for calling pm_idle,
- * but this is what x86 does and we need to do the same, so that
- * things like cpuidle get called in the same way.
- */
 void cpu_idle(void)
 {
 	/* endless idle loop with no priority at all */

+ 1 - 0
arch/x86/Kconfig

@@ -1912,6 +1912,7 @@ config APM_DO_ENABLE
 	  this feature.
 
 config APM_CPU_IDLE
+	depends on CPU_IDLE
 	bool "Make CPU Idle calls when idle"
 	---help---
 	  Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.

+ 2 - 1
arch/x86/include/asm/mwait.h

@@ -4,7 +4,8 @@
 #define MWAIT_SUBSTATE_MASK		0xf
 #define MWAIT_CSTATE_MASK		0xf
 #define MWAIT_SUBSTATE_SIZE		4
-#define MWAIT_MAX_NUM_CSTATES		8
+#define MWAIT_HINT2CSTATE(hint)		(((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
+#define MWAIT_HINT2SUBSTATE(hint)	((hint) & MWAIT_CSTATE_MASK)
 
 #define CPUID_MWAIT_LEAF		5
 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
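
The two new macros split an MWAIT hint byte into its C-state and sub-state fields, four bits each. A standalone check of the decode, using an example hint taken from the intel_idle tables below:

	#include <stdio.h>

	#define MWAIT_SUBSTATE_MASK		0xf
	#define MWAIT_CSTATE_MASK		0xf
	#define MWAIT_SUBSTATE_SIZE		4
	#define MWAIT_HINT2CSTATE(hint)		(((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
	#define MWAIT_HINT2SUBSTATE(hint)	((hint) & MWAIT_CSTATE_MASK)

	int main(void)
	{
		unsigned int hint = 0x52;	/* C6-ATM in intel_idle */

		/* prints: hint 0x52 -> cstate field 5, sub-state 2 */
		printf("hint 0x%02x -> cstate field %u, sub-state %u\n",
		       hint, MWAIT_HINT2CSTATE(hint), MWAIT_HINT2SUBSTATE(hint));
		return 0;
	}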

+ 3 - 0
arch/x86/include/uapi/asm/msr-index.h

@@ -103,6 +103,8 @@
 #define DEBUGCTLMSR_BTS_OFF_USR		(1UL << 10)
 #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI	(1UL << 11)
 
+#define MSR_IA32_POWER_CTL		0x000001fc
+
 #define MSR_IA32_MC0_CTL		0x00000400
 #define MSR_IA32_MC0_STATUS		0x00000401
 #define MSR_IA32_MC0_ADDR		0x00000402
@@ -272,6 +274,7 @@
 #define MSR_IA32_PLATFORM_ID		0x00000017
 #define MSR_IA32_EBL_CR_POWERON		0x0000002a
 #define MSR_EBC_FREQUENCY_ID		0x0000002c
+#define MSR_SMI_COUNT			0x00000034
 #define MSR_IA32_FEATURE_CONTROL        0x0000003a
 #define MSR_IA32_TSC_ADJUST             0x0000003b
 

+ 34 - 23
arch/x86/kernel/apm_32.c

@@ -232,6 +232,7 @@
 #include <linux/acpi.h>
 #include <linux/syscore_ops.h>
 #include <linux/i8253.h>
+#include <linux/cpuidle.h>
 
 #include <asm/uaccess.h>
 #include <asm/desc.h>
@@ -360,13 +361,35 @@ struct apm_user {
  * idle percentage above which bios idle calls are done
  */
 #ifdef CONFIG_APM_CPU_IDLE
-#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012
 #define DEFAULT_IDLE_THRESHOLD	95
 #else
 #define DEFAULT_IDLE_THRESHOLD	100
 #endif
 #define DEFAULT_IDLE_PERIOD	(100 / 3)
 
+static int apm_cpu_idle(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv, int index);
+
+static struct cpuidle_driver apm_idle_driver = {
+	.name = "apm_idle",
+	.owner = THIS_MODULE,
+	.en_core_tk_irqen = 1,
+	.states = {
+		{ /* entry 0 is for polling */ },
+		{ /* entry 1 is for APM idle */
+			.name = "APM",
+			.desc = "APM idle",
+			.flags = CPUIDLE_FLAG_TIME_VALID,
+			.exit_latency = 250,	/* WAG */
+			.target_residency = 500,	/* WAG */
+			.enter = &apm_cpu_idle
+		},
+	},
+	.state_count = 2,
+};
+
+static struct cpuidle_device apm_cpuidle_device;
+
 /*
  * Local variables
  */
@@ -377,7 +400,6 @@ static struct {
 static int clock_slowed;
 static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD;
 static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD;
-static int set_pm_idle;
 static int suspends_pending;
 static int standbys_pending;
 static int ignore_sys_suspend;
@@ -884,8 +906,6 @@ static void apm_do_busy(void)
 #define IDLE_CALC_LIMIT	(HZ * 100)
 #define IDLE_LEAKY_MAX	16
 
-static void (*original_pm_idle)(void) __read_mostly;
-
 /**
  * apm_cpu_idle		-	cpu idling for APM capable Linux
  *
@@ -894,7 +914,8 @@ static void (*original_pm_idle)(void) __read_mostly;
  * Furthermore it calls the system default idle routine.
  */
 
-static void apm_cpu_idle(void)
+static int apm_cpu_idle(struct cpuidle_device *dev,
+	struct cpuidle_driver *drv, int index)
 {
 	static int use_apm_idle; /* = 0 */
 	static unsigned int last_jiffies; /* = 0 */
@@ -904,7 +925,6 @@ static void apm_cpu_idle(void)
 	unsigned int jiffies_since_last_check = jiffies - last_jiffies;
 	unsigned int bucket;
 
-	WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
 recalc:
 	if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
 		use_apm_idle = 0;
@@ -950,10 +970,7 @@ recalc:
 				break;
 			}
 		}
-		if (original_pm_idle)
-			original_pm_idle();
-		else
-			default_idle();
+		default_idle();
 		local_irq_disable();
 		jiffies_since_last_check = jiffies - last_jiffies;
 		if (jiffies_since_last_check > idle_period)
@@ -963,7 +980,7 @@ recalc:
 	if (apm_idle_done)
 		apm_do_busy();
 
-	local_irq_enable();
+	return index;
 }
 
 /**
@@ -2381,9 +2398,9 @@ static int __init apm_init(void)
 	if (HZ != 100)
 		idle_period = (idle_period * HZ) / 100;
 	if (idle_threshold < 100) {
-		original_pm_idle = pm_idle;
-		pm_idle  = apm_cpu_idle;
-		set_pm_idle = 1;
+		if (!cpuidle_register_driver(&apm_idle_driver))
+			if (cpuidle_register_device(&apm_cpuidle_device))
+				cpuidle_unregister_driver(&apm_idle_driver);
 	}
 
 	return 0;
@@ -2393,15 +2410,9 @@ static void __exit apm_exit(void)
 {
 	int error;
 
-	if (set_pm_idle) {
-		pm_idle = original_pm_idle;
-		/*
-		 * We are about to unload the current idle thread pm callback
-		 * (pm_idle), Wait for all processors to update cached/local
-		 * copies of pm_idle before proceeding.
-		 */
-		kick_all_cpus_sync();
-	}
+	cpuidle_unregister_device(&apm_cpuidle_device);
+	cpuidle_unregister_driver(&apm_idle_driver);
+
 	if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
 	    && (apm_info.connection_version > 0x0100)) {
 		error = apm_engage_power_management(APM_DEVICE_ALL, 0);
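
The conversion replaces pointer-swapping with a registered cpuidle state whose ->enter() callback returns the index of the state actually entered; with .en_core_tk_irqen set, the cpuidle core re-enables interrupts and accounts residency itself, which is why apm_cpu_idle() no longer ends in local_irq_enable(). A schematic of the callback contract (kernel-context sketch, not a buildable module):

	#include <linux/cpuidle.h>

	/* ->enter() idles the CPU for drv->states[index] and reports
	 * which state was really entered; the core does timekeeping
	 * and the IRQ re-enable when en_core_tk_irqen is set. */
	static int example_enter(struct cpuidle_device *dev,
				 struct cpuidle_driver *drv, int index)
	{
		/* ... put the CPU into the low-power state ... */
		return index;
	}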

+ 12 - 19
arch/x86/kernel/process.c

@@ -268,13 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-/*
- * Powermanagement idle function, if any..
- */
-void (*pm_idle)(void);
-#ifdef CONFIG_APM_MODULE
-EXPORT_SYMBOL(pm_idle);
-#endif
+static void (*x86_idle)(void);
 
 #ifndef CONFIG_SMP
 static inline void play_dead(void)
@@ -351,7 +345,7 @@ void cpu_idle(void)
 			rcu_idle_enter();
 
 			if (cpuidle_idle_call())
-				pm_idle();
+				x86_idle();
 
 			rcu_idle_exit();
 			start_critical_timings();
@@ -398,9 +392,9 @@ EXPORT_SYMBOL(default_idle);
 
 bool set_pm_idle_to_default(void)
 {
-	bool ret = !!pm_idle;
+	bool ret = !!x86_idle;
 
-	pm_idle = default_idle;
+	x86_idle = default_idle;
 
 	return ret;
 }
@@ -567,11 +561,10 @@ static void amd_e400_idle(void)
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
-	if (pm_idle == poll_idle && smp_num_siblings > 1) {
+	if (x86_idle == poll_idle && smp_num_siblings > 1)
 		pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n");
-	}
 #endif
-	if (pm_idle)
+	if (x86_idle)
 		return;
 
 	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
@@ -579,19 +572,19 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 		 * One CPU supports mwait => All CPUs supports mwait
 		 */
 		pr_info("using mwait in idle threads\n");
-		pm_idle = mwait_idle;
+		x86_idle = mwait_idle;
 	} else if (cpu_has_amd_erratum(amd_erratum_400)) {
 		/* E400: APIC timer interrupt does not wake up CPU from C1e */
 		pr_info("using AMD E400 aware idle routine\n");
-		pm_idle = amd_e400_idle;
+		x86_idle = amd_e400_idle;
 	} else
-		pm_idle = default_idle;
+		x86_idle = default_idle;
 }
 
 void __init init_amd_e400_c1e_mask(void)
 {
 	/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
-	if (pm_idle == amd_e400_idle)
+	if (x86_idle == amd_e400_idle)
 		zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
 }
 
@@ -602,7 +595,7 @@ static int __init idle_setup(char *str)
 
 	if (!strcmp(str, "poll")) {
 		pr_info("using polling idle threads\n");
-		pm_idle = poll_idle;
+		x86_idle = poll_idle;
 		boot_option_idle_override = IDLE_POLL;
 	} else if (!strcmp(str, "mwait")) {
 		boot_option_idle_override = IDLE_FORCE_MWAIT;
@@ -615,7 +608,7 @@ static int __init idle_setup(char *str)
 		 * To continue to load the CPU idle driver, don't touch
 		 * the boot_option_idle_override.
 		 */
-		pm_idle = default_idle;
+		x86_idle = default_idle;
 		boot_option_idle_override = IDLE_HALT;
 	} else if (!strcmp(str, "nomwait")) {
 		/*

+ 147 - 56
drivers/idle/intel_idle.c

@@ -74,7 +74,7 @@ static struct cpuidle_driver intel_idle_driver = {
 	.en_core_tk_irqen = 1,
 };
 /* intel_idle.max_cstate=0 disables driver */
-static int max_cstate = MWAIT_MAX_NUM_CSTATES - 1;
+static int max_cstate = CPUIDLE_STATE_MAX - 1;
 
 static unsigned int mwait_substates;
 
@@ -90,6 +90,7 @@ struct idle_cpu {
 	 * Indicate which enable bits to clear here.
 	 */
 	unsigned long auto_demotion_disable_flags;
+	bool disable_promotion_to_c1e;
 };
 
 static const struct idle_cpu *icpu;
@@ -123,127 +124,190 @@ static struct cpuidle_state *cpuidle_state_table;
  * which is also the index into the MWAIT hint array.
  * Thus C0 is a dummy.
  */
-static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = {
-	{ /* MWAIT C0 */ },
-	{ /* MWAIT C1 */
+static struct cpuidle_state nehalem_cstates[CPUIDLE_STATE_MAX] = {
+	{
 		.name = "C1-NHM",
 		.desc = "MWAIT 0x00",
 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
 		.exit_latency = 3,
 		.target_residency = 6,
 		.enter = &intel_idle },
-	{ /* MWAIT C2 */
+	{
+		.name = "C1E-NHM",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle },
+	{
 		.name = "C3-NHM",
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 20,
 		.target_residency = 80,
 		.enter = &intel_idle },
-	{ /* MWAIT C3 */
+	{
 		.name = "C6-NHM",
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 200,
 		.target_residency = 800,
 		.enter = &intel_idle },
+	{
+		.enter = NULL }
 };
 
-static struct cpuidle_state snb_cstates[MWAIT_MAX_NUM_CSTATES] = {
-	{ /* MWAIT C0 */ },
-	{ /* MWAIT C1 */
+static struct cpuidle_state snb_cstates[CPUIDLE_STATE_MAX] = {
+	{
 		.name = "C1-SNB",
 		.desc = "MWAIT 0x00",
 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
-		.exit_latency = 1,
-		.target_residency = 1,
+		.exit_latency = 2,
+		.target_residency = 2,
 		.enter = &intel_idle },
-	{ /* MWAIT C2 */
+	{
+		.name = "C1E-SNB",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle },
+	{
 		.name = "C3-SNB",
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 80,
 		.target_residency = 211,
 		.enter = &intel_idle },
-	{ /* MWAIT C3 */
+	{
 		.name = "C6-SNB",
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 104,
 		.target_residency = 345,
 		.enter = &intel_idle },
-	{ /* MWAIT C4 */
+	{
 		.name = "C7-SNB",
 		.desc = "MWAIT 0x30",
 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 109,
 		.target_residency = 345,
 		.enter = &intel_idle },
+	{
+		.enter = NULL }
 };
 
-static struct cpuidle_state ivb_cstates[MWAIT_MAX_NUM_CSTATES] = {
-	{ /* MWAIT C0 */ },
-	{ /* MWAIT C1 */
+static struct cpuidle_state ivb_cstates[CPUIDLE_STATE_MAX] = {
+	{
 		.name = "C1-IVB",
 		.desc = "MWAIT 0x00",
 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
 		.exit_latency = 1,
 		.target_residency = 1,
 		.enter = &intel_idle },
-	{ /* MWAIT C2 */
+	{
+		.name = "C1E-IVB",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle },
+	{
 		.name = "C3-IVB",
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 59,
 		.target_residency = 156,
 		.enter = &intel_idle },
-	{ /* MWAIT C3 */
+	{
 		.name = "C6-IVB",
 		.desc = "MWAIT 0x20",
 		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 80,
 		.target_residency = 300,
 		.enter = &intel_idle },
-	{ /* MWAIT C4 */
+	{
 		.name = "C7-IVB",
 		.desc = "MWAIT 0x30",
 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 87,
 		.target_residency = 300,
 		.enter = &intel_idle },
+	{
+		.enter = NULL }
 };
 
-static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = {
-	{ /* MWAIT C0 */ },
-	{ /* MWAIT C1 */
-		.name = "C1-ATM",
+static struct cpuidle_state hsw_cstates[CPUIDLE_STATE_MAX] = {
+	{
+		.name = "C1-HSW",
 		.desc = "MWAIT 0x00",
 		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
-		.exit_latency = 1,
-		.target_residency = 4,
+		.exit_latency = 2,
+		.target_residency = 2,
 		.enter = &intel_idle },
-	{ /* MWAIT C2 */
+	{
+		.name = "C1E-HSW",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle },
+	{
+		.name = "C3-HSW",
+		.desc = "MWAIT 0x10",
+		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 33,
+		.target_residency = 100,
+		.enter = &intel_idle },
+	{
+		.name = "C6-HSW",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 133,
+		.target_residency = 400,
+		.enter = &intel_idle },
+	{
+		.name = "C7s-HSW",
+		.desc = "MWAIT 0x32",
+		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 166,
+		.target_residency = 500,
+		.enter = &intel_idle },
+	{
+		.enter = NULL }
+};
+
+static struct cpuidle_state atom_cstates[CPUIDLE_STATE_MAX] = {
+	{
+		.name = "C1E-ATM",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle },
+	{
 		.name = "C2-ATM",
 		.desc = "MWAIT 0x10",
 		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID,
 		.exit_latency = 20,
 		.target_residency = 80,
 		.enter = &intel_idle },
-	{ /* MWAIT C3 */ },
-	{ /* MWAIT C4 */
+	{
 		.name = "C4-ATM",
 		.desc = "MWAIT 0x30",
 		.flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 100,
 		.target_residency = 400,
 		.enter = &intel_idle },
-	{ /* MWAIT C5 */ },
-	{ /* MWAIT C6 */
+	{
 		.name = "C6-ATM",
 		.desc = "MWAIT 0x52",
 		.flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
 		.exit_latency = 140,
 		.target_residency = 560,
 		.enter = &intel_idle },
+	{
+		.enter = NULL }
 };
 
 /**
@@ -342,10 +406,19 @@ static void auto_demotion_disable(void *dummy)
 	msr_bits &= ~(icpu->auto_demotion_disable_flags);
 	wrmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr_bits);
 }
+static void c1e_promotion_disable(void *dummy)
+{
+	unsigned long long msr_bits;
+
+	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
+	msr_bits &= ~0x2;
+	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
+}
 
 static const struct idle_cpu idle_cpu_nehalem = {
 	.state_table = nehalem_cstates,
 	.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
+	.disable_promotion_to_c1e = true,
 };
 
 static const struct idle_cpu idle_cpu_atom = {
@@ -359,10 +432,17 @@ static const struct idle_cpu idle_cpu_lincroft = {
 
 static const struct idle_cpu idle_cpu_snb = {
 	.state_table = snb_cstates,
+	.disable_promotion_to_c1e = true,
 };
 
 static const struct idle_cpu idle_cpu_ivb = {
 	.state_table = ivb_cstates,
+	.disable_promotion_to_c1e = true,
+};
+
+static const struct idle_cpu idle_cpu_hsw = {
+	.state_table = hsw_cstates,
+	.disable_promotion_to_c1e = true,
 };
 
 #define ICPU(model, cpu) \
@@ -382,6 +462,9 @@ static const struct x86_cpu_id intel_idle_ids[] = {
 	ICPU(0x2d, idle_cpu_snb),
 	ICPU(0x3a, idle_cpu_ivb),
 	ICPU(0x3e, idle_cpu_ivb),
+	ICPU(0x3c, idle_cpu_hsw),
+	ICPU(0x3f, idle_cpu_hsw),
+	ICPU(0x45, idle_cpu_hsw),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -464,32 +547,31 @@ static int intel_idle_cpuidle_driver_init(void)
 
 	drv->state_count = 1;
 
-	for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
-		int num_substates;
+	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+		int num_substates, mwait_hint, mwait_cstate, mwait_substate;
 
-		if (cstate > max_cstate) {
+		if (cpuidle_state_table[cstate].enter == NULL)
+			break;
+
+		if (cstate + 1 > max_cstate) {
 			printk(PREFIX "max_cstate %d reached\n",
 				max_cstate);
 			break;
 		}
 
+		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
+		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
+		mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
+
 		/* does the state exist in CPUID.MWAIT? */
-		num_substates = (mwait_substates >> ((cstate) * 4))
+		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
 					& MWAIT_SUBSTATE_MASK;
-		if (num_substates == 0)
-			continue;
-		/* is the state not enabled? */
-		if (cpuidle_state_table[cstate].enter == NULL) {
-			/* does the driver not know about the state? */
-			if (*cpuidle_state_table[cstate].name == '\0')
-				pr_debug(PREFIX "unaware of model 0x%x"
-					" MWAIT %d please"
-					" contact lenb@kernel.org\n",
-				boot_cpu_data.x86_model, cstate);
+
+		/* if sub-state in table is not enumerated by CPUID */
+		if ((mwait_substate + 1) > num_substates)
 			continue;
-		}
 
-		if ((cstate > 2) &&
+		if (((mwait_cstate + 1) > 2) &&
 			!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 			mark_tsc_unstable("TSC halts in idle"
 					" states deeper than C2");
@@ -503,6 +585,9 @@ static int intel_idle_cpuidle_driver_init(void)
 	if (icpu->auto_demotion_disable_flags)
 		on_each_cpu(auto_demotion_disable, NULL, 1);
 
+	if (icpu->disable_promotion_to_c1e)	/* each-cpu is redundant */
+		on_each_cpu(c1e_promotion_disable, NULL, 1);
+
 	return 0;
 }
 
@@ -521,21 +606,27 @@ static int intel_idle_cpu_init(int cpu)
 
 	dev->state_count = 1;
 
-	for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
-		int num_substates;
+	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
+		int num_substates, mwait_hint, mwait_cstate, mwait_substate;
+
+		if (cpuidle_state_table[cstate].enter == NULL)
+			continue;
 
-		if (cstate > max_cstate) {
+		if (cstate + 1 > max_cstate) {
 			printk(PREFIX "max_cstate %d reached\n", max_cstate);
 			break;
 		}
 
+		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
+		mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint);
+		mwait_substate = MWAIT_HINT2SUBSTATE(mwait_hint);
+
 		/* does the state exist in CPUID.MWAIT? */
-		num_substates = (mwait_substates >> ((cstate) * 4))
-			& MWAIT_SUBSTATE_MASK;
-		if (num_substates == 0)
-			continue;
-		/* is the state not enabled? */
-		if (cpuidle_state_table[cstate].enter == NULL)
+		num_substates = (mwait_substates >> ((mwait_cstate + 1) * 4))
+					& MWAIT_SUBSTATE_MASK;
+
+		/* if sub-state in table is not enumerated by CPUID */
+		if ((mwait_substate + 1) > num_substates)
 			continue;
 
 		dev->state_count += 1;
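
With the tables now packed (no dummy slots), the table index no longer equals the MWAIT C-state, so both loops re-derive the C-state from the hint and test it against the per-C-state sub-state counts that CPUID leaf 5 returns in EDX — one nibble per C-state, nibble 0 being C0. A worked example of that check, with an illustrative (made-up) EDX value:

	#include <stdio.h>

	#define MWAIT_SUBSTATE_MASK		0xf
	#define MWAIT_CSTATE_MASK		0xf
	#define MWAIT_SUBSTATE_SIZE		4
	#define MWAIT_HINT2CSTATE(hint)		(((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
	#define MWAIT_HINT2SUBSTATE(hint)	((hint) & MWAIT_CSTATE_MASK)

	int main(void)
	{
		/* hypothetical CPUID.MWAIT EDX: nibble 4 reports 3 sub-states */
		unsigned int mwait_substates = 0x00032120;
		unsigned int hint = 0x32;			/* C7s-HSW */
		unsigned int cstate = MWAIT_HINT2CSTATE(hint);		/* 3 */
		unsigned int substate = MWAIT_HINT2SUBSTATE(hint);	/* 2 */
		unsigned int num = (mwait_substates >> ((cstate + 1) * 4))
					& MWAIT_SUBSTATE_MASK;

		printf("cstate field %u, sub-state %u, CPUID reports %u\n",
		       cstate, substate, num);
		if (substate + 1 > num)
			puts("not enumerated -> state skipped");
		else
			puts("enumerated -> state registered");
		return 0;
	}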

+ 0 - 1
include/linux/pm.h

@@ -31,7 +31,6 @@
 /*
  * Callbacks for platform drivers to implement.
  */
-extern void (*pm_idle)(void);
 extern void (*pm_power_off)(void);
 extern void (*pm_power_off_prepare)(void);
 

+ 16 - 20
tools/power/x86/turbostat/turbostat.8

@@ -31,8 +31,6 @@ The \fB-S\fP option limits output to a 1-line System Summary for each interval.
 .PP
 The \fB-v\fP option increases verbosity.
 .PP
-The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34"
-.PP
 The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter.
 .PP
 The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter.
@@ -186,26 +184,24 @@ This is a weighted average, where the weight is %c0.  ie. it is the total number
 un-halted cycles elapsed per time divided by the number of CPUs.
 .SH SMI COUNTING EXAMPLE
 On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter.
-Using the -m option, you can display how many SMIs have fired since reset, or if there
-are SMIs during the measurement interval, you can display the delta using the -d option.
+This counter is shown by default under the "SMI" column.
 .nf
-[root@x980 ~]# turbostat -m 0x34
-cor CPU    %c0  GHz  TSC   MSR 0x034    %c1    %c3    %c6   %pc3   %pc6
-          1.41 1.82 3.38  0x00000000   8.92  37.82  51.85  17.37   0.55
-  0   0   3.73 2.03 3.38  0x00000055   1.72  48.25  46.31  17.38   0.55
-  0   6   0.14 1.63 3.38  0x00000056   5.30
-  1   2   2.51 1.80 3.38  0x00000056  15.65  29.33  52.52
-  1   8   0.10 1.65 3.38  0x00000056  18.05
-  2   4   1.16 1.68 3.38  0x00000056   5.87  24.47  68.50
-  2  10   0.10 1.63 3.38  0x00000056   6.93
-  8   1   3.84 1.91 3.38  0x00000056   1.36  50.65  44.16
-  8   7   0.08 1.64 3.38  0x00000056   5.12
-  9   3   1.82 1.73 3.38  0x00000056   7.59  24.21  66.38
-  9   9   0.09 1.68 3.38  0x00000056   9.32
- 10   5   1.66 1.65 3.38  0x00000056  15.10  50.00  33.23
- 10  11   1.72 1.65 3.38  0x00000056  15.05
+[root@x980 ~]# turbostat
+cor CPU    %c0  GHz  TSC SMI    %c1    %c3    %c6 CTMP   %pc3   %pc6
+          0.11 1.91 3.38   0   1.84   0.26  97.79   29   0.82  83.87
+  0   0   0.40 1.63 3.38   0  10.27   0.12  89.20   20   0.82  83.88
+  0   6   0.06 1.63 3.38   0  10.61
+  1   2   0.37 2.63 3.38   0   0.02   0.10  99.51   22
+  1   8   0.01 1.62 3.38   0   0.39
+  2   4   0.07 1.62 3.38   0   0.04   0.07  99.82   23
+  2  10   0.02 1.62 3.38   0   0.09
+  8   1   0.23 1.64 3.38   0   0.10   1.07  98.60   24
+  8   7   0.02 1.64 3.38   0   0.31
+  9   3   0.03 1.62 3.38   0   0.03   0.05  99.89   29
+  9   9   0.02 1.62 3.38   0   0.05
+ 10   5   0.07 1.62 3.38   0   0.08   0.12  99.73   27
+ 10  11   0.03 1.62 3.38   0   0.13
 ^C
-[root@x980 ~]# 
 .fi
 .SH NOTES
 

+ 38 - 10
tools/power/x86/turbostat/turbostat.c

@@ -58,6 +58,7 @@ unsigned int extra_msr_offset32;
 unsigned int extra_msr_offset64;
 unsigned int extra_delta_offset32;
 unsigned int extra_delta_offset64;
+int do_smi;
 double bclk;
 unsigned int show_pkg;
 unsigned int show_core;
@@ -99,6 +100,7 @@ struct thread_data {
 	unsigned long long extra_delta64;
 	unsigned long long extra_msr32;
 	unsigned long long extra_delta32;
+	unsigned int smi_count;
 	unsigned int cpu_id;
 	unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE	0x2
@@ -248,6 +250,8 @@ void print_header(void)
 	if (has_aperf)
 		outp += sprintf(outp, "  GHz");
 	outp += sprintf(outp, "  TSC");
+	if (do_smi)
+		outp += sprintf(outp, " SMI");
 	if (extra_delta_offset32)
 		outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
 	if (extra_delta_offset64)
@@ -314,6 +318,8 @@ int dump_counters(struct thread_data *t, struct core_data *c,
 			extra_msr_offset32, t->extra_msr32);
 		fprintf(stderr, "msr0x%x: %016llX\n",
 			extra_msr_offset64, t->extra_msr64);
+		if (do_smi)
+			fprintf(stderr, "SMI: %08X\n", t->smi_count);
 	}
 
 	if (c) {
@@ -352,6 +358,7 @@ int dump_counters(struct thread_data *t, struct core_data *c,
  * RAM_W: %5.2
  * GHz: "GHz" 3 columns %3.2
  * TSC: "TSC" 3 columns %3.2
+ * SMI: "SMI" 4 columns %4d
  * percentage " %pc3" %6.2
  * Perf Status percentage: %5.2
  * "CTMP" 4 columns %4d
@@ -431,6 +438,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
 	/* TSC */
 	outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float);
 
+	/* SMI */
+	if (do_smi)
+		outp += sprintf(outp, "%4d", t->smi_count);
+
 	/* delta */
 	if (extra_delta_offset32)
 		outp += sprintf(outp, "  %11llu", t->extra_delta32);
@@ -645,6 +656,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
 	 */
 	old->extra_msr32 = new->extra_msr32;
 	old->extra_msr64 = new->extra_msr64;
+
+	if (do_smi)
+		old->smi_count = new->smi_count - old->smi_count;
 }
 
 int delta_cpu(struct thread_data *t, struct core_data *c,
@@ -672,6 +686,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 	t->mperf = 0;
 	t->c1 = 0;
 
+	t->smi_count = 0;
 	t->extra_delta32 = 0;
 	t->extra_delta64 = 0;
 
@@ -802,6 +817,11 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 			return -4;
 	}
 
+	if (do_smi) {
+		if (get_msr(cpu, MSR_SMI_COUNT, &msr))
+			return -5;
+		t->smi_count = msr & 0xFFFFFFFF;
+	}
 	if (extra_delta_offset32) {
 		if (get_msr(cpu, extra_delta_offset32, &msr))
 			return -5;
@@ -908,8 +928,7 @@ void print_verbose_header(void)
 
 	get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
 
-	if (verbose)
-		fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
+	fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
 
 	ratio = (msr >> 40) & 0xFF;
 	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
@@ -919,13 +938,16 @@ void print_verbose_header(void)
 	fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
 		ratio, bclk, ratio * bclk);
 
+	get_msr(0, MSR_IA32_POWER_CTL, &msr);
+	fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
+		msr, msr & 0x2 ? "EN" : "DIS");
+
 	if (!do_ivt_turbo_ratio_limit)
 		goto print_nhm_turbo_ratio_limits;
 
 	get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
 
-	if (verbose)
-		fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+	fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
 	ratio = (msr >> 56) & 0xFF;
 	if (ratio)
@@ -1016,8 +1038,7 @@ print_nhm_turbo_ratio_limits:
 
 	get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
 
-	if (verbose)
-		fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
+	fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
 	ratio = (msr >> 56) & 0xFF;
 	if (ratio)
@@ -1397,6 +1418,9 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
 	case 0x2D:	/* SNB Xeon */
 	case 0x3A:	/* IVB */
 	case 0x3E:	/* IVB Xeon */
+	case 0x3C:	/* HSW */
+	case 0x3F:	/* HSW */
+	case 0x45:	/* HSW */
 		return 1;
 	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
 	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
@@ -1488,6 +1512,9 @@ void rapl_probe(unsigned int family, unsigned int model)
 	switch (model) {
 	case 0x2A:
 	case 0x3A:
+	case 0x3C:	/* HSW */
+	case 0x3F:	/* HSW */
+	case 0x45:	/* HSW */
 		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
 		break;
 	case 0x2D:
@@ -1724,6 +1751,9 @@ int is_snb(unsigned int family, unsigned int model)
 	case 0x2D:
 	case 0x3A:	/* IVB */
 	case 0x3E:	/* IVB Xeon */
+	case 0x3C:	/* HSW */
+	case 0x3F:	/* HSW */
+	case 0x45:	/* HSW */
 		return 1;
 	}
 	return 0;
@@ -1883,6 +1913,7 @@ void check_cpuid()
 
 	do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
 	do_nhm_cstates = genuine_intel;	/* all Intel w/ non-stop TSC have NHM counters */
+	do_smi = do_nhm_cstates;
 	do_snb_cstates = is_snb(family, model);
 	bclk = discover_bclk(family, model);
 
@@ -2219,9 +2250,6 @@ void cmdline(int argc, char **argv)
 		case 'c':
 			sscanf(optarg, "%x", &extra_delta_offset32);
 			break;
-		case 's':
-			extra_delta_offset32 = 0x34;	/* SMI counter */
-			break;
 		case 'C':
 			sscanf(optarg, "%x", &extra_delta_offset64);
 			break;
@@ -2248,7 +2276,7 @@ int main(int argc, char **argv)
 	cmdline(argc, argv);
 
 	if (verbose)
-		fprintf(stderr, "turbostat v3.0 November 23, 2012"
+		fprintf(stderr, "turbostat v3.2 February 11, 2013"
 			" - Len Brown <lenb@kernel.org>\n");
 
 	turbostat_init();
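
turbostat keeps absolute readings per thread and converts them to per-interval values in delta_thread(); for the new SMI column, the "old" slot is overwritten with new - old, which stays correct across 32-bit wraparound in unsigned arithmetic. A small demonstration of that idiom:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t smi_delta(uint32_t old, uint32_t now)
	{
		return now - old;	/* modular arithmetic handles wrap */
	}

	int main(void)
	{
		/* counter wrapped between samples: delta is still 32 */
		printf("%u\n", smi_delta(0xFFFFFFF0u, 0x10u));
		return 0;
	}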