16 years ago · 1d99100120
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -102,15 +102,39 @@ struct mce_log {
 
				 
			
 
				 #ifdef __KERNEL__
			
 
				 
			
 
				+#include <linux/percpu.h>
			
 
				+#include <linux/init.h>
			
 
				+#include <asm/atomic.h>
			
 
				+
			
 
				 extern int mce_disabled;
			
 
				+extern int mce_p5_enabled;
			
 
				 
			
 
				-#include <asm/atomic.h>
			
 
				-#include <linux/percpu.h>
			
 
				+#ifdef CONFIG_X86_MCE
			
 
				+void mcheck_init(struct cpuinfo_x86 *c);
			
 
				+#else
			
 
				+static inline void mcheck_init(struct cpuinfo_x86 *c) {}
			
 
				+#endif
			
 
				+
			
 
				+#ifdef CONFIG_X86_OLD_MCE
			
 
				+extern int nr_mce_banks;
			
 
				+void amd_mcheck_init(struct cpuinfo_x86 *c);
			
 
				+void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
			
 
				+void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
			
 
				+#endif
			
 
				+
			
 
				+#ifdef CONFIG_X86_ANCIENT_MCE
			
 
				+void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
			
 
				+void winchip_mcheck_init(struct cpuinfo_x86 *c);
			
 
				+static inline void enable_p5_mce(void) { mce_p5_enabled = 1; }
			
 
				+#else
			
 
				+static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
			
 
				+static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
			
 
				+static inline void enable_p5_mce(void) {}
			
 
				+#endif
			
 
				 
			
 
				 void mce_setup(struct mce *m);
			
 
				 void mce_log(struct mce *m);
			
 
				 DECLARE_PER_CPU(struct sys_device, mce_dev);
			
 
				-extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
			
 
				 
			
 
				 /*
			
 
				  * To support more than 128 would need to escape the predefined
			
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
 
				 DECLARE_PER_CPU(unsigned, mce_exception_count);
			
 
				 DECLARE_PER_CPU(unsigned, mce_poll_count);
			
 
				 
			
 
				-void mce_log_therm_throt_event(__u64 status);
			
 
				-
			
 
				 extern atomic_t mce_entry;
			
 
				 
			
 
				-void do_machine_check(struct pt_regs *, long);
			
 
				-
			
 
				 typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
			
 
				 DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
			
 
				 
			
@@ -167,13 +187,32 @@ void mce_notify_process(void);
 
				 DECLARE_PER_CPU(struct mce, injectm);
			
 
				 extern struct file_operations mce_chrdev_ops;
			
 
				 
			
 
				-#ifdef CONFIG_X86_MCE
			
 
				-void mcheck_init(struct cpuinfo_x86 *c);
			
 
				-#else
			
 
				-#define mcheck_init(c) do { } while (0)
			
 
				-#endif
			
 
				+/*
			
 
				+ * Exception handler
			
 
				+ */
			
 
				+
			
 
				+/* Call the installed machine check handler for this CPU setup. */
			
 
				+extern void (*machine_check_vector)(struct pt_regs *, long error_code);
			
 
				+void do_machine_check(struct pt_regs *, long);
			
 
				+
			
 
				+/*
			
 
				+ * Threshold handler
			
 
				+ */
			
 
				 
			
 
				 extern void (*mce_threshold_vector)(void);
			
 
				+extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
			
 
				+
			
 
				+/*
			
 
				+ * Thermal handler
			
 
				+ */
			
 
				+
			
 
				+void intel_init_thermal(struct cpuinfo_x86 *c);
			
 
				+
			
 
				+#ifdef CONFIG_X86_NEW_MCE
			
 
				+void mce_log_therm_throt_event(__u64 status);
			
 
				+#else
			
 
				+static inline void mce_log_therm_throt_event(__u64 status) {}
			
 
				+#endif
			
 
				 
			
 
				 #endif /* __KERNEL__ */
			
 
				 #endif /* _ASM_X86_MCE_H */
			
--- a/arch/x86/include/asm/therm_throt.h
+++ b/arch/x86/include/asm/therm_throt.h
@@ -1,9 +0,0 @@
 
				-#ifndef _ASM_X86_THERM_THROT_H
			
 
				-#define _ASM_X86_THERM_THROT_H
			
 
				-
			
 
				-#include <asm/atomic.h>
			
 
				-
			
 
				-extern atomic_t therm_throt_en;
			
 
				-int therm_throt_process(int curr);
			
 
				-
			
 
				-#endif /* _ASM_X86_THERM_THROT_H */
			
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,12 @@
 
				-obj-y				=  mce.o therm_throt.o
			
 
				+obj-y				=  mce.o
			
 
				 
			
 
				 obj-$(CONFIG_X86_NEW_MCE)	+= mce-severity.o
			
 
				 obj-$(CONFIG_X86_OLD_MCE)	+= k7.o p4.o p6.o
			
 
				 obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
			
 
				-obj-$(CONFIG_X86_MCE_P4THERMAL)	+= mce_intel.o
			
 
				-obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel_64.o mce_intel.o
			
 
				-obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd_64.o
			
 
				+obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
			
 
				+obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
			
 
				 obj-$(CONFIG_X86_MCE_NONFATAL)	+= non-fatal.o
			
 
				 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
			
 
				 obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
			
 
				+
			
 
				+obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
			
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -10,10 +10,9 @@
 
				 
			
 
				 #include <asm/processor.h>
			
 
				 #include <asm/system.h>
			
 
				+#include <asm/mce.h>
			
 
				 #include <asm/msr.h>
			
 
				 
			
 
				-#include "mce.h"
			
 
				-
			
 
				 /* Machine Check Handler For AMD Athlon/Duron: */
			
 
				 static void k7_machine_check(struct pt_regs *regs, long error_code)
			
 
				 {
			
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -44,7 +44,6 @@
 
				 #include <asm/msr.h>
			
 
				 
			
 
				 #include "mce-internal.h"
			
 
				-#include "mce.h"
			
 
				 
			
 
				 /* Handle unconfigured int18 (should never happen) */
			
 
				 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
			
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 
				 void (*machine_check_vector)(struct pt_regs *, long error_code) =
			
 
				 						unexpected_machine_check;
			
 
				 
			
 
				-int				mce_disabled;
			
 
				+int mce_disabled __read_mostly;
			
 
				 
			
 
				 #ifdef CONFIG_X86_NEW_MCE
			
 
				 
			
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
 
				  *   2: SIGBUS or log uncorrected errors (if possible), log corrected errors
			
 
				  *   3: never panic or SIGBUS, log all errors (for testing only)
			
 
				  */
			
 
				-static int			tolerant = 1;
			
 
				-static int			banks;
			
 
				-static u64			*bank;
			
 
				-static unsigned long		notify_user;
			
 
				-static int			rip_msr;
			
 
				-static int			mce_bootlog = -1;
			
 
				-static int			monarch_timeout = -1;
			
 
				-static int			mce_panic_timeout;
			
 
				-static int			mce_dont_log_ce;
			
 
				-int				mce_cmci_disabled;
			
 
				-int				mce_ignore_ce;
			
 
				-int				mce_ser;
			
 
				-
			
 
				-static char			trigger[128];
			
 
				-static char			*trigger_argv[2] = { trigger, NULL };
			
 
				+static int			tolerant		__read_mostly = 1;
			
 
				+static int			banks			__read_mostly;
			
 
				+static u64			*bank			__read_mostly;
			
 
				+static int			rip_msr			__read_mostly;
			
 
				+static int			mce_bootlog		__read_mostly = -1;
			
 
				+static int			monarch_timeout		__read_mostly = -1;
			
 
				+static int			mce_panic_timeout	__read_mostly;
			
 
				+static int			mce_dont_log_ce		__read_mostly;
			
 
				+int				mce_cmci_disabled	__read_mostly;
			
 
				+int				mce_ignore_ce		__read_mostly;
			
 
				+int				mce_ser			__read_mostly;
			
 
				+
			
 
				+/* User mode helper program triggered by machine check event */
			
 
				+static unsigned long		mce_need_notify;
			
 
				+static char			mce_helper[128];
			
 
				+static char			*mce_helper_argv[2] = { mce_helper, NULL };
			
 
				 
			
 
				 static unsigned long		dont_init_banks;
			
 
				 
			
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce)
 
				 	wmb();
			
 
				 
			
 
				 	mce->finished = 1;
			
 
				-	set_bit(0, &notify_user);
			
 
				+	set_bit(0, &mce_need_notify);
			
 
				 }
			
 
				 
			
 
				 static void print_mce(struct mce *m)
			
@@ -691,18 +691,21 @@ static atomic_t global_nwo;
 
				  * in the entry order.
			
 
				  * TBD double check parallel CPU hotunplug
			
 
				  */
			
 
				-static int mce_start(int no_way_out, int *order)
			
 
				+static int mce_start(int *no_way_out)
			
 
				 {
			
 
				-	int nwo;
			
 
				+	int order;
			
 
				 	int cpus = num_online_cpus();
			
 
				 	u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
			
 
				 
			
 
				-	if (!timeout) {
			
 
				-		*order = -1;
			
 
				-		return no_way_out;
			
 
				-	}
			
 
				+	if (!timeout)
			
 
				+		return -1;
			
 
				 
			
 
				-	atomic_add(no_way_out, &global_nwo);
			
 
				+	atomic_add(*no_way_out, &global_nwo);
			
 
				+	/*
			
 
				+	 * global_nwo should be updated before mce_callin
			
 
				+	 */
			
 
				+	smp_wmb();
			
 
				+	order = atomic_add_return(1, &mce_callin);
			
 
				 
			
 
				 	/*
			
 
				 	 * Wait for everyone.
			
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order)
 
				 	while (atomic_read(&mce_callin) != cpus) {
			
 
				 		if (mce_timed_out(&timeout)) {
			
 
				 			atomic_set(&global_nwo, 0);
			
 
				-			*order = -1;
			
 
				-			return no_way_out;
			
 
				+			return -1;
			
 
				 		}
			
 
				 		ndelay(SPINUNIT);
			
 
				 	}
			
 
				 
			
 
				 	/*
			
 
				-	 * Cache the global no_way_out state.
			
 
				+	 * mce_callin should be read before global_nwo
			
 
				 	 */
			
 
				-	nwo = atomic_read(&global_nwo);
			
 
				+	smp_rmb();
			
 
				 
			
 
				-	/*
			
 
				-	 * Monarch starts executing now, the others wait.
			
 
				-	 */
			
 
				-	if (*order == 1) {
			
 
				+	if (order == 1) {
			
 
				+		/*
			
 
				+		 * Monarch: Starts executing now, the others wait.
			
 
				+		 */
			
 
				 		atomic_set(&mce_executing, 1);
			
 
				-		return nwo;
			
 
				+	} else {
			
 
				+		/*
			
 
				+		 * Subject: Now start the scanning loop one by one in
			
 
				+		 * the original callin order.
			
 
				+		 * This way when there are any shared banks it will be
			
 
				+		 * only seen by one CPU before cleared, avoiding duplicates.
			
 
				+		 */
			
 
				+		while (atomic_read(&mce_executing) < order) {
			
 
				+			if (mce_timed_out(&timeout)) {
			
 
				+				atomic_set(&global_nwo, 0);
			
 
				+				return -1;
			
 
				+			}
			
 
				+			ndelay(SPINUNIT);
			
 
				+		}
			
 
				 	}
			
 
				 
			
 
				 	/*
			
 
				-	 * Now start the scanning loop one by one
			
 
				-	 * in the original callin order.
			
 
				-	 * This way when there are any shared banks it will
			
 
				-	 * be only seen by one CPU before cleared, avoiding duplicates.
			
 
				+	 * Cache the global no_way_out state.
			
 
				 	 */
			
 
				-	while (atomic_read(&mce_executing) < *order) {
			
 
				-		if (mce_timed_out(&timeout)) {
			
 
				-			atomic_set(&global_nwo, 0);
			
 
				-			*order = -1;
			
 
				-			return no_way_out;
			
 
				-		}
			
 
				-		ndelay(SPINUNIT);
			
 
				-	}
			
 
				-	return nwo;
			
 
				+	*no_way_out = atomic_read(&global_nwo);
			
 
				+
			
 
				+	return order;
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
				 	 * check handler.
			
 
				 	 */
			
 
				 	int order;
			
 
				-
			
 
				 	/*
			
 
				 	 * If no_way_out gets set, there is no safe way to recover from this
			
 
				 	 * MCE.  If tolerant is cranked up, we'll try anyway.
			
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
				 	if (!banks)
			
 
				 		goto out;
			
 
				 
			
 
				-	order = atomic_add_return(1, &mce_callin);
			
 
				 	mce_setup(&m);
			
 
				 
			
 
				 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
			
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
				 	 * This way we don't report duplicated events on shared banks
			
 
				 	 * because the first one to see it will clear it.
			
 
				 	 */
			
 
				-	no_way_out = mce_start(no_way_out, &order);
			
 
				+	order = mce_start(&no_way_out);
			
 
				 	for (i = 0; i < banks; i++) {
			
 
				 		__clear_bit(i, toclear);
			
 
				 		if (!bank[i])
			
@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data)
 
				 
			
 
				 static void mce_do_trigger(struct work_struct *work)
			
 
				 {
			
 
				-	call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
			
 
				+	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
			
 
				 }
			
 
				 
			
 
				 static DECLARE_WORK(mce_trigger_work, mce_do_trigger);
			
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void)
 
				 
			
 
				 	clear_thread_flag(TIF_MCE_NOTIFY);
			
 
				 
			
 
				-	if (test_and_clear_bit(0, &notify_user)) {
			
 
				+	if (test_and_clear_bit(0, &mce_need_notify)) {
			
 
				 		wake_up_interruptible(&mce_wait);
			
 
				 
			
 
				 		/*
			
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void)
 
				 		 * work_pending is always cleared before the function is
			
 
				 		 * executed.
			
 
				 		 */
			
 
				-		if (trigger[0] && !work_pending(&mce_trigger_work))
			
 
				+		if (mce_helper[0] && !work_pending(&mce_trigger_work))
			
 
				 			schedule_work(&mce_trigger_work);
			
 
				 
			
 
				 		if (__ratelimit(&ratelimit))
			
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
 
				 		return;
			
 
				 	switch (c->x86_vendor) {
			
 
				 	case X86_VENDOR_INTEL:
			
 
				-		if (mce_p5_enabled())
			
 
				-			intel_p5_mcheck_init(c);
			
 
				+		intel_p5_mcheck_init(c);
			
 
				 		break;
			
 
				 	case X86_VENDOR_CENTAUR:
			
 
				 		winchip_mcheck_init(c);
			
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev)
 
				 static void mce_cpu_restart(void *data)
			
 
				 {
			
 
				 	del_timer_sync(&__get_cpu_var(mce_timer));
			
 
				-	if (mce_available(&current_cpu_data))
			
 
				-		mce_init();
			
 
				+	if (!mce_available(&current_cpu_data))
			
 
				+		return;
			
 
				+	mce_init();
			
 
				 	mce_init_timer();
			
 
				 }
			
 
				 
			
@@ -1620,6 +1624,26 @@ static void mce_restart(void)
 
				 	on_each_cpu(mce_cpu_restart, NULL, 1);
			
 
				 }
			
 
				 
			
 
				+/* Toggle features for corrected errors */
			
 
				+static void mce_disable_ce(void *all)
			
 
				+{
			
 
				+	if (!mce_available(&current_cpu_data))
			
 
				+		return;
			
 
				+	if (all)
			
 
				+		del_timer_sync(&__get_cpu_var(mce_timer));
			
 
				+	cmci_clear();
			
 
				+}
			
 
				+
			
 
				+static void mce_enable_ce(void *all)
			
 
				+{
			
 
				+	if (!mce_available(&current_cpu_data))
			
 
				+		return;
			
 
				+	cmci_reenable();
			
 
				+	cmci_recheck();
			
 
				+	if (all)
			
 
				+		mce_init_timer();
			
 
				+}
			
 
				+
			
 
				 static struct sysdev_class mce_sysclass = {
			
 
				 	.suspend	= mce_suspend,
			
 
				 	.shutdown	= mce_shutdown,
			
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
 
				 static ssize_t
			
 
				 show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf)
			
 
				 {
			
 
				-	strcpy(buf, trigger);
			
 
				+	strcpy(buf, mce_helper);
			
 
				 	strcat(buf, "\n");
			
 
				-	return strlen(trigger) + 1;
			
 
				+	return strlen(mce_helper) + 1;
			
 
				 }
			
 
				 
			
 
				 static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
			
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 
				 	char *p;
			
 
				 	int len;
			
 
				 
			
 
				-	strncpy(trigger, buf, sizeof(trigger));
			
 
				-	trigger[sizeof(trigger)-1] = 0;
			
 
				-	len = strlen(trigger);
			
 
				-	p = strchr(trigger, '\n');
			
 
				+	strncpy(mce_helper, buf, sizeof(mce_helper));
			
 
				+	mce_helper[sizeof(mce_helper)-1] = 0;
			
 
				+	len = strlen(mce_helper);
			
 
				+	p = strchr(mce_helper, '\n');
			
 
				 
			
 
				 	if (*p)
			
 
				 		*p = 0;
			
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr,
 
				 	return len;
			
 
				 }
			
 
				 
			
 
				+static ssize_t set_ignore_ce(struct sys_device *s,
			
 
				+			     struct sysdev_attribute *attr,
			
 
				+			     const char *buf, size_t size)
			
 
				+{
			
 
				+	u64 new;
			
 
				+
			
 
				+	if (strict_strtoull(buf, 0, &new) < 0)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	if (mce_ignore_ce ^ !!new) {
			
 
				+		if (new) {
			
 
				+			/* disable ce features */
			
 
				+			on_each_cpu(mce_disable_ce, (void *)1, 1);
			
 
				+			mce_ignore_ce = 1;
			
 
				+		} else {
			
 
				+			/* enable ce features */
			
 
				+			mce_ignore_ce = 0;
			
 
				+			on_each_cpu(mce_enable_ce, (void *)1, 1);
			
 
				+		}
			
 
				+	}
			
 
				+	return size;
			
 
				+}
			
 
				+
			
 
				+static ssize_t set_cmci_disabled(struct sys_device *s,
			
 
				+				 struct sysdev_attribute *attr,
			
 
				+				 const char *buf, size_t size)
			
 
				+{
			
 
				+	u64 new;
			
 
				+
			
 
				+	if (strict_strtoull(buf, 0, &new) < 0)
			
 
				+		return -EINVAL;
			
 
				+
			
 
				+	if (mce_cmci_disabled ^ !!new) {
			
 
				+		if (new) {
			
 
				+			/* disable cmci */
			
 
				+			on_each_cpu(mce_disable_ce, NULL, 1);
			
 
				+			mce_cmci_disabled = 1;
			
 
				+		} else {
			
 
				+			/* enable cmci */
			
 
				+			mce_cmci_disabled = 0;
			
 
				+			on_each_cpu(mce_enable_ce, NULL, 1);
			
 
				+		}
			
 
				+	}
			
 
				+	return size;
			
 
				+}
			
 
				+
			
 
				 static ssize_t store_int_with_restart(struct sys_device *s,
			
 
				 				      struct sysdev_attribute *attr,
			
 
				 				      const char *buf, size_t size)
			
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s,
 
				 static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger);
			
 
				 static SYSDEV_INT_ATTR(tolerant, 0644, tolerant);
			
 
				 static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
			
 
				+static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
			
 
				 
			
 
				 static struct sysdev_ext_attribute attr_check_interval = {
			
 
				 	_SYSDEV_ATTR(check_interval, 0644, sysdev_show_int,
			
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = {
 
				 	&check_interval
			
 
				 };
			
 
				 
			
 
				+static struct sysdev_ext_attribute attr_ignore_ce = {
			
 
				+	_SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce),
			
 
				+	&mce_ignore_ce
			
 
				+};
			
 
				+
			
 
				+static struct sysdev_ext_attribute attr_cmci_disabled = {
			
 
				+	_SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled),
			
 
				+	&mce_cmci_disabled
			
 
				+};
			
 
				+
			
 
				 static struct sysdev_attribute *mce_attrs[] = {
			
 
				-	&attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger,
			
 
				+	&attr_tolerant.attr,
			
 
				+	&attr_check_interval.attr,
			
 
				+	&attr_trigger,
			
 
				 	&attr_monarch_timeout.attr,
			
 
				+	&attr_dont_log_ce.attr,
			
 
				+	&attr_ignore_ce.attr,
			
 
				+	&attr_cmci_disabled.attr,
			
 
				 	NULL
			
 
				 };
			
 
				 
			
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized;
 
				 static __cpuinit int mce_create_device(unsigned int cpu)
			
 
				 {
			
 
				 	int err;
			
 
				-	int i;
			
 
				+	int i, j;
			
 
				 
			
 
				 	if (!mce_available(&boot_cpu_data))
			
 
				 		return -EIO;
			
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 
				 		if (err)
			
 
				 			goto error;
			
 
				 	}
			
 
				-	for (i = 0; i < banks; i++) {
			
 
				+	for (j = 0; j < banks; j++) {
			
 
				 		err = sysdev_create_file(&per_cpu(mce_dev, cpu),
			
 
				-					&bank_attrs[i]);
			
 
				+					&bank_attrs[j]);
			
 
				 		if (err)
			
 
				 			goto error2;
			
 
				 	}
			
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 
				 
			
 
				 	return 0;
			
 
				 error2:
			
 
				-	while (--i >= 0)
			
 
				-		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
			
 
				+	while (--j >= 0)
			
 
				+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
			
 
				 error:
			
 
				 	while (--i >= 0)
			
 
				 		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
			
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void)
 
				 	if (!mce_available(&boot_cpu_data))
			
 
				 		return -EIO;
			
 
				 
			
 
				-	alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
			
 
				+	zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
			
 
				 
			
 
				 	err = mce_init_banks();
			
 
				 	if (err)
			
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
 
				 /* This has to be run for each processor */
			
 
				 void mcheck_init(struct cpuinfo_x86 *c)
			
 
				 {
			
 
				-	if (mce_disabled == 1)
			
 
				+	if (mce_disabled)
			
 
				 		return;
			
 
				 
			
 
				 	switch (c->x86_vendor) {
			
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c)
 
				 
			
 
				 static int __init mcheck_enable(char *str)
			
 
				 {
			
 
				-	mce_disabled = -1;
			
 
				+	mce_p5_enabled = 1;
			
 
				 	return 1;
			
 
				 }
			
 
				-
			
 
				 __setup("mce", mcheck_enable);
			
 
				 
			
 
				 #endif /* CONFIG_X86_OLD_MCE */
			
--- a/arch/x86/kernel/cpu/mcheck/mce.h
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -1,38 +0,0 @@
 
				-#include <linux/init.h>
			
 
				-#include <asm/mce.h>
			
 
				-
			
 
				-#ifdef CONFIG_X86_OLD_MCE
			
 
				-void amd_mcheck_init(struct cpuinfo_x86 *c);
			
 
				-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
			
 
				-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
			
 
				-#endif
			
 
				-
			
 
				-#ifdef CONFIG_X86_ANCIENT_MCE
			
 
				-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
			
 
				-void winchip_mcheck_init(struct cpuinfo_x86 *c);
			
 
				-extern int mce_p5_enable;
			
 
				-static inline int mce_p5_enabled(void) { return mce_p5_enable; }
			
 
				-static inline void enable_p5_mce(void) { mce_p5_enable = 1; }
			
 
				-#else
			
 
				-static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {}
			
 
				-static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
			
 
				-static inline int mce_p5_enabled(void) { return 0; }
			
 
				-static inline void enable_p5_mce(void) { }
			
 
				-#endif
			
 
				-
			
 
				-/* Call the installed machine check handler for this CPU setup. */
			
 
				-extern void (*machine_check_vector)(struct pt_regs *, long error_code);
			
 
				-
			
 
				-#ifdef CONFIG_X86_OLD_MCE
			
 
				-
			
 
				-extern int nr_mce_banks;
			
 
				-
			
 
				-void intel_set_thermal_handler(void);
			
 
				-
			
 
				-#else
			
 
				-
			
 
				-static inline void intel_set_thermal_handler(void) { }
			
 
				-
			
 
				-#endif
			
 
				-
			
 
				-void intel_init_thermal(struct cpuinfo_x86 *c);
			
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -1,80 +1,226 @@
 
				 /*
			
 
				- * Common code for Intel machine checks
			
 
				+ * Intel specific MCE features.
			
 
				+ * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
			
 
				+ * Copyright (C) 2008, 2009 Intel Corporation
			
 
				+ * Author: Andi Kleen
			
 
				  */
			
 
				-#include <linux/interrupt.h>
			
 
				-#include <linux/kernel.h>
			
 
				-#include <linux/types.h>
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/smp.h>
			
 
				 
			
 
				-#include <asm/therm_throt.h>
			
 
				-#include <asm/processor.h>
			
 
				-#include <asm/system.h>
			
 
				+#include <linux/init.h>
			
 
				+#include <linux/interrupt.h>
			
 
				+#include <linux/percpu.h>
			
 
				 #include <asm/apic.h>
			
 
				+#include <asm/processor.h>
			
 
				 #include <asm/msr.h>
			
 
				+#include <asm/mce.h>
			
 
				+
			
 
				+/*
			
 
				+ * Support for Intel Corrected Machine Check Interrupts. This allows
			
 
				+ * the CPU to raise an interrupt when a corrected machine check happened.
			
 
				+ * Normally we pick those up using a regular polling timer.
			
 
				+ * Also supports reliable discovery of shared banks.
			
 
				+ */
			
 
				 
			
 
				-#include "mce.h"
			
 
				+static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
			
 
				 
			
 
				-void intel_init_thermal(struct cpuinfo_x86 *c)
			
 
				+/*
			
 
				+ * cmci_discover_lock protects against parallel discovery attempts
			
 
				+ * which could race against each other.
			
 
				+ */
			
 
				+static DEFINE_SPINLOCK(cmci_discover_lock);
			
 
				+
			
 
				+#define CMCI_THRESHOLD 1
			
 
				+
			
 
				+static int cmci_supported(int *banks)
			
 
				 {
			
 
				-	unsigned int cpu = smp_processor_id();
			
 
				-	int tm2 = 0;
			
 
				-	u32 l, h;
			
 
				+	u64 cap;
			
 
				+
			
 
				+	if (mce_cmci_disabled || mce_ignore_ce)
			
 
				+		return 0;
			
 
				 
			
 
				 	/*
			
 
				-	 * Thermal monitoring depends on ACPI, clock modulation
			
 
				-	 * and APIC as well
			
 
				+	 * Vendor check is not strictly needed, but the initial
			
 
				+	 * initialization is vendor keyed and this
			
 
				+	 * makes sure none of the backdoors are entered otherwise.
			
 
				 	 */
			
 
				-	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC) ||
			
 
				-		!cpu_has(c, X86_FEATURE_APIC)) {
			
 
				-		pr_debug("Thermal monitoring disabled\n");
			
 
				-		return;
			
 
				+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
			
 
				+		return 0;
			
 
				+	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
			
 
				+		return 0;
			
 
				+	rdmsrl(MSR_IA32_MCG_CAP, cap);
			
 
				+	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
			
 
				+	return !!(cap & MCG_CMCI_P);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * The interrupt handler. This is called on every event.
			
 
				+ * Just call the poller directly to log any events.
			
 
				+ * This could in theory increase the threshold under high load,
			
 
				+ * but doesn't for now.
			
 
				+ */
			
 
				+static void intel_threshold_interrupt(void)
			
 
				+{
			
 
				+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
			
 
				+	mce_notify_irq();
			
 
				+}
			
 
				+
			
 
				+static void print_update(char *type, int *hdr, int num)
			
 
				+{
			
 
				+	if (*hdr == 0)
			
 
				+		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
			
 
				+	*hdr = 1;
			
 
				+	printk(KERN_CONT " %s:%d", type, num);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
			
 
				+ * on this CPU. Use the algorithm recommended in the SDM to discover shared
			
 
				+ * banks.
			
 
				+ */
			
 
				+static void cmci_discover(int banks, int boot)
			
 
				+{
			
 
				+	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
			
 
				+	unsigned long flags;
			
 
				+	int hdr = 0;
			
 
				+	int i;
			
 
				+
			
 
				+	spin_lock_irqsave(&cmci_discover_lock, flags);
			
 
				+	for (i = 0; i < banks; i++) {
			
 
				+		u64 val;
			
 
				+
			
 
				+		if (test_bit(i, owned))
			
 
				+			continue;
			
 
				+
			
 
				+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				+
			
 
				+		/* Already owned by someone else? */
			
 
				+		if (val & CMCI_EN) {
			
 
				+			if (test_and_clear_bit(i, owned) || boot)
			
 
				+				print_update("SHD", &hdr, i);
			
 
				+			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			
 
				+			continue;
			
 
				+		}
			
 
				+
			
 
				+		val |= CMCI_EN | CMCI_THRESHOLD;
			
 
				+		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				+
			
 
				+		/* Did the enable bit stick? -- the bank supports CMCI */
			
 
				+		if (val & CMCI_EN) {
			
 
				+			if (!test_and_set_bit(i, owned) || boot)
			
 
				+				print_update("CMCI", &hdr, i);
			
 
				+			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			
 
				+		} else {
			
 
				+			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
			
 
				+		}
			
 
				 	}
			
 
				+	spin_unlock_irqrestore(&cmci_discover_lock, flags);
			
 
				+	if (hdr)
			
 
				+		printk(KERN_CONT "\n");
			
 
				+}
			
 
				 
			
 
				-	/*
			
 
				-	 * First check if its enabled already, in which case there might
			
 
				-	 * be some SMM goo which handles it, so we can't even put a handler
			
 
				-	 * since it might be delivered via SMI already:
			
 
				-	 */
			
 
				-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
			
 
				-	h = apic_read(APIC_LVTTHMR);
			
 
				-	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
			
 
				-		printk(KERN_DEBUG
			
 
				-		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
			
 
				+/*
			
 
				+ * Just in case we missed an event during initialization, check
			
 
				+ * all the CMCI owned banks.
			
 
				+ */
			
 
				+void cmci_recheck(void)
			
 
				+{
			
 
				+	unsigned long flags;
			
 
				+	int banks;
			
 
				+
			
 
				+	if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
			
 
				 		return;
			
 
				-	}
			
 
				+	local_irq_save(flags);
			
 
				+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
			
 
				+	local_irq_restore(flags);
			
 
				+}
			
 
				 
			
 
				-	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
			
 
				-		tm2 = 1;
			
 
				+/*
			
 
				+ * Disable CMCI on this CPU for all banks it owns when it goes down.
			
 
				+ * This allows other CPUs to claim the banks on rediscovery.
			
 
				+ */
			
 
				+void cmci_clear(void)
			
 
				+{
			
 
				+	unsigned long flags;
			
 
				+	int i;
			
 
				+	int banks;
			
 
				+	u64 val;
			
 
				 
			
 
				-	/* Check whether a vector already exists */
			
 
				-	if (h & APIC_VECTOR_MASK) {
			
 
				-		printk(KERN_DEBUG
			
 
				-		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
			
 
				-		       cpu, (h & APIC_VECTOR_MASK));
			
 
				+	if (!cmci_supported(&banks))
			
 
				 		return;
			
 
				+	spin_lock_irqsave(&cmci_discover_lock, flags);
			
 
				+	for (i = 0; i < banks; i++) {
			
 
				+		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
			
 
				+			continue;
			
 
				+		/* Disable CMCI */
			
 
				+		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				+		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
			
 
				+		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				+		__clear_bit(i, __get_cpu_var(mce_banks_owned));
			
 
				 	}
			
 
				+	spin_unlock_irqrestore(&cmci_discover_lock, flags);
			
 
				+}
			
 
				 
			
 
				-	/* We'll mask the thermal vector in the lapic till we're ready: */
			
 
				-	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
			
 
				-	apic_write(APIC_LVTTHMR, h);
			
 
				+/*
			
 
				+ * After a CPU went down, cycle through all the others and rediscover.
			
 
				+ * Must run in process context.
			
 
				+ */
			
 
				+void cmci_rediscover(int dying)
			
 
				+{
			
 
				+	int banks;
			
 
				+	int cpu;
			
 
				+	cpumask_var_t old;
			
 
				 
			
 
				-	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
			
 
				-	wrmsr(MSR_IA32_THERM_INTERRUPT,
			
 
				-		l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
			
 
				+	if (!cmci_supported(&banks))
			
 
				+		return;
			
 
				+	if (!alloc_cpumask_var(&old, GFP_KERNEL))
			
 
				+		return;
			
 
				+	cpumask_copy(old, &current->cpus_allowed);
			
 
				+
			
 
				+	for_each_online_cpu(cpu) {
			
 
				+		if (cpu == dying)
			
 
				+			continue;
			
 
				+		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
			
 
				+			continue;
			
 
				+		/* Recheck banks in case CPUs don't all have the same number */
			
 
				+		if (cmci_supported(&banks))
			
 
				+			cmci_discover(banks, 0);
			
 
				+	}
			
 
				 
			
 
				-	intel_set_thermal_handler();
			
 
				+	set_cpus_allowed_ptr(current, old);
			
 
				+	free_cpumask_var(old);
			
 
				+}
			
 
				+
			
 
				+/*
			
 
				+ * Reenable CMCI on this CPU in case a CPU down failed.
			
 
				+ */
			
 
				+void cmci_reenable(void)
			
 
				+{
			
 
				+	int banks;
			
 
				+	if (cmci_supported(&banks))
			
 
				+		cmci_discover(banks, 0);
			
 
				+}
			
 
				 
			
 
				-	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
			
 
				-	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
			
 
				+static void intel_init_cmci(void)
			
 
				+{
			
 
				+	int banks;
			
 
				 
			
 
				-	/* Unmask the thermal vector: */
			
 
				-	l = apic_read(APIC_LVTTHMR);
			
 
				-	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
			
 
				+	if (!cmci_supported(&banks))
			
 
				+		return;
			
 
				 
			
 
				-	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
			
 
				-	       cpu, tm2 ? "TM2" : "TM1");
			
 
				+	mce_threshold_vector = intel_threshold_interrupt;
			
 
				+	cmci_discover(banks, 1);
			
 
				+	/*
			
 
				+	 * For CPU #0 this runs with the APIC still disabled, but that's
			
 
				+	 * ok because only the vector is set up. We still do another
			
 
				+	 * check for the banks later for CPU #0 just to make sure
			
 
				+	 * to not miss any events.
			
 
				+	 */
			
 
				+	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
			
 
				+	cmci_recheck();
			
 
				+}
			
 
				 
			
 
				-	/* enable thermal throttle processing */
			
 
				-	atomic_set(&therm_throt_en, 1);
			
 
				+void mce_intel_feature_init(struct cpuinfo_x86 *c)
			
 
				+{
			
 
				+	intel_init_thermal(c);
			
 
				+	intel_init_cmci();
			
 
				 }
			
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -1,248 +0,0 @@
 
				-/*
			
 
				- * Intel specific MCE features.
			
 
				- * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
			
 
				- * Copyright (C) 2008, 2009 Intel Corporation
			
 
				- * Author: Andi Kleen
			
 
				- */
			
 
				-
			
 
				-#include <linux/init.h>
			
 
				-#include <linux/interrupt.h>
			
 
				-#include <linux/percpu.h>
			
 
				-#include <asm/processor.h>
			
 
				-#include <asm/apic.h>
			
 
				-#include <asm/msr.h>
			
 
				-#include <asm/mce.h>
			
 
				-#include <asm/hw_irq.h>
			
 
				-#include <asm/idle.h>
			
 
				-#include <asm/therm_throt.h>
			
 
				-
			
 
				-#include "mce.h"
			
 
				-
			
 
				-asmlinkage void smp_thermal_interrupt(void)
			
 
				-{
			
 
				-	__u64 msr_val;
			
 
				-
			
 
				-	ack_APIC_irq();
			
 
				-
			
 
				-	exit_idle();
			
 
				-	irq_enter();
			
 
				-
			
 
				-	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
			
 
				-	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
			
 
				-		mce_log_therm_throt_event(msr_val);
			
 
				-
			
 
				-	inc_irq_stat(irq_thermal_count);
			
 
				-	irq_exit();
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Support for Intel Correct Machine Check Interrupts. This allows
			
 
				- * the CPU to raise an interrupt when a corrected machine check happened.
			
 
				- * Normally we pick those up using a regular polling timer.
			
 
				- * Also supports reliable discovery of shared banks.
			
 
				- */
			
 
				-
			
 
				-static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
			
 
				-
			
 
				-/*
			
 
				- * cmci_discover_lock protects against parallel discovery attempts
			
 
				- * which could race against each other.
			
 
				- */
			
 
				-static DEFINE_SPINLOCK(cmci_discover_lock);
			
 
				-
			
 
				-#define CMCI_THRESHOLD 1
			
 
				-
			
 
				-static int cmci_supported(int *banks)
			
 
				-{
			
 
				-	u64 cap;
			
 
				-
			
 
				-	if (mce_cmci_disabled || mce_ignore_ce)
			
 
				-		return 0;
			
 
				-
			
 
				-	/*
			
 
				-	 * Vendor check is not strictly needed, but the initial
			
 
				-	 * initialization is vendor keyed and this
			
 
				-	 * makes sure none of the backdoors are entered otherwise.
			
 
				-	 */
			
 
				-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
			
 
				-		return 0;
			
 
				-	if (!cpu_has_apic || lapic_get_maxlvt() < 6)
			
 
				-		return 0;
			
 
				-	rdmsrl(MSR_IA32_MCG_CAP, cap);
			
 
				-	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
			
 
				-	return !!(cap & MCG_CMCI_P);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * The interrupt handler. This is called on every event.
			
 
				- * Just call the poller directly to log any events.
			
 
				- * This could in theory increase the threshold under high load,
			
 
				- * but doesn't for now.
			
 
				- */
			
 
				-static void intel_threshold_interrupt(void)
			
 
				-{
			
 
				-	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
			
 
				-	mce_notify_irq();
			
 
				-}
			
 
				-
			
 
				-static void print_update(char *type, int *hdr, int num)
			
 
				-{
			
 
				-	if (*hdr == 0)
			
 
				-		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
			
 
				-	*hdr = 1;
			
 
				-	printk(KERN_CONT " %s:%d", type, num);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
			
 
				- * on this CPU. Use the algorithm recommended in the SDM to discover shared
			
 
				- * banks.
			
 
				- */
			
 
				-static void cmci_discover(int banks, int boot)
			
 
				-{
			
 
				-	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
			
 
				-	unsigned long flags;
			
 
				-	int hdr = 0;
			
 
				-	int i;
			
 
				-
			
 
				-	spin_lock_irqsave(&cmci_discover_lock, flags);
			
 
				-	for (i = 0; i < banks; i++) {
			
 
				-		u64 val;
			
 
				-
			
 
				-		if (test_bit(i, owned))
			
 
				-			continue;
			
 
				-
			
 
				-		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				-
			
 
				-		/* Already owned by someone else? */
			
 
				-		if (val & CMCI_EN) {
			
 
				-			if (test_and_clear_bit(i, owned) || boot)
			
 
				-				print_update("SHD", &hdr, i);
			
 
				-			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			
 
				-			continue;
			
 
				-		}
			
 
				-
			
 
				-		val |= CMCI_EN | CMCI_THRESHOLD;
			
 
				-		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				-		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				-
			
 
				-		/* Did the enable bit stick? -- the bank supports CMCI */
			
 
				-		if (val & CMCI_EN) {
			
 
				-			if (!test_and_set_bit(i, owned) || boot)
			
 
				-				print_update("CMCI", &hdr, i);
			
 
				-			__clear_bit(i, __get_cpu_var(mce_poll_banks));
			
 
				-		} else {
			
 
				-			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
			
 
				-		}
			
 
				-	}
			
 
				-	spin_unlock_irqrestore(&cmci_discover_lock, flags);
			
 
				-	if (hdr)
			
 
				-		printk(KERN_CONT "\n");
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Just in case we missed an event during initialization check
			
 
				- * all the CMCI owned banks.
			
 
				- */
			
 
				-void cmci_recheck(void)
			
 
				-{
			
 
				-	unsigned long flags;
			
 
				-	int banks;
			
 
				-
			
 
				-	if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
			
 
				-		return;
			
 
				-	local_irq_save(flags);
			
 
				-	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
			
 
				-	local_irq_restore(flags);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Disable CMCI on this CPU for all banks it owns when it goes down.
			
 
				- * This allows other CPUs to claim the banks on rediscovery.
			
 
				- */
			
 
				-void cmci_clear(void)
			
 
				-{
			
 
				-	unsigned long flags;
			
 
				-	int i;
			
 
				-	int banks;
			
 
				-	u64 val;
			
 
				-
			
 
				-	if (!cmci_supported(&banks))
			
 
				-		return;
			
 
				-	spin_lock_irqsave(&cmci_discover_lock, flags);
			
 
				-	for (i = 0; i < banks; i++) {
			
 
				-		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
			
 
				-			continue;
			
 
				-		/* Disable CMCI */
			
 
				-		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				-		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
			
 
				-		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
			
 
				-		__clear_bit(i, __get_cpu_var(mce_banks_owned));
			
 
				-	}
			
 
				-	spin_unlock_irqrestore(&cmci_discover_lock, flags);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * After a CPU went down cycle through all the others and rediscover
			
 
				- * Must run in process context.
			
 
				- */
			
 
				-void cmci_rediscover(int dying)
			
 
				-{
			
 
				-	int banks;
			
 
				-	int cpu;
			
 
				-	cpumask_var_t old;
			
 
				-
			
 
				-	if (!cmci_supported(&banks))
			
 
				-		return;
			
 
				-	if (!alloc_cpumask_var(&old, GFP_KERNEL))
			
 
				-		return;
			
 
				-	cpumask_copy(old, &current->cpus_allowed);
			
 
				-
			
 
				-	for_each_online_cpu(cpu) {
			
 
				-		if (cpu == dying)
			
 
				-			continue;
			
 
				-		if (set_cpus_allowed_ptr(current, cpumask_of(cpu)))
			
 
				-			continue;
			
 
				-		/* Recheck banks in case CPUs don't all have the same */
			
 
				-		if (cmci_supported(&banks))
			
 
				-			cmci_discover(banks, 0);
			
 
				-	}
			
 
				-
			
 
				-	set_cpus_allowed_ptr(current, old);
			
 
				-	free_cpumask_var(old);
			
 
				-}
			
 
				-
			
 
				-/*
			
 
				- * Reenable CMCI on this CPU in case a CPU down failed.
			
 
				- */
			
 
				-void cmci_reenable(void)
			
 
				-{
			
 
				-	int banks;
			
 
				-	if (cmci_supported(&banks))
			
 
				-		cmci_discover(banks, 0);
			
 
				-}
			
 
				-
			
 
				-static void intel_init_cmci(void)
			
 
				-{
			
 
				-	int banks;
			
 
				-
			
 
				-	if (!cmci_supported(&banks))
			
 
				-		return;
			
 
				-
			
 
				-	mce_threshold_vector = intel_threshold_interrupt;
			
 
				-	cmci_discover(banks, 1);
			
 
				-	/*
			
 
				-	 * For CPU #0 this runs with still disabled APIC, but that's
			
 
				-	 * ok because only the vector is set up. We still do another
			
 
				-	 * check for the banks later for CPU #0 just to make sure
			
 
				-	 * to not miss any events.
			
 
				-	 */
			
 
				-	apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
			
 
				-	cmci_recheck();
			
 
				-}
			
 
				-
			
 
				-void mce_intel_feature_init(struct cpuinfo_x86 *c)
			
 
				-{
			
 
				-	intel_init_thermal(c);
			
 
				-	intel_init_cmci();
			
 
				-}
			
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -17,10 +17,9 @@
 
				 
			
 
				 #include <asm/processor.h>
			
 
				 #include <asm/system.h>
			
 
				+#include <asm/mce.h>
			
 
				 #include <asm/msr.h>
			
 
				 
			
 
				-#include "mce.h"
			
 
				-
			
 
				 static int		firstbank;
			
 
				 
			
 
				 #define MCE_RATE	(15*HZ)	/* timer rate is 15s */
			
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -1,21 +1,15 @@
 
				 /*
			
 
				  * P4 specific Machine Check Exception Reporting
			
 
				  */
			
 
				-
			
 
				-#include <linux/interrupt.h>
			
 
				 #include <linux/kernel.h>
			
 
				 #include <linux/types.h>
			
 
				 #include <linux/init.h>
			
 
				 #include <linux/smp.h>
			
 
				 
			
 
				-#include <asm/therm_throt.h>
			
 
				 #include <asm/processor.h>
			
 
				-#include <asm/system.h>
			
 
				-#include <asm/apic.h>
			
 
				+#include <asm/mce.h>
			
 
				 #include <asm/msr.h>
			
 
				 
			
 
				-#include "mce.h"
			
 
				-
			
 
				 /* as supported by the P4/Xeon family */
			
 
				 struct intel_mce_extended_msrs {
			
 
				 	u32 eax;
			
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
 
				 
			
 
				 static int mce_num_extended_msrs;
			
 
				 
			
 
				-
			
 
				-#ifdef CONFIG_X86_MCE_P4THERMAL
			
 
				-
			
 
				-static void unexpected_thermal_interrupt(struct pt_regs *regs)
			
 
				-{
			
 
				-	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
			
 
				-			smp_processor_id());
			
 
				-	add_taint(TAINT_MACHINE_CHECK);
			
 
				-}
			
 
				-
			
 
				-/* P4/Xeon Thermal transition interrupt handler: */
			
 
				-static void intel_thermal_interrupt(struct pt_regs *regs)
			
 
				-{
			
 
				-	__u64 msr_val;
			
 
				-
			
 
				-	ack_APIC_irq();
			
 
				-
			
 
				-	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
			
 
				-	therm_throt_process(msr_val & THERM_STATUS_PROCHOT);
			
 
				-}
			
 
				-
			
 
				-/* Thermal interrupt handler for this CPU setup: */
			
 
				-static void (*vendor_thermal_interrupt)(struct pt_regs *regs) =
			
 
				-						unexpected_thermal_interrupt;
			
 
				-
			
 
				-void smp_thermal_interrupt(struct pt_regs *regs)
			
 
				-{
			
 
				-	irq_enter();
			
 
				-	vendor_thermal_interrupt(regs);
			
 
				-	__get_cpu_var(irq_stat).irq_thermal_count++;
			
 
				-	irq_exit();
			
 
				-}
			
 
				-
			
 
				-void intel_set_thermal_handler(void)
			
 
				-{
			
 
				-	vendor_thermal_interrupt = intel_thermal_interrupt;
			
 
				-}
			
 
				-
			
 
				-#endif /* CONFIG_X86_MCE_P4THERMAL */
			
 
				-
			
 
				 /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
			
 
				 static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
			
 
				 {
			
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -10,12 +10,11 @@
 
				 
			
 
				 #include <asm/processor.h>
			
 
				 #include <asm/system.h>
			
 
				+#include <asm/mce.h>
			
 
				 #include <asm/msr.h>
			
 
				 
			
 
				-#include "mce.h"
			
 
				-
			
 
				 /* By default disabled */
			
 
				-int		mce_p5_enable;
			
 
				+int mce_p5_enabled __read_mostly;
			
 
				 
			
 
				 /* Machine check handler for Pentium class Intel CPUs: */
			
 
				 static void pentium_machine_check(struct pt_regs *regs, long error_code)
			
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
 
				 {
			
 
				 	u32 l, h;
			
 
				 
			
 
				-	/* Check for MCE support: */
			
 
				-	if (!cpu_has(c, X86_FEATURE_MCE))
			
 
				+	/* Default P5 to off as its often misconnected: */
			
 
				+	if (!mce_p5_enabled)
			
 
				 		return;
			
 
				 
			
 
				-#ifdef CONFIG_X86_OLD_MCE
			
 
				-	/* Default P5 to off as its often misconnected: */
			
 
				-	if (mce_disabled != -1)
			
 
				+	/* Check for MCE support: */
			
 
				+	if (!cpu_has(c, X86_FEATURE_MCE))
			
 
				 		return;
			
 
				-#endif
			
 
				 
			
 
				 	machine_check_vector = pentium_machine_check;
			
 
				 	/* Make sure the vector pointer is visible before we enable MCEs: */
			
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -10,10 +10,9 @@
 
				 
			
 
				 #include <asm/processor.h>
			
 
				 #include <asm/system.h>
			
 
				+#include <asm/mce.h>
			
 
				 #include <asm/msr.h>
			
 
				 
			
 
				-#include "mce.h"
			
 
				-
			
 
				 /* Machine Check Handler For PII/PIII */
			
 
				 static void intel_machine_check(struct pt_regs *regs, long error_code)
			
 
				 {
			
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -13,13 +13,23 @@
 
				  * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
			
 
				  *          Inspired by Ross Biro's and Al Borchers' counter code.
			
 
				  */
			
 
				+#include <linux/interrupt.h>
			
 
				 #include <linux/notifier.h>
			
 
				 #include <linux/jiffies.h>
			
 
				+#include <linux/kernel.h>
			
 
				 #include <linux/percpu.h>
			
 
				 #include <linux/sysdev.h>
			
 
				+#include <linux/types.h>
			
 
				+#include <linux/init.h>
			
 
				+#include <linux/smp.h>
			
 
				 #include <linux/cpu.h>
			
 
				 
			
 
				-#include <asm/therm_throt.h>
			
 
				+#include <asm/processor.h>
			
 
				+#include <asm/system.h>
			
 
				+#include <asm/apic.h>
			
 
				+#include <asm/idle.h>
			
 
				+#include <asm/mce.h>
			
 
				+#include <asm/msr.h>
			
 
				 
			
 
				 /* How long to wait between reporting thermal events */
			
 
				 #define CHECK_INTERVAL		(300 * HZ)
			
@@ -27,7 +37,7 @@
 
				 static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
			
 
				 static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
			
 
				 
			
 
				-atomic_t therm_throt_en		= ATOMIC_INIT(0);
			
 
				+static atomic_t therm_throt_en		= ATOMIC_INIT(0);
			
 
				 
			
 
				 #ifdef CONFIG_SYSFS
			
 
				 #define define_therm_throt_sysdev_one_ro(_name)				\
			
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
 
				  *          1 : Event should be logged further, and a message has been
			
 
				  *              printed to the syslog.
			
 
				  */
			
 
				-int therm_throt_process(int curr)
			
 
				+static int therm_throt_process(int curr)
			
 
				 {
			
 
				 	unsigned int cpu = smp_processor_id();
			
 
				 	__u64 tmp_jiffs = get_jiffies_64();
			
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
 
				 
			
 
				 	return 0;
			
 
				 }
			
 
				-
			
 
				 device_initcall(thermal_throttle_init_device);
			
 
				+
			
 
				 #endif /* CONFIG_SYSFS */
			
 
				+
			
 
				+/* Thermal transition interrupt handler */
			
 
				+static void intel_thermal_interrupt(void)
			
 
				+{
			
 
				+	__u64 msr_val;
			
 
				+
			
 
				+	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
			
 
				+	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
			
 
				+		mce_log_therm_throt_event(msr_val);
			
 
				+}
			
 
				+
			
 
				+static void unexpected_thermal_interrupt(void)
			
 
				+{
			
 
				+	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
			
 
				+			smp_processor_id());
			
 
				+	add_taint(TAINT_MACHINE_CHECK);
			
 
				+}
			
 
				+
			
 
				+static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
			
 
				+
			
 
				+asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
			
 
				+{
			
 
				+	exit_idle();
			
 
				+	irq_enter();
			
 
				+	inc_irq_stat(irq_thermal_count);
			
 
				+	smp_thermal_vector();
			
 
				+	irq_exit();
			
 
				+	/* Ack only at the end to avoid potential reentry */
			
 
				+	ack_APIC_irq();
			
 
				+}
			
 
				+
			
 
				+void intel_init_thermal(struct cpuinfo_x86 *c)
			
 
				+{
			
 
				+	unsigned int cpu = smp_processor_id();
			
 
				+	int tm2 = 0;
			
 
				+	u32 l, h;
			
 
				+
			
 
				+	/* Thermal monitoring depends on ACPI and clock modulation*/
			
 
				+	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
			
 
				+		return;
			
 
				+
			
 
				+	/*
			
 
				+	 * First check if its enabled already, in which case there might
			
 
				+	 * be some SMM goo which handles it, so we can't even put a handler
			
 
				+	 * since it might be delivered via SMI already:
			
 
				+	 */
			
 
				+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
			
 
				+	h = apic_read(APIC_LVTTHMR);
			
 
				+	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
			
 
				+		printk(KERN_DEBUG
			
 
				+		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
			
 
				+		tm2 = 1;
			
 
				+
			
 
				+	/* Check whether a vector already exists */
			
 
				+	if (h & APIC_VECTOR_MASK) {
			
 
				+		printk(KERN_DEBUG
			
 
				+		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
			
 
				+		       cpu, (h & APIC_VECTOR_MASK));
			
 
				+		return;
			
 
				+	}
			
 
				+
			
 
				+	/* We'll mask the thermal vector in the lapic till we're ready: */
			
 
				+	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
			
 
				+	apic_write(APIC_LVTTHMR, h);
			
 
				+
			
 
				+	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
			
 
				+	wrmsr(MSR_IA32_THERM_INTERRUPT,
			
 
				+		l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
			
 
				+
			
 
				+	smp_thermal_vector = intel_thermal_interrupt;
			
 
				+
			
 
				+	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
			
 
				+	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
			
 
				+
			
 
				+	/* Unmask the thermal vector: */
			
 
				+	l = apic_read(APIC_LVTTHMR);
			
 
				+	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
			
 
				+
			
 
				+	printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
			
 
				+	       cpu, tm2 ? "TM2" : "TM1");
			
 
				+
			
 
				+	/* enable thermal throttle processing */
			
 
				+	atomic_set(&therm_throt_en, 1);
			
 
				+}
			
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -9,10 +9,9 @@
 
				 
			
 
				 #include <asm/processor.h>
			
 
				 #include <asm/system.h>
			
 
				+#include <asm/mce.h>
			
 
				 #include <asm/msr.h>
			
 
				 
			
 
				-#include "mce.h"
			
 
				-
			
 
				 /* Machine check handler for WinChip C6: */
			
 
				 static void winchip_machine_check(struct pt_regs *regs, long error_code)
			
 
				 {
			
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -53,6 +53,7 @@
 
				 #include <asm/traps.h>
			
 
				 #include <asm/desc.h>
			
 
				 #include <asm/i387.h>
			
 
				+#include <asm/mce.h>
			
 
				 
			
 
				 #include <asm/mach_traps.h>
			
 
				 
			
@@ -64,8 +65,6 @@
 
				 #include <asm/setup.h>
			
 
				 #include <asm/traps.h>
			
 
				 
			
 
				-#include "cpu/mcheck/mce.h"
			
 
				-
			
 
				 asmlinkage int system_call(void);
			
 
				 
			
 
				 /* Do we ignore FPU interrupts ? */