@@ -83,6 +83,7 @@ static int mce_dont_log_ce __read_mostly;
 int mce_cmci_disabled __read_mostly;
 int mce_ignore_ce __read_mostly;
 int mce_ser __read_mostly;
+int mce_bios_cmci_threshold __read_mostly;
 
 struct mce_bank *mce_banks __read_mostly;
 
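mce_bios_cmci_threshold is only declared above; the code that acts on it lives
on the Intel side (mce_intel.c) and is not part of this diff. A minimal sketch
of how the CMCI discovery path might honour the flag, where the MSR field
names are assumptions rather than something shown in this patch:

	/* Sketch (assumed, not in this diff): leave the BIOS-programmed CMCI
	 * threshold in MCi_CTL2 alone when mce_bios_cmci_threshold is set,
	 * instead of overwriting it with the kernel's own value. */
	if (!mce_bios_cmci_threshold) {
		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
		val |= CMCI_THRESHOLD;
	}
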
@@ -1266,6 +1267,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+	return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+	mce_adjust_timer_default;
+
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1276,6 +1285,7 @@ static void mce_timer_fn(unsigned long data)
 	if (mce_available(__this_cpu_ptr(&cpu_info))) {
 		machine_check_poll(MCP_TIMESTAMP,
 				&__get_cpu_var(mce_poll_banks));
+		mce_intel_cmci_poll();
 	}
 
 	/*
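mce_intel_cmci_poll() is provided by the companion mce_intel.c change and is
not shown in this diff. The intent visible here: while a CMCI storm is being
handled, CMCI interrupts are off and the regular poll timer sweeps the banks
this CPU owns. A sketch under that assumption (cmci_storm_state and
mce_banks_owned are names assumed from the Intel side, not confirmed by this
patch):

	/* Sketch of the assumed Intel-side hook (lives in mce_intel.c) */
	void mce_intel_cmci_poll(void)
	{
		/* Nothing to do unless this CPU is in storm handling mode */
		if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
			return;
		machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
	}
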
@@ -1283,14 +1293,38 @@ static void mce_timer_fn(unsigned long data)
 	 * polling interval, otherwise increase the polling interval.
 	 */
 	iv = __this_cpu_read(mce_next_interval);
-	if (mce_notify_irq())
+	if (mce_notify_irq()) {
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	else
+	} else {
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+		iv = mce_adjust_timer(iv);
+	}
 	__this_cpu_write(mce_next_interval, iv);
+	/* Might have become 0 after CMCI storm subsided */
+	if (iv) {
+		t->expires = jiffies + iv;
+		add_timer_on(t, smp_processor_id());
+	}
+}
 
-	t->expires = jiffies + iv;
-	add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned long when = jiffies + interval;
+	unsigned long iv = __this_cpu_read(mce_next_interval);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+	if (interval < iv)
+		__this_cpu_write(mce_next_interval, interval);
 }
 
 /* Must not be called in IRQ context where del_timer_sync() can deadlock */
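mce_timer_kick() exists so the interrupt path can pull the poll timer forward
instead of waiting out the current interval. The expected caller is the Intel
CMCI storm detector: on detecting a storm it disables CMCI and kicks the timer
so polling starts promptly. A hedged sketch of that call site, where
cmci_storm_enter is a hypothetical helper name and CMCI_POLL_INTERVAL an
assumed constant on the Intel side:

	/* Sketch (assumed): minimal storm entry path on the Intel side */
	static void cmci_storm_enter(void)
	{
		cmci_clear();				/* stop CMCI interrupts */
		__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
		atomic_inc(&cmci_storm_on_cpus);
		mce_timer_kick(CMCI_POLL_INTERVAL);	/* e.g. 1 * HZ */
	}
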
@@ -1585,6 +1619,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
+		mce_adjust_timer = mce_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
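mce_intel_adjust_timer is the Intel implementation behind the mce_adjust_timer
hook and, again, is not in this file's diff. From its use in mce_timer_fn()
above, its job is to hold the interval at the storm poll rate while a storm is
active and to re-enable CMCI once the storm has subsided on every CPU. A
sketch, assuming the same per-cpu storm state and a global storm CPU count:

	/* Sketch of the assumed Intel hook (lives in mce_intel.c) */
	unsigned long mce_intel_adjust_timer(unsigned long interval)
	{
		if (interval < CMCI_POLL_INTERVAL)
			return interval;

		switch (__this_cpu_read(cmci_storm_state)) {
		case CMCI_STORM_ACTIVE:
			/* Quiet poll at storm rate: this CPU's storm is over */
			__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
			atomic_dec(&cmci_storm_on_cpus);
			/* fall through */
		case CMCI_STORM_SUBSIDED:
			/* Last CPU out switches everyone back to interrupts */
			if (!atomic_read(&cmci_storm_on_cpus)) {
				__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
				cmci_reenable();
			}
			return CMCI_POLL_INTERVAL;
		default:
			/* No storm: leave the computed interval alone */
			return interval;
		}
	}
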
@@ -1594,23 +1629,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
-static void __mcheck_cpu_init_timer(void)
+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-	struct timer_list *t = &__get_cpu_var(mce_timer);
-	unsigned long iv = check_interval * HZ;
+	unsigned long iv = mce_adjust_timer(check_interval * HZ);
 
-	setup_timer(t, mce_timer_fn, smp_processor_id());
+	__this_cpu_write(mce_next_interval, iv);
 
-	if (mce_ignore_ce)
+	if (mce_ignore_ce || !iv)
 		return;
 
-	__this_cpu_write(mce_next_interval, iv);
-	if (!iv)
-		return;
 	t->expires = round_jiffies(jiffies + iv);
 	add_timer_on(t, smp_processor_id());
 }
 
+static void __mcheck_cpu_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned int cpu = smp_processor_id();
+
+	setup_timer(t, mce_timer_fn, cpu);
+	mce_start_timer(cpu, t);
+}
+
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -1907,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
  *	check, or 0 to not wait
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
+ * mce=bios_cmci_threshold Don't program the CMCI threshold
  */
 static int __init mcheck_enable(char *str)
 {
@@ -1926,6 +1967,8 @@ static int __init mcheck_enable(char *str)
 		mce_ignore_ce = 1;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
 		mce_bootlog = (str[0] == 'b');
+	else if (!strcmp(str, "bios_cmci_threshold"))
+		mce_bios_cmci_threshold = 1;
 	else if (isdigit(str[0])) {
 		get_option(&str, &tolerant);
 		if (*str == ',') {
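Together with the documentation hunk above, this makes the switch an ordinary
boot parameter: booting with "mce=bios_cmci_threshold" on the kernel command
line sets mce_bios_cmci_threshold to 1, and the CMCI setup code can then keep
whatever threshold the BIOS programmed rather than overwriting it.
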
@@ -2166,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
 	&mce_cmci_disabled
 };
 
+static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
+	__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
+	&mce_bios_cmci_threshold
+};
+
 static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_tolerant.attr,
 	&dev_attr_check_interval.attr,
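The new attribute is read-only (mode 0444, displayed through the existing
device_show_int helper), so the flag can be inspected at runtime but only set
at boot. Once registered below, it should appear alongside the other MCE
attributes as bios_cmci_threshold under the per-CPU machinecheck devices in
/sys/devices/system/machinecheck/machinecheckN/.
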
@@ -2174,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_dont_log_ce.attr,
 	&dev_attr_ignore_ce.attr,
 	&dev_attr_cmci_disabled.attr,
+	&dev_attr_bios_cmci_threshold.attr,
 	NULL
 };
 
@@ -2294,38 +2343,33 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	unsigned int cpu = (unsigned long)hcpu;
 	struct timer_list *t = &per_cpu(mce_timer, cpu);
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
 		mce_device_create(cpu);
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		break;
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		mce_device_remove(cpu);
+		mce_intel_hcpu_update(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		del_timer_sync(t);
 		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		del_timer_sync(t);
 		break;
 	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		if (!mce_ignore_ce && check_interval) {
-			t->expires = round_jiffies(jiffies +
-					per_cpu(mce_next_interval, cpu));
-			add_timer_on(t, cpu);
-		}
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		mce_start_timer(cpu, t);
 		break;
-	case CPU_POST_DEAD:
+	}
+
+	if (action == CPU_POST_DEAD) {
 		/* intentionally ignoring frozen here */
 		cmci_rediscover(cpu);
-		break;
 	}
+
 	return NOTIFY_OK;
 }
 
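Note how the switch now folds the FROZEN variants via action &
~CPU_TASKS_FROZEN, while CPU_POST_DEAD is checked separately because
cmci_rediscover() intentionally ignores the frozen state (the retained comment
says as much). mce_intel_hcpu_update() itself is the last of the Intel-side
hooks referenced here and, like the others, is not part of this file's diff.
The hotplug concern: if a CPU dies while in storm mode, its contribution to
the global storm count must be dropped, otherwise the surviving CPUs would
never switch back to interrupt mode. A sketch under the same assumed storm
bookkeeping:

	/* Sketch (assumed): forget a dead CPU's storm state on CPU_DEAD */
	void mce_intel_hcpu_update(unsigned long cpu)
	{
		if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
			atomic_dec(&cmci_storm_on_cpus);

		per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
	}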