|
@@ -50,10 +50,19 @@
|
|
#include <linux/poll.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/string.h>
|
|
#include <linux/string.h>
|
|
#include <linux/ctype.h>
|
|
#include <linux/ctype.h>
|
|
|
|
+#include <linux/delay.h>
|
|
#include <asm/atomic.h>
|
|
#include <asm/atomic.h>
|
|
|
|
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
|
|
-#include <asm/apic.h>
|
|
|
|
|
|
+#ifdef CONFIG_X86
|
|
|
|
+/* This is ugly, but I've determined that x86 is the only architecture
|
|
|
|
+ that can reasonably support the IPMI NMI watchdog timeout at this
|
|
|
|
+ time. If another architecture adds this capability somehow, it
|
|
|
|
+ will have to be a somewhat different mechanism and I have no idea
|
|
|
|
+ how it will work. So in the unlikely event that another
|
|
|
|
+ architecture supports this, we can figure out a good generic
|
|
|
|
+ mechanism for it at that time. */
|
|
|
|
+#include <asm/kdebug.h>
|
|
|
|
+#define HAVE_DIE_NMI
|
|
#endif
|
|
#endif
|
|
|
|
|
|
#define PFX "IPMI Watchdog: "
|
|
#define PFX "IPMI Watchdog: "
|
|
@@ -313,6 +322,11 @@ static unsigned char ipmi_version_minor;
|
|
/* If a pretimeout occurs, this is used to allow only one panic to happen. */
|
|
/* If a pretimeout occurs, this is used to allow only one panic to happen. */
|
|
static atomic_t preop_panic_excl = ATOMIC_INIT(-1);
|
|
static atomic_t preop_panic_excl = ATOMIC_INIT(-1);
|
|
|
|
|
|
|
|
+#ifdef HAVE_DIE_NMI
|
|
|
|
+static int testing_nmi;
|
|
|
|
+static int nmi_handler_registered;
|
|
|
|
+#endif
|
|
|
|
+
|
|
static int ipmi_heartbeat(void);
|
|
static int ipmi_heartbeat(void);
|
|
|
|
|
|
/* We use a mutex to make sure that only one thing can send a set
|
|
/* We use a mutex to make sure that only one thing can send a set
|
|
@@ -352,6 +366,9 @@ static int i_ipmi_set_timeout(struct ipmi_smi_msg *smi_msg,
|
|
int hbnow = 0;
|
|
int hbnow = 0;
|
|
|
|
|
|
|
|
|
|
|
|
+ /* These can be cleared as we are setting the timeout. */
|
|
|
|
+ pretimeout_since_last_heartbeat = 0;
|
|
|
|
+
|
|
data[0] = 0;
|
|
data[0] = 0;
|
|
WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS);
|
|
WDOG_SET_TIMER_USE(data[0], WDOG_TIMER_USE_SMS_OS);
|
|
|
|
|
|
@@ -426,13 +443,12 @@ static int ipmi_set_timeout(int do_heartbeat)
|
|
|
|
|
|
wait_for_completion(&set_timeout_wait);
|
|
wait_for_completion(&set_timeout_wait);
|
|
|
|
|
|
|
|
+ mutex_unlock(&set_timeout_lock);
|
|
|
|
+
|
|
if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB)
|
|
if ((do_heartbeat == IPMI_SET_TIMEOUT_FORCE_HB)
|
|
|| ((send_heartbeat_now)
|
|
|| ((send_heartbeat_now)
|
|
&& (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY)))
|
|
&& (do_heartbeat == IPMI_SET_TIMEOUT_HB_IF_NECESSARY)))
|
|
- {
|
|
|
|
rv = ipmi_heartbeat();
|
|
rv = ipmi_heartbeat();
|
|
- }
|
|
|
|
- mutex_unlock(&set_timeout_lock);
|
|
|
|
|
|
|
|
out:
|
|
out:
|
|
return rv;
|
|
return rv;
|
|
@@ -556,9 +572,8 @@ static int ipmi_heartbeat(void)
|
|
int rv;
|
|
int rv;
|
|
struct ipmi_system_interface_addr addr;
|
|
struct ipmi_system_interface_addr addr;
|
|
|
|
|
|
- if (ipmi_ignore_heartbeat) {
|
|
|
|
|
|
+ if (ipmi_ignore_heartbeat)
|
|
return 0;
|
|
return 0;
|
|
- }
|
|
|
|
|
|
|
|
if (ipmi_start_timer_on_heartbeat) {
|
|
if (ipmi_start_timer_on_heartbeat) {
|
|
ipmi_start_timer_on_heartbeat = 0;
|
|
ipmi_start_timer_on_heartbeat = 0;
|
|
@@ -569,7 +584,6 @@ static int ipmi_heartbeat(void)
|
|
We don't want to set the action, though, we want to
|
|
We don't want to set the action, though, we want to
|
|
leave that alone (thus it can't be combined with the
|
|
leave that alone (thus it can't be combined with the
|
|
above operation. */
|
|
above operation. */
|
|
- pretimeout_since_last_heartbeat = 0;
|
|
|
|
return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
|
|
return ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
|
|
}
|
|
}
|
|
|
|
|
|
@@ -927,6 +941,45 @@ static void ipmi_register_watchdog(int ipmi_intf)
|
|
printk(KERN_CRIT PFX "Unable to register misc device\n");
|
|
printk(KERN_CRIT PFX "Unable to register misc device\n");
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+#ifdef HAVE_DIE_NMI
|
|
|
|
+ if (nmi_handler_registered) {
|
|
|
|
+ int old_pretimeout = pretimeout;
|
|
|
|
+ int old_timeout = timeout;
|
|
|
|
+ int old_preop_val = preop_val;
|
|
|
|
+
|
|
|
|
+ /* Set the pretimeout to go off in a second and give
|
|
|
|
+ ourselves plenty of time to stop the timer. */
|
|
|
|
+ ipmi_watchdog_state = WDOG_TIMEOUT_RESET;
|
|
|
|
+ preop_val = WDOG_PREOP_NONE; /* Make sure nothing happens */
|
|
|
|
+ pretimeout = 99;
|
|
|
|
+ timeout = 100;
|
|
|
|
+
|
|
|
|
+ testing_nmi = 1;
|
|
|
|
+
|
|
|
|
+ rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
|
|
|
|
+ if (rv) {
|
|
|
|
+ printk(KERN_WARNING PFX "Error starting timer to"
|
|
|
|
+ " test NMI: 0x%x. The NMI pretimeout will"
|
|
|
|
+ " likely not work\n", rv);
|
|
|
|
+ rv = 0;
|
|
|
|
+ goto out_restore;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ msleep(1500);
|
|
|
|
+
|
|
|
|
+ if (testing_nmi != 2) {
|
|
|
|
+ printk(KERN_WARNING PFX "IPMI NMI didn't seem to"
|
|
|
|
+ " occur. The NMI pretimeout will"
|
|
|
|
+ " likely not work\n");
|
|
|
|
+ }
|
|
|
|
+ out_restore:
|
|
|
|
+ testing_nmi = 0;
|
|
|
|
+ preop_val = old_preop_val;
|
|
|
|
+ pretimeout = old_pretimeout;
|
|
|
|
+ timeout = old_timeout;
|
|
|
|
+ }
|
|
|
|
+#endif
|
|
|
|
+
|
|
out:
|
|
out:
|
|
if ((start_now) && (rv == 0)) {
|
|
if ((start_now) && (rv == 0)) {
|
|
/* Run from startup, so start the timer now. */
|
|
/* Run from startup, so start the timer now. */
|
|
@@ -934,6 +987,10 @@ static void ipmi_register_watchdog(int ipmi_intf)
|
|
ipmi_watchdog_state = action_val;
|
|
ipmi_watchdog_state = action_val;
|
|
ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
|
|
ipmi_set_timeout(IPMI_SET_TIMEOUT_FORCE_HB);
|
|
printk(KERN_INFO PFX "Starting now!\n");
|
|
printk(KERN_INFO PFX "Starting now!\n");
|
|
|
|
+ } else {
|
|
|
|
+ /* Stop the timer now. */
|
|
|
|
+ ipmi_watchdog_state = WDOG_TIMEOUT_NONE;
|
|
|
|
+ ipmi_set_timeout(IPMI_SET_TIMEOUT_NO_HB);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -968,17 +1025,41 @@ static void ipmi_unregister_watchdog(int ipmi_intf)
|
|
return;
|
|
return;
|
|
}
|
|
}
|
|
|
|
|
|
-#ifdef HAVE_NMI_HANDLER
|
|
|
|
|
|
+#ifdef HAVE_DIE_NMI
|
|
static int
|
|
static int
|
|
-ipmi_nmi(void *dev_id, int cpu, int handled)
|
|
|
|
|
|
+ipmi_nmi(struct notifier_block *self, unsigned long val, void *data)
|
|
{
|
|
{
|
|
|
|
+ struct die_args *args = data;
|
|
|
|
+
|
|
|
|
+ if (val != DIE_NMI)
|
|
|
|
+ return NOTIFY_OK;
|
|
|
|
+
|
|
|
|
+ /* Hack, if it's a memory or I/O error, ignore it. */
|
|
|
|
+ if (args->err & 0xc0)
|
|
|
|
+ return NOTIFY_OK;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * If we get here, it's an NMI that's not a memory or I/O
|
|
|
|
+ * error. We can't truly tell if it's from IPMI or not
|
|
|
|
+ * without sending a message, and sending a message is almost
|
|
|
|
+ * impossible because of locking.
|
|
|
|
+ */
|
|
|
|
+
|
|
|
|
+ if (testing_nmi) {
|
|
|
|
+ testing_nmi = 2;
|
|
|
|
+ return NOTIFY_STOP;
|
|
|
|
+ }
|
|
|
|
+
|
|
/* If we are not expecting a timeout, ignore it. */
|
|
/* If we are not expecting a timeout, ignore it. */
|
|
if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
|
|
if (ipmi_watchdog_state == WDOG_TIMEOUT_NONE)
|
|
- return NOTIFY_DONE;
|
|
|
|
|
|
+ return NOTIFY_OK;
|
|
|
|
+
|
|
|
|
+ if (preaction_val != WDOG_PRETIMEOUT_NMI)
|
|
|
|
+ return NOTIFY_OK;
|
|
|
|
|
|
/* If no one else handled the NMI, we assume it was the IPMI
|
|
/* If no one else handled the NMI, we assume it was the IPMI
|
|
watchdog. */
|
|
watchdog. */
|
|
- if ((!handled) && (preop_val == WDOG_PREOP_PANIC)) {
|
|
|
|
|
|
+ if (preop_val == WDOG_PREOP_PANIC) {
|
|
/* On some machines, the heartbeat will give
|
|
/* On some machines, the heartbeat will give
|
|
an error and not work unless we re-enable
|
|
an error and not work unless we re-enable
|
|
the timer. So do so. */
|
|
the timer. So do so. */
|
|
@@ -987,18 +1068,12 @@ ipmi_nmi(void *dev_id, int cpu, int handled)
|
|
panic(PFX "pre-timeout");
|
|
panic(PFX "pre-timeout");
|
|
}
|
|
}
|
|
|
|
|
|
- return NOTIFY_DONE;
|
|
|
|
|
|
+ return NOTIFY_STOP;
|
|
}
|
|
}
|
|
|
|
|
|
-static struct nmi_handler ipmi_nmi_handler =
|
|
|
|
-{
|
|
|
|
- .link = LIST_HEAD_INIT(ipmi_nmi_handler.link),
|
|
|
|
- .dev_name = "ipmi_watchdog",
|
|
|
|
- .dev_id = NULL,
|
|
|
|
- .handler = ipmi_nmi,
|
|
|
|
- .priority = 0, /* Call us last. */
|
|
|
|
|
|
+static struct notifier_block ipmi_nmi_handler = {
|
|
|
|
+ .notifier_call = ipmi_nmi
|
|
};
|
|
};
|
|
-int nmi_handler_registered;
|
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
static int wdog_reboot_handler(struct notifier_block *this,
|
|
static int wdog_reboot_handler(struct notifier_block *this,
|
|
@@ -1115,7 +1190,7 @@ static int preaction_op(const char *inval, char *outval)
|
|
preaction_val = WDOG_PRETIMEOUT_NONE;
|
|
preaction_val = WDOG_PRETIMEOUT_NONE;
|
|
else if (strcmp(inval, "pre_smi") == 0)
|
|
else if (strcmp(inval, "pre_smi") == 0)
|
|
preaction_val = WDOG_PRETIMEOUT_SMI;
|
|
preaction_val = WDOG_PRETIMEOUT_SMI;
|
|
-#ifdef HAVE_NMI_HANDLER
|
|
|
|
|
|
+#ifdef HAVE_DIE_NMI
|
|
else if (strcmp(inval, "pre_nmi") == 0)
|
|
else if (strcmp(inval, "pre_nmi") == 0)
|
|
preaction_val = WDOG_PRETIMEOUT_NMI;
|
|
preaction_val = WDOG_PRETIMEOUT_NMI;
|
|
#endif
|
|
#endif
|
|
@@ -1149,7 +1224,7 @@ static int preop_op(const char *inval, char *outval)
|
|
|
|
|
|
static void check_parms(void)
|
|
static void check_parms(void)
|
|
{
|
|
{
|
|
-#ifdef HAVE_NMI_HANDLER
|
|
|
|
|
|
+#ifdef HAVE_DIE_NMI
|
|
int do_nmi = 0;
|
|
int do_nmi = 0;
|
|
int rv;
|
|
int rv;
|
|
|
|
|
|
@@ -1162,20 +1237,9 @@ static void check_parms(void)
|
|
preop_op("preop_none", NULL);
|
|
preop_op("preop_none", NULL);
|
|
do_nmi = 0;
|
|
do_nmi = 0;
|
|
}
|
|
}
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
|
|
- if (nmi_watchdog == NMI_IO_APIC) {
|
|
|
|
- printk(KERN_WARNING PFX "nmi_watchdog is set to IO APIC"
|
|
|
|
- " mode (value is %d), that is incompatible"
|
|
|
|
- " with using NMI in the IPMI watchdog."
|
|
|
|
- " Disabling IPMI nmi pretimeout.\n",
|
|
|
|
- nmi_watchdog);
|
|
|
|
- preaction_val = WDOG_PRETIMEOUT_NONE;
|
|
|
|
- do_nmi = 0;
|
|
|
|
- }
|
|
|
|
-#endif
|
|
|
|
}
|
|
}
|
|
if (do_nmi && !nmi_handler_registered) {
|
|
if (do_nmi && !nmi_handler_registered) {
|
|
- rv = request_nmi(&ipmi_nmi_handler);
|
|
|
|
|
|
+ rv = register_die_notifier(&ipmi_nmi_handler);
|
|
if (rv) {
|
|
if (rv) {
|
|
printk(KERN_WARNING PFX
|
|
printk(KERN_WARNING PFX
|
|
"Can't register nmi handler\n");
|
|
"Can't register nmi handler\n");
|
|
@@ -1183,7 +1247,7 @@ static void check_parms(void)
|
|
} else
|
|
} else
|
|
nmi_handler_registered = 1;
|
|
nmi_handler_registered = 1;
|
|
} else if (!do_nmi && nmi_handler_registered) {
|
|
} else if (!do_nmi && nmi_handler_registered) {
|
|
- release_nmi(&ipmi_nmi_handler);
|
|
|
|
|
|
+ unregister_die_notifier(&ipmi_nmi_handler);
|
|
nmi_handler_registered = 0;
|
|
nmi_handler_registered = 0;
|
|
}
|
|
}
|
|
#endif
|
|
#endif
|
|
@@ -1219,9 +1283,9 @@ static int __init ipmi_wdog_init(void)
|
|
|
|
|
|
rv = ipmi_smi_watcher_register(&smi_watcher);
|
|
rv = ipmi_smi_watcher_register(&smi_watcher);
|
|
if (rv) {
|
|
if (rv) {
|
|
-#ifdef HAVE_NMI_HANDLER
|
|
|
|
- if (preaction_val == WDOG_PRETIMEOUT_NMI)
|
|
|
|
- release_nmi(&ipmi_nmi_handler);
|
|
|
|
|
|
+#ifdef HAVE_DIE_NMI
|
|
|
|
+ if (nmi_handler_registered)
|
|
|
|
+ unregister_die_notifier(&ipmi_nmi_handler);
|
|
#endif
|
|
#endif
|
|
atomic_notifier_chain_unregister(&panic_notifier_list,
|
|
atomic_notifier_chain_unregister(&panic_notifier_list,
|
|
&wdog_panic_notifier);
|
|
&wdog_panic_notifier);
|
|
@@ -1240,9 +1304,9 @@ static void __exit ipmi_wdog_exit(void)
|
|
ipmi_smi_watcher_unregister(&smi_watcher);
|
|
ipmi_smi_watcher_unregister(&smi_watcher);
|
|
ipmi_unregister_watchdog(watchdog_ifnum);
|
|
ipmi_unregister_watchdog(watchdog_ifnum);
|
|
|
|
|
|
-#ifdef HAVE_NMI_HANDLER
|
|
|
|
|
|
+#ifdef HAVE_DIE_NMI
|
|
if (nmi_handler_registered)
|
|
if (nmi_handler_registered)
|
|
- release_nmi(&ipmi_nmi_handler);
|
|
|
|
|
|
+ unregister_die_notifier(&ipmi_nmi_handler);
|
|
#endif
|
|
#endif
|
|
|
|
|
|
atomic_notifier_chain_unregister(&panic_notifier_list,
|
|
atomic_notifier_chain_unregister(&panic_notifier_list,
|