浏览代码

softlockup: allow panic on lockup

Allow users to configure the softlockup detector to generate a panic
instead of a warning message.

High-availability systems might opt for this strict method (combined
with the panic_timeout= boot option/sysctl), instead of generating
softlockup warnings ad infinitum.

Also, automated tests work better if the system reboots reliably (into
a safe kernel) in case of a lockup.

The full spectrum of configurability is supported: boot option, sysctl
option and Kconfig option.

It is disabled by default.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Ingo Molnar 17 年之前
父节点
当前提交
9c44bc03ff
共有 5 个文件被更改,包括 62 次插入2 次删除
  1. 3 0
      Documentation/kernel-parameters.txt
  2. 2 1
      include/linux/sched.h
  3. 21 0
      kernel/softlockup.c
  4. 11 0
      kernel/sysctl.c
  5. 25 1
      lib/Kconfig.debug

+ 3 - 0
Documentation/kernel-parameters.txt

@@ -1971,6 +1971,9 @@ and is between 256 and 4096 characters. It is defined in the file
 
 
 	snd-ymfpci=	[HW,ALSA]
 	snd-ymfpci=	[HW,ALSA]
 
 
+	softlockup_panic=
+			[KNL] Should the soft-lockup detector generate panics.
+
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 			See Documentation/sonypi.txt
 			See Documentation/sonypi.txt
 
 

+ 2 - 1
include/linux/sched.h

@@ -294,7 +294,8 @@ extern void softlockup_tick(void);
 extern void spawn_softlockup_task(void);
 extern void spawn_softlockup_task(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern void touch_all_softlockup_watchdogs(void);
-extern unsigned long  softlockup_thresh;
+extern unsigned int  softlockup_panic;
+extern unsigned long softlockup_thresh;
 extern unsigned long sysctl_hung_task_check_count;
 extern unsigned long sysctl_hung_task_check_count;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_warnings;
 extern unsigned long sysctl_hung_task_warnings;

+ 21 - 0
kernel/softlockup.c

@@ -27,6 +27,21 @@ static DEFINE_PER_CPU(struct task_struct *, watchdog_task);
 static int __read_mostly did_panic;
 static int __read_mostly did_panic;
 unsigned long __read_mostly softlockup_thresh = 60;
 unsigned long __read_mostly softlockup_thresh = 60;
 
 
+/*
+ * Nonzero: panic (and reboot, if panic_timeout= is set) when a soft-lockup
+ * occurs. Default from Kconfig; softlockup_panic= boot option overrides it.
+ */
+unsigned int __read_mostly softlockup_panic =
+				CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
+
+static int __init softlockup_panic_setup(char *str)
+{
+	softlockup_panic = simple_strtoul(str, NULL, 0);
+
+	return 1;
+}
+__setup("softlockup_panic=", softlockup_panic_setup);
+
 static int
 static int
 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
 softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
 {
 {
@@ -120,6 +135,9 @@ void softlockup_tick(void)
 	else
 	else
 		dump_stack();
 		dump_stack();
 	spin_unlock(&print_lock);
 	spin_unlock(&print_lock);
+
+	if (softlockup_panic)
+		panic("softlockup: hung tasks");
 }
 }
 
 
 /*
 /*
@@ -172,6 +190,9 @@ static void check_hung_task(struct task_struct *t, unsigned long now)
 
 
 	t->last_switch_timestamp = now;
 	t->last_switch_timestamp = now;
 	touch_nmi_watchdog();
 	touch_nmi_watchdog();
+
+	if (softlockup_panic)
+		panic("softlockup: blocked tasks");
 }
 }
 
 
 /*
 /*

+ 11 - 0
kernel/sysctl.c

@@ -727,6 +727,17 @@ static struct ctl_table kern_table[] = {
 	},
 	},
 #endif
 #endif
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 #ifdef CONFIG_DETECT_SOFTLOCKUP
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "softlockup_panic",
+		.data		= &softlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax, /* data is int-sized */
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 	{
 	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "softlockup_thresh",
 		.procname	= "softlockup_thresh",

+ 25 - 1
lib/Kconfig.debug

@@ -147,7 +147,7 @@ config DETECT_SOFTLOCKUP
 	help
 	help
 	  Say Y here to enable the kernel to detect "soft lockups",
 	  Say Y here to enable the kernel to detect "soft lockups",
 	  which are bugs that cause the kernel to loop in kernel
 	  which are bugs that cause the kernel to loop in kernel
-	  mode for more than 10 seconds, without giving other tasks a
+	  mode for more than 60 seconds, without giving other tasks a
 	  chance to run.
 	  chance to run.
 
 
 	  When a soft-lockup is detected, the kernel will print the
 	  When a soft-lockup is detected, the kernel will print the
@@ -159,6 +159,30 @@ config DETECT_SOFTLOCKUP
 	   can be detected via the NMI-watchdog, on platforms that
 	   can be detected via the NMI-watchdog, on platforms that
 	   support it.)
 	   support it.)
 
 
+config BOOTPARAM_SOFTLOCKUP_PANIC
+	bool "Panic (Reboot) On Soft Lockups"
+	depends on DETECT_SOFTLOCKUP
+	help
+	  Say Y here to enable the kernel to panic on "soft lockups",
+	  which are bugs that cause the kernel to loop in kernel
+	  mode for more than 60 seconds, without giving other tasks a
+	  chance to run.
+
+	  The panic can be used in combination with panic_timeout,
+	  to cause the system to reboot automatically after a
+	  lockup has been detected. This feature is useful for
+	  high-availability systems that have uptime guarantees and
+	  where a lockup must be resolved ASAP.
+
+	  Say N if unsure.
+
+config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+	int
+	depends on DETECT_SOFTLOCKUP
+	range 0 1
+	default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+	default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+
 config SCHED_DEBUG
 config SCHED_DEBUG
 	bool "Collect scheduler debugging info"
 	bool "Collect scheduler debugging info"
 	depends on DEBUG_KERNEL && PROC_FS
 	depends on DEBUG_KERNEL && PROC_FS