|
@@ -16,37 +16,15 @@
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
|
*/
|
|
|
|
|
|
-/* Change Activity:
|
|
|
- * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support.
|
|
|
- * End Change Activity
|
|
|
- */
|
|
|
-
|
|
|
-#include <linux/errno.h>
|
|
|
-#include <linux/threads.h>
|
|
|
-#include <linux/kernel_stat.h>
|
|
|
-#include <linux/signal.h>
|
|
|
#include <linux/sched.h>
|
|
|
-#include <linux/ioport.h>
|
|
|
#include <linux/interrupt.h>
|
|
|
-#include <linux/timex.h>
|
|
|
-#include <linux/init.h>
|
|
|
-#include <linux/delay.h>
|
|
|
#include <linux/irq.h>
|
|
|
-#include <linux/random.h>
|
|
|
-#include <linux/sysrq.h>
|
|
|
-#include <linux/bitops.h>
|
|
|
-
|
|
|
-#include <asm/uaccess.h>
|
|
|
-#include <asm/system.h>
|
|
|
-#include <asm/io.h>
|
|
|
-#include <asm/pgtable.h>
|
|
|
-#include <asm/irq.h>
|
|
|
-#include <asm/cache.h>
|
|
|
-#include <asm/prom.h>
|
|
|
-#include <asm/ptrace.h>
|
|
|
+#include <linux/of.h>
|
|
|
+#include <linux/fs.h>
|
|
|
+#include <linux/reboot.h>
|
|
|
+
|
|
|
#include <asm/machdep.h>
|
|
|
#include <asm/rtas.h>
|
|
|
-#include <asm/udbg.h>
|
|
|
#include <asm/firmware.h>
|
|
|
|
|
|
#include "pseries.h"
|
|
@@ -57,7 +35,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock);
|
|
|
static char global_mce_data_buf[RTAS_ERROR_LOG_MAX];
|
|
|
static DEFINE_PER_CPU(__u64, mce_data_buf);
|
|
|
|
|
|
-static int ras_get_sensor_state_token;
|
|
|
static int ras_check_exception_token;
|
|
|
|
|
|
#define EPOW_SENSOR_TOKEN 9
|
|
@@ -75,7 +52,6 @@ static int __init init_ras_IRQ(void)
|
|
|
{
|
|
|
struct device_node *np;
|
|
|
|
|
|
- ras_get_sensor_state_token = rtas_token("get-sensor-state");
|
|
|
ras_check_exception_token = rtas_token("check-exception");
|
|
|
|
|
|
/* Internal Errors */
|
|
@@ -95,26 +71,126 @@ static int __init init_ras_IRQ(void)
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
|
-__initcall(init_ras_IRQ);
|
|
|
+subsys_initcall(init_ras_IRQ);
|
|
|
|
|
|
-/*
|
|
|
- * Handle power subsystem events (EPOW).
|
|
|
- *
|
|
|
- * Presently we just log the event has occurred. This should be fixed
|
|
|
- * to examine the type of power failure and take appropriate action where
|
|
|
- * the time horizon permits something useful to be done.
|
|
|
- */
|
|
|
/*
 * Event modifiers handled for an EPOW_SYSTEM_SHUTDOWN action code.
 * Each one names the reason firmware reports for the shutdown event.
 */
#define EPOW_SHUTDOWN_NORMAL				1
#define EPOW_SHUTDOWN_ON_UPS				2
#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4

/*
 * handle_system_shutdown - react to a firmware system-shutdown event.
 * @event_modifier: one of the EPOW_SHUTDOWN_* values above.
 *
 * Logs the reason for the event and, for EPOW_SHUTDOWN_NORMAL,
 * EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS and
 * EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH, calls orderly_poweroff(1).
 * Any other modifier value is only logged.
 *
 * Every message is newline-terminated so that it forms a complete log
 * record on its own instead of depending on a later printk to finish it.
 */
static void handle_system_shutdown(char event_modifier)
{
	switch (event_modifier) {
	case EPOW_SHUTDOWN_NORMAL:
		pr_emerg("Firmware initiated power off\n");
		orderly_poweroff(1);
		break;

	case EPOW_SHUTDOWN_ON_UPS:
		/*
		 * NOTE(review): this case only logs and does not start a
		 * power off, unlike the other known modifiers - confirm
		 * that this is intended.
		 */
		pr_emerg("Loss of power reported by firmware, system is running on UPS/battery\n");
		break;

	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
		pr_emerg("Loss of system critical functions reported by firmware\n");
		pr_emerg("Check RTAS error log for details\n");
		orderly_poweroff(1);
		break;

	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
		pr_emerg("Ambient temperature too high reported by firmware\n");
		pr_emerg("Check RTAS error log for details\n");
		orderly_poweroff(1);
		break;

	default:
		pr_err("Unknown power/cooling shutdown event (modifier %d)\n",
		       event_modifier);
		break;
	}
}
|
|
|
+
|
|
|
/*
 * Byte layout that rtas_parse_epow_errlog() overlays on the data of the
 * EPOW section of an RTAS error log. Only the first two bytes are read by
 * the code visible in this file.
 */
struct epow_errorlog {
	unsigned char	sensor_value;		/* low 4 bits: EPOW action code */
	unsigned char	event_modifier;		/* low 4 bits: event modifier */
	unsigned char	extended_modifier;
	unsigned char	reserved;
	unsigned char	platform_reason;
};
|
|
|
+
|
|
|
+#define EPOW_RESET 0
|
|
|
+#define EPOW_WARN_COOLING 1
|
|
|
+#define EPOW_WARN_POWER 2
|
|
|
+#define EPOW_SYSTEM_SHUTDOWN 3
|
|
|
+#define EPOW_SYSTEM_HALT 4
|
|
|
+#define EPOW_MAIN_ENCLOSURE 5
|
|
|
+#define EPOW_POWER_OFF 7
|
|
|
+
|
|
|
+void rtas_parse_epow_errlog(struct rtas_error_log *log)
|
|
|
+{
|
|
|
+ struct pseries_errorlog *pseries_log;
|
|
|
+ struct epow_errorlog *epow_log;
|
|
|
+ char action_code;
|
|
|
+ char modifier;
|
|
|
+
|
|
|
+ pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
|
|
|
+ if (pseries_log == NULL)
|
|
|
+ return;
|
|
|
+
|
|
|
+ epow_log = (struct epow_errorlog *)pseries_log->data;
|
|
|
+ action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */
|
|
|
+ modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */
|
|
|
+
|
|
|
+ switch (action_code) {
|
|
|
+ case EPOW_RESET:
|
|
|
+ pr_err("Non critical power or cooling issue cleared");
|
|
|
+ break;
|
|
|
+
|
|
|
+ case EPOW_WARN_COOLING:
|
|
|
+ pr_err("Non critical cooling issue reported by firmware");
|
|
|
+ pr_err("Check RTAS error log for details");
|
|
|
+ break;
|
|
|
+
|
|
|
+ case EPOW_WARN_POWER:
|
|
|
+ pr_err("Non critical power issue reported by firmware");
|
|
|
+ pr_err("Check RTAS error log for details");
|
|
|
+ break;
|
|
|
+
|
|
|
+ case EPOW_SYSTEM_SHUTDOWN:
|
|
|
+ handle_system_shutdown(epow_log->event_modifier);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case EPOW_SYSTEM_HALT:
|
|
|
+ pr_emerg("Firmware initiated power off");
|
|
|
+ orderly_poweroff(1);
|
|
|
+ break;
|
|
|
+
|
|
|
+ case EPOW_MAIN_ENCLOSURE:
|
|
|
+ case EPOW_POWER_OFF:
|
|
|
+ pr_emerg("Critical power/cooling issue reported by firmware");
|
|
|
+ pr_emerg("Check RTAS error log for details");
|
|
|
+ pr_emerg("Immediate power off");
|
|
|
+ emergency_sync();
|
|
|
+ kernel_power_off();
|
|
|
+ break;
|
|
|
+
|
|
|
+ default:
|
|
|
+ pr_err("Unknown power/cooling event (action code %d)",
|
|
|
+ action_code);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/* Handle environmental and power warning (EPOW) interrupts. */
|
|
|
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
|
|
|
{
|
|
|
- int status = 0xdeadbeef;
|
|
|
- int state = 0;
|
|
|
+ int status;
|
|
|
+ int state;
|
|
|
int critical;
|
|
|
|
|
|
- status = rtas_call(ras_get_sensor_state_token, 2, 2, &state,
|
|
|
- EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX);
|
|
|
+ status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
|
|
|
|
|
|
if (state > 3)
|
|
|
- critical = 1; /* Time Critical */
|
|
|
+ critical = 1; /* Time Critical */
|
|
|
else
|
|
|
critical = 0;
|
|
|
|
|
@@ -123,18 +199,14 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
|
|
|
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
|
|
|
RTAS_VECTOR_EXTERNAL_INTERRUPT,
|
|
|
virq_to_hw(irq),
|
|
|
- RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
|
|
|
+ RTAS_EPOW_WARNING,
|
|
|
critical, __pa(&ras_log_buf),
|
|
|
rtas_get_error_log_max());
|
|
|
|
|
|
- udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n",
|
|
|
- *((unsigned long *)&ras_log_buf), status, state);
|
|
|
- printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n",
|
|
|
- *((unsigned long *)&ras_log_buf), status, state);
|
|
|
-
|
|
|
- /* format and print the extended information */
|
|
|
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
|
|
|
|
|
|
+ rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
|
|
|
+
|
|
|
spin_unlock(&ras_log_buf_lock);
|
|
|
return IRQ_HANDLED;
|
|
|
}
|
|
@@ -150,7 +222,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
|
|
|
static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
|
|
|
{
|
|
|
struct rtas_error_log *rtas_elog;
|
|
|
- int status = 0xdeadbeef;
|
|
|
+ int status;
|
|
|
int fatal;
|
|
|
|
|
|
spin_lock(&ras_log_buf_lock);
|
|
@@ -158,7 +230,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
|
|
|
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
|
|
|
RTAS_VECTOR_EXTERNAL_INTERRUPT,
|
|
|
virq_to_hw(irq),
|
|
|
- RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
|
|
|
+ RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
|
|
|
__pa(&ras_log_buf),
|
|
|
rtas_get_error_log_max());
|
|
|
|
|
@@ -173,24 +245,13 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
|
|
|
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
|
|
|
|
|
|
if (fatal) {
|
|
|
- udbg_printf("Fatal HW Error <0x%lx 0x%x>\n",
|
|
|
- *((unsigned long *)&ras_log_buf), status);
|
|
|
- printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n",
|
|
|
- *((unsigned long *)&ras_log_buf), status);
|
|
|
-
|
|
|
-#ifndef DEBUG_RTAS_POWER_OFF
|
|
|
- /* Don't actually power off when debugging so we can test
|
|
|
- * without actually failing while injecting errors.
|
|
|
- * Error data will not be logged to syslog.
|
|
|
- */
|
|
|
- ppc_md.power_off();
|
|
|
-#endif
|
|
|
+ pr_emerg("Fatal hardware error reported by firmware");
|
|
|
+ pr_emerg("Check RTAS error log for details");
|
|
|
+ pr_emerg("Immediate power off");
|
|
|
+ emergency_sync();
|
|
|
+ kernel_power_off();
|
|
|
} else {
|
|
|
- udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n",
|
|
|
- *((unsigned long *)&ras_log_buf), status);
|
|
|
- printk(KERN_WARNING
|
|
|
- "Warning: Recoverable hardware error <0x%lx 0x%x>\n",
|
|
|
- *((unsigned long *)&ras_log_buf), status);
|
|
|
+ pr_err("Recoverable hardware error reported by firmware");
|
|
|
}
|
|
|
|
|
|
spin_unlock(&ras_log_buf_lock);
|