@@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p)
 EXPORT_SYMBOL_GPL(hwpoison_filter);
 
 /*
- * Send all the processes who have the page mapped an ``action optional''
- * signal.
+ * Send all the processes who have the page mapped a signal.
+ * ``action optional'' if they are not immediately affected by the error
+ * ``action required'' if error happened in current execution context
  */
-static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
-			unsigned long pfn, struct page *page)
+static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
+			unsigned long pfn, struct page *page, int flags)
 {
 	struct siginfo si;
 	int ret;
 
 	printk(KERN_ERR
-		"MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
+		"MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
 		pfn, t->comm, t->pid);
 	si.si_signo = SIGBUS;
 	si.si_errno = 0;
-	si.si_code = BUS_MCEERR_AO;
 	si.si_addr = (void *)addr;
 #ifdef __ARCH_SI_TRAPNO
 	si.si_trapno = trapno;
 #endif
 	si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
-	/*
-	 * Don't use force here, it's convenient if the signal
-	 * can be temporarily blocked.
-	 * This could cause a loop when the user sets SIGBUS
-	 * to SIG_IGN, but hopefully no one will do that?
-	 */
-	ret = send_sig_info(SIGBUS, &si, t);	/* synchronous? */
+
+	if ((flags & MF_ACTION_REQUIRED) && t == current) {
+		si.si_code = BUS_MCEERR_AR;
+		ret = force_sig_info(SIGBUS, &si, t);
+	} else {
+		/*
+		 * Don't use force here, it's convenient if the signal
+		 * can be temporarily blocked.
+		 * This could cause a loop when the user sets SIGBUS
+		 * to SIG_IGN, but hopefully no one will do that?
+		 */
+		si.si_code = BUS_MCEERR_AO;
+		ret = send_sig_info(SIGBUS, &si, t);	/* synchronous? */
+	}
 	if (ret < 0)
 		printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
 		       t->comm, t->pid, ret);
@@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
  * Also when FAIL is set do a force kill because something went
  * wrong earlier.
  */
-static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
-			  int fail, struct page *page, unsigned long pfn)
+static void kill_procs(struct list_head *to_kill, int doit, int trapno,
+		       int fail, struct page *page, unsigned long pfn,
+		       int flags)
 {
 	struct to_kill *tk, *next;
 
@@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
 			 * check for that, but we need to tell the
 			 * process anyways.
 			 */
-			else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
-					      pfn, page) < 0)
+			else if (kill_proc(tk->tsk, tk->addr, trapno,
+					      pfn, page, flags) < 0)
 				printk(KERN_ERR
 		"MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
 					pfn, tk->tsk->comm, tk->tsk->pid);
@@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p,
  * the pages and send SIGBUS to the processes if the data was dirty.
  */
 static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
-				  int trapno)
+				  int trapno, int flags)
 {
 	enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
 	struct address_space *mapping;
@@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * use a more force-full uncatchable kill to prevent
 	 * any accesses to the poisoned memory.
 	 */
-	kill_procs_ao(&tokill, !!PageDirty(ppage), trapno,
-		      ret != SWAP_SUCCESS, p, pfn);
+	kill_procs(&tokill, !!PageDirty(ppage), trapno,
+		      ret != SWAP_SUCCESS, p, pfn, flags);
 
 	return ret;
 }
@@ -984,7 +992,25 @@ static void clear_page_hwpoison_huge_page(struct page *hpage)
 		ClearPageHWPoison(hpage + i);
 }
 
-int __memory_failure(unsigned long pfn, int trapno, int flags)
+/**
+ * memory_failure - Handle memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ * @flags: fine tune action taken
+ *
+ * This function is called by the low level machine check code
+ * of an architecture when it detects hardware memory corruption
+ * of a page. It tries its best to recover, which includes
+ * dropping pages, killing processes etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber)
+ *
+ * Must run in process context (e.g. a work queue) with interrupts
+ * enabled and no spinlocks hold.
+ */
+int memory_failure(unsigned long pfn, int trapno, int flags)
 {
 	struct page_state *ps;
 	struct page *p;
@@ -1130,7 +1156,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * Now take care of user space mappings.
 	 * Abort on fail: __delete_from_page_cache() assumes unmapped page.
 	 */
-	if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
+	if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {
 		printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
 		res = -EBUSY;
 		goto out;
@@ -1156,29 +1182,7 @@ out:
 	unlock_page(hpage);
 	return res;
 }
-EXPORT_SYMBOL_GPL(__memory_failure);
-
-/**
- * memory_failure - Handle memory failure of a page.
- * @pfn: Page Number of the corrupted page
- * @trapno: Trap number reported in the signal to user space.
- *
- * This function is called by the low level machine check code
- * of an architecture when it detects hardware memory corruption
- * of a page. It tries its best to recover, which includes
- * dropping pages, killing processes etc.
- *
- * The function is primarily of use for corruptions that
- * happen outside the current execution context (e.g. when
- * detected by a background scrubber)
- *
- * Must run in process context (e.g. a work queue) with interrupts
- * enabled and no spinlocks hold.
- */
-void memory_failure(unsigned long pfn, int trapno)
-{
-	__memory_failure(pfn, trapno, 0);
-}
+EXPORT_SYMBOL_GPL(memory_failure);
 
 #define MEMORY_FAILURE_FIFO_ORDER	4
 #define MEMORY_FAILURE_FIFO_SIZE	(1 << MEMORY_FAILURE_FIFO_ORDER)
@@ -1251,7 +1255,7 @@ static void memory_failure_work_func(struct work_struct *work)
 		spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
 		if (!gotten)
 			break;
-		__memory_failure(entry.pfn, entry.trapno, entry.flags);
+		memory_failure(entry.pfn, entry.trapno, entry.flags);
 	}
 }
 
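
Not part of the patch: a minimal, hypothetical userspace sketch of the AR/AO
split that kill_proc() now encodes in siginfo. BUS_MCEERR_AR is raised with
force_sig_info() in the faulting context and so cannot be blocked, while
BUS_MCEERR_AO is advisory and may be delivered (or blocked) later. The sketch
assumes a Linux/glibc toolchain whose <signal.h> exposes BUS_MCEERR_* and
si_addr_lsb; the fprintf() calls are illustrative only and not
async-signal-safe.

/* Hypothetical consumer of the AR/AO distinction; not part of the kernel patch. */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
{
	(void)sig;
	(void)ctx;

	if (si->si_code == BUS_MCEERR_AR) {
		/*
		 * "Action required": this thread touched the poisoned page,
		 * so the range (si_addr, 1 << si_addr_lsb) is lost.  A
		 * recovery-aware program would discard or rebuild that data;
		 * this sketch just reports and exits.
		 */
		fprintf(stderr, "AR: lost %lu bytes at %p\n",
			1UL << si->si_addr_lsb, si->si_addr);
		_exit(EXIT_FAILURE);
	} else if (si->si_code == BUS_MCEERR_AO) {
		/*
		 * "Action optional": the page is mapped but was not being
		 * accessed; keep running if the affected range can be
		 * avoided or regenerated.
		 */
		fprintf(stderr, "AO: page at %p poisoned\n", si->si_addr);
	}
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = sigbus_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);

	pause();	/* wait for a real or injected memory failure */
	return 0;
}

A recovery-aware application (a VM monitor or database, say) would typically
remap or rebuild the range reported by si_addr/si_addr_lsb on the
action-required path instead of exiting.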