@@ -1154,7 +1154,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		/*
 		 * event overflow
 		 */
-		handled = 1;
+		handled++;
 		data.period = event->hw.last_period;
 
 		if (!x86_perf_event_set_period(event))
@@ -1200,12 +1200,20 @@ void perf_events_lapic_init(void)
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
+struct pmu_nmi_state {
+	unsigned int marked;
+	int handled;
+};
+
+static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
+
 static int __kprobes
 perf_event_nmi_handler(struct notifier_block *self,
			 unsigned long cmd, void *__args)
 {
 	struct die_args *args = __args;
-	struct pt_regs *regs;
+	unsigned int this_nmi;
+	int handled;
 
 	if (!atomic_read(&active_events))
 		return NOTIFY_DONE;
@@ -1214,22 +1222,47 @@ perf_event_nmi_handler(struct notifier_block *self,
 	case DIE_NMI:
 	case DIE_NMI_IPI:
 		break;
-
+	case DIE_NMIUNKNOWN:
+		this_nmi = percpu_read(irq_stat.__nmi_count);
+		if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+			/* let the kernel handle the unknown nmi */
+			return NOTIFY_DONE;
+		/*
+		 * This one is a PMU back-to-back nmi. Two events
+		 * trigger 'simultaneously' raising two back-to-back
+		 * NMIs. If the first NMI handles both, the latter
+		 * will be empty and daze the CPU. So, we drop it to
+		 * avoid false-positive 'unknown nmi' messages.
+		 */
+		return NOTIFY_STOP;
 	default:
 		return NOTIFY_DONE;
 	}
 
-	regs = args->regs;
-
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	/*
-	 * Can't rely on the handled return value to say it was our NMI, two
-	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
-	 *
-	 * If the first NMI handles both, the latter will be empty and daze
-	 * the CPU.
-	 */
-	x86_pmu.handle_irq(regs);
+
+	handled = x86_pmu.handle_irq(args->regs);
+	if (!handled)
+		return NOTIFY_DONE;
+
+	this_nmi = percpu_read(irq_stat.__nmi_count);
+	if ((handled > 1) ||
+		/* the next nmi could be a back-to-back nmi */
+	    ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
+	     (__get_cpu_var(pmu_nmi).handled > 1))) {
+		/*
+		 * We could have two subsequent back-to-back nmis: The
+		 * first handles more than one counter, the 2nd
+		 * handles only one counter and the 3rd handles no
+		 * counter.
+		 *
+		 * This is the 2nd nmi because the previous was
+		 * handling more than one counter. We will mark the
+		 * next (3rd) and then drop it if unhandled.
+		 */
+		__get_cpu_var(pmu_nmi).marked = this_nmi + 1;
+		__get_cpu_var(pmu_nmi).handled = handled;
+	}
 
 	return NOTIFY_STOP;
 }
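
Below is a stand-alone, user-space sketch of the back-to-back NMI bookkeeping the patch introduces. It is illustrative only: the function names are hypothetical, and plain statics stand in for the per-CPU pmu_nmi variable and irq_stat.__nmi_count. It replays the three-NMI scenario from the comment: the first NMI handles two counters, the second one counter, and the third none, so the third is recognized as the marked back-to-back NMI and dropped instead of being reported as unknown.

/*
 * Illustrative user-space model of the pmu_nmi marking logic above.
 * Not kernel code: per-CPU state becomes a plain static and the NMI
 * count is a simple counter advanced by hand.
 */
#include <stdio.h>

#define NOTIFY_DONE	0
#define NOTIFY_STOP	1

struct pmu_nmi_state {
	unsigned int marked;	/* NMI count whose "unknown" NMI we may drop */
	int handled;		/* events handled by the NMI that set the mark */
};

static struct pmu_nmi_state pmu_nmi;	/* stand-in for the per-CPU variable */
static unsigned int nmi_count;		/* stand-in for irq_stat.__nmi_count */

/* Models the DIE_NMIUNKNOWN case added by the patch. */
static int unknown_nmi(void)
{
	if (nmi_count != pmu_nmi.marked)
		return NOTIFY_DONE;	/* really unknown: let the kernel warn */
	return NOTIFY_STOP;		/* expected empty back-to-back NMI: drop it */
}

/* Models the PMU NMI path; "handled" is what x86_pmu.handle_irq() returned. */
static int pmu_nmi_in(int handled)
{
	if (!handled)
		return NOTIFY_DONE;

	if (handled > 1 ||
	    (pmu_nmi.marked == nmi_count && pmu_nmi.handled > 1)) {
		/* the next NMI may arrive empty; mark it so it can be dropped */
		pmu_nmi.marked = nmi_count + 1;
		pmu_nmi.handled = handled;
	}
	return NOTIFY_STOP;
}

int main(void)
{
	nmi_count = 1;	/* 1st NMI: two counters overflowed */
	printf("nmi 1 (2 events): %s\n", pmu_nmi_in(2) == NOTIFY_STOP ? "stop" : "done");
	nmi_count = 2;	/* 2nd NMI: one counter overflowed */
	printf("nmi 2 (1 event):  %s\n", pmu_nmi_in(1) == NOTIFY_STOP ? "stop" : "done");
	nmi_count = 3;	/* 3rd NMI: nothing overflowed, reaches the unknown path */
	printf("nmi 3 (unknown):  %s\n", unknown_nmi() == NOTIFY_STOP ? "dropped" : "passed on");
	return 0;
}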