@@ -13,6 +13,49 @@ enum {
 	LBR_FORMAT_EIP_FLAGS	= 0x03,
 };

+/*
+ * Intel LBR_SELECT bits
+ * Intel Vol3a, April 2011, Section 16.7 Table 16-10
+ *
+ * Hardware branch filter (not available on all CPUs)
+ */
+#define LBR_KERNEL_BIT		0 /* do not capture at ring0 */
+#define LBR_USER_BIT		1 /* do not capture at ring > 0 */
+#define LBR_JCC_BIT		2 /* do not capture conditional branches */
+#define LBR_REL_CALL_BIT	3 /* do not capture relative calls */
+#define LBR_IND_CALL_BIT	4 /* do not capture indirect calls */
+#define LBR_RETURN_BIT		5 /* do not capture near returns */
+#define LBR_IND_JMP_BIT		6 /* do not capture indirect jumps */
+#define LBR_REL_JMP_BIT		7 /* do not capture relative jumps */
+#define LBR_FAR_BIT		8 /* do not capture far branches */
+
+#define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
+#define LBR_USER	(1 << LBR_USER_BIT)
+#define LBR_JCC		(1 << LBR_JCC_BIT)
+#define LBR_REL_CALL	(1 << LBR_REL_CALL_BIT)
+#define LBR_IND_CALL	(1 << LBR_IND_CALL_BIT)
+#define LBR_RETURN	(1 << LBR_RETURN_BIT)
+#define LBR_REL_JMP	(1 << LBR_REL_JMP_BIT)
+#define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
+#define LBR_FAR		(1 << LBR_FAR_BIT)
+
+#define LBR_PLM		(LBR_KERNEL | LBR_USER)
+
+#define LBR_SEL_MASK	0x1ff	/* valid bits in LBR_SELECT */
+#define LBR_NOT_SUPP	-1	/* LBR filter not supported */
+#define LBR_IGN		0	/* ignored */
+
+#define LBR_ANY		 \
+	(LBR_JCC	|\
+	 LBR_REL_CALL	|\
+	 LBR_IND_CALL	|\
+	 LBR_RETURN	|\
+	 LBR_REL_JMP	|\
+	 LBR_IND_JMP	|\
+	 LBR_FAR)
+
+#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
+
 /*
  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
  * otherwise it becomes near impossible to get a reliable stack.
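For illustration only (not part of the patch): since the LBR_SELECT bits have
"do not capture" semantics, a value that records only user-level conditional
branches suppresses ring0 plus every branch type except JCC. The sketch below
assumes MSR_LBR_SELECT (0x1c8 on these parts) is available as an MSR constant.

	/*
	 * Hedged sketch: program the LBR filter to capture only user-level
	 * conditional branches.  Every set bit means "do not capture".
	 */
	static void example_lbr_select_user_jcc(void)
	{
		u64 lbr_select = LBR_KERNEL | (LBR_ANY & ~LBR_JCC);

		wrmsrl(MSR_LBR_SELECT, lbr_select);
	}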
@@ -151,8 +194,6 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 	cpuc->lbr_stack.nr = i;
 }

-#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
-
 /*
  * Due to lack of segmentation in Linux the effective address (offset)
  * is the same as the linear address, allowing us to merge the LIP and EIP
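The hunk above only relocates LBR_FROM_FLAG_MISPRED to the top of the file; the
64-bit read path that consumes it is not shown here. As a hedged sketch of that
use (helper name and locals are illustrative, and real code may sign-extend the
remaining bits to restore a canonical address rather than simply masking):

	/*
	 * Illustrative only: on LBR formats that encode the misprediction in
	 * bit 63 of the FROM value, peel the flag off and use the rest as
	 * the branch source address.
	 */
	static void example_split_lbr_from(u64 from, u64 *addr, int *mispred)
	{
		*mispred = !!(from & LBR_FROM_FLAG_MISPRED);
		*addr    = from & ~LBR_FROM_FLAG_MISPRED;
	}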
@@ -200,26 +241,84 @@ void intel_pmu_lbr_read(void)
 		intel_pmu_lbr_read_64(cpuc);
 }

+/*
+ * Map interface branch filters onto LBR filters
+ */
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
+					| LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
+	 */
+	[PERF_SAMPLE_BRANCH_ANY_CALL] =
+	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
+	/*
+	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
+	 */
+	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL | LBR_IND_JMP,
+};
+
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
+	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
+					| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
+};
+
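The maps above are indexed by the PERF_SAMPLE_BRANCH_* mask values and give the
LBR branch types to *capture* for each request. A hypothetical helper (not
added by this patch; the real setup code lives elsewhere in the series) would
accumulate those entries and then invert the result, since the hardware bits
mean "do not capture":

	/*
	 * Illustrative only: translate a perf branch_sample_type mask into an
	 * LBR_SELECT value via the per-model sel_map.
	 */
	static int example_branch_type_to_lbr_select(u64 branch_type, u64 *lbr_select)
	{
		u64 mask = 0, m;
		int v;

		for (m = 1; m < PERF_SAMPLE_BRANCH_MAX; m <<= 1) {
			if (!(branch_type & m))
				continue;
			v = x86_pmu.lbr_sel_map[m];
			if (v == LBR_NOT_SUPP)
				return -EOPNOTSUPP;
			mask |= v;
		}
		/* LBR_SELECT operates in suppress mode, so invert the mask */
		*lbr_select = ~mask & x86_pmu.lbr_sel_mask;
		return 0;
	}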
+/* core */
 void intel_pmu_lbr_init_core(void)
 {
 	x86_pmu.lbr_nr     = 4;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
 	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	pr_cont("4-deep LBR, ");
 }

+/* nehalem/westmere */
 void intel_pmu_lbr_init_nhm(void)
 {
 	x86_pmu.lbr_nr     = 16;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
 	x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
+
+	pr_cont("16-deep LBR, ");
 }

+/* sandy bridge */
+void intel_pmu_lbr_init_snb(void)
+{
+	x86_pmu.lbr_nr	 = 16;
+	x86_pmu.lbr_tos	 = MSR_LBR_TOS;
+	x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
+	x86_pmu.lbr_to	 = MSR_LBR_NHM_TO;
+
+	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
+	x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
+
+	pr_cont("16-deep LBR, ");
+}
+
+/* atom */
 void intel_pmu_lbr_init_atom(void)
 {
 	x86_pmu.lbr_nr	   = 8;
 	x86_pmu.lbr_tos    = MSR_LBR_TOS;
 	x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
 	x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
+
+	pr_cont("8-deep LBR, ");
 }
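From user space, the hardware filter set up through these maps is reached via
the branch sampling interface added elsewhere in this series (the
branch_sample_type field and PERF_SAMPLE_BRANCH_STACK sample type are assumed
here). A minimal sketch of an attr that asks for user-level indirect calls
only, which the maps above turn into an LBR_SELECT value on NHM/WSM and SNB:

	#include <string.h>
	#include <linux/perf_event.h>

	/* Illustrative only: request branch-stack samples of user-level
	 * indirect calls on a cycles event. */
	static void example_fill_attr(struct perf_event_attr *attr)
	{
		memset(attr, 0, sizeof(*attr));
		attr->type	  = PERF_TYPE_HARDWARE;
		attr->config	  = PERF_COUNT_HW_CPU_CYCLES;
		attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
		attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
					   PERF_SAMPLE_BRANCH_IND_CALL;
	}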