@@ -69,6 +69,44 @@ static const u64 pebs_data_source[] = {
 	OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
 };
 
+static u64 precise_store_data(u64 status)
+{
+	union intel_x86_pebs_dse dse;
+	u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+	dse.val = status;
+
+	/*
+	 * bit 4: TLB access
+	 * 1 = store missed 2nd level TLB
+	 *
+	 * so the translation was resolved by the page walker or the OS,
+	 * otherwise it hit the 2nd level TLB
+	 */
+	if (dse.st_stlb_miss)
+		val |= P(TLB, MISS);
+	else
+		val |= P(TLB, HIT);
+
+	/*
+	 * bit 0: hit L1 data cache
+	 * if not set, then all we know is that
+	 * it missed L1D
+	 */
+	if (dse.st_l1d_hit)
+		val |= P(LVL, HIT);
+	else
+		val |= P(LVL, MISS);
+
+	/*
+	 * bit 5: Locked prefix
+	 */
+	if (dse.st_locked)
+		val |= P(LOCK, LOCKED);
+
+	return val;
+}
+
 static u64 load_latency_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
@@ -486,6 +524,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -500,6 +539,7 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = {
 	INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */
 	INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+	INTEL_PST_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORES */
 	INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
 	INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@ -537,6 +577,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
 		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+		cpuc->pebs_enabled |= 1ULL << 63;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
@@ -657,12 +699,13 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	struct perf_sample_data data;
 	struct pt_regs regs;
 	u64 sample_type;
-	int fll;
+	int fll, fst;
 
 	if (!intel_pmu_save_and_restart(event))
 		return;
 
 	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+	fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
 
@@ -672,7 +715,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	/*
 	 * if PEBS-LL or PreciseStore
 	 */
-	if (fll) {
+	if (fll || fst) {
 		if (sample_type & PERF_SAMPLE_ADDR)
 			data.addr = pebs->dla;
 
@@ -688,6 +731,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		if (sample_type & PERF_SAMPLE_DATA_SRC) {
 			if (fll)
 				data.data_src.val = load_latency_data(pebs->dse);
+			else
+				data.data_src.val = precise_store_data(pebs->dse);
 		}
 	}
 
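
Not part of the patch itself: a minimal userspace sketch of how a consumer of PERF_SAMPLE_DATA_SRC might decode the value that precise_store_data() produces, using the perf_mem_data_src union and PERF_MEM_* bits from the uapi perf_event.h. The decode_store_src() helper name and the output format are illustrative only.

/* Illustrative sketch -- not part of this patch. */
#include <stdio.h>
#include <linux/perf_event.h>

/* Decode a PERF_SAMPLE_DATA_SRC value taken from a precise-store sample. */
static void decode_store_src(__u64 raw)
{
	union perf_mem_data_src src = { .val = raw };

	printf("store=%d l1_hit=%d l1_miss=%d dtlb_hit=%d dtlb_miss=%d locked=%d\n",
	       !!(src.mem_op   & PERF_MEM_OP_STORE),
	       !!(src.mem_lvl  & PERF_MEM_LVL_HIT),
	       !!(src.mem_lvl  & PERF_MEM_LVL_MISS),
	       !!(src.mem_dtlb & PERF_MEM_TLB_HIT),
	       !!(src.mem_dtlb & PERF_MEM_TLB_MISS),
	       !!(src.mem_lock & PERF_MEM_LOCK_LOCKED));
}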