|
@@ -182,16 +182,29 @@ struct pebs_record_nhm {
|
|
|
* Same as pebs_record_nhm, with two additional fields.
|
|
|
*/
|
|
|
struct pebs_record_hsw {
|
|
|
- struct pebs_record_nhm nhm;
|
|
|
- /*
|
|
|
- * Real IP of the event. In the Intel documentation this
|
|
|
- * is called eventingrip.
|
|
|
- */
|
|
|
- u64 real_ip;
|
|
|
- /*
|
|
|
- * TSX tuning information field: abort cycles and abort flags.
|
|
|
- */
|
|
|
- u64 tsx_tuning;
|
|
|
+ u64 flags, ip;
|
|
|
+ u64 ax, bx, cx, dx;
|
|
|
+ u64 si, di, bp, sp;
|
|
|
+ u64 r8, r9, r10, r11;
|
|
|
+ u64 r12, r13, r14, r15;
|
|
|
+ u64 status, dla, dse, lat;
|
|
|
+ u64 real_ip; /* the actual eventing ip */
|
|
|
+ u64 tsx_tuning; /* TSX abort cycles and flags */
|
|
|
+};
|
|
|
+
|
|
|
+union hsw_tsx_tuning {
|
|
|
+ struct {
|
|
|
+ u32 cycles_last_block : 32,
|
|
|
+ hle_abort : 1,
|
|
|
+ rtm_abort : 1,
|
|
|
+ instruction_abort : 1,
|
|
|
+ non_instruction_abort : 1,
|
|
|
+ retry : 1,
|
|
|
+ data_conflict : 1,
|
|
|
+ capacity_writes : 1,
|
|
|
+ capacity_reads : 1;
|
|
|
+ };
|
|
|
+ u64 value;
|
|
|
};
|
|
|
|
|
|
void init_debug_store_on_cpu(int cpu)
|
|
@@ -785,16 +798,26 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs)
|
|
|
+{
|
|
|
+ if (pebs->tsx_tuning) {
|
|
|
+ union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
|
|
|
+ return tsx.cycles_last_block;
|
|
|
+ }
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
static void __intel_pmu_pebs_event(struct perf_event *event,
|
|
|
struct pt_regs *iregs, void *__pebs)
|
|
|
{
|
|
|
/*
|
|
|
* We cast to pebs_record_nhm to get the load latency data
|
|
|
* if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
|
|
|
+ * We cast to the biggest PEBS record are careful not
|
|
|
+ * to access out-of-bounds members.
|
|
|
*/
|
|
|
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
|
|
|
- struct pebs_record_nhm *pebs = __pebs;
|
|
|
- struct pebs_record_hsw *pebs_hsw = __pebs;
|
|
|
+ struct pebs_record_hsw *pebs = __pebs;
|
|
|
struct perf_sample_data data;
|
|
|
struct pt_regs regs;
|
|
|
u64 sample_type;
|
|
@@ -853,7 +876,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
|
|
regs.sp = pebs->sp;
|
|
|
|
|
|
if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
|
|
|
- regs.ip = pebs_hsw->real_ip;
|
|
|
+ regs.ip = pebs->real_ip;
|
|
|
regs.flags |= PERF_EFLAGS_EXACT;
|
|
|
} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s))
|
|
|
regs.flags |= PERF_EFLAGS_EXACT;
|
|
@@ -864,6 +887,12 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
|
|
|
x86_pmu.intel_cap.pebs_format >= 1)
|
|
|
data.addr = pebs->dla;
|
|
|
|
|
|
+ /* Only set the TSX weight when no memory weight was requested. */
|
|
|
+ if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
|
|
|
+ !fll &&
|
|
|
+ (x86_pmu.intel_cap.pebs_format >= 2))
|
|
|
+ data.weight = intel_hsw_weight(pebs);
|
|
|
+
|
|
|
if (has_branch_stack(event))
|
|
|
data.br_stack = &cpuc->lbr_stack;
|
|
|
|