@@ -77,6 +77,18 @@ struct cpu_hw_events {
 	struct debug_store	*ds;
 };
 
+struct event_constraint {
+	unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+	int		code;
+};
+
+#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) }
+#define EVENT_CONSTRAINT_END   { .code = 0, .idxmsk[0] = 0 }
+
+#define for_each_event_constraint(e, c) \
+	for ((e) = (c); (e)->idxmsk[0]; (e)++)
+
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -102,6 +114,7 @@ struct x86_pmu {
 	u64		intel_ctrl;
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
+	int		(*get_event_idx)(struct hw_perf_event *hwc);
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -110,6 +123,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
+static const struct event_constraint *event_constraints;
+
 /*
  * Not sure about some of these
  */
@@ -155,6 +170,16 @@ static u64 p6_pmu_raw_event(u64 hw_event)
 	return hw_event & P6_EVNTSEL_MASK;
 }
 
+static const struct event_constraint intel_p6_event_constraints[] =
+{
+	EVENT_CONSTRAINT(0xc1, 0x1),	/* FLOPS */
+	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
+	EVENT_CONSTRAINT(0x11, 0x1),	/* FP_ASSIST */
+	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
+	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
+	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT_END
+};
+
 /*
  * Intel PerfMon v3. Used on Core2 and later.
@@ -170,6 +195,35 @@ static const u64 intel_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
+static const struct event_constraint intel_core_event_constraints[] =
+{
+	EVENT_CONSTRAINT(0x10, 0x1),	/* FP_COMP_OPS_EXE */
+	EVENT_CONSTRAINT(0x11, 0x2),	/* FP_ASSIST */
+	EVENT_CONSTRAINT(0x12, 0x2),	/* MUL */
+	EVENT_CONSTRAINT(0x13, 0x2),	/* DIV */
+	EVENT_CONSTRAINT(0x14, 0x1),	/* CYCLES_DIV_BUSY */
+	EVENT_CONSTRAINT(0x18, 0x1),	/* IDLE_DURING_DIV */
+	EVENT_CONSTRAINT(0x19, 0x2),	/* DELAYED_BYPASS */
+	EVENT_CONSTRAINT(0xa1, 0x1),	/* RS_UOPS_DISPATCH_CYCLES */
+	EVENT_CONSTRAINT(0xcb, 0x1),	/* MEM_LOAD_RETIRED */
+	EVENT_CONSTRAINT_END
+};
+
+static const struct event_constraint intel_nehalem_event_constraints[] =
+{
+	EVENT_CONSTRAINT(0x40, 0x3),	/* L1D_CACHE_LD */
+	EVENT_CONSTRAINT(0x41, 0x3),	/* L1D_CACHE_ST */
+	EVENT_CONSTRAINT(0x42, 0x3),	/* L1D_CACHE_LOCK */
+	EVENT_CONSTRAINT(0x43, 0x3),	/* L1D_ALL_REF */
+	EVENT_CONSTRAINT(0x4e, 0x3),	/* L1D_PREFETCH */
+	EVENT_CONSTRAINT(0x4c, 0x3),	/* LOAD_HIT_PRE */
+	EVENT_CONSTRAINT(0x51, 0x3),	/* L1D */
+	EVENT_CONSTRAINT(0x52, 0x3),	/* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
+	EVENT_CONSTRAINT(0x53, 0x3),	/* L1D_CACHE_LOCK_FB_HIT */
+	EVENT_CONSTRAINT(0xc5, 0x3),	/* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
 static u64 intel_pmu_event_map(int hw_event)
 {
 	return intel_perfmon_event_map[hw_event];
@@ -932,6 +986,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 	 */
 	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
 
+	hwc->idx = -1;
+
 	/*
 	 * Count user and OS events unless requested not to.
 	 */
@@ -1365,6 +1421,45 @@ fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
 	return -1;
 }
 
+/*
+ * generic counter allocator: get next free counter
+ */
+static int gen_get_event_idx(struct hw_perf_event *hwc)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int idx;
+
+	idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events);
+	return idx == x86_pmu.num_events ? -1 : idx;
+}
+
+/*
+ * intel-specific counter allocator: check event constraints
+ */
+static int intel_get_event_idx(struct hw_perf_event *hwc)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	const struct event_constraint *event_constraint;
+	int i, code;
+
+	if (!event_constraints)
+		goto skip;
+
+	code = hwc->config & 0xff;
+
+	for_each_event_constraint(event_constraint, event_constraints) {
+		if (code == event_constraint->code) {
+			for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) {
+				if (!test_and_set_bit(i, cpuc->used_mask))
+					return i;
+			}
+			return -1;
+		}
+	}
+skip:
+	return gen_get_event_idx(hwc);
+}
+
 /*
  * Find a PMC slot for the freshly enabled / scheduled in event:
  */
@@ -1402,11 +1497,10 @@ static int x86_pmu_enable(struct perf_event *event)
 	} else {
 		idx = hwc->idx;
 		/* Try to get the previous generic event again */
-		if (test_and_set_bit(idx, cpuc->used_mask)) {
+		if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) {
 try_generic:
-			idx = find_first_zero_bit(cpuc->used_mask,
-						  x86_pmu.num_events);
-			if (idx == x86_pmu.num_events)
+			idx = x86_pmu.get_event_idx(hwc);
+			if (idx == -1)
 				return -EAGAIN;
 
 			set_bit(idx, cpuc->used_mask);
@@ -1883,6 +1977,7 @@ static struct x86_pmu p6_pmu = {
 	 */
 	.event_bits		= 32,
 	.event_mask		= (1ULL << 32) - 1,
+	.get_event_idx		= intel_get_event_idx,
 };
 
 static struct x86_pmu intel_pmu = {
@@ -1906,6 +2001,7 @@ static struct x86_pmu intel_pmu = {
 	.max_period		= (1ULL << 31) - 1,
 	.enable_bts		= intel_pmu_enable_bts,
 	.disable_bts		= intel_pmu_disable_bts,
+	.get_event_idx		= intel_get_event_idx,
 };
 
 static struct x86_pmu amd_pmu = {
@@ -1926,6 +2022,7 @@ static struct x86_pmu amd_pmu = {
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
+	.get_event_idx		= gen_get_event_idx,
 };
 
 static int p6_pmu_init(void)
@@ -1938,10 +2035,12 @@ static int p6_pmu_init(void)
 	case 7:
 	case 8:
 	case 11: /* Pentium III */
+		event_constraints = intel_p6_event_constraints;
 		break;
 	case 9:
 	case 13:
 		/* Pentium M */
+		event_constraints = intel_p6_event_constraints;
 		break;
 	default:
 		pr_cont("unsupported p6 CPU model %d ",
@@ -2013,12 +2112,14 @@ static int intel_pmu_init(void)
 		       sizeof(hw_cache_event_ids));
 
 		pr_cont("Core2 events, ");
+		event_constraints = intel_core_event_constraints;
 		break;
 	default:
 	case 26:
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		event_constraints = intel_nehalem_event_constraints;
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
 	case 28: