perf_event_intel_ds.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. #ifdef CONFIG_CPU_SUP_INTEL
  2. /* The maximal number of PEBS events: */
  3. #define MAX_PEBS_EVENTS 4
  4. /* The size of a BTS record in bytes: */
  5. #define BTS_RECORD_SIZE 24
  6. #define BTS_BUFFER_SIZE (PAGE_SIZE << 4)
  7. #define PEBS_BUFFER_SIZE PAGE_SIZE
/*
 * pebs_record_32 for p4 and core not supported

struct pebs_record_32 {
	u32 flags, ip;
	u32 ax, bx, cx, dx;
	u32 si, di, bp, sp;
};

 */
  16. struct pebs_record_core {
  17. u64 flags, ip;
  18. u64 ax, bx, cx, dx;
  19. u64 si, di, bp, sp;
  20. u64 r8, r9, r10, r11;
  21. u64 r12, r13, r14, r15;
  22. };
  23. struct pebs_record_nhm {
  24. u64 flags, ip;
  25. u64 ax, bx, cx, dx;
  26. u64 si, di, bp, sp;
  27. u64 r8, r9, r10, r11;
  28. u64 r12, r13, r14, r15;
  29. u64 status, dla, dse, lat;
  30. };
  31. /*
  32. * Bits in the debugctlmsr controlling branch tracing.
  33. */
  34. #define X86_DEBUGCTL_TR (1 << 6)
  35. #define X86_DEBUGCTL_BTS (1 << 7)
  36. #define X86_DEBUGCTL_BTINT (1 << 8)
  37. #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9)
  38. #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10)
  39. /*
  40. * A debug store configuration.
  41. *
  42. * We only support architectures that use 64bit fields.
  43. */
  44. struct debug_store {
  45. u64 bts_buffer_base;
  46. u64 bts_index;
  47. u64 bts_absolute_maximum;
  48. u64 bts_interrupt_threshold;
  49. u64 pebs_buffer_base;
  50. u64 pebs_index;
  51. u64 pebs_absolute_maximum;
  52. u64 pebs_interrupt_threshold;
  53. u64 pebs_event_reset[MAX_PEBS_EVENTS];
  54. };
  55. static void init_debug_store_on_cpu(int cpu)
  56. {
  57. struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
  58. if (!ds)
  59. return;
  60. wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
  61. (u32)((u64)(unsigned long)ds),
  62. (u32)((u64)(unsigned long)ds >> 32));
  63. }
  64. static void fini_debug_store_on_cpu(int cpu)
  65. {
  66. if (!per_cpu(cpu_hw_events, cpu).ds)
  67. return;
  68. wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
  69. }
  70. static void release_ds_buffers(void)
  71. {
  72. int cpu;
  73. if (!x86_pmu.bts && !x86_pmu.pebs)
  74. return;
  75. get_online_cpus();
  76. for_each_online_cpu(cpu)
  77. fini_debug_store_on_cpu(cpu);
  78. for_each_possible_cpu(cpu) {
  79. struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
  80. if (!ds)
  81. continue;
  82. per_cpu(cpu_hw_events, cpu).ds = NULL;
  83. kfree((void *)(unsigned long)ds->pebs_buffer_base);
  84. kfree((void *)(unsigned long)ds->bts_buffer_base);
  85. kfree(ds);
  86. }
  87. put_online_cpus();
  88. }
  89. static int reserve_ds_buffers(void)
  90. {
  91. int cpu, err = 0;
  92. if (!x86_pmu.bts && !x86_pmu.pebs)
  93. return 0;
  94. get_online_cpus();
  95. for_each_possible_cpu(cpu) {
  96. struct debug_store *ds;
  97. void *buffer;
  98. int max, thresh;
  99. err = -ENOMEM;
  100. ds = kzalloc(sizeof(*ds), GFP_KERNEL);
  101. if (unlikely(!ds)) {
  102. kfree(buffer);
  103. break;
  104. }
  105. per_cpu(cpu_hw_events, cpu).ds = ds;
  106. if (x86_pmu.bts) {
  107. buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
  108. if (unlikely(!buffer))
  109. break;
  110. max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
  111. thresh = max / 16;
  112. ds->bts_buffer_base = (u64)(unsigned long)buffer;
  113. ds->bts_index = ds->bts_buffer_base;
  114. ds->bts_absolute_maximum = ds->bts_buffer_base +
  115. max * BTS_RECORD_SIZE;
  116. ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
  117. thresh * BTS_RECORD_SIZE;
  118. }
  119. if (x86_pmu.pebs) {
  120. buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
  121. if (unlikely(!buffer))
  122. break;
  123. max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
  124. ds->pebs_buffer_base = (u64)(unsigned long)buffer;
  125. ds->pebs_index = ds->pebs_buffer_base;
  126. ds->pebs_absolute_maximum = ds->pebs_buffer_base +
  127. max * x86_pmu.pebs_record_size;
  128. /*
  129. * Always use single record PEBS
  130. */
  131. ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
  132. x86_pmu.pebs_record_size;
  133. }
  134. err = 0;
  135. }
  136. if (err)
  137. release_ds_buffers();
  138. else {
  139. for_each_online_cpu(cpu)
  140. init_debug_store_on_cpu(cpu);
  141. }
  142. put_online_cpus();
  143. return err;
  144. }
  145. /*
  146. * BTS
  147. */
  148. static struct event_constraint bts_constraint =
  149. EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
  150. static void intel_pmu_enable_bts(u64 config)
  151. {
  152. unsigned long debugctlmsr;
  153. debugctlmsr = get_debugctlmsr();
  154. debugctlmsr |= X86_DEBUGCTL_TR;
  155. debugctlmsr |= X86_DEBUGCTL_BTS;
  156. debugctlmsr |= X86_DEBUGCTL_BTINT;
  157. if (!(config & ARCH_PERFMON_EVENTSEL_OS))
  158. debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
  159. if (!(config & ARCH_PERFMON_EVENTSEL_USR))
  160. debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
  161. update_debugctlmsr(debugctlmsr);
  162. }
  163. static void intel_pmu_disable_bts(void)
  164. {
  165. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  166. unsigned long debugctlmsr;
  167. if (!cpuc->ds)
  168. return;
  169. debugctlmsr = get_debugctlmsr();
  170. debugctlmsr &=
  171. ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
  172. X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
  173. update_debugctlmsr(debugctlmsr);
  174. }
  175. static void intel_pmu_drain_bts_buffer(void)
  176. {
  177. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  178. struct debug_store *ds = cpuc->ds;
  179. struct bts_record {
  180. u64 from;
  181. u64 to;
  182. u64 flags;
  183. };
  184. struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
  185. struct bts_record *at, *top;
  186. struct perf_output_handle handle;
  187. struct perf_event_header header;
  188. struct perf_sample_data data;
  189. struct pt_regs regs;
  190. if (!event)
  191. return;
  192. if (!ds)
  193. return;
  194. at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
  195. top = (struct bts_record *)(unsigned long)ds->bts_index;
  196. if (top <= at)
  197. return;
  198. ds->bts_index = ds->bts_buffer_base;
  199. perf_sample_data_init(&data, 0);
  200. data.period = event->hw.last_period;
  201. regs.ip = 0;
  202. /*
  203. * Prepare a generic sample, i.e. fill in the invariant fields.
  204. * We will overwrite the from and to address before we output
  205. * the sample.
  206. */
  207. perf_prepare_sample(&header, &data, event, &regs);
  208. if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
  209. return;
  210. for (; at < top; at++) {
  211. data.ip = at->from;
  212. data.addr = at->to;
  213. perf_output_sample(&handle, &header, &data, event);
  214. }
  215. perf_output_end(&handle);
  216. /* There's new data available. */
  217. event->hw.interrupts++;
  218. event->pending_kill = POLL_IN;
  219. }
  220. /*
  221. * PEBS
  222. */
  223. static struct event_constraint intel_core_pebs_events[] = {
  224. PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
  225. PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
  226. PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
  227. PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
  228. PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
  229. PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
  230. PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
  231. PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
  232. PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
  233. EVENT_CONSTRAINT_END
  234. };
  235. static struct event_constraint intel_nehalem_pebs_events[] = {
  236. PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
  237. PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
  238. PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
  239. PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */
  240. PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
  241. PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
  242. PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
  243. PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
  244. PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
  245. EVENT_CONSTRAINT_END
  246. };
  247. static struct event_constraint *
  248. intel_pebs_constraints(struct perf_event *event)
  249. {
  250. struct event_constraint *c;
  251. if (!event->attr.precise)
  252. return NULL;
  253. if (x86_pmu.pebs_constraints) {
  254. for_each_event_constraint(c, x86_pmu.pebs_constraints) {
  255. if ((event->hw.config & c->cmask) == c->code)
  256. return c;
  257. }
  258. }
  259. return &emptyconstraint;
  260. }
  261. static void intel_pmu_pebs_enable(struct hw_perf_event *hwc)
  262. {
  263. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  264. u64 val = cpuc->pebs_enabled;
  265. hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
  266. val |= 1ULL << hwc->idx;
  267. wrmsrl(MSR_IA32_PEBS_ENABLE, val);
  268. }
  269. static void intel_pmu_pebs_disable(struct hw_perf_event *hwc)
  270. {
  271. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  272. u64 val = cpuc->pebs_enabled;
  273. val &= ~(1ULL << hwc->idx);
  274. wrmsrl(MSR_IA32_PEBS_ENABLE, val);
  275. hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
  276. }
  277. static void intel_pmu_pebs_enable_all(void)
  278. {
  279. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  280. if (cpuc->pebs_enabled)
  281. wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
  282. }
  283. static void intel_pmu_pebs_disable_all(void)
  284. {
  285. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  286. if (cpuc->pebs_enabled)
  287. wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
  288. }
  289. static int intel_pmu_save_and_restart(struct perf_event *event);
  290. static void intel_pmu_disable_event(struct perf_event *event);
  291. static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
  292. {
  293. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  294. struct debug_store *ds = cpuc->ds;
  295. struct perf_event *event = cpuc->events[0]; /* PMC0 only */
  296. struct pebs_record_core *at, *top;
  297. struct perf_sample_data data;
  298. struct pt_regs regs;
  299. int n;
  300. if (!event || !ds || !x86_pmu.pebs)
  301. return;
  302. intel_pmu_pebs_disable_all();
  303. at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
  304. top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
  305. if (top <= at)
  306. goto out;
  307. ds->pebs_index = ds->pebs_buffer_base;
  308. if (!intel_pmu_save_and_restart(event))
  309. goto out;
  310. perf_sample_data_init(&data, 0);
  311. data.period = event->hw.last_period;
  312. n = top - at;
  313. /*
  314. * Should not happen, we program the threshold at 1 and do not
  315. * set a reset value.
  316. */
  317. WARN_ON_ONCE(n > 1);
  318. /*
  319. * We use the interrupt regs as a base because the PEBS record
  320. * does not contain a full regs set, specifically it seems to
  321. * lack segment descriptors, which get used by things like
  322. * user_mode().
  323. *
  324. * In the simple case fix up only the IP and BP,SP regs, for
  325. * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
  326. * A possible PERF_SAMPLE_REGS will have to transfer all regs.
  327. */
  328. regs = *iregs;
  329. regs.ip = at->ip;
  330. regs.bp = at->bp;
  331. regs.sp = at->sp;
  332. if (perf_event_overflow(event, 1, &data, &regs))
  333. intel_pmu_disable_event(event);
  334. out:
  335. intel_pmu_pebs_enable_all();
  336. }
  337. static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
  338. {
  339. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  340. struct debug_store *ds = cpuc->ds;
  341. struct pebs_record_nhm *at, *top;
  342. struct perf_sample_data data;
  343. struct perf_event *event = NULL;
  344. struct pt_regs regs;
  345. int bit, n;
  346. if (!ds || !x86_pmu.pebs)
  347. return;
  348. intel_pmu_pebs_disable_all();
  349. at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
  350. top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
  351. if (top <= at)
  352. goto out;
  353. ds->pebs_index = ds->pebs_buffer_base;
  354. n = top - at;
  355. /*
  356. * Should not happen, we program the threshold at 1 and do not
  357. * set a reset value.
  358. */
  359. WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
  360. for ( ; at < top; at++) {
  361. for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
  362. if (!cpuc->events[bit]->attr.precise)
  363. continue;
  364. event = cpuc->events[bit];
  365. }
  366. if (!event)
  367. continue;
  368. if (!intel_pmu_save_and_restart(event))
  369. continue;
  370. perf_sample_data_init(&data, 0);
  371. data.period = event->hw.last_period;
  372. /*
  373. * See the comment in intel_pmu_drain_pebs_core()
  374. */
  375. regs = *iregs;
  376. regs.ip = at->ip;
  377. regs.bp = at->bp;
  378. regs.sp = at->sp;
  379. if (perf_event_overflow(event, 1, &data, &regs))
  380. intel_pmu_disable_event(event);
  381. }
  382. out:
  383. intel_pmu_pebs_enable_all();
  384. }
  385. /*
  386. * BTS, PEBS probe and setup
  387. */
  388. static void intel_ds_init(void)
  389. {
  390. /*
  391. * No support for 32bit formats
  392. */
  393. if (!boot_cpu_has(X86_FEATURE_DTES64))
  394. return;
  395. x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
  396. x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
  397. if (x86_pmu.pebs) {
  398. int format = 0;
  399. if (x86_pmu.version > 1) {
  400. u64 capabilities;
  401. /*
  402. * v2+ has a PEBS format field
  403. */
  404. rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
  405. format = (capabilities >> 8) & 0xf;
  406. }
  407. switch (format) {
  408. case 0:
  409. printk(KERN_CONT "PEBS v0, ");
  410. x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
  411. x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
  412. x86_pmu.pebs_constraints = intel_core_pebs_events;
  413. break;
  414. case 1:
  415. printk(KERN_CONT "PEBS v1, ");
  416. x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
  417. x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
  418. x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
  419. break;
  420. default:
  421. printk(KERN_CONT "PEBS unknown format: %d, ", format);
  422. x86_pmu.pebs = 0;
  423. break;
  424. }
  425. }
  426. }
  427. #else /* CONFIG_CPU_SUP_INTEL */
  428. static int reseve_ds_buffers(void)
  429. {
  430. return 0;
  431. }
  432. static void release_ds_buffers(void)
  433. {
  434. }
  435. #endif /* CONFIG_CPU_SUP_INTEL */