perf_event_intel_lbr.c

#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/insn.h>

#include "perf_event.h"

enum {
        LBR_FORMAT_32           = 0x00,
        LBR_FORMAT_LIP          = 0x01,
        LBR_FORMAT_EIP          = 0x02,
        LBR_FORMAT_EIP_FLAGS    = 0x03,
};
/*
 * Intel LBR_SELECT bits
 * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 *
 * Hardware branch filter (not available on all CPUs)
 */
#define LBR_KERNEL_BIT          0 /* do not capture at ring0 */
#define LBR_USER_BIT            1 /* do not capture at ring > 0 */
#define LBR_JCC_BIT             2 /* do not capture conditional branches */
#define LBR_REL_CALL_BIT        3 /* do not capture relative calls */
#define LBR_IND_CALL_BIT        4 /* do not capture indirect calls */
#define LBR_RETURN_BIT          5 /* do not capture near returns */
#define LBR_IND_JMP_BIT         6 /* do not capture indirect jumps */
#define LBR_REL_JMP_BIT         7 /* do not capture relative jumps */
#define LBR_FAR_BIT             8 /* do not capture far branches */

#define LBR_KERNEL      (1 << LBR_KERNEL_BIT)
#define LBR_USER        (1 << LBR_USER_BIT)
#define LBR_JCC         (1 << LBR_JCC_BIT)
#define LBR_REL_CALL    (1 << LBR_REL_CALL_BIT)
#define LBR_IND_CALL    (1 << LBR_IND_CALL_BIT)
#define LBR_RETURN      (1 << LBR_RETURN_BIT)
#define LBR_REL_JMP     (1 << LBR_REL_JMP_BIT)
#define LBR_IND_JMP     (1 << LBR_IND_JMP_BIT)
#define LBR_FAR         (1 << LBR_FAR_BIT)

#define LBR_PLM         (LBR_KERNEL | LBR_USER)

#define LBR_SEL_MASK    0x1ff   /* valid bits in LBR_SELECT */
#define LBR_NOT_SUPP    -1      /* LBR filter not supported */
#define LBR_IGN         0       /* ignored */

#define LBR_ANY          \
        (LBR_JCC        |\
         LBR_REL_CALL   |\
         LBR_IND_CALL   |\
         LBR_RETURN     |\
         LBR_REL_JMP    |\
         LBR_IND_JMP    |\
         LBR_FAR)

#define LBR_FROM_FLAG_MISPRED   (1ULL << 63)

#define for_each_branch_sample_type(x) \
        for ((x) = PERF_SAMPLE_BRANCH_USER; \
             (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
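
/*
 * Note: the macro above walks every PERF_SAMPLE_BRANCH_* bit, one at a
 * time, so the HW filter setup below can translate each requested sample
 * type into its LBR_SELECT equivalent (or reject it with LBR_NOT_SUPP).
 */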
/*
 * x86 control flow change classification
 * x86 control flow changes include branches, interrupts, traps, faults
 */
enum {
        X86_BR_NONE     = 0,      /* unknown */

        X86_BR_USER     = 1 << 0, /* branch target is user */
        X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */

        X86_BR_CALL     = 1 << 2, /* call */
        X86_BR_RET      = 1 << 3, /* return */
        X86_BR_SYSCALL  = 1 << 4, /* syscall */
        X86_BR_SYSRET   = 1 << 5, /* syscall return */
        X86_BR_INT      = 1 << 6, /* sw interrupt */
        X86_BR_IRET     = 1 << 7, /* return from interrupt */
        X86_BR_JCC      = 1 << 8, /* conditional */
        X86_BR_JMP      = 1 << 9, /* jump */
        X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
        X86_BR_IND_CALL = 1 << 11,/* indirect calls */
};

#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)

#define X86_BR_ANY       \
        (X86_BR_CALL    |\
         X86_BR_RET     |\
         X86_BR_SYSCALL |\
         X86_BR_SYSRET  |\
         X86_BR_INT     |\
         X86_BR_IRET    |\
         X86_BR_JCC     |\
         X86_BR_JMP     |\
         X86_BR_IRQ     |\
         X86_BR_IND_CALL)

#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)

#define X86_BR_ANY_CALL          \
        (X86_BR_CALL            |\
         X86_BR_IND_CALL        |\
         X86_BR_SYSCALL         |\
         X86_BR_IRQ             |\
         X86_BR_INT)
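
/*
 * X86_BR_ANY_CALL groups syscalls, sw interrupts and hw interrupts/traps
 * together with direct and indirect calls: all of them transfer control
 * away much like a call (and come back via sysret/iret), so they are kept
 * when the user asks for PERF_SAMPLE_BRANCH_ANY_CALL.
 */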
static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI,
 * otherwise it becomes nearly impossible to get a reliable stack.
 */
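
/*
 * Enabling sets both DEBUGCTLMSR_LBR (start recording branches) and
 * DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (stop recording as soon as a PMI is
 * raised), so the LBR stack still describes the interrupted code rather
 * than the PMI handler by the time intel_pmu_lbr_read() runs.
 */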
static void __intel_pmu_lbr_enable(void)
{
        u64 debugctl;
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_sel)
                wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void __intel_pmu_lbr_disable(void)
{
        u64 debugctl;

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
static void intel_pmu_lbr_reset_32(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++)
                wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                wrmsrl(x86_pmu.lbr_from + i, 0);
                wrmsrl(x86_pmu.lbr_to   + i, 0);
        }
}

void intel_pmu_lbr_reset(void)
{
        if (!x86_pmu.lbr_nr)
                return;

        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
                intel_pmu_lbr_reset_32();
        else
                intel_pmu_lbr_reset_64();
}
void intel_pmu_lbr_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        /*
         * Reset the LBR stack if we changed task context to
         * avoid data leaks.
         */
        if (event->ctx->task && cpuc->lbr_context != event->ctx) {
                intel_pmu_lbr_reset();
                cpuc->lbr_context = event->ctx;
        }

        cpuc->br_sel = event->hw.branch_reg.reg;

        cpuc->lbr_users++;
}

void intel_pmu_lbr_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);

        if (cpuc->enabled && !cpuc->lbr_users) {
                __intel_pmu_lbr_disable();
                /* avoid stale pointer */
                cpuc->lbr_context = NULL;
        }
}
void intel_pmu_lbr_enable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_users)
                __intel_pmu_lbr_enable();
}

void intel_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_users)
                __intel_pmu_lbr_disable();
}

/*
 * TOS = most recently recorded branch
 */
static inline u64 intel_pmu_lbr_tos(void)
{
        u64 tos;

        rdmsrl(x86_pmu.lbr_tos, tos);

        return tos;
}
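
/*
 * The LBR stack is a small ring buffer of lbr_nr FROM/TO MSR pairs, with
 * the TOS MSR pointing at the most recent entry. Both readers below walk
 * it newest-first: entry i lives at index (tos - i) & (lbr_nr - 1), which
 * only works because every lbr_nr used in this file (4, 8, 16) is a power
 * of two. For example, with lbr_nr = 16 and tos = 2 the walk visits
 * indices 2, 1, 0, 15, 14, ...
 */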
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
        unsigned long mask = x86_pmu.lbr_nr - 1;
        u64 tos = intel_pmu_lbr_tos();
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                union {
                        struct {
                                u32 from;
                                u32 to;
                        };
                        u64     lbr;
                } msr_lastbranch;

                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

                cpuc->lbr_entries[i].from       = msr_lastbranch.from;
                cpuc->lbr_entries[i].to         = msr_lastbranch.to;
                cpuc->lbr_entries[i].mispred    = 0;
                cpuc->lbr_entries[i].predicted  = 0;
                cpuc->lbr_entries[i].reserved   = 0;
        }
        cpuc->lbr_stack.nr = i;
}
/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
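
/*
 * In the EIP_FLAGS format the top bit of each FROM entry is not part of
 * the address: it is the misprediction flag (LBR_FROM_FLAG_MISPRED, bit
 * 63). After extracting it, the (s64 << 1) >> 1 below drops bit 63 and
 * sign-extends from bit 62, restoring a canonical 64-bit address.
 */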
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
        unsigned long mask = x86_pmu.lbr_nr - 1;
        int lbr_format = x86_pmu.intel_cap.lbr_format;
        u64 tos = intel_pmu_lbr_tos();
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                u64 from, to, mis = 0, pred = 0;

                rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
                rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);

                if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
                        pred = !mis;
                        from = (u64)((((s64)from) << 1) >> 1);
                }

                cpuc->lbr_entries[i].from       = from;
                cpuc->lbr_entries[i].to         = to;
                cpuc->lbr_entries[i].mispred    = mis;
                cpuc->lbr_entries[i].predicted  = pred;
                cpuc->lbr_entries[i].reserved   = 0;
        }
        cpuc->lbr_stack.nr = i;
}
void intel_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!cpuc->lbr_users)
                return;

        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
                intel_pmu_lbr_read_32(cpuc);
        else
                intel_pmu_lbr_read_64(cpuc);

        intel_pmu_lbr_filter(cpuc);
}

/*
 * SW filter is used:
 * - in case there is no HW filter
 * - in case the HW filter has errata or limitations
 */
static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
{
        u64 br_type = event->attr.branch_sample_type;
        int mask = 0;

        if (br_type & PERF_SAMPLE_BRANCH_USER)
                mask |= X86_BR_USER;

        if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
                mask |= X86_BR_KERNEL;

        /* we ignore BRANCH_HV here */

        if (br_type & PERF_SAMPLE_BRANCH_ANY)
                mask |= X86_BR_ANY;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
                mask |= X86_BR_ANY_CALL;

        if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
                mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;

        if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
                mask |= X86_BR_IND_CALL;

        /*
         * stash the actual user request into reg; it may
         * be used by fixup code for some CPUs
         */
        event->hw.branch_reg.reg = mask;
}

/*
 * Set up the HW LBR filter.
 * Used only when available; it may not be enough to disambiguate
 * all branches, so the SW filter may still be needed.
 */
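
/*
 * LBR_SELECT is a suppression mask: a set bit means "do NOT capture that
 * class of branch", so the requested classes are accumulated and then
 * inverted. As an illustration, with the snb_lbr_sel_map below a request
 * for PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_IND_CALL builds
 * mask = LBR_USER | LBR_IND_CALL = 0x012, giving
 * reg->config = ~0x012 & 0x1ff = 0x1ed, i.e. suppress ring0 and every
 * branch class except indirect calls.
 */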
static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
{
        struct hw_perf_event_extra *reg;
        u64 br_type = event->attr.branch_sample_type;
        u64 mask = 0, m;
        u64 v;

        for_each_branch_sample_type(m) {
                if (!(br_type & m))
                        continue;

                v = x86_pmu.lbr_sel_map[m];
                if (v == LBR_NOT_SUPP)
                        return -EOPNOTSUPP;

                if (v != LBR_IGN)
                        mask |= v;
        }
        reg = &event->hw.branch_reg;
        reg->idx = EXTRA_REG_LBR;

        /* LBR_SELECT operates in suppress mode so invert mask */
        reg->config = ~mask & x86_pmu.lbr_sel_mask;

        return 0;
}
int intel_pmu_setup_lbr_filter(struct perf_event *event)
{
        int ret = 0;

        /*
         * no LBR on this PMU
         */
        if (!x86_pmu.lbr_nr)
                return -EOPNOTSUPP;

        /*
         * setup SW LBR filter
         */
        intel_pmu_setup_sw_lbr_filter(event);

        /*
         * setup HW LBR filter, if any
         */
        if (x86_pmu.lbr_sel_map)
                ret = intel_pmu_setup_hw_lbr_filter(event);

        return ret;
}
/*
 * Return the type of control flow change at address "from".
 * The instruction is not necessarily a branch (in case of interrupt).
 *
 * The branch type returned also includes the priv level of the
 * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
 *
 * If a branch type is unknown OR the instruction cannot be
 * decoded (e.g., text page not present), then X86_BR_NONE is
 * returned.
 */
static int branch_type(unsigned long from, unsigned long to)
{
        struct insn insn;
        void *addr;
        int bytes, size = MAX_INSN_SIZE;
        int ret = X86_BR_NONE;
        int ext, to_plm, from_plm;
        u8 buf[MAX_INSN_SIZE];
        int is64 = 0;

        to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
        from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;

        /*
         * maybe zero if lbr did not fill up after a reset by the time
         * we get a PMU interrupt
         */
        if (from == 0 || to == 0)
                return X86_BR_NONE;

        if (from_plm == X86_BR_USER) {
                /*
                 * can happen if measuring at the user level only
                 * and we interrupt in a kernel thread, e.g., idle.
                 */
                if (!current->mm)
                        return X86_BR_NONE;

                /* may fail if text not present */
                bytes = copy_from_user_nmi(buf, (void __user *)from, size);
                if (bytes != size)
                        return X86_BR_NONE;

                addr = buf;
        } else
                addr = (void *)from;

        /*
         * decoder needs to know the ABI especially
         * on 64-bit systems running 32-bit apps
         */
#ifdef CONFIG_X86_64
        is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
#endif
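
        /*
         * Only the opcode (and, for 0xff, the ModRM byte) is needed to
         * classify the branch, so just those parts of the instruction
         * are decoded with the kernel's x86 instruction decoder
         * (<asm/insn.h>).
         */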
        insn_init(&insn, addr, is64);
        insn_get_opcode(&insn);

        switch (insn.opcode.bytes[0]) {
        case 0xf:
                switch (insn.opcode.bytes[1]) {
                case 0x05: /* syscall */
                case 0x34: /* sysenter */
                        ret = X86_BR_SYSCALL;
                        break;
                case 0x07: /* sysret */
                case 0x35: /* sysexit */
                        ret = X86_BR_SYSRET;
                        break;
                case 0x80 ... 0x8f: /* conditional */
                        ret = X86_BR_JCC;
                        break;
                default:
                        ret = X86_BR_NONE;
                }
                break;
        case 0x70 ... 0x7f: /* conditional */
                ret = X86_BR_JCC;
                break;
        case 0xc2: /* near ret */
        case 0xc3: /* near ret */
        case 0xca: /* far ret */
        case 0xcb: /* far ret */
                ret = X86_BR_RET;
                break;
        case 0xcf: /* iret */
                ret = X86_BR_IRET;
                break;
        case 0xcc ... 0xce: /* int */
                ret = X86_BR_INT;
                break;
        case 0xe8: /* call near rel */
        case 0x9a: /* call far absolute */
                ret = X86_BR_CALL;
                break;
        case 0xe0 ... 0xe3: /* loop jmp */
                ret = X86_BR_JCC;
                break;
        case 0xe9 ... 0xeb: /* jmp */
                ret = X86_BR_JMP;
                break;
        case 0xff: /* call near absolute, call far absolute ind */
                insn_get_modrm(&insn);
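                /*
                 * ModRM bits 5:3 are the opcode-extension (/reg) field;
                 * for opcode 0xff, /2 and /3 are the indirect call forms
                 * and /4 and /5 are the indirect jmp forms.
                 */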
                ext = (insn.modrm.bytes[0] >> 3) & 0x7;
                switch (ext) {
                case 2: /* near ind call */
                case 3: /* far ind call */
                        ret = X86_BR_IND_CALL;
                        break;
                case 4:
                case 5:
                        ret = X86_BR_JMP;
                        break;
                }
                break;
        default:
                ret = X86_BR_NONE;
        }
        /*
         * Interrupts, traps, faults (and thus ring transitions) may
         * occur on any instruction. Thus, to classify them correctly,
         * we need to first look at the from and to priv levels. If they
         * are different and to is in the kernel, then it indicates
         * a ring transition. If the from instruction is not a ring
         * transition instr (syscall, sysenter, int), then it means
         * it was an irq, trap or fault.
         *
         * We have no way of detecting kernel to kernel faults.
         */
        if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
            && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
                ret = X86_BR_IRQ;

        /*
         * branch priv level determined by target as
         * is done by HW when LBR_SELECT is implemented
         */
        if (ret != X86_BR_NONE)
                ret |= to_plm;

        return ret;
}
/*
 * Implement the actual branch filter based on the user's request.
 * Hardware may not exactly satisfy that request, thus
 * we need to inspect opcodes. Mismatched branches are
 * discarded. Therefore, the number of branches returned
 * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
 */
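
/*
 * Two passes: first, every entry whose branch_type() does not match the
 * requested br_sel mask gets from = 0; second, if anything was dropped,
 * the surviving entries are shifted down over the from = 0 holes and
 * lbr_stack.nr is shrunk accordingly.
 */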
static void
intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
{
        u64 from, to;
        int br_sel = cpuc->br_sel;
        int i, j, type;
        bool compress = false;

        /* if sampling all branches, then nothing to filter */
        if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
                return;

        for (i = 0; i < cpuc->lbr_stack.nr; i++) {

                from = cpuc->lbr_entries[i].from;
                to = cpuc->lbr_entries[i].to;

                type = branch_type(from, to);

                /* if type does not correspond, then discard */
                if (type == X86_BR_NONE || (br_sel & type) != type) {
                        cpuc->lbr_entries[i].from = 0;
                        compress = true;
                }
        }

        if (!compress)
                return;

        /* remove all entries with from=0 */
        for (i = 0; i < cpuc->lbr_stack.nr; ) {
                if (!cpuc->lbr_entries[i].from) {
                        j = i;
                        while (++j < cpuc->lbr_stack.nr)
                                cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
                        cpuc->lbr_stack.nr--;
                        if (!cpuc->lbr_entries[i].from)
                                continue;
                }
                i++;
        }
}
/*
 * Map interface branch filters onto LBR filters
 */
static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
        [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
        [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
        [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
                                        | LBR_IND_JMP | LBR_FAR,
        /*
         * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
         */
        [PERF_SAMPLE_BRANCH_ANY_CALL] =
         LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
        /*
         * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
         */
        [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
};

static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
        [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
        [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
        [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
        [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
        [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
        [PERF_SAMPLE_BRANCH_ANY_CALL]   = LBR_REL_CALL | LBR_IND_CALL
                                        | LBR_FAR,
        [PERF_SAMPLE_BRANCH_IND_CALL]   = LBR_IND_CALL,
};
/* core */
void intel_pmu_lbr_init_core(void)
{
        x86_pmu.lbr_nr     = 4;
        x86_pmu.lbr_tos    = MSR_LBR_TOS;
        x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
        x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

        /*
         * SW branch filter usage:
         * - compensate for lack of HW filter
         */
        pr_cont("4-deep LBR, ");
}
/* nehalem/westmere */
void intel_pmu_lbr_init_nhm(void)
{
        x86_pmu.lbr_nr     = 16;
        x86_pmu.lbr_tos    = MSR_LBR_TOS;
        x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to     = MSR_LBR_NHM_TO;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;

        /*
         * SW branch filter usage:
         * - workaround LBR_SEL errata (see above)
         * - support syscall, sysret capture.
         *   That requires LBR_FAR, which means far
         *   jmps then need to be filtered out
         */
        pr_cont("16-deep LBR, ");
}
/* sandy bridge */
void intel_pmu_lbr_init_snb(void)
{
        x86_pmu.lbr_nr   = 16;
        x86_pmu.lbr_tos  = MSR_LBR_TOS;
        x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
        x86_pmu.lbr_to   = MSR_LBR_NHM_TO;

        x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
        x86_pmu.lbr_sel_map  = snb_lbr_sel_map;

        /*
         * SW branch filter usage:
         * - support syscall, sysret capture.
         *   That requires LBR_FAR, which means far
         *   jmps then need to be filtered out
         */
        pr_cont("16-deep LBR, ");
}
/* atom */
void intel_pmu_lbr_init_atom(void)
{
        /*
         * only models starting at stepping 10 seem
         * to have an operational LBR which can freeze
         * on PMU interrupt
         */
        if (boot_cpu_data.x86_model == 28
            && boot_cpu_data.x86_mask < 10) {
                pr_cont("LBR disabled due to erratum");
                return;
        }

        x86_pmu.lbr_nr     = 8;
        x86_pmu.lbr_tos    = MSR_LBR_TOS;
        x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
        x86_pmu.lbr_to     = MSR_LBR_CORE_TO;

        /*
         * SW branch filter usage:
         * - compensate for lack of HW filter
         */
        pr_cont("8-deep LBR, ");
}