perf_event_intel.c

  1. #ifdef CONFIG_CPU_SUP_INTEL
  2. #define MAX_EXTRA_REGS 2
  3. /*
  4. * Per register state.
  5. */
  6. struct er_account {
  7. int ref; /* reference count */
  8. unsigned int extra_reg; /* extra MSR number */
  9. u64 extra_config; /* extra MSR config */
  10. };
  11. /*
  12. * Per core state
  13. * This is used to coordinate the shared registers for HT threads.
  14. */
  15. struct intel_percore {
  16. raw_spinlock_t lock; /* protect structure */
  17. struct er_account regs[MAX_EXTRA_REGS];
  18. int refcnt; /* number of threads */
  19. unsigned core_id;
  20. };
  21. /*
  22. * Intel PerfMon, used on Core and later.
  23. */
  24. static const u64 intel_perfmon_event_map[] =
  25. {
  26. [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
  27. [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
  28. [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
  29. [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
  30. [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
  31. [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
  32. [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
  33. };
  34. static struct event_constraint intel_core_event_constraints[] =
  35. {
  36. INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
  37. INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
  38. INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
  39. INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
  40. INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
  41. INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
  42. EVENT_CONSTRAINT_END
  43. };
  44. static struct event_constraint intel_core2_event_constraints[] =
  45. {
  46. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  47. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  48. /*
  49. * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
  50. * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
  51. * ratio between these counters.
  52. */
  53. /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
  54. INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
  55. INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
  56. INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
  57. INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
  58. INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
  59. INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
  60. INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
  61. INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
  62. INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
  63. INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
  64. EVENT_CONSTRAINT_END
  65. };
  66. static struct event_constraint intel_nehalem_event_constraints[] =
  67. {
  68. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  69. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  70. /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
  71. INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
  72. INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
  73. INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
  74. INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
  75. INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
  76. INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
  77. INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
  78. INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
  79. EVENT_CONSTRAINT_END
  80. };
  81. static struct extra_reg intel_nehalem_extra_regs[] =
  82. {
  83. INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
  84. EVENT_EXTRA_END
  85. };
  86. static struct event_constraint intel_nehalem_percore_constraints[] =
  87. {
  88. INTEL_EVENT_CONSTRAINT(0xb7, 0),
  89. EVENT_CONSTRAINT_END
  90. };
  91. static struct event_constraint intel_westmere_event_constraints[] =
  92. {
  93. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  94. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  95. /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
  96. INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
  97. INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
  98. INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
  99. INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
  100. EVENT_CONSTRAINT_END
  101. };
  102. static struct event_constraint intel_snb_event_constraints[] =
  103. {
  104. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  105. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  106. /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
  107. INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
  108. INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
  109. INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
  110. INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
  111. INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
  112. EVENT_CONSTRAINT_END
  113. };
  114. static struct extra_reg intel_westmere_extra_regs[] =
  115. {
  116. INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
  117. INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
  118. EVENT_EXTRA_END
  119. };
  120. static struct event_constraint intel_westmere_percore_constraints[] =
  121. {
  122. INTEL_EVENT_CONSTRAINT(0xb7, 0),
  123. INTEL_EVENT_CONSTRAINT(0xbb, 0),
  124. EVENT_CONSTRAINT_END
  125. };
  126. static struct event_constraint intel_gen_event_constraints[] =
  127. {
  128. FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
  129. FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
  130. /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
  131. EVENT_CONSTRAINT_END
  132. };
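/*
 * Translate a generic PERF_COUNT_HW_* index into the Intel architectural
 * event encoding from the table above.
 */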
  133. static u64 intel_pmu_event_map(int hw_event)
  134. {
  135. return intel_perfmon_event_map[hw_event];
  136. }
  137. static __initconst const u64 snb_hw_cache_event_ids
  138. [PERF_COUNT_HW_CACHE_MAX]
  139. [PERF_COUNT_HW_CACHE_OP_MAX]
  140. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  141. {
  142. [ C(L1D) ] = {
  143. [ C(OP_READ) ] = {
  144. [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
  145. [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
  146. },
  147. [ C(OP_WRITE) ] = {
  148. [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
  149. [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
  150. },
  151. [ C(OP_PREFETCH) ] = {
  152. [ C(RESULT_ACCESS) ] = 0x0,
  153. [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
  154. },
  155. },
  156. [ C(L1I ) ] = {
  157. [ C(OP_READ) ] = {
  158. [ C(RESULT_ACCESS) ] = 0x0,
  159. [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
  160. },
  161. [ C(OP_WRITE) ] = {
  162. [ C(RESULT_ACCESS) ] = -1,
  163. [ C(RESULT_MISS) ] = -1,
  164. },
  165. [ C(OP_PREFETCH) ] = {
  166. [ C(RESULT_ACCESS) ] = 0x0,
  167. [ C(RESULT_MISS) ] = 0x0,
  168. },
  169. },
  170. [ C(LL ) ] = {
  171. /*
  172. * TBD: Need Off-core Response Performance Monitoring support
  173. */
  174. [ C(OP_READ) ] = {
  175. /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
  176. [ C(RESULT_ACCESS) ] = 0x01b7,
  177. /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
  178. [ C(RESULT_MISS) ] = 0x01bb,
  179. },
  180. [ C(OP_WRITE) ] = {
  181. /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
  182. [ C(RESULT_ACCESS) ] = 0x01b7,
  183. /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
  184. [ C(RESULT_MISS) ] = 0x01bb,
  185. },
  186. [ C(OP_PREFETCH) ] = {
  187. /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
  188. [ C(RESULT_ACCESS) ] = 0x01b7,
  189. /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
  190. [ C(RESULT_MISS) ] = 0x01bb,
  191. },
  192. },
  193. [ C(DTLB) ] = {
  194. [ C(OP_READ) ] = {
  195. [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
  196. [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
  197. },
  198. [ C(OP_WRITE) ] = {
  199. [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
  200. [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
  201. },
  202. [ C(OP_PREFETCH) ] = {
  203. [ C(RESULT_ACCESS) ] = 0x0,
  204. [ C(RESULT_MISS) ] = 0x0,
  205. },
  206. },
  207. [ C(ITLB) ] = {
  208. [ C(OP_READ) ] = {
  209. [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
  210. [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
  211. },
  212. [ C(OP_WRITE) ] = {
  213. [ C(RESULT_ACCESS) ] = -1,
  214. [ C(RESULT_MISS) ] = -1,
  215. },
  216. [ C(OP_PREFETCH) ] = {
  217. [ C(RESULT_ACCESS) ] = -1,
  218. [ C(RESULT_MISS) ] = -1,
  219. },
  220. },
  221. [ C(BPU ) ] = {
  222. [ C(OP_READ) ] = {
  223. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
  224. [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
  225. },
  226. [ C(OP_WRITE) ] = {
  227. [ C(RESULT_ACCESS) ] = -1,
  228. [ C(RESULT_MISS) ] = -1,
  229. },
  230. [ C(OP_PREFETCH) ] = {
  231. [ C(RESULT_ACCESS) ] = -1,
  232. [ C(RESULT_MISS) ] = -1,
  233. },
  234. },
  235. };
  236. static __initconst const u64 westmere_hw_cache_event_ids
  237. [PERF_COUNT_HW_CACHE_MAX]
  238. [PERF_COUNT_HW_CACHE_OP_MAX]
  239. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  240. {
  241. [ C(L1D) ] = {
  242. [ C(OP_READ) ] = {
  243. [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
  244. [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */
  245. },
  246. [ C(OP_WRITE) ] = {
  247. [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
  248. [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */
  249. },
  250. [ C(OP_PREFETCH) ] = {
  251. [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
  252. [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
  253. },
  254. },
  255. [ C(L1I ) ] = {
  256. [ C(OP_READ) ] = {
  257. [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
  258. [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
  259. },
  260. [ C(OP_WRITE) ] = {
  261. [ C(RESULT_ACCESS) ] = -1,
  262. [ C(RESULT_MISS) ] = -1,
  263. },
  264. [ C(OP_PREFETCH) ] = {
  265. [ C(RESULT_ACCESS) ] = 0x0,
  266. [ C(RESULT_MISS) ] = 0x0,
  267. },
  268. },
  269. [ C(LL ) ] = {
  270. [ C(OP_READ) ] = {
  271. [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
  272. [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
  273. },
  274. [ C(OP_WRITE) ] = {
  275. [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
  276. [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
  277. },
  278. [ C(OP_PREFETCH) ] = {
  279. [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
  280. [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
  281. },
  282. },
  283. [ C(DTLB) ] = {
  284. [ C(OP_READ) ] = {
  285. [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
  286. [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
  287. },
  288. [ C(OP_WRITE) ] = {
  289. [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
  290. [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
  291. },
  292. [ C(OP_PREFETCH) ] = {
  293. [ C(RESULT_ACCESS) ] = 0x0,
  294. [ C(RESULT_MISS) ] = 0x0,
  295. },
  296. },
  297. [ C(ITLB) ] = {
  298. [ C(OP_READ) ] = {
  299. [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
  300. [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */
  301. },
  302. [ C(OP_WRITE) ] = {
  303. [ C(RESULT_ACCESS) ] = -1,
  304. [ C(RESULT_MISS) ] = -1,
  305. },
  306. [ C(OP_PREFETCH) ] = {
  307. [ C(RESULT_ACCESS) ] = -1,
  308. [ C(RESULT_MISS) ] = -1,
  309. },
  310. },
  311. [ C(BPU ) ] = {
  312. [ C(OP_READ) ] = {
  313. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
  314. [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
  315. },
  316. [ C(OP_WRITE) ] = {
  317. [ C(RESULT_ACCESS) ] = -1,
  318. [ C(RESULT_MISS) ] = -1,
  319. },
  320. [ C(OP_PREFETCH) ] = {
  321. [ C(RESULT_ACCESS) ] = -1,
  322. [ C(RESULT_MISS) ] = -1,
  323. },
  324. },
  325. };
  326. static __initconst const u64 nehalem_hw_cache_event_ids
  327. [PERF_COUNT_HW_CACHE_MAX]
  328. [PERF_COUNT_HW_CACHE_OP_MAX]
  329. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  330. {
  331. [ C(L1D) ] = {
  332. [ C(OP_READ) ] = {
  333. [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
  334. [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
  335. },
  336. [ C(OP_WRITE) ] = {
  337. [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
  338. [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
  339. },
  340. [ C(OP_PREFETCH) ] = {
  341. [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
  342. [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
  343. },
  344. },
  345. [ C(L1I ) ] = {
  346. [ C(OP_READ) ] = {
  347. [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
  348. [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
  349. },
  350. [ C(OP_WRITE) ] = {
  351. [ C(RESULT_ACCESS) ] = -1,
  352. [ C(RESULT_MISS) ] = -1,
  353. },
  354. [ C(OP_PREFETCH) ] = {
  355. [ C(RESULT_ACCESS) ] = 0x0,
  356. [ C(RESULT_MISS) ] = 0x0,
  357. },
  358. },
  359. [ C(LL ) ] = {
  360. [ C(OP_READ) ] = {
  361. [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
  362. [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
  363. },
  364. [ C(OP_WRITE) ] = {
  365. [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
  366. [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
  367. },
  368. [ C(OP_PREFETCH) ] = {
  369. [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
  370. [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
  371. },
  372. },
  373. [ C(DTLB) ] = {
  374. [ C(OP_READ) ] = {
  375. [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
  376. [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
  377. },
  378. [ C(OP_WRITE) ] = {
  379. [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
  380. [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
  381. },
  382. [ C(OP_PREFETCH) ] = {
  383. [ C(RESULT_ACCESS) ] = 0x0,
  384. [ C(RESULT_MISS) ] = 0x0,
  385. },
  386. },
  387. [ C(ITLB) ] = {
  388. [ C(OP_READ) ] = {
  389. [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
  390. [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
  391. },
  392. [ C(OP_WRITE) ] = {
  393. [ C(RESULT_ACCESS) ] = -1,
  394. [ C(RESULT_MISS) ] = -1,
  395. },
  396. [ C(OP_PREFETCH) ] = {
  397. [ C(RESULT_ACCESS) ] = -1,
  398. [ C(RESULT_MISS) ] = -1,
  399. },
  400. },
  401. [ C(BPU ) ] = {
  402. [ C(OP_READ) ] = {
  403. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
  404. [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
  405. },
  406. [ C(OP_WRITE) ] = {
  407. [ C(RESULT_ACCESS) ] = -1,
  408. [ C(RESULT_MISS) ] = -1,
  409. },
  410. [ C(OP_PREFETCH) ] = {
  411. [ C(RESULT_ACCESS) ] = -1,
  412. [ C(RESULT_MISS) ] = -1,
  413. },
  414. },
  415. };
  416. static __initconst const u64 core2_hw_cache_event_ids
  417. [PERF_COUNT_HW_CACHE_MAX]
  418. [PERF_COUNT_HW_CACHE_OP_MAX]
  419. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  420. {
  421. [ C(L1D) ] = {
  422. [ C(OP_READ) ] = {
  423. [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
  424. [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
  425. },
  426. [ C(OP_WRITE) ] = {
  427. [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
  428. [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
  429. },
  430. [ C(OP_PREFETCH) ] = {
  431. [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
  432. [ C(RESULT_MISS) ] = 0,
  433. },
  434. },
  435. [ C(L1I ) ] = {
  436. [ C(OP_READ) ] = {
  437. [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
  438. [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
  439. },
  440. [ C(OP_WRITE) ] = {
  441. [ C(RESULT_ACCESS) ] = -1,
  442. [ C(RESULT_MISS) ] = -1,
  443. },
  444. [ C(OP_PREFETCH) ] = {
  445. [ C(RESULT_ACCESS) ] = 0,
  446. [ C(RESULT_MISS) ] = 0,
  447. },
  448. },
  449. [ C(LL ) ] = {
  450. [ C(OP_READ) ] = {
  451. [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
  452. [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
  453. },
  454. [ C(OP_WRITE) ] = {
  455. [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
  456. [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
  457. },
  458. [ C(OP_PREFETCH) ] = {
  459. [ C(RESULT_ACCESS) ] = 0,
  460. [ C(RESULT_MISS) ] = 0,
  461. },
  462. },
  463. [ C(DTLB) ] = {
  464. [ C(OP_READ) ] = {
  465. [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
  466. [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
  467. },
  468. [ C(OP_WRITE) ] = {
  469. [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
  470. [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
  471. },
  472. [ C(OP_PREFETCH) ] = {
  473. [ C(RESULT_ACCESS) ] = 0,
  474. [ C(RESULT_MISS) ] = 0,
  475. },
  476. },
  477. [ C(ITLB) ] = {
  478. [ C(OP_READ) ] = {
  479. [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
  480. [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
  481. },
  482. [ C(OP_WRITE) ] = {
  483. [ C(RESULT_ACCESS) ] = -1,
  484. [ C(RESULT_MISS) ] = -1,
  485. },
  486. [ C(OP_PREFETCH) ] = {
  487. [ C(RESULT_ACCESS) ] = -1,
  488. [ C(RESULT_MISS) ] = -1,
  489. },
  490. },
  491. [ C(BPU ) ] = {
  492. [ C(OP_READ) ] = {
  493. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
  494. [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
  495. },
  496. [ C(OP_WRITE) ] = {
  497. [ C(RESULT_ACCESS) ] = -1,
  498. [ C(RESULT_MISS) ] = -1,
  499. },
  500. [ C(OP_PREFETCH) ] = {
  501. [ C(RESULT_ACCESS) ] = -1,
  502. [ C(RESULT_MISS) ] = -1,
  503. },
  504. },
  505. };
  506. static __initconst const u64 atom_hw_cache_event_ids
  507. [PERF_COUNT_HW_CACHE_MAX]
  508. [PERF_COUNT_HW_CACHE_OP_MAX]
  509. [PERF_COUNT_HW_CACHE_RESULT_MAX] =
  510. {
  511. [ C(L1D) ] = {
  512. [ C(OP_READ) ] = {
  513. [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
  514. [ C(RESULT_MISS) ] = 0,
  515. },
  516. [ C(OP_WRITE) ] = {
  517. [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
  518. [ C(RESULT_MISS) ] = 0,
  519. },
  520. [ C(OP_PREFETCH) ] = {
  521. [ C(RESULT_ACCESS) ] = 0x0,
  522. [ C(RESULT_MISS) ] = 0,
  523. },
  524. },
  525. [ C(L1I ) ] = {
  526. [ C(OP_READ) ] = {
  527. [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
  528. [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
  529. },
  530. [ C(OP_WRITE) ] = {
  531. [ C(RESULT_ACCESS) ] = -1,
  532. [ C(RESULT_MISS) ] = -1,
  533. },
  534. [ C(OP_PREFETCH) ] = {
  535. [ C(RESULT_ACCESS) ] = 0,
  536. [ C(RESULT_MISS) ] = 0,
  537. },
  538. },
  539. [ C(LL ) ] = {
  540. [ C(OP_READ) ] = {
  541. [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
  542. [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
  543. },
  544. [ C(OP_WRITE) ] = {
  545. [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
  546. [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
  547. },
  548. [ C(OP_PREFETCH) ] = {
  549. [ C(RESULT_ACCESS) ] = 0,
  550. [ C(RESULT_MISS) ] = 0,
  551. },
  552. },
  553. [ C(DTLB) ] = {
  554. [ C(OP_READ) ] = {
  555. [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
  556. [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
  557. },
  558. [ C(OP_WRITE) ] = {
  559. [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
  560. [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
  561. },
  562. [ C(OP_PREFETCH) ] = {
  563. [ C(RESULT_ACCESS) ] = 0,
  564. [ C(RESULT_MISS) ] = 0,
  565. },
  566. },
  567. [ C(ITLB) ] = {
  568. [ C(OP_READ) ] = {
  569. [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
  570. [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
  571. },
  572. [ C(OP_WRITE) ] = {
  573. [ C(RESULT_ACCESS) ] = -1,
  574. [ C(RESULT_MISS) ] = -1,
  575. },
  576. [ C(OP_PREFETCH) ] = {
  577. [ C(RESULT_ACCESS) ] = -1,
  578. [ C(RESULT_MISS) ] = -1,
  579. },
  580. },
  581. [ C(BPU ) ] = {
  582. [ C(OP_READ) ] = {
  583. [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
  584. [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
  585. },
  586. [ C(OP_WRITE) ] = {
  587. [ C(RESULT_ACCESS) ] = -1,
  588. [ C(RESULT_MISS) ] = -1,
  589. },
  590. [ C(OP_PREFETCH) ] = {
  591. [ C(RESULT_ACCESS) ] = -1,
  592. [ C(RESULT_MISS) ] = -1,
  593. },
  594. },
  595. };
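/*
 * Globally disable the PMU: clear the global control MSR, stop BTS if it
 * is active, and disable all PEBS and LBR facilities.
 */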
  596. static void intel_pmu_disable_all(void)
  597. {
  598. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  599. wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
  600. if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
  601. intel_pmu_disable_bts();
  602. intel_pmu_pebs_disable_all();
  603. intel_pmu_lbr_disable_all();
  604. }
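/*
 * Re-enable everything intel_pmu_disable_all() turned off: restore the
 * global control mask and, if a BTS event is active, BTS as well.
 */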
  605. static void intel_pmu_enable_all(int added)
  606. {
  607. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  608. intel_pmu_pebs_enable_all();
  609. intel_pmu_lbr_enable_all();
  610. wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
  611. if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
  612. struct perf_event *event =
  613. cpuc->events[X86_PMC_IDX_FIXED_BTS];
  614. if (WARN_ON_ONCE(!event))
  615. return;
  616. intel_pmu_enable_bts(event->hw.config);
  617. }
  618. }
  619. /*
  620. * Workaround for:
  621. * Intel Errata AAK100 (model 26)
  622. * Intel Errata AAP53 (model 30)
  623. * Intel Errata BD53 (model 44)
  624. *
  625. * The official story:
  626. * These chips need to be 'reset' when adding counters by programming the
  627. * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
  628. * in sequence on the same PMC or on different PMCs.
  629. *
  630. * In practice it appears some of these events do in fact count, and
  631. * we need to program all 4 events.
  632. */
  633. static void intel_pmu_nhm_workaround(void)
  634. {
  635. struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
  636. static const unsigned long nhm_magic[4] = {
  637. 0x4300B5,
  638. 0x4300D2,
  639. 0x4300B1,
  640. 0x4300B1
  641. };
  642. struct perf_event *event;
  643. int i;
  644. /*
  645. * The Errata requires the following steps:
  646. * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
  647. * 2) Configure 4 PERFEVTSELx with the magic events and clear
  648. * the corresponding PMCx;
  649. * 3) Set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
  650. * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
  651. * 5) Clear the 4 pairs of PERFEVTSELx and PMCx;
  652. */
  653. /*
  654. * The real steps we choose are a little different from above.
  655. * A) To reduce MSR operations, we skip step 1); those MSRs are
  656. * already cleared before this function is called;
  657. * B) Call x86_perf_event_update to save PMCx before configuring
  658. * PERFEVTSELx with the magic number;
  659. * C) For step 5), we only clear a PERFEVTSELx if it is not
  660. * currently in use;
  661. * D) Call x86_perf_event_set_period to restore PMCx.
  662. */
  663. /* We always operate on the 4 pairs of PERFEVTSELx/PMCx registers */
  664. for (i = 0; i < 4; i++) {
  665. event = cpuc->events[i];
  666. if (event)
  667. x86_perf_event_update(event);
  668. }
  669. for (i = 0; i < 4; i++) {
  670. wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
  671. wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
  672. }
  673. wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
  674. wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
  675. for (i = 0; i < 4; i++) {
  676. event = cpuc->events[i];
  677. if (event) {
  678. x86_perf_event_set_period(event);
  679. __x86_pmu_enable_event(&event->hw,
  680. ARCH_PERFMON_EVENTSEL_ENABLE);
  681. } else
  682. wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
  683. }
  684. }
  685. static void intel_pmu_nhm_enable_all(int added)
  686. {
  687. if (added)
  688. intel_pmu_nhm_workaround();
  689. intel_pmu_enable_all(added);
  690. }
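/*
 * MSR_CORE_PERF_GLOBAL_STATUS holds the counter overflow bits; writing
 * them back to MSR_CORE_PERF_GLOBAL_OVF_CTRL acknowledges (clears) them.
 */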
  691. static inline u64 intel_pmu_get_status(void)
  692. {
  693. u64 status;
  694. rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
  695. return status;
  696. }
  697. static inline void intel_pmu_ack_status(u64 ack)
  698. {
  699. wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
  700. }
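/*
 * Each fixed counter owns a 4-bit control field at (idx * 4) in
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL; clearing the field stops the counter.
 */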
  701. static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
  702. {
  703. int idx = hwc->idx - X86_PMC_IDX_FIXED;
  704. u64 ctrl_val, mask;
  705. mask = 0xfULL << (idx * 4);
  706. rdmsrl(hwc->config_base, ctrl_val);
  707. ctrl_val &= ~mask;
  708. wrmsrl(hwc->config_base, ctrl_val);
  709. }
  710. static void intel_pmu_disable_event(struct perf_event *event)
  711. {
  712. struct hw_perf_event *hwc = &event->hw;
  713. if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
  714. intel_pmu_disable_bts();
  715. intel_pmu_drain_bts_buffer();
  716. return;
  717. }
  718. if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
  719. intel_pmu_disable_fixed(hwc);
  720. return;
  721. }
  722. x86_pmu_disable_event(event);
  723. if (unlikely(event->attr.precise_ip))
  724. intel_pmu_pebs_disable(event);
  725. }
  726. static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
  727. {
  728. int idx = hwc->idx - X86_PMC_IDX_FIXED;
  729. u64 ctrl_val, bits, mask;
  730. /*
  731. * Enable IRQ generation (0x8),
  732. * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
  733. * if requested:
  734. */
  735. bits = 0x8ULL;
  736. if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
  737. bits |= 0x2;
  738. if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
  739. bits |= 0x1;
  740. /*
  741. * ANY bit is supported in v3 and up
  742. */
  743. if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
  744. bits |= 0x4;
  745. bits <<= (idx * 4);
  746. mask = 0xfULL << (idx * 4);
  747. rdmsrl(hwc->config_base, ctrl_val);
  748. ctrl_val &= ~mask;
  749. ctrl_val |= bits;
  750. wrmsrl(hwc->config_base, ctrl_val);
  751. }
  752. static void intel_pmu_enable_event(struct perf_event *event)
  753. {
  754. struct hw_perf_event *hwc = &event->hw;
  755. if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
  756. if (!__this_cpu_read(cpu_hw_events.enabled))
  757. return;
  758. intel_pmu_enable_bts(hwc->config);
  759. return;
  760. }
  761. if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
  762. intel_pmu_enable_fixed(hwc);
  763. return;
  764. }
  765. if (unlikely(event->attr.precise_ip))
  766. intel_pmu_pebs_enable(event);
  767. __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
  768. }
  769. /*
  770. * Save and restart an expired event. Called by NMI contexts,
  771. * so it has to be careful about preempting normal event ops:
  772. */
  773. static int intel_pmu_save_and_restart(struct perf_event *event)
  774. {
  775. x86_perf_event_update(event);
  776. return x86_perf_event_set_period(event);
  777. }
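/*
 * Last-resort recovery path (used when the PMI loop gets stuck): zero
 * every programmable and fixed counter and rewind the BTS buffer index.
 */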
  778. static void intel_pmu_reset(void)
  779. {
  780. struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
  781. unsigned long flags;
  782. int idx;
  783. if (!x86_pmu.num_counters)
  784. return;
  785. local_irq_save(flags);
  786. printk(KERN_INFO "clearing PMU state on CPU#%d\n", smp_processor_id());
  787. for (idx = 0; idx < x86_pmu.num_counters; idx++) {
  788. checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
  789. checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
  790. }
  791. for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
  792. checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
  793. if (ds)
  794. ds->bts_index = ds->bts_buffer_base;
  795. local_irq_restore(flags);
  796. }
  797. /*
  798. * This handler is triggered by the local APIC, so the APIC IRQ handling
  799. * rules apply:
  800. */
  801. static int intel_pmu_handle_irq(struct pt_regs *regs)
  802. {
  803. struct perf_sample_data data;
  804. struct cpu_hw_events *cpuc;
  805. int bit, loops;
  806. u64 status;
  807. int handled;
  808. perf_sample_data_init(&data, 0);
  809. cpuc = &__get_cpu_var(cpu_hw_events);
  810. intel_pmu_disable_all();
  811. handled = intel_pmu_drain_bts_buffer();
  812. status = intel_pmu_get_status();
  813. if (!status) {
  814. intel_pmu_enable_all(0);
  815. return handled;
  816. }
  817. loops = 0;
  818. again:
  819. intel_pmu_ack_status(status);
  820. if (++loops > 100) {
  821. WARN_ONCE(1, "perfevents: irq loop stuck!\n");
  822. perf_event_print_debug();
  823. intel_pmu_reset();
  824. goto done;
  825. }
  826. inc_irq_stat(apic_perf_irqs);
  827. intel_pmu_lbr_read();
  828. /*
  829. * PEBS overflow sets bit 62 in the global status register
  830. */
  831. if (__test_and_clear_bit(62, (unsigned long *)&status)) {
  832. handled++;
  833. x86_pmu.drain_pebs(regs);
  834. }
  835. for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
  836. struct perf_event *event = cpuc->events[bit];
  837. handled++;
  838. if (!test_bit(bit, cpuc->active_mask))
  839. continue;
  840. if (!intel_pmu_save_and_restart(event))
  841. continue;
  842. data.period = event->hw.last_period;
  843. if (perf_event_overflow(event, 1, &data, regs))
  844. x86_pmu_stop(event, 0);
  845. }
  846. /*
  847. * Repeat if there is more work to be done:
  848. */
  849. status = intel_pmu_get_status();
  850. if (status)
  851. goto again;
  852. done:
  853. intel_pmu_enable_all(0);
  854. return handled;
  855. }
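/*
 * A branch-instructions event with a sample period of 1 is steered onto
 * the BTS "counter" rather than a generic PMC.
 */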
  856. static struct event_constraint *
  857. intel_bts_constraints(struct perf_event *event)
  858. {
  859. struct hw_perf_event *hwc = &event->hw;
  860. unsigned int hw_event, bts_event;
  861. hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
  862. bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
  863. if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
  864. return &bts_constraint;
  865. return NULL;
  866. }
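/*
 * Events that need a shared per-core extra MSR (such as the OFFCORE_RSP
 * registers) must agree on its contents across HT siblings: under the
 * per-core lock, either share an existing er_account with the same
 * config, claim a free slot, or return the empty constraint on conflict.
 */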
  867. static struct event_constraint *
  868. intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
  869. {
  870. struct hw_perf_event *hwc = &event->hw;
  871. unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
  872. struct event_constraint *c;
  873. struct intel_percore *pc;
  874. struct er_account *era;
  875. int i;
  876. int free_slot;
  877. int found;
  878. if (!x86_pmu.percore_constraints || hwc->extra_alloc)
  879. return NULL;
  880. for (c = x86_pmu.percore_constraints; c->cmask; c++) {
  881. if (e != c->code)
  882. continue;
  883. /*
  884. * Allocate resource per core.
  885. */
  886. pc = cpuc->per_core;
  887. if (!pc)
  888. break;
  889. c = &emptyconstraint;
  890. raw_spin_lock(&pc->lock);
  891. free_slot = -1;
  892. found = 0;
  893. for (i = 0; i < MAX_EXTRA_REGS; i++) {
  894. era = &pc->regs[i];
  895. if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
  896. /* Allow sharing same config */
  897. if (hwc->extra_config == era->extra_config) {
  898. era->ref++;
  899. cpuc->percore_used = 1;
  900. hwc->extra_alloc = 1;
  901. c = NULL;
  902. }
  903. /* else conflict */
  904. found = 1;
  905. break;
  906. } else if (era->ref == 0 && free_slot == -1)
  907. free_slot = i;
  908. }
  909. if (!found && free_slot != -1) {
  910. era = &pc->regs[free_slot];
  911. era->ref = 1;
  912. era->extra_reg = hwc->extra_reg;
  913. era->extra_config = hwc->extra_config;
  914. cpuc->percore_used = 1;
  915. hwc->extra_alloc = 1;
  916. c = NULL;
  917. }
  918. raw_spin_unlock(&pc->lock);
  919. return c;
  920. }
  921. return NULL;
  922. }
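/*
 * Constraint lookup order: BTS, then PEBS, then the shared-register
 * (per-core) constraints, then the generic constraint tables.
 */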
  923. static struct event_constraint *
  924. intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
  925. {
  926. struct event_constraint *c;
  927. c = intel_bts_constraints(event);
  928. if (c)
  929. return c;
  930. c = intel_pebs_constraints(event);
  931. if (c)
  932. return c;
  933. c = intel_percore_constraints(cpuc, event);
  934. if (c)
  935. return c;
  936. return x86_get_event_constraints(cpuc, event);
  937. }
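/*
 * Release the reference this event held on a shared extra register; once
 * the last reference on the core is dropped, clear cpuc->percore_used.
 */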
  938. static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
  939. struct perf_event *event)
  940. {
  941. struct extra_reg *er;
  942. struct intel_percore *pc;
  943. struct er_account *era;
  944. struct hw_perf_event *hwc = &event->hw;
  945. int i, allref;
  946. if (!cpuc->percore_used)
  947. return;
  948. for (er = x86_pmu.extra_regs; er->msr; er++) {
  949. if (er->event != (hwc->config & er->config_mask))
  950. continue;
  951. pc = cpuc->per_core;
  952. raw_spin_lock(&pc->lock);
  953. for (i = 0; i < MAX_EXTRA_REGS; i++) {
  954. era = &pc->regs[i];
  955. if (era->ref > 0 &&
  956. era->extra_config == hwc->extra_config &&
  957. era->extra_reg == er->msr) {
  958. era->ref--;
  959. hwc->extra_alloc = 0;
  960. break;
  961. }
  962. }
  963. allref = 0;
  964. for (i = 0; i < MAX_EXTRA_REGS; i++)
  965. allref += pc->regs[i].ref;
  966. if (allref == 0)
  967. cpuc->percore_used = 0;
  968. raw_spin_unlock(&pc->lock);
  969. break;
  970. }
  971. }
  972. static int intel_pmu_hw_config(struct perf_event *event)
  973. {
  974. int ret = x86_pmu_hw_config(event);
  975. if (ret)
  976. return ret;
  977. if (event->attr.precise_ip &&
  978. (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
  979. /*
  980. * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
  981. * (0x003c) so that we can use it with PEBS.
  982. *
  983. * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
  984. * PEBS capable. However we can use INST_RETIRED.ANY_P
  985. * (0x00c0), which is a PEBS capable event, to get the same
  986. * count.
  987. *
  988. * INST_RETIRED.ANY_P counts the number of cycles that retire
  989. * CNTMASK instructions. By setting CNTMASK to a value (16)
  990. * larger than the maximum number of instructions that can be
  991. * retired per cycle (4) and then inverting the condition, we
  992. * count all cycles that retire 16 or fewer instructions, which
  993. * is every cycle.
  994. *
  995. * Thereby we gain a PEBS capable cycle counter.
  996. */
  997. u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
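/* 0x108000c0 decodes as: event 0xc0 (INST_RETIRED.ANY_P), umask 0x00, INV set, CMASK 16 */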
  998. alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
  999. event->hw.config = alt_config;
  1000. }
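/*
 * The rest of this function gates the ANY bit (count across both HT
 * threads): it is honoured only for raw events, requires perfmon v3+,
 * and needs CAP_SYS_ADMIN when perf is in paranoid mode.
 */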
  1001. if (event->attr.type != PERF_TYPE_RAW)
  1002. return 0;
  1003. if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
  1004. return 0;
  1005. if (x86_pmu.version < 3)
  1006. return -EINVAL;
  1007. if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
  1008. return -EACCES;
  1009. event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
  1010. return 0;
  1011. }
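/*
 * The "core" PMU is used when the CPU reports architectural perfmon
 * version 1 (see intel_pmu_init below): only the generic x86 handlers
 * and the Core event constraints are available.
 */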
  1012. static __initconst const struct x86_pmu core_pmu = {
  1013. .name = "core",
  1014. .handle_irq = x86_pmu_handle_irq,
  1015. .disable_all = x86_pmu_disable_all,
  1016. .enable_all = x86_pmu_enable_all,
  1017. .enable = x86_pmu_enable_event,
  1018. .disable = x86_pmu_disable_event,
  1019. .hw_config = x86_pmu_hw_config,
  1020. .schedule_events = x86_schedule_events,
  1021. .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
  1022. .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
  1023. .event_map = intel_pmu_event_map,
  1024. .max_events = ARRAY_SIZE(intel_perfmon_event_map),
  1025. .apic = 1,
  1026. /*
  1027. * Intel PMCs cannot be accessed sanely above 32 bit width,
  1028. * so we install an artificial 1<<31 period regardless of
  1029. * the generic event period:
  1030. */
  1031. .max_period = (1ULL << 31) - 1,
  1032. .get_event_constraints = intel_get_event_constraints,
  1033. .put_event_constraints = intel_put_event_constraints,
  1034. .event_constraints = intel_core_event_constraints,
  1035. };
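/*
 * CPU hotplug callbacks: each CPU first allocates its own intel_percore,
 * then at starting time adopts the structure of an already-online HT
 * sibling on the same core (freeing its own copy), and drops a reference
 * when dying.
 */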
  1036. static int intel_pmu_cpu_prepare(int cpu)
  1037. {
  1038. struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
  1039. cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
  1040. GFP_KERNEL, cpu_to_node(cpu));
  1041. if (!cpuc->per_core)
  1042. return NOTIFY_BAD;
  1043. raw_spin_lock_init(&cpuc->per_core->lock);
  1044. cpuc->per_core->core_id = -1;
  1045. return NOTIFY_OK;
  1046. }
  1047. static void intel_pmu_cpu_starting(int cpu)
  1048. {
  1049. struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
  1050. int core_id = topology_core_id(cpu);
  1051. int i;
  1052. for_each_cpu(i, topology_thread_cpumask(cpu)) {
  1053. struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;
  1054. if (pc && pc->core_id == core_id) {
  1055. kfree(cpuc->per_core);
  1056. cpuc->per_core = pc;
  1057. break;
  1058. }
  1059. }
  1060. cpuc->per_core->core_id = core_id;
  1061. cpuc->per_core->refcnt++;
  1062. init_debug_store_on_cpu(cpu);
  1063. /*
  1064. * Deal with CPUs that don't clear their LBRs on power-up.
  1065. */
  1066. intel_pmu_lbr_reset();
  1067. }
  1068. static void intel_pmu_cpu_dying(int cpu)
  1069. {
  1070. struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
  1071. struct intel_percore *pc = cpuc->per_core;
  1072. if (pc) {
  1073. if (pc->core_id == -1 || --pc->refcnt == 0)
  1074. kfree(pc);
  1075. cpuc->per_core = NULL;
  1076. }
  1077. fini_debug_store_on_cpu(cpu);
  1078. }
  1079. static __initconst const struct x86_pmu intel_pmu = {
  1080. .name = "Intel",
  1081. .handle_irq = intel_pmu_handle_irq,
  1082. .disable_all = intel_pmu_disable_all,
  1083. .enable_all = intel_pmu_enable_all,
  1084. .enable = intel_pmu_enable_event,
  1085. .disable = intel_pmu_disable_event,
  1086. .hw_config = intel_pmu_hw_config,
  1087. .schedule_events = x86_schedule_events,
  1088. .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
  1089. .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
  1090. .event_map = intel_pmu_event_map,
  1091. .max_events = ARRAY_SIZE(intel_perfmon_event_map),
  1092. .apic = 1,
  1093. /*
  1094. * Intel PMCs cannot be accessed sanely above 32 bit width,
  1095. * so we install an artificial 1<<31 period regardless of
  1096. * the generic event period:
  1097. */
  1098. .max_period = (1ULL << 31) - 1,
  1099. .get_event_constraints = intel_get_event_constraints,
  1100. .put_event_constraints = intel_put_event_constraints,
  1101. .cpu_prepare = intel_pmu_cpu_prepare,
  1102. .cpu_starting = intel_pmu_cpu_starting,
  1103. .cpu_dying = intel_pmu_cpu_dying,
  1104. };
  1105. static void intel_clovertown_quirks(void)
  1106. {
  1107. /*
  1108. * PEBS is unreliable due to:
  1109. *
  1110. * AJ67 - PEBS may experience CPL leaks
  1111. * AJ68 - PEBS PMI may be delayed by one event
  1112. * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
  1113. * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
  1114. *
  1115. * AJ67 could be worked around by restricting the OS/USR flags.
  1116. * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
  1117. *
  1118. * AJ106 could possibly be worked around by not allowing LBR
  1119. * usage from PEBS, including the fixup.
  1120. * AJ68 could possibly be worked around by always programming
  1121. * a pebs_event_reset[0] value and coping with the lost events.
  1122. *
  1123. * But taken together it might just make sense to not enable PEBS on
  1124. * these chips.
  1125. */
  1126. printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
  1127. x86_pmu.pebs = 0;
  1128. x86_pmu.pebs_constraints = NULL;
  1129. }
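/*
 * Boot-time probe: CPUs without architectural perfmon fall back to the
 * P6/P4 drivers; otherwise CPUID leaf 0xa describes the counters and the
 * model switch below installs the model-specific event tables,
 * constraints and quirks.
 */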
  1130. static __init int intel_pmu_init(void)
  1131. {
  1132. union cpuid10_edx edx;
  1133. union cpuid10_eax eax;
  1134. unsigned int unused;
  1135. unsigned int ebx;
  1136. int version;
  1137. if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
  1138. switch (boot_cpu_data.x86) {
  1139. case 0x6:
  1140. return p6_pmu_init();
  1141. case 0xf:
  1142. return p4_pmu_init();
  1143. }
  1144. return -ENODEV;
  1145. }
  1146. /*
  1147. * Check whether the Architectural PerfMon supports
  1148. * Branch Misses Retired hw_event or not.
  1149. */
  1150. cpuid(10, &eax.full, &ebx, &unused, &edx.full);
  1151. if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
  1152. return -ENODEV;
  1153. version = eax.split.version_id;
  1154. if (version < 2)
  1155. x86_pmu = core_pmu;
  1156. else
  1157. x86_pmu = intel_pmu;
  1158. x86_pmu.version = version;
  1159. x86_pmu.num_counters = eax.split.num_counters;
  1160. x86_pmu.cntval_bits = eax.split.bit_width;
  1161. x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1;
  1162. /*
  1163. * Quirk: v2 perfmon does not report fixed-purpose events, so
  1164. * assume at least 3 events:
  1165. */
  1166. if (version > 1)
  1167. x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
  1168. /*
  1169. * v2 and above have a perf capabilities MSR
  1170. */
  1171. if (version > 1) {
  1172. u64 capabilities;
  1173. rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
  1174. x86_pmu.intel_cap.capabilities = capabilities;
  1175. }
  1176. intel_ds_init();
  1177. /*
  1178. * Install the hw-cache-events table:
  1179. */
  1180. switch (boot_cpu_data.x86_model) {
  1181. case 14: /* 65 nm core solo/duo, "Yonah" */
  1182. pr_cont("Core events, ");
  1183. break;
  1184. case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
  1185. x86_pmu.quirks = intel_clovertown_quirks;
  1186. case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
  1187. case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
  1188. case 29: /* six-core 45 nm xeon "Dunnington" */
  1189. memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
  1190. sizeof(hw_cache_event_ids));
  1191. intel_pmu_lbr_init_core();
  1192. x86_pmu.event_constraints = intel_core2_event_constraints;
  1193. x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
  1194. pr_cont("Core2 events, ");
  1195. break;
  1196. case 26: /* 45 nm nehalem, "Bloomfield" */
  1197. case 30: /* 45 nm nehalem, "Lynnfield" */
  1198. case 46: /* 45 nm nehalem-ex, "Beckton" */
  1199. memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
  1200. sizeof(hw_cache_event_ids));
  1201. intel_pmu_lbr_init_nhm();
  1202. x86_pmu.event_constraints = intel_nehalem_event_constraints;
  1203. x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
  1204. x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
  1205. x86_pmu.enable_all = intel_pmu_nhm_enable_all;
  1206. x86_pmu.extra_regs = intel_nehalem_extra_regs;
  1207. pr_cont("Nehalem events, ");
  1208. break;
  1209. case 28: /* Atom */
  1210. memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
  1211. sizeof(hw_cache_event_ids));
  1212. intel_pmu_lbr_init_atom();
  1213. x86_pmu.event_constraints = intel_gen_event_constraints;
  1214. x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
  1215. pr_cont("Atom events, ");
  1216. break;
  1217. case 37: /* 32 nm nehalem, "Clarkdale" */
  1218. case 44: /* 32 nm nehalem, "Gulftown" */
  1219. memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
  1220. sizeof(hw_cache_event_ids));
  1221. intel_pmu_lbr_init_nhm();
  1222. x86_pmu.event_constraints = intel_westmere_event_constraints;
  1223. x86_pmu.percore_constraints = intel_westmere_percore_constraints;
  1224. x86_pmu.enable_all = intel_pmu_nhm_enable_all;
  1225. x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
  1226. x86_pmu.extra_regs = intel_westmere_extra_regs;
  1227. pr_cont("Westmere events, ");
  1228. break;
  1229. case 42: /* SandyBridge */
  1230. memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
  1231. sizeof(hw_cache_event_ids));
  1232. intel_pmu_lbr_init_nhm();
  1233. x86_pmu.event_constraints = intel_snb_event_constraints;
  1234. x86_pmu.pebs_constraints = intel_snb_pebs_events;
  1235. pr_cont("SandyBridge events, ");
  1236. break;
  1237. default:
  1238. /*
  1239. * default constraints for v2 and up
  1240. */
  1241. x86_pmu.event_constraints = intel_gen_event_constraints;
  1242. pr_cont("generic architected perfmon, ");
  1243. }
  1244. return 0;
  1245. }
  1246. #else /* CONFIG_CPU_SUP_INTEL */
  1247. static int intel_pmu_init(void)
  1248. {
  1249. return 0;
  1250. }
  1251. #endif /* CONFIG_CPU_SUP_INTEL */