/* perf_event_intel.c — Intel-specific perf_events (PMU) support. */
#ifdef CONFIG_CPU_SUP_INTEL

/*
 * Number of shared "extra" config MSRs tracked per core; two covers the
 * OFFCORE_RESPONSE_0/1 pair declared in intel_westmere_extra_regs below.
 */
#define MAX_EXTRA_REGS 2

/*
 * Per register state.
 *
 * Tracks the programming of one shared extra MSR so that HT sibling
 * threads on the same core agree on its value.
 */
struct er_account {
	int		ref;		/* reference count */
	unsigned int	extra_reg;	/* extra MSR number */
	u64		extra_config;	/* extra MSR config */
};
/*
 * Per core state
 * This used to coordinate shared registers for HT threads.
 */
struct intel_percore {
	raw_spinlock_t		lock;			/* protect structure */
	struct er_account	regs[MAX_EXTRA_REGS];	/* shared extra-MSR allocation state */
	int			refcnt;			/* number of threads */
	unsigned		core_id;		/* core this state belongs to — presumably matched by sibling lookup elsewhere; verify */
};
/*
 * Intel PerfMon, used on Core and later.
 *
 * Maps generic PERF_COUNT_HW_* ids to raw event codes; encoding is
 * (umask << 8) | event_select per the PERFEVTSEL MSR layout.
 */
static const u64 intel_perfmon_event_map[] =
{
	[PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
	[PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
	[PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
	[PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
};
/*
 * Core-family scheduling constraints: events restricted to a subset of
 * the general-purpose counters.  The second argument is the counter
 * mask (per the INTEL_EVENT_CONSTRAINT macro, defined outside this chunk).
 */
static struct event_constraint intel_core_event_constraints[] =
{
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
	EVENT_CONSTRAINT_END
};
/*
 * Core2 scheduling constraints: fixed-counter mappings plus events that
 * may only run on particular general-purpose counters.
 */
static struct event_constraint intel_core2_event_constraints[] =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/*
	 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
	 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
	 * ratio between these counters.
	 */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
	INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
	INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
	INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
	INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
	INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
	INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
	EVENT_CONSTRAINT_END
};
/*
 * Nehalem scheduling constraints: the L1D-related events are limited to
 * counters 0-1 (mask 0x3).
 */
static struct event_constraint intel_nehalem_event_constraints[] =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	EVENT_CONSTRAINT_END
};
/*
 * Nehalem: event 0xb7 (OFFCORE_RESPONSE) takes an extra config value in
 * MSR_OFFCORE_RSP_0 (0xffff is presumably the valid-bits mask — macro
 * defined outside this chunk).
 */
static struct extra_reg intel_nehalem_extra_regs[] =
{
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
	EVENT_EXTRA_END
};
/*
 * Events that need per-core coordination (see struct intel_percore):
 * OFFCORE_RESPONSE's extra MSR is shared between HT siblings.
 */
static struct event_constraint intel_nehalem_percore_constraints[] =
{
	INTEL_EVENT_CONSTRAINT(0xb7, 0),
	EVENT_CONSTRAINT_END
};
/*
 * Westmere scheduling constraints.
 */
static struct event_constraint intel_westmere_event_constraints[] =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
	INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
	EVENT_CONSTRAINT_END
};
/*
 * Sandy Bridge scheduling constraints.  Note INTEL_UEVENT_CONSTRAINT
 * for 0x01c0 constrains on event+umask, not event alone.
 */
static struct event_constraint intel_snb_event_constraints[] =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
	INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
	INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
	EVENT_CONSTRAINT_END
};
/*
 * Westmere has two OFFCORE_RESPONSE events, each with its own extra
 * config MSR: 0xb7 -> RSP_0, 0xbb -> RSP_1.
 */
static struct extra_reg intel_westmere_extra_regs[] =
{
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
	INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff),
	EVENT_EXTRA_END
};
/*
 * Both OFFCORE_RESPONSE events need per-core (HT-shared) coordination
 * of their extra MSRs.
 */
static struct event_constraint intel_westmere_percore_constraints[] =
{
	INTEL_EVENT_CONSTRAINT(0xb7, 0),
	INTEL_EVENT_CONSTRAINT(0xbb, 0),
	EVENT_CONSTRAINT_END
};
/*
 * Fallback constraints for Intel CPUs without a model-specific table:
 * only the architectural fixed-counter mappings.
 */
static struct event_constraint intel_gen_event_constraints[] =
{
	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
	EVENT_CONSTRAINT_END
};
  133. static u64 intel_pmu_event_map(int hw_event)
  134. {
  135. return intel_perfmon_event_map[hw_event];
  136. }
/*
 * Sandy Bridge: generic cache event -> raw event id translation table,
 * indexed [cache-level][operation][result].  0 means "not counted";
 * -1 marks combinations with no hardware event (presumably rejected by
 * the generic setup code — that code is outside this chunk).
 */
static __initconst const u64 snb_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
		[ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	/*
	 * TBD: Need Off-core Response Performance Monitoring support
	 */
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01bb,
	},
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01bb,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01bb,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
		[ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
/*
 * Westmere: generic cache event -> raw event id translation table,
 * indexed [cache-level][operation][result].  0 = not counted,
 * -1 = no hardware event for the combination.
 */
static __initconst const u64 westmere_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01bb,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/*
		 * NOTE(review): unlike OP_READ, ACCESS uses RSP_1 (0x01bb)
		 * and MISS uses RSP_0 (0x01b7) here.  Looks intentional so
		 * both can be scheduled simultaneously, but verify against
		 * intel_westmere_extra_regs and the scheduling code.
		 */
		/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01bb,
		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01bb,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */
		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
/*
 * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
 */
#define DMND_DATA_RD	(1 << 0)	/* demand data read */
#define DMND_RFO	(1 << 1)	/* demand RFO */
#define DMND_WB		(1 << 3)	/* demand writeback */
#define PF_DATA_RD	(1 << 4)	/* prefetch data read */
#define PF_DATA_RFO	(1 << 5)	/* prefetch RFO */
#define RESP_UNCORE_HIT	(1 << 8)	/* response: uncore hit */
#define RESP_MISS	(0xf600)	/* non uncore hit */
/*
 * Extra-MSR (OFFCORE_RSP) config values paired with the 0x01b7 entries
 * in nehalem_hw_cache_event_ids: each combines a request-type mask with
 * either RESP_UNCORE_HIT (access) or RESP_MISS (miss).
 */
static __initconst const u64 nehalem_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
		[ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
		[ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
		[ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
	},
 }
};
/*
 * Nehalem: generic cache event -> raw event id translation table.
 * All LL entries use the single OFFCORE_RESPONSE event (0x01b7); the
 * access/miss distinction comes from the extra MSR values in
 * nehalem_hw_cache_extra_regs above.
 */
static __initconst const u64 nehalem_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS */
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	/*
	 * Use RFO, not WRITEBACK, because a write miss would typically occur
	 * on RFO.
	 */
	[ C(OP_WRITE) ] = {
		/* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
	[ C(OP_PREFETCH) ] = {
		/* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
		[ C(RESULT_ACCESS) ] = 0x01b7,
		/* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
		[ C(RESULT_MISS)   ] = 0x01b7,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
/*
 * Core2: generic cache event -> raw event id translation table.
 */
static __initconst const u64 core2_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
		[ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
		[ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
		[ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
/*
 * Atom: generic cache event -> raw event id translation table.
 */
static __initconst const u64 atom_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
		[ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
		[ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
		[ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
		[ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
		[ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
/*
 * Stop all PMU activity on this CPU: global counter enable, BTS, PEBS
 * and LBR.  The global disable comes first so nothing counts while the
 * rest is torn down.
 */
static void intel_pmu_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	/* A single write stops all architectural counters at once. */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

	/* BTS is controlled separately from the counters. */
	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
		intel_pmu_disable_bts();

	intel_pmu_pebs_disable_all();
	intel_pmu_lbr_disable_all();
}
/*
 * Re-enable the PMU on this CPU.
 * @added: number of newly added events (unused here; consumed by the
 *         Nehalem variant intel_pmu_nhm_enable_all()).
 */
static void intel_pmu_enable_all(int added)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	/* Re-arm PEBS/LBR before the global counter enable takes effect. */
	intel_pmu_pebs_enable_all();
	intel_pmu_lbr_enable_all();
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);

	/* BTS is restarted by hand, using the event's saved config. */
	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
		struct perf_event *event =
			cpuc->events[X86_PMC_IDX_FIXED_BTS];

		if (WARN_ON_ONCE(!event))
			return;

		intel_pmu_enable_bts(event->hw.config);
	}
}
/*
 * Workaround for:
 *   Intel Errata AAK100 (model 26)
 *   Intel Errata AAP53  (model 30)
 *   Intel Errata BD53   (model 44)
 *
 * The official story:
 * These chips need to be 'reset' when adding counters by programming the
 * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
 * in sequence on the same PMC or on different PMCs.
 *
 * In practice it appears some of these events do in fact count, and
 * we need to program all 4 events.
 */
static void intel_pmu_nhm_workaround(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	static const unsigned long nhm_magic[4] = {
		0x4300B5,
		0x4300D2,
		0x4300B1,
		0x4300B1
	};
	struct perf_event *event;
	int i;

	/*
	 * The Errata requires below steps:
	 * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
	 * 2) Configure 4 PERFEVTSELx with the magic events and clear
	 *    the corresponding PMCx;
	 * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
	 * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
	 * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
	 */

	/*
	 * The real steps we choose are a little different from above.
	 * A) To reduce MSR operations, we don't run step 1) as they
	 *    are already cleared before this function is called;
	 * B) Call x86_perf_event_update to save PMCx before configuring
	 *    PERFEVTSELx with magic number;
	 * C) With step 5), we do clear only when the PERFEVTSELx is
	 *    not used currently.
	 * D) Call x86_perf_event_set_period to restore PMCx;
	 */

	/* We always operate 4 pairs of PERF Counters */
	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];
		if (event)
			x86_perf_event_update(event);
	}

	for (i = 0; i < 4; i++) {
		wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
		wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
	}

	/* Pulse counters 0-3 on (bits 0-3) and back off again. */
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);

	for (i = 0; i < 4; i++) {
		event = cpuc->events[i];

		if (event) {
			x86_perf_event_set_period(event);
			__x86_pmu_enable_event(&event->hw,
					ARCH_PERFMON_EVENTSEL_ENABLE);
		} else
			wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
	}
}
/*
 * Nehalem-family variant of intel_pmu_enable_all(): run the errata
 * workaround first whenever new events were added.
 */
static void intel_pmu_nhm_enable_all(int added)
{
	if (added)
		intel_pmu_nhm_workaround();
	intel_pmu_enable_all(added);
}
/* Read the global status MSR (overflow/condition bits for all counters). */
static inline u64 intel_pmu_get_status(void)
{
	u64 status;

	rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

	return status;
}
/* Clear the status bits set in @ack via the global overflow-control MSR. */
static inline void intel_pmu_ack_status(u64 ack)
{
	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
  751. static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
  752. {
  753. int idx = hwc->idx - X86_PMC_IDX_FIXED;
  754. u64 ctrl_val, mask;
  755. mask = 0xfULL << (idx * 4);
  756. rdmsrl(hwc->config_base, ctrl_val);
  757. ctrl_val &= ~mask;
  758. wrmsrl(hwc->config_base, ctrl_val);
  759. }
/*
 * Stop counting for one event, dispatching on its counter type:
 * BTS pseudo-counter, fixed-function counter, or general-purpose.
 */
static void intel_pmu_disable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* BTS: stop tracing and flush whatever is buffered. */
	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
		intel_pmu_disable_bts();
		intel_pmu_drain_bts_buffer();
		return;
	}

	/* Fixed-function counters are controlled via FIXED_CTR_CTRL. */
	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_disable_fixed(hwc);
		return;
	}

	x86_pmu_disable_event(event);

	/* The counter is stopped before PEBS is torn down. */
	if (unlikely(event->attr.precise_ip))
		intel_pmu_pebs_disable(event);
}
  776. static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
  777. {
  778. int idx = hwc->idx - X86_PMC_IDX_FIXED;
  779. u64 ctrl_val, bits, mask;
  780. /*
  781. * Enable IRQ generation (0x8),
  782. * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
  783. * if requested:
  784. */
  785. bits = 0x8ULL;
  786. if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
  787. bits |= 0x2;
  788. if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
  789. bits |= 0x1;
  790. /*
  791. * ANY bit is supported in v3 and up
  792. */
  793. if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
  794. bits |= 0x4;
  795. bits <<= (idx * 4);
  796. mask = 0xfULL << (idx * 4);
  797. rdmsrl(hwc->config_base, ctrl_val);
  798. ctrl_val &= ~mask;
  799. ctrl_val |= bits;
  800. wrmsrl(hwc->config_base, ctrl_val);
  801. }
/*
 * Start counting for @event.  BTS pseudo-events start branch tracing,
 * fixed counters program their control field, and generic counters go
 * through the common x86 enable path.
 */
static void intel_pmu_enable_event(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
		/* Don't start BTS while the PMU as a whole is disabled. */
		if (!__this_cpu_read(cpu_hw_events.enabled))
			return;

		intel_pmu_enable_bts(hwc->config);
		return;
	}

	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
		intel_pmu_enable_fixed(hwc);
		return;
	}

	/* Set up the PEBS assist before the counter starts ticking. */
	if (unlikely(event->attr.precise_ip))
		intel_pmu_pebs_enable(event);

	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
}
  819. /*
  820. * Save and restart an expired event. Called by NMI contexts,
  821. * so it has to be careful about preempting normal event ops:
  822. */
  823. static int intel_pmu_save_and_restart(struct perf_event *event)
  824. {
  825. x86_perf_event_update(event);
  826. return x86_perf_event_set_period(event);
  827. }
  828. static void intel_pmu_reset(void)
  829. {
  830. struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
  831. unsigned long flags;
  832. int idx;
  833. if (!x86_pmu.num_counters)
  834. return;
  835. local_irq_save(flags);
  836. printk("clearing PMU state on CPU#%d\n", smp_processor_id());
  837. for (idx = 0; idx < x86_pmu.num_counters; idx++) {
  838. checking_wrmsrl(x86_pmu_config_addr(idx), 0ull);
  839. checking_wrmsrl(x86_pmu_event_addr(idx), 0ull);
  840. }
  841. for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
  842. checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
  843. if (ds)
  844. ds->bts_index = ds->bts_buffer_base;
  845. local_irq_restore(flags);
  846. }
  847. /*
  848. * This handler is triggered by the local APIC, so the APIC IRQ handling
  849. * rules apply:
  850. */
  851. static int intel_pmu_handle_irq(struct pt_regs *regs)
  852. {
  853. struct perf_sample_data data;
  854. struct cpu_hw_events *cpuc;
  855. int bit, loops;
  856. u64 status;
  857. int handled;
  858. perf_sample_data_init(&data, 0);
  859. cpuc = &__get_cpu_var(cpu_hw_events);
  860. intel_pmu_disable_all();
  861. handled = intel_pmu_drain_bts_buffer();
  862. status = intel_pmu_get_status();
  863. if (!status) {
  864. intel_pmu_enable_all(0);
  865. return handled;
  866. }
  867. loops = 0;
  868. again:
  869. intel_pmu_ack_status(status);
  870. if (++loops > 100) {
  871. WARN_ONCE(1, "perfevents: irq loop stuck!\n");
  872. perf_event_print_debug();
  873. intel_pmu_reset();
  874. goto done;
  875. }
  876. inc_irq_stat(apic_perf_irqs);
  877. intel_pmu_lbr_read();
  878. /*
  879. * PEBS overflow sets bit 62 in the global status register
  880. */
  881. if (__test_and_clear_bit(62, (unsigned long *)&status)) {
  882. handled++;
  883. x86_pmu.drain_pebs(regs);
  884. }
  885. for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
  886. struct perf_event *event = cpuc->events[bit];
  887. handled++;
  888. if (!test_bit(bit, cpuc->active_mask))
  889. continue;
  890. if (!intel_pmu_save_and_restart(event))
  891. continue;
  892. data.period = event->hw.last_period;
  893. if (perf_event_overflow(event, 1, &data, regs))
  894. x86_pmu_stop(event, 0);
  895. }
  896. /*
  897. * Repeat if there is more work to be done:
  898. */
  899. status = intel_pmu_get_status();
  900. if (status)
  901. goto again;
  902. done:
  903. intel_pmu_enable_all(0);
  904. return handled;
  905. }
  906. static struct event_constraint *
  907. intel_bts_constraints(struct perf_event *event)
  908. {
  909. struct hw_perf_event *hwc = &event->hw;
  910. unsigned int hw_event, bts_event;
  911. hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
  912. bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
  913. if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
  914. return &bts_constraint;
  915. return NULL;
  916. }
/*
 * Events that program a shared "extra" MSR must agree on its contents
 * across all hardware threads of a core.  Try to take (or share) a
 * reference on a slot in the per-core er_account table; return the
 * empty constraint when the MSR is already held with a conflicting
 * config, NULL when the event may be scheduled normally.
 */
static struct event_constraint *
intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
	struct event_constraint *c;
	struct intel_percore *pc;
	struct er_account *era;
	int i;
	int free_slot;
	int found;

	/* No shared regs on this PMU, or this event already holds a slot. */
	if (!x86_pmu.percore_constraints || hwc->extra_alloc)
		return NULL;

	for (c = x86_pmu.percore_constraints; c->cmask; c++) {
		if (e != c->code)
			continue;

		/*
		 * Allocate resource per core.
		 */
		pc = cpuc->per_core;
		if (!pc)
			break;
		/* Pessimistic default: unschedulable unless a slot is won. */
		c = &emptyconstraint;
		raw_spin_lock(&pc->lock);
		free_slot = -1;
		found = 0;
		for (i = 0; i < MAX_EXTRA_REGS; i++) {
			era = &pc->regs[i];
			if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
				/* Allow sharing same config */
				if (hwc->extra_config == era->extra_config) {
					era->ref++;
					cpuc->percore_used = 1;
					hwc->extra_alloc = 1;
					c = NULL;
				}
				/* else conflict */
				found = 1;
				break;
			} else if (era->ref == 0 && free_slot == -1)
				free_slot = i;
		}

		if (!found && free_slot != -1) {
			/* First user of this MSR on the core: claim the slot. */
			era = &pc->regs[free_slot];
			era->ref = 1;
			era->extra_reg = hwc->extra_reg;
			era->extra_config = hwc->extra_config;
			cpuc->percore_used = 1;
			hwc->extra_alloc = 1;
			c = NULL;
		}
		raw_spin_unlock(&pc->lock);
		return c;
	}

	return NULL;
}
  973. static struct event_constraint *
  974. intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
  975. {
  976. struct event_constraint *c;
  977. c = intel_bts_constraints(event);
  978. if (c)
  979. return c;
  980. c = intel_pebs_constraints(event);
  981. if (c)
  982. return c;
  983. c = intel_percore_constraints(cpuc, event);
  984. if (c)
  985. return c;
  986. return x86_get_event_constraints(cpuc, event);
  987. }
/*
 * Drop the per-core extra-MSR reference that was taken for @event in
 * intel_percore_constraints(), and clear the CPU's percore_used flag
 * once no slot on this core is referenced any more.
 */
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
					struct perf_event *event)
{
	struct extra_reg *er;
	struct intel_percore *pc;
	struct er_account *era;
	struct hw_perf_event *hwc = &event->hw;
	int i, allref;

	/* Fast path: this CPU never took a per-core slot. */
	if (!cpuc->percore_used)
		return;

	for (er = x86_pmu.extra_regs; er->msr; er++) {
		/* Only events matching an extra-reg template hold a reference. */
		if (er->event != (hwc->config & er->config_mask))
			continue;

		pc = cpuc->per_core;
		raw_spin_lock(&pc->lock);
		for (i = 0; i < MAX_EXTRA_REGS; i++) {
			era = &pc->regs[i];
			if (era->ref > 0 &&
			    era->extra_config == hwc->extra_config &&
			    era->extra_reg == er->msr) {
				era->ref--;
				hwc->extra_alloc = 0;
				break;
			}
		}
		/* Recompute whether any slot on the core is still in use. */
		allref = 0;
		for (i = 0; i < MAX_EXTRA_REGS; i++)
			allref += pc->regs[i].ref;
		if (allref == 0)
			cpuc->percore_used = 0;
		raw_spin_unlock(&pc->lock);
		break;
	}
}
/*
 * Intel-specific event setup on top of the generic x86_pmu_hw_config():
 * rewrites the cycle event into a PEBS-capable encoding when precise
 * sampling is requested, and validates use of the v3 ANY-thread bit.
 */
static int intel_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	if (event->attr.precise_ip &&
	    (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
		/*
		 * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
		 * (0x003c) so that we can use it with PEBS.
		 *
		 * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
		 * PEBS capable. However we can use INST_RETIRED.ANY_P
		 * (0x00c0), which is a PEBS capable event, to get the same
		 * count.
		 *
		 * INST_RETIRED.ANY_P counts the number of cycles that retires
		 * CNTMASK instructions. By setting CNTMASK to a value (16)
		 * larger than the maximum number of instructions that can be
		 * retired per cycle (4) and then inverting the condition, we
		 * count all cycles that retire 16 or less instructions, which
		 * is every cycle.
		 *
		 * Thereby we gain a PEBS capable cycle counter.
		 */
		u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */

		/* Keep the non-event bits (flags) the user configured. */
		alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
		event->hw.config = alt_config;
	}

	/* The ANY bit can only be requested through raw events. */
	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
		return 0;

	/* ANY-thread counting exists only in arch perfmon v3 and up. */
	if (x86_pmu.version < 3)
		return -EINVAL;

	/* Counting across HT threads can observe sibling activity. */
	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
		return -EACCES;

	event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;

	return 0;
}
/*
 * PMU description used for arch perfmon version 1 parts ("Core"):
 * falls back to the generic x86 enable/disable/IRQ paths, with
 * Intel event mapping and constraint handling.
 */
static __initconst const struct x86_pmu core_pmu = {
	.name			= "core",
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= x86_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL << 31) - 1,
	.get_event_constraints	= intel_get_event_constraints,
	.put_event_constraints	= intel_put_event_constraints,
	.event_constraints	= intel_core_event_constraints,
};
  1086. static int intel_pmu_cpu_prepare(int cpu)
  1087. {
  1088. struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
  1089. if (!cpu_has_ht_siblings())
  1090. return NOTIFY_OK;
  1091. cpuc->per_core = kzalloc_node(sizeof(struct intel_percore),
  1092. GFP_KERNEL, cpu_to_node(cpu));
  1093. if (!cpuc->per_core)
  1094. return NOTIFY_BAD;
  1095. raw_spin_lock_init(&cpuc->per_core->lock);
  1096. cpuc->per_core->core_id = -1;
  1097. return NOTIFY_OK;
  1098. }
/*
 * CPU-hotplug starting callback: set up the debug store and LBRs, and
 * make all HT siblings of a core share one intel_percore structure.
 */
static void intel_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	int core_id = topology_core_id(cpu);
	int i;

	init_debug_store_on_cpu(cpu);
	/*
	 * Deal with CPUs that don't clear their LBRs on power-up.
	 */
	intel_pmu_lbr_reset();

	if (!cpu_has_ht_siblings())
		return;

	/*
	 * If a sibling already owns a per-core structure for this core,
	 * free the one allocated in intel_pmu_cpu_prepare() and share
	 * the sibling's instead.
	 */
	for_each_cpu(i, topology_thread_cpumask(cpu)) {
		struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core;

		if (pc && pc->core_id == core_id) {
			kfree(cpuc->per_core);
			cpuc->per_core = pc;
			break;
		}
	}

	cpuc->per_core->core_id = core_id;
	cpuc->per_core->refcnt++;
}
  1122. static void intel_pmu_cpu_dying(int cpu)
  1123. {
  1124. struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
  1125. struct intel_percore *pc = cpuc->per_core;
  1126. if (pc) {
  1127. if (pc->core_id == -1 || --pc->refcnt == 0)
  1128. kfree(pc);
  1129. cpuc->per_core = NULL;
  1130. }
  1131. fini_debug_store_on_cpu(cpu);
  1132. }
/*
 * PMU description used for arch perfmon version 2 and later: full
 * Intel-specific IRQ handling, enable/disable paths, and the CPU
 * hotplug callbacks that manage per-core shared state.
 */
static __initconst const struct x86_pmu intel_pmu = {
	.name			= "Intel",
	.handle_irq		= intel_pmu_handle_irq,
	.disable_all		= intel_pmu_disable_all,
	.enable_all		= intel_pmu_enable_all,
	.enable			= intel_pmu_enable_event,
	.disable		= intel_pmu_disable_event,
	.hw_config		= intel_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
	.event_map		= intel_pmu_event_map,
	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
	.apic			= 1,
	/*
	 * Intel PMCs cannot be accessed sanely above 32 bit width,
	 * so we install an artificial 1<<31 period regardless of
	 * the generic event period:
	 */
	.max_period		= (1ULL << 31) - 1,
	.get_event_constraints	= intel_get_event_constraints,
	.put_event_constraints	= intel_put_event_constraints,
	.cpu_prepare		= intel_pmu_cpu_prepare,
	.cpu_starting		= intel_pmu_cpu_starting,
	.cpu_dying		= intel_pmu_cpu_dying,
};
/* Disable PEBS on 65 nm Core2 ("Clovertown"-era) parts — see errata below. */
static void intel_clovertown_quirks(void)
{
	/*
	 * PEBS is unreliable due to:
	 *
	 *   AJ67  - PEBS may experience CPL leaks
	 *   AJ68  - PEBS PMI may be delayed by one event
	 *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
	 *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
	 *
	 * AJ67 could be worked around by restricting the OS/USR flags.
	 * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
	 *
	 * AJ106 could possibly be worked around by not allowing LBR
	 * usage from PEBS, including the fixup.
	 * AJ68 could possibly be worked around by always programming
	 * a pebs_event_reset[0] value and coping with the lost events.
	 *
	 * But taken together it might just make sense to not enable PEBS on
	 * these chips.
	 */
	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
	x86_pmu.pebs = 0;
	x86_pmu.pebs_constraints = NULL;
}
/*
 * Probe and initialize the Intel PMU: select core_pmu or intel_pmu
 * based on the architectural perfmon version from CPUID leaf 10, then
 * install model-specific event tables, constraints and quirks.
 * Returns 0 on success, -ENODEV when no usable PMU is present.
 */
static __init int intel_pmu_init(void)
{
	union cpuid10_edx edx;
	union cpuid10_eax eax;
	unsigned int unused;
	unsigned int ebx;
	int version;

	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
		/* Pre-architectural-perfmon CPUs use their own drivers. */
		switch (boot_cpu_data.x86) {
		case 0x6:
			return p6_pmu_init();
		case 0xf:
			return p4_pmu_init();
		}
		return -ENODEV;
	}

	/*
	 * Check whether the Architectural PerfMon supports
	 * Branch Misses Retired hw_event or not.
	 */
	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
		return -ENODEV;

	version = eax.split.version_id;
	if (version < 2)
		x86_pmu = core_pmu;
	else
		x86_pmu = intel_pmu;

	x86_pmu.version			= version;
	x86_pmu.num_counters		= eax.split.num_counters;
	x86_pmu.cntval_bits		= eax.split.bit_width;
	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;

	/*
	 * Quirk: v2 perfmon does not report fixed-purpose events, so
	 * assume at least 3 events:
	 */
	if (version > 1)
		x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);

	/*
	 * v2 and above have a perf capabilities MSR
	 */
	if (version > 1) {
		u64 capabilities;

		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
		x86_pmu.intel_cap.capabilities = capabilities;
	}

	intel_ds_init();

	/*
	 * Install the hw-cache-events table:
	 */
	switch (boot_cpu_data.x86_model) {
	case 14: /* 65 nm core solo/duo, "Yonah" */
		pr_cont("Core events, ");
		break;

	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
		x86_pmu.quirks = intel_clovertown_quirks;
		/* fallthrough: Merom shares the Core2 event setup below */
	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
	case 29: /* six-core 45 nm xeon "Dunnington" */
		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_core();

		x86_pmu.event_constraints = intel_core2_event_constraints;
		x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
		pr_cont("Core2 events, ");
		break;

	case 26: /* 45 nm nehalem, "Bloomfield" */
	case 30: /* 45 nm nehalem, "Lynnfield" */
	case 46: /* 45 nm nehalem-ex, "Beckton" */
		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_nehalem_event_constraints;
		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
		x86_pmu.extra_regs = intel_nehalem_extra_regs;
		pr_cont("Nehalem events, ");
		break;

	case 28: /* Atom */
		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_atom();

		x86_pmu.event_constraints = intel_gen_event_constraints;
		x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
		pr_cont("Atom events, ");
		break;

	case 37: /* 32 nm nehalem, "Clarkdale" */
	case 44: /* 32 nm nehalem, "Gulftown" */
		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
		       sizeof(hw_cache_extra_regs));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_westmere_event_constraints;
		x86_pmu.percore_constraints = intel_westmere_percore_constraints;
		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
		x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
		x86_pmu.extra_regs = intel_westmere_extra_regs;
		pr_cont("Westmere events, ");
		break;

	case 42: /* SandyBridge */
		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
		       sizeof(hw_cache_event_ids));

		intel_pmu_lbr_init_nhm();

		x86_pmu.event_constraints = intel_snb_event_constraints;
		x86_pmu.pebs_constraints = intel_snb_pebs_events;
		pr_cont("SandyBridge events, ");
		break;

	default:
		/*
		 * default constraints for v2 and up
		 */
		x86_pmu.event_constraints = intel_gen_event_constraints;
		pr_cont("generic architected perfmon, ");
	}
	return 0;
}
  1304. #else /* CONFIG_CPU_SUP_INTEL */
/* Stub for kernels built without Intel CPU support. */
static int intel_pmu_init(void)
{
	return 0;
}
  1309. #endif /* CONFIG_CPU_SUP_INTEL */