perf_event.c

/* Performance event support for sparc64.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf event
 * code, which is:
 *
 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 * Copyright (C) 2009 Jaswinder Singh Rajput
 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_event.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */

#define MAX_HWEVENTS		2
#define MAX_PERIOD		((1UL << 32) - 1)

#define PIC_UPPER_INDEX		0
#define PIC_LOWER_INDEX		1

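/* Per-CPU bookkeeping: the event currently programmed on each of the
 * two counters, plus allocation (used_mask) and activity (active_mask)
 * bitmaps and the global enable state.
 */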
struct cpu_hw_events {
	struct perf_event	*events[MAX_HWEVENTS];
	unsigned long		used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	unsigned long		active_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
	int			enabled;
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, };

struct perf_event_map {
	u16	encoding;
	u8	pic_mask;
#define PIC_NONE	0x00
#define PIC_UPPER	0x01
#define PIC_LOWER	0x02
};

#define C(x) PERF_COUNT_HW_CACHE_##x

#define CACHE_OP_UNSUPPORTED	0xfffe
#define CACHE_OP_NONSENSE	0xffff

typedef struct perf_event_map cache_map_t
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX];

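/* Per-chip PMU description: generic and cache event encodings, the
 * shift/mask locating each counter's event field within the PCR, and
 * the "NOP" event codes used to idle an unused counter.
 */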
struct sparc_pmu {
	const struct perf_event_map	*(*event_map)(int);
	const cache_map_t		*cache_map;
	int				max_events;
	int				upper_shift;
	int				lower_shift;
	int				event_mask;
	int				hv_bit;
	int				irq_bit;
	int				upper_nop;
	int				lower_nop;
};

static const struct perf_event_map ultra3_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3_event_map(int event_id)
{
	return &ultra3_perfmon_event_map[event_id];
}

static const cache_map_t ultra3_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0a, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0a, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x09, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x09, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0c, PIC_LOWER },
		[C(RESULT_MISS)] = { 0x0c, PIC_UPPER },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x12, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x11, PIC_UPPER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu ultra3_pmu = {
	.event_map	= ultra3_event_map,
	.cache_map	= &ultra3_cache_map,
	.max_events	= ARRAY_SIZE(ultra3_perfmon_event_map),
	.upper_shift	= 11,
	.lower_shift	= 4,
	.event_mask	= 0x3f,
	.upper_nop	= 0x1c,
	.lower_nop	= 0x14,
};

/* Niagara1 is very limited.  The upper PIC is hard-locked to count
 * only instructions, so it is free running which creates all kinds of
 * problems.  Some hardware designs make one wonder if the creator
 * even looked at how this stuff gets used by software.
 */
static const struct perf_event_map niagara1_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, PIC_UPPER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0, PIC_NONE },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x03, PIC_LOWER },
};

static const struct perf_event_map *niagara1_event_map(int event_id)
{
	return &niagara1_perfmon_event_map[event_id];
}

static const cache_map_t niagara1_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x03, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x00, PIC_UPPER },
		[C(RESULT_MISS)] = { 0x02, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x07, PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x05, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x04, PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara1_pmu = {
	.event_map	= niagara1_event_map,
	.cache_map	= &niagara1_cache_map,
	.max_events	= ARRAY_SIZE(niagara1_perfmon_event_map),
	.upper_shift	= 0,
	.lower_shift	= 4,
	.event_mask	= 0x7,
	.upper_nop	= 0x0,
	.lower_nop	= 0x0,
};

static const struct perf_event_map niagara2_perfmon_event_map[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_INSTRUCTIONS] = { 0x02ff, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0208, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_CACHE_MISSES] = { 0x0302, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x0201, PIC_UPPER | PIC_LOWER },
	[PERF_COUNT_HW_BRANCH_MISSES] = { 0x0202, PIC_UPPER | PIC_LOWER },
};

static const struct perf_event_map *niagara2_event_map(int event_id)
{
	return &niagara2_perfmon_event_map[event_id];
}

static const cache_map_t niagara2_cache_map = {
[C(L1D)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0302, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(L1I)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x02ff, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0301, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_NONSENSE },
		[C(RESULT_MISS)] = { CACHE_OP_NONSENSE },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(LL)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { 0x0208, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0330, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { 0x0210, PIC_UPPER | PIC_LOWER, },
		[C(RESULT_MISS)] = { 0x0320, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(DTLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0x0b08, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(ITLB)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { 0xb04, PIC_UPPER | PIC_LOWER, },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
[C(BPU)] = {
	[C(OP_READ)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_WRITE)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
	[C(OP_PREFETCH)] = {
		[C(RESULT_ACCESS)] = { CACHE_OP_UNSUPPORTED },
		[C(RESULT_MISS)] = { CACHE_OP_UNSUPPORTED },
	},
},
};

static const struct sparc_pmu niagara2_pmu = {
	.event_map	= niagara2_event_map,
	.cache_map	= &niagara2_cache_map,
	.max_events	= ARRAY_SIZE(niagara2_perfmon_event_map),
	.upper_shift	= 19,
	.lower_shift	= 6,
	.event_mask	= 0xfff,
	.hv_bit		= 0x8,
	.irq_bit	= 0x30,
	.upper_nop	= 0x220,
	.lower_nop	= 0x220,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;

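/* Shift a raw event encoding into the PCR event field for the given
 * counter index (upper or lower).
 */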
static u64 event_encoding(u64 event_id, int idx)
{
	if (idx == PIC_UPPER_INDEX)
		event_id <<= sparc_pmu->upper_shift;
	else
		event_id <<= sparc_pmu->lower_shift;
	return event_id;
}

static u64 mask_for_index(int idx)
{
	return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
	return event_encoding(idx == PIC_UPPER_INDEX ?
			      sparc_pmu->upper_nop :
			      sparc_pmu->lower_nop, idx);
}

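/* Program or idle a single counter by rewriting its event field in the
 * PCR; "disabling" really just switches that field to the NOP event.
 */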
static inline void sparc_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
	u64 val, mask = mask_for_index(idx);

	val = pcr_ops->read();
	pcr_ops->write((val & ~mask) | hwc->config);
}

static inline void sparc_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
	u64 mask = mask_for_index(idx);
	u64 nop = nop_for_index(idx);
	u64 val = pcr_ops->read();

	pcr_ops->write((val & ~mask) | nop);
}

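/* (Re)enable sampling by OR-ing every active event's enable bits
 * (config_base) back into the PCR; disabling clears all of the trace
 * and interrupt enable bits at once.
 */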
void hw_perf_enable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;
	int i;

	if (cpuc->enabled)
		return;

	cpuc->enabled = 1;
	barrier();

	val = pcr_ops->read();

	for (i = 0; i < MAX_HWEVENTS; i++) {
		struct perf_event *cp = cpuc->events[i];
		struct hw_perf_event *hwc;

		if (!cp)
			continue;
		hwc = &cp->hw;
		val |= hwc->config_base;
	}

	pcr_ops->write(val);
}

void hw_perf_disable(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	u64 val;

	if (!cpuc->enabled)
		return;

	cpuc->enabled = 0;

	val = pcr_ops->read();
	val &= ~(PCR_UTRACE | PCR_STRACE |
		 sparc_pmu->hv_bit | sparc_pmu->irq_bit);
	pcr_ops->write(val);
}

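/* Both 32-bit counters live in the single 64-bit PIC register; these
 * helpers read or read-modify-write one half of it.
 */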
static u32 read_pmc(int idx)
{
	u64 val;

	read_pic(val);
	if (idx == PIC_UPPER_INDEX)
		val >>= 32;

	return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
	u64 shift, mask, pic;

	shift = 0;
	if (idx == PIC_UPPER_INDEX)
		shift = 32;

	mask = ((u64) 0xffffffff) << shift;
	val <<= shift;

	read_pic(pic);
	pic &= ~mask;
	pic |= val;
	write_pic(pic);
}

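/* Program the counter so that it overflows after at most 'left' more
 * events by writing the two's complement of the remaining period.
 * Returns nonzero when a new sampling period was started.
 */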
static int sparc_perf_event_set_period(struct perf_event *event,
				       struct hw_perf_event *hwc, int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}
	if (left > MAX_PERIOD)
		left = MAX_PERIOD;

	atomic64_set(&hwc->prev_count, (u64)-left);

	write_pmc(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}

static int sparc_pmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (test_and_set_bit(idx, cpuc->used_mask))
		return -EAGAIN;

	sparc_pmu_disable_event(hwc, idx);

	cpuc->events[idx] = event;
	set_bit(idx, cpuc->active_mask);

	sparc_perf_event_set_period(event, hwc, idx);
	sparc_pmu_enable_event(hwc, idx);
	perf_event_update_userpage(event);
	return 0;
}

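/* Fold the current hardware count into event->count.  The cmpxchg
 * retry loop guards against racing with the NMI handler updating
 * prev_count for the same event.
 */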
static u64 sparc_perf_event_update(struct perf_event *event,
				   struct hw_perf_event *hwc, int idx)
{
	int shift = 64 - 32;
	u64 prev_raw_count, new_raw_count;
	s64 delta;

again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	new_raw_count = read_pmc(idx);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
			     new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}

static void sparc_pmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	clear_bit(idx, cpuc->active_mask);
	sparc_pmu_disable_event(hwc, idx);

	barrier();

	sparc_perf_event_update(event, hwc, idx);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static void sparc_pmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	sparc_perf_event_update(event, hwc, hwc->idx);
}

static void sparc_pmu_unthrottle(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	sparc_pmu_enable_event(hwc, hwc->idx);
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

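/* The performance counters are also used by the NMI watchdog, so the
 * watchdog is stopped while any perf events are active and restarted
 * once the last event is released.
 */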
void perf_event_grab_pmc(void)
{
	if (atomic_inc_not_zero(&active_events))
		return;

	mutex_lock(&pmc_grab_mutex);
	if (atomic_read(&active_events) == 0) {
		if (atomic_read(&nmi_active) > 0) {
			on_each_cpu(stop_nmi_watchdog, NULL, 1);
			BUG_ON(atomic_read(&nmi_active) != 0);
		}
		atomic_inc(&active_events);
	}
	mutex_unlock(&pmc_grab_mutex);
}

void perf_event_release_pmc(void)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmc_grab_mutex)) {
		if (atomic_read(&nmi_active) == 0)
			on_each_cpu(start_nmi_watchdog, NULL, 1);
		mutex_unlock(&pmc_grab_mutex);
	}
}

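/* Decode a PERF_TYPE_HW_CACHE config value (cache type in bits 0-7,
 * operation in bits 8-15, result in bits 16-23) into an entry of the
 * chip's cache_map, rejecting unsupported or nonsensical combinations.
 */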
static const struct perf_event_map *sparc_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result;
	const struct perf_event_map *pmap;

	if (!sparc_pmu->cache_map)
		return ERR_PTR(-ENOENT);

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return ERR_PTR(-EINVAL);

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return ERR_PTR(-EINVAL);

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return ERR_PTR(-EINVAL);

	pmap = &((*sparc_pmu->cache_map)[cache_type][cache_op][cache_result]);

	if (pmap->encoding == CACHE_OP_UNSUPPORTED)
		return ERR_PTR(-ENOENT);

	if (pmap->encoding == CACHE_OP_NONSENSE)
		return ERR_PTR(-EINVAL);

	return pmap;
}

static void hw_perf_event_destroy(struct perf_event *event)
{
	perf_event_release_pmc();
}

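/* Validate the requested event, translate it to a hardware encoding
 * via the generic or cache event map, and precompute the PCR enable
 * bits (config_base) and counter index it will use.
 */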
static int __hw_perf_event_init(struct perf_event *event)
{
	struct perf_event_attr *attr = &event->attr;
	struct hw_perf_event *hwc = &event->hw;
	const struct perf_event_map *pmap;
	u64 enc;

	if (atomic_read(&nmi_active) < 0)
		return -ENODEV;

	if (attr->type == PERF_TYPE_HARDWARE) {
		if (attr->config >= sparc_pmu->max_events)
			return -EINVAL;
		pmap = sparc_pmu->event_map(attr->config);
	} else if (attr->type == PERF_TYPE_HW_CACHE) {
		pmap = sparc_map_cache_event(attr->config);
		if (IS_ERR(pmap))
			return PTR_ERR(pmap);
	} else
		return -EOPNOTSUPP;

	perf_event_grab_pmc();
	event->destroy = hw_perf_event_destroy;

	/* We save the enable bits in the config_base.  So to
	 * turn off sampling just write 'config', and to enable
	 * things write 'config | config_base'.
	 */
	hwc->config_base = sparc_pmu->irq_bit;
	if (!attr->exclude_user)
		hwc->config_base |= PCR_UTRACE;
	if (!attr->exclude_kernel)
		hwc->config_base |= PCR_STRACE;
	if (!attr->exclude_hv)
		hwc->config_base |= sparc_pmu->hv_bit;

	if (!hwc->sample_period) {
		hwc->sample_period = MAX_PERIOD;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	}

	enc = pmap->encoding;
	if (pmap->pic_mask & PIC_UPPER) {
		hwc->idx = PIC_UPPER_INDEX;
		enc <<= sparc_pmu->upper_shift;
	} else {
		hwc->idx = PIC_LOWER_INDEX;
		enc <<= sparc_pmu->lower_shift;
	}

	hwc->config |= enc;
	return 0;
}

static const struct pmu pmu = {
	.enable		= sparc_pmu_enable,
	.disable	= sparc_pmu_disable,
	.read		= sparc_pmu_read,
	.unthrottle	= sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_event_init(struct perf_event *event)
{
	int err = __hw_perf_event_init(event);

	if (err)
		return ERR_PTR(err);
	return &pmu;
}

void perf_event_print_debug(void)
{
	unsigned long flags;
	u64 pcr, pic;
	int cpu;

	if (!sparc_pmu)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();

	pcr = pcr_ops->read();
	read_pic(pic);

	pr_info("\n");
	pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
		cpu, pcr, pic);

	local_irq_restore(flags);
}

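/* Overflow handling runs from the NMI die notifier.  A counter whose
 * new 32-bit value still has bit 31 set has not yet wrapped past zero
 * and is skipped.
 */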
static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
					    unsigned long cmd, void *__args)
{
	struct die_args *args = __args;
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	if (!atomic_read(&active_events))
		return NOTIFY_DONE;

	switch (cmd) {
	case DIE_NMI:
		break;

	default:
		return NOTIFY_DONE;
	}

	regs = args->regs;

	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx < MAX_HWEVENTS; idx++) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;
		u64 val;

		if (!test_bit(idx, cpuc->active_mask))
			continue;
		hwc = &event->hw;
		val = sparc_perf_event_update(event, hwc, idx);
		if (val & (1ULL << 31))
			continue;

		data.period = event->hw.last_period;
		if (!sparc_perf_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 1, &data, regs))
			sparc_pmu_disable_event(hwc, idx);
	}

	return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_event_nmi_notifier = {
	.notifier_call	= perf_event_nmi_handler,
};

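/* Select the PMU description matching the CPU's reported PMU type
 * string.
 */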
static bool __init supported_pmu(void)
{
	if (!strcmp(sparc_pmu_type, "ultra3") ||
	    !strcmp(sparc_pmu_type, "ultra3+") ||
	    !strcmp(sparc_pmu_type, "ultra3i") ||
	    !strcmp(sparc_pmu_type, "ultra4+")) {
		sparc_pmu = &ultra3_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara")) {
		sparc_pmu = &niagara1_pmu;
		return true;
	}
	if (!strcmp(sparc_pmu_type, "niagara2")) {
		sparc_pmu = &niagara2_pmu;
		return true;
	}
	return false;
}

void __init init_hw_perf_events(void)
{
	pr_info("Performance events: ");

	if (!supported_pmu()) {
		pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
		return;
	}

	pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

	/* All sparc64 PMUs currently have 2 events.  But this simple
	 * driver only supports one active event at a time.
	 */
	perf_max_events = 1;

	register_die_notifier(&perf_event_nmi_notifier);
}