core-book3s.c

  1. /*
  2. * Performance event support - powerpc architecture code
  3. *
  4. * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/sched.h>
  13. #include <linux/perf_event.h>
  14. #include <linux/percpu.h>
  15. #include <linux/hardirq.h>
  16. #include <asm/reg.h>
  17. #include <asm/pmc.h>
  18. #include <asm/machdep.h>
  19. #include <asm/firmware.h>
  20. #include <asm/ptrace.h>
  21. #define BHRB_MAX_ENTRIES 32
  22. #define BHRB_TARGET 0x0000000000000002
  23. #define BHRB_PREDICTION 0x0000000000000001
  24. #define BHRB_EA 0xFFFFFFFFFFFFFFFC
  25. struct cpu_hw_events {
  26. int n_events;
  27. int n_percpu;
  28. int disabled;
  29. int n_added;
  30. int n_limited;
  31. u8 pmcs_enabled;
  32. struct perf_event *event[MAX_HWEVENTS];
  33. u64 events[MAX_HWEVENTS];
  34. unsigned int flags[MAX_HWEVENTS];
  35. unsigned long mmcr[3];
  36. struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
  37. u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
  38. u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
  39. unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
  40. unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
  41. unsigned int group_flag;
  42. int n_txn_start;
  43. /* BHRB bits */
  44. u64 bhrb_filter; /* BHRB HW branch filter */
  45. int bhrb_users;
  46. void *bhrb_context;
  47. struct perf_branch_stack bhrb_stack;
  48. struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
  49. };
  50. DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
  51. struct power_pmu *ppmu;
  52. /*
  53. * Normally, to ignore kernel events we set the FCS (freeze counters
  54. * in supervisor mode) bit in MMCR0, but if the kernel runs with the
  55. * hypervisor bit set in the MSR, or if we are running on a processor
  56. * where the hypervisor bit is forced to 1 (as on Apple G5 processors),
  57. * then we need to use the FCHV bit to ignore kernel events.
  58. */
  59. static unsigned int freeze_events_kernel = MMCR0_FCS;
  60. /*
  61. * 32-bit doesn't have MMCRA but does have an MMCR2,
  62. * and a few other names are different.
  63. */
  64. #ifdef CONFIG_PPC32
  65. #define MMCR0_FCHV 0
  66. #define MMCR0_PMCjCE MMCR0_PMCnCE
  67. #define SPRN_MMCRA SPRN_MMCR2
  68. #define MMCRA_SAMPLE_ENABLE 0
  69. static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
  70. {
  71. return 0;
  72. }
  73. static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
  74. static inline u32 perf_get_misc_flags(struct pt_regs *regs)
  75. {
  76. return 0;
  77. }
  78. static inline void perf_read_regs(struct pt_regs *regs)
  79. {
  80. regs->result = 0;
  81. }
  82. static inline int perf_intr_is_nmi(struct pt_regs *regs)
  83. {
  84. return 0;
  85. }
  86. static inline int siar_valid(struct pt_regs *regs)
  87. {
  88. return 1;
  89. }
  90. #endif /* CONFIG_PPC32 */
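/*
 * perf_read_regs() stores its "use SIAR or pt_regs" decision in bit 0 of
 * regs->result; this helper just reads that bit back.
 */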
  91. static bool regs_use_siar(struct pt_regs *regs)
  92. {
  93. return !!(regs->result & 1);
  94. }
  95. /*
  96. * Things that are specific to 64-bit implementations.
  97. */
  98. #ifdef CONFIG_PPC64
  99. static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
  100. {
  101. unsigned long mmcra = regs->dsisr;
  102. if ((ppmu->flags & PPMU_HAS_SSLOT) && (mmcra & MMCRA_SAMPLE_ENABLE)) {
  103. unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
  104. if (slot > 1)
  105. return 4 * (slot - 1);
  106. }
  107. return 0;
  108. }
  109. /*
  110. * The user wants a data address recorded.
  111. * If we're not doing instruction sampling, give them the SDAR
  112. * (sampled data address). If we are doing instruction sampling, then
  113. * only give them the SDAR if it corresponds to the instruction
  114. * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC or
  115. * the [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA.
  116. */
  117. static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
  118. {
  119. unsigned long mmcra = regs->dsisr;
  120. unsigned long sdsync;
  121. if (ppmu->flags & PPMU_SIAR_VALID)
  122. sdsync = POWER7P_MMCRA_SDAR_VALID;
  123. else if (ppmu->flags & PPMU_ALT_SIPR)
  124. sdsync = POWER6_MMCRA_SDSYNC;
  125. else
  126. sdsync = MMCRA_SDSYNC;
  127. if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
  128. *addrp = mfspr(SPRN_SDAR);
  129. }
  130. static bool regs_sihv(struct pt_regs *regs)
  131. {
  132. unsigned long sihv = MMCRA_SIHV;
  133. if (ppmu->flags & PPMU_HAS_SIER)
  134. return !!(regs->dar & SIER_SIHV);
  135. if (ppmu->flags & PPMU_ALT_SIPR)
  136. sihv = POWER6_MMCRA_SIHV;
  137. return !!(regs->dsisr & sihv);
  138. }
  139. static bool regs_sipr(struct pt_regs *regs)
  140. {
  141. unsigned long sipr = MMCRA_SIPR;
  142. if (ppmu->flags & PPMU_HAS_SIER)
  143. return !!(regs->dar & SIER_SIPR);
  144. if (ppmu->flags & PPMU_ALT_SIPR)
  145. sipr = POWER6_MMCRA_SIPR;
  146. return !!(regs->dsisr & sipr);
  147. }
  148. static bool regs_no_sipr(struct pt_regs *regs)
  149. {
  150. return !!(regs->result & 2);
  151. }
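/*
 * Derive user/kernel/hypervisor state from the MSR; used when the sample
 * state comes from pt_regs rather than from the SIPR/SIHV bits.
 */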
  152. static inline u32 perf_flags_from_msr(struct pt_regs *regs)
  153. {
  154. if (regs->msr & MSR_PR)
  155. return PERF_RECORD_MISC_USER;
  156. if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV)
  157. return PERF_RECORD_MISC_HYPERVISOR;
  158. return PERF_RECORD_MISC_KERNEL;
  159. }
  160. static inline u32 perf_get_misc_flags(struct pt_regs *regs)
  161. {
  162. bool use_siar = regs_use_siar(regs);
  163. if (!use_siar)
  164. return perf_flags_from_msr(regs);
  165. /*
  166. * If we don't have flags in MMCRA, rather than using
  167. * the MSR, we intuit the flags from the address in
  168. * SIAR which should give slightly more reliable
  169. * results
  170. */
  171. if (regs_no_sipr(regs)) {
  172. unsigned long siar = mfspr(SPRN_SIAR);
  173. if (siar >= PAGE_OFFSET)
  174. return PERF_RECORD_MISC_KERNEL;
  175. return PERF_RECORD_MISC_USER;
  176. }
  177. /* PR has priority over HV, so order below is important */
  178. if (regs_sipr(regs))
  179. return PERF_RECORD_MISC_USER;
  180. if (regs_sihv(regs) && (freeze_events_kernel != MMCR0_FCHV))
  181. return PERF_RECORD_MISC_HYPERVISOR;
  182. return PERF_RECORD_MISC_KERNEL;
  183. }
  184. /*
  185. * Overload regs->dsisr to store MMCRA so we only need to read it once
  186. * on each interrupt.
  187. * Overload regs->dar to store SIER if we have it.
  188. * Overload regs->result to specify whether we should use the MSR (result
  189. * is zero) or the SIAR (result is non zero).
  190. */
  191. static inline void perf_read_regs(struct pt_regs *regs)
  192. {
  193. unsigned long mmcra = mfspr(SPRN_MMCRA);
  194. int marked = mmcra & MMCRA_SAMPLE_ENABLE;
  195. int use_siar;
  196. regs->dsisr = mmcra;
  197. regs->result = 0;
  198. if (ppmu->flags & PPMU_NO_SIPR)
  199. regs->result |= 2;
  200. /*
  201. * On power8 if we're in random sampling mode, the SIER is updated.
  202. * If we're in continuous sampling mode, we don't have SIPR.
  203. */
  204. if (ppmu->flags & PPMU_HAS_SIER) {
  205. if (marked)
  206. regs->dar = mfspr(SPRN_SIER);
  207. else
  208. regs->result |= 2;
  209. }
  210. /*
  211. * If this isn't a PMU exception (eg a software event) the SIAR is
  212. * not valid. Use pt_regs.
  213. *
  214. * If it is a marked event use the SIAR.
  215. *
  216. * If the PMU doesn't update the SIAR for non marked events use
  217. * pt_regs.
  218. *
  219. * If the PMU has HV/PR flags then check to see if they
  220. * place the exception in userspace. If so, use pt_regs. In
  221. * continuous sampling mode the SIAR and the PMU exception are
  222. * not synchronised, so they may be many instructions apart.
  223. * This can result in confusing backtraces. We still want
  224. * hypervisor samples as well as samples in the kernel with
  225. * interrupts off hence the userspace check.
  226. */
  227. if (TRAP(regs) != 0xf00)
  228. use_siar = 0;
  229. else if (marked)
  230. use_siar = 1;
  231. else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
  232. use_siar = 0;
  233. else if (!regs_no_sipr(regs) && regs_sipr(regs))
  234. use_siar = 0;
  235. else
  236. use_siar = 1;
  237. regs->result |= use_siar;
  238. }
  239. /*
  240. * If interrupts were soft-disabled when a PMU interrupt occurs, treat
  241. * it as an NMI.
  242. */
  243. static inline int perf_intr_is_nmi(struct pt_regs *regs)
  244. {
  245. return !regs->softe;
  246. }
  247. /*
  248. * On processors like P7+ that have the SIAR-Valid bit, marked instructions
  249. * must be sampled only if the SIAR-valid bit is set.
  250. *
  251. * For unmarked instructions and for processors that don't have the SIAR-Valid
  252. * bit, assume that SIAR is valid.
  253. */
  254. static inline int siar_valid(struct pt_regs *regs)
  255. {
  256. unsigned long mmcra = regs->dsisr;
  257. int marked = mmcra & MMCRA_SAMPLE_ENABLE;
  258. if ((ppmu->flags & PPMU_SIAR_VALID) && marked)
  259. return mmcra & POWER7P_MMCRA_SIAR_VALID;
  260. return 1;
  261. }
  262. #endif /* CONFIG_PPC64 */
  263. static void perf_event_interrupt(struct pt_regs *regs);
  264. void perf_event_print_debug(void)
  265. {
  266. }
  267. /*
  268. * Read one performance monitor counter (PMC).
  269. */
  270. static unsigned long read_pmc(int idx)
  271. {
  272. unsigned long val;
  273. switch (idx) {
  274. case 1:
  275. val = mfspr(SPRN_PMC1);
  276. break;
  277. case 2:
  278. val = mfspr(SPRN_PMC2);
  279. break;
  280. case 3:
  281. val = mfspr(SPRN_PMC3);
  282. break;
  283. case 4:
  284. val = mfspr(SPRN_PMC4);
  285. break;
  286. case 5:
  287. val = mfspr(SPRN_PMC5);
  288. break;
  289. case 6:
  290. val = mfspr(SPRN_PMC6);
  291. break;
  292. #ifdef CONFIG_PPC64
  293. case 7:
  294. val = mfspr(SPRN_PMC7);
  295. break;
  296. case 8:
  297. val = mfspr(SPRN_PMC8);
  298. break;
  299. #endif /* CONFIG_PPC64 */
  300. default:
  301. printk(KERN_ERR "oops trying to read PMC%d\n", idx);
  302. val = 0;
  303. }
  304. return val;
  305. }
  306. /*
  307. * Write one PMC.
  308. */
  309. static void write_pmc(int idx, unsigned long val)
  310. {
  311. switch (idx) {
  312. case 1:
  313. mtspr(SPRN_PMC1, val);
  314. break;
  315. case 2:
  316. mtspr(SPRN_PMC2, val);
  317. break;
  318. case 3:
  319. mtspr(SPRN_PMC3, val);
  320. break;
  321. case 4:
  322. mtspr(SPRN_PMC4, val);
  323. break;
  324. case 5:
  325. mtspr(SPRN_PMC5, val);
  326. break;
  327. case 6:
  328. mtspr(SPRN_PMC6, val);
  329. break;
  330. #ifdef CONFIG_PPC64
  331. case 7:
  332. mtspr(SPRN_PMC7, val);
  333. break;
  334. case 8:
  335. mtspr(SPRN_PMC8, val);
  336. break;
  337. #endif /* CONFIG_PPC64 */
  338. default:
  339. printk(KERN_ERR "oops trying to write PMC%d\n", idx);
  340. }
  341. }
  342. /*
  343. * Check if a set of events can all go on the PMU at once.
  344. * If they can't, this will look at alternative codes for the events
  345. * and see if any combination of alternative codes is feasible.
  346. * The feasible set is returned in event_id[].
  347. */
  348. static int power_check_constraints(struct cpu_hw_events *cpuhw,
  349. u64 event_id[], unsigned int cflags[],
  350. int n_ev)
  351. {
  352. unsigned long mask, value, nv;
  353. unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
  354. int n_alt[MAX_HWEVENTS], choice[MAX_HWEVENTS];
  355. int i, j;
  356. unsigned long addf = ppmu->add_fields;
  357. unsigned long tadd = ppmu->test_adder;
  358. if (n_ev > ppmu->n_counter)
  359. return -1;
  360. /* First see if the events will go on as-is */
  361. for (i = 0; i < n_ev; ++i) {
  362. if ((cflags[i] & PPMU_LIMITED_PMC_REQD)
  363. && !ppmu->limited_pmc_event(event_id[i])) {
  364. ppmu->get_alternatives(event_id[i], cflags[i],
  365. cpuhw->alternatives[i]);
  366. event_id[i] = cpuhw->alternatives[i][0];
  367. }
  368. if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
  369. &cpuhw->avalues[i][0]))
  370. return -1;
  371. }
  372. value = mask = 0;
  373. for (i = 0; i < n_ev; ++i) {
  374. nv = (value | cpuhw->avalues[i][0]) +
  375. (value & cpuhw->avalues[i][0] & addf);
  376. if ((((nv + tadd) ^ value) & mask) != 0 ||
  377. (((nv + tadd) ^ cpuhw->avalues[i][0]) &
  378. cpuhw->amasks[i][0]) != 0)
  379. break;
  380. value = nv;
  381. mask |= cpuhw->amasks[i][0];
  382. }
  383. if (i == n_ev)
  384. return 0; /* all OK */
  385. /* doesn't work, gather alternatives... */
  386. if (!ppmu->get_alternatives)
  387. return -1;
  388. for (i = 0; i < n_ev; ++i) {
  389. choice[i] = 0;
  390. n_alt[i] = ppmu->get_alternatives(event_id[i], cflags[i],
  391. cpuhw->alternatives[i]);
  392. for (j = 1; j < n_alt[i]; ++j)
  393. ppmu->get_constraint(cpuhw->alternatives[i][j],
  394. &cpuhw->amasks[i][j],
  395. &cpuhw->avalues[i][j]);
  396. }
  397. /* enumerate all possibilities and see if any will work */
  398. i = 0;
  399. j = -1;
  400. value = mask = nv = 0;
  401. while (i < n_ev) {
  402. if (j >= 0) {
  403. /* we're backtracking, restore context */
  404. value = svalues[i];
  405. mask = smasks[i];
  406. j = choice[i];
  407. }
  408. /*
  409. * See if any alternative k for event_id i,
  410. * where k > j, will satisfy the constraints.
  411. */
  412. while (++j < n_alt[i]) {
  413. nv = (value | cpuhw->avalues[i][j]) +
  414. (value & cpuhw->avalues[i][j] & addf);
  415. if ((((nv + tadd) ^ value) & mask) == 0 &&
  416. (((nv + tadd) ^ cpuhw->avalues[i][j])
  417. & cpuhw->amasks[i][j]) == 0)
  418. break;
  419. }
  420. if (j >= n_alt[i]) {
  421. /*
  422. * No feasible alternative, backtrack
  423. * to event_id i-1 and continue enumerating its
  424. * alternatives from where we got up to.
  425. */
  426. if (--i < 0)
  427. return -1;
  428. } else {
  429. /*
  430. * Found a feasible alternative for event_id i,
  431. * remember where we got up to with this event_id,
  432. * go on to the next event_id, and start with
  433. * the first alternative for it.
  434. */
  435. choice[i] = j;
  436. svalues[i] = value;
  437. smasks[i] = mask;
  438. value = nv;
  439. mask |= cpuhw->amasks[i][j];
  440. ++i;
  441. j = -1;
  442. }
  443. }
  444. /* OK, we have a feasible combination, tell the caller the solution */
  445. for (i = 0; i < n_ev; ++i)
  446. event_id[i] = cpuhw->alternatives[i][choice[i]];
  447. return 0;
  448. }
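/*
 * How the constraint arithmetic above works, using a hypothetical field
 * layout (the real bit assignments live in the per-CPU drivers):
 *
 * Each event supplies a (mask, value) pair.  "Select"-type fields are
 * covered by mask and must match exactly across all events.  "Counting"
 * fields (how many events want some shared resource) are left out of mask;
 * ppmu->add_fields has a 1 in the low bit of each such field, so adding two
 * values sums the counts, and ppmu->test_adder is chosen so that
 * (sum + test_adder) carries into a masked check bit exactly when a field
 * exceeds its capacity -- which is what the ((nv + tadd) ^ value) & mask
 * test detects.
 *
 * E.g. a hypothetical 2-bit field allowing at most 2 users: one user
 * contributes 0b01, so two users sum to 0b10 and three to 0b11; with 0b01
 * in test_adder, 0b11 + 0b01 = 0b100 carries into the masked bit above the
 * field and the combination is rejected.
 */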
  449. /*
  450. * Check if newly-added events have consistent settings for
  451. * exclude_{user,kernel,hv} with each other and any previously
  452. * added events.
  453. */
  454. static int check_excludes(struct perf_event **ctrs, unsigned int cflags[],
  455. int n_prev, int n_new)
  456. {
  457. int eu = 0, ek = 0, eh = 0;
  458. int i, n, first;
  459. struct perf_event *event;
  460. n = n_prev + n_new;
  461. if (n <= 1)
  462. return 0;
  463. first = 1;
  464. for (i = 0; i < n; ++i) {
  465. if (cflags[i] & PPMU_LIMITED_PMC_OK) {
  466. cflags[i] &= ~PPMU_LIMITED_PMC_REQD;
  467. continue;
  468. }
  469. event = ctrs[i];
  470. if (first) {
  471. eu = event->attr.exclude_user;
  472. ek = event->attr.exclude_kernel;
  473. eh = event->attr.exclude_hv;
  474. first = 0;
  475. } else if (event->attr.exclude_user != eu ||
  476. event->attr.exclude_kernel != ek ||
  477. event->attr.exclude_hv != eh) {
  478. return -EAGAIN;
  479. }
  480. }
  481. if (eu || ek || eh)
  482. for (i = 0; i < n; ++i)
  483. if (cflags[i] & PPMU_LIMITED_PMC_OK)
  484. cflags[i] |= PPMU_LIMITED_PMC_REQD;
  485. return 0;
  486. }
  487. static u64 check_and_compute_delta(u64 prev, u64 val)
  488. {
  489. u64 delta = (val - prev) & 0xfffffffful;
  490. /*
491. * POWER7 can roll back counter values; if the new value is smaller
492. * than the previous value it will cause the delta and the counter to
493. * have bogus values unless we rolled a counter over. If a counter is
494. * rolled back, it will be smaller, but within 256, which is the maximum
495. * number of events to roll back at once. If we detect a rollback,
  496. * return 0. This can lead to a small lack of precision in the
  497. * counters.
  498. */
  499. if (prev > val && (prev - val) < 256)
  500. delta = 0;
  501. return delta;
  502. }
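/*
 * Two concrete cases for the logic above (PMCs are 32 bits wide):
 *   prev = 0xfffffff0, val = 0x00000010: the counter wrapped, so
 *     delta = (val - prev) & 0xffffffff = 0x20 and is kept;
 *   prev = 0x80000010, val = 0x80000000: the counter was rolled back by
 *     0x10 (< 256), so the delta is discarded and 0 is returned.
 */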
  503. static void power_pmu_read(struct perf_event *event)
  504. {
  505. s64 val, delta, prev;
  506. if (event->hw.state & PERF_HES_STOPPED)
  507. return;
  508. if (!event->hw.idx)
  509. return;
  510. /*
  511. * Performance monitor interrupts come even when interrupts
  512. * are soft-disabled, as long as interrupts are hard-enabled.
  513. * Therefore we treat them like NMIs.
  514. */
  515. do {
  516. prev = local64_read(&event->hw.prev_count);
  517. barrier();
  518. val = read_pmc(event->hw.idx);
  519. delta = check_and_compute_delta(prev, val);
  520. if (!delta)
  521. return;
  522. } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
  523. local64_add(delta, &event->count);
  524. local64_sub(delta, &event->hw.period_left);
  525. }
  526. /*
  527. * On some machines, PMC5 and PMC6 can't be written, don't respect
  528. * the freeze conditions, and don't generate interrupts. This tells
  529. * us if `event' is using such a PMC.
  530. */
  531. static int is_limited_pmc(int pmcnum)
  532. {
  533. return (ppmu->flags & PPMU_LIMITED_PMC5_6)
  534. && (pmcnum == 5 || pmcnum == 6);
  535. }
  536. static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
  537. unsigned long pmc5, unsigned long pmc6)
  538. {
  539. struct perf_event *event;
  540. u64 val, prev, delta;
  541. int i;
  542. for (i = 0; i < cpuhw->n_limited; ++i) {
  543. event = cpuhw->limited_counter[i];
  544. if (!event->hw.idx)
  545. continue;
  546. val = (event->hw.idx == 5) ? pmc5 : pmc6;
  547. prev = local64_read(&event->hw.prev_count);
  548. event->hw.idx = 0;
  549. delta = check_and_compute_delta(prev, val);
  550. if (delta)
  551. local64_add(delta, &event->count);
  552. }
  553. }
  554. static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
  555. unsigned long pmc5, unsigned long pmc6)
  556. {
  557. struct perf_event *event;
  558. u64 val, prev;
  559. int i;
  560. for (i = 0; i < cpuhw->n_limited; ++i) {
  561. event = cpuhw->limited_counter[i];
  562. event->hw.idx = cpuhw->limited_hwidx[i];
  563. val = (event->hw.idx == 5) ? pmc5 : pmc6;
  564. prev = local64_read(&event->hw.prev_count);
  565. if (check_and_compute_delta(prev, val))
  566. local64_set(&event->hw.prev_count, val);
  567. perf_event_update_userpage(event);
  568. }
  569. }
  570. /*
  571. * Since limited events don't respect the freeze conditions, we
  572. * have to read them immediately after freezing or unfreezing the
  573. * other events. We try to keep the values from the limited
  574. * events as consistent as possible by keeping the delay (in
  575. * cycles and instructions) between freezing/unfreezing and reading
  576. * the limited events as small and consistent as possible.
  577. * Therefore, if any limited events are in use, we read them
  578. * both, and always in the same order, to minimize variability,
  579. * and do it inside the same asm that writes MMCR0.
  580. */
  581. static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
  582. {
  583. unsigned long pmc5, pmc6;
  584. if (!cpuhw->n_limited) {
  585. mtspr(SPRN_MMCR0, mmcr0);
  586. return;
  587. }
  588. /*
  589. * Write MMCR0, then read PMC5 and PMC6 immediately.
  590. * To ensure we don't get a performance monitor interrupt
  591. * between writing MMCR0 and freezing/thawing the limited
  592. * events, we first write MMCR0 with the event overflow
  593. * interrupt enable bits turned off.
  594. */
  595. asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
  596. : "=&r" (pmc5), "=&r" (pmc6)
  597. : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
  598. "i" (SPRN_MMCR0),
  599. "i" (SPRN_PMC5), "i" (SPRN_PMC6));
  600. if (mmcr0 & MMCR0_FC)
  601. freeze_limited_counters(cpuhw, pmc5, pmc6);
  602. else
  603. thaw_limited_counters(cpuhw, pmc5, pmc6);
  604. /*
  605. * Write the full MMCR0 including the event overflow interrupt
  606. * enable bits, if necessary.
  607. */
  608. if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
  609. mtspr(SPRN_MMCR0, mmcr0);
  610. }
  611. /*
  612. * Disable all events to prevent PMU interrupts and to allow
  613. * events to be added or removed.
  614. */
  615. static void power_pmu_disable(struct pmu *pmu)
  616. {
  617. struct cpu_hw_events *cpuhw;
  618. unsigned long flags;
  619. if (!ppmu)
  620. return;
  621. local_irq_save(flags);
  622. cpuhw = &__get_cpu_var(cpu_hw_events);
  623. if (!cpuhw->disabled) {
  624. cpuhw->disabled = 1;
  625. cpuhw->n_added = 0;
  626. /*
  627. * Check if we ever enabled the PMU on this cpu.
  628. */
  629. if (!cpuhw->pmcs_enabled) {
  630. ppc_enable_pmcs();
  631. cpuhw->pmcs_enabled = 1;
  632. }
  633. /*
  634. * Disable instruction sampling if it was enabled
  635. */
  636. if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
  637. mtspr(SPRN_MMCRA,
  638. cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
  639. mb();
  640. }
  641. /*
  642. * Set the 'freeze counters' bit.
  643. * The barrier is to make sure the mtspr has been
  644. * executed and the PMU has frozen the events
  645. * before we return.
  646. */
  647. write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC);
  648. mb();
  649. }
  650. local_irq_restore(flags);
  651. }
  652. /*
  653. * Re-enable all events if disable == 0.
  654. * If we were previously disabled and events were added, then
  655. * put the new config on the PMU.
  656. */
  657. static void power_pmu_enable(struct pmu *pmu)
  658. {
  659. struct perf_event *event;
  660. struct cpu_hw_events *cpuhw;
  661. unsigned long flags;
  662. long i;
  663. unsigned long val;
  664. s64 left;
  665. unsigned int hwc_index[MAX_HWEVENTS];
  666. int n_lim;
  667. int idx;
  668. if (!ppmu)
  669. return;
  670. local_irq_save(flags);
  671. cpuhw = &__get_cpu_var(cpu_hw_events);
  672. if (!cpuhw->disabled) {
  673. local_irq_restore(flags);
  674. return;
  675. }
  676. cpuhw->disabled = 0;
  677. /*
  678. * If we didn't change anything, or only removed events,
  679. * no need to recalculate MMCR* settings and reset the PMCs.
  680. * Just reenable the PMU with the current MMCR* settings
  681. * (possibly updated for removal of events).
  682. */
  683. if (!cpuhw->n_added) {
  684. mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
  685. mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
  686. if (cpuhw->n_events == 0)
  687. ppc_set_pmu_inuse(0);
  688. goto out_enable;
  689. }
  690. /*
  691. * Compute MMCR* values for the new set of events
  692. */
  693. if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
  694. cpuhw->mmcr)) {
  695. /* shouldn't ever get here */
  696. printk(KERN_ERR "oops compute_mmcr failed\n");
  697. goto out;
  698. }
  699. /*
  700. * Add in MMCR0 freeze bits corresponding to the
  701. * attr.exclude_* bits for the first event.
  702. * We have already checked that all events have the
  703. * same values for these bits as the first event.
  704. */
  705. event = cpuhw->event[0];
  706. if (event->attr.exclude_user)
  707. cpuhw->mmcr[0] |= MMCR0_FCP;
  708. if (event->attr.exclude_kernel)
  709. cpuhw->mmcr[0] |= freeze_events_kernel;
  710. if (event->attr.exclude_hv)
  711. cpuhw->mmcr[0] |= MMCR0_FCHV;
  712. /*
  713. * Write the new configuration to MMCR* with the freeze
  714. * bit set and set the hardware events to their initial values.
  715. * Then unfreeze the events.
  716. */
  717. ppc_set_pmu_inuse(1);
  718. mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
  719. mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
  720. mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
  721. | MMCR0_FC);
  722. /*
  723. * Read off any pre-existing events that need to move
  724. * to another PMC.
  725. */
  726. for (i = 0; i < cpuhw->n_events; ++i) {
  727. event = cpuhw->event[i];
  728. if (event->hw.idx && event->hw.idx != hwc_index[i] + 1) {
  729. power_pmu_read(event);
  730. write_pmc(event->hw.idx, 0);
  731. event->hw.idx = 0;
  732. }
  733. }
  734. /*
  735. * Initialize the PMCs for all the new and moved events.
  736. */
  737. cpuhw->n_limited = n_lim = 0;
  738. for (i = 0; i < cpuhw->n_events; ++i) {
  739. event = cpuhw->event[i];
  740. if (event->hw.idx)
  741. continue;
  742. idx = hwc_index[i] + 1;
  743. if (is_limited_pmc(idx)) {
  744. cpuhw->limited_counter[n_lim] = event;
  745. cpuhw->limited_hwidx[n_lim] = idx;
  746. ++n_lim;
  747. continue;
  748. }
  749. val = 0;
  750. if (event->hw.sample_period) {
  751. left = local64_read(&event->hw.period_left);
  752. if (left < 0x80000000L)
  753. val = 0x80000000L - left;
  754. }
  755. local64_set(&event->hw.prev_count, val);
  756. event->hw.idx = idx;
  757. if (event->hw.state & PERF_HES_STOPPED)
  758. val = 0;
  759. write_pmc(idx, val);
  760. perf_event_update_userpage(event);
  761. }
  762. cpuhw->n_limited = n_lim;
  763. cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
  764. out_enable:
  765. mb();
  766. write_mmcr0(cpuhw, cpuhw->mmcr[0]);
  767. /*
  768. * Enable instruction sampling if necessary
  769. */
  770. if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
  771. mb();
  772. mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
  773. }
  774. out:
  775. if (cpuhw->bhrb_users)
  776. ppmu->config_bhrb(cpuhw->bhrb_filter);
  777. local_irq_restore(flags);
  778. }
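/*
 * Gather a group leader and its siblings into ctrs[]/events[]/flags[],
 * skipping pure software events and siblings that are switched off.
 * Returns the number of hardware events collected, or -1 if max_count
 * would be exceeded.
 */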
  779. static int collect_events(struct perf_event *group, int max_count,
  780. struct perf_event *ctrs[], u64 *events,
  781. unsigned int *flags)
  782. {
  783. int n = 0;
  784. struct perf_event *event;
  785. if (!is_software_event(group)) {
  786. if (n >= max_count)
  787. return -1;
  788. ctrs[n] = group;
  789. flags[n] = group->hw.event_base;
  790. events[n++] = group->hw.config;
  791. }
  792. list_for_each_entry(event, &group->sibling_list, group_entry) {
  793. if (!is_software_event(event) &&
  794. event->state != PERF_EVENT_STATE_OFF) {
  795. if (n >= max_count)
  796. return -1;
  797. ctrs[n] = event;
  798. flags[n] = event->hw.event_base;
  799. events[n++] = event->hw.config;
  800. }
  801. }
  802. return n;
  803. }
  804. /* Reset all possible BHRB entries */
  805. static void power_pmu_bhrb_reset(void)
  806. {
  807. asm volatile(PPC_CLRBHRB);
  808. }
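/*
 * power_pmu_bhrb_enable()/power_pmu_bhrb_disable() keep a per-cpu count of
 * events using the BHRB (bhrb_users).  When a different task context is
 * scheduled in, the BHRB is cleared so one task's branch history cannot
 * leak into another task's samples.
 */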
  809. void power_pmu_bhrb_enable(struct perf_event *event)
  810. {
  811. struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
  812. if (!ppmu->bhrb_nr)
  813. return;
  814. /* Clear BHRB if we changed task context to avoid data leaks */
  815. if (event->ctx->task && cpuhw->bhrb_context != event->ctx) {
  816. power_pmu_bhrb_reset();
  817. cpuhw->bhrb_context = event->ctx;
  818. }
  819. cpuhw->bhrb_users++;
  820. }
  821. void power_pmu_bhrb_disable(struct perf_event *event)
  822. {
  823. struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
  824. if (!ppmu->bhrb_nr)
  825. return;
  826. cpuhw->bhrb_users--;
  827. WARN_ON_ONCE(cpuhw->bhrb_users < 0);
  828. if (!cpuhw->disabled && !cpuhw->bhrb_users) {
  829. /* BHRB cannot be turned off when other
  830. * events are active on the PMU.
  831. */
  832. /* avoid stale pointer */
  833. cpuhw->bhrb_context = NULL;
  834. }
  835. }
  836. /*
837. * Add an event to the PMU.
  838. * If all events are not already frozen, then we disable and
  839. * re-enable the PMU in order to get hw_perf_enable to do the
  840. * actual work of reconfiguring the PMU.
  841. */
  842. static int power_pmu_add(struct perf_event *event, int ef_flags)
  843. {
  844. struct cpu_hw_events *cpuhw;
  845. unsigned long flags;
  846. int n0;
  847. int ret = -EAGAIN;
  848. local_irq_save(flags);
  849. perf_pmu_disable(event->pmu);
  850. /*
  851. * Add the event to the list (if there is room)
  852. * and check whether the total set is still feasible.
  853. */
  854. cpuhw = &__get_cpu_var(cpu_hw_events);
  855. n0 = cpuhw->n_events;
  856. if (n0 >= ppmu->n_counter)
  857. goto out;
  858. cpuhw->event[n0] = event;
  859. cpuhw->events[n0] = event->hw.config;
  860. cpuhw->flags[n0] = event->hw.event_base;
  861. /*
  862. * This event may have been disabled/stopped in record_and_restart()
  863. * because we exceeded the ->event_limit. If re-starting the event,
  864. * clear the ->hw.state (STOPPED and UPTODATE flags), so the user
  865. * notification is re-enabled.
  866. */
  867. if (!(ef_flags & PERF_EF_START))
  868. event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
  869. else
  870. event->hw.state = 0;
  871. /*
  872. * If group events scheduling transaction was started,
  873. * skip the schedulability test here, it will be performed
  874. * at commit time(->commit_txn) as a whole
  875. */
  876. if (cpuhw->group_flag & PERF_EVENT_TXN)
  877. goto nocheck;
  878. if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
  879. goto out;
  880. if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
  881. goto out;
  882. event->hw.config = cpuhw->events[n0];
  883. nocheck:
  884. ++cpuhw->n_events;
  885. ++cpuhw->n_added;
  886. ret = 0;
  887. out:
  888. if (has_branch_stack(event))
  889. power_pmu_bhrb_enable(event);
  890. perf_pmu_enable(event->pmu);
  891. local_irq_restore(flags);
  892. return ret;
  893. }
  894. /*
895. * Remove an event from the PMU.
  896. */
  897. static void power_pmu_del(struct perf_event *event, int ef_flags)
  898. {
  899. struct cpu_hw_events *cpuhw;
  900. long i;
  901. unsigned long flags;
  902. local_irq_save(flags);
  903. perf_pmu_disable(event->pmu);
  904. power_pmu_read(event);
  905. cpuhw = &__get_cpu_var(cpu_hw_events);
  906. for (i = 0; i < cpuhw->n_events; ++i) {
  907. if (event == cpuhw->event[i]) {
  908. while (++i < cpuhw->n_events) {
  909. cpuhw->event[i-1] = cpuhw->event[i];
  910. cpuhw->events[i-1] = cpuhw->events[i];
  911. cpuhw->flags[i-1] = cpuhw->flags[i];
  912. }
  913. --cpuhw->n_events;
  914. ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
  915. if (event->hw.idx) {
  916. write_pmc(event->hw.idx, 0);
  917. event->hw.idx = 0;
  918. }
  919. perf_event_update_userpage(event);
  920. break;
  921. }
  922. }
  923. for (i = 0; i < cpuhw->n_limited; ++i)
  924. if (event == cpuhw->limited_counter[i])
  925. break;
  926. if (i < cpuhw->n_limited) {
  927. while (++i < cpuhw->n_limited) {
  928. cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i];
  929. cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i];
  930. }
  931. --cpuhw->n_limited;
  932. }
  933. if (cpuhw->n_events == 0) {
  934. /* disable exceptions if no events are running */
  935. cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
  936. }
  937. if (has_branch_stack(event))
  938. power_pmu_bhrb_disable(event);
  939. perf_pmu_enable(event->pmu);
  940. local_irq_restore(flags);
  941. }
  942. /*
  943. * POWER-PMU does not support disabling individual counters, hence
  944. * program their cycle counter to their max value and ignore the interrupts.
  945. */
  946. static void power_pmu_start(struct perf_event *event, int ef_flags)
  947. {
  948. unsigned long flags;
  949. s64 left;
  950. unsigned long val;
  951. if (!event->hw.idx || !event->hw.sample_period)
  952. return;
  953. if (!(event->hw.state & PERF_HES_STOPPED))
  954. return;
  955. if (ef_flags & PERF_EF_RELOAD)
  956. WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
  957. local_irq_save(flags);
  958. perf_pmu_disable(event->pmu);
  959. event->hw.state = 0;
  960. left = local64_read(&event->hw.period_left);
  961. val = 0;
  962. if (left < 0x80000000L)
  963. val = 0x80000000L - left;
  964. write_pmc(event->hw.idx, val);
  965. perf_event_update_userpage(event);
  966. perf_pmu_enable(event->pmu);
  967. local_irq_restore(flags);
  968. }
  969. static void power_pmu_stop(struct perf_event *event, int ef_flags)
  970. {
  971. unsigned long flags;
  972. if (!event->hw.idx || !event->hw.sample_period)
  973. return;
  974. if (event->hw.state & PERF_HES_STOPPED)
  975. return;
  976. local_irq_save(flags);
  977. perf_pmu_disable(event->pmu);
  978. power_pmu_read(event);
  979. event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
  980. write_pmc(event->hw.idx, 0);
  981. perf_event_update_userpage(event);
  982. perf_pmu_enable(event->pmu);
  983. local_irq_restore(flags);
  984. }
  985. /*
  986. * Start group events scheduling transaction
  987. * Set the flag to make pmu::enable() not perform the
  988. * schedulability test, it will be performed at commit time
  989. */
  990. void power_pmu_start_txn(struct pmu *pmu)
  991. {
  992. struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
  993. perf_pmu_disable(pmu);
  994. cpuhw->group_flag |= PERF_EVENT_TXN;
  995. cpuhw->n_txn_start = cpuhw->n_events;
  996. }
  997. /*
  998. * Stop group events scheduling transaction
  999. * Clear the flag and pmu::enable() will perform the
  1000. * schedulability test.
  1001. */
  1002. void power_pmu_cancel_txn(struct pmu *pmu)
  1003. {
  1004. struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
  1005. cpuhw->group_flag &= ~PERF_EVENT_TXN;
  1006. perf_pmu_enable(pmu);
  1007. }
  1008. /*
  1009. * Commit group events scheduling transaction
  1010. * Perform the group schedulability test as a whole
  1011. * Return 0 if success
  1012. */
  1013. int power_pmu_commit_txn(struct pmu *pmu)
  1014. {
  1015. struct cpu_hw_events *cpuhw;
  1016. long i, n;
  1017. if (!ppmu)
  1018. return -EAGAIN;
  1019. cpuhw = &__get_cpu_var(cpu_hw_events);
  1020. n = cpuhw->n_events;
  1021. if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
  1022. return -EAGAIN;
  1023. i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
  1024. if (i < 0)
  1025. return -EAGAIN;
  1026. for (i = cpuhw->n_txn_start; i < n; ++i)
  1027. cpuhw->event[i]->hw.config = cpuhw->events[i];
  1028. cpuhw->group_flag &= ~PERF_EVENT_TXN;
  1029. perf_pmu_enable(pmu);
  1030. return 0;
  1031. }
1032. /* Called from ctxsw to prevent one process's branch entries from
1033. * mingling with another process's entries during a context switch.
  1034. */
  1035. void power_pmu_flush_branch_stack(void)
  1036. {
  1037. if (ppmu->bhrb_nr)
  1038. power_pmu_bhrb_reset();
  1039. }
  1040. /*
  1041. * Return 1 if we might be able to put event on a limited PMC,
  1042. * or 0 if not.
1043. * An event can only go on a limited PMC if it counts something
  1044. * that a limited PMC can count, doesn't require interrupts, and
  1045. * doesn't exclude any processor mode.
  1046. */
  1047. static int can_go_on_limited_pmc(struct perf_event *event, u64 ev,
  1048. unsigned int flags)
  1049. {
  1050. int n;
  1051. u64 alt[MAX_EVENT_ALTERNATIVES];
  1052. if (event->attr.exclude_user
  1053. || event->attr.exclude_kernel
  1054. || event->attr.exclude_hv
  1055. || event->attr.sample_period)
  1056. return 0;
  1057. if (ppmu->limited_pmc_event(ev))
  1058. return 1;
  1059. /*
  1060. * The requested event_id isn't on a limited PMC already;
  1061. * see if any alternative code goes on a limited PMC.
  1062. */
  1063. if (!ppmu->get_alternatives)
  1064. return 0;
  1065. flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD;
  1066. n = ppmu->get_alternatives(ev, flags, alt);
  1067. return n > 0;
  1068. }
  1069. /*
  1070. * Find an alternative event_id that goes on a normal PMC, if possible,
  1071. * and return the event_id code, or 0 if there is no such alternative.
  1072. * (Note: event_id code 0 is "don't count" on all machines.)
  1073. */
  1074. static u64 normal_pmc_alternative(u64 ev, unsigned long flags)
  1075. {
  1076. u64 alt[MAX_EVENT_ALTERNATIVES];
  1077. int n;
  1078. flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD);
  1079. n = ppmu->get_alternatives(ev, flags, alt);
  1080. if (!n)
  1081. return 0;
  1082. return alt[0];
  1083. }
  1084. /* Number of perf_events counting hardware events */
  1085. static atomic_t num_events;
  1086. /* Used to avoid races in calling reserve/release_pmc_hardware */
  1087. static DEFINE_MUTEX(pmc_reserve_mutex);
  1088. /*
  1089. * Release the PMU if this is the last perf_event.
  1090. */
  1091. static void hw_perf_event_destroy(struct perf_event *event)
  1092. {
  1093. if (!atomic_add_unless(&num_events, -1, 1)) {
  1094. mutex_lock(&pmc_reserve_mutex);
  1095. if (atomic_dec_return(&num_events) == 0)
  1096. release_pmc_hardware();
  1097. mutex_unlock(&pmc_reserve_mutex);
  1098. }
  1099. }
  1100. /*
  1101. * Translate a generic cache event_id config to a raw event_id code.
  1102. */
  1103. static int hw_perf_cache_event(u64 config, u64 *eventp)
  1104. {
  1105. unsigned long type, op, result;
  1106. int ev;
  1107. if (!ppmu->cache_events)
  1108. return -EINVAL;
  1109. /* unpack config */
  1110. type = config & 0xff;
  1111. op = (config >> 8) & 0xff;
  1112. result = (config >> 16) & 0xff;
  1113. if (type >= PERF_COUNT_HW_CACHE_MAX ||
  1114. op >= PERF_COUNT_HW_CACHE_OP_MAX ||
  1115. result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
  1116. return -EINVAL;
  1117. ev = (*ppmu->cache_events)[type][op][result];
  1118. if (ev == 0)
  1119. return -EOPNOTSUPP;
  1120. if (ev == -1)
  1121. return -EINVAL;
  1122. *eventp = ev;
  1123. return 0;
  1124. }
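/*
 * A minimal sketch (not part of this file) of the attribute a caller would
 * use to request L1 data-cache read misses.  The config packing mirrors the
 * unpacking above: (result << 16) | (op << 8) | type, i.e. 0x10000 here.
 */
#if 0
static struct perf_event_attr example_l1d_read_miss = {
	.type	= PERF_TYPE_HW_CACHE,
	.config	= (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) |
		  (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		  PERF_COUNT_HW_CACHE_L1D,
};
#endif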
  1125. static int power_pmu_event_init(struct perf_event *event)
  1126. {
  1127. u64 ev;
  1128. unsigned long flags;
  1129. struct perf_event *ctrs[MAX_HWEVENTS];
  1130. u64 events[MAX_HWEVENTS];
  1131. unsigned int cflags[MAX_HWEVENTS];
  1132. int n;
  1133. int err;
  1134. struct cpu_hw_events *cpuhw;
  1135. if (!ppmu)
  1136. return -ENOENT;
  1137. if (has_branch_stack(event)) {
  1138. /* PMU has BHRB enabled */
  1139. if (!(ppmu->flags & PPMU_BHRB))
  1140. return -EOPNOTSUPP;
  1141. }
  1142. switch (event->attr.type) {
  1143. case PERF_TYPE_HARDWARE:
  1144. ev = event->attr.config;
  1145. if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
  1146. return -EOPNOTSUPP;
  1147. ev = ppmu->generic_events[ev];
  1148. break;
  1149. case PERF_TYPE_HW_CACHE:
  1150. err = hw_perf_cache_event(event->attr.config, &ev);
  1151. if (err)
  1152. return err;
  1153. break;
  1154. case PERF_TYPE_RAW:
  1155. ev = event->attr.config;
  1156. break;
  1157. default:
  1158. return -ENOENT;
  1159. }
  1160. event->hw.config_base = ev;
  1161. event->hw.idx = 0;
  1162. /*
  1163. * If we are not running on a hypervisor, force the
  1164. * exclude_hv bit to 0 so that we don't care what
  1165. * the user set it to.
  1166. */
  1167. if (!firmware_has_feature(FW_FEATURE_LPAR))
  1168. event->attr.exclude_hv = 0;
  1169. /*
  1170. * If this is a per-task event, then we can use
  1171. * PM_RUN_* events interchangeably with their non RUN_*
  1172. * equivalents, e.g. PM_RUN_CYC instead of PM_CYC.
  1173. * XXX we should check if the task is an idle task.
  1174. */
  1175. flags = 0;
  1176. if (event->attach_state & PERF_ATTACH_TASK)
  1177. flags |= PPMU_ONLY_COUNT_RUN;
  1178. /*
  1179. * If this machine has limited events, check whether this
  1180. * event_id could go on a limited event.
  1181. */
  1182. if (ppmu->flags & PPMU_LIMITED_PMC5_6) {
  1183. if (can_go_on_limited_pmc(event, ev, flags)) {
  1184. flags |= PPMU_LIMITED_PMC_OK;
  1185. } else if (ppmu->limited_pmc_event(ev)) {
  1186. /*
  1187. * The requested event_id is on a limited PMC,
  1188. * but we can't use a limited PMC; see if any
  1189. * alternative goes on a normal PMC.
  1190. */
  1191. ev = normal_pmc_alternative(ev, flags);
  1192. if (!ev)
  1193. return -EINVAL;
  1194. }
  1195. }
  1196. /*
  1197. * If this is in a group, check if it can go on with all the
  1198. * other hardware events in the group. We assume the event
  1199. * hasn't been linked into its leader's sibling list at this point.
  1200. */
  1201. n = 0;
  1202. if (event->group_leader != event) {
  1203. n = collect_events(event->group_leader, ppmu->n_counter - 1,
  1204. ctrs, events, cflags);
  1205. if (n < 0)
  1206. return -EINVAL;
  1207. }
  1208. events[n] = ev;
  1209. ctrs[n] = event;
  1210. cflags[n] = flags;
  1211. if (check_excludes(ctrs, cflags, n, 1))
  1212. return -EINVAL;
  1213. cpuhw = &get_cpu_var(cpu_hw_events);
  1214. err = power_check_constraints(cpuhw, events, cflags, n + 1);
  1215. if (has_branch_stack(event)) {
  1216. cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
  1217. event->attr.branch_sample_type);
1218. if (cpuhw->bhrb_filter == -1) {
put_cpu_var(cpu_hw_events);
1219. return -EOPNOTSUPP;
}
  1220. }
  1221. put_cpu_var(cpu_hw_events);
  1222. if (err)
  1223. return -EINVAL;
  1224. event->hw.config = events[n];
  1225. event->hw.event_base = cflags[n];
  1226. event->hw.last_period = event->hw.sample_period;
  1227. local64_set(&event->hw.period_left, event->hw.last_period);
  1228. /*
  1229. * See if we need to reserve the PMU.
  1230. * If no events are currently in use, then we have to take a
  1231. * mutex to ensure that we don't race with another task doing
  1232. * reserve_pmc_hardware or release_pmc_hardware.
  1233. */
  1234. err = 0;
  1235. if (!atomic_inc_not_zero(&num_events)) {
  1236. mutex_lock(&pmc_reserve_mutex);
  1237. if (atomic_read(&num_events) == 0 &&
  1238. reserve_pmc_hardware(perf_event_interrupt))
  1239. err = -EBUSY;
  1240. else
  1241. atomic_inc(&num_events);
  1242. mutex_unlock(&pmc_reserve_mutex);
  1243. }
  1244. event->destroy = hw_perf_event_destroy;
  1245. return err;
  1246. }
  1247. static int power_pmu_event_idx(struct perf_event *event)
  1248. {
  1249. return event->hw.idx;
  1250. }
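/*
 * Format the "event=0x.." string shown by the PMU's sysfs event attributes;
 * the CPU-specific drivers wire their attributes up through
 * ppmu->attr_groups, which register_power_pmu() below hands to the core.
 */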
  1251. ssize_t power_events_sysfs_show(struct device *dev,
  1252. struct device_attribute *attr, char *page)
  1253. {
  1254. struct perf_pmu_events_attr *pmu_attr;
  1255. pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
  1256. return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
  1257. }
  1258. struct pmu power_pmu = {
  1259. .pmu_enable = power_pmu_enable,
  1260. .pmu_disable = power_pmu_disable,
  1261. .event_init = power_pmu_event_init,
  1262. .add = power_pmu_add,
  1263. .del = power_pmu_del,
  1264. .start = power_pmu_start,
  1265. .stop = power_pmu_stop,
  1266. .read = power_pmu_read,
  1267. .start_txn = power_pmu_start_txn,
  1268. .cancel_txn = power_pmu_cancel_txn,
  1269. .commit_txn = power_pmu_commit_txn,
  1270. .event_idx = power_pmu_event_idx,
  1271. .flush_branch_stack = power_pmu_flush_branch_stack,
  1272. };
  1273. /* Processing BHRB entries */
  1274. void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
  1275. {
  1276. u64 val;
  1277. u64 addr;
  1278. int r_index, u_index, target, pred;
  1279. r_index = 0;
  1280. u_index = 0;
  1281. while (r_index < ppmu->bhrb_nr) {
  1282. /* Assembly read function */
  1283. val = read_bhrb(r_index);
  1284. /* Terminal marker: End of valid BHRB entries */
  1285. if (val == 0) {
  1286. break;
  1287. } else {
  1288. /* BHRB field break up */
  1289. addr = val & BHRB_EA;
1290. pred = !!(val & BHRB_PREDICTION);
1291. target = !!(val & BHRB_TARGET);
  1292. /* Probable Missed entry: Not applicable for POWER8 */
  1293. if ((addr == 0) && (target == 0) && (pred == 1)) {
  1294. r_index++;
  1295. continue;
  1296. }
  1297. /* Real Missed entry: Power8 based missed entry */
  1298. if ((addr == 0) && (target == 1) && (pred == 1)) {
  1299. r_index++;
  1300. continue;
  1301. }
  1302. /* Reserved condition: Not a valid entry */
  1303. if ((addr == 0) && (target == 1) && (pred == 0)) {
  1304. r_index++;
  1305. continue;
  1306. }
  1307. /* Is a target address */
  1308. if (val & BHRB_TARGET) {
  1309. /* First address cannot be a target address */
  1310. if (r_index == 0) {
  1311. r_index++;
  1312. continue;
  1313. }
  1314. /* Update target address for the previous entry */
  1315. cpuhw->bhrb_entries[u_index - 1].to = addr;
  1316. cpuhw->bhrb_entries[u_index - 1].mispred = pred;
  1317. cpuhw->bhrb_entries[u_index - 1].predicted = ~pred;
1318. /* Don't increment u_index */
  1319. r_index++;
  1320. } else {
  1321. /* Update address, flags for current entry */
  1322. cpuhw->bhrb_entries[u_index].from = addr;
  1323. cpuhw->bhrb_entries[u_index].mispred = pred;
  1324. cpuhw->bhrb_entries[u_index].predicted = ~pred;
1325. /* Successfully populated one entry */
  1326. u_index++;
  1327. r_index++;
  1328. }
  1329. }
  1330. }
  1331. cpuhw->bhrb_stack.nr = u_index;
  1332. return;
  1333. }
  1334. /*
  1335. * A counter has overflowed; update its count and record
  1336. * things if requested. Note that interrupts are hard-disabled
  1337. * here so there is no possibility of being interrupted.
  1338. */
  1339. static void record_and_restart(struct perf_event *event, unsigned long val,
  1340. struct pt_regs *regs)
  1341. {
  1342. u64 period = event->hw.sample_period;
  1343. s64 prev, delta, left;
  1344. int record = 0;
  1345. if (event->hw.state & PERF_HES_STOPPED) {
  1346. write_pmc(event->hw.idx, 0);
  1347. return;
  1348. }
  1349. /* we don't have to worry about interrupts here */
  1350. prev = local64_read(&event->hw.prev_count);
  1351. delta = check_and_compute_delta(prev, val);
  1352. local64_add(delta, &event->count);
  1353. /*
  1354. * See if the total period for this event has expired,
  1355. * and update for the next period.
  1356. */
  1357. val = 0;
  1358. left = local64_read(&event->hw.period_left) - delta;
  1359. if (delta == 0)
  1360. left++;
  1361. if (period) {
  1362. if (left <= 0) {
  1363. left += period;
  1364. if (left <= 0)
  1365. left = period;
  1366. record = siar_valid(regs);
  1367. event->hw.last_period = event->hw.sample_period;
  1368. }
  1369. if (left < 0x80000000LL)
  1370. val = 0x80000000LL - left;
  1371. }
  1372. write_pmc(event->hw.idx, val);
  1373. local64_set(&event->hw.prev_count, val);
  1374. local64_set(&event->hw.period_left, left);
  1375. perf_event_update_userpage(event);
  1376. /*
  1377. * Finally record data if requested.
  1378. */
  1379. if (record) {
  1380. struct perf_sample_data data;
  1381. perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
  1382. if (event->attr.sample_type & PERF_SAMPLE_ADDR)
  1383. perf_get_data_addr(regs, &data.addr);
  1384. if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
  1385. struct cpu_hw_events *cpuhw;
  1386. cpuhw = &__get_cpu_var(cpu_hw_events);
  1387. power_pmu_bhrb_read(cpuhw);
  1388. data.br_stack = &cpuhw->bhrb_stack;
  1389. }
  1390. if (perf_event_overflow(event, &data, regs))
  1391. power_pmu_stop(event, 0);
  1392. }
  1393. }
  1394. /*
  1395. * Called from generic code to get the misc flags (i.e. processor mode)
  1396. * for an event_id.
  1397. */
  1398. unsigned long perf_misc_flags(struct pt_regs *regs)
  1399. {
  1400. u32 flags = perf_get_misc_flags(regs);
  1401. if (flags)
  1402. return flags;
  1403. return user_mode(regs) ? PERF_RECORD_MISC_USER :
  1404. PERF_RECORD_MISC_KERNEL;
  1405. }
  1406. /*
  1407. * Called from generic code to get the instruction pointer
  1408. * for an event_id.
  1409. */
  1410. unsigned long perf_instruction_pointer(struct pt_regs *regs)
  1411. {
  1412. bool use_siar = regs_use_siar(regs);
  1413. if (use_siar && siar_valid(regs))
  1414. return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
  1415. else if (use_siar)
  1416. return 0; // no valid instruction pointer
  1417. else
  1418. return regs->nip;
  1419. }
  1420. static bool pmc_overflow_power7(unsigned long val)
  1421. {
  1422. /*
  1423. * Events on POWER7 can roll back if a speculative event doesn't
  1424. * eventually complete. Unfortunately in some rare cases they will
  1425. * raise a performance monitor exception. We need to catch this to
  1426. * ensure we reset the PMC. In all cases the PMC will be 256 or less
  1427. * cycles from overflow.
  1428. *
  1429. * We only do this if the first pass fails to find any overflowing
  1430. * PMCs because a user might set a period of less than 256 and we
  1431. * don't want to mistakenly reset them.
  1432. */
  1433. if ((0x80000000 - val) <= 256)
  1434. return true;
  1435. return false;
  1436. }
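/*
 * A PMC has overflowed when its most significant bit is set: counters are
 * programmed to 0x80000000 - period, so reaching 0x80000000 marks the end
 * of the sampling period.
 */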
  1437. static bool pmc_overflow(unsigned long val)
  1438. {
  1439. if ((int)val < 0)
  1440. return true;
  1441. return false;
  1442. }
  1443. /*
  1444. * Performance monitor interrupt stuff
  1445. */
  1446. static void perf_event_interrupt(struct pt_regs *regs)
  1447. {
  1448. int i, j;
  1449. struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
  1450. struct perf_event *event;
  1451. unsigned long val[8];
  1452. int found, active;
  1453. int nmi;
  1454. if (cpuhw->n_limited)
  1455. freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
  1456. mfspr(SPRN_PMC6));
  1457. perf_read_regs(regs);
  1458. nmi = perf_intr_is_nmi(regs);
  1459. if (nmi)
  1460. nmi_enter();
  1461. else
  1462. irq_enter();
  1463. /* Read all the PMCs since we'll need them a bunch of times */
  1464. for (i = 0; i < ppmu->n_counter; ++i)
  1465. val[i] = read_pmc(i + 1);
  1466. /* Try to find what caused the IRQ */
  1467. found = 0;
  1468. for (i = 0; i < ppmu->n_counter; ++i) {
  1469. if (!pmc_overflow(val[i]))
  1470. continue;
  1471. if (is_limited_pmc(i + 1))
  1472. continue; /* these won't generate IRQs */
  1473. /*
  1474. * We've found one that's overflowed. For active
  1475. * counters we need to log this. For inactive
  1476. * counters, we need to reset it anyway
  1477. */
  1478. found = 1;
  1479. active = 0;
  1480. for (j = 0; j < cpuhw->n_events; ++j) {
  1481. event = cpuhw->event[j];
  1482. if (event->hw.idx == (i + 1)) {
  1483. active = 1;
  1484. record_and_restart(event, val[i], regs);
  1485. break;
  1486. }
  1487. }
  1488. if (!active)
  1489. /* reset non active counters that have overflowed */
  1490. write_pmc(i + 1, 0);
  1491. }
  1492. if (!found && pvr_version_is(PVR_POWER7)) {
  1493. /* check active counters for special buggy p7 overflow */
  1494. for (i = 0; i < cpuhw->n_events; ++i) {
  1495. event = cpuhw->event[i];
  1496. if (!event->hw.idx || is_limited_pmc(event->hw.idx))
  1497. continue;
  1498. if (pmc_overflow_power7(val[event->hw.idx - 1])) {
1499. /* event has overflowed in a buggy way */
  1500. found = 1;
  1501. record_and_restart(event,
  1502. val[event->hw.idx - 1],
  1503. regs);
  1504. }
  1505. }
  1506. }
  1507. if ((!found) && printk_ratelimit())
  1508. printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
  1509. /*
  1510. * Reset MMCR0 to its normal value. This will set PMXE and
  1511. * clear FC (freeze counters) and PMAO (perf mon alert occurred)
  1512. * and thus allow interrupts to occur again.
  1513. * XXX might want to use MSR.PM to keep the events frozen until
  1514. * we get back out of this interrupt.
  1515. */
  1516. write_mmcr0(cpuhw, cpuhw->mmcr[0]);
  1517. if (nmi)
  1518. nmi_exit();
  1519. else
  1520. irq_exit();
  1521. }
  1522. static void power_pmu_setup(int cpu)
  1523. {
  1524. struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
  1525. if (!ppmu)
  1526. return;
  1527. memset(cpuhw, 0, sizeof(*cpuhw));
  1528. cpuhw->mmcr[0] = MMCR0_FC;
  1529. }
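/*
 * CPU hotplug callback: reset the per-cpu PMU state (frozen, no events)
 * before a CPU comes online.
 */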
  1530. static int __cpuinit
  1531. power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
  1532. {
  1533. unsigned int cpu = (long)hcpu;
  1534. switch (action & ~CPU_TASKS_FROZEN) {
  1535. case CPU_UP_PREPARE:
  1536. power_pmu_setup(cpu);
  1537. break;
  1538. default:
  1539. break;
  1540. }
  1541. return NOTIFY_OK;
  1542. }
  1543. int __cpuinit register_power_pmu(struct power_pmu *pmu)
  1544. {
  1545. if (ppmu)
  1546. return -EBUSY; /* something's already registered */
  1547. ppmu = pmu;
  1548. pr_info("%s performance monitor hardware support registered\n",
  1549. pmu->name);
  1550. power_pmu.attr_groups = ppmu->attr_groups;
  1551. #ifdef MSR_HV
  1552. /*
  1553. * Use FCHV to ignore kernel events if MSR.HV is set.
  1554. */
  1555. if (mfmsr() & MSR_HV)
  1556. freeze_events_kernel = MMCR0_FCHV;
1557. #endif /* MSR_HV */
  1558. perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
  1559. perf_cpu_notifier(power_pmu_notifier);
  1560. return 0;
  1561. }
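/*
 * Minimal sketch of how a CPU-specific driver hooks into this file via
 * register_power_pmu(), typically from an early initcall.  Everything below
 * is illustrative: the values and example_* helpers are placeholders, not
 * real hardware data (see the per-CPU drivers such as power7-pmu.c for the
 * real tables).  Only fields consumed by this file are shown.
 */
#if 0
static struct power_pmu example_pmu = {
	.name		= "EXAMPLE",
	.n_counter	= 6,			/* placeholder */
	.add_fields	= 0x0000000000000055ul,	/* placeholder */
	.test_adder	= 0x0000000000000000ul,	/* placeholder */
	.compute_mmcr	= example_compute_mmcr,
	.get_constraint	= example_get_constraint,
	.get_alternatives = example_get_alternatives,
	.disable_pmc	= example_disable_pmc,
	.flags		= 0,
	.n_generic	= ARRAY_SIZE(example_generic_events),
	.generic_events	= example_generic_events,
	.cache_events	= &example_cache_events,
};

static int __init init_example_pmu(void)
{
	return register_power_pmu(&example_pmu);
}
early_initcall(init_example_pmu);
#endif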