perf_event.c
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
 * code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/spinlock.h>
#include <linux/uaccess.h>

#include <asm/cputype.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/pmu.h>
#include <asm/stacktrace.h>

static const struct pmu_irqs *pmu_irqs;

/*
 * Hardware lock to serialize accesses to PMU registers. Needed for the
 * read/modify/write sequences.
 */
DEFINE_SPINLOCK(pmu_lock);

/*
 * ARMv6 supports a maximum of 3 events, starting from index 1. If we add
 * another platform that supports more, we need to increase this to be the
 * largest of all platforms.
 */
#define ARMPMU_MAX_HWEVENTS	4

/* The events for a given CPU. */
struct cpu_hw_events {
	/*
	 * The events that are active on the CPU for the given index. Index 0
	 * is reserved.
	 */
	struct perf_event *events[ARMPMU_MAX_HWEVENTS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];

	/*
	 * A 1 bit for an index indicates that the counter is actively being
	 * used.
	 */
	unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

struct arm_pmu {
	const char *name;
	irqreturn_t (*handle_irq)(int irq_num, void *dev);
	void (*enable)(struct hw_perf_event *evt, int idx);
	void (*disable)(struct hw_perf_event *evt, int idx);
	int (*event_map)(int evt);
	u64 (*raw_event)(u64);
	int (*get_event_idx)(struct cpu_hw_events *cpuc,
			     struct hw_perf_event *hwc);
	u32 (*read_counter)(int idx);
	void (*write_counter)(int idx, u32 val);
	void (*start)(void);
	void (*stop)(void);
	int num_events;
	u64 max_period;
};
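/*
 * struct arm_pmu is the interface a CPU-specific backend fills in: the
 * armv6pmu and armv6mpcore_pmu instances further down provide these hooks,
 * and init_hw_perf_events() points armpmu at the right one for the CPU we
 * boot on. The common armpmu_* helpers in this file only touch the hardware
 * through these callbacks.
 */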
/* Set at runtime when we know what CPU type we are. */
static const struct arm_pmu *armpmu;

#define HW_OP_UNSUPPORTED		0xFFFF

#define C(_x) \
	PERF_COUNT_HW_CACHE_##_x

#define CACHE_OP_UNSUPPORTED		0xFFFF

static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
				     [PERF_COUNT_HW_CACHE_OP_MAX]
				     [PERF_COUNT_HW_CACHE_RESULT_MAX];

static int
armpmu_map_cache_event(u64 config)
{
	unsigned int cache_type, cache_op, cache_result, ret;

	cache_type = (config >> 0) & 0xff;
	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
		return -EINVAL;

	cache_op = (config >> 8) & 0xff;
	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
		return -EINVAL;

	cache_result = (config >> 16) & 0xff;
	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];

	if (ret == CACHE_OP_UNSUPPORTED)
		return -ENOENT;

	return ret;
}
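/*
 * Worked example of the decoding above: a perf_event_attr with
 * type == PERF_TYPE_HW_CACHE and
 * config == C(L1D) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16)
 * indexes armpmu_perf_cache_map[L1D][OP_READ][RESULT_MISS], which the
 * ARMv6 table below maps to ARMV6_PERFCTR_DCACHE_MISS.
 */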
static int
armpmu_event_set_period(struct perf_event *event,
			struct hw_perf_event *hwc,
			int idx)
{
	s64 left = atomic64_read(&hwc->period_left);
	s64 period = hwc->sample_period;
	int ret = 0;

	if (unlikely(left <= -period)) {
		left = period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (unlikely(left <= 0)) {
		left += period;
		atomic64_set(&hwc->period_left, left);
		hwc->last_period = period;
		ret = 1;
	}

	if (left > (s64)armpmu->max_period)
		left = armpmu->max_period;

	atomic64_set(&hwc->prev_count, (u64)-left);

	armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);

	perf_event_update_userpage(event);

	return ret;
}
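/*
 * Example of the arithmetic above: with a sample_period of 1000 and a fresh
 * event, left == 1000, so the counter is programmed with (u64)-1000
 * truncated to 32 bits (0xfffffc18). After 1000 increments the counter
 * wraps to zero, sets its overflow flag and, with the interrupt enable bit
 * set, raises the PMU interrupt - i.e. it fires once per period.
 */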
static u64
armpmu_event_update(struct perf_event *event,
		    struct hw_perf_event *hwc,
		    int idx)
{
	int shift = 64 - 32;
	s64 prev_raw_count, new_raw_count;
	s64 delta;

again:
	prev_raw_count = atomic64_read(&hwc->prev_count);
	new_raw_count = armpmu->read_counter(idx);

	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
			     new_raw_count) != prev_raw_count)
		goto again;

	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	atomic64_add(delta, &event->count);
	atomic64_sub(delta, &hwc->period_left);

	return new_raw_count;
}
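/*
 * The shift by (64 - 32) above computes (new - prev) modulo 2^32, which
 * gives the right delta even when the 32-bit hardware counter has wrapped
 * between two reads: prev == 0xfffffff0 and new == 0x00000010 yields a
 * delta of 0x20 (32 events) rather than a large negative number.
 */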
static void
armpmu_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	WARN_ON(idx < 0);

	clear_bit(idx, cpuc->active_mask);
	armpmu->disable(hwc, idx);

	barrier();

	armpmu_event_update(event, hwc, idx);
	cpuc->events[idx] = NULL;
	clear_bit(idx, cpuc->used_mask);

	perf_event_update_userpage(event);
}

static void
armpmu_read(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* Don't read disabled counters! */
	if (hwc->idx < 0)
		return;

	armpmu_event_update(event, hwc, hwc->idx);
}

static void
armpmu_unthrottle(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/*
	 * Set the period again. Some counters can't be stopped, so when we
	 * were throttled we simply disabled the IRQ source and the counter
	 * may have been left counting. If we don't do this step then we may
	 * get an interrupt too soon or *way* too late if the overflow has
	 * happened since disabling.
	 */
	armpmu_event_set_period(event, hwc, hwc->idx);
	armpmu->enable(hwc, hwc->idx);
}

static int
armpmu_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx;
	int err = 0;

	/* If we don't have a space for the counter then finish early. */
	idx = armpmu->get_event_idx(cpuc, hwc);
	if (idx < 0) {
		err = idx;
		goto out;
	}

	/*
	 * If there is an event in the counter we are going to use then make
	 * sure it is disabled.
	 */
	event->hw.idx = idx;
	armpmu->disable(hwc, idx);
	cpuc->events[idx] = event;
	set_bit(idx, cpuc->active_mask);

	/* Set the period for the event. */
	armpmu_event_set_period(event, hwc, idx);

	/* Enable the event. */
	armpmu->enable(hwc, idx);

	/* Propagate our changes to the userspace mapping. */
	perf_event_update_userpage(event);

out:
	return err;
}

static struct pmu pmu = {
	.enable     = armpmu_enable,
	.disable    = armpmu_disable,
	.unthrottle = armpmu_unthrottle,
	.read       = armpmu_read,
};
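/*
 * This struct pmu is what the core perf layer drives: hw_perf_event_init()
 * below hands it back to the core, and its callbacks are the
 * architecture-independent entry points that dispatch through the
 * CPU-specific armpmu hooks.
 */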
static int
validate_event(struct cpu_hw_events *cpuc,
	       struct perf_event *event)
{
	struct hw_perf_event fake_event = event->hw;

	if (event->pmu && event->pmu != &pmu)
		return 0;

	return armpmu->get_event_idx(cpuc, &fake_event) >= 0;
}

static int
validate_group(struct perf_event *event)
{
	struct perf_event *sibling, *leader = event->group_leader;
	struct cpu_hw_events fake_pmu;

	memset(&fake_pmu, 0, sizeof(fake_pmu));

	if (!validate_event(&fake_pmu, leader))
		return -ENOSPC;

	list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
		if (!validate_event(&fake_pmu, sibling))
			return -ENOSPC;
	}

	if (!validate_event(&fake_pmu, event))
		return -ENOSPC;

	return 0;
}
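/*
 * validate_group() dry-runs counter allocation: it schedules the group
 * leader, every existing sibling and then the new event onto a zeroed
 * fake cpu_hw_events. If get_event_idx() can place them all, the whole
 * group can be scheduled together on the real hardware; otherwise the new
 * event is rejected up front with -ENOSPC.
 */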
static int
armpmu_reserve_hardware(void)
{
	int i;
	int err;

	pmu_irqs = reserve_pmu();
	if (IS_ERR(pmu_irqs)) {
		pr_warning("unable to reserve pmu\n");
		return PTR_ERR(pmu_irqs);
	}

	init_pmu();

	if (pmu_irqs->num_irqs < 1) {
		pr_err("no irqs for PMUs defined\n");
		return -ENODEV;
	}

	for (i = 0; i < pmu_irqs->num_irqs; ++i) {
		err = request_irq(pmu_irqs->irqs[i], armpmu->handle_irq,
				  IRQF_DISABLED, "armpmu", NULL);
		if (err) {
			pr_warning("unable to request IRQ%d for ARM "
				   "perf counters\n", pmu_irqs->irqs[i]);
			break;
		}
	}

	if (err) {
		for (i = i - 1; i >= 0; --i)
			free_irq(pmu_irqs->irqs[i], NULL);
		release_pmu(pmu_irqs);
		pmu_irqs = NULL;
	}

	return err;
}

static void
armpmu_release_hardware(void)
{
	int i;

	for (i = pmu_irqs->num_irqs - 1; i >= 0; --i)
		free_irq(pmu_irqs->irqs[i], NULL);
	armpmu->stop();

	release_pmu(pmu_irqs);
	pmu_irqs = NULL;
}

static atomic_t active_events = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmu_reserve_mutex);

static void
hw_perf_event_destroy(struct perf_event *event)
{
	if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) {
		armpmu_release_hardware();
		mutex_unlock(&pmu_reserve_mutex);
	}
}
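/*
 * active_events reference-counts users of the PMU hardware:
 * hw_perf_event_init() takes the IRQs and the PMU reservation when the
 * count goes from 0 to 1, and hw_perf_event_destroy() releases them again
 * when the last event goes away. atomic_dec_and_mutex_lock() only takes
 * the mutex when the count actually hits zero, so the common teardown path
 * stays lock-free.
 */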
static int
__hw_perf_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	int mapping, err;

	/* Decode the generic type into an ARM event identifier. */
	if (PERF_TYPE_HARDWARE == event->attr.type) {
		mapping = armpmu->event_map(event->attr.config);
	} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
		mapping = armpmu_map_cache_event(event->attr.config);
	} else if (PERF_TYPE_RAW == event->attr.type) {
		mapping = armpmu->raw_event(event->attr.config);
	} else {
		pr_debug("event type %x not supported\n", event->attr.type);
		return -EOPNOTSUPP;
	}

	if (mapping < 0) {
		pr_debug("event %x:%llx not supported\n", event->attr.type,
			 event->attr.config);
		return mapping;
	}

	/*
	 * Check whether we need to exclude the counter from certain modes.
	 * The ARM performance counters are on all of the time so if someone
	 * has asked us for some excludes then we have to fail.
	 */
	if (event->attr.exclude_kernel || event->attr.exclude_user ||
	    event->attr.exclude_hv || event->attr.exclude_idle) {
		pr_debug("ARM performance counters do not support "
			 "mode exclusion\n");
		return -EPERM;
	}

	/*
	 * We don't assign an index until we actually place the event onto
	 * hardware. Use -1 to signify that we haven't decided where to put
	 * it yet. For SMP systems, each core has its own PMU so we can't do
	 * any clever allocation or constraints checking at this point.
	 */
	hwc->idx = -1;

	/*
	 * Store the event encoding into the config_base field. config and
	 * event_base are unused as the only 2 things we need to know are
	 * the event mapping and the counter to use. The counter to use is
	 * also the index and the config_base is the event type.
	 */
	hwc->config_base = (unsigned long)mapping;
	hwc->config = 0;
	hwc->event_base = 0;

	if (!hwc->sample_period) {
		hwc->sample_period = armpmu->max_period;
		hwc->last_period = hwc->sample_period;
		atomic64_set(&hwc->period_left, hwc->sample_period);
	}

	err = 0;
	if (event->group_leader != event) {
		err = validate_group(event);
		if (err)
			return -EINVAL;
	}

	return err;
}
const struct pmu *
hw_perf_event_init(struct perf_event *event)
{
	int err = 0;

	if (!armpmu)
		return ERR_PTR(-ENODEV);

	event->destroy = hw_perf_event_destroy;

	if (!atomic_inc_not_zero(&active_events)) {
		if (atomic_read(&active_events) > perf_max_events) {
			atomic_dec(&active_events);
			return ERR_PTR(-ENOSPC);
		}

		mutex_lock(&pmu_reserve_mutex);
		if (atomic_read(&active_events) == 0) {
			err = armpmu_reserve_hardware();
		}

		if (!err)
			atomic_inc(&active_events);
		mutex_unlock(&pmu_reserve_mutex);
	}

	if (err)
		return ERR_PTR(err);

	err = __hw_perf_event_init(event);
	if (err)
		hw_perf_event_destroy(event);

	return err ? ERR_PTR(err) : &pmu;
}

void
hw_perf_enable(void)
{
	/* Enable all of the perf events on hardware. */
	int idx;
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!armpmu)
		return;

	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];

		if (!event)
			continue;

		armpmu->enable(&event->hw, idx);
	}

	armpmu->start();
}

void
hw_perf_disable(void)
{
	if (armpmu)
		armpmu->stop();
}
/*
 * ARMv6 Performance counter handling code.
 *
 * ARMv6 has 2 configurable performance counters and a single cycle counter.
 * They all share a single reset bit but can be written to zero so we can use
 * that for a reset.
 *
 * The counters can't be individually enabled or disabled so when we remove
 * one event and replace it with another we could get spurious counts from the
 * wrong event. However, we can take advantage of the fact that the
 * performance counters can export events to the event bus, and the event bus
 * itself can be monitored. This requires that we *don't* export the events to
 * the event bus. The procedure for disabling a configurable counter is:
 *	- change the counter to count the ETMEXTOUT[0] signal (0x20). This
 *	  effectively stops the counter from counting.
 *	- disable the counter's interrupt generation (each counter has its
 *	  own interrupt enable bit).
 * Once stopped, the counter value can be written as 0 to reset.
 *
 * To enable a counter:
 *	- enable the counter's interrupt generation.
 *	- set the new event type.
 *
 * Note: the dedicated cycle counter only counts cycles and can't be
 * enabled/disabled independently of the others. When we want to disable the
 * cycle counter, we have to just disable the interrupt reporting and start
 * ignoring that counter. When re-enabling, we have to reset the value and
 * enable the interrupt.
 */
enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS = 0x0,
	ARMV6_PERFCTR_IBUF_STALL = 0x1,
	ARMV6_PERFCTR_DDEP_STALL = 0x2,
	ARMV6_PERFCTR_ITLB_MISS = 0x3,
	ARMV6_PERFCTR_DTLB_MISS = 0x4,
	ARMV6_PERFCTR_BR_EXEC = 0x5,
	ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
	ARMV6_PERFCTR_INSTR_EXEC = 0x7,
	ARMV6_PERFCTR_DCACHE_HIT = 0x9,
	ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
	ARMV6_PERFCTR_DCACHE_MISS = 0xB,
	ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
	ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
	ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
	ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
	ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
	ARMV6_PERFCTR_NOP = 0x20,
};

enum armv6_counters {
	ARMV6_CYCLE_COUNTER = 1,
	ARMV6_COUNTER0,
	ARMV6_COUNTER1,
};
/*
 * The hardware events that we support. We do support cache operations but
 * we have Harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
	[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
	[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
	[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};

static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					  [PERF_COUNT_HW_CACHE_OP_MAX]
					  [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		/*
		 * The performance counters don't differentiate between read
		 * and write accesses/misses so this isn't strictly correct,
		 * but it's the best we can do. Writes and reads get
		 * combined.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
			[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		/*
		 * The ARM performance counters can count micro DTLB misses,
		 * micro ITLB misses and main TLB misses. There isn't an event
		 * for TLB misses, so use the micro misses here and if users
		 * want the main TLB misses they can use a raw counter.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
};
enum armv6mpcore_perf_types {
	ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
	ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
	ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
	ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
	ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
	ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
	ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
	ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
	ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
	ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
};
/*
 * The hardware events that we support. We do support cache operations but
 * we have Harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
	[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
	[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
	[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};

static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					[PERF_COUNT_HW_CACHE_OP_MAX]
					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] =
				ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
			[C(RESULT_MISS)] =
				ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] =
				ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
			[C(RESULT_MISS)] =
				ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(DTLB)] = {
		/*
		 * The ARM performance counters can count micro DTLB misses,
		 * micro ITLB misses and main TLB misses. There isn't an event
		 * for TLB misses, so use the micro misses here and if users
		 * want the main TLB misses they can use a raw counter.
		 */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
};
static inline unsigned long
armv6_pmcr_read(void)
{
	u32 val;
	asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
	return val;
}

static inline void
armv6_pmcr_write(unsigned long val)
{
	asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
}

#define ARMV6_PMCR_ENABLE		(1 << 0)
#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)
#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)
#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
#define ARMV6_PMCR_OVERFLOWED_MASK \
	(ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
	 ARMV6_PMCR_CCOUNT_OVERFLOW)
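/*
 * Sketch of how the PMCR bits above are used by the enable path: to make
 * counter 0 count data-cache misses with its interrupt enabled, the code
 * below clears ARMV6_PMCR_EVT_COUNT0_MASK and ORs in
 * (ARMV6_PERFCTR_DCACHE_MISS << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
 * ARMV6_PMCR_COUNT0_IEN, with ARMV6_PMCR_ENABLE being set once
 * armv6pmu_start() runs.
 */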
static inline int
armv6_pmcr_has_overflowed(unsigned long pmcr)
{
	return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
}

static inline int
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
				  enum armv6_counters counter)
{
	int ret = 0;

	if (ARMV6_CYCLE_COUNTER == counter)
		ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
	else if (ARMV6_COUNTER0 == counter)
		ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
	else if (ARMV6_COUNTER1 == counter)
		ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);

	return ret;
}

static inline u32
armv6pmu_read_counter(int counter)
{
	unsigned long value = 0;

	if (ARMV6_CYCLE_COUNTER == counter)
		asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
	else if (ARMV6_COUNTER0 == counter)
		asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
	else if (ARMV6_COUNTER1 == counter)
		asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);

	return value;
}

static inline void
armv6pmu_write_counter(int counter,
		       u32 value)
{
	if (ARMV6_CYCLE_COUNTER == counter)
		asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
	else if (ARMV6_COUNTER0 == counter)
		asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
	else if (ARMV6_COUNTER1 == counter)
		asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
	else
		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}

void
armv6pmu_enable_event(struct hw_perf_event *hwc,
		      int idx)
{
	unsigned long val, mask, evt, flags;

	if (ARMV6_CYCLE_COUNTER == idx) {
		mask = 0;
		evt = ARMV6_PMCR_CCOUNT_IEN;
	} else if (ARMV6_COUNTER0 == idx) {
		mask = ARMV6_PMCR_EVT_COUNT0_MASK;
		evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
		      ARMV6_PMCR_COUNT0_IEN;
	} else if (ARMV6_COUNTER1 == idx) {
		mask = ARMV6_PMCR_EVT_COUNT1_MASK;
		evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
		      ARMV6_PMCR_COUNT1_IEN;
	} else {
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	/*
	 * Mask out the current event and set the counter to count the event
	 * that we're interested in.
	 */
	spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}
static irqreturn_t
armv6pmu_handle_irq(int irq_num,
		    void *dev)
{
	unsigned long pmcr = armv6_pmcr_read();
	struct perf_sample_data data;
	struct cpu_hw_events *cpuc;
	struct pt_regs *regs;
	int idx;

	if (!armv6_pmcr_has_overflowed(pmcr))
		return IRQ_NONE;

	regs = get_irq_regs();

	/*
	 * The interrupts are cleared by writing the overflow flags back to
	 * the control register. All of the other bits don't have any effect
	 * if they are rewritten, so write the whole value back.
	 */
	armv6_pmcr_write(pmcr);

	data.addr = 0;

	cpuc = &__get_cpu_var(cpu_hw_events);
	for (idx = 0; idx <= armpmu->num_events; ++idx) {
		struct perf_event *event = cpuc->events[idx];
		struct hw_perf_event *hwc;

		if (!test_bit(idx, cpuc->active_mask))
			continue;

		/*
		 * We have a single interrupt for all counters. Check that
		 * each counter has overflowed before we process it.
		 */
		if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
			continue;

		hwc = &event->hw;
		armpmu_event_update(event, hwc, idx);
		data.period = event->hw.last_period;
		if (!armpmu_event_set_period(event, hwc, idx))
			continue;

		if (perf_event_overflow(event, 0, &data, regs))
			armpmu->disable(hwc, idx);
	}

	/*
	 * Handle the pending perf events.
	 *
	 * Note: this call *must* be run with interrupts enabled. For
	 * platforms that can have the PMU interrupts raised as a PMI, this
	 * will not work.
	 */
	perf_event_do_pending();

	return IRQ_HANDLED;
}
static void
armv6pmu_start(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val |= ARMV6_PMCR_ENABLE;
	armv6_pmcr_write(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

void
armv6pmu_stop(void)
{
	unsigned long flags, val;

	spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~ARMV6_PMCR_ENABLE;
	armv6_pmcr_write(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static inline int
armv6pmu_event_map(int config)
{
	int mapping = armv6_perf_map[config];
	if (HW_OP_UNSUPPORTED == mapping)
		mapping = -EOPNOTSUPP;
	return mapping;
}

static inline int
armv6mpcore_pmu_event_map(int config)
{
	int mapping = armv6mpcore_perf_map[config];
	if (HW_OP_UNSUPPORTED == mapping)
		mapping = -EOPNOTSUPP;
	return mapping;
}

static u64
armv6pmu_raw_event(u64 config)
{
	return config & 0xff;
}

static int
armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
		       struct hw_perf_event *event)
{
	/* Always place a cycle counter into the cycle counter. */
	if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
		if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
			return -EAGAIN;

		return ARMV6_CYCLE_COUNTER;
	} else {
		/*
		 * For anything other than a cycle counter, try and use
		 * counter0 and counter1.
		 */
		if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) {
			return ARMV6_COUNTER1;
		}

		if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) {
			return ARMV6_COUNTER0;
		}

		/* The counters are all in use. */
		return -EAGAIN;
	}
}
static void
armv6pmu_disable_event(struct hw_perf_event *hwc,
		       int idx)
{
	unsigned long val, mask, evt, flags;

	if (ARMV6_CYCLE_COUNTER == idx) {
		mask = ARMV6_PMCR_CCOUNT_IEN;
		evt = 0;
	} else if (ARMV6_COUNTER0 == idx) {
		mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
		evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
	} else if (ARMV6_COUNTER1 == idx) {
		mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
		evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
	} else {
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	/*
	 * Mask out the current event and set the counter to count the number
	 * of ETM bus signal assertion cycles. The external reporting should
	 * be disabled and so this should never increment.
	 */
	spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static void
armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
			      int idx)
{
	unsigned long val, mask, flags, evt = 0;

	if (ARMV6_CYCLE_COUNTER == idx) {
		mask = ARMV6_PMCR_CCOUNT_IEN;
	} else if (ARMV6_COUNTER0 == idx) {
		mask = ARMV6_PMCR_COUNT0_IEN;
	} else if (ARMV6_COUNTER1 == idx) {
		mask = ARMV6_PMCR_COUNT1_IEN;
	} else {
		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
		return;
	}

	/*
	 * Unlike UP ARMv6, we don't have a way of stopping the counters. We
	 * simply disable the interrupt reporting.
	 */
	spin_lock_irqsave(&pmu_lock, flags);
	val = armv6_pmcr_read();
	val &= ~mask;
	val |= evt;
	armv6_pmcr_write(val);
	spin_unlock_irqrestore(&pmu_lock, flags);
}

static const struct arm_pmu armv6pmu = {
	.name		= "v6",
	.handle_irq	= armv6pmu_handle_irq,
	.enable		= armv6pmu_enable_event,
	.disable	= armv6pmu_disable_event,
	.event_map	= armv6pmu_event_map,
	.raw_event	= armv6pmu_raw_event,
	.read_counter	= armv6pmu_read_counter,
	.write_counter	= armv6pmu_write_counter,
	.get_event_idx	= armv6pmu_get_event_idx,
	.start		= armv6pmu_start,
	.stop		= armv6pmu_stop,
	.num_events	= 3,
	.max_period	= (1LLU << 32) - 1,
};
/*
 * ARMv6mpcore is almost identical to single core ARMv6 with the exception
 * that some of the events have different enumerations and that there is no
 * *hack* to stop the programmable counters. To stop the counters we simply
 * disable the interrupt reporting and update the event. When unthrottling we
 * reset the period and enable the interrupt reporting.
 */
static const struct arm_pmu armv6mpcore_pmu = {
	.name		= "v6mpcore",
	.handle_irq	= armv6pmu_handle_irq,
	.enable		= armv6pmu_enable_event,
	.disable	= armv6mpcore_pmu_disable_event,
	.event_map	= armv6mpcore_pmu_event_map,
	.raw_event	= armv6pmu_raw_event,
	.read_counter	= armv6pmu_read_counter,
	.write_counter	= armv6pmu_write_counter,
	.get_event_idx	= armv6pmu_get_event_idx,
	.start		= armv6pmu_start,
	.stop		= armv6pmu_stop,
	.num_events	= 3,
	.max_period	= (1LLU << 32) - 1,
};

static int __init
init_hw_perf_events(void)
{
	unsigned long cpuid = read_cpuid_id();
	unsigned long implementor = (cpuid & 0xFF000000) >> 24;
	unsigned long part_number = (cpuid & 0xFFF0);

	/* We only support ARM CPUs implemented by ARM at the moment. */
	if (0x41 == implementor) {
		switch (part_number) {
		case 0xB360: /* ARM1136 */
		case 0xB560: /* ARM1156 */
		case 0xB760: /* ARM1176 */
			armpmu = &armv6pmu;
			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
			       sizeof(armv6_perf_cache_map));
			perf_max_events = armv6pmu.num_events;
			break;
		case 0xB020: /* ARM11mpcore */
			armpmu = &armv6mpcore_pmu;
			memcpy(armpmu_perf_cache_map,
			       armv6mpcore_perf_cache_map,
			       sizeof(armv6mpcore_perf_cache_map));
			perf_max_events = armv6mpcore_pmu.num_events;
			break;
		default:
			pr_info("no hardware support available\n");
			perf_max_events = -1;
		}
	}

	if (armpmu)
		pr_info("enabled with %s PMU driver\n",
			armpmu->name);

	return 0;
}
arch_initcall(init_hw_perf_events);
/*
 * Callchain handling code.
 */
static inline void
callchain_store(struct perf_callchain_entry *entry,
		u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}

/*
 * The registers we're interested in are at the end of the variable
 * length saved register structure. The fp points at the end of this
 * structure so the address of this struct is:
 * (struct frame_tail *)(xxx->fp)-1
 *
 * This code has been adapted from the ARM OProfile support.
 */
struct frame_tail {
	struct frame_tail *fp;
	unsigned long sp;
	unsigned long lr;
} __attribute__((packed));
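/*
 * Illustration of the frame layout assumed above: each frame saves
 * {fp, sp, lr} just below the address held in fp, so the walker below
 * starts at (struct frame_tail *)regs->ARM_fp - 1, records the saved lr
 * as the caller's address, and then moves on to buftail.fp - 1 for the
 * next frame further up the user stack.
 */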
/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static struct frame_tail *
user_backtrace(struct frame_tail *tail,
	       struct perf_callchain_entry *entry)
{
	struct frame_tail buftail;

	/* Also check accessibility of one struct frame_tail beyond */
	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
		return NULL;
	if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
		return NULL;

	callchain_store(entry, buftail.lr);

	/*
	 * Frame pointers should strictly progress back up the stack
	 * (towards higher addresses).
	 */
	if (tail >= buftail.fp)
		return NULL;

	return buftail.fp - 1;
}

static void
perf_callchain_user(struct pt_regs *regs,
		    struct perf_callchain_entry *entry)
{
	struct frame_tail *tail;

	callchain_store(entry, PERF_CONTEXT_USER);

	if (!user_mode(regs))
		regs = task_pt_regs(current);

	tail = (struct frame_tail *)regs->ARM_fp - 1;

	while (tail && !((unsigned long)tail & 0x3))
		tail = user_backtrace(tail, entry);
}
/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 */
static int
callchain_trace(struct stackframe *fr,
		void *data)
{
	struct perf_callchain_entry *entry = data;
	callchain_store(entry, fr->pc);
	return 0;
}
static void
perf_callchain_kernel(struct pt_regs *regs,
		      struct perf_callchain_entry *entry)
{
	struct stackframe fr;

	callchain_store(entry, PERF_CONTEXT_KERNEL);
	fr.fp = regs->ARM_fp;
	fr.sp = regs->ARM_sp;
	fr.lr = regs->ARM_lr;
	fr.pc = regs->ARM_pc;
	walk_stackframe(&fr, callchain_trace, entry);
}

static void
perf_do_callchain(struct pt_regs *regs,
		  struct perf_callchain_entry *entry)
{
	int is_user;

	if (!regs)
		return;

	is_user = user_mode(regs);

	if (!current || !current->pid)
		return;

	if (is_user && current->state != TASK_RUNNING)
		return;

	if (!is_user)
		perf_callchain_kernel(regs, entry);

	if (current->mm)
		perf_callchain_user(regs, entry);
}

static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);

struct perf_callchain_entry *
perf_callchain(struct pt_regs *regs)
{
	struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry);

	entry->nr = 0;
	perf_do_callchain(regs, entry);
	return entry;
}