perf_counter.c

/* Performance counter support for sparc64.
 *
 * Copyright (C) 2009 David S. Miller <davem@davemloft.net>
 *
 * This code is based almost entirely upon the x86 perf counter
 * code, which is:
 *
 *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 *  Copyright (C) 2009 Jaswinder Singh Rajput
 *  Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 *  Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 */

#include <linux/perf_counter.h>
#include <linux/kprobes.h>
#include <linux/kernel.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>

#include <asm/cpudata.h>
#include <asm/atomic.h>
#include <asm/nmi.h>
#include <asm/pcr.h>

/* Sparc64 chips have two performance counters, 32-bits each, with
 * overflow interrupts generated on transition from 0xffffffff to 0.
 * The counters are accessed in one go using a 64-bit register.
 *
 * Both counters are controlled using a single control register.  The
 * only way to stop all sampling is to clear all of the context (user,
 * supervisor, hypervisor) sampling enable bits.  But these bits apply
 * to both counters, thus the two counters can't be enabled/disabled
 * individually.
 *
 * The control register has two event fields, one for each of the two
 * counters.  It's thus nearly impossible to have one counter going
 * while keeping the other one stopped.  Therefore it is possible to
 * get overflow interrupts for counters not currently "in use" and
 * that condition must be checked in the overflow interrupt handler.
 *
 * So we use a hack, in that we program inactive counters with the
 * "sw_count0" and "sw_count1" events.  These count how many times
 * the instruction "sethi %hi(0xfc000), %g0" is executed.  It's an
 * unusual way to encode a NOP and therefore will not trigger in
 * normal code.
 */
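
/* Illustrative example (field positions taken from the ultra3i tables
 * below): the upper counter's event field starts at bit 11 and the
 * lower counter's at bit 4, each six bits wide.  The same encoding can
 * mean different things in the two fields; on ultra3i event 0x09
 * counts cache misses in the upper field but cache references in the
 * lower one, which is why the event tables record both an encoding and
 * a mask of the counters it may be scheduled on.
 */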
#define MAX_HWCOUNTERS 2
#define MAX_PERIOD ((1UL << 32) - 1)

#define PIC_UPPER_INDEX 0
#define PIC_LOWER_INDEX 1

#define PIC_UPPER_NOP 0x1c
#define PIC_LOWER_NOP 0x14

struct cpu_hw_counters {
        struct perf_counter *counters[MAX_HWCOUNTERS];
        unsigned long used_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)];
        unsigned long active_mask[BITS_TO_LONGS(MAX_HWCOUNTERS)];
        int enabled;
};
DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { .enabled = 1, };

struct perf_event_map {
        u16 encoding;
        u8 pic_mask;
#define PIC_NONE        0x00
#define PIC_UPPER       0x01
#define PIC_LOWER       0x02
};

struct sparc_pmu {
        const struct perf_event_map *(*event_map)(int);
        int max_events;
        int upper_shift;
        int lower_shift;
        int event_mask;
};

static const struct perf_event_map ultra3i_perfmon_event_map[] = {
        [PERF_COUNT_HW_CPU_CYCLES] = { 0x0000, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_INSTRUCTIONS] = { 0x0001, PIC_UPPER | PIC_LOWER },
        [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x0009, PIC_LOWER },
        [PERF_COUNT_HW_CACHE_MISSES] = { 0x0009, PIC_UPPER },
};

static const struct perf_event_map *ultra3i_event_map(int event)
{
        return &ultra3i_perfmon_event_map[event];
}

static const struct sparc_pmu ultra3i_pmu = {
        .event_map      = ultra3i_event_map,
        .max_events     = ARRAY_SIZE(ultra3i_perfmon_event_map),
        .upper_shift    = 11,
        .lower_shift    = 4,
        .event_mask     = 0x3f,
};

static const struct sparc_pmu *sparc_pmu __read_mostly;

static u64 event_encoding(u64 event, int idx)
{
        if (idx == PIC_UPPER_INDEX)
                event <<= sparc_pmu->upper_shift;
        else
                event <<= sparc_pmu->lower_shift;
        return event;
}

static u64 mask_for_index(int idx)
{
        return event_encoding(sparc_pmu->event_mask, idx);
}

static u64 nop_for_index(int idx)
{
        return event_encoding(idx == PIC_UPPER_INDEX ?
                              PIC_UPPER_NOP : PIC_LOWER_NOP, idx);
}
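
/* A quick sanity check of the helpers above using the ultra3i numbers:
 * mask_for_index(PIC_UPPER_INDEX) is 0x3f << 11 = 0x1f800, and
 * nop_for_index(PIC_UPPER_INDEX) is 0x1c << 11 = 0xe000.  Clearing the
 * mask and OR-ing in the NOP encoding therefore parks just the upper
 * counter while leaving the lower event field untouched.
 */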
static inline void sparc_pmu_enable_counter(struct hw_perf_counter *hwc,
                                            int idx)
{
        u64 val, mask = mask_for_index(idx);

        val = pcr_ops->read();
        pcr_ops->write((val & ~mask) | hwc->config);
}

static inline void sparc_pmu_disable_counter(struct hw_perf_counter *hwc,
                                             int idx)
{
        u64 mask = mask_for_index(idx);
        u64 nop = nop_for_index(idx);
        u64 val = pcr_ops->read();

        pcr_ops->write((val & ~mask) | nop);
}

void hw_perf_enable(void)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        u64 val;
        int i;

        if (cpuc->enabled)
                return;

        cpuc->enabled = 1;
        barrier();

        val = pcr_ops->read();

        for (i = 0; i < MAX_HWCOUNTERS; i++) {
                struct perf_counter *cp = cpuc->counters[i];
                struct hw_perf_counter *hwc;

                if (!cp)
                        continue;
                hwc = &cp->hw;
                val |= hwc->config_base;
        }

        pcr_ops->write(val);
}

void hw_perf_disable(void)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        u64 val;

        if (!cpuc->enabled)
                return;

        cpuc->enabled = 0;

        val = pcr_ops->read();
        val &= ~(PCR_UTRACE | PCR_STRACE);
        pcr_ops->write(val);
}
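
/* Both 32-bit counters live in the single 64-bit PIC register, so the
 * helpers below slice out one half on read and do a read-modify-write
 * on update so the other counter's value is preserved.
 */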
static u32 read_pmc(int idx)
{
        u64 val;

        read_pic(val);
        if (idx == PIC_UPPER_INDEX)
                val >>= 32;

        return val & 0xffffffff;
}

static void write_pmc(int idx, u64 val)
{
        u64 shift, mask, pic;

        shift = 0;
        if (idx == PIC_UPPER_INDEX)
                shift = 32;

        mask = ((u64) 0xffffffff) << shift;
        val <<= shift;

        read_pic(pic);
        pic &= ~mask;
        pic |= val;
        write_pic(pic);
}

static int sparc_perf_counter_set_period(struct perf_counter *counter,
                                         struct hw_perf_counter *hwc, int idx)
{
        s64 left = atomic64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        if (unlikely(left <= -period)) {
                left = period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                atomic64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
        if (left > MAX_PERIOD)
                left = MAX_PERIOD;

        atomic64_set(&hwc->prev_count, (u64)-left);

        write_pmc(idx, (u64)(-left) & 0xffffffff);

        perf_counter_update_userpage(counter);

        return ret;
}
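
/* Worked example of the programming above: the hardware counts up and
 * interrupts on the 0xffffffff -> 0 wrap, so to get an overflow after
 * "left" more events the counter is seeded with the 32-bit two's
 * complement of left.  For left = 0x1000 the PIC half is written with
 * 0xfffff000, and prev_count remembers -0x1000 for the next update.
 */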
static int sparc_pmu_enable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        struct hw_perf_counter *hwc = &counter->hw;
        int idx = hwc->idx;

        if (test_and_set_bit(idx, cpuc->used_mask))
                return -EAGAIN;

        sparc_pmu_disable_counter(hwc, idx);

        cpuc->counters[idx] = counter;
        set_bit(idx, cpuc->active_mask);

        sparc_perf_counter_set_period(counter, hwc, idx);
        sparc_pmu_enable_counter(hwc, idx);
        perf_counter_update_userpage(counter);
        return 0;
}

static u64 sparc_perf_counter_update(struct perf_counter *counter,
                                     struct hw_perf_counter *hwc, int idx)
{
        int shift = 64 - 32;
        u64 prev_raw_count, new_raw_count;
        s64 delta;

again:
        prev_raw_count = atomic64_read(&hwc->prev_count);
        new_raw_count = read_pmc(idx);

        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;

        atomic64_add(delta, &counter->count);
        atomic64_sub(delta, &hwc->period_left);

        return new_raw_count;
}
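
/* The shift dance above sign-extends the 32-bit hardware values so the
 * delta survives a counter wrap.  For instance prev = 0xfffffff0 and
 * new = 0x00000010 gives ((new << 32) - (prev << 32)) >> 32 = 0x20,
 * i.e. 32 events, rather than a huge negative difference.
 */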
static void sparc_pmu_disable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        struct hw_perf_counter *hwc = &counter->hw;
        int idx = hwc->idx;

        clear_bit(idx, cpuc->active_mask);
        sparc_pmu_disable_counter(hwc, idx);

        barrier();

        sparc_perf_counter_update(counter, hwc, idx);
        cpuc->counters[idx] = NULL;
        clear_bit(idx, cpuc->used_mask);

        perf_counter_update_userpage(counter);
}

static void sparc_pmu_read(struct perf_counter *counter)
{
        struct hw_perf_counter *hwc = &counter->hw;
        sparc_perf_counter_update(counter, hwc, hwc->idx);
}

static void sparc_pmu_unthrottle(struct perf_counter *counter)
{
        struct hw_perf_counter *hwc = &counter->hw;
        sparc_pmu_enable_counter(hwc, hwc->idx);
}
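
/* The NMI watchdog drives itself off the same hardware counters, so the
 * first perf counter created stops the watchdog on every CPU and the
 * last one released lets it start again; active_counters plus
 * pmc_grab_mutex arbitrate that hand-over.
 */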
static atomic_t active_counters = ATOMIC_INIT(0);
static DEFINE_MUTEX(pmc_grab_mutex);

void perf_counter_grab_pmc(void)
{
        if (atomic_inc_not_zero(&active_counters))
                return;

        mutex_lock(&pmc_grab_mutex);
        if (atomic_read(&active_counters) == 0) {
                if (atomic_read(&nmi_active) > 0) {
                        on_each_cpu(stop_nmi_watchdog, NULL, 1);
                        BUG_ON(atomic_read(&nmi_active) != 0);
                }
                atomic_inc(&active_counters);
        }
        mutex_unlock(&pmc_grab_mutex);
}

void perf_counter_release_pmc(void)
{
        if (atomic_dec_and_mutex_lock(&active_counters, &pmc_grab_mutex)) {
                if (atomic_read(&nmi_active) == 0)
                        on_each_cpu(start_nmi_watchdog, NULL, 1);
                mutex_unlock(&pmc_grab_mutex);
        }
}

static void hw_perf_counter_destroy(struct perf_counter *counter)
{
        perf_counter_release_pmc();
}

static int __hw_perf_counter_init(struct perf_counter *counter)
{
        struct perf_counter_attr *attr = &counter->attr;
        struct hw_perf_counter *hwc = &counter->hw;
        const struct perf_event_map *pmap;
        u64 enc;

        if (atomic_read(&nmi_active) < 0)
                return -ENODEV;

        if (attr->type != PERF_TYPE_HARDWARE)
                return -EOPNOTSUPP;

        if (attr->config >= sparc_pmu->max_events)
                return -EINVAL;

        perf_counter_grab_pmc();
        counter->destroy = hw_perf_counter_destroy;

        /* We save the enable bits in the config_base.  So to
         * turn off sampling just write 'config', and to enable
         * things write 'config | config_base'.
         */
        hwc->config_base = 0;
        if (!attr->exclude_user)
                hwc->config_base |= PCR_UTRACE;
        if (!attr->exclude_kernel)
                hwc->config_base |= PCR_STRACE;

        if (!hwc->sample_period) {
                hwc->sample_period = MAX_PERIOD;
                hwc->last_period = hwc->sample_period;
                atomic64_set(&hwc->period_left, hwc->sample_period);
        }

        pmap = sparc_pmu->event_map(attr->config);

        enc = pmap->encoding;
        if (pmap->pic_mask & PIC_UPPER) {
                hwc->idx = PIC_UPPER_INDEX;
                enc <<= sparc_pmu->upper_shift;
        } else {
                hwc->idx = PIC_LOWER_INDEX;
                enc <<= sparc_pmu->lower_shift;
        }

        hwc->config |= enc;
        return 0;
}
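
/* A rough sketch of how a request maps onto the init path above (for
 * illustration only, not code from this file): a caller asking for
 *
 *      struct perf_counter_attr attr = {
 *              .type   = PERF_TYPE_HARDWARE,
 *              .config = PERF_COUNT_HW_CACHE_MISSES,
 *      };
 *
 * passes the type/config checks, gets PCR_UTRACE | PCR_STRACE in
 * config_base (neither context excluded), and on ultra3i ends up with
 * encoding 0x09 shifted into the upper counter's event field.
 */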
static const struct pmu pmu = {
        .enable         = sparc_pmu_enable,
        .disable        = sparc_pmu_disable,
        .read           = sparc_pmu_read,
        .unthrottle     = sparc_pmu_unthrottle,
};

const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
{
        int err = __hw_perf_counter_init(counter);

        if (err)
                return ERR_PTR(err);
        return &pmu;
}

void perf_counter_print_debug(void)
{
        unsigned long flags;
        u64 pcr, pic;
        int cpu;

        if (!sparc_pmu)
                return;

        local_irq_save(flags);

        cpu = smp_processor_id();

        pcr = pcr_ops->read();
        read_pic(pic);

        pr_info("\n");
        pr_info("CPU#%d: PCR[%016llx] PIC[%016llx]\n",
                cpu, pcr, pic);

        local_irq_restore(flags);
}
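
/* Overflow interrupts arrive as NMIs.  The handler below walks both
 * counter slots, skips inactive ones (recall from the header comment
 * that idle counters are parked on the NOP events and may still raise
 * interrupts), and treats a value with bit 31 still set as "not yet
 * wrapped", since counters are seeded with the negative of the
 * remaining period.  Only counters that really completed a period are
 * reprogrammed and reported via perf_counter_overflow(), and the
 * notifier returns NOTIFY_STOP to mark the NMI as handled.
 */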
static int __kprobes perf_counter_nmi_handler(struct notifier_block *self,
                                              unsigned long cmd, void *__args)
{
        struct die_args *args = __args;
        struct perf_sample_data data;
        struct cpu_hw_counters *cpuc;
        struct pt_regs *regs;
        int idx;

        if (!atomic_read(&active_counters))
                return NOTIFY_DONE;

        switch (cmd) {
        case DIE_NMI:
                break;

        default:
                return NOTIFY_DONE;
        }

        regs = args->regs;

        data.regs = regs;
        data.addr = 0;

        cpuc = &__get_cpu_var(cpu_hw_counters);
        for (idx = 0; idx < MAX_HWCOUNTERS; idx++) {
                struct perf_counter *counter = cpuc->counters[idx];
                struct hw_perf_counter *hwc;
                u64 val;

                if (!test_bit(idx, cpuc->active_mask))
                        continue;
                hwc = &counter->hw;
                val = sparc_perf_counter_update(counter, hwc, idx);
                if (val & (1ULL << 31))
                        continue;

                data.period = counter->hw.last_period;
                if (!sparc_perf_counter_set_period(counter, hwc, idx))
                        continue;

                if (perf_counter_overflow(counter, 1, &data))
                        sparc_pmu_disable_counter(hwc, idx);
        }

        return NOTIFY_STOP;
}

static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
        .notifier_call = perf_counter_nmi_handler,
};

static bool __init supported_pmu(void)
{
        if (!strcmp(sparc_pmu_type, "ultra3i")) {
                sparc_pmu = &ultra3i_pmu;
                return true;
        }
        return false;
}

void __init init_hw_perf_counters(void)
{
        pr_info("Performance counters: ");

        if (!supported_pmu()) {
                pr_cont("No support for PMU type '%s'\n", sparc_pmu_type);
                return;
        }

        pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type);

        /* All sparc64 PMUs currently have 2 counters.  But this simple
         * driver only supports one active counter at a time.
         */
        perf_max_counters = 1;

        register_die_notifier(&perf_counter_nmi_notifier);
}