perf_counter.c

/*
 * Performance counter x86 architecture code
 *
 * Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2009 Jaswinder Singh Rajput
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_counter.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kdebug.h>
#include <linux/sched.h>

#include <asm/apic.h>

static bool perf_counters_initialized __read_mostly;

/*
 * Number of (generic) HW counters:
 */
static int nr_counters_generic __read_mostly;
static u64 perf_counter_mask __read_mostly;
static u64 counter_value_mask __read_mostly;
static int counter_value_bits __read_mostly;

static int nr_counters_fixed __read_mostly;

struct cpu_hw_counters {
        struct perf_counter *counters[X86_PMC_IDX_MAX];
        unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        unsigned long interrupts;
        u64 throttle_ctrl;
        unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        int enabled;
};

/*
 * struct pmc_x86_ops - performance counter x86 ops
 */
struct pmc_x86_ops {
        u64 (*save_disable_all)(void);
        void (*restore_all)(u64);
        u64 (*get_status)(u64);
        void (*ack_status)(u64);
        void (*enable)(int, u64);
        void (*disable)(int, u64);
        unsigned eventsel;
        unsigned perfctr;
        u64 (*event_map)(int);
        u64 (*raw_event)(u64);
        int max_events;
};
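
/*
 * pmc_ops (below) points at the vendor specific implementation of these
 * ops (pmc_intel_ops or pmc_amd_ops further down); it is selected once
 * at boot by init_hw_perf_counters() and is then called through the
 * generic hw_perf_*() wrappers in this file.
 */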

static struct pmc_x86_ops *pmc_ops;

static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
        .enabled = 1,
};

/*
 * Intel PerfMon v3. Used on Core2 and later.
 */
static const u64 intel_perfmon_event_map[] =
{
        [PERF_COUNT_CPU_CYCLES]          = 0x003c,
        [PERF_COUNT_INSTRUCTIONS]        = 0x00c0,
        [PERF_COUNT_CACHE_REFERENCES]    = 0x4f2e,
        [PERF_COUNT_CACHE_MISSES]        = 0x412e,
        [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
        [PERF_COUNT_BRANCH_MISSES]       = 0x00c5,
        [PERF_COUNT_BUS_CYCLES]          = 0x013c,
};

static u64 pmc_intel_event_map(int event)
{
        return intel_perfmon_event_map[event];
}

static u64 pmc_intel_raw_event(u64 event)
{
#define CORE_EVNTSEL_EVENT_MASK         0x000000FF
#define CORE_EVNTSEL_UNIT_MASK          0x0000FF00
#define CORE_EVNTSEL_COUNTER_MASK       0xFF000000
#define CORE_EVNTSEL_MASK \
        (CORE_EVNTSEL_EVENT_MASK | \
         CORE_EVNTSEL_UNIT_MASK  | \
         CORE_EVNTSEL_COUNTER_MASK)

        return event & CORE_EVNTSEL_MASK;
}
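
/*
 * Illustrative example: a raw hw_event->type of 0x0053412e is masked down
 * to 0x412e here - the low byte selects the event (0x2e), the next byte
 * the unit mask (0x41), and bits 16-23, which CORE_EVNTSEL_MASK does not
 * cover, are cleared before the value is merged into hwc->config by
 * __hw_perf_counter_init().
 */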

/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
        [PERF_COUNT_CPU_CYCLES]          = 0x0076,
        [PERF_COUNT_INSTRUCTIONS]        = 0x00c0,
        [PERF_COUNT_CACHE_REFERENCES]    = 0x0080,
        [PERF_COUNT_CACHE_MISSES]        = 0x0081,
        [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4,
        [PERF_COUNT_BRANCH_MISSES]       = 0x00c5,
};

static u64 pmc_amd_event_map(int event)
{
        return amd_perfmon_event_map[event];
}

static u64 pmc_amd_raw_event(u64 event)
{
#define K7_EVNTSEL_EVENT_MASK   0x7000000FF
#define K7_EVNTSEL_UNIT_MASK    0x00000FF00
#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000
#define K7_EVNTSEL_MASK \
        (K7_EVNTSEL_EVENT_MASK | \
         K7_EVNTSEL_UNIT_MASK  | \
         K7_EVNTSEL_COUNTER_MASK)

        return event & K7_EVNTSEL_MASK;
}

/*
 * Propagate counter elapsed time into the generic counter.
 * Can only be executed on the CPU where the counter is active.
 * Returns the delta events processed.
 */
static void
x86_perf_counter_update(struct perf_counter *counter,
                        struct hw_perf_counter *hwc, int idx)
{
        u64 prev_raw_count, new_raw_count, delta;

        /*
         * Careful: an NMI might modify the previous counter value.
         *
         * Our tactic to handle this is to first atomically read and
         * exchange a new raw count - then add that new-prev delta
         * count to the generic counter atomically:
         */
again:
        prev_raw_count = atomic64_read(&hwc->prev_count);
        rdmsrl(hwc->counter_base + idx, new_raw_count);

        if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
                             new_raw_count) != prev_raw_count)
                goto again;

        /*
         * Now we have the new raw value and have updated the prev
         * timestamp already. We can now calculate the elapsed delta
         * (counter-)time and add that to the generic counter.
         *
         * Careful, not all hw sign-extends above the physical width
         * of the count, so we do that by clipping the delta to 32 bits:
         */
        delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);

        atomic64_add(delta, &counter->count);
        atomic64_sub(delta, &hwc->period_left);
}
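
/*
 * Illustrative example of the 32-bit clipping above: if the low 32 bits
 * of prev_raw_count are 0xfffffff0 and the counter has since advanced so
 * that the low 32 bits of new_raw_count are 0x00000010, then
 * (s32)0x10 - (s32)0xfffffff0 = 16 - (-16) = 32, so a delta of 32 events
 * is accumulated even though the raw values differ in their upper bits.
 */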

/*
 * Setup the hardware configuration for a given hw_event_type
 */
static int __hw_perf_counter_init(struct perf_counter *counter)
{
        struct perf_counter_hw_event *hw_event = &counter->hw_event;
        struct hw_perf_counter *hwc = &counter->hw;

        if (unlikely(!perf_counters_initialized))
                return -EINVAL;

        /*
         * Generate PMC IRQs:
         * (keep 'enabled' bit clear for now)
         */
        hwc->config = ARCH_PERFMON_EVENTSEL_INT;

        /*
         * Count user and OS events unless requested not to.
         */
        if (!hw_event->exclude_user)
                hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
        if (!hw_event->exclude_kernel)
                hwc->config |= ARCH_PERFMON_EVENTSEL_OS;

        /*
         * If privileged enough, allow NMI events:
         */
        hwc->nmi = 0;
        if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
                hwc->nmi = 1;

        hwc->irq_period = hw_event->irq_period;
        /*
         * Intel PMCs cannot be accessed sanely above 32 bit width,
         * so we install an artificial 1<<31 period regardless of
         * the generic counter period:
         */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
                if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
                        hwc->irq_period = 0x7FFFFFFF;

        atomic64_set(&hwc->period_left, hwc->irq_period);

        /*
         * Raw event types provide the config directly in the event structure:
         */
        if (hw_event->raw) {
                hwc->config |= pmc_ops->raw_event(hw_event->type);
        } else {
                if (hw_event->type >= pmc_ops->max_events)
                        return -EINVAL;
                /*
                 * The generic map:
                 */
                hwc->config |= pmc_ops->event_map(hw_event->type);
        }

        counter->wakeup_pending = 0;

        return 0;
}

static u64 pmc_intel_save_disable_all(void)
{
        u64 ctrl;

        rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

        return ctrl;
}

static u64 pmc_amd_save_disable_all(void)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        int enabled, idx;

        enabled = cpuc->enabled;
        cpuc->enabled = 0;
        barrier();

        for (idx = 0; idx < nr_counters_generic; idx++) {
                u64 val;

                rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
                if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
                        val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
                        wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
                }
        }

        return enabled;
}
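
/*
 * Note: unlike the Intel path above, which can freeze all counters with a
 * single write to MSR_CORE_PERF_GLOBAL_CTRL, the AMD PMU of this era has
 * no global control MSR, so the per-counter EVNTSEL enable bits have to
 * be cleared (and later restored) one by one.
 */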

u64 hw_perf_save_disable(void)
{
        if (unlikely(!perf_counters_initialized))
                return 0;

        return pmc_ops->save_disable_all();
}
/*
 * Exported because of ACPI idle
 */
EXPORT_SYMBOL_GPL(hw_perf_save_disable);

static void pmc_intel_restore_all(u64 ctrl)
{
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
}

static void pmc_amd_restore_all(u64 ctrl)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        int idx;

        cpuc->enabled = ctrl;
        barrier();
        if (!ctrl)
                return;

        for (idx = 0; idx < nr_counters_generic; idx++) {
                if (test_bit(idx, cpuc->active_mask)) {
                        u64 val;

                        rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
                        val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
                        wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
                }
        }
}

void hw_perf_restore(u64 ctrl)
{
        if (unlikely(!perf_counters_initialized))
                return;

        pmc_ops->restore_all(ctrl);
}
/*
 * Exported because of ACPI idle
 */
EXPORT_SYMBOL_GPL(hw_perf_restore);

static u64 pmc_intel_get_status(u64 mask)
{
        u64 status;

        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);

        return status;
}

static u64 pmc_amd_get_status(u64 mask)
{
        u64 status = 0;
        int idx;

        for (idx = 0; idx < nr_counters_generic; idx++) {
                s64 val;

                if (!(mask & (1 << idx)))
                        continue;

                rdmsrl(MSR_K7_PERFCTR0 + idx, val);
                val <<= (64 - counter_value_bits);
                if (val >= 0)
                        status |= (1 << idx);
        }

        return status;
}
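
/*
 * How the AMD overflow check above works (a sketch): counters are armed
 * with (u64)-left in __hw_perf_counter_set_period(), so an active counter
 * has its top implemented bit (bit 47) set until it counts past zero.
 * Shifting the value left by 64 - counter_value_bits = 16 moves that bit
 * into the s64 sign bit, so a non-negative result means the counter has
 * overflowed and its bit is set in the emulated status mask.
 */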

static u64 hw_perf_get_status(u64 mask)
{
        if (unlikely(!perf_counters_initialized))
                return 0;

        return pmc_ops->get_status(mask);
}

static void pmc_intel_ack_status(u64 ack)
{
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}

static void pmc_amd_ack_status(u64 ack)
{
}

static void hw_perf_ack_status(u64 ack)
{
        if (unlikely(!perf_counters_initialized))
                return;

        pmc_ops->ack_status(ack);
}

static void pmc_intel_enable(int idx, u64 config)
{
        wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
               config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}

static void pmc_amd_enable(int idx, u64 config)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

        set_bit(idx, cpuc->active_mask);
        if (cpuc->enabled)
                config |= ARCH_PERFMON_EVENTSEL0_ENABLE;

        wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
}

static void hw_perf_enable(int idx, u64 config)
{
        if (unlikely(!perf_counters_initialized))
                return;

        pmc_ops->enable(idx, config);
}

static void pmc_intel_disable(int idx, u64 config)
{
        wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
}

static void pmc_amd_disable(int idx, u64 config)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);

        clear_bit(idx, cpuc->active_mask);
        wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
}

static void hw_perf_disable(int idx, u64 config)
{
        if (unlikely(!perf_counters_initialized))
                return;

        pmc_ops->disable(idx, config);
}

static inline void
__pmc_fixed_disable(struct perf_counter *counter,
                    struct hw_perf_counter *hwc, unsigned int __idx)
{
        int idx = __idx - X86_PMC_IDX_FIXED;
        u64 ctrl_val, mask;
        int err;

        mask = 0xfULL << (idx * 4);

        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        err = checking_wrmsrl(hwc->config_base, ctrl_val);
}

static inline void
__pmc_generic_disable(struct perf_counter *counter,
                      struct hw_perf_counter *hwc, unsigned int idx)
{
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
                __pmc_fixed_disable(counter, hwc, idx);
        else
                hw_perf_disable(idx, hwc->config);
}

static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);

/*
 * Set the next IRQ period, based on the hwc->period_left value.
 * To be called with the counter disabled in hw:
 */
static void
__hw_perf_counter_set_period(struct perf_counter *counter,
                             struct hw_perf_counter *hwc, int idx)
{
        s64 left = atomic64_read(&hwc->period_left);
        s32 period = hwc->irq_period;
        int err;

        /*
         * If we are way outside a reasonable range then just skip forward:
         */
        if (unlikely(left <= -period)) {
                left = period;
                atomic64_set(&hwc->period_left, left);
        }

        if (unlikely(left <= 0)) {
                left += period;
                atomic64_set(&hwc->period_left, left);
        }

        per_cpu(prev_left[idx], smp_processor_id()) = left;

        /*
         * The hw counter starts counting from this counter offset,
         * mark it to be able to extract future deltas:
         */
        atomic64_set(&hwc->prev_count, (u64)-left);

        err = checking_wrmsrl(hwc->counter_base + idx,
                              (u64)(-left) & counter_value_mask);
}
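
/*
 * Worked example (illustrative): with hwc->irq_period == 100000 and no
 * leftover count, left == 100000, so the counter MSR is written with
 * (u64)-100000 masked to the counter width. The hardware then counts up
 * and overflows after exactly 100000 events, which is what triggers the
 * next counter interrupt for this counter.
 */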

static inline void
__pmc_fixed_enable(struct perf_counter *counter,
                   struct hw_perf_counter *hwc, unsigned int __idx)
{
        int idx = __idx - X86_PMC_IDX_FIXED;
        u64 ctrl_val, bits, mask;
        int err;

        /*
         * Enable IRQ generation (0x8),
         * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
         * if requested:
         */
        bits = 0x8ULL;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
                bits |= 0x2;
        if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
                bits |= 0x1;
        bits <<= (idx * 4);
        mask = 0xfULL << (idx * 4);

        rdmsrl(hwc->config_base, ctrl_val);
        ctrl_val &= ~mask;
        ctrl_val |= bits;
        err = checking_wrmsrl(hwc->config_base, ctrl_val);
}
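
/*
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL packs one 4-bit control nibble per
 * fixed counter, which is why both the enable bits and the clear mask
 * above are shifted by idx * 4. For example, enabling fixed counter 1
 * for user+kernel counting with PMI writes 0xb into bits 4-7 while
 * leaving the nibbles of the other fixed counters untouched.
 */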

static void
__pmc_generic_enable(struct perf_counter *counter,
                     struct hw_perf_counter *hwc, int idx)
{
        if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
                __pmc_fixed_enable(counter, hwc, idx);
        else
                hw_perf_enable(idx, hwc->config);
}

static int
fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
{
        unsigned int event;

        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
                return -1;

        if (unlikely(hwc->nmi))
                return -1;

        event = hwc->config & ARCH_PERFMON_EVENT_MASK;

        if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
                return X86_PMC_IDX_FIXED_INSTRUCTIONS;
        if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
                return X86_PMC_IDX_FIXED_CPU_CYCLES;
        if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
                return X86_PMC_IDX_FIXED_BUS_CYCLES;

        return -1;
}

/*
 * Find a PMC slot for the freshly enabled / scheduled in counter:
 */
static int pmc_generic_enable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        struct hw_perf_counter *hwc = &counter->hw;
        int idx;

        idx = fixed_mode_idx(counter, hwc);
        if (idx >= 0) {
                /*
                 * Try to get the fixed counter, if that is already taken
                 * then try to get a generic counter:
                 */
                if (test_and_set_bit(idx, cpuc->used))
                        goto try_generic;

                hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
                /*
                 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
                 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
                 */
                hwc->counter_base =
                        MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
                hwc->idx = idx;
        } else {
                idx = hwc->idx;
                /* Try to get the previous generic counter again */
                if (test_and_set_bit(idx, cpuc->used)) {
try_generic:
                        idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
                        if (idx == nr_counters_generic)
                                return -EAGAIN;

                        set_bit(idx, cpuc->used);
                        hwc->idx = idx;
                }
                hwc->config_base = pmc_ops->eventsel;
                hwc->counter_base = pmc_ops->perfctr;
        }

        perf_counters_lapic_init(hwc->nmi);

        __pmc_generic_disable(counter, hwc, idx);

        cpuc->counters[idx] = counter;
        /*
         * Make it visible before enabling the hw:
         */
        smp_wmb();

        __hw_perf_counter_set_period(counter, hwc, idx);
        __pmc_generic_enable(counter, hwc, idx);

        return 0;
}

void perf_counter_print_debug(void)
{
        u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
        struct cpu_hw_counters *cpuc;
        int cpu, idx;

        if (!nr_counters_generic)
                return;

        local_irq_disable();

        cpu = smp_processor_id();
        cpuc = &per_cpu(cpu_hw_counters, cpu);

        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
                rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
                rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
                rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);

                pr_info("\n");
                pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
                pr_info("CPU#%d: status: %016llx\n", cpu, status);
                pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
                pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
        }
        pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used);

        for (idx = 0; idx < nr_counters_generic; idx++) {
                rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
                rdmsrl(pmc_ops->perfctr + idx, pmc_count);

                prev_left = per_cpu(prev_left[idx], cpu);

                pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
                        cpu, idx, pmc_ctrl);
                pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
                        cpu, idx, pmc_count);
                pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
                        cpu, idx, prev_left);
        }
        for (idx = 0; idx < nr_counters_fixed; idx++) {
                rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);

                pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
                        cpu, idx, pmc_count);
        }
        local_irq_enable();
}

static void pmc_generic_disable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        struct hw_perf_counter *hwc = &counter->hw;
        unsigned int idx = hwc->idx;

        __pmc_generic_disable(counter, hwc, idx);

        clear_bit(idx, cpuc->used);
        cpuc->counters[idx] = NULL;
        /*
         * Make sure the cleared pointer becomes visible before we
         * (potentially) free the counter:
         */
        smp_wmb();

        /*
         * Drain the remaining delta count out of a counter
         * that we are disabling:
         */
        x86_perf_counter_update(counter, hwc, idx);
}

static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
        struct perf_data *irqdata = counter->irqdata;

        if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
                irqdata->overrun++;
        } else {
                u64 *p = (u64 *) &irqdata->data[irqdata->len];

                *p = data;
                irqdata->len += sizeof(u64);
        }
}

/*
 * Save and restart an expired counter. Called by NMI contexts,
 * so it has to be careful about preempting normal counter ops:
 */
static void perf_save_and_restart(struct perf_counter *counter)
{
        struct hw_perf_counter *hwc = &counter->hw;
        int idx = hwc->idx;

        x86_perf_counter_update(counter, hwc, idx);
        __hw_perf_counter_set_period(counter, hwc, idx);

        if (counter->state == PERF_COUNTER_STATE_ACTIVE)
                __pmc_generic_enable(counter, hwc, idx);
}

static void
perf_handle_group(struct perf_counter *sibling, u64 *status, u64 *overflown)
{
        struct perf_counter *counter, *group_leader = sibling->group_leader;

        /*
         * Store sibling timestamps (if any):
         */
        list_for_each_entry(counter, &group_leader->sibling_list, list_entry) {
                x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
                perf_store_irq_data(sibling, counter->hw_event.type);
                perf_store_irq_data(sibling, atomic64_read(&counter->count));
        }
}
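
/*
 * Note on the record layout above: for PERF_RECORD_GROUP each sibling is
 * appended to the overflowing counter's irqdata buffer as a pair of u64s,
 * its hw_event.type followed by its current count, via the two
 * perf_store_irq_data() calls.
 */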

/*
 * Maximum interrupt frequency of 100KHz per CPU
 */
#define PERFMON_MAX_INTERRUPTS (100000/HZ)
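
/*
 * PERFMON_MAX_INTERRUPTS is a per-tick budget: 100000 interrupts per
 * second divided by the tick rate. With HZ == 1000, for example, a CPU
 * that takes 100 counter interrupts is left throttled (the handler stops
 * restoring the PMU) until perf_counter_unthrottle() re-enables it and
 * resets the count.
 */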

/*
 * This handler is triggered by the local APIC, so the APIC IRQ handling
 * rules apply:
 */
static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
{
        int bit, cpu = smp_processor_id();
        u64 ack, status;
        struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
        int ret = 0;

        cpuc->throttle_ctrl = hw_perf_save_disable();

        status = hw_perf_get_status(cpuc->throttle_ctrl);
        if (!status)
                goto out;

        ret = 1;
again:
        inc_irq_stat(apic_perf_irqs);
        ack = status;
        for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
                struct perf_counter *counter = cpuc->counters[bit];

                clear_bit(bit, (unsigned long *) &status);
                if (!counter)
                        continue;

                perf_save_and_restart(counter);

                switch (counter->hw_event.record_type) {
                case PERF_RECORD_SIMPLE:
                        continue;
                case PERF_RECORD_IRQ:
                        perf_store_irq_data(counter, instruction_pointer(regs));
                        break;
                case PERF_RECORD_GROUP:
                        perf_handle_group(counter, &status, &ack);
                        break;
                }
                /*
                 * From NMI context we cannot call into the scheduler to
                 * do a task wakeup - but we mark these counters as
                 * wakeup_pending and initiate a wakeup callback:
                 */
                if (nmi) {
                        counter->wakeup_pending = 1;
                        set_tsk_thread_flag(current, TIF_PERF_COUNTERS);
                } else {
                        wake_up(&counter->waitq);
                }
        }

        hw_perf_ack_status(ack);

        /*
         * Repeat if there is more work to be done:
         */
        status = hw_perf_get_status(cpuc->throttle_ctrl);
        if (status)
                goto again;
out:
        /*
         * Restore - do not reenable when global enable is off or throttled:
         */
        if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
                hw_perf_restore(cpuc->throttle_ctrl);

        return ret;
}

void perf_counter_unthrottle(void)
{
        struct cpu_hw_counters *cpuc;

        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                return;

        if (unlikely(!perf_counters_initialized))
                return;

        cpuc = &__get_cpu_var(cpu_hw_counters);
        if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
                if (printk_ratelimit())
                        printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
                hw_perf_restore(cpuc->throttle_ctrl);
        }
        cpuc->interrupts = 0;
}

void smp_perf_counter_interrupt(struct pt_regs *regs)
{
        irq_enter();
        apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
        ack_APIC_irq();
        __smp_perf_counter_interrupt(regs, 0);
        irq_exit();
}

/*
 * This handler is triggered by NMI contexts:
 */
void perf_counter_notify(struct pt_regs *regs)
{
        struct cpu_hw_counters *cpuc;
        unsigned long flags;
        int bit, cpu;

        local_irq_save(flags);
        cpu = smp_processor_id();
        cpuc = &per_cpu(cpu_hw_counters, cpu);

        for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
                struct perf_counter *counter = cpuc->counters[bit];

                if (!counter)
                        continue;

                if (counter->wakeup_pending) {
                        counter->wakeup_pending = 0;
                        wake_up(&counter->waitq);
                }
        }

        local_irq_restore(flags);
}

void perf_counters_lapic_init(int nmi)
{
        u32 apic_val;

        if (!perf_counters_initialized)
                return;
        /*
         * Enable the performance counter vector in the APIC LVT:
         */
        apic_val = apic_read(APIC_LVTERR);

        apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
        if (nmi)
                apic_write(APIC_LVTPC, APIC_DM_NMI);
        else
                apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
        apic_write(APIC_LVTERR, apic_val);
}

static int __kprobes
perf_counter_nmi_handler(struct notifier_block *self,
                         unsigned long cmd, void *__args)
{
        struct die_args *args = __args;
        struct pt_regs *regs;
        int ret;

        switch (cmd) {
        case DIE_NMI:
        case DIE_NMI_IPI:
                break;

        default:
                return NOTIFY_DONE;
        }

        regs = args->regs;

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        ret = __smp_perf_counter_interrupt(regs, 1);

        return ret ? NOTIFY_STOP : NOTIFY_OK;
}

static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
        .notifier_call = perf_counter_nmi_handler,
        .next = NULL,
        .priority = 1
};

static struct pmc_x86_ops pmc_intel_ops = {
        .save_disable_all = pmc_intel_save_disable_all,
        .restore_all = pmc_intel_restore_all,
        .get_status = pmc_intel_get_status,
        .ack_status = pmc_intel_ack_status,
        .enable = pmc_intel_enable,
        .disable = pmc_intel_disable,
        .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
        .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
        .event_map = pmc_intel_event_map,
        .raw_event = pmc_intel_raw_event,
        .max_events = ARRAY_SIZE(intel_perfmon_event_map),
};

static struct pmc_x86_ops pmc_amd_ops = {
        .save_disable_all = pmc_amd_save_disable_all,
        .restore_all = pmc_amd_restore_all,
        .get_status = pmc_amd_get_status,
        .ack_status = pmc_amd_ack_status,
        .enable = pmc_amd_enable,
        .disable = pmc_amd_disable,
        .eventsel = MSR_K7_EVNTSEL0,
        .perfctr = MSR_K7_PERFCTR0,
        .event_map = pmc_amd_event_map,
        .raw_event = pmc_amd_raw_event,
        .max_events = ARRAY_SIZE(amd_perfmon_event_map),
};

static struct pmc_x86_ops *pmc_intel_init(void)
{
        union cpuid10_eax eax;
        unsigned int ebx;
        unsigned int unused;
        union cpuid10_edx edx;

        /*
         * Check whether the Architectural PerfMon supports
         * Branch Misses Retired Event or not.
         */
        cpuid(10, &eax.full, &ebx, &unused, &edx.full);
        if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
                return NULL;

        pr_info("Intel Performance Monitoring support detected.\n");
        pr_info("... version: %d\n", eax.split.version_id);
        pr_info("... bit width: %d\n", eax.split.bit_width);
        pr_info("... mask length: %d\n", eax.split.mask_length);

        nr_counters_generic = eax.split.num_counters;
        nr_counters_fixed = edx.split.num_counters_fixed;
        counter_value_mask = (1ULL << eax.split.bit_width) - 1;

        return &pmc_intel_ops;
}

static struct pmc_x86_ops *pmc_amd_init(void)
{
        nr_counters_generic = 4;
        nr_counters_fixed = 0;
        counter_value_mask = 0x0000FFFFFFFFFFFFULL;
        counter_value_bits = 48;

        pr_info("AMD Performance Monitoring support detected.\n");

        return &pmc_amd_ops;
}

void __init init_hw_perf_counters(void)
{
        if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                return;

        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_INTEL:
                pmc_ops = pmc_intel_init();
                break;
        case X86_VENDOR_AMD:
                pmc_ops = pmc_amd_init();
                break;
        }
        if (!pmc_ops)
                return;

        pr_info("... num counters: %d\n", nr_counters_generic);
        if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
                nr_counters_generic = X86_PMC_MAX_GENERIC;
                WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
                     nr_counters_generic, X86_PMC_MAX_GENERIC);
        }
        perf_counter_mask = (1 << nr_counters_generic) - 1;
        perf_max_counters = nr_counters_generic;

        pr_info("... value mask: %016Lx\n", counter_value_mask);

        if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
                nr_counters_fixed = X86_PMC_MAX_FIXED;
                WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
                     nr_counters_fixed, X86_PMC_MAX_FIXED);
        }
        pr_info("... fixed counters: %d\n", nr_counters_fixed);

        perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;

        pr_info("... counter mask: %016Lx\n", perf_counter_mask);

        perf_counters_initialized = true;

        perf_counters_lapic_init(0);
        register_die_notifier(&perf_counter_nmi_notifier);
}
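
/*
 * Example (assuming X86_PMC_IDX_FIXED == 32): on a CPU reporting 4 generic
 * and 3 fixed counters, the code above builds
 *   perf_counter_mask = ((1 << 4) - 1) | (((1LL << 3) - 1) << 32)
 *                     = 0x000000070000000f
 * i.e. generic counters occupy the low bits and fixed counters the bits
 * starting at X86_PMC_IDX_FIXED.
 */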

static void pmc_generic_read(struct perf_counter *counter)
{
        x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
}

static const struct hw_perf_counter_ops x86_perf_counter_ops = {
        .enable = pmc_generic_enable,
        .disable = pmc_generic_disable,
        .read = pmc_generic_read,
};

const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
        int err;

        err = __hw_perf_counter_init(counter);
        if (err)
                return NULL;

        return &x86_perf_counter_ops;
}