op_model_ppro.c

/*
 * @file op_model_ppro.c
 * Family 6 perfmon and architectural perfmon MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Andi Kleen
 */
#include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include <asm/perf_counter.h>

#include "op_x86_model.h"
#include "op_counter.h"

static int num_counters = 2;
static int counter_width = 32;
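
/*
 * Layout of the P6 / architectural perfmon event-select (EVNTSEL) MSRs
 * as manipulated by the CTRL_* macros below:
 *   bits  0-7   event select
 *   bits  8-15  unit mask
 *   bit   16    count in user mode (USR)
 *   bit   17    count in kernel mode (OS)
 *   bit   20    enable APIC interrupt on overflow (INT)
 *   bit   22    counter enable (EN)
 */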
#define CTR_IS_RESERVED(msrs, c)	(msrs->counters[(c)].addr ? 1 : 0)
#define CTR_OVERFLOWED(n)		(!((n) & (1ULL << (counter_width - 1))))
#define CTRL_IS_RESERVED(msrs, c)	(msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c)	do { rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_WRITE(l, h, msrs, c)	do { wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_SET_ACTIVE(n)		((n) |= (1 << 22))
#define CTRL_SET_INACTIVE(n)		((n) &= ~(1 << 22))
#define CTRL_CLEAR(x)			((x) &= (1 << 21))
#define CTRL_SET_ENABLE(val)		((val) |= 1 << 20)
#define CTRL_SET_USR(val, u)		((val) |= (((u) & 1) << 16))
#define CTRL_SET_KERN(val, k)		((val) |= (((k) & 1) << 17))
#define CTRL_SET_UM(val, m)		((val) |= ((m) << 8))
#define CTRL_SET_EVENT(val, e)		((val) |= (e))

static u64 *reset_value;

static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
	int i;

	for (i = 0; i < num_counters; i++) {
		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
		else
			msrs->counters[i].addr = 0;
	}

	for (i = 0; i < num_counters; i++) {
		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
		else
			msrs->controls[i].addr = 0;
	}
}
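
/*
 * Each enabled counter is programmed with -(requested count), so it counts
 * up and wraps past zero after that many events.  CTR_OVERFLOWED() then
 * sees the top bit of the counter_width-wide value cleared, and
 * ppro_check_ctrs() logs a sample and rearms the counter with
 * -reset_value[i] again.
 */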
static void ppro_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	if (!reset_value) {
		reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
				      GFP_ATOMIC);
		if (!reset_value)
			return;
	}

	if (cpu_has_arch_perfmon) {
		union cpuid10_eax eax;
		eax.full = cpuid_eax(0xa);

		/*
		 * For Core2 (family 6, model 15), don't reset the
		 * counter width:
		 */
		if (!(eax.split.version_id == 0 &&
		      current_cpu_data.x86 == 6 &&
		      current_cpu_data.x86_model == 15)) {
			if (counter_width < eax.split.bit_width)
				counter_width = eax.split.bit_width;
		}
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
			continue;
		CTRL_READ(low, high, msrs, i);
		CTRL_CLEAR(low);
		CTRL_WRITE(low, high, msrs, i);
	}

	/* avoid a false detection of ctr overflows in NMI handler */
	for (i = 0; i < num_counters; ++i) {
		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
			continue;
		wrmsrl(msrs->counters[i].addr, -1LL);
	}

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
			reset_value[i] = counter_config[i].count;
			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
			CTRL_READ(low, high, msrs, i);
			CTRL_CLEAR(low);
			CTRL_SET_ENABLE(low);
			CTRL_SET_USR(low, counter_config[i].user);
			CTRL_SET_KERN(low, counter_config[i].kernel);
			CTRL_SET_UM(low, counter_config[i].unit_mask);
			CTRL_SET_EVENT(low, counter_config[i].event);
			CTRL_WRITE(low, high, msrs, i);
		} else {
			reset_value[i] = 0;
		}
	}
}

static int ppro_check_ctrs(struct pt_regs * const regs,
			   struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/*
	 * This can happen if perf counters are in use when
	 * we steal the die notifier NMI.
	 */
	if (unlikely(!reset_value))
		goto out;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		rdmsrl(msrs->counters[i].addr, val);
		if (CTR_OVERFLOWED(val)) {
			oprofile_add_sample(regs, i);
			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
		}
	}

out:
	/* Only P6 based Pentium M needs to re-unmask the apic vector, but
	 * it doesn't hurt other P6 variants. */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* We can't work out whether we really handled an interrupt.  We
	 * might have caught a *second* counter just after it overflowed;
	 * when the interrupt for this counter arrives later we won't find
	 * a counter that has overflowed, and returning 0 would leave the
	 * NMI unhandled ("dazed and confused").  Instead we always claim
	 * we found an overflow.  This sucks.
	 */
	return 1;
}

static void ppro_start(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	if (!reset_value)
		return;
	for (i = 0; i < num_counters; ++i) {
		if (reset_value[i]) {
			CTRL_READ(low, high, msrs, i);
			CTRL_SET_ACTIVE(low);
			CTRL_WRITE(low, high, msrs, i);
		}
	}
}

static void ppro_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	if (!reset_value)
		return;
	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CTRL_READ(low, high, msrs, i);
		CTRL_SET_INACTIVE(low);
		CTRL_WRITE(low, high, msrs, i);
	}
}

static void ppro_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (CTR_IS_RESERVED(msrs, i))
			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
	}
	for (i = 0; i < num_counters; ++i) {
		if (CTRL_IS_RESERVED(msrs, i))
			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
	}
	if (reset_value) {
		kfree(reset_value);
		reset_value = NULL;
	}
}
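
/*
 * op_ppro_spec wires the handlers above into the oprofile NMI driver
 * (nmi_int.c), which calls them roughly in this order:
 * fill_in_addresses() and setup_ctrs() when profiling is configured,
 * start()/stop() around each profiling run, check_ctrs() from the NMI
 * handler on every counter interrupt, and shutdown() when the MSRs are
 * released again.
 */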
struct op_x86_model_spec op_ppro_spec = {
	.num_counters = 2,	/* can be overridden */
	.num_controls = 2,	/* ditto */
	.fill_in_addresses = &ppro_fill_in_addresses,
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};

/*
 * Architectural performance monitoring.
 *
 * Newer Intel CPUs (Core1+) have support for architectural
 * events described in CPUID 0xA.  See the IA32 SDM Vol. 3b,
 * chapter 18, for details.  The advantage of this is that it
 * can be done without knowing about the specific CPU.
 */
void arch_perfmon_setup_counters(void)
{
	union cpuid10_eax eax;

	eax.full = cpuid_eax(0xa);

	/* Workaround for BIOS bugs on family 6 / model 15 (Core2).
	 * Taken from perfmon2. */
	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
	    current_cpu_data.x86_model == 15) {
		eax.split.version_id = 2;
		eax.split.num_counters = 2;
		eax.split.bit_width = 40;
	}

	num_counters = eax.split.num_counters;

	op_arch_perfmon_spec.num_counters = num_counters;
	op_arch_perfmon_spec.num_controls = num_counters;
	op_ppro_spec.num_counters = num_counters;
	op_ppro_spec.num_controls = num_counters;
}
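
/*
 * Note: the counter counts in the spec below are only valid after
 * arch_perfmon_setup_counters() has run; the NMI setup code is expected
 * to call it (presumably during model detection) before using the spec.
 */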
struct op_x86_model_spec op_arch_perfmon_spec = {
	/* num_counters/num_controls filled in at runtime */
	.fill_in_addresses = &ppro_fill_in_addresses,
	/* user space does the cpuid check for available events */
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};