op_model_ppro.c 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269
  1. /*
  2. * @file op_model_ppro.c
  3. * Family 6 perfmon and architectural perfmon MSR operations
  4. *
  5. * @remark Copyright 2002 OProfile authors
  6. * @remark Copyright 2008 Intel Corporation
  7. * @remark Read the file COPYING
  8. *
  9. * @author John Levon
  10. * @author Philippe Elie
  11. * @author Graydon Hoare
  12. * @author Andi Kleen
  13. * @author Robert Richter <robert.richter@amd.com>
  14. */
  15. #include <linux/oprofile.h>
  16. #include <linux/slab.h>
  17. #include <asm/ptrace.h>
  18. #include <asm/msr.h>
  19. #include <asm/apic.h>
  20. #include <asm/nmi.h>
  21. #include "op_x86_model.h"
  22. #include "op_counter.h"
  23. static int num_counters = 2;
  24. static int counter_width = 32;
  25. #define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21))
  26. static u64 *reset_value;
  27. static void ppro_fill_in_addresses(struct op_msrs * const msrs)
  28. {
  29. int i;
  30. for (i = 0; i < num_counters; i++) {
  31. if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
  32. msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
  33. }
  34. for (i = 0; i < num_counters; i++) {
  35. if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
  36. msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
  37. }
  38. }
  39. static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
  40. struct op_msrs const * const msrs)
  41. {
  42. u64 val;
  43. int i;
  44. if (!reset_value) {
  45. reset_value = kzalloc(sizeof(reset_value[0]) * num_counters,
  46. GFP_ATOMIC);
  47. if (!reset_value)
  48. return;
  49. }
  50. if (cpu_has_arch_perfmon) {
  51. union cpuid10_eax eax;
  52. eax.full = cpuid_eax(0xa);
  53. /*
  54. * For Core2 (family 6, model 15), don't reset the
  55. * counter width:
  56. */
  57. if (!(eax.split.version_id == 0 &&
  58. current_cpu_data.x86 == 6 &&
  59. current_cpu_data.x86_model == 15)) {
  60. if (counter_width < eax.split.bit_width)
  61. counter_width = eax.split.bit_width;
  62. }
  63. }
  64. /* clear all counters */
  65. for (i = 0; i < num_counters; ++i) {
  66. if (unlikely(!msrs->controls[i].addr)) {
  67. if (counter_config[i].enabled && !smp_processor_id())
  68. /*
  69. * counter is reserved, this is on all
  70. * cpus, so report only for cpu #0
  71. */
  72. op_x86_warn_reserved(i);
  73. continue;
  74. }
  75. rdmsrl(msrs->controls[i].addr, val);
  76. if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
  77. op_x86_warn_in_use(i);
  78. val &= model->reserved;
  79. wrmsrl(msrs->controls[i].addr, val);
  80. }
  81. /* avoid a false detection of ctr overflows in NMI handler */
  82. for (i = 0; i < num_counters; ++i) {
  83. if (unlikely(!msrs->counters[i].addr))
  84. continue;
  85. wrmsrl(msrs->counters[i].addr, -1LL);
  86. }
  87. /* enable active counters */
  88. for (i = 0; i < num_counters; ++i) {
  89. if (counter_config[i].enabled && msrs->counters[i].addr) {
  90. reset_value[i] = counter_config[i].count;
  91. wrmsrl(msrs->counters[i].addr, -reset_value[i]);
  92. rdmsrl(msrs->controls[i].addr, val);
  93. val &= model->reserved;
  94. val |= op_x86_get_ctrl(model, &counter_config[i]);
  95. wrmsrl(msrs->controls[i].addr, val);
  96. } else {
  97. reset_value[i] = 0;
  98. }
  99. }
  100. }
  101. static int ppro_check_ctrs(struct pt_regs * const regs,
  102. struct op_msrs const * const msrs)
  103. {
  104. u64 val;
  105. int i;
  106. /*
  107. * This can happen if perf counters are in use when
  108. * we steal the die notifier NMI.
  109. */
  110. if (unlikely(!reset_value))
  111. goto out;
  112. for (i = 0; i < num_counters; ++i) {
  113. if (!reset_value[i])
  114. continue;
  115. rdmsrl(msrs->counters[i].addr, val);
  116. if (val & (1ULL << (counter_width - 1)))
  117. continue;
  118. oprofile_add_sample(regs, i);
  119. wrmsrl(msrs->counters[i].addr, -reset_value[i]);
  120. }
  121. out:
  122. /* Only P6 based Pentium M need to re-unmask the apic vector but it
  123. * doesn't hurt other P6 variant */
  124. apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
  125. /* We can't work out if we really handled an interrupt. We
  126. * might have caught a *second* counter just after overflowing
  127. * the interrupt for this counter then arrives
  128. * and we don't find a counter that's overflowed, so we
  129. * would return 0 and get dazed + confused. Instead we always
  130. * assume we found an overflow. This sucks.
  131. */
  132. return 1;
  133. }
  134. static void ppro_start(struct op_msrs const * const msrs)
  135. {
  136. u64 val;
  137. int i;
  138. if (!reset_value)
  139. return;
  140. for (i = 0; i < num_counters; ++i) {
  141. if (reset_value[i]) {
  142. rdmsrl(msrs->controls[i].addr, val);
  143. val |= ARCH_PERFMON_EVENTSEL_ENABLE;
  144. wrmsrl(msrs->controls[i].addr, val);
  145. }
  146. }
  147. }
  148. static void ppro_stop(struct op_msrs const * const msrs)
  149. {
  150. u64 val;
  151. int i;
  152. if (!reset_value)
  153. return;
  154. for (i = 0; i < num_counters; ++i) {
  155. if (!reset_value[i])
  156. continue;
  157. rdmsrl(msrs->controls[i].addr, val);
  158. val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
  159. wrmsrl(msrs->controls[i].addr, val);
  160. }
  161. }
  162. static void ppro_shutdown(struct op_msrs const * const msrs)
  163. {
  164. int i;
  165. for (i = 0; i < num_counters; ++i) {
  166. if (msrs->counters[i].addr)
  167. release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
  168. }
  169. for (i = 0; i < num_counters; ++i) {
  170. if (msrs->controls[i].addr)
  171. release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
  172. }
  173. if (reset_value) {
  174. kfree(reset_value);
  175. reset_value = NULL;
  176. }
  177. }
  178. struct op_x86_model_spec op_ppro_spec = {
  179. .num_counters = 2,
  180. .num_controls = 2,
  181. .reserved = MSR_PPRO_EVENTSEL_RESERVED,
  182. .fill_in_addresses = &ppro_fill_in_addresses,
  183. .setup_ctrs = &ppro_setup_ctrs,
  184. .check_ctrs = &ppro_check_ctrs,
  185. .start = &ppro_start,
  186. .stop = &ppro_stop,
  187. .shutdown = &ppro_shutdown
  188. };
  189. /*
  190. * Architectural performance monitoring.
  191. *
  192. * Newer Intel CPUs (Core1+) have support for architectural
  193. * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
  194. * The advantage of this is that it can be done without knowing about
  195. * the specific CPU.
  196. */
  197. static void arch_perfmon_setup_counters(void)
  198. {
  199. union cpuid10_eax eax;
  200. eax.full = cpuid_eax(0xa);
  201. /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
  202. if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
  203. current_cpu_data.x86_model == 15) {
  204. eax.split.version_id = 2;
  205. eax.split.num_counters = 2;
  206. eax.split.bit_width = 40;
  207. }
  208. num_counters = eax.split.num_counters;
  209. op_arch_perfmon_spec.num_counters = num_counters;
  210. op_arch_perfmon_spec.num_controls = num_counters;
  211. }
  /*
   * .init hook of op_arch_perfmon_spec: determines the counter count via
   * arch_perfmon_setup_counters().
   *
   * @ignore: not used
   *
   * Returns 0 unconditionally.
   */
  static int arch_perfmon_init(struct oprofile_operations *ignore)
  {
      arch_perfmon_setup_counters();

      return 0;
  }
  217. struct op_x86_model_spec op_arch_perfmon_spec = {
  218. .reserved = MSR_PPRO_EVENTSEL_RESERVED,
  219. .init = &arch_perfmon_init,
  220. /* num_counters/num_controls filled in at runtime */
  221. .fill_in_addresses = &ppro_fill_in_addresses,
  222. /* user space does the cpuid check for available events */
  223. .setup_ctrs = &ppro_setup_ctrs,
  224. .check_ctrs = &ppro_check_ctrs,
  225. .start = &ppro_start,
  226. .stop = &ppro_stop,
  227. .shutdown = &ppro_shutdown
  228. };