/*
 * @file op_model_ppro.c
 * Family 6 perfmon and architectural perfmon MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Andi Kleen
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>

#include "op_x86_model.h"
#include "op_counter.h"
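
/*
 * Defaults for classic P6: two counters, programmed through the low
 * 32 bits of each counter MSR.  When architectural perfmon is
 * available, both values are re-derived from CPUID leaf 0xA (see
 * ppro_setup_ctrs() and arch_perfmon_setup_counters() below).
 */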
static int num_counters = 2;
static int counter_width = 32;
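
/*
 * Reserved bits of a P6 EVNTSELx MSR: bits 63:32 and bit 21.
 * ppro_setup_ctrs() masks a control register with this so that the
 * reserved bits are preserved while every programmable field is
 * cleared before new control bits are ORed in.
 */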
#define MSR_PPRO_EVENTSEL_RESERVED	((0xFFFFFFFFULL << 32) | (1ULL << 21))

static u64 *reset_value;
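
/*
 * Reserve the counter and event-select MSRs through the NMI-watchdog
 * reservation API and record their addresses.  An address of 0 marks
 * a counter that could not be reserved; the later passes skip it.
 */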
static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
        int i;

        for (i = 0; i < num_counters; i++) {
                if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
                        msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
                else
                        msrs->counters[i].addr = 0;
        }

        for (i = 0; i < num_counters; i++) {
                if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
                        msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
                else
                        msrs->controls[i].addr = 0;
        }
}
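
/*
 * Program the counters: widen counter_width from CPUID 0xA when
 * architectural perfmon is present, clear each control register down
 * to its reserved bits, then arm each enabled counter with the two's
 * complement of its sample count.  Counting up from -reset_value, a
 * counter overflows after exactly reset_value events; e.g. with
 * count = 100000 and a 40-bit counter, the counter effectively
 * starts at 2^40 - 100000.
 */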
static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
                            struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        if (!reset_value) {
                reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
                                      GFP_ATOMIC);
                if (!reset_value)
                        return;
        }

        if (cpu_has_arch_perfmon) {
                union cpuid10_eax eax;
                eax.full = cpuid_eax(0xa);

                /*
                 * For Core2 (family 6, model 15), don't reset the
                 * counter width:
                 */
                if (!(eax.split.version_id == 0 &&
                      current_cpu_data.x86 == 6 &&
                      current_cpu_data.x86_model == 15)) {
                        if (counter_width < eax.split.bit_width)
                                counter_width = eax.split.bit_width;
                }
        }

        /* clear all counters */
        for (i = 0; i < num_counters; ++i) {
                if (unlikely(!msrs->controls[i].addr))
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
                val &= model->reserved;
                wrmsrl(msrs->controls[i].addr, val);
        }

        /* avoid a false detection of ctr overflows in NMI handler */
        for (i = 0; i < num_counters; ++i) {
                if (unlikely(!msrs->counters[i].addr))
                        continue;
                wrmsrl(msrs->counters[i].addr, -1LL);
        }

        /* enable active counters */
        for (i = 0; i < num_counters; ++i) {
                if (counter_config[i].enabled && msrs->counters[i].addr) {
                        reset_value[i] = counter_config[i].count;
                        wrmsrl(msrs->counters[i].addr, -reset_value[i]);
                        rdmsrl(msrs->controls[i].addr, val);
                        val &= model->reserved;
                        val |= op_x86_get_ctrl(model, &counter_config[i]);
                        wrmsrl(msrs->controls[i].addr, val);
                } else {
                        reset_value[i] = 0;
                }
        }
}
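
/*
 * NMI handler: a counter armed with -reset_value counts up through
 * the sign bit, so a clear top bit means it has wrapped past zero.
 * Each overflowed counter is sampled and re-armed.
 */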
static int ppro_check_ctrs(struct pt_regs * const regs,
                           struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        /*
         * This can happen if perf counters are in use when
         * we steal the die notifier NMI.
         */
        if (unlikely(!reset_value))
                goto out;

        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsrl(msrs->counters[i].addr, val);
                if (val & (1ULL << (counter_width - 1)))
                        continue;
                oprofile_add_sample(regs, i);
                wrmsrl(msrs->counters[i].addr, -reset_value[i]);
        }

out:
        /* Only the P6-based Pentium M needs to re-unmask the apic
         * vector, but it doesn't hurt other P6 variants */
        apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

        /* We can't tell whether we really handled an interrupt.  We
         * might have caught a *second* counter just after it
         * overflowed; when the interrupt for this counter arrives
         * later we find no counter that has overflowed, so we would
         * return 0 and the NMI would be reported as unhandled.
         * Instead we always claim we found an overflow.  This sucks.
         */
        return 1;
}
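
/* Set the enable bit in each active counter's event-select MSR. */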
static void ppro_start(struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        if (!reset_value)
                return;
        for (i = 0; i < num_counters; ++i) {
                if (reset_value[i]) {
                        rdmsrl(msrs->controls[i].addr, val);
                        val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
                        wrmsrl(msrs->controls[i].addr, val);
                }
        }
}
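
/* Clear the enable bit again; the rest of the event-select setup
 * survives, so ppro_start() can re-enable counting cheaply. */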
static void ppro_stop(struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        if (!reset_value)
                return;
        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
                val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
                wrmsrl(msrs->controls[i].addr, val);
        }
}
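
/* Release every MSR reserved in ppro_fill_in_addresses() and free
 * the per-counter reset values. */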
static void ppro_shutdown(struct op_msrs const * const msrs)
{
        int i;

        for (i = 0; i < num_counters; ++i) {
                if (msrs->counters[i].addr)
                        release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
        }
        for (i = 0; i < num_counters; ++i) {
                if (msrs->controls[i].addr)
                        release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
        }
        if (reset_value) {
                kfree(reset_value);
                reset_value = NULL;
        }
}

struct op_x86_model_spec op_ppro_spec = {
        .num_counters           = 2,
        .num_controls           = 2,
        .reserved               = MSR_PPRO_EVENTSEL_RESERVED,
        .fill_in_addresses      = &ppro_fill_in_addresses,
        .setup_ctrs             = &ppro_setup_ctrs,
        .check_ctrs             = &ppro_check_ctrs,
        .start                  = &ppro_start,
        .stop                   = &ppro_stop,
        .shutdown               = &ppro_shutdown
};

/*
 * Architectural performance monitoring.
 *
 * Newer Intel CPUs (Core 1 and later) support the architectural
 * performance events described by CPUID leaf 0xA; see the Intel SDM
 * Vol. 3b, chapter 18, for details.  The advantage is that the
 * counters can be set up without model-specific knowledge of the CPU.
 */
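
/*
 * CPUID.0xA:EAX packs version_id (bits 7:0), num_events (bits 15:8)
 * and bit_width (bits 23:16).  For example (value from an assumed
 * Nehalem-class part, for illustration only): EAX = 0x07300403
 * decodes to version 3, 4 counters, 48 bits per counter.
 */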
static void arch_perfmon_setup_counters(void)
{
        union cpuid10_eax eax;

        eax.full = cpuid_eax(0xa);

        /* Workaround for BIOS bugs on family 6, model 15 (Core 2).
         * Taken from perfmon2. */
        if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
            current_cpu_data.x86_model == 15) {
                eax.split.version_id = 2;
                eax.split.num_events = 2;
                eax.split.bit_width = 40;
        }

        num_counters = eax.split.num_events;

        op_arch_perfmon_spec.num_counters = num_counters;
        op_arch_perfmon_spec.num_controls = num_counters;
}
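
/* Init hook: called once at registration time, only to size the
 * spec from CPUID before any MSRs are touched. */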
static int arch_perfmon_init(struct oprofile_operations *ignore)
{
        arch_perfmon_setup_counters();
        return 0;
}

struct op_x86_model_spec op_arch_perfmon_spec = {
        .reserved               = MSR_PPRO_EVENTSEL_RESERVED,
        .init                   = &arch_perfmon_init,
        /* num_counters/num_controls filled in at runtime */
        .fill_in_addresses      = &ppro_fill_in_addresses,
        /* user space does the cpuid check for available events */
        .setup_ctrs             = &ppro_setup_ctrs,
        .check_ctrs             = &ppro_check_ctrs,
        .start                  = &ppro_start,
        .stop                   = &ppro_stop,
        .shutdown               = &ppro_shutdown
};