op_model_ppro.c

/*
 * @file op_model_ppro.c
 * Family 6 perfmon and architectural perfmon MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Andi Kleen
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>

#include "op_x86_model.h"
#include "op_counter.h"
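
/*
 * Classic P6 defaults: two counters, 32 bits wide.  When architectural
 * perfmon is available, num_counters is overridden from CPUID leaf 0xA
 * in arch_perfmon_setup_counters() and counter_width in ppro_setup_ctrs().
 */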
static int num_counters = 2;
static int counter_width = 32;

#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL << 32) | (1ULL << 21))

static u64 *reset_value;
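
/*
 * Reserve the performance counter and event-select MSRs through the
 * perfctr NMI reservation interface.  Any MSR that cannot be reserved
 * gets address 0 and is skipped by the other callbacks.
 */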
static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
        int i;

        for (i = 0; i < num_counters; i++) {
                if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
                        msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
                else
                        msrs->counters[i].addr = 0;
        }

        for (i = 0; i < num_counters; i++) {
                if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
                        msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
                else
                        msrs->controls[i].addr = 0;
        }
}
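
/*
 * Program the counters: widen counter_width if CPUID reports more bits,
 * clear the event-select registers, preset each enabled counter to
 * -count so it overflows after 'count' events, and write the event
 * selection taken from counter_config[].
 */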
static void ppro_setup_ctrs(struct op_x86_model_spec const *model,
                            struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        if (!reset_value) {
                reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
                                      GFP_ATOMIC);
                if (!reset_value)
                        return;
        }

        if (cpu_has_arch_perfmon) {
                union cpuid10_eax eax;
                eax.full = cpuid_eax(0xa);

                /*
                 * For Core2 (family 6, model 15), don't reset the
                 * counter width:
                 */
                if (!(eax.split.version_id == 0 &&
                      current_cpu_data.x86 == 6 &&
                      current_cpu_data.x86_model == 15)) {
                        if (counter_width < eax.split.bit_width)
                                counter_width = eax.split.bit_width;
                }
        }

        /* clear all event-select (control) registers */
        for (i = 0; i < num_counters; ++i) {
                if (unlikely(!msrs->controls[i].addr))
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
                val &= model->reserved;
                wrmsrl(msrs->controls[i].addr, val);
        }

        /* avoid a false detection of ctr overflows in NMI handler */
        for (i = 0; i < num_counters; ++i) {
                if (unlikely(!msrs->counters[i].addr))
                        continue;
                wrmsrl(msrs->counters[i].addr, -1LL);
        }

        /* enable active counters */
        for (i = 0; i < num_counters; ++i) {
                if (counter_config[i].enabled && msrs->counters[i].addr) {
                        reset_value[i] = counter_config[i].count;
                        wrmsrl(msrs->counters[i].addr, -reset_value[i]);
                        rdmsrl(msrs->controls[i].addr, val);
                        val &= model->reserved;
                        val |= op_x86_get_ctrl(model, &counter_config[i]);
                        wrmsrl(msrs->controls[i].addr, val);
                } else {
                        reset_value[i] = 0;
                }
        }
}
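
/*
 * NMI handler: a counter has overflowed when its top bit
 * (bit counter_width - 1) is clear.  Record a sample for each such
 * counter and rearm it with -reset_value.
 */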
static int ppro_check_ctrs(struct pt_regs * const regs,
                           struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsrl(msrs->counters[i].addr, val);
                if (val & (1ULL << (counter_width - 1)))
                        continue;
                oprofile_add_sample(regs, i);
                wrmsrl(msrs->counters[i].addr, -reset_value[i]);
        }

        /*
         * Only the P6-based Pentium M needs to re-unmask the APIC vector,
         * but it doesn't hurt the other P6 variants.
         */
        apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

        /*
         * We can't tell whether we really handled an interrupt.  We might
         * have caught a *second* counter just after it overflowed; when the
         * interrupt for that counter arrives we find no counter that has
         * overflowed, so we would return 0 and get dazed + confused.
         * Instead we always assume we found an overflow.  This sucks.
         */
        return 1;
}
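
/* Set the enable bit in each active counter's event-select register. */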
static void ppro_start(struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        if (!reset_value)
                return;
        for (i = 0; i < num_counters; ++i) {
                if (reset_value[i]) {
                        rdmsrl(msrs->controls[i].addr, val);
                        val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
                        wrmsrl(msrs->controls[i].addr, val);
                }
        }
}
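
/* Clear the enable bit again to stop the active counters. */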
static void ppro_stop(struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        if (!reset_value)
                return;
        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsrl(msrs->controls[i].addr, val);
                val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
                wrmsrl(msrs->controls[i].addr, val);
        }
}
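
/* Release the reserved MSRs and free the per-counter reset values. */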
static void ppro_shutdown(struct op_msrs const * const msrs)
{
        int i;

        for (i = 0; i < num_counters; ++i) {
                if (msrs->counters[i].addr)
                        release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
        }
        for (i = 0; i < num_counters; ++i) {
                if (msrs->controls[i].addr)
                        release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
        }
        if (reset_value) {
                kfree(reset_value);
                reset_value = NULL;
        }
}
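
/* Fixed two-counter model description for the classic P6 family. */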
struct op_x86_model_spec const op_ppro_spec = {
        .num_counters = 2,
        .num_controls = 2,
        .reserved = MSR_PPRO_EVENTSEL_RESERVED,
        .fill_in_addresses = &ppro_fill_in_addresses,
        .setup_ctrs = &ppro_setup_ctrs,
        .check_ctrs = &ppro_check_ctrs,
        .start = &ppro_start,
        .stop = &ppro_stop,
        .shutdown = &ppro_shutdown
};

/*
 * Architectural performance monitoring.
 *
 * Newer Intel CPUs (Core1+) have support for the architectural
 * events described in CPUID leaf 0xA.  See the IA32 SDM, Vol. 3b,
 * Chapter 18 for details.  The advantage is that the counters can
 * be set up without knowing the specific CPU model.
 */
static void arch_perfmon_setup_counters(void)
{
        union cpuid10_eax eax;

        eax.full = cpuid_eax(0xa);

        /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
        if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
            current_cpu_data.x86_model == 15) {
                eax.split.version_id = 2;
                eax.split.num_counters = 2;
                eax.split.bit_width = 40;
        }

        num_counters = eax.split.num_counters;

        op_arch_perfmon_spec.num_counters = num_counters;
        op_arch_perfmon_spec.num_controls = num_counters;
}
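
/* .init hook: query CPUID once so num_counters matches this CPU. */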
static int arch_perfmon_init(struct oprofile_operations *ignore)
{
        arch_perfmon_setup_counters();
        return 0;
}

struct op_x86_model_spec op_arch_perfmon_spec = {
        .reserved = MSR_PPRO_EVENTSEL_RESERVED,
        .init = &arch_perfmon_init,
        /* num_counters/num_controls filled in at runtime */
        .fill_in_addresses = &ppro_fill_in_addresses,
        /* user space does the cpuid check for available events */
        .setup_ctrs = &ppro_setup_ctrs,
        .check_ctrs = &ppro_check_ctrs,
        .start = &ppro_start,
        .stop = &ppro_stop,
        .shutdown = &ppro_shutdown
};