op_model_ppro.c

/*
 * @file op_model_ppro.c
 * Family 6 perfmon and architectural perfmon MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Andi Kleen
 */

#include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>

#include "op_x86_model.h"
#include "op_counter.h"

static int num_counters = 2;
static int counter_width = 32;

#define CTR_OVERFLOWED(n)       (!((n) & (1ULL << (counter_width - 1))))

#define CTRL_CLEAR(x)           (x &= (1 << 21))
#define CTRL_SET_EVENT(val, e)  (val |= e)
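
/*
 * The reservation and control-field helpers used below (CTR_IS_RESERVED,
 * CTRL_IS_RESERVED, CTRL_SET_ENABLE, CTRL_SET_USR, CTRL_SET_KERN,
 * CTRL_SET_UM, CTRL_SET_ACTIVE, CTRL_SET_INACTIVE) are not defined in this
 * excerpt. If op_x86_model.h does not already provide them, the guarded
 * definitions below are a sketch of the conventional oprofile helpers for
 * the P6 EVNTSEL layout (unit mask = bits 8-15, USR = bit 16, OS = bit 17,
 * INT = bit 20, EN = bit 22); treat them as assumptions, not as part of
 * the original file.
 */
#ifndef CTR_IS_RESERVED
#define CTR_IS_RESERVED(msrs, c)  (msrs->counters[(c)].addr ? 1 : 0)
#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_SET_ENABLE(val)      (val |= 1 << 20)   /* APIC interrupt on overflow */
#define CTRL_SET_USR(val, u)      (val |= ((u) & 1) << 16)
#define CTRL_SET_KERN(val, k)     (val |= ((k) & 1) << 17)
#define CTRL_SET_UM(val, m)       (val |= (m) << 8)
#define CTRL_SET_ACTIVE(val)      (val |= 1 << 22)   /* EN bit */
#define CTRL_SET_INACTIVE(val)    (val &= ~(1 << 22))
#endif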

static u64 *reset_value;

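/*
 * Reserve the P6 performance-counter and event-select MSRs with the NMI
 * allocator and record their addresses; an address of 0 marks a
 * counter/control pair that could not be reserved.
 */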
static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
        int i;

        for (i = 0; i < num_counters; i++) {
                if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
                        msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
                else
                        msrs->counters[i].addr = 0;
        }

        for (i = 0; i < num_counters; i++) {
                if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
                        msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
                else
                        msrs->controls[i].addr = 0;
        }
}

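/*
 * Program the counters: allocate the per-counter reset values on first use,
 * widen counter_width from CPUID 0xA where architectural perfmon reports a
 * larger counter, clear every reserved control register, park all counters
 * at -1 so stale values cannot look like overflows, then arm each enabled
 * counter with -count and its event/unit-mask/privilege-level selection.
 */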
static void ppro_setup_ctrs(struct op_msrs const * const msrs)
{
        unsigned int low, high;
        int i;

        if (!reset_value) {
                reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
                                      GFP_ATOMIC);
                if (!reset_value)
                        return;
        }

        if (cpu_has_arch_perfmon) {
                union cpuid10_eax eax;
                eax.full = cpuid_eax(0xa);

                /*
                 * For Core2 (family 6, model 15), don't reset the
                 * counter width:
                 */
                if (!(eax.split.version_id == 0 &&
                      current_cpu_data.x86 == 6 &&
                      current_cpu_data.x86_model == 15)) {
                        if (counter_width < eax.split.bit_width)
                                counter_width = eax.split.bit_width;
                }
        }

        /* clear all counters */
        for (i = 0; i < num_counters; ++i) {
                if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
                        continue;
                rdmsr(msrs->controls[i].addr, low, high);
                CTRL_CLEAR(low);
                wrmsr(msrs->controls[i].addr, low, high);
        }

        /* avoid a false detection of ctr overflows in NMI handler */
        for (i = 0; i < num_counters; ++i) {
                if (unlikely(!CTR_IS_RESERVED(msrs, i)))
                        continue;
                wrmsrl(msrs->counters[i].addr, -1LL);
        }

        /* enable active counters */
        for (i = 0; i < num_counters; ++i) {
                if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
                        reset_value[i] = counter_config[i].count;
                        wrmsrl(msrs->counters[i].addr, -reset_value[i]);
                        rdmsr(msrs->controls[i].addr, low, high);
                        CTRL_CLEAR(low);
                        CTRL_SET_ENABLE(low);
                        CTRL_SET_USR(low, counter_config[i].user);
                        CTRL_SET_KERN(low, counter_config[i].kernel);
                        CTRL_SET_UM(low, counter_config[i].unit_mask);
                        CTRL_SET_EVENT(low, counter_config[i].event);
                        wrmsr(msrs->controls[i].addr, low, high);
                } else {
                        reset_value[i] = 0;
                }
        }
}

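/*
 * NMI handler: for every active counter, check for overflow, log a sample
 * via oprofile_add_sample() and rewrite the counter with -reset_value so it
 * counts down to the next overflow. Always re-unmask LVTPC and report the
 * NMI as handled (see the comments at the end of the function).
 */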
static int ppro_check_ctrs(struct pt_regs * const regs,
                           struct op_msrs const * const msrs)
{
        u64 val;
        int i;

        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsrl(msrs->counters[i].addr, val);
                if (CTR_OVERFLOWED(val)) {
                        oprofile_add_sample(regs, i);
                        wrmsrl(msrs->counters[i].addr, -reset_value[i]);
                }
        }

        /* Only the P6-based Pentium M needs to re-unmask the apic vector,
         * but it doesn't hurt the other P6 variants. */
        apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

        /* We can't tell whether we really handled an interrupt. We might
         * have caught a *second* counter just after it overflowed; the
         * interrupt for that counter then arrives later, we find no
         * overflowed counter, and returning 0 would leave the NMI code
         * dazed and confused. Instead we always claim to have found an
         * overflow. This sucks.
         */
        return 1;
}

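/*
 * Start profiling: set the enable bit in the event-select register of every
 * counter that was armed in ppro_setup_ctrs().
 */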
static void ppro_start(struct op_msrs const * const msrs)
{
        unsigned int low, high;
        int i;

        if (!reset_value)
                return;
        for (i = 0; i < num_counters; ++i) {
                if (reset_value[i]) {
                        rdmsr(msrs->controls[i].addr, low, high);
                        CTRL_SET_ACTIVE(low);
                        wrmsr(msrs->controls[i].addr, low, high);
                }
        }
}

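/*
 * Stop profiling: clear the enable bit of every armed counter; the counter
 * contents and the rest of the event selection are left intact.
 */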
static void ppro_stop(struct op_msrs const * const msrs)
{
        unsigned int low, high;
        int i;

        if (!reset_value)
                return;
        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsr(msrs->controls[i].addr, low, high);
                CTRL_SET_INACTIVE(low);
                wrmsr(msrs->controls[i].addr, low, high);
        }
}

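/*
 * Release the reserved counter and event-select MSRs and free the
 * reset_value array allocated in ppro_setup_ctrs().
 */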
static void ppro_shutdown(struct op_msrs const * const msrs)
{
        int i;

        for (i = 0; i < num_counters; ++i) {
                if (CTR_IS_RESERVED(msrs, i))
                        release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
        }
        for (i = 0; i < num_counters; ++i) {
                if (CTRL_IS_RESERVED(msrs, i))
                        release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
        }
        if (reset_value) {
                kfree(reset_value);
                reset_value = NULL;
        }
}

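/*
 * Model descriptor for classic family 6 CPUs with two programmable
 * counters; the NMI profiling driver is expected to select this spec for
 * known P6-class models that predate architectural perfmon.
 */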
struct op_x86_model_spec const op_ppro_spec = {
        .num_counters = 2,
        .num_controls = 2,
        .fill_in_addresses = &ppro_fill_in_addresses,
        .setup_ctrs = &ppro_setup_ctrs,
        .check_ctrs = &ppro_check_ctrs,
        .start = &ppro_start,
        .stop = &ppro_stop,
        .shutdown = &ppro_shutdown
};

/*
 * Architectural performance monitoring.
 *
 * Newer Intel CPUs (Core1+) have support for architectural
 * events described in CPUID 0xA. See the IA32 SDM Vol3b.18 for details.
 * The advantage of this is that it can be done without knowing about
 * the specific CPU.
 */

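/*
 * Read the number of general-purpose counters from CPUID 0xA and size the
 * architectural-perfmon model spec accordingly. Core 2 (family 6, model 15)
 * BIOSes may leave the reported version at 0, so sane values are
 * substituted for that case.
 */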
static void arch_perfmon_setup_counters(void)
{
        union cpuid10_eax eax;

        eax.full = cpuid_eax(0xa);

        /* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
        if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
            current_cpu_data.x86_model == 15) {
                eax.split.version_id = 2;
                eax.split.num_counters = 2;
                eax.split.bit_width = 40;
        }

        num_counters = eax.split.num_counters;

        op_arch_perfmon_spec.num_counters = num_counters;
        op_arch_perfmon_spec.num_controls = num_counters;
}

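/*
 * Model init hook: only needs to discover the counter configuration; the
 * oprofile_operations argument is unused.
 */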
static int arch_perfmon_init(struct oprofile_operations *ignore)
{
        arch_perfmon_setup_counters();
        return 0;
}

struct op_x86_model_spec op_arch_perfmon_spec = {
        .init = &arch_perfmon_init,
        /* num_counters/num_controls filled in at runtime */
        .fill_in_addresses = &ppro_fill_in_addresses,
        /* user space does the cpuid check for available events */
        .setup_ctrs = &ppro_setup_ctrs,
        .check_ctrs = &ppro_check_ctrs,
        .start = &ppro_start,
        .stop = &ppro_stop,
        .shutdown = &ppro_shutdown
};