op_model_ppro.c

/*
 * @file op_model_ppro.c
 * Family 6 perfmon and architectural perfmon MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Andi Kleen
 */
#include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include <asm/intel_arch_perfmon.h>

#include "op_x86_model.h"
#include "op_counter.h"
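
/*
 * num_counters and counter_width default to the classic P6 PMU
 * (2 counters, 32 bits) and are widened at runtime when
 * architectural perfmon reports more.
 *
 * The CTRL_* macros below manipulate the EVNTSEL MSR bit fields
 * (see the Intel SDM): bits 0-7 select the event, bits 8-15 the
 * unit mask, bit 16 enables user-mode counting, bit 17 kernel-mode
 * counting, bit 20 (INT) raises an interrupt on overflow, and
 * bit 22 (EN) enables the counter.  Counters are programmed with a
 * negated reset value and count upward, so CTR_OVERFLOWED() reports
 * an overflow once the counter has wrapped past zero and its top
 * (sign) bit reads back clear.
 */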
static int num_counters = 2;
static int counter_width = 32;

#define CTR_IS_RESERVED(msrs, c)	(msrs->counters[(c)].addr ? 1 : 0)
#define CTR_OVERFLOWED(n)	(!((n) & (1ULL << (counter_width - 1))))

#define CTRL_IS_RESERVED(msrs, c)	(msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c)	do { rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_WRITE(l, h, msrs, c)	do { wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_SET_ACTIVE(n)	(n |= (1 << 22))
#define CTRL_SET_INACTIVE(n)	(n &= ~(1 << 22))
#define CTRL_CLEAR(x)	(x &= (1 << 21))
#define CTRL_SET_ENABLE(val)	(val |= 1 << 20)
#define CTRL_SET_USR(val, u)	(val |= ((u & 1) << 16))
#define CTRL_SET_KERN(val, k)	(val |= ((k & 1) << 17))
#define CTRL_SET_UM(val, m)	(val |= (m << 8))
#define CTRL_SET_EVENT(val, e)	(val |= e)
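
/* Per-counter reload values; a value of 0 marks the counter as unused. */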
static u64 *reset_value;
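
/*
 * Reserve the counter and event select MSRs through the perfctr NMI
 * reservation interface; an address of 0 marks an MSR we could not
 * reserve.
 */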
static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
	int i;

	for (i = 0; i < num_counters; i++) {
		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
		else
			msrs->counters[i].addr = 0;
	}

	for (i = 0; i < num_counters; i++) {
		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
		else
			msrs->controls[i].addr = 0;
	}
}
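
/*
 * Program the reserved counters from counter_config[]: widen
 * counter_width if architectural perfmon reports more bits, clear
 * every event select, then arm each enabled counter with its
 * negated count and event/mask/privilege bits.
 */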
static void ppro_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	if (!reset_value) {
		/* reset_value holds u64 entries; size the allocation accordingly */
		reset_value = kmalloc(sizeof(*reset_value) * num_counters,
					GFP_ATOMIC);
		if (!reset_value)
			return;
	}

	if (cpu_has_arch_perfmon) {
		union cpuid10_eax eax;
		eax.full = cpuid_eax(0xa);
		if (counter_width < eax.split.bit_width)
			counter_width = eax.split.bit_width;
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
			continue;
		CTRL_READ(low, high, msrs, i);
		CTRL_CLEAR(low);
		CTRL_WRITE(low, high, msrs, i);
	}

	/* avoid a false detection of ctr overflows in NMI handler */
	for (i = 0; i < num_counters; ++i) {
		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
			continue;
		wrmsrl(msrs->counters[i].addr, -1LL);
	}

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
			reset_value[i] = counter_config[i].count;
			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
			CTRL_READ(low, high, msrs, i);
			CTRL_CLEAR(low);
			CTRL_SET_ENABLE(low);
			CTRL_SET_USR(low, counter_config[i].user);
			CTRL_SET_KERN(low, counter_config[i].kernel);
			CTRL_SET_UM(low, counter_config[i].unit_mask);
			CTRL_SET_EVENT(low, counter_config[i].event);
			CTRL_WRITE(low, high, msrs, i);
		} else {
			reset_value[i] = 0;
		}
	}
}
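
/*
 * NMI handler: record a sample for every counter that has wrapped
 * and re-arm it with its negated reload value.
 */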
static int ppro_check_ctrs(struct pt_regs * const regs,
			   struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		rdmsrl(msrs->counters[i].addr, val);
		if (CTR_OVERFLOWED(val)) {
			oprofile_add_sample(regs, i);
			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
		}
	}

	/* Only the P6-based Pentium M needs to re-unmask the APIC vector,
	 * but doing so doesn't hurt the other P6 variants. */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* We can't tell whether we really handled an interrupt. We
	 * might have caught a *second* counter just after it overflowed;
	 * the interrupt for this counter then arrives and we don't find
	 * a counter that's overflowed, so we would return 0 and get
	 * dazed + confused. Instead we always assume we found an
	 * overflow. This sucks.
	 */
	return 1;
}
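
/* Set the EN bit on every armed event select to start counting. */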
static void ppro_start(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (reset_value[i]) {
			CTRL_READ(low, high, msrs, i);
			CTRL_SET_ACTIVE(low);
			CTRL_WRITE(low, high, msrs, i);
		}
	}
}
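
/* Clear the EN bit on every armed event select to stop counting. */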
static void ppro_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CTRL_READ(low, high, msrs, i);
		CTRL_SET_INACTIVE(low);
		CTRL_WRITE(low, high, msrs, i);
	}
}
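
/* Release every MSR reserved in ppro_fill_in_addresses() and free
 * the reload array. */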
static void ppro_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (CTR_IS_RESERVED(msrs, i))
			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
	}
	for (i = 0; i < num_counters; ++i) {
		if (CTRL_IS_RESERVED(msrs, i))
			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
	}
	if (reset_value) {
		kfree(reset_value);
		reset_value = NULL;
	}
}
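
/*
 * Model descriptor for classic P6-family CPUs.  The counter counts
 * below are defaults; arch_perfmon_setup_counters() overrides them
 * at runtime when architectural perfmon is available.
 */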
struct op_x86_model_spec op_ppro_spec = {
	.num_counters = 2,	/* can be overridden */
	.num_controls = 2,	/* ditto */
	.fill_in_addresses = &ppro_fill_in_addresses,
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};

/*
 * Architectural performance monitoring.
 *
 * Newer Intel CPUs (Core1+) have support for architectural
 * events described in CPUID 0xA. See the Intel SDM Vol. 3b,
 * Chapter 18 for details. The advantage of this is that it can
 * be done without knowing about the specific CPU.
 */
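
/*
 * Per the Intel SDM, CPUID 0xA reports the perfmon version in
 * EAX[7:0], the number of general-purpose counters in EAX[15:8]
 * and their bit width in EAX[23:16]; union cpuid10_eax gives
 * named access to those fields.
 */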
void arch_perfmon_setup_counters(void)
{
	union cpuid10_eax eax;

	eax.full = cpuid_eax(0xa);

	/* Workaround for BIOS bugs on family 6, model 15 CPUs.
	 * Taken from perfmon2. */
	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
	    current_cpu_data.x86_model == 15) {
		eax.split.version_id = 2;
		eax.split.num_counters = 2;
		eax.split.bit_width = 40;
	}

	num_counters = eax.split.num_counters;

	op_arch_perfmon_spec.num_counters = num_counters;
	op_arch_perfmon_spec.num_controls = num_counters;
	op_ppro_spec.num_counters = num_counters;
	op_ppro_spec.num_controls = num_counters;
}

struct op_x86_model_spec op_arch_perfmon_spec = {
	/* num_counters/num_controls filled in at runtime */
	.fill_in_addresses = &ppro_fill_in_addresses,
	/* user space does the cpuid check for available events */
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};