op_model_ppro.c

/*
 * @file op_model_ppro.c
 * Family 6 perfmon and architectural perfmon MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Copyright 2008 Intel Corporation
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Andi Kleen
 */

#include <linux/oprofile.h>
#include <linux/slab.h>
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include <asm/intel_arch_perfmon.h>

#include "op_x86_model.h"
#include "op_counter.h"

static int num_counters = 2;
static int counter_width = 32;

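/*
 * The CTRL_* macros below assemble a P6 PERFEVTSEL value. As a rough
 * summary of the layout in the Intel SDM: bits 0-7 select the event,
 * bits 8-15 carry the unit mask, bit 16 counts user mode, bit 17
 * counts kernel mode, bit 20 enables the overflow interrupt and
 * bit 22 enables the counter itself.
 */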
#define CTR_IS_RESERVED(msrs, c)	(msrs->counters[(c)].addr ? 1 : 0)
#define CTR_READ(l, h, msrs, c)		do { rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
#define CTR_OVERFLOWED(n)		(!((n) & (1U << (counter_width - 1))))
#define CTRL_IS_RESERVED(msrs, c)	(msrs->controls[(c)].addr ? 1 : 0)
#define CTRL_READ(l, h, msrs, c)	do { rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_WRITE(l, h, msrs, c)	do { wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_SET_ACTIVE(n)		(n |= (1 << 22))
#define CTRL_SET_INACTIVE(n)		(n &= ~(1 << 22))
#define CTRL_CLEAR(x)			(x &= (1 << 21))
#define CTRL_SET_ENABLE(val)		(val |= 1 << 20)
#define CTRL_SET_USR(val, u)		(val |= ((u & 1) << 16))
#define CTRL_SET_KERN(val, k)		(val |= ((k & 1) << 17))
#define CTRL_SET_UM(val, m)		(val |= (m << 8))
#define CTRL_SET_EVENT(val, e)		(val |= e)

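/*
 * Per-counter reload values. Each active counter is programmed with the
 * negated count so that it overflows (the sign bit clears, which is what
 * CTR_OVERFLOWED tests) after the requested number of events; a value of
 * 0 marks the counter as unused.
 */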
static u64 *reset_value;

static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
	int i;

	for (i = 0; i < num_counters; i++) {
		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
		else
			msrs->counters[i].addr = 0;
	}

	for (i = 0; i < num_counters; i++) {
		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
		else
			msrs->controls[i].addr = 0;
	}
}

static void ppro_setup_ctrs(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	if (!reset_value) {
		/* size by the element type, not a fixed "unsigned":
		 * reset_value is an array of u64 */
		reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
					GFP_ATOMIC);
		if (!reset_value)
			return;
	}

	if (cpu_has_arch_perfmon) {
		union cpuid10_eax eax;
		eax.full = cpuid_eax(0xa);
		if (counter_width < eax.split.bit_width)
			counter_width = eax.split.bit_width;
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
			continue;
		CTRL_READ(low, high, msrs, i);
		CTRL_CLEAR(low);
		CTRL_WRITE(low, high, msrs, i);
	}

	/* avoid a false detection of ctr overflows in NMI handler */
	for (i = 0; i < num_counters; ++i) {
		if (unlikely(!CTR_IS_RESERVED(msrs, i)))
			continue;
		wrmsrl(msrs->counters[i].addr, -1LL);
	}

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		if (counter_config[i].enabled && CTR_IS_RESERVED(msrs, i)) {
			reset_value[i] = counter_config[i].count;
			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
			CTRL_READ(low, high, msrs, i);
			CTRL_CLEAR(low);
			CTRL_SET_ENABLE(low);
			CTRL_SET_USR(low, counter_config[i].user);
			CTRL_SET_KERN(low, counter_config[i].kernel);
			CTRL_SET_UM(low, counter_config[i].unit_mask);
			CTRL_SET_EVENT(low, counter_config[i].event);
			CTRL_WRITE(low, high, msrs, i);
		} else {
			reset_value[i] = 0;
		}
	}
}

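/*
 * Overflow check, called from oprofile's NMI handler: take a sample for
 * each overflowed counter and rearm it with its reload value.
 */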
static int ppro_check_ctrs(struct pt_regs * const regs,
			   struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CTR_READ(low, high, msrs, i);
		if (CTR_OVERFLOWED(low)) {
			oprofile_add_sample(regs, i);
			wrmsrl(msrs->counters[i].addr, -reset_value[i]);
		}
	}

	/* Only the P6 based Pentium M needs to re-unmask the apic vector,
	 * but it doesn't hurt the other P6 variants. */
	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

	/* We can't work out if we really handled an interrupt. We
	 * might have caught a *second* counter just after it overflowed;
	 * the interrupt for this counter then arrives and we don't find
	 * a counter that's overflowed, so we would return 0 and get
	 * dazed + confused. Instead we always assume we found an
	 * overflow. This sucks.
	 */
	return 1;
}

static void ppro_start(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (reset_value[i]) {
			CTRL_READ(low, high, msrs, i);
			CTRL_SET_ACTIVE(low);
			CTRL_WRITE(low, high, msrs, i);
		}
	}
}

static void ppro_stop(struct op_msrs const * const msrs)
{
	unsigned int low, high;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[i])
			continue;
		CTRL_READ(low, high, msrs, i);
		CTRL_SET_INACTIVE(low);
		CTRL_WRITE(low, high, msrs, i);
	}
}

static void ppro_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (CTR_IS_RESERVED(msrs, i))
			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
	}
	for (i = 0; i < num_counters; ++i) {
		if (CTRL_IS_RESERVED(msrs, i))
			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
	}
	if (reset_value) {
		kfree(reset_value);
		reset_value = NULL;
	}
}

struct op_x86_model_spec op_ppro_spec = {
	.num_counters = 2,	/* can be overridden */
	.num_controls = 2,	/* ditto */
	.fill_in_addresses = &ppro_fill_in_addresses,
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};

/*
 * Architectural performance monitoring.
 *
 * Newer Intel CPUs (Core1+) have support for architectural
 * events described in CPUID 0xA. See the IA32 SDM Vol. 3b, Chapter 18
 * for details. The advantage of this is that it can be done without
 * knowing about the specific CPU.
 */

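/*
 * CPUID 0xA reports the perfmon parameters in EAX; union cpuid10_eax
 * unpacks them (version_id, num_counters and bit_width are each 8-bit
 * fields, per the SDM).
 */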
void arch_perfmon_setup_counters(void)
{
	union cpuid10_eax eax;

	eax.full = cpuid_eax(0xa);

	/* Workaround for BIOS bugs on family 6 / model 15 (Core 2) CPUs.
	 * Taken from perfmon2. */
	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
	    current_cpu_data.x86_model == 15) {
		eax.split.version_id = 2;
		eax.split.num_counters = 2;
		eax.split.bit_width = 40;
	}

	num_counters = eax.split.num_counters;

	op_arch_perfmon_spec.num_counters = num_counters;
	op_arch_perfmon_spec.num_controls = num_counters;
	op_ppro_spec.num_counters = num_counters;
	op_ppro_spec.num_controls = num_counters;
}

struct op_x86_model_spec op_arch_perfmon_spec = {
	/* num_counters/num_controls filled in at runtime */
	.fill_in_addresses = &ppro_fill_in_addresses,
	/* user space does the cpuid check for available events */
	.setup_ctrs = &ppro_setup_ctrs,
	.check_ctrs = &ppro_check_ctrs,
	.start = &ppro_start,
	.stop = &ppro_stop,
	.shutdown = &ppro_shutdown
};