perf_event_intel_lbr.c

#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#include "perf_event.h"
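
/*
 * The LBR record format is enumerated by the LBR format field of the
 * IA32_PERF_CAPABILITIES MSR, cached by the core code in
 * x86_pmu.intel_cap.lbr_format.
 */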
enum {
	LBR_FORMAT_32		= 0x00,
	LBR_FORMAT_LIP		= 0x01,
	LBR_FORMAT_EIP		= 0x02,
	LBR_FORMAT_EIP_FLAGS	= 0x03,
};

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
 * otherwise it becomes nearly impossible to get a reliable stack.
 */
static void __intel_pmu_lbr_enable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}

static void __intel_pmu_lbr_disable(void)
{
	u64 debugctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
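
/*
 * In the 32-bit LBR format a single MSR per entry packs the FROM address
 * in the low half and the TO address in the high half, so only the
 * lbr_from bank needs to be cleared here.
 */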
static void intel_pmu_lbr_reset_32(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++)
		wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		wrmsrl(x86_pmu.lbr_from + i, 0);
		wrmsrl(x86_pmu.lbr_to + i, 0);
	}
}

void intel_pmu_lbr_reset(void)
{
	if (!x86_pmu.lbr_nr)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_reset_32();
	else
		intel_pmu_lbr_reset_64();
}
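
/*
 * intel_pmu_lbr_enable()/intel_pmu_lbr_disable() only maintain the
 * per-cpu LBR user count (plus the context-switch reset below); the
 * DEBUGCTLMSR bits are actually flipped from
 * intel_pmu_lbr_enable_all()/intel_pmu_lbr_disable_all().
 */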
void intel_pmu_lbr_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	/*
	 * Reset the LBR stack if we changed task context to
	 * avoid data leaks.
	 */
	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = event->ctx;
	}

	cpuc->lbr_users++;
}

void intel_pmu_lbr_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!x86_pmu.lbr_nr)
		return;

	cpuc->lbr_users--;
	WARN_ON_ONCE(cpuc->lbr_users < 0);

	if (cpuc->enabled && !cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}
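
/*
 * Enable/disable LBR recording for this CPU as a whole; DEBUGCTLMSR is
 * only touched when at least one event on the CPU is using the LBR.
 */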
void intel_pmu_lbr_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_enable();
}

void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (cpuc->lbr_users)
		__intel_pmu_lbr_disable();
}
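
/*
 * The TOS (top of stack) MSR holds the index of the most recently
 * recorded branch; the read routines below walk backwards from it,
 * newest entry first.
 */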
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 tos;

	rdmsrl(x86_pmu.lbr_tos, tos);

	return tos;
}

static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64 lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from  = msr_lastbranch.from;
		cpuc->lbr_entries[i].to    = msr_lastbranch.to;
		cpuc->lbr_entries[i].flags = 0;
	}
	cpuc->lbr_stack.nr = i;
}

#define LBR_FROM_FLAG_MISPRED	(1ULL << 63)
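
/*
 * In the EIP_FLAGS format, bit 63 of the FROM MSR is the branch
 * misprediction flag; after stripping it, the address is sign-extended
 * back to a canonical value (the signed << 1 / >> 1 below).
 */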
/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	int lbr_format = x86_pmu.intel_cap.lbr_format;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, flags = 0;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
		rdmsrl(x86_pmu.lbr_to + lbr_idx, to);

		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
			flags = !!(from & LBR_FROM_FLAG_MISPRED);
			from = (u64)((((s64)from) << 1) >> 1);
		}

		cpuc->lbr_entries[i].from  = from;
		cpuc->lbr_entries[i].to    = to;
		cpuc->lbr_entries[i].flags = flags;
	}
	cpuc->lbr_stack.nr = i;
}
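
/*
 * Snapshot the hardware LBR stack into cpuc->lbr_stack; nothing to do
 * when no event on this CPU is using the LBR.
 */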
void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
		intel_pmu_lbr_read_32(cpuc);
	else
		intel_pmu_lbr_read_64(cpuc);
}
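
/*
 * Model-specific setup: the number of LBR entries and the MSR addresses
 * of the TOS, FROM and TO banks. Core 2 has a 4-deep LBR stack with the
 * FROM/TO banks at 0x40/0x60.
 */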
void intel_pmu_lbr_init_core(void)
{
	x86_pmu.lbr_nr   = 4;
	x86_pmu.lbr_tos  = 0x01c9;
	x86_pmu.lbr_from = 0x40;
	x86_pmu.lbr_to   = 0x60;
}
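
/* Nehalem: 16-deep LBR stack, FROM/TO banks at 0x680/0x6c0. */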
void intel_pmu_lbr_init_nhm(void)
{
	x86_pmu.lbr_nr   = 16;
	x86_pmu.lbr_tos  = 0x01c9;
	x86_pmu.lbr_from = 0x680;
	x86_pmu.lbr_to   = 0x6c0;
}
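
/* Atom: 8-deep LBR stack, same FROM/TO MSR addresses as Core 2. */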
void intel_pmu_lbr_init_atom(void)
{
	x86_pmu.lbr_nr   = 8;
	x86_pmu.lbr_tos  = 0x01c9;
	x86_pmu.lbr_from = 0x40;
	x86_pmu.lbr_to   = 0x60;
}