/* perf_event_intel_lbr.c */

#include <linux/perf_event.h>
#include <linux/types.h>

#include <asm/perf_event.h>
#include <asm/msr.h>

#include "perf_event.h"
enum {
        LBR_FORMAT_32           = 0x00,
        LBR_FORMAT_LIP          = 0x01,
        LBR_FORMAT_EIP          = 0x02,
        LBR_FORMAT_EIP_FLAGS    = 0x03,
};

/*
 * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
 * otherwise it becomes nearly impossible to get a reliable stack.
 */
static void __intel_pmu_lbr_enable(void)
{
        u64 debugctl;

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
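
/* Stop LBR recording by clearing the LBR and freeze-on-PMI bits in DEBUGCTL. */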
static void __intel_pmu_lbr_disable(void)
{
        u64 debugctl;

        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
        debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
}
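
/*
 * Clear the LBR stack: the 32-bit format packs both addresses into the
 * FROM MSR, while the 64-bit formats use separate FROM and TO MSRs.
 */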
static void intel_pmu_lbr_reset_32(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++)
                wrmsrl(x86_pmu.lbr_from + i, 0);
}

static void intel_pmu_lbr_reset_64(void)
{
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                wrmsrl(x86_pmu.lbr_from + i, 0);
                wrmsrl(x86_pmu.lbr_to + i, 0);
        }
}

void intel_pmu_lbr_reset(void)
{
        if (!x86_pmu.lbr_nr)
                return;

        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
                intel_pmu_lbr_reset_32();
        else
                intel_pmu_lbr_reset_64();
}
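
/*
 * Account a new LBR user on this CPU; the LBRs themselves are switched
 * on from intel_pmu_lbr_enable_all() when the PMU is enabled.
 */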
void intel_pmu_lbr_enable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        /*
         * Reset the LBR stack if we changed task context to
         * avoid data leaks.
         */
        if (event->ctx->task && cpuc->lbr_context != event->ctx) {
                intel_pmu_lbr_reset();
                cpuc->lbr_context = event->ctx;
        }

        cpuc->lbr_users++;
}
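
/* Drop one LBR user; turn LBRs off once the last user on this CPU is gone. */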
void intel_pmu_lbr_disable(struct perf_event *event)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!x86_pmu.lbr_nr)
                return;

        cpuc->lbr_users--;
        WARN_ON_ONCE(cpuc->lbr_users < 0);

        if (cpuc->enabled && !cpuc->lbr_users)
                __intel_pmu_lbr_disable();
}
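
/*
 * Called from the PMU-wide enable/disable paths; only touch DEBUGCTL
 * when at least one event on this CPU is using the LBRs.
 */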
void intel_pmu_lbr_enable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_users)
                __intel_pmu_lbr_enable();
}

void intel_pmu_lbr_disable_all(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (cpuc->lbr_users)
                __intel_pmu_lbr_disable();
}
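
/* Read the top-of-stack pointer, which indexes the most recent LBR entry. */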
static inline u64 intel_pmu_lbr_tos(void)
{
        u64 tos;

        rdmsrl(x86_pmu.lbr_tos, tos);

        return tos;
}
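
/*
 * In the 32-bit format a single MSR holds both branch addresses:
 * FROM in the low 32 bits, TO in the high 32 bits.
 */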
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
        unsigned long mask = x86_pmu.lbr_nr - 1;
        u64 tos = intel_pmu_lbr_tos();
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                union {
                        struct {
                                u32 from;
                                u32 to;
                        };
                        u64 lbr;
                } msr_lastbranch;

                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

                cpuc->lbr_entries[i].from      = msr_lastbranch.from;
                cpuc->lbr_entries[i].to        = msr_lastbranch.to;
                cpuc->lbr_entries[i].mispred   = 0;
                cpuc->lbr_entries[i].predicted = 0;
                cpuc->lbr_entries[i].reserved  = 0;
        }
        cpuc->lbr_stack.nr = i;
}
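
/* In LBR_FORMAT_EIP_FLAGS, bit 63 of the FROM MSR flags a mispredicted branch. */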
#define LBR_FROM_FLAG_MISPRED   (1ULL << 63)

/*
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
        unsigned long mask = x86_pmu.lbr_nr - 1;
        int lbr_format = x86_pmu.intel_cap.lbr_format;
        u64 tos = intel_pmu_lbr_tos();
        int i;

        for (i = 0; i < x86_pmu.lbr_nr; i++) {
                unsigned long lbr_idx = (tos - i) & mask;
                u64 from, to, mis = 0, pred = 0;

                rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
                rdmsrl(x86_pmu.lbr_to + lbr_idx, to);

                if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
                        pred = !mis;
                        /* Strip the flag bit by sign-extending from bit 62. */
                        from = (u64)((((s64)from) << 1) >> 1);
                }

                cpuc->lbr_entries[i].from      = from;
                cpuc->lbr_entries[i].to        = to;
                cpuc->lbr_entries[i].mispred   = mis;
                cpuc->lbr_entries[i].predicted = pred;
                cpuc->lbr_entries[i].reserved  = 0;
        }
        cpuc->lbr_stack.nr = i;
}
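
/* Snapshot the LBR stack into cpuc->lbr_entries, dispatching on the record format. */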
void intel_pmu_lbr_read(void)
{
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

        if (!cpuc->lbr_users)
                return;

        if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
                intel_pmu_lbr_read_32(cpuc);
        else
                intel_pmu_lbr_read_64(cpuc);
}
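
/* Intel Core/Core 2: a 4-deep LBR stack at the legacy FROM/TO MSR addresses. */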
void intel_pmu_lbr_init_core(void)
{
        x86_pmu.lbr_nr   = 4;
        x86_pmu.lbr_tos  = 0x01c9;
        x86_pmu.lbr_from = 0x40;
        x86_pmu.lbr_to   = 0x60;
}
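
/* Nehalem/Westmere: a 16-deep LBR stack at 0x680 (FROM) / 0x6c0 (TO). */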
void intel_pmu_lbr_init_nhm(void)
{
        x86_pmu.lbr_nr   = 16;
        x86_pmu.lbr_tos  = 0x01c9;
        x86_pmu.lbr_from = 0x680;
        x86_pmu.lbr_to   = 0x6c0;
}
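
/* Atom: an 8-deep LBR stack at the legacy FROM/TO MSR addresses. */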
void intel_pmu_lbr_init_atom(void)
{
        x86_pmu.lbr_nr   = 8;
        x86_pmu.lbr_tos  = 0x01c9;
        x86_pmu.lbr_from = 0x40;
        x86_pmu.lbr_to   = 0x60;
}