perfctr-watchdog.c

/*
 * local APIC based NMI watchdog for various CPUs.
 *
 * This file also handles reservation of performance counters for coordination
 * with other users (like oprofile).
 *
 * Note that these events normally don't tick when the CPU idles. This means
 * the frequency varies with CPU load.
 *
 * Original code for K7/P6 written by Keith Owens
 *
 */

#include <linux/percpu.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/kprobes.h>

#include <asm/apic.h>
#include <asm/perf_event.h>

struct nmi_watchdog_ctlblk {
        unsigned int cccr_msr;
        unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
        unsigned int evntsel_msr;  /* the MSR to select the events to handle */
};

/* Interface defining a CPU specific perfctr watchdog */
struct wd_ops {
        int (*reserve)(void);
        void (*unreserve)(void);
        int (*setup)(unsigned nmi_hz);
        void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
        void (*stop)(void);
        unsigned perfctr;
        unsigned evntsel;
        u64 checkbit;
};

static const struct wd_ops *wd_ops;

/*
 * this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
 * offset from MSR_P4_BSU_ESCR0.
 *
 * It will be the max for all platforms (for now)
 */
#define NMI_MAX_COUNTER_BITS 66

/*
 * perfctr_nmi_owner tracks the ownership of the perfctr registers;
 * evntsel_nmi_owner tracks the ownership of the event selection registers.
 * Different performance counters / event selection registers may be reserved
 * by different subsystems; this reservation system just tries to coordinate
 * things a little.
 */
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);

static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);

/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the performance counter register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return msr - MSR_K7_PERFCTR0;
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return msr - MSR_ARCH_PERFMON_PERFCTR0;

                switch (boot_cpu_data.x86) {
                case 6:
                        return msr - MSR_P6_PERFCTR0;
                case 15:
                        return msr - MSR_P4_BPU_PERFCTR0;
                }
        }
        return 0;
}
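
/*
 * Example of the mapping above: on a P6-family CPU the n-th performance
 * counter MSR (MSR_P6_PERFCTR0 + n) maps to reservation bit n, so the
 * watchdog and any other perfctr user (e.g. oprofile) contend for the same
 * bit when they try to claim the same hardware counter.
 */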

/*
 * converts an msr to an appropriate reservation bit
 * returns the bit offset of the event selection register
 */
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
{
        /* returns the bit offset of the event selection register */
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                return msr - MSR_K7_EVNTSEL0;
        case X86_VENDOR_INTEL:
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
                        return msr - MSR_ARCH_PERFMON_EVENTSEL0;

                switch (boot_cpu_data.x86) {
                case 6:
                        return msr - MSR_P6_EVNTSEL0;
                case 15:
                        return msr - MSR_P4_BSU_ESCR0;
                }
        }
        return 0;
}

/* checks for a bit availability (hack for oprofile) */
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
{
        BUG_ON(counter > NMI_MAX_COUNTER_BITS);

        return !test_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);

int reserve_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return 1;

        if (!test_and_set_bit(counter, perfctr_nmi_owner))
                return 1;
        return 0;
}
EXPORT_SYMBOL(reserve_perfctr_nmi);

void release_perfctr_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_perfctr_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return;

        clear_bit(counter, perfctr_nmi_owner);
}
EXPORT_SYMBOL(release_perfctr_nmi);

int reserve_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return 1;

        if (!test_and_set_bit(counter, evntsel_nmi_owner))
                return 1;
        return 0;
}
EXPORT_SYMBOL(reserve_evntsel_nmi);

void release_evntsel_nmi(unsigned int msr)
{
        unsigned int counter;

        counter = nmi_evntsel_msr_to_bit(msr);
        /* register not managed by the allocator? */
        if (counter > NMI_MAX_COUNTER_BITS)
                return;

        clear_bit(counter, evntsel_nmi_owner);
}
EXPORT_SYMBOL(release_evntsel_nmi);
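
/*
 * Typical use of the reservation interface by another perfctr user
 * (illustrative sketch only; perfctr_msr/evntsel_msr are placeholders
 * for whatever MSR pair the caller wants to program):
 *
 *        if (!reserve_perfctr_nmi(perfctr_msr))
 *                return -EBUSY;
 *        if (!reserve_evntsel_nmi(evntsel_msr)) {
 *                release_perfctr_nmi(perfctr_msr);
 *                return -EBUSY;
 *        }
 *        ... program and use the counter ...
 *        release_evntsel_nmi(evntsel_msr);
 *        release_perfctr_nmi(perfctr_msr);
 *
 * This mirrors what single_msr_reserve()/single_msr_unreserve() do below.
 */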

void disable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        if (atomic_read(&nmi_active) <= 0)
                return;

        on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);

        if (wd_ops)
                wd_ops->unreserve();

        BUG_ON(atomic_read(&nmi_active) != 0);
}

void enable_lapic_nmi_watchdog(void)
{
        BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);

        /* are we already enabled? */
        if (atomic_read(&nmi_active) != 0)
                return;

        /* are we lapic aware? */
        if (!wd_ops)
                return;
        if (!wd_ops->reserve()) {
                printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
                return;
        }

        on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
        touch_nmi_watchdog();
}

/*
 * Activate the NMI watchdog via the local APIC.
 */

static unsigned int adjust_for_32bit_ctr(unsigned int hz)
{
        u64 counter_val;
        unsigned int retval = hz;

        /*
         * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
         * are writable, with higher bits sign extending from bit 31.
         * So we can only program the counter with 31 bit values, and
         * bit 31 must be set so that bits 32..63 sign extend to 1.
         * Find the appropriate nmi_hz.
         */
        counter_val = (u64)cpu_khz * 1000;
        do_div(counter_val, retval);
        if (counter_val > 0x7fffffffULL) {
                u64 count = (u64)cpu_khz * 1000;
                do_div(count, 0x7fffffffUL);
                retval = count + 1;
        }
        return retval;
}
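
/*
 * Example: assume a hypothetical 3 GHz CPU (cpu_khz = 3000000) and a
 * requested nmi_hz of 1. The counter would have to run for 3,000,000,000
 * cycles between NMIs, which does not fit in 31 bits (max 0x7fffffff =
 * 2,147,483,647), so the rate is raised to
 * 3000000000 / 0x7fffffff + 1 = 2 NMIs per second instead.
 */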

static void write_watchdog_counter(unsigned int perfctr_msr,
                                   const char *descr, unsigned nmi_hz)
{
        u64 count = (u64)cpu_khz * 1000;

        do_div(count, nmi_hz);
        if (descr)
                pr_debug("setting %s to -0x%08Lx\n", descr, count);
        wrmsrl(perfctr_msr, 0 - count);
}

static void write_watchdog_counter32(unsigned int perfctr_msr,
                                     const char *descr, unsigned nmi_hz)
{
        u64 count = (u64)cpu_khz * 1000;

        do_div(count, nmi_hz);
        if (descr)
                pr_debug("setting %s to -0x%08Lx\n", descr, count);
        wrmsr(perfctr_msr, (u32)(-count), 0);
}
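
/*
 * Both helpers program the counter to -(cpu_khz * 1000 / nmi_hz), i.e. the
 * number of CPU cycles between two watchdog ticks, so that the counter
 * overflows (and raises the NMI) roughly nmi_hz times per second. For
 * example, a hypothetical 1 GHz CPU with nmi_hz = 10 would have its counter
 * written with -100,000,000.
 */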

/*
 * AMD K7/K8/Family10h/Family11h support.
 * AMD keeps this interface nicely stable so there is not much variety
 */
#define K7_EVNTSEL_ENABLE       (1 << 22)
#define K7_EVNTSEL_INT          (1 << 20)
#define K7_EVNTSEL_OS           (1 << 17)
#define K7_EVNTSEL_USR          (1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING    0x76
#define K7_NMI_EVENT            K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

static int setup_k7_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = K7_EVNTSEL_INT
                | K7_EVNTSEL_OS
                | K7_EVNTSEL_USR
                | K7_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);

        /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= K7_EVNTSEL_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        return 1;
}

static void single_msr_stop_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->evntsel_msr, 0, 0);
}

static int single_msr_reserve(void)
{
        if (!reserve_perfctr_nmi(wd_ops->perfctr))
                return 0;

        if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
                release_perfctr_nmi(wd_ops->perfctr);
                return 0;
        }
        return 1;
}

static void single_msr_unreserve(void)
{
        release_evntsel_nmi(wd_ops->evntsel);
        release_perfctr_nmi(wd_ops->perfctr);
}

static void __kprobes
single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops k7_wd_ops = {
        .reserve        = single_msr_reserve,
        .unreserve      = single_msr_unreserve,
        .setup          = setup_k7_watchdog,
        .rearm          = single_msr_rearm,
        .stop           = single_msr_stop_watchdog,
        .perfctr        = MSR_K7_PERFCTR0,
        .evntsel        = MSR_K7_EVNTSEL0,
        .checkbit       = 1ULL << 47,
};

/*
 * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
 */
#define P6_EVNTSEL0_ENABLE      (1 << 22)
#define P6_EVNTSEL_INT          (1 << 20)
#define P6_EVNTSEL_OS           (1 << 17)
#define P6_EVNTSEL_USR          (1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED  0x79
#define P6_NMI_EVENT            P6_EVENT_CPU_CLOCKS_NOT_HALTED

static int setup_p6_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        /* KVM doesn't implement this MSR */
        if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
                return 0;

        evntsel = P6_EVNTSEL_INT
                | P6_EVNTSEL_OS
                | P6_EVNTSEL_USR
                | P6_NMI_EVENT;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);

        /* initialize the wd struct before enabling */
        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= P6_EVNTSEL0_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);

        return 1;
}

static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        /*
         * P6 based Pentium M needs to re-unmask
         * the apic vector, but it doesn't hurt
         * other P6 variants.
         * ArchPerfmon/Core Duo also needs this.
         */
        apic_write(APIC_LVTPC, APIC_DM_NMI);

        /* P6/ARCH_PERFMON uses 32 bit counter writes */
        write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p6_wd_ops = {
        .reserve        = single_msr_reserve,
        .unreserve      = single_msr_unreserve,
        .setup          = setup_p6_watchdog,
        .rearm          = p6_rearm,
        .stop           = single_msr_stop_watchdog,
        .perfctr        = MSR_P6_PERFCTR0,
        .evntsel        = MSR_P6_EVNTSEL0,
        .checkbit       = 1ULL << 39,
};

/*
 * Intel P4 performance counters.
 * By far the most complicated of all.
 */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL   (1 << 7)
#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
#define P4_ESCR_OS              (1 << 3)
#define P4_ESCR_USR             (1 << 2)
#define P4_CCCR_OVF_PMI0        (1 << 26)
#define P4_CCCR_OVF_PMI1        (1 << 27)
#define P4_CCCR_THRESHOLD(N)    ((N) << 20)
#define P4_CCCR_COMPLEMENT      (1 << 19)
#define P4_CCCR_COMPARE         (1 << 18)
#define P4_CCCR_REQUIRED        (3 << 16)
#define P4_CCCR_ESCR_SELECT(N)  ((N) << 13)
#define P4_CCCR_ENABLE          (1 << 12)
#define P4_CCCR_OVF             (1 << 31)

#define P4_CONTROLS 18
static unsigned int p4_controls[18] = {
        MSR_P4_BPU_CCCR0,
        MSR_P4_BPU_CCCR1,
        MSR_P4_BPU_CCCR2,
        MSR_P4_BPU_CCCR3,
        MSR_P4_MS_CCCR0,
        MSR_P4_MS_CCCR1,
        MSR_P4_MS_CCCR2,
        MSR_P4_MS_CCCR3,
        MSR_P4_FLAME_CCCR0,
        MSR_P4_FLAME_CCCR1,
        MSR_P4_FLAME_CCCR2,
        MSR_P4_FLAME_CCCR3,
        MSR_P4_IQ_CCCR0,
        MSR_P4_IQ_CCCR1,
        MSR_P4_IQ_CCCR2,
        MSR_P4_IQ_CCCR3,
        MSR_P4_IQ_CCCR4,
        MSR_P4_IQ_CCCR5,
};

/*
 * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
 * CRU_ESCR0 (with any non-null event selector) through a complemented
 * max threshold. [IA32-Vol3, Section 14.9.9]
 */
static int setup_p4_watchdog(unsigned nmi_hz)
{
        unsigned int perfctr_msr, evntsel_msr, cccr_msr;
        unsigned int evntsel, cccr_val;
        unsigned int misc_enable, dummy;
        unsigned int ht_num;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
        if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
                return 0;

#ifdef CONFIG_SMP
        /* detect which hyperthread we are on */
        if (smp_num_siblings == 2) {
                unsigned int ebx, apicid;

                ebx = cpuid_ebx(1);
                apicid = (ebx >> 24) & 0xff;
                ht_num = apicid & 1;
        } else
#endif
                ht_num = 0;

        /*
         * performance counters are shared resources;
         * assign each hyperthread its own set
         * (re-use the ESCR0 register, seems safe
         * and keeps the cccr_val the same)
         */
        if (!ht_num) {
                /* logical cpu 0 */
                perfctr_msr = MSR_P4_IQ_PERFCTR0;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR0;
                cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);

                /*
                 * If we're on the kdump kernel or in some other situation,
                 * we may still have other performance counter registers set
                 * to interrupt and they'll keep interrupting forever because
                 * of the P4_CCCR_OVF quirk. So we need to ACK all the
                 * pending interrupts and disable all the registers here,
                 * before reenabling the NMI delivery. Refer to p4_rearm()
                 * about the P4_CCCR_OVF quirk.
                 */
                if (reset_devices) {
                        unsigned int low, high;
                        int i;

                        for (i = 0; i < P4_CONTROLS; i++) {
                                rdmsr(p4_controls[i], low, high);
                                low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
                                wrmsr(p4_controls[i], low, high);
                        }
                }
        } else {
                /* logical cpu 1 */
                perfctr_msr = MSR_P4_IQ_PERFCTR1;
                evntsel_msr = MSR_P4_CRU_ESCR0;
                cccr_msr = MSR_P4_IQ_CCCR1;

                /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
                if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
                        cccr_val = P4_CCCR_OVF_PMI0;
                else
                        cccr_val = P4_CCCR_OVF_PMI1;
                cccr_val |= P4_CCCR_ESCR_SELECT(4);
        }

        evntsel = P4_ESCR_EVENT_SELECT(0x3F)
                | P4_ESCR_OS
                | P4_ESCR_USR;

        cccr_val |= P4_CCCR_THRESHOLD(15)
                 | P4_CCCR_COMPLEMENT
                 | P4_CCCR_COMPARE
                 | P4_CCCR_REQUIRED;

        wrmsr(evntsel_msr, evntsel, 0);
        wrmsr(cccr_msr, cccr_val, 0);
        write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = cccr_msr;

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        cccr_val |= P4_CCCR_ENABLE;
        wrmsr(cccr_msr, cccr_val, 0);

        return 1;
}
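
/*
 * Why the above counts cycles: with P4_CCCR_COMPARE and P4_CCCR_COMPLEMENT
 * set and a threshold of 15, the CCCR increments the counter on every cycle
 * in which the (always small) event count is at or below the threshold,
 * which in practice is every cycle. Combined with the negative initial
 * value written by write_watchdog_counter(), this produces one overflow
 * NMI roughly every cpu_khz * 1000 / nmi_hz cycles.
 */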

static void stop_p4_watchdog(void)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        wrmsr(wd->cccr_msr, 0, 0);
        wrmsr(wd->evntsel_msr, 0, 0);
}

static int p4_reserve(void)
{
        if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
                return 0;
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
                goto fail1;
#endif
        if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
                goto fail2;
        /* RED-PEN why is ESCR1 not reserved here? */
        return 1;
fail2:
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1)
                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
fail1:
#endif
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
        return 0;
}

static void p4_unreserve(void)
{
#ifdef CONFIG_SMP
        if (smp_num_siblings > 1)
                release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
#endif
        release_evntsel_nmi(MSR_P4_CRU_ESCR0);
        release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
}

static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
{
        unsigned dummy;

        /*
         * P4 quirks:
         * - An overflown perfctr will assert its interrupt
         *   until the OVF flag in its CCCR is cleared.
         * - LVTPC is masked on interrupt and must be
         *   unmasked by the LVTPC handler.
         */
        rdmsrl(wd->cccr_msr, dummy);
        dummy &= ~P4_CCCR_OVF;
        wrmsrl(wd->cccr_msr, dummy);
        apic_write(APIC_LVTPC, APIC_DM_NMI);

        /* start the cycle over again */
        write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
}

static const struct wd_ops p4_wd_ops = {
        .reserve        = p4_reserve,
        .unreserve      = p4_unreserve,
        .setup          = setup_p4_watchdog,
        .rearm          = p4_rearm,
        .stop           = stop_p4_watchdog,
        /* RED-PEN this is wrong for the other sibling */
        .perfctr        = MSR_P4_BPU_PERFCTR0,
        .evntsel        = MSR_P4_BSU_ESCR0,
        .checkbit       = 1ULL << 39,
};

/*
 * Watchdog using the Intel architected PerfMon.
 * Used for Core2 and hopefully all future Intel CPUs.
 */
#define ARCH_PERFMON_NMI_EVENT_SEL      ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
#define ARCH_PERFMON_NMI_EVENT_UMASK    ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK

static struct wd_ops intel_arch_wd_ops;

static int setup_intel_arch_watchdog(unsigned nmi_hz)
{
        unsigned int ebx;
        union cpuid10_eax eax;
        unsigned int unused;
        unsigned int perfctr_msr, evntsel_msr;
        unsigned int evntsel;
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        /*
         * Check whether the Architectural PerfMon supports
         * Unhalted Core Cycles Event or not.
         * NOTE: Corresponding bit = 0 in ebx indicates event present.
         */
        cpuid(10, &(eax.full), &ebx, &unused, &unused);
        if ((eax.split.mask_length <
                        (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
                return 0;

        perfctr_msr = wd_ops->perfctr;
        evntsel_msr = wd_ops->evntsel;

        wrmsrl(perfctr_msr, 0UL);

        evntsel = ARCH_PERFMON_EVENTSEL_INT
                | ARCH_PERFMON_EVENTSEL_OS
                | ARCH_PERFMON_EVENTSEL_USR
                | ARCH_PERFMON_NMI_EVENT_SEL
                | ARCH_PERFMON_NMI_EVENT_UMASK;

        /* setup the timer */
        wrmsr(evntsel_msr, evntsel, 0);
        nmi_hz = adjust_for_32bit_ctr(nmi_hz);
        write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);

        wd->perfctr_msr = perfctr_msr;
        wd->evntsel_msr = evntsel_msr;
        wd->cccr_msr = 0;  /* unused */

        /* ok, everything is initialized, announce that we're set */
        cpu_nmi_set_wd_enabled();

        apic_write(APIC_LVTPC, APIC_DM_NMI);
        evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
        wrmsr(evntsel_msr, evntsel, 0);
        intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
        return 1;
}

static struct wd_ops intel_arch_wd_ops __read_mostly = {
        .reserve        = single_msr_reserve,
        .unreserve      = single_msr_unreserve,
        .setup          = setup_intel_arch_watchdog,
        .rearm          = p6_rearm,
        .stop           = single_msr_stop_watchdog,
        .perfctr        = MSR_ARCH_PERFMON_PERFCTR1,
        .evntsel        = MSR_ARCH_PERFMON_EVENTSEL1,
};

static void probe_nmi_watchdog(void)
{
        switch (boot_cpu_data.x86_vendor) {
        case X86_VENDOR_AMD:
                if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
                    boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17)
                        return;
                wd_ops = &k7_wd_ops;
                break;
        case X86_VENDOR_INTEL:
                /*
                 * Work around cases where perfctr1 doesn't have a working
                 * enable bit, as described in the following errata:
                 * AE49 Core Duo and Intel Core Solo 65 nm
                 * AN49 Intel Pentium Dual-Core
                 * AF49 Dual-Core Intel Xeon Processor LV
                 */
                if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
                    ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
                      boot_cpu_data.x86_mask == 4))) {
                        intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
                        intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
                }
                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
                        wd_ops = &intel_arch_wd_ops;
                        break;
                }
                switch (boot_cpu_data.x86) {
                case 6:
                        if (boot_cpu_data.x86_model > 13)
                                return;

                        wd_ops = &p6_wd_ops;
                        break;
                case 15:
                        wd_ops = &p4_wd_ops;
                        break;
                default:
                        return;
                }
                break;
        }
}

/* Interface to nmi.c */

int lapic_watchdog_init(unsigned nmi_hz)
{
        if (!wd_ops) {
                probe_nmi_watchdog();
                if (!wd_ops) {
                        printk(KERN_INFO "NMI watchdog: CPU not supported\n");
                        return -1;
                }

                if (!wd_ops->reserve()) {
                        printk(KERN_ERR
                                "NMI watchdog: cannot reserve perfctrs\n");
                        return -1;
                }
        }

        if (!(wd_ops->setup(nmi_hz))) {
                printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
                        raw_smp_processor_id());
                return -1;
        }

        return 0;
}

void lapic_watchdog_stop(void)
{
        if (wd_ops)
                wd_ops->stop();
}

unsigned lapic_adjust_nmi_hz(unsigned hz)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);

        if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
            wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
                hz = adjust_for_32bit_ctr(hz);
        return hz;
}

int __kprobes lapic_wd_event(unsigned nmi_hz)
{
        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
        u64 ctr;

        rdmsrl(wd->perfctr_msr, ctr);
        if (ctr & wd_ops->checkbit) /* perfctr still running? */
                return 0;

        wd_ops->rearm(wd, nmi_hz);
        return 1;
}
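
/*
 * How lapic_wd_event() tells a watchdog NMI from an unrelated one: the
 * counter was programmed to a negative value, so its top implemented bit
 * (wd_ops->checkbit) stays set until the counter overflows past zero.
 * If that bit is still set, the counter has not overflowed yet and the NMI
 * came from somewhere else; if it is clear, this was our tick and the
 * counter is rearmed for the next period.
 */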