/**
 * @file nmi_int.c
 *
 * @remark Copyright 2002-2008 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Robert Richter <robert.richter@amd.com>
 */

#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/sysdev.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>

#include "op_counter.h"
#include "op_x86_model.h"

static struct op_x86_model_spec const *model;
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);

static int nmi_start(void);
static void nmi_stop(void);
static void nmi_cpu_start(void *dummy);
static void nmi_cpu_stop(void *dummy);

/* 0 == registered but off, 1 == registered and on */
static int nmi_enabled = 0;

#ifdef CONFIG_SMP
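/*
 * CPU hotplug callback: restart counters on a CPU that comes online (or
 * whose planned offline failed), and stop them synchronously before a
 * CPU goes down.
 */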
static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
                                 void *data)
{
        int cpu = (unsigned long)data;

        switch (action) {
        case CPU_DOWN_FAILED:
        case CPU_ONLINE:
                smp_call_function_single(cpu, nmi_cpu_start, NULL, 0);
                break;
        case CPU_DOWN_PREPARE:
                smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1);
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block oprofile_cpu_nb = {
        .notifier_call = oprofile_cpu_notifier
};
#endif

#ifdef CONFIG_PM
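/*
 * Suspend/resume hooks.  By the time these run only one CPU is still
 * active (see the comment below), so its counters are stopped and
 * restarted directly rather than via on_each_cpu().
 */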
static int nmi_suspend(struct sys_device *dev, pm_message_t state)
{
        /* Only one CPU left, just stop that one */
        if (nmi_enabled == 1)
                nmi_cpu_stop(NULL);
        return 0;
}

static int nmi_resume(struct sys_device *dev)
{
        if (nmi_enabled == 1)
                nmi_cpu_start(NULL);
        return 0;
}

static struct sysdev_class oprofile_sysclass = {
        .name = "oprofile",
        .resume = nmi_resume,
        .suspend = nmi_suspend,
};

static struct sys_device device_oprofile = {
        .id = 0,
        .cls = &oprofile_sysclass,
};

static int __init init_sysfs(void)
{
        int error;

        error = sysdev_class_register(&oprofile_sysclass);
        if (!error)
                error = sysdev_register(&device_oprofile);
        return error;
}

static void exit_sysfs(void)
{
        sysdev_unregister(&device_oprofile);
        sysdev_class_unregister(&oprofile_sysclass);
}

#else
#define init_sysfs() do { } while (0)
#define exit_sysfs() do { } while (0)
#endif /* CONFIG_PM */
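/*
 * die-notifier callback: on an NMI, let the model check its counters for
 * overflow.  Returning NOTIFY_STOP claims the NMI as a profiling
 * interrupt; otherwise it falls through to the remaining NMI handlers.
 */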
static int profile_exceptions_notify(struct notifier_block *self,
                                     unsigned long val, void *data)
{
        struct die_args *args = (struct die_args *)data;
        int ret = NOTIFY_DONE;
        int cpu = smp_processor_id();

        switch (val) {
        case DIE_NMI:
                if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)))
                        ret = NOTIFY_STOP;
                break;
        default:
                break;
        }
        return ret;
}
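/*
 * Snapshot this CPU's counter and control MSRs so that
 * nmi_restore_registers() can put them back at shutdown.
 */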
static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
        unsigned int const nr_ctrs = model->num_counters;
        unsigned int const nr_ctrls = model->num_controls;
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < nr_ctrs; ++i) {
                if (counters[i].addr) {
                        rdmsr(counters[i].addr,
                              counters[i].saved.low,
                              counters[i].saved.high);
                }
        }

        for (i = 0; i < nr_ctrls; ++i) {
                if (controls[i].addr) {
                        rdmsr(controls[i].addr,
                              controls[i].saved.low,
                              controls[i].saved.high);
                }
        }
}

static void nmi_save_registers(void *dummy)
{
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
        nmi_cpu_save_registers(msrs);
}
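/*
 * Per-CPU shadow arrays for the counter and control MSRs.
 * allocate_msrs() returns nonzero on success and frees everything
 * allocated so far on failure.
 */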
static void free_msrs(void)
{
        int i;

        for_each_possible_cpu(i) {
                kfree(per_cpu(cpu_msrs, i).counters);
                per_cpu(cpu_msrs, i).counters = NULL;
                kfree(per_cpu(cpu_msrs, i).controls);
                per_cpu(cpu_msrs, i).controls = NULL;
        }
}

static int allocate_msrs(void)
{
        int success = 1;
        size_t controls_size = sizeof(struct op_msr) * model->num_controls;
        size_t counters_size = sizeof(struct op_msr) * model->num_counters;
        int i;

        for_each_possible_cpu(i) {
                per_cpu(cpu_msrs, i).counters = kmalloc(counters_size,
                                                        GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).counters) {
                        success = 0;
                        break;
                }
                per_cpu(cpu_msrs, i).controls = kmalloc(controls_size,
                                                        GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).controls) {
                        success = 0;
                        break;
                }
        }

        if (!success)
                free_msrs();

        return success;
}
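/*
 * Per-CPU setup: program the model's counters under oprofilefs_lock,
 * then save the old LVTPC entry and switch the local APIC
 * performance-counter interrupt to NMI delivery.
 */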
static void nmi_cpu_setup(void *dummy)
{
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);

        spin_lock(&oprofilefs_lock);
        model->setup_ctrs(msrs);
        spin_unlock(&oprofilefs_lock);
        per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
}

static struct notifier_block profile_exceptions_nb = {
        .notifier_call = profile_exceptions_notify,
        .next = NULL,
        .priority = 0
};
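/*
 * Global setup: allocate the MSR shadows, hook into the die-notifier
 * chain, then save and program the counters on every CPU.
 */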
static int nmi_setup(void)
{
        int err = 0;
        int cpu;

        if (!allocate_msrs())
                return -ENOMEM;

        err = register_die_notifier(&profile_exceptions_nb);
        if (err) {
                free_msrs();
                return err;
        }

        /* We need to serialize save and setup for HT because the subsets
         * of MSRs are distinct for the save and setup operations.
         */

        /* Assume saved/restored counters are the same on all CPUs */
        model->fill_in_addresses(&per_cpu(cpu_msrs, 0));

        for_each_possible_cpu(cpu) {
                if (cpu != 0) {
                        memcpy(per_cpu(cpu_msrs, cpu).counters,
                               per_cpu(cpu_msrs, 0).counters,
                               sizeof(struct op_msr) * model->num_counters);

                        memcpy(per_cpu(cpu_msrs, cpu).controls,
                               per_cpu(cpu_msrs, 0).controls,
                               sizeof(struct op_msr) * model->num_controls);
                }
        }
        on_each_cpu(nmi_save_registers, NULL, 1);
        on_each_cpu(nmi_cpu_setup, NULL, 1);
        nmi_enabled = 1;
        return 0;
}
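/*
 * Counterpart to nmi_cpu_save_registers(): write the saved control and
 * counter MSR values back to the hardware.
 */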
static void nmi_restore_registers(struct op_msrs *msrs)
{
        unsigned int const nr_ctrs = model->num_counters;
        unsigned int const nr_ctrls = model->num_controls;
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < nr_ctrls; ++i) {
                if (controls[i].addr) {
                        wrmsr(controls[i].addr,
                              controls[i].saved.low,
                              controls[i].saved.high);
                }
        }

        for (i = 0; i < nr_ctrs; ++i) {
                if (counters[i].addr) {
                        wrmsr(counters[i].addr,
                              counters[i].saved.low,
                              counters[i].saved.high);
                }
        }
}
static void nmi_cpu_shutdown(void *dummy)
{
        unsigned int v;
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &__get_cpu_var(cpu_msrs);

        /* restoring APIC_LVTPC can trigger an apic error because the delivery
         * mode and vector number combination can be illegal. That's by design:
         * on power-on the APIC LVT entries contain a zero vector number, which
         * is legal only for NMI delivery mode. So inhibit APIC errors before
         * restoring the LVTPC.
         */
        v = apic_read(APIC_LVTERR);
        apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
        apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
        apic_write(APIC_LVTERR, v);
        nmi_restore_registers(msrs);
}
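/*
 * Tear everything down: mark profiling disabled, restore each CPU's
 * APIC and MSR state, unhook the die notifier and free the shadow
 * arrays.
 */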
static void nmi_shutdown(void)
{
        struct op_msrs *msrs;

        nmi_enabled = 0;
        on_each_cpu(nmi_cpu_shutdown, NULL, 1);
        unregister_die_notifier(&profile_exceptions_nb);
        msrs = &get_cpu_var(cpu_msrs);
        model->shutdown(msrs);
        free_msrs();
        put_cpu_var(cpu_msrs);
}

static void nmi_cpu_start(void *dummy)
{
        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
        model->start(msrs);
}

static int nmi_start(void)
{
        on_each_cpu(nmi_cpu_start, NULL, 1);
        return 0;
}

static void nmi_cpu_stop(void *dummy)
{
        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
        model->stop(msrs);
}

static void nmi_stop(void)
{
        on_each_cpu(nmi_cpu_stop, NULL, 1);
}
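/*
 * Per-counter configuration written by user space through oprofilefs;
 * nmi_create_files() exposes one directory ("0", "1", ...) per
 * available counter.
 */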
struct op_counter_config counter_config[OP_MAX_COUNTER];

static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
        unsigned int i;

        for (i = 0; i < model->num_counters; ++i) {
                struct dentry *dir;
                char buf[4];

                /* quick little hack to _not_ expose a counter if it is not
                 * available for use.  This should protect userspace apps.
                 * NOTE: assumes 1:1 mapping here (that counters are organized
                 * sequentially in their struct assignment).
                 */
                if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
                        continue;

                snprintf(buf, sizeof(buf), "%d", i);
                dir = oprofilefs_mkdir(sb, root, buf);
                oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
                oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
                oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
                oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
                oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
                oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
        }
        return 0;
}
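/*
 * Pentium 4 detection.  The "p4force" module parameter overrides the
 * model check; on SMP the choice between the plain and HyperThreaded
 * P4 models depends on the number of sibling threads.
 */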
static int p4force;
module_param(p4force, int, 0);

static int __init p4_init(char **cpu_type)
{
        __u8 cpu_model = boot_cpu_data.x86_model;

        if (!p4force && (cpu_model > 6 || cpu_model == 5))
                return 0;

#ifndef CONFIG_SMP
        *cpu_type = "i386/p4";
        model = &op_p4_spec;
        return 1;
#else
        switch (smp_num_siblings) {
        case 1:
                *cpu_type = "i386/p4";
                model = &op_p4_spec;
                return 1;

        case 2:
                *cpu_type = "i386/p4-ht";
                model = &op_p4_ht2_spec;
                return 1;
        }
#endif

        printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
        printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
        return 0;
}
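/*
 * P6-family detection: map the CPU model number to the matching
 * user-space cpu_type string; unknown models return 0 so the caller
 * can fall back to arch_perfmon_init().
 */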
static int __init ppro_init(char **cpu_type)
{
        __u8 cpu_model = boot_cpu_data.x86_model;

        switch (cpu_model) {
        case 0 ... 2:
                *cpu_type = "i386/ppro";
                break;
        case 3 ... 5:
                *cpu_type = "i386/pii";
                break;
        case 6 ... 8:
                *cpu_type = "i386/piii";
                break;
        case 9:
                *cpu_type = "i386/p6_mobile";
                break;
        case 10 ... 13:
                *cpu_type = "i386/p6";
                break;
        case 14:
                *cpu_type = "i386/core";
                break;
        case 15: case 23:
                *cpu_type = "i386/core_2";
                break;
        default:
                /* Unknown */
                return 0;
        }
        model = &op_ppro_spec;
        return 1;
}
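/*
 * Fallback for Intel CPUs that advertise architectural perfmon support
 * but are not matched by the model tables above.
 */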
static int __init arch_perfmon_init(char **cpu_type)
{
        if (!cpu_has_arch_perfmon)
                return 0;
        *cpu_type = "i386/arch_perfmon";
        model = &op_arch_perfmon_spec;
        arch_perfmon_setup_counters();
        return 1;
}

/* in order to get sysfs right */
static int using_nmi;
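/*
 * Entry point: pick a model based on CPU vendor and family, register
 * the hotplug notifier and fill in the default oprofile_operations,
 * which the model's own init hook may override.
 */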
int __init op_nmi_init(struct oprofile_operations *ops)
{
        __u8 vendor = boot_cpu_data.x86_vendor;
        __u8 family = boot_cpu_data.x86;
        char *cpu_type = NULL;
        int ret = 0;

        if (!cpu_has_apic)
                return -ENODEV;

        switch (vendor) {
        case X86_VENDOR_AMD:
                /* Needs to be at least an Athlon (or hammer in 32bit mode) */
                switch (family) {
                default:
                        return -ENODEV;
                case 6:
                        model = &op_amd_spec;
                        cpu_type = "i386/athlon";
                        break;
                case 0xf:
                        model = &op_amd_spec;
                        /* Actually it could be i386/hammer too, but give
                           user space a consistent name. */
                        cpu_type = "x86-64/hammer";
                        break;
                case 0x10:
                        model = &op_amd_spec;
                        cpu_type = "x86-64/family10";
                        break;
                case 0x11:
                        model = &op_amd_spec;
                        cpu_type = "x86-64/family11h";
                        break;
                }
                break;

        case X86_VENDOR_INTEL:
                switch (family) {
                /* Pentium IV */
                case 0xf:
                        p4_init(&cpu_type);
                        break;

                /* A P6-class processor */
                case 6:
                        ppro_init(&cpu_type);
                        break;

                default:
                        break;
                }

                if (!cpu_type && !arch_perfmon_init(&cpu_type))
                        return -ENODEV;
                break;

        default:
                return -ENODEV;
        }

#ifdef CONFIG_SMP
        register_cpu_notifier(&oprofile_cpu_nb);
#endif
        /* default values, can be overwritten by model */
        ops->create_files = nmi_create_files;
        ops->setup = nmi_setup;
        ops->shutdown = nmi_shutdown;
        ops->start = nmi_start;
        ops->stop = nmi_stop;
        ops->cpu_type = cpu_type;

        if (model->init)
                ret = model->init(ops);
        if (ret)
                return ret;

        init_sysfs();
        using_nmi = 1;
        printk(KERN_INFO "oprofile: using NMI interrupt.\n");
        return 0;
}
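/*
 * Undo op_nmi_init(): tear down sysfs and the hotplug notifier if they
 * were set up, then let the model clean up after itself.
 */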
void op_nmi_exit(void)
{
        if (using_nmi) {
                exit_sysfs();
#ifdef CONFIG_SMP
                unregister_cpu_notifier(&oprofile_cpu_nb);
#endif
        }
        if (model->exit)
                model->exit();
}