/* x86.c */
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
  16. #include "kvm.h"
  17. #include "x86.h"
  18. #include "segment_descriptor.h"
  19. #include "irq.h"
  20. #include <linux/kvm.h>
  21. #include <linux/fs.h>
  22. #include <linux/vmalloc.h>
  23. #include <linux/module.h>
  24. #include <asm/uaccess.h>
  25. #define MAX_IO_MSRS 256
/*
 * Return the linear base address of the segment named by @selector,
 * read from the host's live GDT (or LDT, for selectors with TI set).
 *
 * A null selector yields base 0.  On x86-64, system descriptors of
 * type LDT (2), available TSS (9) or busy TSS (11) are 16 bytes wide
 * and carry bits 63:32 of the base in the second half.
 */
unsigned long segment_base(u16 selector)
{
	struct descriptor_table gdt;
	struct segment_descriptor *d;
	unsigned long table_base;
	unsigned long v;

	if (selector == 0)
		return 0;

	/* Locate the host GDT. */
	asm("sgdt %0" : "=m"(gdt));
	table_base = gdt.base;

	if (selector & 4) { /* from ldt */
		u16 ldt_selector;

		/* The LDT itself is described by a GDT entry; recurse once. */
		asm("sldt %0" : "=g"(ldt_selector));
		table_base = segment_base(ldt_selector);
	}

	/* Mask off RPL/TI to get the byte offset of the descriptor. */
	d = (struct segment_descriptor *)(table_base + (selector & ~7));

	/* Assemble base bits 31:0 from the three scattered fields. */
	v = d->base_low | ((unsigned long)d->base_mid << 16) |
		((unsigned long)d->base_high << 24);
#ifdef CONFIG_X86_64
	/* 16-byte system descriptors carry base bits 63:32. */
	if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
		v |= ((unsigned long) \
			((struct segment_descriptor_64 *)d)->base_higher) << 32;
#endif
	return v;
}
EXPORT_SYMBOL_GPL(segment_base);
/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu (see kvm_init_msr_list(), which drops
 * entries the host cannot rdmsr).
 */
static u32 msrs_to_save[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_K6_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TIME_STAMP_COUNTER,
};

/* Number of valid leading entries in msrs_to_save after host filtering. */
static unsigned num_msrs_to_save;

/* MSRs we emulate in software rather than passing through to hardware. */
static u32 emulated_msrs[] = {
	MSR_IA32_MISC_ENABLE,
};
/*
 * Adapt kvm_set_msr() to msr_io()'s do_msr callback convention
 * (which passes the data by pointer for both reads and writes).
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return kvm_set_msr(vcpu, index, *data);
}
  78. /*
  79. * Read or write a bunch of msrs. All parameters are kernel addresses.
  80. *
  81. * @return number of msrs set successfully.
  82. */
  83. static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
  84. struct kvm_msr_entry *entries,
  85. int (*do_msr)(struct kvm_vcpu *vcpu,
  86. unsigned index, u64 *data))
  87. {
  88. int i;
  89. vcpu_load(vcpu);
  90. for (i = 0; i < msrs->nmsrs; ++i)
  91. if (do_msr(vcpu, entries[i].index, &entries[i].data))
  92. break;
  93. vcpu_put(vcpu);
  94. return i;
  95. }
/*
 * Read or write a bunch of msrs.  Parameters are user addresses.
 *
 * Copies the header and entry array in from userspace, runs @do_msr
 * over them via __msr_io(), and (for reads) copies the results back.
 *
 * @return number of msrs set successfully, or a negative errno.
 */
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
		  int (*do_msr)(struct kvm_vcpu *vcpu,
				unsigned index, u64 *data),
		  int writeback)
{
	struct kvm_msrs msrs;
	struct kvm_msr_entry *entries;
	int r, n;
	unsigned size;

	r = -EFAULT;
	if (copy_from_user(&msrs, user_msrs, sizeof msrs))
		goto out;

	/* Bound the allocation below; nmsrs < 256 also rules out
	 * overflow in the size computation. */
	r = -E2BIG;
	if (msrs.nmsrs >= MAX_IO_MSRS)
		goto out;

	r = -ENOMEM;
	size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
	entries = vmalloc(size);
	if (!entries)
		goto out;

	r = -EFAULT;
	if (copy_from_user(entries, user_msrs->entries, size))
		goto out_free;

	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
	/* NOTE(review): __msr_io() never returns a negative value, so this
	 * branch is currently dead — kept for robustness. */
	if (r < 0)
		goto out_free;

	r = -EFAULT;
	if (writeback && copy_to_user(user_msrs->entries, entries, size))
		goto out_free;

	r = n;

out_free:
	vfree(entries);
out:
	return r;
}
  136. long kvm_arch_dev_ioctl(struct file *filp,
  137. unsigned int ioctl, unsigned long arg)
  138. {
  139. void __user *argp = (void __user *)arg;
  140. long r;
  141. switch (ioctl) {
  142. case KVM_GET_MSR_INDEX_LIST: {
  143. struct kvm_msr_list __user *user_msr_list = argp;
  144. struct kvm_msr_list msr_list;
  145. unsigned n;
  146. r = -EFAULT;
  147. if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
  148. goto out;
  149. n = msr_list.nmsrs;
  150. msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
  151. if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
  152. goto out;
  153. r = -E2BIG;
  154. if (n < num_msrs_to_save)
  155. goto out;
  156. r = -EFAULT;
  157. if (copy_to_user(user_msr_list->indices, &msrs_to_save,
  158. num_msrs_to_save * sizeof(u32)))
  159. goto out;
  160. if (copy_to_user(user_msr_list->indices
  161. + num_msrs_to_save * sizeof(u32),
  162. &emulated_msrs,
  163. ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
  164. goto out;
  165. r = 0;
  166. break;
  167. }
  168. default:
  169. r = -EINVAL;
  170. }
  171. out:
  172. return r;
  173. }
/* Arch hook: delegate vcpu load (migration to @cpu) to the backend (vmx/svm). */
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	kvm_x86_ops->vcpu_load(vcpu, cpu);
}
/* Arch hook: delegate vcpu put to the backend (vmx/svm). */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->vcpu_put(vcpu);
}
  182. static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
  183. {
  184. u64 efer;
  185. int i;
  186. struct kvm_cpuid_entry *e, *entry;
  187. rdmsrl(MSR_EFER, efer);
  188. entry = NULL;
  189. for (i = 0; i < vcpu->cpuid_nent; ++i) {
  190. e = &vcpu->cpuid_entries[i];
  191. if (e->function == 0x80000001) {
  192. entry = e;
  193. break;
  194. }
  195. }
  196. if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
  197. entry->edx &= ~(1 << 20);
  198. printk(KERN_INFO "kvm: guest NX capability removed\n");
  199. }
  200. }
  201. static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
  202. struct kvm_cpuid *cpuid,
  203. struct kvm_cpuid_entry __user *entries)
  204. {
  205. int r;
  206. r = -E2BIG;
  207. if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
  208. goto out;
  209. r = -EFAULT;
  210. if (copy_from_user(&vcpu->cpuid_entries, entries,
  211. cpuid->nent * sizeof(struct kvm_cpuid_entry)))
  212. goto out;
  213. vcpu->cpuid_nent = cpuid->nent;
  214. cpuid_fix_nx_cap(vcpu);
  215. return 0;
  216. out:
  217. return r;
  218. }
/* Snapshot the in-kernel local APIC register page into *s.
 * NOTE(review): copies sizeof *s from apic->regs — assumes
 * kvm_lapic_state is exactly the register page; confirm in kvm.h. */
static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(s->regs, vcpu->apic->regs, sizeof *s);
	vcpu_put(vcpu);

	return 0;
}
/* Restore the in-kernel local APIC register page from *s, then let the
 * apic code resynchronize its derived state (timers, IRR scan, ...). */
static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(vcpu->apic->regs, s->regs, sizeof *s);
	kvm_apic_post_state_restore(vcpu);
	vcpu_put(vcpu);

	return 0;
}
  236. long kvm_arch_vcpu_ioctl(struct file *filp,
  237. unsigned int ioctl, unsigned long arg)
  238. {
  239. struct kvm_vcpu *vcpu = filp->private_data;
  240. void __user *argp = (void __user *)arg;
  241. int r;
  242. switch (ioctl) {
  243. case KVM_GET_LAPIC: {
  244. struct kvm_lapic_state lapic;
  245. memset(&lapic, 0, sizeof lapic);
  246. r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
  247. if (r)
  248. goto out;
  249. r = -EFAULT;
  250. if (copy_to_user(argp, &lapic, sizeof lapic))
  251. goto out;
  252. r = 0;
  253. break;
  254. }
  255. case KVM_SET_LAPIC: {
  256. struct kvm_lapic_state lapic;
  257. r = -EFAULT;
  258. if (copy_from_user(&lapic, argp, sizeof lapic))
  259. goto out;
  260. r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
  261. if (r)
  262. goto out;
  263. r = 0;
  264. break;
  265. }
  266. case KVM_SET_CPUID: {
  267. struct kvm_cpuid __user *cpuid_arg = argp;
  268. struct kvm_cpuid cpuid;
  269. r = -EFAULT;
  270. if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
  271. goto out;
  272. r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
  273. if (r)
  274. goto out;
  275. break;
  276. }
  277. case KVM_GET_MSRS:
  278. r = msr_io(vcpu, argp, kvm_get_msr, 1);
  279. break;
  280. case KVM_SET_MSRS:
  281. r = msr_io(vcpu, argp, do_set_msr, 0);
  282. break;
  283. default:
  284. r = -EINVAL;
  285. }
  286. out:
  287. return r;
  288. }
  289. static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
  290. {
  291. int ret;
  292. if (addr > (unsigned int)(-3 * PAGE_SIZE))
  293. return -1;
  294. ret = kvm_x86_ops->set_tss_addr(kvm, addr);
  295. return ret;
  296. }
/*
 * Change the shadow-MMU page budget for this VM and remember that
 * userspace requested it explicitly (n_requested_mmu_pages).
 * Rejects values below the minimum needed for the MMU to operate.
 */
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
					 u32 kvm_nr_mmu_pages)
{
	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
		return -EINVAL;

	/* kvm->lock serializes against the MMU resizing itself. */
	mutex_lock(&kvm->lock);

	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
	kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;

	mutex_unlock(&kvm->lock);
	return 0;
}
/* Report the current shadow-MMU page allocation for this VM. */
static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
	return kvm->n_alloc_mmu_pages;
}
  312. /*
  313. * Set a new alias region. Aliases map a portion of physical memory into
  314. * another portion. This is useful for memory windows, for example the PC
  315. * VGA region.
  316. */
  317. static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
  318. struct kvm_memory_alias *alias)
  319. {
  320. int r, n;
  321. struct kvm_mem_alias *p;
  322. r = -EINVAL;
  323. /* General sanity checks */
  324. if (alias->memory_size & (PAGE_SIZE - 1))
  325. goto out;
  326. if (alias->guest_phys_addr & (PAGE_SIZE - 1))
  327. goto out;
  328. if (alias->slot >= KVM_ALIAS_SLOTS)
  329. goto out;
  330. if (alias->guest_phys_addr + alias->memory_size
  331. < alias->guest_phys_addr)
  332. goto out;
  333. if (alias->target_phys_addr + alias->memory_size
  334. < alias->target_phys_addr)
  335. goto out;
  336. mutex_lock(&kvm->lock);
  337. p = &kvm->aliases[alias->slot];
  338. p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
  339. p->npages = alias->memory_size >> PAGE_SHIFT;
  340. p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
  341. for (n = KVM_ALIAS_SLOTS; n > 0; --n)
  342. if (kvm->aliases[n - 1].npages)
  343. break;
  344. kvm->naliases = n;
  345. kvm_mmu_zap_all(kvm);
  346. mutex_unlock(&kvm->lock);
  347. return 0;
  348. out:
  349. return r;
  350. }
  351. static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
  352. {
  353. int r;
  354. r = 0;
  355. switch (chip->chip_id) {
  356. case KVM_IRQCHIP_PIC_MASTER:
  357. memcpy(&chip->chip.pic,
  358. &pic_irqchip(kvm)->pics[0],
  359. sizeof(struct kvm_pic_state));
  360. break;
  361. case KVM_IRQCHIP_PIC_SLAVE:
  362. memcpy(&chip->chip.pic,
  363. &pic_irqchip(kvm)->pics[1],
  364. sizeof(struct kvm_pic_state));
  365. break;
  366. case KVM_IRQCHIP_IOAPIC:
  367. memcpy(&chip->chip.ioapic,
  368. ioapic_irqchip(kvm),
  369. sizeof(struct kvm_ioapic_state));
  370. break;
  371. default:
  372. r = -EINVAL;
  373. break;
  374. }
  375. return r;
  376. }
/*
 * Overwrite the state of the in-kernel irqchip selected by
 * chip->chip_id (PIC master, PIC slave, or IOAPIC) from *chip,
 * then re-evaluate PIC interrupt output.
 * Returns 0 on success, -EINVAL for an unknown chip id.
 */
static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
	int r;

	r = 0;
	switch (chip->chip_id) {
	case KVM_IRQCHIP_PIC_MASTER:
		memcpy(&pic_irqchip(kvm)->pics[0],
		       &chip->chip.pic,
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_PIC_SLAVE:
		memcpy(&pic_irqchip(kvm)->pics[1],
		       &chip->chip.pic,
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_IOAPIC:
		memcpy(ioapic_irqchip(kvm),
		       &chip->chip.ioapic,
		       sizeof(struct kvm_ioapic_state));
		break;
	default:
		r = -EINVAL;
		break;
	}
	/* NOTE(review): runs even when chip_id was invalid (r == -EINVAL);
	 * presumably a harmless recompute of PIC output — confirm intended. */
	kvm_pic_update_irq(pic_irqchip(kvm));
	return r;
}
  404. long kvm_arch_vm_ioctl(struct file *filp,
  405. unsigned int ioctl, unsigned long arg)
  406. {
  407. struct kvm *kvm = filp->private_data;
  408. void __user *argp = (void __user *)arg;
  409. int r = -EINVAL;
  410. switch (ioctl) {
  411. case KVM_SET_TSS_ADDR:
  412. r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
  413. if (r < 0)
  414. goto out;
  415. break;
  416. case KVM_SET_MEMORY_REGION: {
  417. struct kvm_memory_region kvm_mem;
  418. struct kvm_userspace_memory_region kvm_userspace_mem;
  419. r = -EFAULT;
  420. if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
  421. goto out;
  422. kvm_userspace_mem.slot = kvm_mem.slot;
  423. kvm_userspace_mem.flags = kvm_mem.flags;
  424. kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
  425. kvm_userspace_mem.memory_size = kvm_mem.memory_size;
  426. r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
  427. if (r)
  428. goto out;
  429. break;
  430. }
  431. case KVM_SET_NR_MMU_PAGES:
  432. r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
  433. if (r)
  434. goto out;
  435. break;
  436. case KVM_GET_NR_MMU_PAGES:
  437. r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
  438. break;
  439. case KVM_SET_MEMORY_ALIAS: {
  440. struct kvm_memory_alias alias;
  441. r = -EFAULT;
  442. if (copy_from_user(&alias, argp, sizeof alias))
  443. goto out;
  444. r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
  445. if (r)
  446. goto out;
  447. break;
  448. }
  449. case KVM_CREATE_IRQCHIP:
  450. r = -ENOMEM;
  451. kvm->vpic = kvm_create_pic(kvm);
  452. if (kvm->vpic) {
  453. r = kvm_ioapic_init(kvm);
  454. if (r) {
  455. kfree(kvm->vpic);
  456. kvm->vpic = NULL;
  457. goto out;
  458. }
  459. } else
  460. goto out;
  461. break;
  462. case KVM_IRQ_LINE: {
  463. struct kvm_irq_level irq_event;
  464. r = -EFAULT;
  465. if (copy_from_user(&irq_event, argp, sizeof irq_event))
  466. goto out;
  467. if (irqchip_in_kernel(kvm)) {
  468. mutex_lock(&kvm->lock);
  469. if (irq_event.irq < 16)
  470. kvm_pic_set_irq(pic_irqchip(kvm),
  471. irq_event.irq,
  472. irq_event.level);
  473. kvm_ioapic_set_irq(kvm->vioapic,
  474. irq_event.irq,
  475. irq_event.level);
  476. mutex_unlock(&kvm->lock);
  477. r = 0;
  478. }
  479. break;
  480. }
  481. case KVM_GET_IRQCHIP: {
  482. /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
  483. struct kvm_irqchip chip;
  484. r = -EFAULT;
  485. if (copy_from_user(&chip, argp, sizeof chip))
  486. goto out;
  487. r = -ENXIO;
  488. if (!irqchip_in_kernel(kvm))
  489. goto out;
  490. r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
  491. if (r)
  492. goto out;
  493. r = -EFAULT;
  494. if (copy_to_user(argp, &chip, sizeof chip))
  495. goto out;
  496. r = 0;
  497. break;
  498. }
  499. case KVM_SET_IRQCHIP: {
  500. /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
  501. struct kvm_irqchip chip;
  502. r = -EFAULT;
  503. if (copy_from_user(&chip, argp, sizeof chip))
  504. goto out;
  505. r = -ENXIO;
  506. if (!irqchip_in_kernel(kvm))
  507. goto out;
  508. r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
  509. if (r)
  510. goto out;
  511. r = 0;
  512. break;
  513. }
  514. default:
  515. ;
  516. }
  517. out:
  518. return r;
  519. }
  520. static __init void kvm_init_msr_list(void)
  521. {
  522. u32 dummy[2];
  523. unsigned i, j;
  524. for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
  525. if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
  526. continue;
  527. if (j < i)
  528. msrs_to_save[j] = msrs_to_save[i];
  529. j++;
  530. }
  531. num_msrs_to_save = j;
  532. }
/* Arch-level one-time init: filter the MSR list by host capability. */
__init void kvm_arch_init(void)
{
	kvm_init_msr_list();
}