mmu.c

/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include <linux/types.h>
#include <linux/string.h>
#include <asm/page.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>

#include "vmx.h"
#include "kvm.h"

#define pgprintk(x...) do { } while (0)
#define rmap_printk(x...) do { } while (0)

#define ASSERT(x)                                                       \
        if (!(x)) {                                                     \
                printk(KERN_WARNING "assertion failed %s:%d: %s\n",     \
                       __FILE__, __LINE__, #x);                         \
        }

#define PT64_ENT_PER_PAGE 512
#define PT32_ENT_PER_PAGE 1024

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_MASK (1ULL << 63)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)

#define PT32_PTE_COPY_MASK \
        (PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK)

#define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK)

#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1)
#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT)

#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1)
#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT))

#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT)

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
        (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)

#define PT64_LEVEL_MASK(level) \
        (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level) \
        (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))

#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
        (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)

#define PT32_LEVEL_MASK(level) \
        (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level) \
        (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))

#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK)
#define PT64_DIR_BASE_ADDR_MASK \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))

#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

#define RMAP_EXT 4

struct kvm_rmap_desc {
        u64 *shadow_ptes[RMAP_EXT];
        struct kvm_rmap_desc *more;
};

static int is_write_protection(struct kvm_vcpu *vcpu)
{
        return vcpu->cr0 & CR0_WP_MASK;
}

static int is_cpuid_PSE36(void)
{
        return 1;
}

static int is_present_pte(unsigned long pte)
{
        return pte & PT_PRESENT_MASK;
}

static int is_writeble_pte(unsigned long pte)
{
        return pte & PT_WRITABLE_MASK;
}

static int is_io_pte(unsigned long pte)
{
        return pte & PT_SHADOW_IO_MARK;
}

static int is_rmap_pte(u64 pte)
{
        return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
                == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
}

/*
 * Reverse mapping data structures:
 *
 * If page->private bit zero is zero, then page->private points to the
 * shadow page table entry that points to page_address(page).
 *
 * If page->private bit zero is one, then (page->private & ~1) points
 * to a struct kvm_rmap_desc containing more mappings.
 */
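/*
 * In outline, a reader of this reverse map would decode page->private
 * roughly as follows (this mirrors the tests in rmap_add() and
 * rmap_remove() below):
 *
 *      if (!page->private)
 *              no shadow ptes map this page
 *      else if (!(page->private & 1))
 *              (u64 *)page->private is the single shadow pte
 *      else
 *              (struct kvm_rmap_desc *)(page->private & ~1ul) heads a
 *              chain of descriptors, each holding up to RMAP_EXT sptes
 */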
static void rmap_add(struct kvm *kvm, u64 *spte)
{
        struct page *page;
        struct kvm_rmap_desc *desc;
        int i;

        if (!is_rmap_pte(*spte))
                return;
        page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
        if (!page->private) {
                rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
                page->private = (unsigned long)spte;
        } else if (!(page->private & 1)) {
                rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
                desc = kzalloc(sizeof *desc, GFP_NOWAIT);
                if (!desc)
                        BUG(); /* FIXME: return error */
                desc->shadow_ptes[0] = (u64 *)page->private;
                desc->shadow_ptes[1] = spte;
                page->private = (unsigned long)desc | 1;
        } else {
                rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
                desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
                while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
                        desc = desc->more;
                if (desc->shadow_ptes[RMAP_EXT-1]) {
                        desc->more = kzalloc(sizeof *desc->more, GFP_NOWAIT);
                        if (!desc->more)
                                BUG(); /* FIXME: return error */
                        desc = desc->more;
                }
                for (i = 0; desc->shadow_ptes[i]; ++i)
                        ;
                desc->shadow_ptes[i] = spte;
        }
}

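/*
 * rmap_desc_remove_entry() compacts a descriptor by moving its last
 * occupied slot into the position being freed.  Only when that leaves the
 * descriptor completely empty (j == 0) is it unlinked from the chain and
 * kfree()d, with page->private updated to skip it.
 */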
static void rmap_desc_remove_entry(struct page *page,
                                   struct kvm_rmap_desc *desc,
                                   int i,
                                   struct kvm_rmap_desc *prev_desc)
{
        int j;

        for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
                ;
        desc->shadow_ptes[i] = desc->shadow_ptes[j];
        desc->shadow_ptes[j] = 0;
        if (j != 0)
                return;
        if (!prev_desc && !desc->more)
                page->private = (unsigned long)desc->shadow_ptes[0];
        else
                if (prev_desc)
                        prev_desc->more = desc->more;
                else
                        page->private = (unsigned long)desc->more | 1;
        kfree(desc);
}

static void rmap_remove(struct kvm *kvm, u64 *spte)
{
        struct page *page;
        struct kvm_rmap_desc *desc;
        struct kvm_rmap_desc *prev_desc;
        int i;

        if (!is_rmap_pte(*spte))
                return;
        page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
        if (!page->private) {
                printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
                BUG();
        } else if (!(page->private & 1)) {
                rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
                if ((u64 *)page->private != spte) {
                        printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
                               spte, *spte);
                        BUG();
                }
                page->private = 0;
        } else {
                rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
                desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
                prev_desc = NULL;
                while (desc) {
                        for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
                                if (desc->shadow_ptes[i] == spte) {
                                        rmap_desc_remove_entry(page, desc, i,
                                                               prev_desc);
                                        return;
                                }
                        prev_desc = desc;
                        desc = desc->more;
                }
                BUG();
        }
}

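/*
 * Shadow pages come from a small per-vcpu pool filled by alloc_mmu_pages()
 * below: kvm_mmu_alloc_page() moves a page from vcpu->free_pages onto the
 * kvm-wide active_mmu_pages list, and kvm_mmu_free_page() puts it back.
 * When the pool runs dry, callers recycle pages by tearing down part or
 * all of the shadow tree (see nonpaging_flush() and kvm_mmu_flush_tlb()).
 */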
static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{
        struct kvm_mmu_page *page_head = page_header(page_hpa);

        list_del(&page_head->link);
        page_head->page_hpa = page_hpa;
        list_add(&page_head->link, &vcpu->free_pages);
}

static int is_empty_shadow_page(hpa_t page_hpa)
{
        u32 *pos;
        u32 *end;

        for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32);
             pos != end; pos++)
                if (*pos != 0)
                        return 0;
        return 1;
}

static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
{
        struct kvm_mmu_page *page;

        if (list_empty(&vcpu->free_pages))
                return INVALID_PAGE;

        page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
        list_del(&page->link);
        list_add(&page->link, &vcpu->kvm->active_mmu_pages);
        ASSERT(is_empty_shadow_page(page->page_hpa));
        page->slot_bitmap = 0;
        page->global = 1;
        page->parent_pte = parent_pte;
        return page->page_hpa;
}

static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
{
        int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
        struct kvm_mmu_page *page_head = page_header(__pa(pte));

        __set_bit(slot, &page_head->slot_bitmap);
}

hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
        hpa_t hpa = gpa_to_hpa(vcpu, gpa);

        return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK) : hpa;
}

hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
        struct kvm_memory_slot *slot;
        struct page *page;

        ASSERT((gpa & HPA_ERR_MASK) == 0);
        slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
        if (!slot)
                return gpa | HPA_ERR_MASK;
        page = gfn_to_page(slot, gpa >> PAGE_SHIFT);
        return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
                | (gpa & (PAGE_SIZE-1));
}

hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
{
        gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

        if (gpa == UNMAPPED_GVA)
                return UNMAPPED_GVA;
        return gpa_to_hpa(vcpu, gpa);
}

static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
                               int level)
{
        u64 *pos;
        u64 *end;

        ASSERT(vcpu);
        ASSERT(VALID_PAGE(page_hpa));
        ASSERT(level <= PT64_ROOT_LEVEL && level > 0);

        for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
             pos != end; pos++) {
                u64 current_ent = *pos;

                if (is_present_pte(current_ent)) {
                        if (level != 1)
                                release_pt_page_64(vcpu,
                                                   current_ent &
                                                   PT64_BASE_ADDR_MASK,
                                                   level - 1);
                        else
                                rmap_remove(vcpu->kvm, pos);
                }
                *pos = 0;
        }
        kvm_mmu_free_page(vcpu, page_hpa);
}

static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}

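/*
 * nonpaging_map() installs a guest-physical -> host-physical translation
 * while the guest has paging disabled: it walks the shadow table from the
 * root, allocating intermediate shadow pages for missing levels, and at
 * level 1 writes a present, writable, user pte and registers it in the
 * reverse map.  It returns -ENOMEM when the shadow-page pool is exhausted.
 */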
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
{
        int level = PT32E_ROOT_LEVEL;
        hpa_t table_addr = vcpu->mmu.root_hpa;

        for (; ; level--) {
                u32 index = PT64_INDEX(v, level);
                u64 *table;

                ASSERT(VALID_PAGE(table_addr));
                table = __va(table_addr);

                if (level == 1) {
                        mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
                        page_header_update_slot(vcpu->kvm, table, v);
                        table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
                                                             PT_USER_MASK;
                        rmap_add(vcpu->kvm, &table[index]);
                        return 0;
                }

                if (table[index] == 0) {
                        hpa_t new_table = kvm_mmu_alloc_page(vcpu,
                                                             &table[index]);

                        if (!VALID_PAGE(new_table)) {
                                pgprintk("nonpaging_map: ENOMEM\n");
                                return -ENOMEM;
                        }

                        if (level == PT32E_ROOT_LEVEL)
                                table[index] = new_table | PT_PRESENT_MASK;
                        else
                                table[index] = new_table | PT_PRESENT_MASK |
                                               PT_WRITABLE_MASK | PT_USER_MASK;
                }
                table_addr = table[index] & PT64_BASE_ADDR_MASK;
        }
}

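/*
 * When nonpaging_map() runs out of shadow pages, nonpaging_page_fault()
 * below calls nonpaging_flush() to tear down the entire shadow tree
 * (returning its pages to the free list), allocate a fresh root, reload
 * cr3 and flush the TLB, and then retries the mapping.
 */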
static void nonpaging_flush(struct kvm_vcpu *vcpu)
{
        hpa_t root = vcpu->mmu.root_hpa;

        ++kvm_stat.tlb_flush;
        pgprintk("nonpaging_flush\n");
        ASSERT(VALID_PAGE(root));
        release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
        root = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(root));
        vcpu->mmu.root_hpa = root;
        if (is_paging(vcpu))
                root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK));
        kvm_arch_ops->set_cr3(vcpu, root);
        kvm_arch_ops->tlb_flush(vcpu);
}

static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
        return vaddr;
}

static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
                                u32 error_code)
{
        int ret;
        gpa_t addr = gva;

        ASSERT(vcpu);
        ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));

        for (;;) {
                hpa_t paddr;

                paddr = gpa_to_hpa(vcpu, addr & PT64_BASE_ADDR_MASK);

                if (is_error_hpa(paddr))
                        return 1;

                ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
                if (ret) {
                        nonpaging_flush(vcpu);
                        continue;
                }
                break;
        }
        return ret;
}

static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
{
}

static void nonpaging_free(struct kvm_vcpu *vcpu)
{
        hpa_t root;

        ASSERT(vcpu);
        root = vcpu->mmu.root_hpa;
        if (VALID_PAGE(root))
                release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
        vcpu->mmu.root_hpa = INVALID_PAGE;
}

static int nonpaging_init_context(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu *context = &vcpu->mmu;

        context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = nonpaging_page_fault;
        context->inval_page = nonpaging_inval_page;
        context->gva_to_gpa = nonpaging_gva_to_gpa;
        context->free = nonpaging_free;
        context->root_level = PT32E_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
        return 0;
}

static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu_page *page, *npage;

        list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages,
                                 link) {
                if (page->global)
                        continue;

                if (!page->parent_pte)
                        continue;

                *page->parent_pte = 0;
                release_pt_page_64(vcpu, page->page_hpa, 1);
        }
        ++kvm_stat.tlb_flush;
        kvm_arch_ops->tlb_flush(vcpu);
}

static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
        kvm_mmu_flush_tlb(vcpu);
}

static void mark_pagetable_nonglobal(void *shadow_pte)
{
        page_header(__pa(shadow_pte))->global = 0;
}

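/*
 * set_pte_common() finishes building a shadow pte: the guest's access bits
 * are stashed in the shadow-only bit range (shifted up by
 * PT_SHADOW_BITS_OFFSET), a mapping that stays writable marks the guest
 * page dirty, and the guest-physical address is translated to a
 * host-physical one.  Addresses with no backing memslot are recorded as
 * non-present MMIO ptes via PT_SHADOW_IO_MARK; ptes for real memory are
 * added to the reverse map.
 */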
static inline void set_pte_common(struct kvm_vcpu *vcpu,
                                  u64 *shadow_pte,
                                  gpa_t gaddr,
                                  int dirty,
                                  u64 access_bits)
{
        hpa_t paddr;

        *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
        if (!dirty)
                access_bits &= ~PT_WRITABLE_MASK;
        if (access_bits & PT_WRITABLE_MASK)
                mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);

        *shadow_pte |= access_bits;

        paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);

        if (!(*shadow_pte & PT_GLOBAL_MASK))
                mark_pagetable_nonglobal(shadow_pte);

        if (is_error_hpa(paddr)) {
                *shadow_pte |= gaddr;
                *shadow_pte |= PT_SHADOW_IO_MARK;
                *shadow_pte &= ~PT_PRESENT_MASK;
        } else {
                *shadow_pte |= paddr;
                page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
                rmap_add(vcpu->kvm, shadow_pte);
        }
}

static void inject_page_fault(struct kvm_vcpu *vcpu,
                              u64 addr,
                              u32 err_code)
{
        kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
}

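/*
 * fix_read_pf() repairs a read fault taken through a shadow pte whose
 * guest user bit (saved as PT_SHADOW_USER_MASK by set_pte_common() above)
 * is set but whose hardware PT_USER_MASK was left clear: user access is
 * granted back, while PT_WRITABLE_MASK is kept clear so that writes still
 * trap (see the in-function comment about shadowing kernel pages as user
 * pages when supervisor write protection is disabled).  It returns 1 if
 * the pte was adjusted, 0 otherwise.
 */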
static inline int fix_read_pf(u64 *shadow_ent)
{
        if ((*shadow_ent & PT_SHADOW_USER_MASK) &&
            !(*shadow_ent & PT_USER_MASK)) {
                /*
                 * If supervisor write protect is disabled, we shadow kernel
                 * pages as user pages so we can trap the write access.
                 */
                *shadow_ent |= PT_USER_MASK;
                *shadow_ent &= ~PT_WRITABLE_MASK;

                return 1;
        }
        return 0;
}

static int may_access(u64 pte, int write, int user)
{
        if (user && !(pte & PT_USER_MASK))
                return 0;
        if (write && !(pte & PT_WRITABLE_MASK))
                return 0;
        return 1;
}

/*
 * Remove a shadow pte.
 */
static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
{
        hpa_t page_addr = vcpu->mmu.root_hpa;
        int level = vcpu->mmu.shadow_root_level;

        ++kvm_stat.invlpg;

        for (; ; level--) {
                u32 index = PT64_INDEX(addr, level);
                u64 *table = __va(page_addr);

                if (level == PT_PAGE_TABLE_LEVEL) {
                        rmap_remove(vcpu->kvm, &table[index]);
                        table[index] = 0;
                        return;
                }

                if (!is_present_pte(table[index]))
                        return;

                page_addr = table[index] & PT64_BASE_ADDR_MASK;

                if (level == PT_DIRECTORY_LEVEL &&
                    (table[index] & PT_SHADOW_PS_MARK)) {
                        table[index] = 0;
                        release_pt_page_64(vcpu, page_addr,
                                           PT_PAGE_TABLE_LEVEL);

                        kvm_arch_ops->tlb_flush(vcpu);
                        return;
                }
        }
}

static void paging_free(struct kvm_vcpu *vcpu)
{
        nonpaging_free(vcpu);
}

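/*
 * paging_tmpl.h is included twice with different PTTYPE values; it uses
 * PTTYPE to select the guest pte width and to generate the paging64_* and
 * paging32_* page-fault and gva_to_gpa functions referenced below.
 */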
#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE

#define PTTYPE 32
#include "paging_tmpl.h"
#undef PTTYPE

static int paging64_init_context(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu *context = &vcpu->mmu;

        ASSERT(is_pae(vcpu));
        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging64_page_fault;
        context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging64_gva_to_gpa;
        context->free = paging_free;
        context->root_level = PT64_ROOT_LEVEL;
        context->shadow_root_level = PT64_ROOT_LEVEL;
        context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
                              (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
        return 0;
}

static int paging32_init_context(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu *context = &vcpu->mmu;

        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging32_page_fault;
        context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging32_gva_to_gpa;
        context->free = paging_free;
        context->root_level = PT32_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
                              (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
        return 0;
}

static int paging32E_init_context(struct kvm_vcpu *vcpu)
{
        int ret;

        if ((ret = paging64_init_context(vcpu)))
                return ret;

        vcpu->mmu.root_level = PT32E_ROOT_LEVEL;
        vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL;
        return 0;
}

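/*
 * init_kvm_mmu() picks the context matching the guest's paging mode:
 * none, 64-bit long mode, PAE, or plain 32-bit.  Even the 32-bit context
 * uses PT32E_ROOT_LEVEL for its shadow tables, presumably so that shadow
 * ptes are always 64 bits wide and can carry the PT_SHADOW_* bits and a
 * full host physical address.
 */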
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

        if (!is_paging(vcpu))
                return nonpaging_init_context(vcpu);
        else if (is_long_mode(vcpu))
                return paging64_init_context(vcpu);
        else if (is_pae(vcpu))
                return paging32E_init_context(vcpu);
        else
                return paging32_init_context(vcpu);
}

static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        if (VALID_PAGE(vcpu->mmu.root_hpa)) {
                vcpu->mmu.free(vcpu);
                vcpu->mmu.root_hpa = INVALID_PAGE;
        }
}

int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
        destroy_kvm_mmu(vcpu);
        return init_kvm_mmu(vcpu);
}

static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
        while (!list_empty(&vcpu->free_pages)) {
                struct kvm_mmu_page *page;

                page = list_entry(vcpu->free_pages.next,
                                  struct kvm_mmu_page, link);
                list_del(&page->link);
                __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
                page->page_hpa = INVALID_PAGE;
        }
}

static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
{
        int i;

        ASSERT(vcpu);

        for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
                struct page *page;
                struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];

                INIT_LIST_HEAD(&page_header->link);
                if ((page = alloc_page(GFP_KVM_MMU)) == NULL)
                        goto error_1;
                page->private = (unsigned long)page_header;
                page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
                memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
                list_add(&page_header->link, &vcpu->free_pages);
        }
        return 0;

error_1:
        free_mmu_pages(vcpu);
        return -ENOMEM;
}

int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
        ASSERT(list_empty(&vcpu->free_pages));

        return alloc_mmu_pages(vcpu);
}

int kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
        ASSERT(!list_empty(&vcpu->free_pages));

        return init_kvm_mmu(vcpu);
}

void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);

        destroy_kvm_mmu(vcpu);
        free_mmu_pages(vcpu);
}

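/*
 * kvm_mmu_slot_remove_write_access() write-protects every shadow pte that
 * maps memory in the given slot (each shadow page records the slots it
 * maps in slot_bitmap), dropping the matching reverse-map entries as it
 * goes.  Later guest writes to the slot then fault back into the MMU,
 * presumably in support of dirty-page tracking for that slot.
 */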
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
        struct kvm_mmu_page *page;

        list_for_each_entry(page, &kvm->active_mmu_pages, link) {
                int i;
                u64 *pt;

                if (!test_bit(slot, &page->slot_bitmap))
                        continue;

                pt = __va(page->page_hpa);
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                        /* avoid RMW */
                        if (pt[i] & PT_WRITABLE_MASK) {
                                rmap_remove(kvm, &pt[i]);
                                pt[i] &= ~PT_WRITABLE_MASK;
                        }
        }
}