mmu.c
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This module enables machines with Intel VT-x extensions to run virtual
 * machines without emulation or binary translation.
 *
 * MMU support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
#include <linux/types.h>
#include <linux/string.h>
#include <asm/page.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/module.h>

#include "vmx.h"
#include "kvm.h"

#define pgprintk(x...) do { } while (0)

#define ASSERT(x) \
        if (!(x)) { \
                printk(KERN_WARNING "assertion failed %s:%d: %s\n", \
                       __FILE__, __LINE__, #x); \
        }

#define PT64_ENT_PER_PAGE 512
#define PT32_ENT_PER_PAGE 1024

#define PT_WRITABLE_SHIFT 1

#define PT_PRESENT_MASK (1ULL << 0)
#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
#define PT_USER_MASK (1ULL << 2)
#define PT_PWT_MASK (1ULL << 3)
#define PT_PCD_MASK (1ULL << 4)
#define PT_ACCESSED_MASK (1ULL << 5)
#define PT_DIRTY_MASK (1ULL << 6)
#define PT_PAGE_SIZE_MASK (1ULL << 7)
#define PT_PAT_MASK (1ULL << 7)
#define PT_GLOBAL_MASK (1ULL << 8)
#define PT64_NX_MASK (1ULL << 63)

#define PT_PAT_SHIFT 7
#define PT_DIR_PAT_SHIFT 12
#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)

#define PT32_DIR_PSE36_SIZE 4
#define PT32_DIR_PSE36_SHIFT 13
#define PT32_DIR_PSE36_MASK \
        (((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)

#define PT32_PTE_COPY_MASK \
        (PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK | PT_GLOBAL_MASK)

#define PT64_PTE_COPY_MASK (PT64_NX_MASK | PT32_PTE_COPY_MASK)

#define PT_FIRST_AVAIL_BITS_SHIFT 9
#define PT64_SECOND_AVAIL_BITS_SHIFT 52

#define PT_SHADOW_PS_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)

#define PT_SHADOW_WRITABLE_SHIFT (PT_FIRST_AVAIL_BITS_SHIFT + 1)
#define PT_SHADOW_WRITABLE_MASK (1ULL << PT_SHADOW_WRITABLE_SHIFT)

#define PT_SHADOW_USER_SHIFT (PT_SHADOW_WRITABLE_SHIFT + 1)
#define PT_SHADOW_USER_MASK (1ULL << (PT_SHADOW_USER_SHIFT))

#define PT_SHADOW_BITS_OFFSET (PT_SHADOW_WRITABLE_SHIFT - PT_WRITABLE_SHIFT)

#define VALID_PAGE(x) ((x) != INVALID_PAGE)

#define PT64_LEVEL_BITS 9

#define PT64_LEVEL_SHIFT(level) \
        (PAGE_SHIFT + (level - 1) * PT64_LEVEL_BITS)

#define PT64_LEVEL_MASK(level) \
        (((1ULL << PT64_LEVEL_BITS) - 1) << PT64_LEVEL_SHIFT(level))

#define PT64_INDEX(address, level) \
        (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1))

#define PT32_LEVEL_BITS 10

#define PT32_LEVEL_SHIFT(level) \
        (PAGE_SHIFT + (level - 1) * PT32_LEVEL_BITS)

#define PT32_LEVEL_MASK(level) \
        (((1ULL << PT32_LEVEL_BITS) - 1) << PT32_LEVEL_SHIFT(level))

#define PT32_INDEX(address, level) \
        (((address) >> PT32_LEVEL_SHIFT(level)) & ((1 << PT32_LEVEL_BITS) - 1))
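
/*
 * Worked example (assuming the usual 4 KiB pages, PAGE_SHIFT == 12): the
 * 64-bit format consumes 9 index bits per level, so
 *
 *      PT64_LEVEL_SHIFT(1) == 12      PT64_INDEX(addr, 1) == bits 12..20
 *      PT64_LEVEL_SHIFT(2) == 21      PT64_INDEX(addr, 2) == bits 21..29
 *      PT64_LEVEL_SHIFT(3) == 30      PT64_INDEX(addr, 3) == bits 30..38
 *      PT64_LEVEL_SHIFT(4) == 39      PT64_INDEX(addr, 4) == bits 39..47
 *
 * The PT32_* variants consume 10 index bits per level instead.
 */
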
#define PT64_BASE_ADDR_MASK (((1ULL << 52) - 1) & PAGE_MASK)
#define PT64_DIR_BASE_ADDR_MASK \
        (PT64_BASE_ADDR_MASK & ~((1ULL << (PAGE_SHIFT + PT64_LEVEL_BITS)) - 1))

#define PT32_BASE_ADDR_MASK PAGE_MASK
#define PT32_DIR_BASE_ADDR_MASK \
        (PAGE_MASK & ~((1ULL << (PAGE_SHIFT + PT32_LEVEL_BITS)) - 1))

#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)

#define PT64_ROOT_LEVEL 4
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3

#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1

static int is_write_protection(struct kvm_vcpu *vcpu)
{
        return vcpu->cr0 & CR0_WP_MASK;
}

static int is_cpuid_PSE36(void)
{
        return 1;
}

static int is_present_pte(unsigned long pte)
{
        return pte & PT_PRESENT_MASK;
}

static int is_writeble_pte(unsigned long pte)
{
        return pte & PT_WRITABLE_MASK;
}

static int is_io_pte(unsigned long pte)
{
        return pte & PT_SHADOW_IO_MARK;
}
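
/*
 * Return a shadow page to the per-vcpu free list: unlink it from whatever
 * list it is currently on (presumably the VM's active list) and put it back
 * on vcpu->free_pages so kvm_mmu_alloc_page() can hand it out again.
 */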
static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
{
        struct kvm_mmu_page *page_head = page_header(page_hpa);

        list_del(&page_head->link);
        page_head->page_hpa = page_hpa;
        list_add(&page_head->link, &vcpu->free_pages);
}

static int is_empty_shadow_page(hpa_t page_hpa)
{
        u32 *pos;
        u32 *end;

        for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32);
             pos != end; pos++)
                if (*pos != 0)
                        return 0;
        return 1;
}
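
/*
 * Take a (zeroed) shadow page off the per-vcpu free list and move it onto
 * the VM-wide active list.  parent_pte, when non-NULL, records the shadow
 * entry that will point at this page, so the link can be severed later
 * (see kvm_mmu_flush_tlb()).  Returns INVALID_PAGE if the pool is empty.
 */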
static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
{
        struct kvm_mmu_page *page;

        if (list_empty(&vcpu->free_pages))
                return INVALID_PAGE;

        page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
        list_del(&page->link);
        list_add(&page->link, &vcpu->kvm->active_mmu_pages);
        ASSERT(is_empty_shadow_page(page->page_hpa));
        page->slot_bitmap = 0;
        page->global = 1;
        page->parent_pte = parent_pte;
        return page->page_hpa;
}

static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
{
        int slot = memslot_id(kvm, gfn_to_memslot(kvm, gpa >> PAGE_SHIFT));
        struct kvm_mmu_page *page_head = page_header(__pa(pte));

        __set_bit(slot, &page_head->slot_bitmap);
}
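
/*
 * Guest-physical to host-physical translation helpers.  gpa_to_hpa() flags
 * an address with no backing memslot by setting HPA_ERR_MASK;
 * safe_gpa_to_hpa() substitutes bad_page_address (presumably a dummy page)
 * so that callers always receive something mappable.
 */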
hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
        hpa_t hpa = gpa_to_hpa(vcpu, gpa);

        return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK) : hpa;
}

hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
        struct kvm_memory_slot *slot;
        struct page *page;

        ASSERT((gpa & HPA_ERR_MASK) == 0);
        slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
        if (!slot)
                return gpa | HPA_ERR_MASK;
        page = gfn_to_page(slot, gpa >> PAGE_SHIFT);
        return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
                | (gpa & (PAGE_SIZE-1));
}

hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
{
        gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);

        if (gpa == UNMAPPED_GVA)
                return UNMAPPED_GVA;
        return gpa_to_hpa(vcpu, gpa);
}
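
/*
 * Recursively tear down the shadow page table rooted at page_hpa.  A leaf
 * page is simply zeroed; an interior page has each present entry cleared
 * and the page it referenced released first.  Every page visited ends up
 * back on the free list via kvm_mmu_free_page().
 */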
static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
                               int level)
{
        ASSERT(vcpu);
        ASSERT(VALID_PAGE(page_hpa));
        ASSERT(level <= PT64_ROOT_LEVEL && level > 0);

        if (level == 1)
                memset(__va(page_hpa), 0, PAGE_SIZE);
        else {
                u64 *pos;
                u64 *end;

                for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
                     pos != end; pos++) {
                        u64 current_ent = *pos;

                        *pos = 0;
                        if (is_present_pte(current_ent))
                                release_pt_page_64(vcpu,
                                                   current_ent &
                                                   PT64_BASE_ADDR_MASK,
                                                   level - 1);
                }
        }
        kvm_mmu_free_page(vcpu, page_hpa);
}

static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
{
}
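
/*
 * Install a mapping in the shadow table used while the guest has paging
 * disabled (guest-virtual == guest-physical).  The walk starts at
 * PT32E_ROOT_LEVEL and allocates intermediate shadow pages on demand;
 * -ENOMEM is returned when the free pool is exhausted, which the caller
 * handles by flushing the shadow tree and retrying.
 */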
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
{
        int level = PT32E_ROOT_LEVEL;
        hpa_t table_addr = vcpu->mmu.root_hpa;

        for (; ; level--) {
                u32 index = PT64_INDEX(v, level);
                u64 *table;

                ASSERT(VALID_PAGE(table_addr));
                table = __va(table_addr);

                if (level == 1) {
                        mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
                        page_header_update_slot(vcpu->kvm, table, v);
                        table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
                                       PT_USER_MASK;
                        return 0;
                }

                if (table[index] == 0) {
                        hpa_t new_table = kvm_mmu_alloc_page(vcpu,
                                                             &table[index]);

                        if (!VALID_PAGE(new_table)) {
                                pgprintk("nonpaging_map: ENOMEM\n");
                                return -ENOMEM;
                        }

                        if (level == PT32E_ROOT_LEVEL)
                                table[index] = new_table | PT_PRESENT_MASK;
                        else
                                table[index] = new_table | PT_PRESENT_MASK |
                                               PT_WRITABLE_MASK | PT_USER_MASK;
                }
                table_addr = table[index] & PT64_BASE_ADDR_MASK;
        }
}
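
/*
 * Shadow-page pool exhausted: release the entire shadow tree, allocate a
 * fresh empty root and point the hardware at it.  Mappings are then
 * recreated on demand by subsequent page faults.
 */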
static void nonpaging_flush(struct kvm_vcpu *vcpu)
{
        hpa_t root = vcpu->mmu.root_hpa;

        ++kvm_stat.tlb_flush;
        pgprintk("nonpaging_flush\n");
        ASSERT(VALID_PAGE(root));
        release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
        root = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(root));
        vcpu->mmu.root_hpa = root;
        if (is_paging(vcpu))
                root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK));
        kvm_arch_ops->set_cr3(vcpu, root);
        kvm_arch_ops->tlb_flush(vcpu);
}

static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
{
        return vaddr;
}

static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
                                u32 error_code)
{
        int ret;
        gpa_t addr = gva;

        ASSERT(vcpu);
        ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));

        for (;;) {
                hpa_t paddr;

                paddr = gpa_to_hpa(vcpu, addr & PT64_BASE_ADDR_MASK);

                if (is_error_hpa(paddr))
                        return 1;

                ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
                if (ret) {
                        nonpaging_flush(vcpu);
                        continue;
                }
                break;
        }
        return ret;
}

static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
{
}

static void nonpaging_free(struct kvm_vcpu *vcpu)
{
        hpa_t root;

        ASSERT(vcpu);
        root = vcpu->mmu.root_hpa;
        if (VALID_PAGE(root))
                release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
        vcpu->mmu.root_hpa = INVALID_PAGE;
}

static int nonpaging_init_context(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu *context = &vcpu->mmu;

        context->new_cr3 = nonpaging_new_cr3;
        context->page_fault = nonpaging_page_fault;
        context->inval_page = nonpaging_inval_page;
        context->gva_to_gpa = nonpaging_gva_to_gpa;
        context->free = nonpaging_free;
        context->root_level = PT32E_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
        return 0;
}
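
/*
 * Emulate a guest TLB flush: drop every non-global shadow page that has a
 * parent entry, clearing that parent entry first so the shadow tree stays
 * consistent, then flush the hardware TLB as well.
 */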
static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu_page *page, *npage;

        list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages,
                                 link) {
                if (page->global)
                        continue;

                if (!page->parent_pte)
                        continue;

                *page->parent_pte = 0;
                release_pt_page_64(vcpu, page->page_hpa, 1);
        }
        ++kvm_stat.tlb_flush;
        kvm_arch_ops->tlb_flush(vcpu);
}

static void paging_new_cr3(struct kvm_vcpu *vcpu)
{
        kvm_mmu_flush_tlb(vcpu);
}

static void mark_pagetable_nonglobal(void *shadow_pte)
{
        page_header(__pa(shadow_pte))->global = 0;
}
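
/*
 * Common part of building a shadow pte.  The guest access bits are saved
 * in the shadow-only bit range (shifted up by PT_SHADOW_BITS_OFFSET) and
 * then applied, except that a page the guest has not marked dirty loses
 * its writable bit, presumably so the first write faults and the dirty
 * state can be tracked.  A guest address with no backing memslot is
 * recorded as not-present with PT_SHADOW_IO_MARK set and the guest
 * physical address kept in place of a host one.
 */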
static inline void set_pte_common(struct kvm_vcpu *vcpu,
                                  u64 *shadow_pte,
                                  gpa_t gaddr,
                                  int dirty,
                                  u64 access_bits)
{
        hpa_t paddr;

        *shadow_pte |= access_bits << PT_SHADOW_BITS_OFFSET;
        if (!dirty)
                access_bits &= ~PT_WRITABLE_MASK;

        if (access_bits & PT_WRITABLE_MASK)
                mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);

        *shadow_pte |= access_bits;

        paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);

        if (!(*shadow_pte & PT_GLOBAL_MASK))
                mark_pagetable_nonglobal(shadow_pte);

        if (is_error_hpa(paddr)) {
                *shadow_pte |= gaddr;
                *shadow_pte |= PT_SHADOW_IO_MARK;
                *shadow_pte &= ~PT_PRESENT_MASK;
        } else {
                *shadow_pte |= paddr;
                page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
        }
}

static void inject_page_fault(struct kvm_vcpu *vcpu,
                              u64 addr,
                              u32 err_code)
{
        kvm_arch_ops->inject_page_fault(vcpu, addr, err_code);
}

static inline int fix_read_pf(u64 *shadow_ent)
{
        if ((*shadow_ent & PT_SHADOW_USER_MASK) &&
            !(*shadow_ent & PT_USER_MASK)) {
                /*
                 * If supervisor write protect is disabled, we shadow kernel
                 * pages as user pages so we can trap the write access.
                 */
                *shadow_ent |= PT_USER_MASK;
                *shadow_ent &= ~PT_WRITABLE_MASK;
                return 1;
        }
        return 0;
}

static int may_access(u64 pte, int write, int user)
{
        if (user && !(pte & PT_USER_MASK))
                return 0;
        if (write && !(pte & PT_WRITABLE_MASK))
                return 0;
        return 1;
}

/*
 * Remove a shadow pte.
 */
static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
{
        hpa_t page_addr = vcpu->mmu.root_hpa;
        int level = vcpu->mmu.shadow_root_level;

        ++kvm_stat.invlpg;

        for (; ; level--) {
                u32 index = PT64_INDEX(addr, level);
                u64 *table = __va(page_addr);

                if (level == PT_PAGE_TABLE_LEVEL) {
                        table[index] = 0;
                        return;
                }

                if (!is_present_pte(table[index]))
                        return;

                page_addr = table[index] & PT64_BASE_ADDR_MASK;

                if (level == PT_DIRECTORY_LEVEL &&
                    (table[index] & PT_SHADOW_PS_MARK)) {
                        table[index] = 0;
                        release_pt_page_64(vcpu, page_addr,
                                           PT_PAGE_TABLE_LEVEL);
                        kvm_arch_ops->tlb_flush(vcpu);
                        return;
                }
        }
}

static void paging_free(struct kvm_vcpu *vcpu)
{
        nonpaging_free(vcpu);
}

#define PTTYPE 64
#include "paging_tmpl.h"
#undef PTTYPE

#define PTTYPE 32
#include "paging_tmpl.h"
#undef PTTYPE
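
/*
 * The two includes above instantiate the guest page table walker twice:
 * once for the 64-bit/PAE entry format (generating the paging64_* symbols)
 * and once for the legacy 32-bit format (paging32_*).  The init functions
 * below wire the appropriate instantiation into vcpu->mmu.
 */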
static int paging64_init_context(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu *context = &vcpu->mmu;

        ASSERT(is_pae(vcpu));
        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging64_page_fault;
        context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging64_gva_to_gpa;
        context->free = paging_free;
        context->root_level = PT64_ROOT_LEVEL;
        context->shadow_root_level = PT64_ROOT_LEVEL;
        context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
                              (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
        return 0;
}

static int paging32_init_context(struct kvm_vcpu *vcpu)
{
        struct kvm_mmu *context = &vcpu->mmu;

        context->new_cr3 = paging_new_cr3;
        context->page_fault = paging32_page_fault;
        context->inval_page = paging_inval_page;
        context->gva_to_gpa = paging32_gva_to_gpa;
        context->free = paging_free;
        context->root_level = PT32_ROOT_LEVEL;
        context->shadow_root_level = PT32E_ROOT_LEVEL;
        context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL);
        ASSERT(VALID_PAGE(context->root_hpa));
        kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
                              (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
        return 0;
}

static int paging32E_init_context(struct kvm_vcpu *vcpu)
{
        int ret;

        if ((ret = paging64_init_context(vcpu)))
                return ret;

        vcpu->mmu.root_level = PT32E_ROOT_LEVEL;
        vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL;
        return 0;
}
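
/*
 * Select the shadow MMU flavour matching the guest's current paging mode:
 * non-paged, 64-bit long mode, PAE, or plain 32-bit paging.
 */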
static int init_kvm_mmu(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));

        if (!is_paging(vcpu))
                return nonpaging_init_context(vcpu);
        else if (kvm_arch_ops->is_long_mode(vcpu))
                return paging64_init_context(vcpu);
        else if (is_pae(vcpu))
                return paging32E_init_context(vcpu);
        else
                return paging32_init_context(vcpu);
}

static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);
        if (VALID_PAGE(vcpu->mmu.root_hpa)) {
                vcpu->mmu.free(vcpu);
                vcpu->mmu.root_hpa = INVALID_PAGE;
        }
}

int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
{
        destroy_kvm_mmu(vcpu);
        return init_kvm_mmu(vcpu);
}

static void free_mmu_pages(struct kvm_vcpu *vcpu)
{
        while (!list_empty(&vcpu->free_pages)) {
                struct kvm_mmu_page *page;

                page = list_entry(vcpu->free_pages.next,
                                  struct kvm_mmu_page, link);
                list_del(&page->link);
                __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
                page->page_hpa = INVALID_PAGE;
        }
}
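
/*
 * Preallocate the fixed pool of KVM_NUM_MMU_PAGES shadow pages for this
 * vcpu.  Each page's struct kvm_mmu_page header lives in
 * vcpu->page_header_buf and is linked to its page through page->private,
 * presumably what page_header() reads back.
 */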
static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
{
        int i;

        ASSERT(vcpu);

        for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
                struct page *page;
                struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];

                INIT_LIST_HEAD(&page_header->link);
                if ((page = alloc_page(GFP_KVM_MMU)) == NULL)
                        goto error_1;
                page->private = (unsigned long)page_header;
                page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
                memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
                list_add(&page_header->link, &vcpu->free_pages);
        }
        return 0;

error_1:
        free_mmu_pages(vcpu);
        return -ENOMEM;
}

int kvm_mmu_init(struct kvm_vcpu *vcpu)
{
        int r;

        ASSERT(vcpu);
        ASSERT(!VALID_PAGE(vcpu->mmu.root_hpa));
        ASSERT(list_empty(&vcpu->free_pages));

        if ((r = alloc_mmu_pages(vcpu)))
                return r;

        if ((r = init_kvm_mmu(vcpu))) {
                free_mmu_pages(vcpu);
                return r;
        }
        return 0;
}

void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
        ASSERT(vcpu);

        destroy_kvm_mmu(vcpu);
        free_mmu_pages(vcpu);
}
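
/*
 * Write-protect every shadow pte that maps memory belonging to the given
 * slot, presumably in support of dirty-page logging for that slot.  The
 * slot_bitmap kept in each shadow page lets us skip pages that cannot
 * contain such ptes.
 */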
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
{
        struct kvm_mmu_page *page;

        list_for_each_entry(page, &kvm->active_mmu_pages, link) {
                int i;
                u64 *pt;

                if (!test_bit(slot, &page->slot_bitmap))
                        continue;

                pt = __va(page->page_hpa);
                for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
                        /* avoid RMW */
                        if (pt[i] & PT_WRITABLE_MASK)
                                pt[i] &= ~PT_WRITABLE_MASK;
        }
}