/*
 * mmu_audit.c:
 *
 * Audit code for KVM MMU
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/ratelimit.h>
char const *audit_point_name[] = {
	"pre page fault",
	"post page fault",
	"pre pte write",
	"post pte write",
	"pre sync",
	"post sync"
};

#define audit_printk(kvm, fmt, args...)		\
	printk(KERN_ERR "audit: (%s) error: "	\
	       fmt, audit_point_name[kvm->arch.audit_point], ##args)
typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);
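
/*
 * Recursively walk a shadow page table: apply @fn to every SPTE of @sp,
 * then descend one level into any entry that is present and not a leaf.
 */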
static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			    inspect_spte_fn fn, int level)
{
	int i;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		u64 *ent = sp->spt;

		fn(vcpu, ent + i, level);

		if (is_shadow_present_pte(ent[i]) &&
		    !is_last_spte(ent[i], level)) {
			struct kvm_mmu_page *child;

			child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
			__mmu_spte_walk(vcpu, child, fn, level - 1);
		}
	}
}
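
/*
 * Start the walk from the active root(s): a single root table for
 * 4-level paging, or the four PAE root entries (each pointing to a
 * level-2 table) otherwise.
 */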
static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
{
	int i;
	struct kvm_mmu_page *sp;

	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
		return;

	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->arch.mmu.root_hpa;

		sp = page_header(root);
		__mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
		return;
	}

	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->arch.mmu.pae_root[i];

		if (root && VALID_PAGE(root)) {
			root &= PT64_BASE_ADDR_MASK;
			sp = page_header(root);
			__mmu_spte_walk(vcpu, sp, fn, 2);
		}
	}

	return;
}

typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);
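
/* Apply @fn to every shadow page on the VM's active_mmu_pages list. */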
static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
{
	struct kvm_mmu_page *sp;

	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
		fn(kvm, sp);
}
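
/*
 * Check that a present leaf SPTE maps the host page that currently backs
 * its gfn; an unsync shadow page is only legal at the last (4K) level.
 */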
static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp;
	gfn_t gfn;
	pfn_t pfn;
	hpa_t hpa;

	sp = page_header(__pa(sptep));

	if (sp->unsync) {
		if (level != PT_PAGE_TABLE_LEVEL) {
			audit_printk(vcpu->kvm, "unsync sp: %p "
				     "level = %d\n", sp, level);
			return;
		}
	}

	if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
		return;

	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
	pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);

	if (is_error_pfn(pfn)) {
		kvm_release_pfn_clean(pfn);
		return;
	}

	hpa = pfn << PAGE_SHIFT;
	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
			     "ent %llx\n", vcpu->arch.mmu.root_level, pfn,
			     hpa, *sptep);
}
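
/*
 * Every SPTE that maps guest memory must belong to a memslot and have a
 * non-empty reverse-map (rmap) chain for its gfn; complaints are
 * ratelimited to avoid flooding the log.
 */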
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
	unsigned long *rmapp;
	struct kvm_mmu_page *rev_sp;
	gfn_t gfn;

	rev_sp = page_header(__pa(sptep));
	gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);

	if (!gfn_to_memslot(kvm, gfn)) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
		audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
			     (long int)(sptep - rev_sp->spt), rev_sp->gfn);
		dump_stack();
		return;
	}

	rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
	if (!*rmapp) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no rmap for writable spte %llx\n",
			     *sptep);
		dump_stack();
	}
}

static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
		inspect_spte_has_rmap(vcpu->kvm, sptep);
}
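
/* After a root sync completes, no shadow page should still be unsync. */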
static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp = page_header(__pa(sptep));

	if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
		audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "
			     "root.\n", sp);
}
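
/*
 * For last-level (4K) shadow pages, each rmap-tracked SPTE must appear
 * in its gfn's rmap chain.
 */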
static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	int i;

	if (sp->role.level != PT_PAGE_TABLE_LEVEL)
		return;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		if (!is_rmap_spte(sp->spt[i]))
			continue;

		inspect_spte_has_rmap(kvm, sp->spt + i);
	}
}
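
/*
 * A synced, indirect shadow page implies its gfn (a guest page table) is
 * write-protected, so no SPTE in that gfn's rmap chain may be writable.
 */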
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	struct kvm_memory_slot *slot;
	unsigned long *rmapp;
	u64 *spte;

	if (sp->role.direct || sp->unsync || sp->role.invalid)
		return;

	slot = gfn_to_memslot(kvm, sp->gfn);
	rmapp = &slot->rmap[sp->gfn - slot->base_gfn];

	spte = rmap_next(rmapp, NULL);
	while (spte) {
		if (is_writable_pte(*spte))
			audit_printk(kvm, "shadow page has writable "
				     "mappings: gfn %llx role %x\n",
				     sp->gfn, sp->role.word);
		spte = rmap_next(rmapp, spte);
	}
}

static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	check_mappings_rmap(kvm, sp);
	audit_write_protection(kvm, sp);
}

static void audit_all_active_sps(struct kvm *kvm)
{
	walk_all_active_sps(kvm, audit_sp);
}

static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	audit_sptes_have_rmaps(vcpu, sptep, level);
	audit_mappings(vcpu, sptep, level);
	audit_spte_after_sync(vcpu, sptep, level);
}

static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
{
	mmu_spte_walk(vcpu, audit_spte);
}
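
/*
 * Auditing is gated by a static key, so the disabled path costs only a
 * patched-out branch at each call site; the audit itself is ratelimited.
 */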
static bool mmu_audit;
static struct static_key mmu_audit_key;

static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

	if (!__ratelimit(&ratelimit_state))
		return;

	vcpu->kvm->arch.audit_point = point;
	audit_all_active_sps(vcpu->kvm);
	audit_vcpu_spte(vcpu);
}

static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	if (static_key_false((&mmu_audit_key)))
		__kvm_mmu_audit(vcpu, point);
}

static void mmu_audit_enable(void)
{
	if (mmu_audit)
		return;

	static_key_slow_inc(&mmu_audit_key);
	mmu_audit = true;
}

static void mmu_audit_disable(void)
{
	if (!mmu_audit)
		return;

	static_key_slow_dec(&mmu_audit_key);
	mmu_audit = false;
}
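
/* Module parameter handler: "1" enables auditing, "0" disables it. */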
static int mmu_audit_set(const char *val, const struct kernel_param *kp)
{
	int ret;
	unsigned long enable;

	ret = strict_strtoul(val, 10, &enable);
	if (ret < 0)
		return -EINVAL;

	switch (enable) {
	case 0:
		mmu_audit_disable();
		break;
	case 1:
		mmu_audit_enable();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static struct kernel_param_ops audit_param_ops = {
	.set = mmu_audit_set,
	.get = param_get_bool,
};

module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
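
/*
 * Usage sketch, assuming this file is built into kvm.ko (it is normally
 * #included from mmu.c under CONFIG_KVM_MMU_AUDIT): the 0644 permission
 * above makes the parameter writable at runtime via sysfs, e.g.
 *
 *	echo 1 > /sys/module/kvm/parameters/mmu_audit	# enable auditing
 *	echo 0 > /sys/module/kvm/parameters/mmu_audit	# disable it
 */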