/*
 * mmu_audit.c:
 *
 * Audit code for KVM MMU
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay    <yaniv@qumranet.com>
 *   Avi Kivity     <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/ratelimit.h>
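
/*
 * Human-readable names for the audit points, indexed by the AUDIT_*
 * constants that callers pass to kvm_mmu_audit() (e.g. AUDIT_POST_SYNC,
 * used below); the order of these strings must match those constants.
 */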
char const *audit_point_name[] = {
        "pre page fault",
        "post page fault",
        "pre pte write",
        "post pte write",
        "pre sync",
        "post sync"
};
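
/*
 * Every audit message is tagged with the current audit point, so a log
 * line can be traced back to the MMU operation that triggered it.
 */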
#define audit_printk(kvm, fmt, args...)         \
        printk(KERN_ERR "audit: (%s) error: "   \
               fmt, audit_point_name[kvm->arch.audit_point], ##args)

typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);
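
/*
 * Recursively visit every shadow PTE reachable from @sp, invoking @fn on
 * each entry before descending into non-leaf children.
 */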
static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
                            inspect_spte_fn fn, int level)
{
        int i;

        for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
                u64 *ent = sp->spt;

                fn(vcpu, ent + i, level);

                if (is_shadow_present_pte(ent[i]) &&
                    !is_last_spte(ent[i], level)) {
                        struct kvm_mmu_page *child;

                        child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
                        __mmu_spte_walk(vcpu, child, fn, level - 1);
                }
        }
}
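
/*
 * Walk the whole shadow page table of @vcpu.  A 64-bit (4-level) MMU has
 * a single root; PAE mode instead keeps four third-level roots in
 * pae_root, so each of those is walked separately starting at level 2.
 */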
static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
{
        int i;
        struct kvm_mmu_page *sp;

        if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
                return;

        if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
                hpa_t root = vcpu->arch.mmu.root_hpa;

                sp = page_header(root);
                __mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
                return;
        }

        for (i = 0; i < 4; ++i) {
                hpa_t root = vcpu->arch.mmu.pae_root[i];

                if (root && VALID_PAGE(root)) {
                        root &= PT64_BASE_ADDR_MASK;
                        sp = page_header(root);
                        __mmu_spte_walk(vcpu, sp, fn, 2);
                }
        }
}

typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);
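
/* Apply @fn to every shadow page currently on the VM's active list. */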
static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
{
        struct kvm_mmu_page *sp;

        list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
                fn(kvm, sp);
}
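
/*
 * Check that a present leaf spte actually points at the host page that
 * currently backs the guest frame it maps.  Unsync shadow pages are only
 * legal at the last level, so anything else is reported as an error.
 */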
static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
        struct kvm_mmu_page *sp;
        gfn_t gfn;
        pfn_t pfn;
        hpa_t hpa;

        sp = page_header(__pa(sptep));

        if (sp->unsync) {
                if (level != PT_PAGE_TABLE_LEVEL) {
                        audit_printk(vcpu->kvm, "unsync sp: %p "
                                     "level = %d\n", sp, level);
                        return;
                }
        }

        if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
                return;

        gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
        pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);

        if (is_error_pfn(pfn))
                return;

        hpa = pfn << PAGE_SHIFT;

        if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
                audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
                             "ent %llx\n", vcpu->arch.mmu.root_level, pfn,
                             hpa, *sptep);
}
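
/*
 * Verify the reverse-map invariant: every mapped spte must be reachable
 * from the rmap of the gfn it maps, and that gfn must lie in a memslot.
 * Reports are rate-limited to avoid flooding the log, since one broken
 * invariant tends to fire on many sptes at once.
 */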
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
        static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
        unsigned long *rmapp;
        struct kvm_mmu_page *rev_sp;
        gfn_t gfn;

        rev_sp = page_header(__pa(sptep));
        gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);

        if (!gfn_to_memslot(kvm, gfn)) {
                if (!__ratelimit(&ratelimit_state))
                        return;
                audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
                audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
                             (long int)(sptep - rev_sp->spt), rev_sp->gfn);
                dump_stack();
                return;
        }

        rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
        if (!*rmapp) {
                if (!__ratelimit(&ratelimit_state))
                        return;
                audit_printk(kvm, "no rmap for writable spte %llx\n",
                             *sptep);
                dump_stack();
        }
}
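
/* Only leaf sptes carry rmaps; intermediate entries are skipped. */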
static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
        if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
                inspect_spte_has_rmap(vcpu->kvm, sptep);
}
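
/* After a root sync, no reachable shadow page may still be unsync. */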
static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
        struct kvm_mmu_page *sp = page_header(__pa(sptep));

        if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
                audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "
                             "root.\n", sp);
}
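
/*
 * The inverse direction of audit_sptes_have_rmaps(): scan a last-level
 * shadow page and verify each mapped entry is present in the rmap.
 */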
static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
{
        int i;

        if (sp->role.level != PT_PAGE_TABLE_LEVEL)
                return;

        for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
                if (!is_rmap_spte(sp->spt[i]))
                        continue;

                inspect_spte_has_rmap(kvm, sp->spt + i);
        }
}
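
/*
 * A shadowed guest page table that is neither unsync nor invalid must be
 * write-protected, i.e. no rmap entry for its gfn may be writable.
 */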
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
        unsigned long *rmapp;
        u64 *sptep;
        struct rmap_iterator iter;

        if (sp->role.direct || sp->unsync || sp->role.invalid)
                return;

        rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL);

        for (sptep = rmap_get_first(*rmapp, &iter); sptep;
             sptep = rmap_get_next(&iter)) {
                if (is_writable_pte(*sptep))
                        audit_printk(kvm, "shadow page has writable "
                                     "mappings: gfn %llx role %x\n",
                                     sp->gfn, sp->role.word);
        }
}
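
/* Per-shadow-page checks, applied to every page on the active list. */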
static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
        check_mappings_rmap(kvm, sp);
        audit_write_protection(kvm, sp);
}

static void audit_all_active_sps(struct kvm *kvm)
{
        walk_all_active_sps(kvm, audit_sp);
}

static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
        audit_sptes_have_rmaps(vcpu, sptep, level);
        audit_mappings(vcpu, sptep, level);
        audit_spte_after_sync(vcpu, sptep, level);
}

static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
{
        mmu_spte_walk(vcpu, audit_spte);
}
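
/*
 * Auditing is compiled in but disabled by default; a static key keeps the
 * disabled case down to a patched-out branch in kvm_mmu_audit().
 */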
static bool mmu_audit;
static struct static_key mmu_audit_key;

static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
        static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

        if (!__ratelimit(&ratelimit_state))
                return;

        vcpu->kvm->arch.audit_point = point;
        audit_all_active_sps(vcpu->kvm);
        audit_vcpu_spte(vcpu);
}
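
/* Audit entry point; nearly free when disabled thanks to the static key. */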
static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
        if (static_key_false(&mmu_audit_key))
                __kvm_mmu_audit(vcpu, point);
}

static void mmu_audit_enable(void)
{
        if (mmu_audit)
                return;

        static_key_slow_inc(&mmu_audit_key);
        mmu_audit = true;
}

static void mmu_audit_disable(void)
{
        if (!mmu_audit)
                return;

        static_key_slow_dec(&mmu_audit_key);
        mmu_audit = false;
}
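
/*
 * module_param_cb() hook: accept "0" or "1" and flip the static key
 * accordingly; anything else is rejected with -EINVAL.
 */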
static int mmu_audit_set(const char *val, const struct kernel_param *kp)
{
        int ret;
        unsigned long enable;

        ret = strict_strtoul(val, 10, &enable);
        if (ret < 0)
                return -EINVAL;

        switch (enable) {
        case 0:
                mmu_audit_disable();
                break;
        case 1:
                mmu_audit_enable();
                break;
        default:
                return -EINVAL;
        }

        return 0;
}

static struct kernel_param_ops audit_param_ops = {
        .set = mmu_audit_set,
        .get = param_get_bool,
};

module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);
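
/*
 * With mode 0644 the parameter is writable at runtime.  Assuming this file
 * is built into kvm.ko (it is #included from mmu.c under
 * CONFIG_KVM_MMU_AUDIT), auditing can be toggled from the host with e.g.:
 *
 *   echo 1 > /sys/module/kvm/parameters/mmu_audit
 */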