/*
 * mmu_audit.c:
 *
 * Audit code for KVM MMU
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *	Yaniv Kamay <yaniv@qumranet.com>
 *	Avi Kivity <avi@qumranet.com>
 *	Marcelo Tosatti <mtosatti@redhat.com>
 *	Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/ratelimit.h>

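/*
 * Human-readable names for the audit points, indexed by the value of
 * kvm->arch.audit_point; used by audit_printk() below.
 */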
char const *audit_point_name[] = {
	"pre page fault",
	"post page fault",
	"pre pte write",
	"post pte write",
	"pre sync",
	"post sync"
};

#define audit_printk(kvm, fmt, args...)		\
	printk(KERN_ERR "audit: (%s) error: "	\
		fmt, audit_point_name[kvm->arch.audit_point], ##args)

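/* Callback invoked on every shadow PTE visited by the walkers below. */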
typedef void (*inspect_spte_fn) (struct kvm_vcpu *vcpu, u64 *sptep, int level);

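/*
 * Recursively visit every entry of the shadow page @sp, invoking @fn on
 * each SPTE and descending into present, non-leaf children.
 */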
static void __mmu_spte_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
			    inspect_spte_fn fn, int level)
{
	int i;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		u64 *ent = sp->spt;

		fn(vcpu, ent + i, level);

		if (is_shadow_present_pte(ent[i]) &&
		    !is_last_spte(ent[i], level)) {
			struct kvm_mmu_page *child;

			child = page_header(ent[i] & PT64_BASE_ADDR_MASK);
			__mmu_spte_walk(vcpu, child, fn, level - 1);
		}
	}
}

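/*
 * Walk the current vcpu's whole shadow page table: either the single
 * 64-bit root at PT64_ROOT_LEVEL, or the four PAE roots, which sit at
 * level 2.
 */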
static void mmu_spte_walk(struct kvm_vcpu *vcpu, inspect_spte_fn fn)
{
	int i;
	struct kvm_mmu_page *sp;

	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
		return;

	if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
		hpa_t root = vcpu->arch.mmu.root_hpa;

		sp = page_header(root);
		__mmu_spte_walk(vcpu, sp, fn, PT64_ROOT_LEVEL);
		return;
	}

	for (i = 0; i < 4; ++i) {
		hpa_t root = vcpu->arch.mmu.pae_root[i];

		if (root && VALID_PAGE(root)) {
			root &= PT64_BASE_ADDR_MASK;
			sp = page_header(root);
			__mmu_spte_walk(vcpu, sp, fn, 2);
		}
	}
}

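/* Apply @fn to every shadow page currently on the VM's active list. */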
typedef void (*sp_handler) (struct kvm *kvm, struct kvm_mmu_page *sp);

static void walk_all_active_sps(struct kvm *kvm, sp_handler fn)
{
	struct kvm_mmu_page *sp;

	list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link)
		fn(kvm, sp);
}

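/*
 * Verify that a present leaf SPTE points at the host page that currently
 * backs its gfn; a mismatch means the shadow mapping has gone stale.
 */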
static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp;
	gfn_t gfn;
	pfn_t pfn;
	hpa_t hpa;

	sp = page_header(__pa(sptep));

	if (sp->unsync) {
		if (level != PT_PAGE_TABLE_LEVEL) {
			audit_printk(vcpu->kvm, "unsync sp: %p "
				     "level = %d\n", sp, level);
			return;
		}
	}

	if (!is_shadow_present_pte(*sptep) || !is_last_spte(*sptep, level))
		return;

	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
	pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);

	if (is_error_pfn(pfn)) {
		kvm_release_pfn_clean(pfn);
		return;
	}

	hpa = pfn << PAGE_SHIFT;
	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
		audit_printk(vcpu->kvm, "levels %d pfn %llx hpa %llx "
			     "ent %llx\n", vcpu->arch.mmu.root_level, pfn,
			     hpa, *sptep);
}

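/*
 * Every present leaf SPTE should be reachable from the rmap of its gfn;
 * complain (rate-limited) if the memslot or the rmap entry is missing.
 */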
static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);
	unsigned long *rmapp;
	struct kvm_mmu_page *rev_sp;
	gfn_t gfn;

	rev_sp = page_header(__pa(sptep));
	gfn = kvm_mmu_page_get_gfn(rev_sp, sptep - rev_sp->spt);

	if (!gfn_to_memslot(kvm, gfn)) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no memslot for gfn %llx\n", gfn);
		audit_printk(kvm, "index %ld of sp (gfn=%llx)\n",
			     (long int)(sptep - rev_sp->spt), rev_sp->gfn);
		dump_stack();
		return;
	}

	rmapp = gfn_to_rmap(kvm, gfn, rev_sp->role.level);
	if (!*rmapp) {
		if (!__ratelimit(&ratelimit_state))
			return;
		audit_printk(kvm, "no rmap for writable spte %llx\n",
			     *sptep);
		dump_stack();
	}
}

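/* Only present, last-level SPTEs are expected to carry an rmap entry. */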
static void audit_sptes_have_rmaps(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	if (is_shadow_present_pte(*sptep) && is_last_spte(*sptep, level))
		inspect_spte_has_rmap(vcpu->kvm, sptep);
}

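/* After a root sync, no shadow page reachable from it may still be unsync. */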
static void audit_spte_after_sync(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	struct kvm_mmu_page *sp = page_header(__pa(sptep));

	if (vcpu->kvm->arch.audit_point == AUDIT_POST_SYNC && sp->unsync)
		audit_printk(vcpu->kvm, "meet unsync sp(%p) after sync "
			     "root.\n", sp);
}

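/* Cross-check every rmap-tracked SPTE of a last-level shadow page. */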
static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	int i;

	if (sp->role.level != PT_PAGE_TABLE_LEVEL)
		return;

	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
		if (!is_rmap_spte(sp->spt[i]))
			continue;

		inspect_spte_has_rmap(kvm, sp->spt + i);
	}
}

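/*
 * A synced, valid, indirect shadow page must have its gfn write-protected:
 * any writable mapping left in the gfn's rmap is an audit failure.
 */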
static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	unsigned long *rmapp;
	u64 *sptep;
	struct rmap_iterator iter;

	if (sp->role.direct || sp->unsync || sp->role.invalid)
		return;

	rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL);

	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
	     sptep = rmap_get_next(&iter)) {
		if (is_writable_pte(*sptep))
			audit_printk(kvm, "shadow page has writable "
				     "mappings: gfn %llx role %x\n",
				     sp->gfn, sp->role.word);
	}
}

static void audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
	check_mappings_rmap(kvm, sp);
	audit_write_protection(kvm, sp);
}

static void audit_all_active_sps(struct kvm *kvm)
{
	walk_all_active_sps(kvm, audit_sp);
}

static void audit_spte(struct kvm_vcpu *vcpu, u64 *sptep, int level)
{
	audit_sptes_have_rmaps(vcpu, sptep, level);
	audit_mappings(vcpu, sptep, level);
	audit_spte_after_sync(vcpu, sptep, level);
}

static void audit_vcpu_spte(struct kvm_vcpu *vcpu)
{
	mmu_spte_walk(vcpu, audit_spte);
}

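/*
 * Auditing is gated by a static key (jump label) so that kvm_mmu_audit()
 * costs a single patched-out branch when auditing is disabled.
 */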
static bool mmu_audit;
static struct static_key mmu_audit_key;

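/* Run one full audit pass, rate-limited to avoid flooding the log. */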
static void __kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10);

	if (!__ratelimit(&ratelimit_state))
		return;

	vcpu->kvm->arch.audit_point = point;
	audit_all_active_sps(vcpu->kvm);
	audit_vcpu_spte(vcpu);
}

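/* Fast-path entry point: a straight-line no-op unless auditing is enabled. */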
static inline void kvm_mmu_audit(struct kvm_vcpu *vcpu, int point)
{
	if (static_key_false((&mmu_audit_key)))
		__kvm_mmu_audit(vcpu, point);
}

static void mmu_audit_enable(void)
{
	if (mmu_audit)
		return;

	static_key_slow_inc(&mmu_audit_key);
	mmu_audit = true;
}

static void mmu_audit_disable(void)
{
	if (!mmu_audit)
		return;

	static_key_slow_dec(&mmu_audit_key);
	mmu_audit = false;
}

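/*
 * Module-parameter handler: writing 0 or 1 to the "mmu_audit" parameter
 * (typically exposed as /sys/module/kvm/parameters/mmu_audit) toggles
 * auditing at runtime; any other value is rejected with -EINVAL.
 */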
static int mmu_audit_set(const char *val, const struct kernel_param *kp)
{
	int ret;
	unsigned long enable;

	ret = strict_strtoul(val, 10, &enable);
	if (ret < 0)
		return -EINVAL;

	switch (enable) {
	case 0:
		mmu_audit_disable();
		break;
	case 1:
		mmu_audit_enable();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static struct kernel_param_ops audit_param_ops = {
	.set = mmu_audit_set,
	.get = param_get_bool,
};

module_param_cb(mmu_audit, &audit_param_ops, &mmu_audit, 0644);