pageattr_64.c

/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
/*
 * Look up the kernel page-table entry for a virtual address and report
 * the level at which the mapping is made: *level is set to 3 for a
 * large (PMD-level) mapping and to 4 for a regular 4K pte.
 */
pte_t *lookup_address(unsigned long address, int *level)
{
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
        pmd_t *pmd;

        if (pgd_none(*pgd))
                return NULL;
        pud = pud_offset(pgd, address);
        if (pud_none(*pud))
                return NULL;
        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd))
                return NULL;

        *level = 3;
        if (pmd_large(*pmd))
                return (pte_t *)pmd;
        *level = 4;

        return pte_offset_kernel(pmd, address);
}
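/*
 * Minimal usage sketch (illustrative only; "vaddr" is a hypothetical
 * kernel virtual address, not a name from this file):
 *
 *	int level;
 *	pte_t *pte = lookup_address(vaddr, &level);
 *
 *	if (pte && level == 3)
 *		... vaddr is covered by a large (2MB/4MB) mapping ...
 *	else if (pte)
 *		... vaddr is mapped with a regular 4K pte ...
 */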
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
        /* change init_mm */
        set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
        if (SHARED_KERNEL_PMD)
                return;
        /*
         * The kernel pmds are not shared, so the change must be
         * propagated into the pgd of every process:
         */
        {
                struct page *page;

                for (page = pgd_list; page; page = (struct page *)page->index) {
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;

                        pgd = (pgd_t *)page_address(page) + pgd_index(address);
                        pud = pud_offset(pgd, address);
                        pmd = pmd_offset(pud, address);
                        set_pte_atomic((pte_t *)pmd, pte);
                }
        }
#endif
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
        pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
        gfp_t gfp_flags = GFP_KERNEL;
        unsigned long flags;
        unsigned long addr;
        pte_t *pbase, *tmp;
        struct page *base;
        int i, level;

#ifdef CONFIG_DEBUG_PAGEALLOC
        gfp_flags = GFP_ATOMIC;
#endif
        base = alloc_pages(gfp_flags, 0);
        if (!base)
                return -ENOMEM;

        spin_lock_irqsave(&pgd_lock, flags);
        /*
         * Check for races, another CPU might have split this page
         * up for us already:
         */
        tmp = lookup_address(address, &level);
        if (tmp != kpte) {
                WARN_ON_ONCE(1);
                goto out_unlock;
        }

        address = __pa(address);
        addr = address & LARGE_PAGE_MASK;
        pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
        paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

        for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
                set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

        /*
         * Install the new, split up pagetable:
         */
        __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
        base = NULL;

out_unlock:
        spin_unlock_irqrestore(&pgd_lock, flags);

        if (base)
                __free_pages(base, 0);

        return 0;
}
static int
__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
{
        struct page *kpte_page;
        int level, err = 0;
        pte_t *kpte;

        BUG_ON(PageHighMem(page));

repeat:
        kpte = lookup_address(address, &level);
        if (!kpte)
                return -EINVAL;

        kpte_page = virt_to_page(kpte);
        BUG_ON(PageLRU(kpte_page));
        BUG_ON(PageCompound(kpte_page));

        /*
         * Better fail early if someone sets the kernel text to NX.
         * Does not cover __inittext:
         */
        BUG_ON(address >= (unsigned long)&_text &&
                address < (unsigned long)&_etext &&
                (pgprot_val(prot) & _PAGE_NX));

        if (level == 4) {
                set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
        } else {
                err = split_large_page(kpte, address);
                if (!err)
                        goto repeat;
        }
        return err;
}
/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot: New page table attribute (PAGE_*)
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have a
 * mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 */
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
        int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
        if (address >= __START_KERNEL_map &&
                        address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {

                address = (unsigned long)__va(__pa(address));
                kernel_map = 1;
        }
#endif

        for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
                unsigned long pfn = __pa(address) >> PAGE_SHIFT;

                if (!kernel_map || pte_present(pfn_pte(0, prot))) {
                        err = __change_page_attr(address, pfn_to_page(pfn), prot);
                        if (err)
                                break;
                }
#ifdef CONFIG_X86_64
                /*
                 * Handle the kernel mapping too, which aliases part of
                 * lowmem:
                 */
                if (__pa(address) < KERNEL_TEXT_SIZE) {
                        unsigned long addr2;
                        pgprot_t prot2;

                        addr2 = __START_KERNEL_map + __pa(address);
                        /* Make sure the kernel mappings stay executable */
                        prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
                        err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
                }
#endif
        }
        return err;
}
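/*
 * Minimal usage sketch (illustrative only; "addr" and "n" are
 * hypothetical and not defined in this file). Making part of the
 * direct mapping uncached, then making the change visible:
 *
 *	err = change_page_attr_addr(addr, n, PAGE_KERNEL_NOCACHE);
 *	if (!err)
 *		global_flush_tlb();
 */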
/**
 * change_page_attr - Change page table attributes in the linear mapping.
 * @page: First page to change
 * @numpages: Number of pages to change
 * @prot: New protection/caching type (PAGE_*)
 *
 * Returns 0 on success, otherwise a negated errno.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * For MMIO areas without mem_map use change_page_attr_addr() instead.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
        unsigned long addr = (unsigned long)page_address(page);

        return change_page_attr_addr(addr, numpages, prot);
}
EXPORT_SYMBOL(change_page_attr);
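/*
 * Minimal usage sketch (illustrative only; "page" is a hypothetical
 * struct page owned by the caller). A driver that also maps a page
 * uncached elsewhere would typically do:
 *
 *	change_page_attr(page, 1, PAGE_KERNEL_NOCACHE);
 *	global_flush_tlb();
 *
 * and restore the default write-back policy before freeing the page:
 *
 *	change_page_attr(page, 1, PAGE_KERNEL);
 *	global_flush_tlb();
 */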
static void flush_kernel_map(void *arg)
{
        /*
         * Flush all to work around errata in early Athlons regarding
         * large page flushing.
         */
        __flush_tlb_all();

        if (boot_cpu_data.x86_model >= 4)
                wbinvd();
}
void global_flush_tlb(void)
{
        BUG_ON(irqs_disabled());

        on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);
#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
        if (PageHighMem(page))
                return;

        if (!enable) {
                debug_check_no_locks_freed(page_address(page),
                                           numpages * PAGE_SIZE);
        }

        /*
         * If the page allocator is not up yet, do not call c_p_a():
         */
        if (!debug_pagealloc_enabled)
                return;

        /*
         * The return value is ignored - the calls cannot fail,
         * large pages are disabled at boot time:
         */
        change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

        /*
         * We should perform an IPI and flush all TLBs,
         * but that can deadlock -> flush only the current CPU:
         */
        __flush_tlb_all();
}
#endif