pageattr.c

/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>

void clflush_cache_range(void *addr, int size)
{
        int i;

        for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
                clflush(addr+i);
}

pte_t *lookup_address(unsigned long address, int *level)
{
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
        pmd_t *pmd;

        *level = PG_LEVEL_NONE;

        if (pgd_none(*pgd))
                return NULL;
        pud = pud_offset(pgd, address);
        if (pud_none(*pud))
                return NULL;
        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd))
                return NULL;

        *level = PG_LEVEL_2M;
        if (pmd_large(*pmd))
                return (pte_t *)pmd;

        *level = PG_LEVEL_4K;
        return pte_offset_kernel(pmd, address);
}
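
/*
 * Illustrative sketch, not part of the original file: one way a caller could
 * use lookup_address() to check whether a kernel virtual address is mapped,
 * and at which page-table level. The function name is hypothetical.
 */
static int __maybe_unused example_mapping_level(unsigned long address)
{
        int level;
        pte_t *pte = lookup_address(address, &level);

        if (!pte || !pte_present(*pte))
                return PG_LEVEL_NONE;

        return level;   /* PG_LEVEL_4K or PG_LEVEL_2M */
}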

static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
        /* change init_mm */
        set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
        if (!SHARED_KERNEL_PMD) {
                struct page *page;

                for (page = pgd_list; page; page = (struct page *)page->index) {
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;

                        pgd = (pgd_t *)page_address(page) + pgd_index(address);
                        pud = pud_offset(pgd, address);
                        pmd = pmd_offset(pud, address);
                        set_pte_atomic((pte_t *)pmd, pte);
                }
        }
#endif
}

static int split_large_page(pte_t *kpte, unsigned long address)
{
        pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
        gfp_t gfp_flags = GFP_KERNEL;
        unsigned long flags;
        unsigned long addr;
        pte_t *pbase, *tmp;
        struct page *base;
        int i, level;

#ifdef CONFIG_DEBUG_PAGEALLOC
        gfp_flags = GFP_ATOMIC;
#endif
        base = alloc_pages(gfp_flags, 0);
        if (!base)
                return -ENOMEM;

        spin_lock_irqsave(&pgd_lock, flags);
        /*
         * Check for races, another CPU might have split this page
         * up for us already:
         */
        tmp = lookup_address(address, &level);
        if (tmp != kpte) {
                WARN_ON_ONCE(1);
                goto out_unlock;
        }

        address = __pa(address);
        addr = address & LARGE_PAGE_MASK;
        pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
        paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

        for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
                set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

        /*
         * Install the new, split up pagetable:
         */
        __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
        base = NULL;

out_unlock:
        spin_unlock_irqrestore(&pgd_lock, flags);

        if (base)
                __free_pages(base, 0);

        return 0;
}

static int
__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
{
        struct page *kpte_page;
        int level, err = 0;
        pte_t *kpte;

        BUG_ON(PageHighMem(page));

repeat:
        kpte = lookup_address(address, &level);
        if (!kpte)
                return -EINVAL;

        kpte_page = virt_to_page(kpte);
        BUG_ON(PageLRU(kpte_page));
        BUG_ON(PageCompound(kpte_page));

        /*
         * Better fail early if someone sets the kernel text to NX.
         * Does not cover __inittext
         */
        BUG_ON(address >= (unsigned long)&_text &&
               address < (unsigned long)&_etext &&
               (pgprot_val(prot) & _PAGE_NX));

        if (level == PG_LEVEL_4K) {
                set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
        } else {
                err = split_large_page(kpte, address);
                if (!err)
                        goto repeat;
        }
        return err;
}

/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot: New page table attribute (PAGE_*)
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have a
 * mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 */
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
        int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
        if (address >= __START_KERNEL_map &&
            address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {

                address = (unsigned long)__va(__pa(address));
                kernel_map = 1;
        }
#endif

        for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
                unsigned long pfn = __pa(address) >> PAGE_SHIFT;

                if (!kernel_map || pte_present(pfn_pte(0, prot))) {
                        err = __change_page_attr(address, pfn_to_page(pfn), prot);
                        if (err)
                                break;
                }
#ifdef CONFIG_X86_64
                /*
                 * Handle kernel mapping too which aliases part of
                 * lowmem:
                 */
                if (__pa(address) < KERNEL_TEXT_SIZE) {
                        unsigned long addr2;
                        pgprot_t prot2;

                        addr2 = __START_KERNEL_map + __pa(address);
                        /* Make sure the kernel mappings stay executable */
                        prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
                        err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
                }
#endif
        }

        return err;
}
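
/*
 * Illustrative sketch, not part of the original file: using
 * change_page_attr_addr() to make a range of the direct mapping uncacheable
 * by virtual address, e.g. for a region without a mem_map entry. The function
 * name and parameters are hypothetical; the caller must still run
 * global_flush_tlb() (defined further down) before relying on the change.
 */
static int __maybe_unused example_set_range_uncached(unsigned long vaddr,
                                                     int nr_pages)
{
        /* PAGE_KERNEL_NOCACHE is the usual x86 protection for UC mappings. */
        return change_page_attr_addr(vaddr, nr_pages, PAGE_KERNEL_NOCACHE);
}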

/**
 * change_page_attr - Change page table attributes in the linear mapping.
 * @page: First page to change
 * @numpages: Number of pages to change
 * @prot: New protection/caching type (PAGE_*)
 *
 * Returns 0 on success, otherwise a negated errno.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * in-kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * For MMIO areas without mem_map use change_page_attr_addr() instead.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
        unsigned long addr = (unsigned long)page_address(page);

        return change_page_attr_addr(addr, numpages, prot);
}
EXPORT_SYMBOL(change_page_attr);

static void flush_kernel_map(void *arg)
{
        /*
         * Flush all to work around errata in early Athlons regarding
         * large page flushing.
         */
        __flush_tlb_all();

        if (boot_cpu_data.x86_model >= 4)
                wbinvd();
}

void global_flush_tlb(void)
{
        BUG_ON(irqs_disabled());

        on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);
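
/*
 * Illustrative sketch, not part of the original file: the typical call
 * sequence for a driver that wants one page of the kernel linear mapping
 * uncached, together with the matching restore path. Names are hypothetical;
 * note that the attribute change only becomes visible everywhere after
 * global_flush_tlb().
 */
static int __maybe_unused example_make_page_uncached(struct page *page)
{
        int err = change_page_attr(page, 1, PAGE_KERNEL_NOCACHE);

        if (!err)
                global_flush_tlb();
        return err;
}

static void __maybe_unused example_restore_page(struct page *page)
{
        if (!change_page_attr(page, 1, PAGE_KERNEL))
                global_flush_tlb();
}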

#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
        if (PageHighMem(page))
                return;
        if (!enable) {
                debug_check_no_locks_freed(page_address(page),
                                           numpages * PAGE_SIZE);
        }

        /*
         * If page allocator is not up yet then do not call c_p_a():
         */
        if (!debug_pagealloc_enabled)
                return;

        /*
         * The return value is ignored - the calls cannot fail,
         * large pages are disabled at boot time:
         */
        change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

        /*
         * We should perform an IPI and flush all TLBs,
         * but that can deadlock, so we only flush the current CPU:
         */
        __flush_tlb_all();
}
#endif