pageattr.c

/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>

void clflush_cache_range(void *addr, int size)
{
	int i;

	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
		clflush(addr+i);
}

pte_t *lookup_address(unsigned long address, int *level)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;
	*level = 3;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
	*level = 4;

	return pte_offset_kernel(pmd, address);
}
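
/*
 * Illustrative usage (a sketch, not part of the original file): callers
 * use the returned level to tell a large-page mapping from a 4k PTE:
 *
 *	int level;
 *	pte_t *pte = lookup_address(address, &level);
 *
 *	if (pte && level == 4) {
 *		... pte is a regular 4k kernel PTE ...
 *	} else if (pte) {
 *		... pte points at the PMD of a 2MB/4MB large page ...
 *	}
 */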
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* change init_mm */
	set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
	if (SHARED_KERNEL_PMD)
		return;
	{
		struct page *page;

		for (page = pgd_list; page; page = (struct page *)page->index) {
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			pud = pud_offset(pgd, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);
		}
	}
#endif
}
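
/*
 * Split one large kernel mapping (2MB with PAE/64-bit, 4MB otherwise)
 * into a freshly allocated page of 4k PTEs covering the same physical
 * range with the same non-huge protections, so that a single 4k page
 * can be given different attributes afterwards.
 */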
static int split_large_page(pte_t *kpte, unsigned long address)
{
	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
	gfp_t gfp_flags = GFP_KERNEL;
	unsigned long flags;
	unsigned long addr;
	pte_t *pbase, *tmp;
	struct page *base;
	int i, level;

#ifdef CONFIG_DEBUG_PAGEALLOC
	gfp_flags = GFP_ATOMIC;
#endif
	base = alloc_pages(gfp_flags, 0);
	if (!base)
		return -ENOMEM;

	spin_lock_irqsave(&pgd_lock, flags);
	/*
	 * Check for races, another CPU might have split this page
	 * up for us already:
	 */
	tmp = lookup_address(address, &level);
	if (tmp != kpte) {
		WARN_ON_ONCE(1);
		goto out_unlock;
	}

	address = __pa(address);
	addr = address & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

	/*
	 * Install the new, split up pagetable:
	 */
	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
	base = NULL;

out_unlock:
	spin_unlock_irqrestore(&pgd_lock, flags);

	if (base)
		__free_pages(base, 0);

	return 0;
}
static int
__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
{
	struct page *kpte_page;
	int level, err = 0;
	pte_t *kpte;

	BUG_ON(PageHighMem(page));

repeat:
	kpte = lookup_address(address, &level);
	if (!kpte)
		return -EINVAL;

	kpte_page = virt_to_page(kpte);
	BUG_ON(PageLRU(kpte_page));
	BUG_ON(PageCompound(kpte_page));

	/*
	 * Better fail early if someone sets the kernel text to NX.
	 * Does not cover __inittext
	 */
	BUG_ON(address >= (unsigned long)&_text &&
	       address < (unsigned long)&_etext &&
	       (pgprot_val(prot) & _PAGE_NX));
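
	/*
	 * A 4k PTE (level 4) can be updated in place; a large page
	 * (level 3) must be split first, after which the lookup is
	 * retried and takes the 4k path.
	 */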
	if (level == 4) {
		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
	} else {
		err = split_large_page(kpte, address);
		if (!err)
			goto repeat;
	}
	return err;
}

/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot: New page table attribute (PAGE_*)
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have
 * a mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 */
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
	int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
	if (address >= __START_KERNEL_map &&
	    address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
		address = (unsigned long)__va(__pa(address));
		kernel_map = 1;
	}
#endif

	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
		unsigned long pfn = __pa(address) >> PAGE_SHIFT;

		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
			err = __change_page_attr(address, pfn_to_page(pfn), prot);
			if (err)
				break;
		}
#ifdef CONFIG_X86_64
		/*
		 * Handle kernel mapping too which aliases part of
		 * lowmem:
		 */
		if (__pa(address) < KERNEL_TEXT_SIZE) {
			unsigned long addr2;
			pgprot_t prot2;

			addr2 = __START_KERNEL_map + __pa(address);
			/* Make sure the kernel mappings stay executable */
			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
		}
#endif
	}
	return err;
}
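
/*
 * Illustrative usage (a sketch, not part of the original file): make one
 * page of the direct mapping uncached, then restore write-back later.
 * PAGE_KERNEL_NOCACHE is the usual x86 protection for the uncached case:
 *
 *	err = change_page_attr_addr(addr, 1, PAGE_KERNEL_NOCACHE);
 *	if (!err)
 *		global_flush_tlb();
 *	...
 *	change_page_attr_addr(addr, 1, PAGE_KERNEL);
 *	global_flush_tlb();
 */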

/**
 * change_page_attr - Change page table attributes in the linear mapping.
 * @page: First page to change
 * @numpages: Number of pages to change
 * @prot: New protection/caching type (PAGE_*)
 *
 * Returns 0 on success, otherwise a negated errno.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * For MMIO areas without mem_map use change_page_attr_addr() instead.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	unsigned long addr = (unsigned long)page_address(page);

	return change_page_attr_addr(addr, numpages, prot);
}
EXPORT_SYMBOL(change_page_attr);
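
/*
 * Illustrative usage (a sketch, not part of the original file): a driver
 * that maps a page uncached elsewhere keeps the linear mapping in sync,
 * flushing after each attribute change:
 *
 *	struct page *pg = alloc_page(GFP_KERNEL);
 *
 *	if (pg && change_page_attr(pg, 1, PAGE_KERNEL_NOCACHE) == 0)
 *		global_flush_tlb();
 *	...
 *	change_page_attr(pg, 1, PAGE_KERNEL);	(restore write-back)
 *	global_flush_tlb();
 */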

static void flush_kernel_map(void *arg)
{
	/*
	 * Flush all TLBs to work around an erratum in early Athlons
	 * regarding large-page flushing.
	 */
	__flush_tlb_all();
	if (boot_cpu_data.x86_model >= 4)
		wbinvd();
}
void global_flush_tlb(void)
{
	BUG_ON(irqs_disabled());

	on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);

#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable) {
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);
	}

	/*
	 * If the page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	/*
	 * The return value is ignored - the calls cannot fail,
	 * since large pages are disabled at boot time:
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should perform an IPI and flush all TLBs,
	 * but that can deadlock -> flush only the current CPU:
	 */
	__flush_tlb_all();
}
#endif