/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
/*
 * Flush the cache lines covering [addr, addr + size) with clflush,
 * stepping by the CPU's cache line size.
 */
void clflush_cache_range(void *addr, int size)
{
	int i;

	for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size)
		clflush(addr+i);
}
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
/*
 * Look up the kernel pte for @address and report the mapping level:
 * 3 for a large (PMD) page, 4 for a regular 4k pte.
 */
pte_t *lookup_address(unsigned long address, int *level)
{
	pgd_t *pgd = pgd_offset_k(address);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd))
		return NULL;
	pud = pud_offset(pgd, address);
	if (pud_none(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		return NULL;

	*level = 3;
	if (pmd_large(*pmd))
		return (pte_t *)pmd;
	*level = 4;

	return pte_offset_kernel(pmd, address);
}
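/*
 * Illustrative sketch, not part of the original file: one way a caller could
 * use lookup_address() to check whether an address is currently covered by a
 * large (PMD-level) mapping.  The helper name is made up for this example.
 */
#if 0
static int example_addr_is_large_mapped(unsigned long address)
{
	int level;
	pte_t *pte = lookup_address(address, &level);

	return pte && level == 3;
}
#endif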
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
	/* change init_mm */
	set_pte_atomic(kpte, pte);

#ifdef CONFIG_X86_32
	if (!SHARED_KERNEL_PMD) {
		struct page *page;

		for (page = pgd_list; page; page = (struct page *)page->index) {
			pgd_t *pgd;
			pud_t *pud;
			pmd_t *pmd;

			pgd = (pgd_t *)page_address(page) + pgd_index(address);
			pud = pud_offset(pgd, address);
			pmd = pmd_offset(pud, address);
			set_pte_atomic((pte_t *)pmd, pte);
		}
	}
#endif
}
static int split_large_page(pte_t *kpte, unsigned long address)
{
	pgprot_t ref_prot = pte_pgprot(pte_clrhuge(*kpte));
	gfp_t gfp_flags = GFP_KERNEL;
	unsigned long flags;
	unsigned long addr;
	pte_t *pbase, *tmp;
	struct page *base;
	int i, level;

#ifdef CONFIG_DEBUG_PAGEALLOC
	gfp_flags = GFP_ATOMIC;
#endif
	base = alloc_pages(gfp_flags, 0);
	if (!base)
		return -ENOMEM;

	spin_lock_irqsave(&pgd_lock, flags);
	/*
	 * Check for races, another CPU might have split this page
	 * up for us already:
	 */
	tmp = lookup_address(address, &level);
	if (tmp != kpte) {
		WARN_ON_ONCE(1);
		goto out_unlock;
	}

	address = __pa(address);
	addr = address & LARGE_PAGE_MASK;
	pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif

	for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE)
		set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, ref_prot));

	/*
	 * Install the new, split up pagetable:
	 */
	__set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
	base = NULL;

out_unlock:
	spin_unlock_irqrestore(&pgd_lock, flags);

	if (base)
		__free_pages(base, 0);

	return 0;
}
static int
__change_page_attr(unsigned long address, struct page *page, pgprot_t prot)
{
	struct page *kpte_page;
	int level, err = 0;
	pte_t *kpte;

	BUG_ON(PageHighMem(page));

repeat:
	kpte = lookup_address(address, &level);
	if (!kpte)
		return -EINVAL;

	kpte_page = virt_to_page(kpte);
	BUG_ON(PageLRU(kpte_page));
	BUG_ON(PageCompound(kpte_page));

	/*
	 * Better fail early if someone sets the kernel text to NX.
	 * Does not cover __inittext
	 */
	BUG_ON(address >= (unsigned long)&_text &&
	       address < (unsigned long)&_etext &&
	       (pgprot_val(prot) & _PAGE_NX));

	if (level == 4) {
		set_pte_atomic(kpte, mk_pte(page, canon_pgprot(prot)));
	} else {
		err = split_large_page(kpte, address);
		if (!err)
			goto repeat;
	}
	return err;
}
/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @address: Virtual address in linear mapping.
 * @numpages: Number of pages to change
 * @prot: New page table attribute (PAGE_*)
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have a
 * mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 */
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
{
	int err = 0, kernel_map = 0, i;

#ifdef CONFIG_X86_64
	if (address >= __START_KERNEL_map &&
			address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {

		address = (unsigned long)__va(__pa(address));
		kernel_map = 1;
	}
#endif

	for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
		unsigned long pfn = __pa(address) >> PAGE_SHIFT;

		if (!kernel_map || pte_present(pfn_pte(0, prot))) {
			err = __change_page_attr(address, pfn_to_page(pfn), prot);
			if (err)
				break;
		}
#ifdef CONFIG_X86_64
		/*
		 * Handle kernel mapping too which aliases part of
		 * lowmem:
		 */
		if (__pa(address) < KERNEL_TEXT_SIZE) {
			unsigned long addr2;
			pgprot_t prot2;

			addr2 = __START_KERNEL_map + __pa(address);
			/* Make sure the kernel mappings stay executable */
			prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
			err = __change_page_attr(addr2, pfn_to_page(pfn), prot2);
		}
#endif
	}

	return err;
}
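/*
 * Illustrative sketch, not part of the original file: change_page_attr_addr()
 * is the entry point to use when the range has no struct page (pfn_valid()
 * is false), e.g. a hole in the direct mapping.  The helper name and the use
 * of PAGE_KERNEL_NOCACHE are example choices, and the usual declarations
 * (e.g. from <asm/cacheflush.h>) are assumed to be in scope.
 */
#if 0
static int example_set_directmap_uncached(unsigned long vaddr, int numpages)
{
	int err;

	err = change_page_attr_addr(vaddr, numpages, PAGE_KERNEL_NOCACHE);
	if (err)
		return err;

	/* The new attributes only become visible after the global flush. */
	global_flush_tlb();
	return 0;
}
#endif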
/**
 * change_page_attr - Change page table attributes in the linear mapping.
 * @page: First page to change
 * @numpages: Number of pages to change
 * @prot: New protection/caching type (PAGE_*)
 *
 * Returns 0 on success, otherwise a negative errno value.
 *
 * This should be used when a page is mapped with a different caching policy
 * than write-back somewhere - some CPUs do not like it when mappings with
 * different caching policies exist. This changes the page attributes of the
 * kernel linear mapping too.
 *
 * Caller must call global_flush_tlb() later to make the changes active.
 *
 * The caller needs to ensure that there are no conflicting mappings elsewhere
 * (e.g. in user space). This function only deals with the kernel linear map.
 *
 * For MMIO areas without mem_map use change_page_attr_addr() instead.
 */
int change_page_attr(struct page *page, int numpages, pgprot_t prot)
{
	unsigned long addr = (unsigned long)page_address(page);

	return change_page_attr_addr(addr, numpages, prot);
}
EXPORT_SYMBOL(change_page_attr);
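/*
 * Illustrative sketch, not part of the original file: the typical pairing the
 * comment above describes for a caller that needs a page mapped uncached and
 * later restored.  The function names and PAGE_KERNEL_NOCACHE are example
 * choices, and the usual declarations (e.g. from <asm/cacheflush.h>) are
 * assumed to be in scope.
 */
#if 0
static void example_make_page_uncached(struct page *pg)
{
	if (!change_page_attr(pg, 1, PAGE_KERNEL_NOCACHE))
		global_flush_tlb();	/* required to make the change active */
}

static void example_restore_page(struct page *pg)
{
	if (!change_page_attr(pg, 1, PAGE_KERNEL))
		global_flush_tlb();
}
#endif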
static void flush_kernel_map(void *arg)
{
	/*
	 * Flush all to work around errata in early Athlons regarding
	 * large page flushing.
	 */
	__flush_tlb_all();

	if (boot_cpu_data.x86_model >= 4)
		wbinvd();
}

void global_flush_tlb(void)
{
	BUG_ON(irqs_disabled());

	on_each_cpu(flush_kernel_map, NULL, 1, 1);
}
EXPORT_SYMBOL(global_flush_tlb);
#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
	if (PageHighMem(page))
		return;
	if (!enable) {
		debug_check_no_locks_freed(page_address(page),
					   numpages * PAGE_SIZE);
	}

	/*
	 * If the page allocator is not up yet then do not call c_p_a():
	 */
	if (!debug_pagealloc_enabled)
		return;

	/*
	 * The return value is ignored - the calls cannot fail,
	 * large pages are disabled at boot time:
	 */
	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));

	/*
	 * We should perform an IPI and flush all TLBs, but that can
	 * deadlock, so flush only the current cpu:
	 */
	__flush_tlb_all();
}
#endif