#ifndef _I386_PGTABLE_H
#define _I386_PGTABLE_H

/*
 * The Linux memory management assumes a three-level page table setup. On
 * the i386, we use that, but "fold" the mid level into the top-level page
 * table, so that we physically have the same two-level page table as the
 * i386 mmu expects.
 *
 * This file contains the functions and defines necessary to modify and use
 * the i386 page table tree.
 */
#ifndef __ASSEMBLY__
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>
#include <asm/paravirt.h>

#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct mm_struct;
struct vm_area_struct;
/*
 * ZERO_PAGE is a global shared page that is always zero: used
 * for zero-mapped memory areas etc.
 */
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
extern unsigned long empty_zero_page[1024];
extern pgd_t swapper_pg_dir[1024];
extern struct kmem_cache *pmd_cache;
extern spinlock_t pgd_lock;
extern struct page *pgd_list;

void check_pgt_cache(void);
void pmd_ctor(struct kmem_cache *, void *);
void pgtable_cache_init(void);
void paging_init(void);

/*
 * The Linux x86 paging architecture is 'compile-time dual-mode': it
 * implements both the traditional 2-level x86 page tables and the
 * newer 3-level PAE-mode page tables.
 */
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level-defs.h>
# define PMD_SIZE (1UL << PMD_SHIFT)
# define PMD_MASK (~(PMD_SIZE-1))
#else
# include <asm/pgtable-2level-defs.h>
#endif

#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))

#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)

#define TWOLEVEL_PGDIR_SHIFT 22
#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
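
/*
 * Worked example (illustrative only): with the 2-level layout,
 * PGDIR_SHIFT == TWOLEVEL_PGDIR_SHIFT == 22 and PTRS_PER_PGD == 1024,
 * so a linear address splits 10/10/12:
 *
 *      31        22 21        12 11         0
 *      +-----------+------------+-----------+
 *      | pgd index | pte index  |  offset   |
 *      +-----------+------------+-----------+
 *
 * With the default __PAGE_OFFSET of 0xc0000000, BOOT_USER_PGD_PTRS is
 * 0xc0000000 >> 22 == 768 and BOOT_KERNEL_PGD_PTRS is 1024 - 768 == 256.
 */
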
/* Just any arbitrary offset to the start of the vmalloc VM area: the
 * current 8MB value just means that there will be an 8MB "hole" after the
 * physical memory until the kernel virtual memory starts. That means that
 * any out-of-bounds memory accesses will hopefully be caught.
 * The vmalloc() routines leave a hole of 4kB between each vmalloced
 * area for the same reason. ;)
 */
#define VMALLOC_OFFSET (8*1024*1024)
#define VMALLOC_START (((unsigned long)high_memory + \
                        2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
#else
# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE)
#endif
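
/*
 * Worked example (illustrative only): with 896MB of lowmem, high_memory
 * is 0xf8000000.  Then 0xf8000000 + 2*0x800000 - 1 == 0xf8ffffff, and
 * masking with ~(0x800000 - 1) gives VMALLOC_START == 0xf8800000: the
 * lowest 8MB-aligned address that still leaves a full 8MB hole above
 * high_memory.
 */
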
/*
 * Define this if things work differently on an i386 and an i486:
 * it will (on an i486) warn about kernel memory accesses that are
 * done without an 'access_ok(VERIFY_WRITE,..)'
 */
#undef TEST_ACCESS_OK

/* The boot page tables (all created as a single array) */
extern unsigned long pg0[];

#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))

/*
 * To avoid harmful races, pmd_none(x) should check only the
 * lower word when PAE is enabled.
 */
#define pmd_none(x) (!(unsigned long)pmd_val(x))
#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)

#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))

/*
 * The following only work if pte_present() is true.
 * Undefined behaviour if not.
 */
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; }

static inline int pmd_large(pmd_t pte)
{
        return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
                (_PAGE_PSE|_PAGE_PRESENT);
}

static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
static inline pte_t pte_mkold(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_RW); }
static inline pte_t pte_mkexec(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_NX); }
static inline pte_t pte_mkdirty(pte_t pte) { return __pte(pte_val(pte) | _PAGE_DIRTY); }
static inline pte_t pte_mkyoung(pte_t pte) { return __pte(pte_val(pte) | _PAGE_ACCESSED); }
static inline pte_t pte_mkwrite(pte_t pte) { return __pte(pte_val(pte) | _PAGE_RW); }
static inline pte_t pte_mkhuge(pte_t pte) { return __pte(pte_val(pte) | _PAGE_PSE); }
static inline pte_t pte_clrhuge(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_PSE); }
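
/*
 * Illustrative sketch (not kernel code): the modifiers above return a new
 * pte_t value rather than touching the entry in place, so they compose
 * functionally; the result is then written back through the set_pte API.
 * With mm, addr and ptep standing for a suitable mm, address and PTE
 * pointer, marking an entry accessed and dirty looks like:
 *
 *      pte_t pte = *ptep;
 *      pte = pte_mkyoung(pte_mkdirty(pte));
 *      set_pte_at(mm, addr, ptep, pte);
 */
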
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else
# include <asm/pgtable-2level.h>
#endif

#ifndef CONFIG_PARAVIRT
/*
 * Rules for using pte_update - it must be called after any PTE update which
 * has not been done using the set_pte / clear_pte interfaces. It is used by
 * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
 * updates should either be sets, clears, or set_pte_atomic for P->P
 * transitions, which means this hook should only be called for user PTEs.
 * This hook implies a P->P protection or access change has taken place, which
 * requires a subsequent TLB flush. The notification can optionally be delayed
 * until the TLB flush event by using the pte_update_defer form of the
 * interface, but care must be taken to assure that the flush happens while
 * still holding the same page table lock so that the shadow and primary pages
 * do not become out of sync on SMP.
 */
#define pte_update(mm, addr, ptep) do { } while (0)
#define pte_update_defer(mm, addr, ptep) do { } while (0)
#endif
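
/*
 * Illustrative sketch of the rule above (hypothetical caller, not kernel
 * code): a raw update of a user PTE must be followed by the notification
 * and a TLB flush while the page table lock is still held:
 *
 *      spin_lock(ptl);
 *      ptep->pte_low = entry.pte_low;
 *      pte_update_defer(mm, addr, ptep);
 *      flush_tlb_page(vma, addr);
 *      spin_unlock(ptl);
 *
 * which is exactly the pattern ptep_set_access_flags() below follows.
 */
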
/* local pte updates need not use xchg for locking */
static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
{
        pte_t res = *ptep;

        /* Pure native function needs no input for mm, addr */
        native_pte_clear(NULL, 0, ptep);
        return res;
}

/*
 * We only update the dirty/accessed state if we set
 * the dirty bit by hand in the kernel, since the hardware
 * will do the accessed bit for us, and we don't want to
 * race with other CPUs that might be updating the dirty
 * bit at the same time.
 */
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
({ \
        int __changed = !pte_same(*(ptep), entry); \
        if (__changed && dirty) { \
                (ptep)->pte_low = (entry).pte_low; \
                pte_update_defer((vma)->vm_mm, (address), (ptep)); \
                flush_tlb_page(vma, address); \
        } \
        __changed; \
})

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define ptep_test_and_clear_young(vma, addr, ptep) ({ \
        int __ret = 0; \
        if (pte_young(*(ptep))) \
                __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
                                           &(ptep)->pte_low); \
        if (__ret) \
                pte_update((vma)->vm_mm, addr, ptep); \
        __ret; \
})

#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young(vma, address, ptep) \
({ \
        int __young; \
        __young = ptep_test_and_clear_young((vma), (address), (ptep)); \
        if (__young) \
                flush_tlb_page(vma, address); \
        __young; \
})

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        pte_t pte = native_ptep_get_and_clear(ptep);
        pte_update(mm, addr, ptep);
        return pte;
}

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
{
        pte_t pte;
        if (full) {
                /*
                 * Full address destruction in progress; paravirt does not
                 * care about updates and native needs no locking
                 */
                pte = native_local_ptep_get_and_clear(ptep);
        } else {
                pte = ptep_get_and_clear(mm, addr, ptep);
        }
        return pte;
}

#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
        pte_update(mm, addr, ptep);
}
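
/*
 * Usage sketch (illustrative): fork-time copy-on-write write-protects the
 * parent's PTE before sharing the page, roughly (cf. copy_one_pte() in
 * mm/memory.c):
 *
 *      if (is_cow_mapping(vm_flags)) {
 *              ptep_set_wrprotect(src_mm, addr, src_pte);
 *              pte = pte_wrprotect(pte);
 *      }
 */
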
/*
 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
 *
 *  dst - pointer to pgd range anywhere on a pgd page
 *  src - ""
 *  count - the number of pgds to copy.
 *
 * dst and src can be on the same page, but the range must not overlap,
 * and must not cross a page boundary.
 */
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
        memcpy(dst, src, count * sizeof(pgd_t));
}
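
/*
 * Usage sketch (illustrative): a freshly allocated pgd inherits the kernel
 * mappings from swapper_pg_dir with something like:
 *
 *      clone_pgd_range(pgd + USER_PGD_PTRS,
 *                      swapper_pg_dir + USER_PGD_PTRS,
 *                      KERNEL_PGD_PTRS);
 *
 * which copies only the kernel half of the pgd page.
 */
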
/*
 * Macro to mark a page protection value as "uncacheable".
 * On processors which do not support it, this is a no-op.
 */
#define pgprot_noncached(prot) ((boot_cpu_data.x86 > 3) \
        ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))
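
/*
 * Usage sketch (illustrative): a driver mapping device memory typically
 * combines this with io_remap_pfn_range() (defined at the end of this
 * file), e.g. in its mmap handler:
 *
 *      vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 *      return io_remap_pfn_range(vma, vma->vm_start, pfn,
 *                                vma->vm_end - vma->vm_start,
 *                                vma->vm_page_prot);
 */
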
/*
 * Conversion functions: convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 */
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))

static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
        pte.pte_low &= _PAGE_CHG_MASK;
        pte.pte_low |= pgprot_val(newprot);
#ifdef CONFIG_X86_PAE
        /*
         * Chop off the NX bit (if present), and add the NX portion of
         * the newprot (if present):
         */
        pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
        pte.pte_high |= (pgprot_val(newprot) >> 32) &
                        (__supported_pte_mask >> 32);
#endif
        return pte;
}
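
/*
 * Usage sketch (illustrative): mprotect-style code changes the protection
 * of a live entry while keeping its pfn and the bits in _PAGE_CHG_MASK
 * (cf. change_pte_range() in mm/mprotect.c):
 *
 *      pte_t ptent = ptep_get_and_clear(mm, addr, ptep);
 *      ptent = pte_modify(ptent, newprot);
 *      set_pte_at(mm, addr, ptep, ptent);
 */
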
/*
 * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
 *
 * this macro returns the index of the entry in the pgd page which would
 * control the given virtual address
 */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define pgd_index_k(addr) pgd_index(addr)
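
/*
 * Worked example (illustrative): with the 2-level layout (PGDIR_SHIFT ==
 * 22), the kernel base address 0xc0000000 gives pgd_index ==
 * 0xc0000000 >> 22 == 768, i.e. the kernel's entries start three quarters
 * of the way into the pgd page.
 */
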
/*
 * pgd_offset() returns a (pgd_t *)
 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
 */
#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))

/*
 * a shortcut which implies the use of the kernel's pgd, instead
 * of a process's
 */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)

/*
 * the pmd page can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
 *
 * this macro returns the index of the entry in the pmd page which would
 * control the given virtual address
 */
#define pmd_index(address) \
        (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))

/*
 * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
 *
 * this macro returns the index of the entry in the pte page which would
 * control the given virtual address
 */
#define pte_index(address) \
        (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) \
        ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index(address))

#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))

#define pmd_page_vaddr(pmd) \
        ((unsigned long)__va(pmd_val(pmd) & PAGE_MASK))

/*
 * Helper function that returns the kernel pagetable entry controlling
 * the virtual address 'address'. NULL means no pagetable entry present.
 * NOTE: the return type is pte_t but if the pmd is PSE then we return it
 * as a pte too.
 */
extern pte_t *lookup_address(unsigned long address);
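
/*
 * Illustrative sketch of such a lookup (cf. lookup_address() in the arch
 * code; error checks trimmed, and the pud/pmd folding helpers come from
 * the generic page table headers):
 *
 *      pgd_t *pgd = pgd_offset_k(address);
 *      pud_t *pud = pud_offset(pgd, address);
 *      pmd_t *pmd = pmd_offset(pud, address);
 *      if (pmd_none(*pmd))
 *              return NULL;
 *      if (pmd_large(*pmd))
 *              return (pte_t *)pmd;
 *      return pte_offset_kernel(pmd, address);
 */
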
/*
 * Make a given kernel text page executable/non-executable.
 * Returns the previous executability setting of that page (which
 * is used to restore the previous state). Used by the SMP bootup code.
 * NOTE: this is an __init function for security reasons.
 */
#ifdef CONFIG_X86_PAE
extern int set_kernel_exec(unsigned long vaddr, int enable);
#else
static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0; }
#endif

#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address) \
        ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + pte_index(address))
#define pte_offset_map_nested(dir, address) \
        ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + pte_index(address))
#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
#else
#define pte_offset_map(dir, address) \
        ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
#define pte_unmap(pte) do { } while (0)
#define pte_unmap_nested(pte) do { } while (0)
#endif
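
/*
 * Usage sketch (illustrative): with CONFIG_HIGHPTE the pte page may live
 * in highmem, so mappings are short-lived atomic kmaps and must be paired
 * with the matching unmap:
 *
 *      pte_t *pte = pte_offset_map(pmd, addr);
 *      pte_t entry = *pte;
 *      pte_unmap(pte);
 *
 * Nested walks (e.g. copying between two page tables) use the _nested
 * variants so that the two atomic kmap slots (KM_PTE0/KM_PTE1) do not
 * collide.
 */
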
/* Clear a kernel PTE and flush it from the TLB */
#define kpte_clear_flush(ptep, vaddr) \
do { \
        pte_clear(&init_mm, vaddr, ptep); \
        __flush_tlb_one(vaddr); \
} while (0)

/*
 * The i386 doesn't have any external MMU info: the kernel page
 * tables contain all the necessary information.
 */
#define update_mmu_cache(vma, address, pte) do { } while (0)

void native_pagetable_setup_start(pgd_t *base);
void native_pagetable_setup_done(pgd_t *base);

#ifndef CONFIG_PARAVIRT
static inline void paravirt_pagetable_setup_start(pgd_t *base)
{
        native_pagetable_setup_start(base);
}

static inline void paravirt_pagetable_setup_done(pgd_t *base)
{
        native_pagetable_setup_done(base);
}
#endif /* !CONFIG_PARAVIRT */

#endif /* !__ASSEMBLY__ */

/*
 * kern_addr_valid() is (1) for FLATMEM and (0) for
 * SPARSEMEM and DISCONTIGMEM
 */
#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr) (1)
#else
#define kern_addr_valid(addr) (0)
#endif

#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
        remap_pfn_range(vma, vaddr, pfn, size, prot)

#include <asm-generic/pgtable.h>

#endif /* _I386_PGTABLE_H */