pgtable_32.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. #ifndef _I386_PGTABLE_H
  2. #define _I386_PGTABLE_H
  3. /*
  4. * The Linux memory management assumes a three-level page table setup. On
  5. * the i386, we use that, but "fold" the mid level into the top-level page
  6. * table, so that we physically have the same two-level page table as the
  7. * i386 mmu expects.
  8. *
  9. * This file contains the functions and defines necessary to modify and use
  10. * the i386 page table tree.
  11. */
  12. #ifndef __ASSEMBLY__
  13. #include <asm/processor.h>
  14. #include <asm/fixmap.h>
  15. #include <linux/threads.h>
  16. #include <asm/paravirt.h>
  17. #include <linux/bitops.h>
  18. #include <linux/slab.h>
  19. #include <linux/list.h>
  20. #include <linux/spinlock.h>
  21. struct mm_struct;
  22. struct vm_area_struct;
  /*
   * ZERO_PAGE(vaddr) evaluates to the struct page backing empty_zero_page,
   * the single global shared page that is always zero; it is used for
   * zero-mapped memory areas etc.  The vaddr argument is not used.
   */
  extern unsigned long empty_zero_page[1024];
  #define ZERO_PAGE(vaddr)	(virt_to_page(empty_zero_page))
  29. extern pgd_t swapper_pg_dir[1024];
  30. extern struct kmem_cache *pmd_cache;
  31. extern spinlock_t pgd_lock;
  32. extern struct page *pgd_list;
  33. void check_pgt_cache(void);
  34. void pmd_ctor(struct kmem_cache *, void *);
  35. void pgtable_cache_init(void);
  36. void paging_init(void);
  37. /*
  38. * The Linux x86 paging architecture is 'compile-time dual-mode', it
  39. * implements both the traditional 2-level x86 page tables and the
  40. * newer 3-level PAE-mode page tables.
  41. */
  42. #ifdef CONFIG_X86_PAE
  43. # include <asm/pgtable-3level-defs.h>
  44. # define PMD_SIZE (1UL << PMD_SHIFT)
  45. # define PMD_MASK (~(PMD_SIZE-1))
  46. #else
  47. # include <asm/pgtable-2level-defs.h>
  48. #endif
  49. #define PGDIR_SIZE (1UL << PGDIR_SHIFT)
  50. #define PGDIR_MASK (~(PGDIR_SIZE-1))
  51. #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
  52. #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
  53. #define TWOLEVEL_PGDIR_SHIFT 22
  54. #define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
  55. #define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
  /*
   * Just any arbitrary offset to the start of the vmalloc VM area: the
   * current 8MB value just means that there will be an 8MB "hole" after
   * the physical memory until the kernel virtual memory starts.  That
   * means that any out-of-bounds memory accesses will hopefully be caught.
   * The vmalloc() routines leave a hole of 4kB between each vmalloced
   * area for the same reason. ;)
   */
  #define VMALLOC_OFFSET	(8 * 1024 * 1024)
  /* high_memory plus the hole, rounded to a VMALLOC_OFFSET boundary. */
  #define VMALLOC_START \
  	(((unsigned long) high_memory + 2 * VMALLOC_OFFSET - 1) & \
  	 ~(VMALLOC_OFFSET - 1))
  /* The area ends two pages below PKMAP_BASE (highmem) or FIXADDR_START. */
  #ifndef CONFIG_HIGHMEM
  # define VMALLOC_END	(FIXADDR_START - 2 * PAGE_SIZE)
  #else
  # define VMALLOC_END	(PKMAP_BASE - 2 * PAGE_SIZE)
  #endif
  /*
   * Define TEST_ACCESS_OK if things work differently on an i386 and an
   * i486: it will (on an i486) warn about kernel memory accesses that
   * are done without an 'access_ok(VERIFY_WRITE,..)'.  It is explicitly
   * left undefined here.
   */
  #undef TEST_ACCESS_OK
  /* The boot page tables (all created as a single array). */
  extern unsigned long pg0[];
  /* Non-zero when either _PAGE_PRESENT or _PAGE_PROTNONE is set in the low word. */
  #define pte_present(x) \
  	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
  /*
   * To avoid harmful races, pmd_none(x) should check only the low part of
   * the entry when PAE is in use; hence the cast to unsigned long.
   */
  #define pmd_none(x)	((unsigned long)pmd_val(x) == 0)
  #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
  /*
   * An entry is "bad" when its non-address bits, ignoring _PAGE_USER,
   * differ from _KERNPG_TABLE.
   */
  #define pmd_bad(x) \
  	((pmd_val(x) & ~(PAGE_MASK | _PAGE_USER)) != _KERNPG_TABLE)
  /* Convert a page count into megabytes. */
  #define pages_to_mb(x)	((x) >> (20 - PAGE_SHIFT))
  85. /*
  86. * The following only work if pte_present() is true.
  87. * Undefined behaviour if not..
  88. */
  89. static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
  90. static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
  91. static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
  92. static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; }
  93. /*
  94. * The following only works if pte_present() is not true.
  95. */
  96. static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
  97. static inline pte_t pte_mkclean(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
  98. static inline pte_t pte_mkold(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
  99. static inline pte_t pte_wrprotect(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_RW); }
  100. static inline pte_t pte_mkdirty(pte_t pte) { return __pte(pte_val(pte) | _PAGE_DIRTY); }
  101. static inline pte_t pte_mkyoung(pte_t pte) { return __pte(pte_val(pte) | _PAGE_ACCESSED); }
  102. static inline pte_t pte_mkwrite(pte_t pte) { return __pte(pte_val(pte) | _PAGE_RW); }
  103. static inline pte_t pte_mkhuge(pte_t pte) { return __pte(pte_val(pte) | _PAGE_PSE); }
  104. static inline pte_t pte_clrhuge(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_PSE); }
  105. static inline pte_t pte_mkexec(pte_t pte) { return __pte(pte_val(pte) & ~_PAGE_NX); }
  106. static inline int pmd_large(pmd_t pte) {
  107. return (pmd_val(pte) & (_PAGE_PSE|_PAGE_PRESENT)) ==
  108. (_PAGE_PSE|_PAGE_PRESENT);
  109. }
  110. #ifdef CONFIG_X86_PAE
  111. # include <asm/pgtable-3level.h>
  112. #else
  113. # include <asm/pgtable-2level.h>
  114. #endif
  #if !defined(CONFIG_PARAVIRT)
  /*
   * Rules for using pte_update - it must be called after any PTE update
   * which has not been done using the set_pte / clear_pte interfaces.  It
   * is used by shadow mode hypervisors to resynchronize the shadow page
   * tables.  Kernel PTE updates should either be sets, clears, or
   * set_pte_atomic for P->P transitions, which means this hook should only
   * be called for user PTEs.  This hook implies a P->P protection or access
   * change has taken place, which requires a subsequent TLB flush.  The
   * notification can optionally be delayed until the TLB flush event by
   * using the pte_update_defer form of the interface, but care must be
   * taken to assure that the flush happens while still holding the same
   * page table lock so that the shadow and primary pages do not become out
   * of sync on SMP.
   *
   * Without CONFIG_PARAVIRT both hooks expand to empty statements.
   */
  #define pte_update(mm, addr, ptep)		do { } while (0)
  #define pte_update_defer(mm, addr, ptep)	do { } while (0)
  #endif /* !CONFIG_PARAVIRT */
  132. /* local pte updates need not use xchg for locking */
  133. static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
  134. {
  135. pte_t res = *ptep;
  136. /* Pure native function needs no input for mm, addr */
  137. native_pte_clear(NULL, 0, ptep);
  138. return res;
  139. }
  /*
   * We only update the dirty/accessed state if we set
   * the dirty bit by hand in the kernel, since the hardware
   * will do the accessed bit for us, and we don't want to
   * race with other CPU's that might be updating the dirty
   * bit at the same time.
   *
   * ptep_set_access_flags(vma, address, ptep, entry, dirty):
   *   Evaluates to non-zero when *ptep differs from entry (per pte_same()).
   *   Only when it differs AND dirty is non-zero is the low word of entry
   *   copied into *ptep, followed by pte_update_defer() and a
   *   flush_tlb_page() for the address.
   *
   * The dirty and entry arguments are parenthesised in the expansion so
   * that a compound expression such as "a || b" passed as dirty cannot
   * re-associate with "__changed &&".  ptep is still evaluated more than
   * once, so it must not have side effects.
   */
  #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
  #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
  ({ \
  	int __changed = !pte_same(*(ptep), (entry)); \
  	if (__changed && (dirty)) { \
  		(ptep)->pte_low = (entry).pte_low; \
  		pte_update_defer((vma)->vm_mm, (address), (ptep)); \
  		flush_tlb_page((vma), (address)); \
  	} \
  	__changed; \
  })
  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
  /*
   * If pte_young() holds for *ptep, test-and-clear _PAGE_BIT_ACCESSED in
   * its low word; when the bit was actually cleared, notify via
   * pte_update().  Evaluates to the test_and_clear_bit() result, or 0 when
   * the pte was not young.
   */
  #define ptep_test_and_clear_young(vma, addr, ptep) \
  ({ \
  	int __ret = 0; \
  	if (pte_young(*(ptep))) { \
  		__ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
  					   &(ptep)->pte_low); \
  		if (__ret) \
  			pte_update((vma)->vm_mm, addr, ptep); \
  	} \
  	__ret; \
  })
  #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
  /*
   * Run ptep_test_and_clear_young() and, if it reports the pte was young,
   * flush the TLB entry for the address.  Evaluates to that result.
   */
  #define ptep_clear_flush_young(vma, address, ptep) \
  ({ \
  	int __young = \
  		ptep_test_and_clear_young((vma), (address), (ptep)); \
  	if (__young) \
  		flush_tlb_page(vma, address); \
  	__young; \
  })
  177. #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
  178. static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
  179. {
  180. pte_t pte = native_ptep_get_and_clear(ptep);
  181. pte_update(mm, addr, ptep);
  182. return pte;
  183. }
  184. #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
  185. static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
  186. {
  187. pte_t pte;
  188. if (full) {
  189. /*
  190. * Full address destruction in progress; paravirt does not
  191. * care about updates and native needs no locking
  192. */
  193. pte = native_local_ptep_get_and_clear(ptep);
  194. } else {
  195. pte = ptep_get_and_clear(mm, addr, ptep);
  196. }
  197. return pte;
  198. }
  199. #define __HAVE_ARCH_PTEP_SET_WRPROTECT
  200. static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
  201. {
  202. clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
  203. pte_update(mm, addr, ptep);
  204. }
  205. /*
  206. * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  207. *
  208. * dst - pointer to pgd range anwhere on a pgd page
  209. * src - ""
  210. * count - the number of pgds to copy.
  211. *
  212. * dst and src can be on the same page, but the range must not overlap,
  213. * and must not cross a page boundary.
  214. */
  215. static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
  216. {
  217. memcpy(dst, src, count * sizeof(pgd_t));
  218. }
  /*
   * Macro to mark a page protection value as "uncacheable": ORs
   * _PAGE_PCD | _PAGE_PWT into prot.  On processors which do not support
   * it (boot_cpu_data.x86 of 3 or less), this is a no-op and prot is
   * returned unchanged.
   */
  #define pgprot_noncached(prot) \
  	((boot_cpu_data.x86 <= 3) \
  	 ? (prot) \
  	 : (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)))
  /*
   * Conversion functions: convert a page and protection to a page entry,
   * and a page entry and page directory to the page they refer to.
   *
   * mk_pte() builds the entry from the page's pfn and the protection.
   */
  #define mk_pte(page, pgprot) \
  	pfn_pte(page_to_pfn(page), (pgprot))
  230. static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  231. {
  232. pte.pte_low &= _PAGE_CHG_MASK;
  233. pte.pte_low |= pgprot_val(newprot);
  234. #ifdef CONFIG_X86_PAE
  235. /*
  236. * Chop off the NX bit (if present), and add the NX portion of
  237. * the newprot (if present):
  238. */
  239. pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
  240. pte.pte_high |= (pgprot_val(newprot) >> 32) & \
  241. (__supported_pte_mask >> 32);
  242. #endif
  243. return pte;
  244. }
  /*
   * Non-zero when the pmd entry has both _PAGE_PSE and _PAGE_PRESENT set.
   * NOTE(review): this macro has the same name and test as the static
   * inline pmd_large() defined earlier in this header and takes over
   * from this point on; one of the two looks redundant.
   */
  #define pmd_large(pmd) \
  	((pmd_val(pmd) & (_PAGE_PSE | _PAGE_PRESENT)) == \
  	 (_PAGE_PSE | _PAGE_PRESENT))
  /*
   * The pgd page can be thought of as an array like this:
   * pgd_t[PTRS_PER_PGD]
   *
   * pgd_index() returns the index of the entry in the pgd page which
   * would control the given virtual address.  pgd_index_k() is an alias.
   */
  #define pgd_index(address) \
  	(((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
  #define pgd_index_k(addr)	pgd_index(addr)
  /*
   * pgd_offset() returns a (pgd_t *).
   * pgd_index() is used to get the offset into the pgd page's array of
   * pgd_t's.
   */
  #define pgd_offset(mm, address)	((mm)->pgd + pgd_index(address))
  /*
   * A shortcut which implies the use of the kernel's pgd (init_mm),
   * instead of a process's.
   */
  #define pgd_offset_k(address)	pgd_offset(&init_mm, address)
  /*
   * The pmd page can be thought of as an array like this:
   * pmd_t[PTRS_PER_PMD]
   *
   * pmd_index() returns the index of the entry in the pmd page which
   * would control the given virtual address.
   */
  #define pmd_index(address)	(((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
  /*
   * The pte page can be thought of as an array like this:
   * pte_t[PTRS_PER_PTE]
   *
   * pte_index() returns the index of the entry in the pte page which
   * would control the given virtual address.
   */
  #define pte_index(address)	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
  /* Kernel virtual address (as unsigned long) of the page a pmd entry points at. */
  #define pmd_page_vaddr(pmd)	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
  /* struct page of the page a pmd entry points at. */
  #define pmd_page(pmd)	(pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
  /* Pointer to the pte controlling 'address' within the pte page of *dir. */
  #define pte_offset_kernel(dir, address) \
  	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
  286. /*
  287. * Helper function that returns the kernel pagetable entry controlling
  288. * the virtual address 'address'. NULL means no pagetable entry present.
  289. * NOTE: the return type is pte_t but if the pmd is PSE then we return it
  290. * as a pte too.
  291. */
  292. extern pte_t *lookup_address(unsigned long address);
  /*
   * Make a given kernel text page executable/non-executable.
   * Returns the previous executability setting of that page (which
   * is used to restore the previous state).  Used by the SMP bootup code.
   * NOTE: this is an __init function for security reasons.
   *
   * Without CONFIG_X86_PAE this is an inline stub that always returns 0.
   */
  #ifndef CONFIG_X86_PAE
  static inline int set_kernel_exec(unsigned long vaddr, int enable)
  {
  	return 0;
  }
  #else
  extern int set_kernel_exec(unsigned long vaddr, int enable);
  #endif
  /*
   * pte_offset_map*() yield a pointer to the pte controlling 'address'
   * in the pte page referenced by *dir; pte_unmap*() undo the mapping.
   * Without CONFIG_HIGHPTE the pte page is reached through page_address()
   * and unmapping is a no-op; with it, the page is mapped with
   * kmap_atomic_pte() using the KM_PTE0 slot (KM_PTE1 for the _nested
   * forms) and released with kunmap_atomic().
   */
  #if !defined(CONFIG_HIGHPTE)
  #define pte_offset_map(dir, address) \
  	((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
  #define pte_offset_map_nested(dir, address)	pte_offset_map(dir, address)
  #define pte_unmap(pte)			do { } while (0)
  #define pte_unmap_nested(pte)		do { } while (0)
  #else
  #define pte_offset_map(dir, address) \
  	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
  #define pte_offset_map_nested(dir, address) \
  	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
  #define pte_unmap(pte)			kunmap_atomic(pte, KM_PTE0)
  #define pte_unmap_nested(pte)		kunmap_atomic(pte, KM_PTE1)
  #endif
  /*
   * Clear a kernel PTE and flush it from the TLB: pte_clear() against
   * init_mm followed by __flush_tlb_one() for the same address.
   */
  #define kpte_clear_flush(ptep, vaddr) \
  	do { \
  		pte_clear(&init_mm, vaddr, ptep); \
  		__flush_tlb_one(vaddr); \
  	} while (0)
  /*
   * The i386 doesn't have any external MMU info: the kernel page
   * tables contain all the necessary information, so update_mmu_cache()
   * expands to an empty statement.
   */
  #define update_mmu_cache(vma,address,pte)	do { } while (0)
  329. void native_pagetable_setup_start(pgd_t *base);
  330. void native_pagetable_setup_done(pgd_t *base);
  331. #ifndef CONFIG_PARAVIRT
  332. static inline void paravirt_pagetable_setup_start(pgd_t *base)
  333. {
  334. native_pagetable_setup_start(base);
  335. }
  336. static inline void paravirt_pagetable_setup_done(pgd_t *base)
  337. {
  338. native_pagetable_setup_done(base);
  339. }
  340. #endif /* !CONFIG_PARAVIRT */
  341. #endif /* !__ASSEMBLY__ */
  /*
   * kern_addr_valid() is (1) for FLATMEM and (0) for
   * SPARSEMEM and DISCONTIGMEM.  The argument is not evaluated.
   */
  #if defined(CONFIG_FLATMEM)
  #define kern_addr_valid(addr)	(1)
  #else
  #define kern_addr_valid(addr)	(0)
  #endif
  /* io_remap_pfn_range() maps straight onto remap_pfn_range() here. */
  #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
  	remap_pfn_range(vma, vaddr, pfn, size, prot)
  353. #include <asm-generic/pgtable.h>
  354. #endif /* _I386_PGTABLE_H */