/* pgtable_32.h - functions and defines for the i386 page table tree */
  1. #ifndef _I386_PGTABLE_H
  2. #define _I386_PGTABLE_H
  3. /*
  4. * The Linux memory management assumes a three-level page table setup. On
  5. * the i386, we use that, but "fold" the mid level into the top-level page
  6. * table, so that we physically have the same two-level page table as the
  7. * i386 mmu expects.
  8. *
  9. * This file contains the functions and defines necessary to modify and use
  10. * the i386 page table tree.
  11. */
  12. #ifndef __ASSEMBLY__
  13. #include <asm/processor.h>
  14. #include <asm/fixmap.h>
  15. #include <linux/threads.h>
  16. #include <asm/paravirt.h>
  17. #include <linux/bitops.h>
  18. #include <linux/slab.h>
  19. #include <linux/list.h>
  20. #include <linux/spinlock.h>
  21. struct mm_struct;
  22. struct vm_area_struct;
  /*
   * empty_zero_page is one globally shared page that is always zero; it is
   * used for zero-mapped memory areas and similar purposes.
   *
   * ZERO_PAGE() passes that page to virt_to_page().  The vaddr argument is
   * accepted but does not take part in the expansion.
   */
  extern unsigned long empty_zero_page[1024];
  #define ZERO_PAGE(vaddr)        (virt_to_page(empty_zero_page))
  29. extern pgd_t swapper_pg_dir[1024];
  30. extern struct kmem_cache *pmd_cache;
  31. extern spinlock_t pgd_lock;
  32. extern struct page *pgd_list;
  33. void check_pgt_cache(void);
  34. void pmd_ctor(struct kmem_cache *, void *);
  35. void pgtable_cache_init(void);
  36. void paging_init(void);
  37. /*
  38. * The Linux x86 paging architecture is 'compile-time dual-mode', it
  39. * implements both the traditional 2-level x86 page tables and the
  40. * newer 3-level PAE-mode page tables.
  41. */
  42. #ifdef CONFIG_X86_PAE
  43. # include <asm/pgtable-3level-defs.h>
  44. # define PMD_SIZE (1UL << PMD_SHIFT)
  45. # define PMD_MASK (~(PMD_SIZE-1))
  46. #else
  47. # include <asm/pgtable-2level-defs.h>
  48. #endif
  /* Bytes of virtual address space covered by one pgd entry, and its mask. */
  #define PGDIR_SIZE              (1UL << PGDIR_SHIFT)
  #define PGDIR_MASK              (~(PGDIR_SIZE - 1))

  /* Number of pgd entries below PAGE_OFFSET, and the number remaining. */
  #define USER_PGD_PTRS           (PAGE_OFFSET >> PGDIR_SHIFT)
  #define KERNEL_PGD_PTRS         (PTRS_PER_PGD - USER_PGD_PTRS)

  /* The same split, computed with a fixed 22-bit shift and 1024 entries. */
  #define TWOLEVEL_PGDIR_SHIFT    22
  #define BOOT_USER_PGD_PTRS      (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
  #define BOOT_KERNEL_PGD_PTRS    (1024 - BOOT_USER_PGD_PTRS)
  /*
   * VMALLOC_OFFSET is an arbitrary offset to the start of the vmalloc VM
   * area.  With the current 8MB value there is an 8MB "hole" after physical
   * memory before kernel virtual memory starts, so that out-of-bounds
   * memory accesses will hopefully be caught.  The vmalloc() routines leave
   * a 4kB hole between vmalloced areas for the same reason. ;)
   *
   * VMALLOC_START rounds high_memory up to a VMALLOC_OFFSET boundary after
   * adding one further VMALLOC_OFFSET.
   */
  #define VMALLOC_OFFSET          (8 * 1024 * 1024)
  #define VMALLOC_START           (((unsigned long) high_memory + \
                                    2 * VMALLOC_OFFSET - 1) & ~(VMALLOC_OFFSET - 1))

  /* The area ends two pages below PKMAP_BASE (HIGHMEM) or FIXADDR_START. */
  #ifdef CONFIG_HIGHMEM
  # define VMALLOC_END             (PKMAP_BASE - 2 * PAGE_SIZE)
  #else
  # define VMALLOC_END             (FIXADDR_START - 2 * PAGE_SIZE)
  #endif
  71. /*
  72. * Define this if things work differently on an i386 and an i486:
  73. * it will (on an i486) warn about kernel memory accesses that are
  74. * done without a 'access_ok(VERIFY_WRITE,..)'
  75. */
  76. #undef TEST_ACCESS_OK
  /* The boot page tables (all created as a single array) */
  extern unsigned long pg0[];

  /* Non-zero when the pte has _PAGE_PRESENT or _PAGE_PROTNONE set. */
  #define pte_present(x)  ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))

  /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
  #define pmd_none(x)     (!(unsigned long) pmd_val(x))

  /* Non-zero when the pmd has _PAGE_PRESENT set. */
  #define pmd_present(x)  (pmd_val(x) & _PAGE_PRESENT)

  /*
   * A pmd is "bad" when its sub-page bits, with _PAGE_USER ignored, are
   * anything other than exactly _KERNPG_TABLE.
   */
  #define pmd_bad(x) \
      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)

  /* Convert a page count into megabytes (rounding down). */
  #define pages_to_mb(x)  ((x) >> (20 - PAGE_SHIFT))
  85. #ifdef CONFIG_X86_PAE
  86. # include <asm/pgtable-3level.h>
  87. #else
  88. # include <asm/pgtable-2level.h>
  89. #endif
  #ifndef CONFIG_PARAVIRT
  /*
   * Without CONFIG_PARAVIRT both hooks below expand to an empty statement.
   *
   * Rules for pte_update(): it must be called after any PTE update that was
   * not done through the set_pte / clear_pte interfaces.  Shadow mode
   * hypervisors use it to resynchronize the shadow page tables.  Kernel PTE
   * updates should either be sets, clears, or set_pte_atomic for P->P
   * transitions, which means this hook should only be called for user PTEs.
   *
   * The hook implies that a P->P protection or access change has taken
   * place, which requires a subsequent TLB flush.  The notification can
   * optionally be delayed until the TLB flush event by using
   * pte_update_defer(), but the flush must then happen while the same page
   * table lock is still held, so that the shadow and primary pages do not
   * become out of sync on SMP.
   */
  #define pte_update(mm, addr, ptep)              do { } while (0)
  #define pte_update_defer(mm, addr, ptep)        do { } while (0)
  #endif
  107. /* local pte updates need not use xchg for locking */
  108. static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
  109. {
  110. pte_t res = *ptep;
  111. /* Pure native function needs no input for mm, addr */
  112. native_pte_clear(NULL, 0, ptep);
  113. return res;
  114. }
  /*
   * We only update the dirty/accessed state if we set
   * the dirty bit by hand in the kernel, since the hardware
   * will do the accessed bit for us, and we don't want to
   * race with other CPUs that might be updating the dirty
   * bit at the same time.
   *
   * ptep_set_access_flags(vma, address, ptep, entry, dirty)
   *
   * Evaluates to non-zero when *ptep differs from entry (per pte_same()),
   * zero otherwise.  Only when the entry differs AND dirty is non-zero does
   * it copy entry.pte_low into the pte, call pte_update_defer() and flush
   * the TLB entry for the page.
   *
   * Every macro argument is parenthesized at its point of use so that a
   * compound expression (e.g. "a || b" passed as dirty) keeps its meaning
   * inside the "&&" test.  Arguments may still be evaluated more than once,
   * so they must be free of side effects.
   */
  #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
  #define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
  ({ \
      int __changed = !pte_same(*(ptep), (entry)); \
      if (__changed && (dirty)) { \
          (ptep)->pte_low = (entry).pte_low; \
          pte_update_defer((vma)->vm_mm, (address), (ptep)); \
          flush_tlb_page((vma), (address)); \
      } \
      __changed; \
  })
  /*
   * ptep_test_and_clear_young(vma, addr, ptep)
   *
   * When pte_young() reports the pte as young, clear _PAGE_BIT_ACCESSED in
   * pte_low via test_and_clear_bit() and take its result as the value of
   * the macro; otherwise the value is 0.  pte_update() is called only when
   * that value is non-zero.
   */
  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
  #define ptep_test_and_clear_young(vma, addr, ptep) ({ \
      int __ret = pte_young(*(ptep)) \
          ? test_and_clear_bit(_PAGE_BIT_ACCESSED, &(ptep)->pte_low) \
          : 0; \
      if (__ret) \
          pte_update((vma)->vm_mm, addr, ptep); \
      __ret; \
  })
  /*
   * ptep_clear_flush_young(vma, address, ptep)
   *
   * Runs ptep_test_and_clear_young() and, when that yields non-zero, also
   * flushes the TLB entry for the page.  The value of the macro is the
   * result of ptep_test_and_clear_young().
   */
  #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
  #define ptep_clear_flush_young(vma, address, ptep) \
  ({ \
      int __young = ptep_test_and_clear_young((vma), (address), (ptep)); \
      if (__young) \
          flush_tlb_page(vma, address); \
      __young; \
  })
  152. #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
  153. static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
  154. {
  155. pte_t pte = native_ptep_get_and_clear(ptep);
  156. pte_update(mm, addr, ptep);
  157. return pte;
  158. }
  159. #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
  160. static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
  161. {
  162. pte_t pte;
  163. if (full) {
  164. /*
  165. * Full address destruction in progress; paravirt does not
  166. * care about updates and native needs no locking
  167. */
  168. pte = native_local_ptep_get_and_clear(ptep);
  169. } else {
  170. pte = ptep_get_and_clear(mm, addr, ptep);
  171. }
  172. return pte;
  173. }
  174. #define __HAVE_ARCH_PTEP_SET_WRPROTECT
  175. static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
  176. {
  177. clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
  178. pte_update(mm, addr, ptep);
  179. }
  180. /*
  181. * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
  182. *
  183. * dst - pointer to pgd range anwhere on a pgd page
  184. * src - ""
  185. * count - the number of pgds to copy.
  186. *
  187. * dst and src can be on the same page, but the range must not overlap,
  188. * and must not cross a page boundary.
  189. */
  190. static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
  191. {
  192. memcpy(dst, src, count * sizeof(pgd_t));
  193. }
  /*
   * Mark a page protection value as "uncacheable" by OR-ing in _PAGE_PCD
   * and _PAGE_PWT.  This is done only when boot_cpu_data.x86 is greater
   * than 3; on processors which do not support it the macro is a no-op and
   * yields prot unchanged.
   */
  #define pgprot_noncached(prot) \
      ((boot_cpu_data.x86 > 3) \
          ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \
          : (prot))
  /*
   * Conversion functions: convert a page and protection to a page entry,
   * and a page entry and page directory to the page they refer to.
   */
  #define mk_pte(page, pgprot)    pfn_pte(page_to_pfn(page), (pgprot))

  /*
   * The pgd page can be thought of as an array: pgd_t[PTRS_PER_PGD].
   * pgd_index() gives the index of the entry in that array which controls
   * the given virtual address.
   */
  #define pgd_index(address) \
      (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
  #define pgd_index_k(addr)       pgd_index(addr)

  /*
   * pgd_offset() returns a (pgd_t *): the mm's pgd advanced by
   * pgd_index(address) entries.
   */
  #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))

  /* Shortcut that uses the kernel's pgd (init_mm) instead of a process's. */
  #define pgd_offset_k(address)   pgd_offset(&init_mm, address)

  /*
   * The pmd page can be thought of as an array: pmd_t[PTRS_PER_PMD].
   * pmd_index() gives the index of the entry in that array which controls
   * the given virtual address.
   */
  #define pmd_index(address) \
      (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))

  /*
   * The pte page can be thought of as an array: pte_t[PTRS_PER_PTE].
   * pte_index() gives the index of the entry in that array which controls
   * the given virtual address.
   */
  #define pte_index(address) \
      (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))

  /* The page referred to by a pmd, via its page frame number. */
  #define pmd_page(pmd)           (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))

  /* __va() of the page-aligned part of a pmd value, as an unsigned long. */
  #define pmd_page_vaddr(pmd) \
      ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))

  /* The pte for address inside the table located by pmd_page_vaddr(*dir). */
  #define pte_offset_kernel(dir, address) \
      ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
  244. /*
  245. * Helper function that returns the kernel pagetable entry controlling
  246. * the virtual address 'address'. NULL means no pagetable entry present.
  247. * NOTE: the return type is pte_t but if the pmd is PSE then we return it
  248. * as a pte too.
  249. */
  250. extern pte_t *lookup_address(unsigned long address);
  /*
   * Make a given kernel text page executable/non-executable.
   * Returns the previous executability setting of that page (which
   * is used to restore the previous state). Used by the SMP bootup code.
   * NOTE: this is an __init function for security reasons.
   *
   * The real implementation exists only with CONFIG_X86_PAE; otherwise the
   * inline stub below ignores both arguments and returns 0.
   */
  #ifdef CONFIG_X86_PAE
  extern int set_kernel_exec(unsigned long vaddr, int enable);
  #else
  static inline int set_kernel_exec(unsigned long vaddr, int enable)
  {
      return 0;
  }
  #endif
  /*
   * pte_offset_map() / pte_offset_map_nested() locate the pte for address
   * inside the pte page referred to by *dir; pte_unmap() /
   * pte_unmap_nested() undo the mapping.
   *
   * Without CONFIG_HIGHPTE the pte page is reached through page_address()
   * and unmapping is a no-op.  With CONFIG_HIGHPTE the page is mapped with
   * kmap_atomic_pte() using the KM_PTE0 slot (KM_PTE1 for the _nested
   * variants) and released with kunmap_atomic() on the same slot.
   */
  #ifndef CONFIG_HIGHPTE
  #define pte_offset_map(dir, address) \
      ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
  #define pte_offset_map_nested(dir, address)     pte_offset_map(dir, address)
  #define pte_unmap(pte)                          do { } while (0)
  #define pte_unmap_nested(pte)                   do { } while (0)
  #else
  #define pte_offset_map(dir, address) \
      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
  #define pte_offset_map_nested(dir, address) \
      ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
  #define pte_unmap(pte)                          kunmap_atomic(pte, KM_PTE0)
  #define pte_unmap_nested(pte)                   kunmap_atomic(pte, KM_PTE1)
  #endif
  /*
   * Clear a kernel PTE and flush it from the TLB: pte_clear() on init_mm
   * for the given pte, followed by __flush_tlb_one() on its address.
   */
  #define kpte_clear_flush(ptep, vaddr)           \
  do {                                            \
      pte_clear(&init_mm, vaddr, ptep);           \
      __flush_tlb_one(vaddr);                     \
  } while (0)
  /*
   * The i386 doesn't have any external MMU info: the kernel page
   * tables contain all the necessary information, so this hook expands to
   * an empty statement and its arguments are not evaluated.
   */
  #define update_mmu_cache(vma, address, pte)     do { } while (0)
  287. void native_pagetable_setup_start(pgd_t *base);
  288. void native_pagetable_setup_done(pgd_t *base);
  289. #ifndef CONFIG_PARAVIRT
  290. static inline void paravirt_pagetable_setup_start(pgd_t *base)
  291. {
  292. native_pagetable_setup_start(base);
  293. }
  294. static inline void paravirt_pagetable_setup_done(pgd_t *base)
  295. {
  296. native_pagetable_setup_done(base);
  297. }
  298. #endif /* !CONFIG_PARAVIRT */
  299. #endif /* !__ASSEMBLY__ */
  /*
   * kern_addr_valid() is (1) for FLATMEM and (0) for
   * SPARSEMEM and DISCONTIGMEM.  The address argument is not examined.
   */
  #ifdef CONFIG_FLATMEM
  #define kern_addr_valid(addr)   (1)
  #else
  #define kern_addr_valid(addr)   (0)
  #endif
  /* I/O remapping forwards all arguments unchanged to remap_pfn_range(). */
  #define io_remap_pfn_range(vma, vaddr, pfn, size, prot)         \
      remap_pfn_range(vma, vaddr, pfn, size, prot)
  311. #include <asm-generic/pgtable.h>
  312. #endif /* _I386_PGTABLE_H */