/* include/linux/pagemap.h */
#ifndef _LINUX_PAGEMAP_H
#define _LINUX_PAGEMAP_H

/*
 * Copyright 1995 Linus Torvalds
 */
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/compiler.h>
#include <asm/uaccess.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
#include <linux/hardirq.h>		/* for in_interrupt() */
/*
 * Bits in mapping->flags.  The lower __GFP_BITS_SHIFT bits are the page
 * allocation mode flags, so mapping-state bits start above them.
 */
enum mapping_flags {
	AS_EIO		= __GFP_BITS_SHIFT + 0,	/* IO error on async write */
	AS_ENOSPC	= __GFP_BITS_SHIFT + 1,	/* ENOSPC on async write */
	AS_MM_ALL_LOCKS	= __GFP_BITS_SHIFT + 2,	/* under mm_take_all_locks() */
#ifdef CONFIG_UNEVICTABLE_LRU
	AS_UNEVICTABLE	= __GFP_BITS_SHIFT + 3,	/* e.g., ramdisk, SHM_LOCK */
#endif
};
  27. static inline void mapping_set_error(struct address_space *mapping, int error)
  28. {
  29. if (unlikely(error)) {
  30. if (error == -ENOSPC)
  31. set_bit(AS_ENOSPC, &mapping->flags);
  32. else
  33. set_bit(AS_EIO, &mapping->flags);
  34. }
  35. }
  36. #ifdef CONFIG_UNEVICTABLE_LRU
  37. static inline void mapping_set_unevictable(struct address_space *mapping)
  38. {
  39. set_bit(AS_UNEVICTABLE, &mapping->flags);
  40. }
  41. static inline void mapping_clear_unevictable(struct address_space *mapping)
  42. {
  43. clear_bit(AS_UNEVICTABLE, &mapping->flags);
  44. }
  45. static inline int mapping_unevictable(struct address_space *mapping)
  46. {
  47. if (likely(mapping))
  48. return test_bit(AS_UNEVICTABLE, &mapping->flags);
  49. return !!mapping;
  50. }
  51. #else
  52. static inline void mapping_set_unevictable(struct address_space *mapping) { }
  53. static inline void mapping_clear_unevictable(struct address_space *mapping) { }
  54. static inline int mapping_unevictable(struct address_space *mapping)
  55. {
  56. return 0;
  57. }
  58. #endif
/*
 * Extract the allocation flags stored in the low __GFP_BITS_SHIFT bits
 * of mapping->flags (see enum mapping_flags above).
 */
static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
{
	return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
}
  63. /*
  64. * This is non-atomic. Only to be used before the mapping is activated.
  65. * Probably needs a barrier...
  66. */
  67. static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
  68. {
  69. m->flags = (m->flags & ~(__force unsigned long)__GFP_BITS_MASK) |
  70. (__force unsigned long)mask;
  71. }
/*
 * The page cache can be done in larger chunks than
 * one page, because it allows for more efficient
 * throughput (it can then be mapped into user
 * space in smaller chunks for same flexibility).
 *
 * Or rather, it _will_ be done in larger chunks.
 */
#define PAGE_CACHE_SHIFT	PAGE_SHIFT
#define PAGE_CACHE_SIZE		PAGE_SIZE
#define PAGE_CACHE_MASK		PAGE_MASK
#define PAGE_CACHE_ALIGN(addr)	(((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK)

/* Pagecache reference helpers are currently plain page refcounts */
#define page_cache_get(page)		get_page(page)
#define page_cache_release(page)	put_page(page)
void release_pages(struct page **pages, int nr, int cold);
/*
 * Speculatively take a reference to a page.
 * If the page is free (_count == 0), then _count is untouched, and 0
 * is returned. Otherwise, _count is incremented by 1 and 1 is returned.
 *
 * This function must be called inside the same rcu_read_lock() section as has
 * been used to lookup the page in the pagecache radix-tree (or page table):
 * this allows allocators to use a synchronize_rcu() to stabilize _count.
 *
 * Unless an RCU grace period has passed, the count of all pages coming out
 * of the allocator must be considered unstable. page_count may return higher
 * than expected, and put_page must be able to do the right thing when the
 * page has been finished with, no matter what it is subsequently allocated
 * for (because put_page is what is used here to drop an invalid speculative
 * reference).
 *
 * This is the interesting part of the lockless pagecache (and lockless
 * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
 * has the following pattern:
 * 1. find page in radix tree
 * 2. conditionally increment refcount
 * 3. check the page is still in pagecache (if no, goto 1)
 *
 * Remove-side that cares about stability of _count (eg. reclaim) has the
 * following (with tree_lock held for write):
 * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
 * B. remove page from pagecache
 * C. free the page
 *
 * There are 2 critical interleavings that matter:
 * - 2 runs before A: in this case, A sees elevated refcount and bails out
 * - A runs before 2: in this case, 2 sees zero refcount and retries;
 *   subsequently, B will complete and 1 will find no page, causing the
 *   lookup to return NULL.
 *
 * It is possible that between 1 and 2, the page is removed then the exact same
 * page is inserted into the same position in pagecache. That's OK: the
 * old find_get_page using tree_lock could equally have run before or after
 * such a re-insertion, depending on order that locks are granted.
 *
 * Lookups racing against pagecache insertion isn't a big problem: either 1
 * will find the page or it will not. Likewise, the old find_get_page could run
 * either before the insertion or afterwards, depending on timing.
 */
/*
 * Speculatively take one reference on @page (protocol described in the
 * comment above).  Returns 0 if the page is (being) freed and the caller
 * must retry the lookup; returns 1 if the reference was taken.
 */
static inline int page_cache_get_speculative(struct page *page)
{
	VM_BUG_ON(in_interrupt());

#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU)
# ifdef CONFIG_PREEMPT
	VM_BUG_ON(!in_atomic());
# endif
	/*
	 * Preempt must be disabled here - we rely on rcu_read_lock doing
	 * this for us.
	 *
	 * Pagecache won't be truncated from interrupt context, so if we have
	 * found a page in the radix tree here, we have pinned its refcount by
	 * disabling preempt, and hence no need for the "speculative get" that
	 * SMP requires.
	 */
	VM_BUG_ON(page_count(page) == 0);
	atomic_inc(&page->_count);
#else
	if (unlikely(!get_page_unless_zero(page))) {
		/*
		 * Either the page has been freed, or will be freed.
		 * In either case, retry here and the caller should
		 * do the right thing (see comments above).
		 */
		return 0;
	}
#endif
	/* Speculative lookups must never land on a tail page */
	VM_BUG_ON(PageTail(page));
	return 1;
}
/*
 * Same as page_cache_get_speculative(), but add @count references
 * instead of one (could just be merged).
 */
static inline int page_cache_add_speculative(struct page *page, int count)
{
	VM_BUG_ON(in_interrupt());

#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU)
# ifdef CONFIG_PREEMPT
	VM_BUG_ON(!in_atomic());
# endif
	VM_BUG_ON(page_count(page) == 0);
	atomic_add(count, &page->_count);
#else
	/* Add @count only if the page is not already free (_count != 0) */
	if (unlikely(!atomic_add_unless(&page->_count, count, 0)))
		return 0;
#endif
	/* Must be a head page (or not compound at all) */
	VM_BUG_ON(PageCompound(page) && page != compound_head(page));
	return 1;
}
  181. static inline int page_freeze_refs(struct page *page, int count)
  182. {
  183. return likely(atomic_cmpxchg(&page->_count, count, 0) == count);
  184. }
  185. static inline void page_unfreeze_refs(struct page *page, int count)
  186. {
  187. VM_BUG_ON(page_count(page) != 0);
  188. VM_BUG_ON(count == 0);
  189. atomic_set(&page->_count, count);
  190. }
#ifdef CONFIG_NUMA
extern struct page *__page_cache_alloc(gfp_t gfp);
#else
static inline struct page *__page_cache_alloc(gfp_t gfp)
{
	/* Non-NUMA: a pagecache page is just a single order-0 page */
	return alloc_pages(gfp, 0);
}
#endif
  199. static inline struct page *page_cache_alloc(struct address_space *x)
  200. {
  201. return __page_cache_alloc(mapping_gfp_mask(x));
  202. }
  203. static inline struct page *page_cache_alloc_cold(struct address_space *x)
  204. {
  205. return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
  206. }
/* Callback type used by read_cache_page() and friends to fill a page */
typedef int filler_t(void *, struct page *);

extern struct page * find_get_page(struct address_space *mapping,
				pgoff_t index);
extern struct page * find_lock_page(struct address_space *mapping,
				pgoff_t index);
extern struct page * find_or_create_page(struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
			unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
			unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
			int tag, unsigned int nr_pages, struct page **pages);

struct page *grab_cache_page_write_begin(struct address_space *mapping,
			pgoff_t index, unsigned flags);
  222. /*
  223. * Returns locked page at given index in given cache, creating it if needed.
  224. */
  225. static inline struct page *grab_cache_page(struct address_space *mapping,
  226. pgoff_t index)
  227. {
  228. return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
  229. }
extern struct page * grab_cache_page_nowait(struct address_space *mapping,
				pgoff_t index);
extern struct page * read_cache_page_async(struct address_space *mapping,
				pgoff_t index, filler_t *filler,
				void *data);
extern struct page * read_cache_page(struct address_space *mapping,
				pgoff_t index, filler_t *filler,
				void *data);
extern int read_cache_pages(struct address_space *mapping,
				struct list_head *pages, filler_t *filler,
				void *data);
  240. static inline struct page *read_mapping_page_async(
  241. struct address_space *mapping,
  242. pgoff_t index, void *data)
  243. {
  244. filler_t *filler = (filler_t *)mapping->a_ops->readpage;
  245. return read_cache_page_async(mapping, index, filler, data);
  246. }
  247. static inline struct page *read_mapping_page(struct address_space *mapping,
  248. pgoff_t index, void *data)
  249. {
  250. filler_t *filler = (filler_t *)mapping->a_ops->readpage;
  251. return read_cache_page(mapping, index, filler, data);
  252. }
  253. /*
  254. * Return byte-offset into filesystem object for page.
  255. */
  256. static inline loff_t page_offset(struct page *page)
  257. {
  258. return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
  259. }
  260. static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
  261. unsigned long address)
  262. {
  263. pgoff_t pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
  264. pgoff += vma->vm_pgoff;
  265. return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
  266. }
/* Contended slow paths for the lock helpers below; call those instead */
extern void __lock_page(struct page *page);
extern int __lock_page_killable(struct page *page);
extern void __lock_page_nosync(struct page *page);
extern void unlock_page(struct page *page);
/*
 * Non-atomically set PG_locked.  Used by add_to_page_cache() below on
 * newly allocated pages that no one else can see yet.
 */
static inline void __set_page_locked(struct page *page)
{
	__set_bit(PG_locked, &page->flags);
}
/* Non-atomic counterpart of __set_page_locked(); same visibility rules */
static inline void __clear_page_locked(struct page *page)
{
	__clear_bit(PG_locked, &page->flags);
}
  279. static inline int trylock_page(struct page *page)
  280. {
  281. return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
  282. }
  283. /*
  284. * lock_page may only be called if we have the page's inode pinned.
  285. */
  286. static inline void lock_page(struct page *page)
  287. {
  288. might_sleep();
  289. if (!trylock_page(page))
  290. __lock_page(page);
  291. }
  292. /*
  293. * lock_page_killable is like lock_page but can be interrupted by fatal
  294. * signals. It returns 0 if it locked the page and -EINTR if it was
  295. * killed while waiting.
  296. */
  297. static inline int lock_page_killable(struct page *page)
  298. {
  299. might_sleep();
  300. if (!trylock_page(page))
  301. return __lock_page_killable(page);
  302. return 0;
  303. }
  304. /*
  305. * lock_page_nosync should only be used if we can't pin the page's inode.
  306. * Doesn't play quite so well with block device plugging.
  307. */
  308. static inline void lock_page_nosync(struct page *page)
  309. {
  310. might_sleep();
  311. if (!trylock_page(page))
  312. __lock_page_nosync(page);
  313. }
/*
 * This is exported only for wait_on_page_locked/wait_on_page_writeback.
 * Never use this directly!
 */
extern void wait_on_page_bit(struct page *page, int bit_nr);
  319. /*
  320. * Wait for a page to be unlocked.
  321. *
  322. * This must be called with the caller "holding" the page,
  323. * ie with increased "page->count" so that the page won't
  324. * go away during the wait..
  325. */
  326. static inline void wait_on_page_locked(struct page *page)
  327. {
  328. if (PageLocked(page))
  329. wait_on_page_bit(page, PG_locked);
  330. }
  331. /*
  332. * Wait for a page to complete writeback
  333. */
  334. static inline void wait_on_page_writeback(struct page *page)
  335. {
  336. if (PageWriteback(page))
  337. wait_on_page_bit(page, PG_writeback);
  338. }
extern void end_page_writeback(struct page *page);

/*
 * Add an arbitrary waiter to a page's wait queue
 */
extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);
  344. /*
  345. * Fault a userspace page into pagetables. Return non-zero on a fault.
  346. *
  347. * This assumes that two userspace pages are always sufficient. That's
  348. * not true if PAGE_CACHE_SIZE > PAGE_SIZE.
  349. */
  350. static inline int fault_in_pages_writeable(char __user *uaddr, int size)
  351. {
  352. int ret;
  353. if (unlikely(size == 0))
  354. return 0;
  355. /*
  356. * Writing zeroes into userspace here is OK, because we know that if
  357. * the zero gets there, we'll be overwriting it.
  358. */
  359. ret = __put_user(0, uaddr);
  360. if (ret == 0) {
  361. char __user *end = uaddr + size - 1;
  362. /*
  363. * If the page was already mapped, this will get a cache miss
  364. * for sure, so try to avoid doing it.
  365. */
  366. if (((unsigned long)uaddr & PAGE_MASK) !=
  367. ((unsigned long)end & PAGE_MASK))
  368. ret = __put_user(0, end);
  369. }
  370. return ret;
  371. }
  372. static inline int fault_in_pages_readable(const char __user *uaddr, int size)
  373. {
  374. volatile char c;
  375. int ret;
  376. if (unlikely(size == 0))
  377. return 0;
  378. ret = __get_user(c, uaddr);
  379. if (ret == 0) {
  380. const char __user *end = uaddr + size - 1;
  381. if (((unsigned long)uaddr & PAGE_MASK) !=
  382. ((unsigned long)end & PAGE_MASK))
  383. ret = __get_user(c, end);
  384. }
  385. return ret;
  386. }
int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
				pgoff_t index, gfp_t gfp_mask);
extern void remove_from_page_cache(struct page *page);
extern void __remove_from_page_cache(struct page *page);
  393. /*
  394. * Like add_to_page_cache_locked, but used to add newly allocated pages:
  395. * the page is new, so we can just run __set_page_locked() against it.
  396. */
  397. static inline int add_to_page_cache(struct page *page,
  398. struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
  399. {
  400. int error;
  401. __set_page_locked(page);
  402. error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
  403. if (unlikely(error))
  404. __clear_page_locked(page);
  405. return error;
  406. }
#endif /* _LINUX_PAGEMAP_H */