mincore.c 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. /*
  2. * linux/mm/mincore.c
  3. *
  4. * Copyright (C) 1994-2006 Linus Torvalds
  5. */
  6. /*
  7. * The mincore() system call.
  8. */
  9. #include <linux/pagemap.h>
  10. #include <linux/gfp.h>
  11. #include <linux/mm.h>
  12. #include <linux/mman.h>
  13. #include <linux/syscalls.h>
  14. #include <linux/swap.h>
  15. #include <linux/swapops.h>
  16. #include <linux/hugetlb.h>
  17. #include <asm/uaccess.h>
  18. #include <asm/pgtable.h>
  19. /*
  20. * Later we can get more picky about what "in core" means precisely.
  21. * For now, simply check to see if the page is in the page cache,
  22. * and is up to date; i.e. that no page-in operation would be required
  23. * at this time if an application were to map and access this page.
  24. */
  25. static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
  26. {
  27. unsigned char present = 0;
  28. struct page *page;
  29. /*
  30. * When tmpfs swaps out a page from a file, any process mapping that
  31. * file will not get a swp_entry_t in its pte, but rather it is like
  32. * any other file mapping (ie. marked !present and faulted in with
  33. * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
  34. *
  35. * However when tmpfs moves the page from pagecache and into swapcache,
  36. * it is still in core, but the find_get_page below won't find it.
  37. * No big deal, but make a note of it.
  38. */
  39. page = find_get_page(mapping, pgoff);
  40. if (page) {
  41. present = PageUptodate(page);
  42. page_cache_release(page);
  43. }
  44. return present;
  45. }
  46. /*
  47. * Do a chunk of "sys_mincore()". We've already checked
  48. * all the arguments, we hold the mmap semaphore: we should
  49. * just return the amount of info we're asked for.
  50. */
  51. static long do_mincore(unsigned long addr, unsigned char *vec, unsigned long pages)
  52. {
  53. pgd_t *pgd;
  54. pud_t *pud;
  55. pmd_t *pmd;
  56. pte_t *ptep;
  57. spinlock_t *ptl;
  58. unsigned long nr;
  59. int i;
  60. pgoff_t pgoff;
  61. struct vm_area_struct *vma = find_vma(current->mm, addr);
  62. /*
  63. * find_vma() didn't find anything above us, or we're
  64. * in an unmapped hole in the address space: ENOMEM.
  65. */
  66. if (!vma || addr < vma->vm_start)
  67. return -ENOMEM;
  68. #ifdef CONFIG_HUGETLB_PAGE
  69. if (is_vm_hugetlb_page(vma)) {
  70. struct hstate *h;
  71. unsigned long nr_huge;
  72. unsigned char present;
  73. i = 0;
  74. nr = min(pages, (vma->vm_end - addr) >> PAGE_SHIFT);
  75. h = hstate_vma(vma);
  76. nr_huge = ((addr + pages * PAGE_SIZE - 1) >> huge_page_shift(h))
  77. - (addr >> huge_page_shift(h)) + 1;
  78. nr_huge = min(nr_huge,
  79. (vma->vm_end - addr) >> huge_page_shift(h));
  80. while (1) {
  81. /* hugepage always in RAM for now,
  82. * but generally it needs to be check */
  83. ptep = huge_pte_offset(current->mm,
  84. addr & huge_page_mask(h));
  85. present = !!(ptep &&
  86. !huge_pte_none(huge_ptep_get(ptep)));
  87. while (1) {
  88. vec[i++] = present;
  89. addr += PAGE_SIZE;
  90. /* reach buffer limit */
  91. if (i == nr)
  92. return nr;
  93. /* check hugepage border */
  94. if (!((addr & ~huge_page_mask(h))
  95. >> PAGE_SHIFT))
  96. break;
  97. }
  98. }
  99. return nr;
  100. }
  101. #endif
  102. /*
  103. * Calculate how many pages there are left in the last level of the
  104. * PTE array for our address.
  105. */
  106. nr = PTRS_PER_PTE - ((addr >> PAGE_SHIFT) & (PTRS_PER_PTE-1));
  107. /*
  108. * Don't overrun this vma
  109. */
  110. nr = min(nr, (vma->vm_end - addr) >> PAGE_SHIFT);
  111. /*
  112. * Don't return more than the caller asked for
  113. */
  114. nr = min(nr, pages);
  115. pgd = pgd_offset(vma->vm_mm, addr);
  116. if (pgd_none_or_clear_bad(pgd))
  117. goto none_mapped;
  118. pud = pud_offset(pgd, addr);
  119. if (pud_none_or_clear_bad(pud))
  120. goto none_mapped;
  121. pmd = pmd_offset(pud, addr);
  122. if (pmd_none_or_clear_bad(pmd))
  123. goto none_mapped;
  124. ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
  125. for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE) {
  126. unsigned char present;
  127. pte_t pte = *ptep;
  128. if (pte_present(pte)) {
  129. present = 1;
  130. } else if (pte_none(pte)) {
  131. if (vma->vm_file) {
  132. pgoff = linear_page_index(vma, addr);
  133. present = mincore_page(vma->vm_file->f_mapping,
  134. pgoff);
  135. } else
  136. present = 0;
  137. } else if (pte_file(pte)) {
  138. pgoff = pte_to_pgoff(pte);
  139. present = mincore_page(vma->vm_file->f_mapping, pgoff);
  140. } else { /* pte is a swap entry */
  141. swp_entry_t entry = pte_to_swp_entry(pte);
  142. if (is_migration_entry(entry)) {
  143. /* migration entries are always uptodate */
  144. present = 1;
  145. } else {
  146. #ifdef CONFIG_SWAP
  147. pgoff = entry.val;
  148. present = mincore_page(&swapper_space, pgoff);
  149. #else
  150. WARN_ON(1);
  151. present = 1;
  152. #endif
  153. }
  154. }
  155. vec[i] = present;
  156. }
  157. pte_unmap_unlock(ptep-1, ptl);
  158. return nr;
  159. none_mapped:
  160. if (vma->vm_file) {
  161. pgoff = linear_page_index(vma, addr);
  162. for (i = 0; i < nr; i++, pgoff++)
  163. vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
  164. } else {
  165. for (i = 0; i < nr; i++)
  166. vec[i] = 0;
  167. }
  168. return nr;
  169. }
  170. /*
  171. * The mincore(2) system call.
  172. *
  173. * mincore() returns the memory residency status of the pages in the
  174. * current process's address space specified by [addr, addr + len).
  175. * The status is returned in a vector of bytes. The least significant
  176. * bit of each byte is 1 if the referenced page is in memory, otherwise
  177. * it is zero.
  178. *
  179. * Because the status of a page can change after mincore() checks it
  180. * but before it returns to the application, the returned vector may
  181. * contain stale information. Only locked pages are guaranteed to
  182. * remain in memory.
  183. *
  184. * return values:
  185. * zero - success
  186. * -EFAULT - vec points to an illegal address
  187. * -EINVAL - addr is not a multiple of PAGE_CACHE_SIZE
  188. * -ENOMEM - Addresses in the range [addr, addr + len] are
  189. * invalid for the address space of this process, or
  190. * specify one or more pages which are not currently
  191. * mapped
  192. * -EAGAIN - A kernel resource was temporarily unavailable.
  193. */
  194. SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len,
  195. unsigned char __user *, vec)
  196. {
  197. long retval;
  198. unsigned long pages;
  199. unsigned char *tmp;
  200. /* Check the start address: needs to be page-aligned.. */
  201. if (start & ~PAGE_CACHE_MASK)
  202. return -EINVAL;
  203. /* ..and we need to be passed a valid user-space range */
  204. if (!access_ok(VERIFY_READ, (void __user *) start, len))
  205. return -ENOMEM;
  206. /* This also avoids any overflows on PAGE_CACHE_ALIGN */
  207. pages = len >> PAGE_SHIFT;
  208. pages += (len & ~PAGE_MASK) != 0;
  209. if (!access_ok(VERIFY_WRITE, vec, pages))
  210. return -EFAULT;
  211. tmp = (void *) __get_free_page(GFP_USER);
  212. if (!tmp)
  213. return -EAGAIN;
  214. retval = 0;
  215. while (pages) {
  216. /*
  217. * Do at most PAGE_SIZE entries per iteration, due to
  218. * the temporary buffer size.
  219. */
  220. down_read(&current->mm->mmap_sem);
  221. retval = do_mincore(start, tmp, min(pages, PAGE_SIZE));
  222. up_read(&current->mm->mmap_sem);
  223. if (retval <= 0)
  224. break;
  225. if (copy_to_user(vec, tmp, retval)) {
  226. retval = -EFAULT;
  227. break;
  228. }
  229. pages -= retval;
  230. vec += retval;
  231. start += retval << PAGE_SHIFT;
  232. retval = 0;
  233. }
  234. free_page((unsigned long) tmp);
  235. return retval;
  236. }