/*
 * linux/mm/msync.c
 *
 * Copyright (C) 1994-1999 Linus Torvalds
 */

/*
 * The msync() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/writeback.h>
#include <linux/file.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

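/*
 * Walk the ptes covered by one pmd entry, under the pte lock.  For each
 * present, possibly-dirty pte backed by a normal page, clear (and TLB-flush)
 * the pte's dirty bit and transfer it to the struct page via set_page_dirty().
 * Returns the number of pages newly marked dirty.  The "progress" counter
 * periodically breaks out of the loop so the pte lock is not held for too
 * long when rescheduling is needed or the lock is contended.
 */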
static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                                unsigned long addr, unsigned long end)
{
        pte_t *pte;
        spinlock_t *ptl;
        int progress = 0;
        unsigned long ret = 0;

again:
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        do {
                struct page *page;

                if (progress >= 64) {
                        progress = 0;
                        if (need_resched() || need_lockbreak(ptl))
                                break;
                }
                progress++;
                if (!pte_present(*pte))
                        continue;
                if (!pte_maybe_dirty(*pte))
                        continue;
                page = vm_normal_page(vma, addr, *pte);
                if (!page)
                        continue;
                if (ptep_clear_flush_dirty(vma, addr, pte) ||
                    page_test_and_clear_dirty(page))
                        ret += set_page_dirty(page);
                progress += 3;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
        if (addr != end)
                goto again;
        return ret;
}

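/*
 * Walk the pmd entries under one pud entry, calling msync_pte_range() for
 * each pmd that is actually populated, and add up the dirty-page counts.
 */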
static inline unsigned long msync_pmd_range(struct vm_area_struct *vma,
                        pud_t *pud, unsigned long addr, unsigned long end)
{
        pmd_t *pmd;
        unsigned long next;
        unsigned long ret = 0;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(pmd))
                        continue;
                ret += msync_pte_range(vma, pmd, addr, next);
        } while (pmd++, addr = next, addr != end);
        return ret;
}

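/*
 * Same again one level up: walk the pud entries under one pgd entry and
 * hand each populated pud to msync_pmd_range().
 */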
static inline unsigned long msync_pud_range(struct vm_area_struct *vma,
                        pgd_t *pgd, unsigned long addr, unsigned long end)
{
        pud_t *pud;
        unsigned long next;
        unsigned long ret = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
                ret += msync_pmd_range(vma, pud, addr, next);
        } while (pud++, addr = next, addr != end);
        return ret;
}

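/*
 * Top of the page-table walk for one vma: flush the cache for the range,
 * then walk the pgd entries and return how many pages in [addr, end) were
 * newly marked dirty.  hugetlb vmas are skipped entirely (see the comment
 * inside).
 */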
static unsigned long msync_page_range(struct vm_area_struct *vma,
                        unsigned long addr, unsigned long end)
{
        pgd_t *pgd;
        unsigned long next;
        unsigned long ret = 0;

        /* For hugepages we can't go walking the page table normally,
         * but that's ok, hugetlbfs is memory based, so we don't need
         * to do anything more on an msync().
         */
        if (vma->vm_flags & VM_HUGETLB)
                return 0;

        BUG_ON(addr >= end);
        pgd = pgd_offset(vma->vm_mm, addr);
        flush_cache_range(vma, addr, end);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                ret += msync_pud_range(vma, pgd, addr, next);
        } while (pgd++, addr = next, addr != end);
        return ret;
}

/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it just
 * marks the relevant pages dirty.  The application may now run fsync() to
 * write out the dirty pages and wait on the writeout and check the result.
 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
 * async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
 * applications.
 */
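/*
 * Illustrative userspace pattern only (a sketch, not part of this file),
 * assuming addr/length describe a shared file mapping of descriptor fd:
 *
 *      msync(addr, length, MS_ASYNC);   marks the mapped pages dirty
 *      fsync(fd);                       writes them out and waits
 *
 * posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED) could be used instead of
 * fsync() to start async writeout without waiting for it.
 */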
static int msync_interval(struct vm_area_struct *vma, unsigned long addr,
                        unsigned long end, int flags,
                        unsigned long *nr_pages_dirtied)
{
        int ret = 0;
        struct file *file = vma->vm_file;

        if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
                return -EBUSY;

        if (file && (vma->vm_flags & VM_SHARED)) {
                *nr_pages_dirtied = msync_page_range(vma, addr, end);

                if (flags & MS_SYNC) {
                        struct address_space *mapping = file->f_mapping;
                        int err;

                        ret = filemap_fdatawrite(mapping);
                        if (file->f_op && file->f_op->fsync) {
                                /*
                                 * We don't take i_mutex here because mmap_sem
                                 * is already held.
                                 */
                                err = file->f_op->fsync(file, file->f_dentry, 1);
                                if (err && !ret)
                                        ret = err;
                        }
                        err = filemap_fdatawait(mapping);
                        if (!ret)
                                ret = err;
                }
        }
        return ret;
}

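/*
 * The syscall entry point: validate the flags and the alignment of start,
 * round len up to a whole number of pages, then walk every vma overlapping
 * [start, start+len) and sync each piece via msync_interval().  Unmapped
 * gaps in the range are skipped but remembered, so the call still returns
 * -ENOMEM at the end.  For MS_ASYNC the caller is also throttled against
 * the dirty-page limits, like a normal write() would be.
 */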
asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
        unsigned long end;
        struct vm_area_struct *vma;
        int unmapped_error, error = -EINVAL;
        int done = 0;

        if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
                goto out;
        if (start & ~PAGE_MASK)
                goto out;
        if ((flags & MS_ASYNC) && (flags & MS_SYNC))
                goto out;
        error = -ENOMEM;
        len = (len + ~PAGE_MASK) & PAGE_MASK;
        end = start + len;
        if (end < start)
                goto out;
        error = 0;
        if (end == start)
                goto out;
        /*
         * If the interval [start,end) covers some unmapped address ranges,
         * just ignore them, but return -ENOMEM at the end.
         */
        down_read(&current->mm->mmap_sem);
        if (flags & MS_SYNC)
                current->flags |= PF_SYNCWRITE;
        vma = find_vma(current->mm, start);
        unmapped_error = 0;
        do {
                unsigned long nr_pages_dirtied = 0;
                struct file *file;

                /* Still start < end. */
                error = -ENOMEM;
                if (!vma)
                        goto out_unlock;
                /* Here start < vma->vm_end. */
                if (start < vma->vm_start) {
                        unmapped_error = -ENOMEM;
                        start = vma->vm_start;
                }
                /* Here vma->vm_start <= start < vma->vm_end. */
                if (end <= vma->vm_end) {
                        if (start < end) {
                                error = msync_interval(vma, start, end, flags,
                                                        &nr_pages_dirtied);
                                if (error)
                                        goto out_unlock;
                        }
                        error = unmapped_error;
                        done = 1;
                } else {
                        /* Here vma->vm_start <= start < vma->vm_end < end. */
                        error = msync_interval(vma, start, vma->vm_end, flags,
                                                &nr_pages_dirtied);
                        if (error)
                                goto out_unlock;
                }
                file = vma->vm_file;
                start = vma->vm_end;
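                /*
                 * If MS_ASYNC dirtied pages in this vma, throttle the caller
                 * just as a write() would: pin the file, drop mmap_sem while
                 * balance_dirty_pages_ratelimited_nr() does any writeout
                 * throttling, then retake the lock and re-look up the vma,
                 * which may have changed while the lock was dropped.
                 */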
                if ((flags & MS_ASYNC) && file && nr_pages_dirtied) {
                        get_file(file);
                        up_read(&current->mm->mmap_sem);
                        balance_dirty_pages_ratelimited_nr(file->f_mapping,
                                                        nr_pages_dirtied);
                        fput(file);
                        down_read(&current->mm->mmap_sem);
                        vma = find_vma(current->mm, start);
                } else {
                        vma = vma->vm_next;
                }
        } while (!done);

out_unlock:
        current->flags &= ~PF_SYNCWRITE;
        up_read(&current->mm->mmap_sem);
out:
        return error;
}