@@ -1826,11 +1826,12 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 	filemap_set_next_iovec(&cur_iov, nr_segs, &iov_offset, written);
 
 	do {
+		struct page *src_page;
 		struct page *page;
 		pgoff_t index;		/* Pagecache index for current page */
 		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long maxlen;	/* Bytes remaining in current iovec */
-		size_t bytes;		/* Bytes to write to page */
+		unsigned long seglen;	/* Bytes remaining in current iovec */
+		unsigned long bytes;	/* Bytes to write to page */
 		size_t copied;		/* Bytes copied from user */
 
 		buf = cur_iov->iov_base + iov_offset;
@@ -1840,20 +1841,30 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		if (bytes > count)
 			bytes = count;
 
-		maxlen = cur_iov->iov_len - iov_offset;
-		if (maxlen > bytes)
-			maxlen = bytes;
+		/*
+		 * a non-NULL src_page indicates that we're doing the
+		 * copy via get_user_pages and kmap.
+		 */
+		src_page = NULL;
+
+		seglen = cur_iov->iov_len - iov_offset;
+		if (seglen > bytes)
+			seglen = bytes;
 
-#ifndef CONFIG_DEBUG_VM
 		/*
 		 * Bring in the user page that we will copy from _first_.
 		 * Otherwise there's a nasty deadlock on copying from the
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
+		 *
+		 * Not only is this an optimisation, but it is also required
+		 * to check that the address is actually valid, when atomic
+		 * usercopies are used, below.
 		 */
-		fault_in_pages_readable(buf, maxlen);
-#endif
-
+		if (unlikely(fault_in_pages_readable(buf, seglen))) {
+			status = -EFAULT;
+			break;
+		}
 
 		page = __grab_cache_page(mapping, index);
 		if (!page) {
@@ -1861,32 +1872,104 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 			break;
 		}
 
+		/*
+		 * non-uptodate pages cannot cope with short copies, and we
+		 * cannot take a pagefault with the destination page locked.
+		 * So pin the source page to copy it.
+		 */
+		if (!PageUptodate(page)) {
+			unlock_page(page);
+
+			src_page = alloc_page(GFP_KERNEL);
+			if (!src_page) {
+				page_cache_release(page);
+				status = -ENOMEM;
+				break;
+			}
+
+			/*
+			 * Cannot get_user_pages with a page locked for the
+			 * same reason as we can't take a page fault with a
+			 * page locked (as explained below).
+			 */
+			copied = filemap_copy_from_user(src_page, offset,
+					cur_iov, nr_segs, iov_offset, bytes);
+			if (unlikely(copied == 0)) {
+				status = -EFAULT;
+				page_cache_release(page);
+				page_cache_release(src_page);
+				break;
+			}
+			bytes = copied;
+
+			lock_page(page);
+			/*
+			 * Can't handle the page going uptodate here, because
+			 * that means we would use non-atomic usercopies, which
+			 * zero out the tail of the page, which can cause
+			 * zeroes to become transiently visible. We could just
+			 * use a non-zeroing copy, but the APIs aren't too
+			 * consistent.
+			 */
+			if (unlikely(!page->mapping || PageUptodate(page))) {
+				unlock_page(page);
+				page_cache_release(page);
+				page_cache_release(src_page);
+				continue;
+			}
+
+		}
+
 		status = a_ops->prepare_write(file, page, offset, offset+bytes);
 		if (unlikely(status))
 			goto fs_write_aop_error;
 
-		copied = filemap_copy_from_user(page, offset,
+		if (!src_page) {
+			/*
+			 * Must not enter the pagefault handler here, because
+			 * we hold the page lock, so we might recursively
+			 * deadlock on the same lock, or get an ABBA deadlock
+			 * against a different lock, or against the mmap_sem
+			 * (which nests outside the page lock). So increment
+			 * preempt count, and use _atomic usercopies.
+			 *
+			 * The page is uptodate so we are OK to encounter a
+			 * short copy: if unmodified parts of the page are
+			 * marked dirty and written out to disk, it doesn't
+			 * really matter.
+			 */
+			pagefault_disable();
+			copied = filemap_copy_from_user_atomic(page, offset,
 					cur_iov, nr_segs, iov_offset, bytes);
+			pagefault_enable();
+		} else {
+			void *src, *dst;
+			src = kmap_atomic(src_page, KM_USER0);
+			dst = kmap_atomic(page, KM_USER1);
+			memcpy(dst + offset, src + offset, bytes);
+			kunmap_atomic(dst, KM_USER1);
+			kunmap_atomic(src, KM_USER0);
+			copied = bytes;
+		}
 		flush_dcache_page(page);
 
 		status = a_ops->commit_write(file, page, offset, offset+bytes);
 		if (unlikely(status < 0 || status == AOP_TRUNCATED_PAGE))
 			goto fs_write_aop_error;
-		if (unlikely(copied != bytes)) {
-			status = -EFAULT;
-			goto fs_write_aop_error;
-		}
 		if (unlikely(status > 0)) /* filesystem did partial write */
-			copied = status;
+			copied = min_t(size_t, copied, status);
+
+		unlock_page(page);
+		mark_page_accessed(page);
+		page_cache_release(page);
+		if (src_page)
+			page_cache_release(src_page);
 
 		written += copied;
 		count -= copied;
 		pos += copied;
 		filemap_set_next_iovec(&cur_iov, nr_segs, &iov_offset, copied);
 
-		unlock_page(page);
-		mark_page_accessed(page);
-		page_cache_release(page);
 		balance_dirty_pages_ratelimited(mapping);
 		cond_resched();
 		continue;
@@ -1895,6 +1978,8 @@ fs_write_aop_error:
 		if (status != AOP_TRUNCATED_PAGE)
 			unlock_page(page);
 		page_cache_release(page);
+		if (src_page)
+			page_cache_release(src_page);
 
 		/*
 		 * prepare_write() may have instantiated a few blocks
@@ -1907,7 +1992,6 @@ fs_write_aop_error:
 			continue;
 		else
 			break;
-
 	} while (count);
 	*ppos = pos;
 
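In outline, the patched loop now picks one of two copy strategies depending on whether the destination pagecache page is uptodate. The condensed sketch below restates that decision using only calls that appear in the hunks above; error paths, the uptodate re-check after relocking, and the iovec bookkeeping are elided, so it is illustrative rather than a drop-in replacement for the patch:

	/* Condensed sketch of the new copy logic; not the literal patch. */
	if (!PageUptodate(page)) {
		/*
		 * Non-uptodate destination: a short atomic copy could expose
		 * transient zeroes, so stage the user data in a throwaway
		 * page first, with the pagecache page unlocked so that
		 * taking a fault here is safe.
		 */
		unlock_page(page);
		src_page = alloc_page(GFP_KERNEL);
		copied = filemap_copy_from_user(src_page, offset,
				cur_iov, nr_segs, iov_offset, bytes);
		bytes = copied;
		lock_page(page);
	}

	status = a_ops->prepare_write(file, page, offset, offset + bytes);

	if (!src_page) {
		/*
		 * Uptodate destination: copy straight from userspace, with
		 * pagefaults disabled so we can never recurse into the
		 * fault handler while holding the page lock.
		 */
		pagefault_disable();
		copied = filemap_copy_from_user_atomic(page, offset,
				cur_iov, nr_segs, iov_offset, bytes);
		pagefault_enable();
	} else {
		/*
		 * User data already pinned in src_page: a plain
		 * kernel-to-kernel copy that cannot fault.
		 */
		void *src = kmap_atomic(src_page, KM_USER0);
		void *dst = kmap_atomic(page, KM_USER1);
		memcpy(dst + offset, src + offset, bytes);
		kunmap_atomic(dst, KM_USER1);
		kunmap_atomic(src, KM_USER0);
		copied = bytes;
	}

	status = a_ops->commit_write(file, page, offset, offset + bytes);

Either way, the usercopy can no longer fault while the page lock is held, which is what removes the lock-inversion deadlock the old fault_in_pages_readable() call only papered over.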