|
@@ -334,7 +334,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
|
|
|
|
|
|
BUG_ON(req->inode != inode);
|
|
|
curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
|
|
|
- if (curr_index == index) {
|
|
|
+ if (curr_index <= index &&
|
|
|
+ index < curr_index + req->num_pages) {
|
|
|
found = true;
|
|
|
break;
|
|
|
}
|
|
@@ -1409,8 +1410,13 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
|
|
|
|
|
|
static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
|
|
|
{
|
|
|
- __free_page(req->pages[0]);
|
|
|
- fuse_file_put(req->ff, false);
|
|
|
+ int i; /* index into req->pages[] */
|
|
|
+
|
|
|
+ for (i = 0; i < req->num_pages; i++) /* free all req->num_pages pages, no longer only pages[0] */
|
|
|
+ __free_page(req->pages[i]);
|
|
|
+
|
|
|
+ if (req->ff) /* only put the file when the request actually has one attached */
|
|
|
+ fuse_file_put(req->ff, false);
|
|
|
}
|
|
|
|
|
|
static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
|
|
@@ -1418,30 +1424,34 @@ static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
|
|
|
struct inode *inode = req->inode;
|
|
|
struct fuse_inode *fi = get_fuse_inode(inode);
|
|
|
struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
|
|
|
+ int i;
|
|
|
|
|
|
list_del(&req->writepages_entry);
|
|
|
- dec_bdi_stat(bdi, BDI_WRITEBACK);
|
|
|
- dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
|
|
|
- bdi_writeout_inc(bdi);
|
|
|
+ for (i = 0; i < req->num_pages; i++) {
|
|
|
+ dec_bdi_stat(bdi, BDI_WRITEBACK);
|
|
|
+ dec_zone_page_state(req->pages[i], NR_WRITEBACK_TEMP);
|
|
|
+ bdi_writeout_inc(bdi);
|
|
|
+ }
|
|
|
wake_up(&fi->page_waitq);
|
|
|
}
|
|
|
|
|
|
/* Called under fc->lock, may release and reacquire it */
|
|
|
-static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
|
|
|
+static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req,
|
|
|
+ loff_t size)
|
|
|
__releases(fc->lock)
|
|
|
__acquires(fc->lock)
|
|
|
{
|
|
|
struct fuse_inode *fi = get_fuse_inode(req->inode);
|
|
|
- loff_t size = i_size_read(req->inode);
|
|
|
struct fuse_write_in *inarg = &req->misc.write.in;
|
|
|
+ __u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
|
|
|
|
|
|
if (!fc->connected)
|
|
|
goto out_free;
|
|
|
|
|
|
- if (inarg->offset + PAGE_CACHE_SIZE <= size) {
|
|
|
- inarg->size = PAGE_CACHE_SIZE;
|
|
|
+ if (inarg->offset + data_size <= size) {
|
|
|
+ inarg->size = data_size;
|
|
|
} else if (inarg->offset < size) {
|
|
|
- inarg->size = size & (PAGE_CACHE_SIZE - 1);
|
|
|
+ inarg->size = size - inarg->offset;
|
|
|
} else {
|
|
|
/* Got truncated off completely */
|
|
|
goto out_free;
|
|
@@ -1472,12 +1482,13 @@ __acquires(fc->lock)
|
|
|
{
|
|
|
struct fuse_conn *fc = get_fuse_conn(inode);
|
|
|
struct fuse_inode *fi = get_fuse_inode(inode);
|
|
|
+ /*
|
|
|
+ * File size sampled once and used to crop every request sent below.
|
|
|
+ * This must be loff_t: size_t is only 32 bits wide on 32-bit
|
|
|
+ * architectures and would truncate sizes of 4GiB and above before they
|
|
|
+ * reach the loff_t "size" parameter of fuse_send_writepage().
|
|
|
+ */
|
|
|
+ loff_t crop = i_size_read(inode);
|
|
|
struct fuse_req *req;
|
|
|
|
|
|
while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
|
|
|
req = list_entry(fi->queued_writes.next, struct fuse_req, list);
|
|
|
list_del_init(&req->list);
|
|
|
- fuse_send_writepage(fc, req);
|
|
|
+ fuse_send_writepage(fc, req, crop);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -1488,12 +1499,62 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
|
|
|
|
|
|
mapping_set_error(inode->i_mapping, req->out.h.error);
|
|
|
spin_lock(&fc->lock);
|
|
|
+ while (req->misc.write.next) {
|
|
|
+ struct fuse_conn *fc = get_fuse_conn(inode);
|
|
|
+ struct fuse_write_in *inarg = &req->misc.write.in;
|
|
|
+ struct fuse_req *next = req->misc.write.next;
|
|
|
+ req->misc.write.next = next->misc.write.next;
|
|
|
+ next->misc.write.next = NULL;
|
|
|
+ next->ff = fuse_file_get(req->ff);
|
|
|
+ list_add(&next->writepages_entry, &fi->writepages);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Skip fuse_flush_writepages() to make it easy to crop requests
|
|
|
+ * based on primary request size.
|
|
|
+ *
|
|
|
+ * 1st case (trivial): there are no concurrent activities using
|
|
|
+ * fuse_set/release_nowrite. Then we're on safe side because
|
|
|
+ * fuse_flush_writepages() would call fuse_send_writepage()
|
|
|
+ * anyway.
|
|
|
+ *
|
|
|
+ * 2nd case: someone called fuse_set_nowrite and it is waiting
|
|
|
+ * now for completion of all in-flight requests. This happens
|
|
|
+ * rarely and no more than once per page, so this should be
|
|
|
+ * okay.
|
|
|
+ *
|
|
|
+ * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle
|
|
|
+ * of fuse_set_nowrite..fuse_release_nowrite section. The fact
|
|
|
+ * that fuse_set_nowrite returned implies that all in-flight
|
|
|
+ * requests were completed along with all of their secondary
|
|
|
+ * requests. Further primary requests are blocked by negative
|
|
|
+ * writectr. Hence there cannot be any in-flight requests and
|
|
|
+ * no invocations of fuse_writepage_end() while we're in
|
|
|
+ * fuse_set_nowrite..fuse_release_nowrite section.
|
|
|
+ */
|
|
|
+ fuse_send_writepage(fc, next, inarg->offset + inarg->size);
|
|
|
+ }
|
|
|
fi->writectr--;
|
|
|
fuse_writepage_finish(fc, req);
|
|
|
spin_unlock(&fc->lock);
|
|
|
fuse_writepage_free(fc, req);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Pick the first entry of fi->write_files and call fuse_file_get() on it.
|
|
|
+ * The list is inspected under fc->lock. An empty list triggers a WARN and
|
|
|
+ * makes the function return NULL instead of a file.
|
|
|
+ */
|
|
|
+static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
|
|
|
+ struct fuse_inode *fi)
|
|
|
+{
|
|
|
+ struct fuse_file *wfile;
|
|
|
+
|
|
|
+ spin_lock(&fc->lock);
|
|
|
+ if (WARN_ON(list_empty(&fi->write_files))) {
|
|
|
+ wfile = NULL;
|
|
|
+ } else {
|
|
|
+ wfile = list_entry(fi->write_files.next, struct fuse_file,
|
|
|
+ write_entry);
|
|
|
+ fuse_file_get(wfile);
|
|
|
+ }
|
|
|
+ spin_unlock(&fc->lock);
|
|
|
+
|
|
|
+ return wfile;
|
|
|
+}
|
|
|
+
|
|
|
static int fuse_writepage_locked(struct page *page)
|
|
|
{
|
|
|
struct address_space *mapping = page->mapping;
|
|
@@ -1501,8 +1562,8 @@ static int fuse_writepage_locked(struct page *page)
|
|
|
struct fuse_conn *fc = get_fuse_conn(inode);
|
|
|
struct fuse_inode *fi = get_fuse_inode(inode);
|
|
|
struct fuse_req *req;
|
|
|
- struct fuse_file *ff;
|
|
|
struct page *tmp_page;
|
|
|
+ int error = -ENOMEM;
|
|
|
|
|
|
set_page_writeback(page);
|
|
|
|
|
@@ -1515,16 +1576,16 @@ static int fuse_writepage_locked(struct page *page)
|
|
|
if (!tmp_page)
|
|
|
goto err_free;
|
|
|
|
|
|
- spin_lock(&fc->lock);
|
|
|
- BUG_ON(list_empty(&fi->write_files));
|
|
|
- ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
|
|
|
- req->ff = fuse_file_get(ff);
|
|
|
- spin_unlock(&fc->lock);
|
|
|
+ error = -EIO;
|
|
|
+ req->ff = fuse_write_file_get(fc, fi);
|
|
|
+ if (!req->ff)
|
|
|
+ goto err_free;
|
|
|
|
|
|
- fuse_write_fill(req, ff, page_offset(page), 0);
|
|
|
+ fuse_write_fill(req, req->ff, page_offset(page), 0);
|
|
|
|
|
|
copy_highpage(tmp_page, page);
|
|
|
req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
|
|
|
+ req->misc.write.next = NULL;
|
|
|
req->in.argpages = 1;
|
|
|
req->num_pages = 1;
|
|
|
req->pages[0] = tmp_page;
|
|
@@ -1550,19 +1611,263 @@ err_free:
|
|
|
fuse_request_free(req);
|
|
|
err:
|
|
|
end_page_writeback(page);
|
|
|
- return -ENOMEM;
|
|
|
+ return error;
|
|
|
}
|
|
|
|
|
|
static int fuse_writepage(struct page *page, struct writeback_control *wbc)
|
|
|
{
|
|
|
int err;
|
|
|
|
|
|
+ if (fuse_page_is_writeback(page->mapping->host, page->index)) {
|
|
|
+ /*
|
|
|
+ * ->writepages() should be called for sync() and friends. We
|
|
|
+ * should only get here on direct reclaim and then we are
|
|
|
+ * allowed to skip a page which is already in flight
|
|
|
+ */
|
|
|
+ WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
|
|
|
+
|
|
|
+ redirty_page_for_writepage(wbc, page);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
err = fuse_writepage_locked(page);
|
|
|
unlock_page(page);
|
|
|
|
|
|
return err;
|
|
|
}
|
|
|
|
|
|
+struct fuse_fill_wb_data { /* state shared between fuse_writepages() and its per-page callback */
|
|
|
+ struct fuse_req *req; /* request currently being filled with pages, or NULL */
|
|
|
+ struct fuse_file *ff; /* file obtained via fuse_write_file_get(), or NULL until then */
|
|
|
+ struct inode *inode; /* inode whose mapping is being written back */
|
|
|
+ struct page **orig_pages; /* page-cache pages whose copies were added to req, in order */
|
|
|
+};
|
|
|
+
|
|
|
+/*
|
|
|
+ * Hand the request collected in @data over for sending: attach a file to it,
|
|
|
+ * append it to fi->queued_writes and call fuse_flush_writepages() under
|
|
|
+ * fc->lock, then end writeback on the original pages. The page count is
|
|
|
+ * read before the request is queued and the request is not touched afterwards.
|
|
|
+ */
|
|
|
+static void fuse_writepages_send(struct fuse_fill_wb_data *data)
|
|
|
+{
|
|
|
+ struct inode *host = data->inode;
|
|
|
+ struct fuse_conn *conn = get_fuse_conn(host);
|
|
|
+ struct fuse_inode *finode = get_fuse_inode(host);
|
|
|
+ struct fuse_req *wreq = data->req;
|
|
|
+ int remaining = wreq->num_pages;
|
|
|
+ int idx = 0;
|
|
|
+
|
|
|
+ wreq->ff = fuse_file_get(data->ff);
|
|
|
+
|
|
|
+ spin_lock(&conn->lock);
|
|
|
+ list_add_tail(&wreq->list, &finode->queued_writes);
|
|
|
+ fuse_flush_writepages(host);
|
|
|
+ spin_unlock(&conn->lock);
|
|
|
+
|
|
|
+ while (idx < remaining) {
|
|
|
+ end_page_writeback(data->orig_pages[idx]);
|
|
|
+ idx++;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * Check whether @page is already covered by a request on fi->writepages.
|
|
|
+ *
|
|
|
+ * @new_req must have num_pages == 0; it is taken off fi->writepages before
|
|
|
+ * the list is searched.
|
|
|
+ *
|
|
|
+ * Returns false, with @new_req put back on the list, when no request covers
|
|
|
+ * the page. Returns true when one does. In that case @new_req is either
|
|
|
+ * freed, after the page data was copied into a single-page request that is
|
|
|
+ * still in the INIT or PENDING state, or it is linked into the matching
|
|
|
+ * request's misc.write.next chain with num_pages set to 1.
|
|
|
+ */
|
|
|
+static bool fuse_writepage_in_flight(struct fuse_req *new_req,
|
|
|
+ struct page *page)
|
|
|
+{
|
|
|
+ struct fuse_conn *fc = get_fuse_conn(new_req->inode);
|
|
|
+ struct fuse_inode *fi = get_fuse_inode(new_req->inode);
|
|
|
+ struct fuse_req *tmp;
|
|
|
+ struct fuse_req *old_req;
|
|
|
+ bool found = false;
|
|
|
+ pgoff_t curr_index;
|
|
|
+
|
|
|
+ BUG_ON(new_req->num_pages != 0);
|
|
|
+
|
|
|
+ spin_lock(&fc->lock);
|
|
|
+ list_del(&new_req->writepages_entry);
|
|
|
+ list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
|
|
|
+ BUG_ON(old_req->inode != new_req->inode);
|
|
|
+ curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
|
|
|
+ if (curr_index <= page->index &&
|
|
|
+ page->index < curr_index + old_req->num_pages) {
|
|
|
+ found = true;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (!found) {
|
|
|
+ list_add(&new_req->writepages_entry, &fi->writepages);
|
|
|
+ goto out_unlock;
|
|
|
+ }
|
|
|
+
|
|
|
+ new_req->num_pages = 1;
|
|
|
+ /*
|
|
|
+ * Walk the chain hanging off the matching request and settle on the
|
|
|
+ * last single-page request for exactly this page index, if there is one.
|
|
|
+ */
|
|
|
+ for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
|
|
|
+ BUG_ON(tmp->inode != new_req->inode);
|
|
|
+ curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
|
|
|
+ if (tmp->num_pages == 1 &&
|
|
|
+ curr_index == page->index) {
|
|
|
+ old_req = tmp;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (old_req->num_pages == 1 && (old_req->state == FUSE_REQ_INIT ||
|
|
|
+ old_req->state == FUSE_REQ_PENDING)) {
|
|
|
+ struct backing_dev_info *bdi = page->mapping->backing_dev_info;
|
|
|
+
|
|
|
+ copy_highpage(old_req->pages[0], page);
|
|
|
+ spin_unlock(&fc->lock);
|
|
|
+
|
|
|
+ dec_bdi_stat(bdi, BDI_WRITEBACK);
|
|
|
+ /*
|
|
|
+ * NR_WRITEBACK_TEMP was charged to the temporary page held in
|
|
|
+ * new_req->pages[0], not to the page-cache page, so release it
|
|
|
+ * against that same page before fuse_writepage_free() frees it.
|
|
|
+ */
|
|
|
+ dec_zone_page_state(new_req->pages[0], NR_WRITEBACK_TEMP);
|
|
|
+ bdi_writeout_inc(bdi);
|
|
|
+ fuse_writepage_free(fc, new_req);
|
|
|
+ fuse_request_free(new_req);
|
|
|
+ goto out;
|
|
|
+ } else {
|
|
|
+ new_req->misc.write.next = old_req->misc.write.next;
|
|
|
+ old_req->misc.write.next = new_req;
|
|
|
+ }
|
|
|
+out_unlock:
|
|
|
+ spin_unlock(&fc->lock);
|
|
|
+out:
|
|
|
+ return found;
|
|
|
+}
|
|
|
+
|
|
|
+static int fuse_writepages_fill(struct page *page,
|
|
|
+ struct writeback_control *wbc, void *_data)
|
|
|
+{ /* per-page callback given to write_cache_pages(): adds a copy of @page to the request tracked in @_data */
|
|
|
+ struct fuse_fill_wb_data *data = _data;
|
|
|
+ struct fuse_req *req = data->req;
|
|
|
+ struct inode *inode = data->inode;
|
|
|
+ struct fuse_conn *fc = get_fuse_conn(inode);
|
|
|
+ struct page *tmp_page;
|
|
|
+ bool is_writeback;
|
|
|
+ int err;
|
|
|
+
|
|
|
+ if (!data->ff) { /* no file yet: obtain one, fail with -EIO if none is available */
|
|
|
+ err = -EIO;
|
|
|
+ data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
|
|
|
+ if (!data->ff)
|
|
|
+ goto out_unlock;
|
|
|
+ }
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Being under writeback is unlikely but possible. For example direct
|
|
|
+ * read to an mmaped fuse file will set the page dirty twice; once when
|
|
|
+ * the pages are faulted with get_user_pages(), and then after the read
|
|
|
+ * completed.
|
|
|
+ */
|
|
|
+ is_writeback = fuse_page_is_writeback(inode, page->index);
|
|
|
+
|
|
|
+ if (req && req->num_pages && /* send the pending request first when this page cannot be appended to it */
|
|
|
+ (is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
|
|
|
+ (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
|
|
|
+ data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
|
|
|
+ fuse_writepages_send(data);
|
|
|
+ data->req = NULL;
|
|
|
+ }
|
|
|
+ err = -ENOMEM;
|
|
|
+ tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); /* temporary page that will hold a copy of @page */
|
|
|
+ if (!tmp_page)
|
|
|
+ goto out_unlock;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * The page must not be redirtied until the writeout is completed
|
|
|
+ * (i.e. userspace has sent a reply to the write request). Otherwise
|
|
|
+ * there could be more than one temporary page instance for each real
|
|
|
+ * page.
|
|
|
+ *
|
|
|
+ * This is ensured by holding the page lock in page_mkwrite() while
|
|
|
+ * checking fuse_page_is_writeback(). We already hold the page lock
|
|
|
+ * since clear_page_dirty_for_io() and keep it held until we add the
|
|
|
+ * request to the fi->writepages list and increment req->num_pages.
|
|
|
+ * After this fuse_page_is_writeback() will indicate that the page is
|
|
|
+ * under writeback, so we can release the page lock.
|
|
|
+ */
|
|
|
+ if (data->req == NULL) { /* nothing pending: start a new, still empty request at this page's offset */
|
|
|
+ struct fuse_inode *fi = get_fuse_inode(inode);
|
|
|
+
|
|
|
+ err = -ENOMEM;
|
|
|
+ req = fuse_request_alloc_nofs(FUSE_MAX_PAGES_PER_REQ);
|
|
|
+ if (!req) {
|
|
|
+ __free_page(tmp_page);
|
|
|
+ goto out_unlock;
|
|
|
+ }
|
|
|
+
|
|
|
+ fuse_write_fill(req, data->ff, page_offset(page), 0);
|
|
|
+ req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
|
|
|
+ req->misc.write.next = NULL;
|
|
|
+ req->in.argpages = 1;
|
|
|
+ req->background = 1;
|
|
|
+ req->num_pages = 0;
|
|
|
+ req->end = fuse_writepage_end;
|
|
|
+ req->inode = inode;
|
|
|
+
|
|
|
+ spin_lock(&fc->lock);
|
|
|
+ list_add(&req->writepages_entry, &fi->writepages);
|
|
|
+ spin_unlock(&fc->lock);
|
|
|
+
|
|
|
+ data->req = req;
|
|
|
+ }
|
|
|
+ set_page_writeback(page);
|
|
|
+
|
|
|
+ copy_highpage(tmp_page, page);
|
|
|
+ req->pages[req->num_pages] = tmp_page;
|
|
|
+ req->page_descs[req->num_pages].offset = 0;
|
|
|
+ req->page_descs[req->num_pages].length = PAGE_SIZE;
|
|
|
+
|
|
|
+ inc_bdi_stat(page->mapping->backing_dev_info, BDI_WRITEBACK);
|
|
|
+ inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); /* accounted against the temporary page */
|
|
|
+
|
|
|
+ err = 0;
|
|
|
+ if (is_writeback && fuse_writepage_in_flight(req, page)) { /* true: req was freed or chained to an existing request */
|
|
|
+ end_page_writeback(page);
|
|
|
+ data->req = NULL;
|
|
|
+ goto out_unlock;
|
|
|
+ }
|
|
|
+ data->orig_pages[req->num_pages] = page; /* kept so fuse_writepages_send() can end writeback on it */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Protected by fc->lock against concurrent access by
|
|
|
+ * fuse_page_is_writeback().
|
|
|
+ */
|
|
|
+ spin_lock(&fc->lock);
|
|
|
+ req->num_pages++;
|
|
|
+ spin_unlock(&fc->lock);
|
|
|
+
|
|
|
+out_unlock:
|
|
|
+ unlock_page(page);
|
|
|
+
|
|
|
+ return err;
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * ->writepages() handler: walk the dirty pages of @mapping with
|
|
|
+ * write_cache_pages(), letting fuse_writepages_fill() gather them into
|
|
|
+ * requests, and send whatever request is still pending afterwards.
|
|
|
+ *
|
|
|
+ * Returns -EIO for a bad inode, -ENOMEM if the page array cannot be
|
|
|
+ * allocated, 0 if a final request was sent, and otherwise the result of
|
|
|
+ * write_cache_pages().
|
|
|
+ */
|
|
|
+static int fuse_writepages(struct address_space *mapping,
|
|
|
+ struct writeback_control *wbc)
|
|
|
+{
|
|
|
+ struct inode *inode = mapping->host;
|
|
|
+ struct fuse_fill_wb_data data;
|
|
|
+ int err;
|
|
|
+
|
|
|
+ err = -EIO;
|
|
|
+ if (is_bad_inode(inode))
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ data.inode = inode;
|
|
|
+ data.req = NULL;
|
|
|
+ data.ff = NULL;
|
|
|
+
|
|
|
+ err = -ENOMEM;
|
|
|
+ /* kcalloc() zeroes the array and checks the count * size product. */
|
|
|
+ data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ,
|
|
|
+ sizeof(struct page *),
|
|
|
+ GFP_NOFS);
|
|
|
+ if (!data.orig_pages)
|
|
|
+ goto out;
|
|
|
+
|
|
|
+ err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data);
|
|
|
+ if (data.req) {
|
|
|
+ /* Ignore errors if we can write at least one page */
|
|
|
+ BUG_ON(!data.req->num_pages);
|
|
|
+ fuse_writepages_send(&data);
|
|
|
+ err = 0;
|
|
|
+ }
|
|
|
+ /* data.ff is only set once the fill callback has obtained a file. */
|
|
|
+ if (data.ff)
|
|
|
+ fuse_file_put(data.ff, false);
|
|
|
+
|
|
|
+ kfree(data.orig_pages);
|
|
|
+out:
|
|
|
+ return err;
|
|
|
+}
|
|
|
+
|
|
|
static int fuse_launder_page(struct page *page)
|
|
|
{
|
|
|
int err = 0;
|
|
@@ -1602,14 +1907,17 @@ static void fuse_vma_close(struct vm_area_struct *vma)
|
|
|
static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|
|
{
|
|
|
struct page *page = vmf->page;
|
|
|
- /*
|
|
|
- * Don't use page->mapping as it may become NULL from a
|
|
|
- * concurrent truncate.
|
|
|
- */
|
|
|
- struct inode *inode = vma->vm_file->f_mapping->host;
|
|
|
+ struct inode *inode = file_inode(vma->vm_file); /* inode taken from the file, not from page->mapping */
|
|
|
+
|
|
|
+ file_update_time(vma->vm_file);
|
|
|
+ lock_page(page); /* stays locked across the wait below and on the VM_FAULT_LOCKED return */
|
|
|
+ if (page->mapping != inode->i_mapping) { /* page no longer belongs to this inode's mapping */
|
|
|
+ unlock_page(page);
|
|
|
+ return VM_FAULT_NOPAGE;
|
|
|
+ }
|
|
|
|
|
|
fuse_wait_on_page_writeback(inode, page->index);
|
|
|
- return 0;
|
|
|
+ return VM_FAULT_LOCKED; /* page is returned still locked */
|
|
|
}
|
|
|
|
|
|
static const struct vm_operations_struct fuse_file_vm_ops = {
|
|
@@ -2581,6 +2889,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
|
|
|
static const struct address_space_operations fuse_file_aops = {
|
|
|
.readpage = fuse_readpage,
|
|
|
.writepage = fuse_writepage,
|
|
|
+ .writepages = fuse_writepages,
|
|
|
.launder_page = fuse_launder_page,
|
|
|
.readpages = fuse_readpages,
|
|
|
.set_page_dirty = __set_page_dirty_nobuffers,
|