@@ -84,6 +84,18 @@ struct shmem_xattr {
 	char value[0];
 };
 
+/*
+ * shmem_fallocate and shmem_writepage communicate via inode->i_private
+ * (with i_mutex making sure that it has only one user at a time):
+ * we would prefer not to enlarge the shmem inode just for that.
+ */
+struct shmem_falloc {
+	pgoff_t start;		/* start of range currently being fallocated */
+	pgoff_t next;		/* the next page offset to be fallocated */
+	pgoff_t nr_falloced;	/* how many new pages have been fallocated */
+	pgoff_t nr_unswapped;	/* how often writepage refused to swap out */
+};
+
 /* Flag allocation requirements to shmem_getpage */
 enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
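
As an aside, the handshake described in the comment above can be pictured with a small userspace sketch (hypothetical code, not part of the patch): one side publishes a progress struct through a shared pointer under a lock, the other side consults it to decide whether a given index belongs to the range still being fallocated. Here a pthread mutex stands in for inode->i_lock, a global pointer for inode->i_private, and plain functions for shmem_fallocate()/shmem_writepage(); in the kernel the producer's unlocked updates to next/nr_falloced are safe because it holds the page lock on the page in question.

/* Hypothetical userspace analogue of the i_private handshake -- not kernel code. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct falloc_progress {		/* plays the role of struct shmem_falloc */
	unsigned long start;		/* first index of the range being populated */
	unsigned long next;		/* next index to be populated */
	unsigned long nr_falloced;	/* pages populated so far */
	unsigned long nr_unswapped;	/* times the "writepage" side refused */
};

static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;	/* ~ inode->i_lock */
static struct falloc_progress *i_private;			/* ~ inode->i_private */

/* "writepage" side: refuse (and count the refusal) if index is in flight */
static bool refuse_if_in_flight(unsigned long index)
{
	bool in_flight = false;

	pthread_mutex_lock(&i_lock);
	if (i_private && index >= i_private->start && index < i_private->next) {
		i_private->nr_unswapped++;
		in_flight = true;
	}
	pthread_mutex_unlock(&i_lock);
	return in_flight;
}

/* "fallocate" side: populate [start, end), publishing progress as it goes */
static void fallocate_range(unsigned long start, unsigned long end)
{
	struct falloc_progress progress = { .start = start, .next = start };
	unsigned long index;

	pthread_mutex_lock(&i_lock);
	i_private = &progress;		/* make the in-flight range visible */
	pthread_mutex_unlock(&i_lock);

	for (index = start; index < end; index++) {
		progress.next++;	/* index is now covered by the range */
		progress.nr_falloced++;
		/* pretend memory pressure tries to write this page out */
		refuse_if_in_flight(index);
	}

	pthread_mutex_lock(&i_lock);
	i_private = NULL;		/* nothing in flight any more */
	pthread_mutex_unlock(&i_lock);

	printf("populated %lu pages, writepage refused %lu times\n",
	       progress.nr_falloced, progress.nr_unswapped);
}

int main(void)
{
	fallocate_range(0, 8);
	return 0;
}
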
@@ -791,8 +803,28 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
 	 * value into swapfile.c, the only way we can correctly account for a
 	 * fallocated page arriving here is now to initialize it and write it.
+	 *
+	 * That's okay for a page already fallocated earlier, but if we have
+	 * not yet completed the fallocation, then (a) we want to keep track
+	 * of this page in case we have to undo it, and (b) it may not be a
+	 * good idea to continue anyway, once we're pushing into swap. So
+	 * reactivate the page, and let shmem_fallocate() quit when too many.
 	 */
 	if (!PageUptodate(page)) {
+		if (inode->i_private) {
+			struct shmem_falloc *shmem_falloc;
+			spin_lock(&inode->i_lock);
+			shmem_falloc = inode->i_private;
+			if (shmem_falloc &&
+			    index >= shmem_falloc->start &&
+			    index < shmem_falloc->next)
+				shmem_falloc->nr_unswapped++;
+			else
+				shmem_falloc = NULL;
+			spin_unlock(&inode->i_lock);
+			if (shmem_falloc)
+				goto redirty;
+		}
 		clear_highpage(page);
 		flush_dcache_page(page);
 		SetPageUptodate(page);
@@ -1647,6 +1679,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+	struct shmem_falloc shmem_falloc;
 	pgoff_t start, index, end;
 	int error;
 
@@ -1679,6 +1712,14 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		goto out;
 	}
 
+	shmem_falloc.start = start;
+	shmem_falloc.next = start;
+	shmem_falloc.nr_falloced = 0;
+	shmem_falloc.nr_unswapped = 0;
+	spin_lock(&inode->i_lock);
+	inode->i_private = &shmem_falloc;
+	spin_unlock(&inode->i_lock);
+
 	for (index = start; index < end; index++) {
 		struct page *page;
 
@@ -1688,6 +1729,8 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		 */
 		if (signal_pending(current))
 			error = -EINTR;
+		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
+			error = -ENOMEM;
 		else
 			error = shmem_getpage(inode, index, &page, SGP_FALLOC,
 									NULL);
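
The new -ENOMEM test above is a plain ratio check on the two counters the struct carries: once writepage has pushed back more pages than the fallocation has managed to add, further allocation is assumed to be feeding pages straight into reclaim. A trivial standalone restatement (illustrative only, the function and struct names here are made up):

#include <stdio.h>

/* illustrative only: mirrors the nr_unswapped > nr_falloced test above */
struct counters { unsigned long nr_falloced, nr_unswapped; };

static int fallocation_is_thrashing(const struct counters *c)
{
	return c->nr_unswapped > c->nr_falloced;	/* -> -ENOMEM in the patch */
}

int main(void)
{
	struct counters healthy   = { .nr_falloced = 100, .nr_unswapped = 30 };
	struct counters thrashing = { .nr_falloced = 5,   .nr_unswapped = 12 };

	printf("%d %d\n", fallocation_is_thrashing(&healthy),
			  fallocation_is_thrashing(&thrashing));	/* prints: 0 1 */
	return 0;
}
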
@@ -1696,9 +1739,17 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 			shmem_undo_range(inode,
 				(loff_t)start << PAGE_CACHE_SHIFT,
 				(loff_t)index << PAGE_CACHE_SHIFT, true);
-			goto ctime;
+			goto undone;
 		}
 
+		/*
+		 * Inform shmem_writepage() how far we have reached.
+		 * No need for lock or barrier: we have the page lock.
+		 */
+		shmem_falloc.next++;
+		if (!PageUptodate(page))
+			shmem_falloc.nr_falloced++;
+
 		/*
 		 * If !PageUptodate, leave it that way so that freeable pages
 		 * can be recognized if we need to rollback on error later.
@@ -1714,8 +1765,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
 		i_size_write(inode, offset + len);
-ctime:
 	inode->i_ctime = CURRENT_TIME;
+undone:
+	spin_lock(&inode->i_lock);
+	inode->i_private = NULL;
+	spin_unlock(&inode->i_lock);
 out:
 	mutex_unlock(&inode->i_mutex);
 	return error;