@@ -89,7 +89,8 @@ enum sgp_type {
 	SGP_READ,	/* don't exceed i_size, don't allocate page */
 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
 	SGP_DIRTY,	/* like SGP_CACHE, but set new page dirty */
-	SGP_WRITE,	/* may exceed i_size, may allocate page */
+	SGP_WRITE,	/* may exceed i_size, may allocate !Uptodate page */
+	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
 };
 
 #ifdef CONFIG_TMPFS
@@ -427,8 +428,10 @@ void shmem_unlock_mapping(struct address_space *mapping)
 
 /*
  * Remove range of pages and swap entries from radix tree, and free them.
+ * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
  */
-void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
+							 bool unfalloc)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
@@ -462,6 +465,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 				break;
 
 			if (radix_tree_exceptional_entry(page)) {
+				if (unfalloc)
+					continue;
 				nr_swaps_freed += !shmem_free_swap(mapping,
 								index, page);
 				continue;
@@ -469,9 +474,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 
 			if (!trylock_page(page))
 				continue;
-			if (page->mapping == mapping) {
-				VM_BUG_ON(PageWriteback(page));
-				truncate_inode_page(mapping, page);
+			if (!unfalloc || !PageUptodate(page)) {
+				if (page->mapping == mapping) {
+					VM_BUG_ON(PageWriteback(page));
+					truncate_inode_page(mapping, page);
+				}
 			}
 			unlock_page(page);
 		}
@@ -517,12 +524,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 				min(end - index, (pgoff_t)PAGEVEC_SIZE),
 						pvec.pages, indices);
 		if (!pvec.nr) {
-			if (index == start)
+			if (index == start || unfalloc)
 				break;
 			index = start;
 			continue;
 		}
-		if (index == start && indices[0] >= end) {
+		if ((index == start || unfalloc) && indices[0] >= end) {
 			shmem_deswap_pagevec(&pvec);
 			pagevec_release(&pvec);
 			break;
@@ -536,15 +543,19 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 				break;
 
 			if (radix_tree_exceptional_entry(page)) {
+				if (unfalloc)
+					continue;
 				nr_swaps_freed += !shmem_free_swap(mapping,
 								index, page);
 				continue;
 			}
 
 			lock_page(page);
-			if (page->mapping == mapping) {
-				VM_BUG_ON(PageWriteback(page));
-				truncate_inode_page(mapping, page);
+			if (!unfalloc || !PageUptodate(page)) {
+				if (page->mapping == mapping) {
+					VM_BUG_ON(PageWriteback(page));
+					truncate_inode_page(mapping, page);
+				}
 			}
 			unlock_page(page);
 		}
@@ -558,7 +569,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	info->swapped -= nr_swaps_freed;
 	shmem_recalc_inode(inode);
 	spin_unlock(&info->lock);
+}
 
+void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
+{
+	shmem_undo_range(inode, lstart, lend, false);
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 }
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -771,6 +786,18 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 		WARN_ON_ONCE(1);	/* Still happens? Tell us about it! */
 		goto redirty;
 	}
+
+	/*
+	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
+	 * value into swapfile.c, the only way we can correctly account for a
+	 * fallocated page arriving here is now to initialize it and write it.
+	 */
+	if (!PageUptodate(page)) {
+		clear_highpage(page);
+		flush_dcache_page(page);
+		SetPageUptodate(page);
+	}
+
 	swap = get_swap_page();
 	if (!swap.val)
 		goto redirty;
@@ -994,6 +1021,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 	swp_entry_t swap;
 	int error;
 	int once = 0;
+	int alloced = 0;
 
 	if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
 		return -EFBIG;
@@ -1005,19 +1033,21 @@ repeat:
 		page = NULL;
 	}
 
-	if (sgp != SGP_WRITE &&
+	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 		error = -EINVAL;
 		goto failed;
 	}
 
+	/* fallocated page? */
+	if (page && !PageUptodate(page)) {
+		if (sgp != SGP_READ)
+			goto clear;
+		unlock_page(page);
+		page_cache_release(page);
+		page = NULL;
+	}
 	if (page || (sgp == SGP_READ && !swap.val)) {
-		/*
-		 * Once we can get the page lock, it must be uptodate:
-		 * if there were an error in reading back from swap,
-		 * the page would not be inserted into the filecache.
-		 */
-		BUG_ON(page && !PageUptodate(page));
 		*pagep = page;
 		return 0;
 	}
@@ -1114,9 +1144,18 @@ repeat:
 		inode->i_blocks += BLOCKS_PER_PAGE;
 		shmem_recalc_inode(inode);
 		spin_unlock(&info->lock);
+		alloced = true;
 
 		/*
-		 * Let SGP_WRITE caller clear ends if write does not fill page
+		 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
+		 */
+		if (sgp == SGP_FALLOC)
+			sgp = SGP_WRITE;
+clear:
+		/*
+		 * Let SGP_WRITE caller clear ends if write does not fill page;
+		 * but SGP_FALLOC on a page fallocated earlier must initialize
+		 * it now, lest undo on failure cancel our earlier guarantee.
 		 */
 		if (sgp != SGP_WRITE) {
 			clear_highpage(page);
@@ -1128,10 +1167,13 @@ repeat:
 	}
 
 	/* Perhaps the file has been truncated since we checked */
-	if (sgp != SGP_WRITE &&
+	if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
 	    ((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 		error = -EINVAL;
-		goto trunc;
+		if (alloced)
+			goto trunc;
+		else
+			goto failed;
 	}
 	*pagep = page;
 	return 0;
@@ -1140,6 +1182,7 @@ repeat:
 	 * Error recovery.
 	 */
trunc:
+	info = SHMEM_I(inode);
 	ClearPageDirty(page);
 	delete_from_page_cache(page);
 	spin_lock(&info->lock);
@@ -1147,6 +1190,7 @@ trunc:
 	inode->i_blocks -= BLOCKS_PER_PAGE;
 	spin_unlock(&info->lock);
decused:
+	sbinfo = SHMEM_SB(inode->i_sb);
 	if (sbinfo->max_blocks)
 		percpu_counter_add(&sbinfo->used_blocks, -1);
unacct:
@@ -1645,25 +1689,20 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		if (signal_pending(current))
 			error = -EINTR;
 		else
-			error = shmem_getpage(inode, index, &page, SGP_WRITE,
+			error = shmem_getpage(inode, index, &page, SGP_FALLOC,
 									NULL);
 		if (error) {
-			/*
-			 * We really ought to free what we allocated so far,
-			 * but it would be wrong to free pages allocated
-			 * earlier, or already now in use: i_mutex does not
-			 * exclude all cases. We do not know what to free.
-			 */
+			/* Remove the !PageUptodate pages we added */
+			shmem_undo_range(inode,
+				(loff_t)start << PAGE_CACHE_SHIFT,
+				(loff_t)index << PAGE_CACHE_SHIFT, true);
 			goto ctime;
 		}
 
-		if (!PageUptodate(page)) {
-			clear_highpage(page);
-			flush_dcache_page(page);
-			SetPageUptodate(page);
-		}
 		/*
-		 * set_page_dirty so that memory pressure will swap rather
+		 * If !PageUptodate, leave it that way so that freeable pages
+		 * can be recognized if we need to rollback on error later.
+		 * But set_page_dirty so that memory pressure will swap rather
 		 * than free the pages we are allocating (and SGP_CACHE pages
 		 * might still be clean: we now need to mark those dirty too).
 		 */