@@ -238,6 +238,111 @@ static swp_entry_t shmem_get_swap(struct shmem_inode_info *info, pgoff_t index)
 		info->i_direct[index] : (swp_entry_t){0};
 }
 
+/*
+ * Replace item expected in radix tree by a new item, while holding tree lock.
+ */
+static int shmem_radix_tree_replace(struct address_space *mapping,
+			pgoff_t index, void *expected, void *replacement)
+{
+	void **pslot;
+	void *item = NULL;
+
+	VM_BUG_ON(!expected);
+	pslot = radix_tree_lookup_slot(&mapping->page_tree, index);
+	if (pslot)
+		item = radix_tree_deref_slot_protected(pslot,
+						&mapping->tree_lock);
+	if (item != expected)
+		return -ENOENT;
+	if (replacement)
+		radix_tree_replace_slot(pslot, replacement);
+	else
+		radix_tree_delete(&mapping->page_tree, index);
+	return 0;
+}
+
+/*
+ * Like find_get_pages, but collecting swap entries as well as pages.
+ */
+static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping,
+					pgoff_t start, unsigned int nr_pages,
+					struct page **pages, pgoff_t *indices)
+{
+	unsigned int i;
+	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
+				(void ***)pages, indices, start, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
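+		/*
+		 * An exceptional entry is a swap entry stored in the slot
+		 * instead of a page pointer: report it to the caller; any
+		 * other exception means radix_tree_deref_retry().
+		 */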
+		if (radix_tree_exception(page)) {
+			if (radix_tree_exceptional_entry(page))
+				goto export;
+			/* radix_tree_deref_retry(page) */
+			goto restart;
+		}
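+		/*
+		 * A real page: pin it with a speculative reference, since
+		 * we are not holding the tree lock here.
+		 */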
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			page_cache_release(page);
+			goto repeat;
+		}
+export:
+		indices[ret] = indices[i];
+		pages[ret] = page;
+		ret++;
+	}
+	if (unlikely(!ret && nr_found))
+		goto restart;
+	rcu_read_unlock();
+	return ret;
+}
+
+/*
+ * Remove swap entry from radix tree, free the swap and its page cache.
+ */
+static int shmem_free_swap(struct address_space *mapping,
+			   pgoff_t index, void *radswap)
+{
+	int error;
+
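+	/*
+	 * Drop the entry only if the slot still holds it; a zero return
+	 * tells the caller that one swap entry really was freed.
+	 */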
+	spin_lock_irq(&mapping->tree_lock);
+	error = shmem_radix_tree_replace(mapping, index, radswap, NULL);
+	spin_unlock_irq(&mapping->tree_lock);
+	if (!error)
+		free_swap_and_cache(radix_to_swp_entry(radswap));
+	return error;
+}
+
+/*
+ * Pagevec may contain swap entries, so shuffle up pages before releasing.
+ */
+static void shmem_pagevec_release(struct pagevec *pvec)
+{
+	int i, j;
+
+	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		if (!radix_tree_exceptional_entry(page))
+			pvec->pages[j++] = page;
+	}
+	pvec->nr = j;
+	pagevec_release(pvec);
+}
+
+/*
+ * Remove range of pages and swap entries from radix tree, and free them.
+ */
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
 	struct address_space *mapping = inode->i_mapping;
@@ -246,36 +351,44 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	pgoff_t end = (lend >> PAGE_CACHE_SHIFT);
 	struct pagevec pvec;
+	pgoff_t indices[PAGEVEC_SIZE];
+	long nr_swaps_freed = 0;
 	pgoff_t index;
-	swp_entry_t swap;
 	int i;
 
 	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
 
 	pagevec_init(&pvec, 0);
 	index = start;
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (index <= end) {
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr)
+			break;
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
-			if (!trylock_page(page))
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
 				continue;
-			WARN_ON(page->index != index);
-			if (PageWriteback(page)) {
-				unlock_page(page);
+			}
+
+			if (!trylock_page(page))
 				continue;
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
 			}
-			truncate_inode_page(mapping, page);
 			unlock_page(page);
 		}
-		pagevec_release(&pvec);
+		shmem_pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
@@ -295,59 +408,47 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 	index = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
+			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
+							pvec.pages, indices);
+		if (!pvec.nr) {
 			if (index == start)
 				break;
 			index = start;
 			continue;
 		}
-		if (index == start && pvec.pages[0]->index > end) {
-			pagevec_release(&pvec);
+		if (index == start && indices[0] > end) {
+			shmem_pagevec_release(&pvec);
 			break;
 		}
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
+			index = indices[i];
 			if (index > end)
 				break;
 
+			if (radix_tree_exceptional_entry(page)) {
+				nr_swaps_freed += !shmem_free_swap(mapping,
+								index, page);
+				continue;
+			}
+
 			lock_page(page);
-			WARN_ON(page->index != index);
-			wait_on_page_writeback(page);
-			truncate_inode_page(mapping, page);
+			if (page->mapping == mapping) {
+				VM_BUG_ON(PageWriteback(page));
+				truncate_inode_page(mapping, page);
+			}
 			unlock_page(page);
 		}
-		pagevec_release(&pvec);
+		shmem_pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		index++;
 	}
 
-	if (end > SHMEM_NR_DIRECT)
-		end = SHMEM_NR_DIRECT;
-
 	spin_lock(&info->lock);
-	for (index = start; index < end; index++) {
-		swap = shmem_get_swap(info, index);
-		if (swap.val) {
-			free_swap_and_cache(swap);
-			shmem_put_swap(info, index, (swp_entry_t){0});
-			info->swapped--;
-		}
-	}
-
-	if (mapping->nrpages) {
-		spin_unlock(&info->lock);
-		/*
-		 * A page may have meanwhile sneaked in from swap.
-		 */
-		truncate_inode_pages_range(mapping, lstart, lend);
-		spin_lock(&info->lock);
-	}
-
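+	/* Account for the swap freed by shmem_free_swap() above. */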
+	info->swapped -= nr_swaps_freed;
 	shmem_recalc_inode(inode);
 	spin_unlock(&info->lock);
 
@@ -552,11 +653,10 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	/*
-	 * Just for this patch, we have a toy implementation,
-	 * which can swap out only the first SHMEM_NR_DIRECT pages:
-	 * for simple demonstration of where we need to think about swap.
+	 * Disable even the toy swapping implementation, while we convert
+	 * functions one by one to having swap entries in the radix tree.
 	 */
-	if (index >= SHMEM_NR_DIRECT)
+	if (index < ULONG_MAX)
 		goto redirty;
 
 	swap = get_swap_page();