@@ -71,26 +71,42 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		unsigned long old_addr, unsigned long old_end,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
-		unsigned long new_addr)
+		unsigned long new_addr, bool need_rmap_locks)
 {
 	struct address_space *mapping = NULL;
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma *anon_vma = NULL;
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	spinlock_t *old_ptl, *new_ptl;
 
-	if (vma->vm_file) {
-		/*
-		 * Subtle point from Rajesh Venkatasubramanian: before
-		 * moving file-based ptes, we must lock truncate_pagecache
-		 * out, since it might clean the dst vma before the src vma,
-		 * and we propagate stale pages into the dst afterward.
-		 */
-		mapping = vma->vm_file->f_mapping;
-		mutex_lock(&mapping->i_mmap_mutex);
+	/*
+	 * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
+	 * locks to ensure that rmap will always observe either the old or the
+	 * new ptes. This is the easiest way to avoid races with
+	 * truncate_pagecache(), page migration, etc...
+	 *
+	 * When need_rmap_locks is false, we use other ways to avoid
+	 * such races:
+	 *
+	 * - During exec() shift_arg_pages(), we use a specially tagged vma
+	 *   which rmap call sites look for using is_vma_temporary_stack().
+	 *
+	 * - During mremap(), new_vma is often known to be placed after vma
+	 *   in rmap traversal order. This ensures rmap will always observe
+	 *   either the old pte, or the new pte, or both (the page table locks
+	 *   serialize access to individual ptes, but only rmap traversal
+	 *   order guarantees that we won't miss both the old and new ptes).
+	 */
+	if (need_rmap_locks) {
+		if (vma->vm_file) {
+			mapping = vma->vm_file->f_mapping;
+			mutex_lock(&mapping->i_mmap_mutex);
+		}
+		if (vma->anon_vma) {
+			anon_vma = vma->anon_vma;
+			anon_vma_lock(anon_vma);
+		}
 	}
-	if (anon_vma)
-		anon_vma_lock(anon_vma);
 
 	/*
 	 * We don't have to worry about the ordering of src and dst
@@ -127,7 +143,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
-		unsigned long new_addr, unsigned long len)
+		unsigned long new_addr, unsigned long len,
+		bool need_rmap_locks)
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
@@ -174,7 +191,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (extent > LATENCY_LIMIT)
 			extent = LATENCY_LIMIT;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
-			  new_vma, new_pmd, new_addr);
+			  new_vma, new_pmd, new_addr, need_rmap_locks);
 		need_flush = true;
 	}
 	if (likely(need_flush))
@@ -198,6 +215,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	unsigned long hiwater_vm;
 	int split = 0;
 	int err;
+	bool need_rmap_locks;
 
 	/*
 	 * We'd prefer to avoid failure later on in do_munmap:
@@ -219,18 +237,21 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		return err;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
-	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
+	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
+			   &need_rmap_locks);
 	if (!new_vma)
 		return -ENOMEM;
 
-	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
+	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
+				     need_rmap_locks);
 	if (moved_len < old_len) {
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
 		 */
-		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
+		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
+				 true);
 		vma = new_vma;
 		old_len = new_len;
 		old_addr = new_addr;