@@ -35,11 +35,14 @@
 #include <linux/swapops.h>
 #include <linux/page_cgroup.h>
 
+static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
+				 unsigned char);
+static void free_swap_count_continuations(struct swap_info_struct *);
+
 static DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
 long nr_swap_pages;
 long total_swap_pages;
-static int swap_overflow;
 static int least_priority;
 
 static const char Bad_file[] = "Bad swap file entry ";
@@ -55,7 +58,7 @@ static DEFINE_MUTEX(swapon_mutex);
 
 static inline unsigned char swap_count(unsigned char ent)
 {
-	return ent & ~SWAP_HAS_CACHE;
+	return ent & ~SWAP_HAS_CACHE;	/* may include SWAP_HAS_CONT flag */
}
 
 /* returns 1 if swap entry is freed */
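[Editor's aside, not part of the diff: swap_count() strips only SWAP_HAS_CACHE, so its result may still carry the continuation flag. For reference, a sketch of the swap_map byte layout assumed throughout this patch; the values below come from the include/linux/swap.h half of this change, which is outside this section, so treat them as quoted context rather than part of this hunk:

	#define SWAP_MAP_MAX	0x3e	/* max count held in first swap_map */
	#define SWAP_MAP_BAD	0x3f	/* marks a bad pageblock */
	#define SWAP_HAS_CACHE	0x40	/* page is in swap cache */
	#define SWAP_CONT_MAX	0x7f	/* max count per continuation byte */
	#define COUNT_CONTINUED	0x80	/* count continues in a linked page */

Each swap_map entry thus packs a 6-bit reference count plus two flag bits into one unsigned char.]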
@@ -545,8 +548,15 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 	if (usage == SWAP_HAS_CACHE) {
 		VM_BUG_ON(!has_cache);
 		has_cache = 0;
-	} else if (count < SWAP_MAP_MAX)
-		count--;
+	} else if ((count & ~COUNT_CONTINUED) <= SWAP_MAP_MAX) {
+		if (count == COUNT_CONTINUED) {
+			if (swap_count_continued(p, offset, count))
+				count = SWAP_MAP_MAX | COUNT_CONTINUED;
+			else
+				count = SWAP_MAP_MAX;
+		} else
+			count--;
+	}
 
 	if (!count)
 		mem_cgroup_uncharge_swap(entry);
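[Editor's aside: a worked trace of the new decrement path, using the values quoted above. With 63 references, swap_map[offset] is COUNT_CONTINUED | 0 and the first continuation byte holds 1. Freeing one reference takes the count == COUNT_CONTINUED branch: swap_count_continued() borrows from the continuation byte (now 0, so the chain is exhausted) and returns false, and count becomes SWAP_MAP_MAX (0x3e = 62), matching the 62 remaining references. Had the continuation chain still held more, count would instead become SWAP_MAP_MAX | COUNT_CONTINUED.]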
@@ -604,6 +614,8 @@ void swapcache_free(swp_entry_t entry, struct page *page)
 
 /*
  * How many references to page are currently swapped out?
+ * This does not give an exact answer when swap count is continued,
+ * but does include the high COUNT_CONTINUED flag to allow for that.
  */
 static inline int page_swapcount(struct page *page)
 {
@@ -1019,7 +1031,6 @@ static int try_to_unuse(unsigned int type)
 	swp_entry_t entry;
 	unsigned int i = 0;
 	int retval = 0;
-	int reset_overflow = 0;
 	int shmem;
 
 	/*
@@ -1034,8 +1045,7 @@ static int try_to_unuse(unsigned int type)
 	 * together, child after parent.  If we race with dup_mmap(), we
 	 * prefer to resolve parent before child, lest we miss entries
 	 * duplicated after we scanned child: using last mm would invert
-	 * that.  Though it's only a serious concern when an overflowed
-	 * swap count is reset from SWAP_MAP_MAX, preventing a rescan.
+	 * that.
 	 */
 	start_mm = &init_mm;
 	atomic_inc(&init_mm.mm_users);
@@ -1164,36 +1174,6 @@ static int try_to_unuse(unsigned int type)
 			break;
 		}
 
-		/*
-		 * How could swap count reach 0x7ffe ?
-		 * There's no way to repeat a swap page within an mm
-		 * (except in shmem, where it's the shared object which takes
-		 * the reference count)?
-		 * We believe SWAP_MAP_MAX cannot occur.(if occur, unsigned
-		 * short is too small....)
-		 * If that's wrong, then we should worry more about
-		 * exit_mmap() and do_munmap() cases described above:
-		 * we might be resetting SWAP_MAP_MAX too early here.
-		 *
-		 * Yes, that's wrong: though very unlikely, swap count 0x7ffe
-		 * could surely occur if pid_max raised from PID_MAX_DEFAULT;
-		 * and we are now lowering SWAP_MAP_MAX to 0x7e, making it
-		 * much easier to reach.  But the next patch will fix that.
-		 *
-		 * We know "Undead"s can happen, they're okay, so don't
-		 * report them; but do report if we reset SWAP_MAP_MAX.
-		 */
-		/* We might release the lock_page() in unuse_mm(). */
-		if (!PageSwapCache(page) || page_private(page) != entry.val)
-			goto retry;
-
-		if (swap_count(*swap_map) == SWAP_MAP_MAX) {
-			spin_lock(&swap_lock);
-			*swap_map = SWAP_HAS_CACHE;
-			spin_unlock(&swap_lock);
-			reset_overflow = 1;
-		}
-
 		/*
 		 * If a reference remains (rare), we would like to leave
 		 * the page in the swap cache; but try_to_unmap could
@@ -1235,7 +1215,6 @@ static int try_to_unuse(unsigned int type)
 		 * mark page dirty so shrink_page_list will preserve it.
 		 */
 		SetPageDirty(page);
-retry:
 		unlock_page(page);
 		page_cache_release(page);
 
@@ -1247,10 +1226,6 @@ retry:
 	}
 
 	mmput(start_mm);
-	if (reset_overflow) {
-		printk(KERN_WARNING "swapoff: cleared swap entry overflow\n");
-		swap_overflow = 0;
-	}
 	return retval;
 }
 
@@ -1593,6 +1568,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	up_write(&swap_unplug_sem);
 
 	destroy_swap_extents(p);
+	if (p->flags & SWP_CONTINUED)
+		free_swap_count_continuations(p);
+
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);
 	drain_mmlist();
@@ -2079,14 +2057,13 @@ void si_swapinfo(struct sysinfo *val)
 /*
  * Verify that a swap entry is valid and increment its swap map count.
  *
- * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
- * "permanent", but will be reclaimed by the next swapoff.
  * Returns error code in following case.
  * - success -> 0
  * - swp_entry is invalid -> EINVAL
  * - swp_entry is migration entry -> EINVAL
  * - swap-cache reference is requested but there is already one. -> EEXIST
  * - swap-cache reference is requested but the entry is not used. -> ENOENT
+ * - swap-mapped reference requested but needs continued swap count. -> ENOMEM
  */
 static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 {
@@ -2126,15 +2103,14 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 
 	} else if (count || has_cache) {
 
-		if (count < SWAP_MAP_MAX - 1)
-			count++;
-		else if (count <= SWAP_MAP_MAX) {
-			if (swap_overflow++ < 5)
-				printk(KERN_WARNING
-				       "swap_dup: swap entry overflow\n");
-			count = SWAP_MAP_MAX;
-		} else
+		if ((count & ~COUNT_CONTINUED) < SWAP_MAP_MAX)
+			count += usage;
+		else if ((count & ~COUNT_CONTINUED) > SWAP_MAP_MAX)
 			err = -EINVAL;
+		else if (swap_count_continued(p, offset, count))
+			count = COUNT_CONTINUED;
+		else
+			err = -ENOMEM;
 	} else
 		err = -ENOENT;			/* unused swap entry */
 
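[Editor's aside: the new branch structure, tabulated under the header values quoted earlier (a sketch of my reading, not kernel text):

	/* low digit has room, e.g. 0x00..0x3d or 0x80..0xbd */
	(count & ~COUNT_CONTINUED) < SWAP_MAP_MAX  -> count += usage;

	/* e.g. SWAP_MAP_BAD (0x3f): not a countable entry */
	(count & ~COUNT_CONTINUED) > SWAP_MAP_MAX  -> err = -EINVAL;

	/* low digit full (0x3e or 0xbe): carry into a continuation page */
	swap_count_continued() succeeds  -> count = COUNT_CONTINUED;
	no continuation page available   -> err = -ENOMEM; the caller must
	                                    call add_swap_count_continuation()
]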
@@ -2153,9 +2129,13 @@ bad_file:
 /*
  * increase reference count of swap entry by 1.
  */
-void swap_duplicate(swp_entry_t entry)
+int swap_duplicate(swp_entry_t entry)
 {
-	__swap_duplicate(entry, 1);
+	int err = 0;
+
+	while (!err && __swap_duplicate(entry, 1) == -ENOMEM)
+		err = add_swap_count_continuation(entry, GFP_ATOMIC);
+	return err;
 }
 
 /*
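[Editor's aside: swap_duplicate() now absorbs the GFP_ATOMIC retry, but a caller that can drop its locks is expected to fall back to GFP_KERNEL, as the comment on add_swap_count_continuation() below spells out. Roughly the shape used by the companion mm/memory.c change to fork's pte-copy loop (a hedged sketch, not code from this patch):

	swp_entry_t entry = pte_to_swp_entry(orig_pte);

	if (swap_duplicate(entry) < 0) {
		/* atomic continuation alloc failed: unlock page tables */
		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
			return -ENOMEM;		/* genuinely out of memory */
		/* relock and retry copying this pte */
	}
]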
@@ -2222,3 +2202,219 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
 		*offset = ++toff;
 	return nr_pages? ++nr_pages: 0;
 }
+
+/*
+ * add_swap_count_continuation - called when a swap count is duplicated
+ * beyond SWAP_MAP_MAX, it allocates a new page and links that to the entry's
+ * page of the original vmalloc'ed swap_map, to hold the continuation count
+ * (for that entry and for its neighbouring PAGE_SIZE swap entries).  Called
+ * again when count is duplicated beyond SWAP_MAP_MAX * SWAP_CONT_MAX, etc.
+ *
+ * These continuation pages are seldom referenced: the common paths all work
+ * on the original swap_map, only referring to a continuation page when the
+ * low "digit" of a count is incremented or decremented through SWAP_MAP_MAX.
+ *
+ * add_swap_count_continuation(, GFP_ATOMIC) can be called while holding
+ * page table locks; if it fails, add_swap_count_continuation(, GFP_KERNEL)
+ * can be called after dropping locks.
+ */
+int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
+{
+	struct swap_info_struct *si;
+	struct page *head;
+	struct page *page;
+	struct page *list_page;
+	pgoff_t offset;
+	unsigned char count;
+
+	/*
+	 * When debugging, it's easier to use __GFP_ZERO here; but it's better
+	 * for latency not to zero a page while GFP_ATOMIC and holding locks.
+	 */
+	page = alloc_page(gfp_mask | __GFP_HIGHMEM);
+
+	si = swap_info_get(entry);
+	if (!si) {
+		/*
+		 * An acceptable race has occurred since the failing
+		 * __swap_duplicate(): the swap entry has been freed,
+		 * perhaps even the whole swap_map cleared for swapoff.
+		 */
+		goto outer;
+	}
+
+	offset = swp_offset(entry);
+	count = si->swap_map[offset] & ~SWAP_HAS_CACHE;
+
+	if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
+		/*
+		 * The higher the swap count, the more likely it is that tasks
+		 * will race to add swap count continuation: we need to avoid
+		 * over-provisioning.
+		 */
+		goto out;
+	}
+
+	if (!page) {
+		spin_unlock(&swap_lock);
+		return -ENOMEM;
+	}
+
+	/*
+	 * We are fortunate that although vmalloc_to_page uses pte_offset_map,
+	 * no architecture is using highmem pages for kernel pagetables: so it
+	 * will not corrupt the GFP_ATOMIC caller's atomic pagetable kmaps.
+	 */
+	head = vmalloc_to_page(si->swap_map + offset);
+	offset &= ~PAGE_MASK;
+
+	/*
+	 * Page allocation does not initialize the page's lru field,
+	 * but it does always reset its private field.
+	 */
+	if (!page_private(head)) {
+		BUG_ON(count & COUNT_CONTINUED);
+		INIT_LIST_HEAD(&head->lru);
+		set_page_private(head, SWP_CONTINUED);
+		si->flags |= SWP_CONTINUED;
+	}
+
+	list_for_each_entry(list_page, &head->lru, lru) {
+		unsigned char *map;
+
+		/*
+		 * If the previous map said no continuation, but we've found
+		 * a continuation page, free our allocation and use this one.
+		 */
+		if (!(count & COUNT_CONTINUED))
+			goto out;
+
+		map = kmap_atomic(list_page, KM_USER0) + offset;
+		count = *map;
+		kunmap_atomic(map, KM_USER0);
+
+		/*
+		 * If this continuation count now has some space in it,
+		 * free our allocation and use this one.
+		 */
+		if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
+			goto out;
+	}
+
+	list_add_tail(&page->lru, &head->lru);
+	page = NULL;			/* now it's attached, don't free it */
+out:
+	spin_unlock(&swap_lock);
+outer:
+	if (page)
+		__free_page(page);
+	return 0;
+}
+
+/*
+ * swap_count_continued - when the original swap_map count is incremented
+ * from SWAP_MAP_MAX, check if there is already a continuation page to carry
+ * into, carry if so, or else fail until a new continuation page is allocated;
+ * when the original swap_map count is decremented from 0 with continuation,
+ * borrow from the continuation and report whether it still holds more.
+ * Called while __swap_duplicate() or swap_entry_free() holds swap_lock.
+ */
+static bool swap_count_continued(struct swap_info_struct *si,
+				 pgoff_t offset, unsigned char count)
+{
+	struct page *head;
+	struct page *page;
+	unsigned char *map;
+
+	head = vmalloc_to_page(si->swap_map + offset);
+	if (page_private(head) != SWP_CONTINUED) {
+		BUG_ON(count & COUNT_CONTINUED);
+		return false;		/* need to add count continuation */
+	}
+
+	offset &= ~PAGE_MASK;
+	page = list_entry(head->lru.next, struct page, lru);
+	map = kmap_atomic(page, KM_USER0) + offset;
+
+	if (count == SWAP_MAP_MAX)	/* initial increment from swap_map */
+		goto init_map;		/* jump over SWAP_CONT_MAX checks */
+
+	if (count == (SWAP_MAP_MAX | COUNT_CONTINUED)) { /* incrementing */
+		/*
+		 * Think of how you add 1 to 999
+		 */
+		while (*map == (SWAP_CONT_MAX | COUNT_CONTINUED)) {
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.next, struct page, lru);
+			BUG_ON(page == head);
+			map = kmap_atomic(page, KM_USER0) + offset;
+		}
+		if (*map == SWAP_CONT_MAX) {
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.next, struct page, lru);
+			if (page == head)
+				return false;	/* add count continuation */
+			map = kmap_atomic(page, KM_USER0) + offset;
+init_map:		*map = 0;		/* we didn't zero the page */
+		}
+		*map += 1;
+		kunmap_atomic(map, KM_USER0);
+		page = list_entry(page->lru.prev, struct page, lru);
+		while (page != head) {
+			map = kmap_atomic(page, KM_USER0) + offset;
+			*map = COUNT_CONTINUED;
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.prev, struct page, lru);
+		}
+		return true;			/* incremented */
+
+	} else {				/* decrementing */
+		/*
+		 * Think of how you subtract 1 from 1000
+		 */
+		BUG_ON(count != COUNT_CONTINUED);
+		while (*map == COUNT_CONTINUED) {
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.next, struct page, lru);
+			BUG_ON(page == head);
+			map = kmap_atomic(page, KM_USER0) + offset;
+		}
+		BUG_ON(*map == 0);
+		*map -= 1;
+		if (*map == 0)
+			count = 0;
+		kunmap_atomic(map, KM_USER0);
+		page = list_entry(page->lru.prev, struct page, lru);
+		while (page != head) {
+			map = kmap_atomic(page, KM_USER0) + offset;
+			*map = SWAP_CONT_MAX | count;
+			count = COUNT_CONTINUED;
+			kunmap_atomic(map, KM_USER0);
+			page = list_entry(page->lru.prev, struct page, lru);
+		}
+		return count == COUNT_CONTINUED;
+	}
+}
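[Editor's aside: decoded, the chain behaves as a little-endian multi-digit number; this is my reading of the carry logic above, so a hedged model rather than anything in the patch. The first swap_map byte is a base-63 digit (0..SWAP_MAP_MAX) and each continuation byte a base-128 digit (0..SWAP_CONT_MAX), with COUNT_CONTINUED set on every digit that has a higher neighbour:

	/* user-space model: total references encoded by one entry's digits */
	unsigned long total_count(unsigned char first, const unsigned char *cont, int n)
	{
		unsigned long total = first & ~(SWAP_HAS_CACHE | COUNT_CONTINUED);
		unsigned long unit = SWAP_MAP_MAX + 1;		/* 63 */
		int i;

		for (i = 0; i < n; i++) {			/* low to high */
			total += unit * (cont[i] & ~COUNT_CONTINUED);
			unit *= SWAP_CONT_MAX + 1;		/* 128 */
		}
		return total;
	}

E.g. 63 references are stored as first = COUNT_CONTINUED | 0 with cont[0] = 1; and 8064 as first = COUNT_CONTINUED | 0, cont[0] = COUNT_CONTINUED | 0, cont[1] = 1.]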
+
+/*
+ * free_swap_count_continuations - swapoff free all the continuation pages
+ * appended to the swap_map, after swap_map is quiesced, before vfree'ing it.
+ */
+static void free_swap_count_continuations(struct swap_info_struct *si)
+{
+	pgoff_t offset;
+
+	for (offset = 0; offset < si->max; offset += PAGE_SIZE) {
+		struct page *head;
+		head = vmalloc_to_page(si->swap_map + offset);
+		if (page_private(head)) {
+			struct list_head *this, *next;
+			list_for_each_safe(this, next, &head->lru) {
+				struct page *page;
+				page = list_entry(this, struct page, lru);
+				list_del(this);
+				__free_page(page);
+			}
+		}
+	}
+}