Browse Source

Hibernation: Fix mark_nosave_pages()

There is a problem in the hibernation code that triggers on some NUMA
systems on which pfn_valid() returns 'true' for some PFNs that don't
belong to any zone.  Namely, there is a BUG_ON() in
memory_bm_find_bit() that triggers for PFNs not belonging to any
zone and passing the pfn_valid() test.  On the affected systems it
triggers when we mark PFNs reported by the platform as not saveable,
because the PFNs in question belong to a region mapped directly using
iorepam() (i.e. the ACPI data area) and they pass the pfn_valid()
test.

Modify memory_bm_find_bit() so that it returns an error if given PFN
doesn't belong to any zone instead of crashing the kernel and ignore
the result returned by it in mark_nosave_pages(), while marking the
"nosave" memory regions.

This doesn't affect the hibernation functionality, as we won't touch
the PFNs in question anyway.

http://bugzilla.kernel.org/show_bug.cgi?id=9966 .

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Len Brown <len.brown@intel.com>
Rafael J. Wysocki 17 years ago
parent
commit
a82f7119fd
1 changed files with 34 additions and 7 deletions
  1. 34 7
      kernel/power/snapshot.c

+ 34 - 7
kernel/power/snapshot.c

@@ -447,7 +447,7 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
  *	of @bm->cur_zone_bm are updated.
  *	of @bm->cur_zone_bm are updated.
  */
  */
 
 
-static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
+static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 				void **addr, unsigned int *bit_nr)
 				void **addr, unsigned int *bit_nr)
 {
 {
 	struct zone_bitmap *zone_bm;
 	struct zone_bitmap *zone_bm;
@@ -461,7 +461,8 @@ static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 		while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
 		while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
 			zone_bm = zone_bm->next;
 			zone_bm = zone_bm->next;
 
 
-			BUG_ON(!zone_bm);
+			if (!zone_bm)
+				return -EFAULT;
 		}
 		}
 		bm->cur.zone_bm = zone_bm;
 		bm->cur.zone_bm = zone_bm;
 	}
 	}
@@ -479,23 +480,40 @@ static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
 	pfn -= bb->start_pfn;
 	pfn -= bb->start_pfn;
 	*bit_nr = pfn % BM_BITS_PER_CHUNK;
 	*bit_nr = pfn % BM_BITS_PER_CHUNK;
 	*addr = bb->data + pfn / BM_BITS_PER_CHUNK;
 	*addr = bb->data + pfn / BM_BITS_PER_CHUNK;
+	return 0;
 }
 }
 
 
 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 {
 	void *addr;
 	void *addr;
 	unsigned int bit;
 	unsigned int bit;
+	int error;
 
 
-	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	BUG_ON(error);
 	set_bit(bit, addr);
 	set_bit(bit, addr);
 }
 }
 
 
+static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
+{
+	void *addr;
+	unsigned int bit;
+	int error;
+
+	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	if (!error)
+		set_bit(bit, addr);
+	return error;
+}
+
 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 {
 	void *addr;
 	void *addr;
 	unsigned int bit;
 	unsigned int bit;
+	int error;
 
 
-	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	BUG_ON(error);
 	clear_bit(bit, addr);
 	clear_bit(bit, addr);
 }
 }
 
 
@@ -503,8 +521,10 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 {
 	void *addr;
 	void *addr;
 	unsigned int bit;
 	unsigned int bit;
+	int error;
 
 
-	memory_bm_find_bit(bm, pfn, &addr, &bit);
+	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
+	BUG_ON(error);
 	return test_bit(bit, addr);
 	return test_bit(bit, addr);
 }
 }
 
 
@@ -709,8 +729,15 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
 				region->end_pfn << PAGE_SHIFT);
 				region->end_pfn << PAGE_SHIFT);
 
 
 		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
 		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
-			if (pfn_valid(pfn))
-				memory_bm_set_bit(bm, pfn);
+			if (pfn_valid(pfn)) {
+				/*
+				 * It is safe to ignore the result of
+				 * mem_bm_set_bit_check() here, since we won't
+				 * touch the PFNs for which the error is
+				 * returned anyway.
+				 */
+				mem_bm_set_bit_check(bm, pfn);
+			}
 	}
 	}
 }
 }