Преглед изворни кода

md: Support write-intent bitmaps with externally managed metadata.

In this case, the metadata must not be stored in the same
sector as the bitmap.
md will not read or write any bitmap metadata.  Configuration
must be done via sysfs, and when a recovery makes the array
non-degraded again, writing 'true' to 'bitmap/can_clear' allows
bits in the bitmap to be cleared again.

Signed-off-by: NeilBrown <neilb@suse.de>
NeilBrown пре 15 година
родитељ
комит
ece5cff0da
4 измењених фајлова са 137 додато и 33 уклоњено
  1. 16 0
      Documentation/md.txt
  2. 119 23
      drivers/md/bitmap.c
  3. 1 10
      drivers/md/bitmap.h
  4. 1 0
      drivers/md/md.h

+ 16 - 0
Documentation/md.txt

@@ -322,6 +322,22 @@ All md devices contain:
      'backlog' sets a limit on the number of concurrent background
      'backlog' sets a limit on the number of concurrent background
      writes.  If there are more than this, new writes will by
      writes.  If there are more than this, new writes will by
      synchronous.
      synchronous.
+  bitmap/metadata
+     This can be either 'internal' or 'external'.
+     'internal' is the default and means the metadata for the bitmap
+     is stored in the first 256 bytes of the allocated space and is
+     managed by the md module.
+     'external' means that bitmap metadata is managed externally to
+     the kernel (i.e. by some userspace program).
+  bitmap/can_clear
+     This is either 'true' or 'false'.  If 'true', then bits in the
+     bitmap will be cleared when the corresponding blocks are thought
+     to be in-sync.  If 'false', bits will never be cleared.
+     This is automatically set to 'false' if a write happens on a
+     degraded array, or if the array becomes degraded during a write.
+     When metadata is managed externally, it should be set to 'true'
+     once the array has become non-degraded and this fact has been
+     recorded in the metadata.
      
      
      
      
      
      

+ 119 - 23
drivers/md/bitmap.c

@@ -497,6 +497,8 @@ void bitmap_update_sb(struct bitmap *bitmap)
 
 
 	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
 	if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
 		return;
 		return;
+	if (bitmap->mddev->bitmap_info.external)
+		return;
 	spin_lock_irqsave(&bitmap->lock, flags);
 	spin_lock_irqsave(&bitmap->lock, flags);
 	if (!bitmap->sb_page) { /* no superblock */
 	if (!bitmap->sb_page) { /* no superblock */
 		spin_unlock_irqrestore(&bitmap->lock, flags);
 		spin_unlock_irqrestore(&bitmap->lock, flags);
@@ -676,16 +678,26 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
  * general bitmap file operations
  * general bitmap file operations
  */
  */
 
 
+/*
+ * on-disk bitmap:
+ *
+ * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
+ * file a page at a time. There's a superblock at the start of the file.
+ */
 /* calculate the index of the page that contains this bit */
 /* calculate the index of the page that contains this bit */
-static inline unsigned long file_page_index(unsigned long chunk)
+static inline unsigned long file_page_index(struct bitmap *bitmap, unsigned long chunk)
 {
 {
-	return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
+	if (!bitmap->mddev->bitmap_info.external)
+		chunk += sizeof(bitmap_super_t) << 3;
+	return chunk >> PAGE_BIT_SHIFT;
 }
 }
 
 
 /* calculate the (bit) offset of this bit within a page */
 /* calculate the (bit) offset of this bit within a page */
-static inline unsigned long file_page_offset(unsigned long chunk)
+static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned long chunk)
 {
 {
-	return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
+	if (!bitmap->mddev->bitmap_info.external)
+		chunk += sizeof(bitmap_super_t) << 3;
+	return chunk & (PAGE_BITS - 1);
 }
 }
 
 
 /*
 /*
@@ -698,8 +710,9 @@ static inline unsigned long file_page_offset(unsigned long chunk)
 static inline struct page *filemap_get_page(struct bitmap *bitmap,
 static inline struct page *filemap_get_page(struct bitmap *bitmap,
 					unsigned long chunk)
 					unsigned long chunk)
 {
 {
-	if (file_page_index(chunk) >= bitmap->file_pages) return NULL;
-	return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
+	if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL;
+	return bitmap->filemap[file_page_index(bitmap, chunk)
+			       - file_page_index(bitmap, 0)];
 }
 }
 
 
 
 
@@ -722,7 +735,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
 	spin_unlock_irqrestore(&bitmap->lock, flags);
 	spin_unlock_irqrestore(&bitmap->lock, flags);
 
 
 	while (pages--)
 	while (pages--)
-		if (map[pages]->index != 0) /* 0 is sb_page, release it below */
+		if (map[pages] != sb_page) /* 0 is sb_page, release it below */
 			free_buffers(map[pages]);
 			free_buffers(map[pages]);
 	kfree(map);
 	kfree(map);
 	kfree(attr);
 	kfree(attr);
@@ -833,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
 
 
 	page = filemap_get_page(bitmap, chunk);
 	page = filemap_get_page(bitmap, chunk);
 	if (!page) return;
 	if (!page) return;
-	bit = file_page_offset(chunk);
+	bit = file_page_offset(bitmap, chunk);
 
 
  	/* set the bit */
  	/* set the bit */
 	kaddr = kmap_atomic(page, KM_USER0);
 	kaddr = kmap_atomic(page, KM_USER0);
@@ -931,14 +944,17 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 			"recovery\n", bmname(bitmap));
 			"recovery\n", bmname(bitmap));
 
 
 	bytes = (chunks + 7) / 8;
 	bytes = (chunks + 7) / 8;
+	if (!bitmap->mddev->bitmap_info.external)
+		bytes += sizeof(bitmap_super_t);
 
 
-	num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
+	
+	num_pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
 
 
-	if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
+	if (file && i_size_read(file->f_mapping->host) < bytes) {
 		printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
 		printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
 			bmname(bitmap),
 			bmname(bitmap),
 			(unsigned long) i_size_read(file->f_mapping->host),
 			(unsigned long) i_size_read(file->f_mapping->host),
-			bytes + sizeof(bitmap_super_t));
+			bytes);
 		goto err;
 		goto err;
 	}
 	}
 
 
@@ -959,17 +975,16 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 
 
 	for (i = 0; i < chunks; i++) {
 	for (i = 0; i < chunks; i++) {
 		int b;
 		int b;
-		index = file_page_index(i);
-		bit = file_page_offset(i);
+		index = file_page_index(bitmap, i);
+		bit = file_page_offset(bitmap, i);
 		if (index != oldindex) { /* this is a new page, read it in */
 		if (index != oldindex) { /* this is a new page, read it in */
 			int count;
 			int count;
 			/* unmap the old page, we're done with it */
 			/* unmap the old page, we're done with it */
 			if (index == num_pages-1)
 			if (index == num_pages-1)
-				count = bytes + sizeof(bitmap_super_t)
-					- index * PAGE_SIZE;
+				count = bytes - index * PAGE_SIZE;
 			else
 			else
 				count = PAGE_SIZE;
 				count = PAGE_SIZE;
-			if (index == 0) {
+			if (index == 0 && bitmap->sb_page) {
 				/*
 				/*
 				 * if we're here then the superblock page
 				 * if we're here then the superblock page
 				 * contains some bits (PAGE_SIZE != sizeof sb)
 				 * contains some bits (PAGE_SIZE != sizeof sb)
@@ -1164,7 +1179,8 @@ void bitmap_daemon_work(mddev_t *mddev)
 			/* We are possibly going to clear some bits, so make
 			/* We are possibly going to clear some bits, so make
 			 * sure that events_cleared is up-to-date.
 			 * sure that events_cleared is up-to-date.
 			 */
 			 */
-			if (bitmap->need_sync) {
+			if (bitmap->need_sync &&
+			    bitmap->mddev->bitmap_info.external == 0) {
 				bitmap_super_t *sb;
 				bitmap_super_t *sb;
 				bitmap->need_sync = 0;
 				bitmap->need_sync = 0;
 				sb = kmap_atomic(bitmap->sb_page, KM_USER0);
 				sb = kmap_atomic(bitmap->sb_page, KM_USER0);
@@ -1174,7 +1190,8 @@ void bitmap_daemon_work(mddev_t *mddev)
 				write_page(bitmap, bitmap->sb_page, 1);
 				write_page(bitmap, bitmap->sb_page, 1);
 			}
 			}
 			spin_lock_irqsave(&bitmap->lock, flags);
 			spin_lock_irqsave(&bitmap->lock, flags);
-			clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
+			if (!bitmap->need_sync)
+				clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
 		}
 		}
 		bmc = bitmap_get_counter(bitmap,
 		bmc = bitmap_get_counter(bitmap,
 					 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
 					 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
@@ -1189,7 +1206,7 @@ void bitmap_daemon_work(mddev_t *mddev)
 			if (*bmc == 2) {
 			if (*bmc == 2) {
 				*bmc=1; /* maybe clear the bit next time */
 				*bmc=1; /* maybe clear the bit next time */
 				set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
 				set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
-			} else if (*bmc == 1) {
+			} else if (*bmc == 1 && !bitmap->need_sync) {
 				/* we can clear the bit */
 				/* we can clear the bit */
 				*bmc = 0;
 				*bmc = 0;
 				bitmap_count_page(bitmap,
 				bitmap_count_page(bitmap,
@@ -1199,9 +1216,11 @@ void bitmap_daemon_work(mddev_t *mddev)
 				/* clear the bit */
 				/* clear the bit */
 				paddr = kmap_atomic(page, KM_USER0);
 				paddr = kmap_atomic(page, KM_USER0);
 				if (bitmap->flags & BITMAP_HOSTENDIAN)
 				if (bitmap->flags & BITMAP_HOSTENDIAN)
-					clear_bit(file_page_offset(j), paddr);
+					clear_bit(file_page_offset(bitmap, j),
+						  paddr);
 				else
 				else
-					ext2_clear_bit(file_page_offset(j), paddr);
+					ext2_clear_bit(file_page_offset(bitmap, j),
+						       paddr);
 				kunmap_atomic(paddr, KM_USER0);
 				kunmap_atomic(paddr, KM_USER0);
 			}
 			}
 		} else
 		} else
@@ -1356,6 +1375,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
 		    bitmap->events_cleared < bitmap->mddev->events) {
 		    bitmap->events_cleared < bitmap->mddev->events) {
 			bitmap->events_cleared = bitmap->mddev->events;
 			bitmap->events_cleared = bitmap->mddev->events;
 			bitmap->need_sync = 1;
 			bitmap->need_sync = 1;
+			sysfs_notify_dirent(bitmap->sysfs_can_clear);
 		}
 		}
 
 
 		if (!success && ! (*bmc & NEEDED_MASK))
 		if (!success && ! (*bmc & NEEDED_MASK))
@@ -1613,6 +1633,9 @@ void bitmap_destroy(mddev_t *mddev)
 	if (mddev->thread)
 	if (mddev->thread)
 		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
 		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
 
 
+	if (bitmap->sysfs_can_clear)
+		sysfs_put(bitmap->sysfs_can_clear);
+
 	bitmap_free(bitmap);
 	bitmap_free(bitmap);
 }
 }
 
 
@@ -1629,6 +1652,7 @@ int bitmap_create(mddev_t *mddev)
 	struct file *file = mddev->bitmap_info.file;
 	struct file *file = mddev->bitmap_info.file;
 	int err;
 	int err;
 	sector_t start;
 	sector_t start;
+	struct sysfs_dirent *bm;
 
 
 	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
 	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
 
 
@@ -1648,6 +1672,13 @@ int bitmap_create(mddev_t *mddev)
 
 
 	bitmap->mddev = mddev;
 	bitmap->mddev = mddev;
 
 
+	bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
+	if (bm) {
+		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
+		sysfs_put(bm);
+	} else
+		bitmap->sysfs_can_clear = NULL;
+
 	bitmap->file = file;
 	bitmap->file = file;
 	if (file) {
 	if (file) {
 		get_file(file);
 		get_file(file);
@@ -1658,7 +1689,16 @@ int bitmap_create(mddev_t *mddev)
 		vfs_fsync(file, file->f_dentry, 1);
 		vfs_fsync(file, file->f_dentry, 1);
 	}
 	}
 	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
 	/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
-	err = bitmap_read_sb(bitmap);
+	if (!mddev->bitmap_info.external)
+		err = bitmap_read_sb(bitmap);
+	else {
+		err = 0;
+		if (mddev->bitmap_info.chunksize == 0 ||
+		    mddev->bitmap_info.daemon_sleep == 0)
+			/* chunksize and time_base need to be
+			 * set first. */
+			err = -EINVAL;
+	}
 	if (err)
 	if (err)
 		goto error;
 		goto error;
 
 
@@ -1777,7 +1817,8 @@ location_store(mddev_t *mddev, const char *buf, size_t len)
 				return rv;
 				return rv;
 			if (offset == 0)
 			if (offset == 0)
 				return -EINVAL;
 				return -EINVAL;
-			if (mddev->major_version == 0 &&
+			if (mddev->bitmap_info.external == 0 &&
+			    mddev->major_version == 0 &&
 			    offset != mddev->bitmap_info.default_offset)
 			    offset != mddev->bitmap_info.default_offset)
 				return -EINVAL;
 				return -EINVAL;
 			mddev->bitmap_info.offset = offset;
 			mddev->bitmap_info.offset = offset;
@@ -1906,11 +1947,66 @@ chunksize_store(mddev_t *mddev, const char *buf, size_t len)
 static struct md_sysfs_entry bitmap_chunksize =
 static struct md_sysfs_entry bitmap_chunksize =
 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
 
 
+static ssize_t metadata_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%s\n", (mddev->bitmap_info.external
+				      ? "external" : "internal"));
+}
+
+static ssize_t metadata_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	if (mddev->bitmap ||
+	    mddev->bitmap_info.file ||
+	    mddev->bitmap_info.offset)
+		return -EBUSY;
+	if (strncmp(buf, "external", 8) == 0)
+		mddev->bitmap_info.external = 1;
+	else if (strncmp(buf, "internal", 8) == 0)
+		mddev->bitmap_info.external = 0;
+	else
+		return -EINVAL;
+	return len;
+}
+
+static struct md_sysfs_entry bitmap_metadata =
+__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
+
+static ssize_t can_clear_show(mddev_t *mddev, char *page)
+{
+	int len;
+	if (mddev->bitmap)
+		len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
+					     "false" : "true"));
+	else
+		len = sprintf(page, "\n");
+	return len;
+}
+
+static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	if (mddev->bitmap == NULL)
+		return -ENOENT;
+	if (strncmp(buf, "false", 5) == 0)
+		mddev->bitmap->need_sync = 1;
+	else if (strncmp(buf, "true", 4) == 0) {
+		if (mddev->degraded)
+			return -EBUSY;
+		mddev->bitmap->need_sync = 0;
+	} else
+		return -EINVAL;
+	return len;
+}
+
+static struct md_sysfs_entry bitmap_can_clear =
+__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
+
 static struct attribute *md_bitmap_attrs[] = {
 static struct attribute *md_bitmap_attrs[] = {
 	&bitmap_location.attr,
 	&bitmap_location.attr,
 	&bitmap_timeout.attr,
 	&bitmap_timeout.attr,
 	&bitmap_backlog.attr,
 	&bitmap_backlog.attr,
 	&bitmap_chunksize.attr,
 	&bitmap_chunksize.attr,
+	&bitmap_metadata.attr,
+	&bitmap_can_clear.attr,
 	NULL
 	NULL
 };
 };
 struct attribute_group md_bitmap_group = {
 struct attribute_group md_bitmap_group = {

+ 1 - 10
drivers/md/bitmap.h

@@ -118,16 +118,6 @@ typedef __u16 bitmap_counter_t;
 			(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
 			(CHUNK_BLOCK_SHIFT(bitmap) + PAGE_COUNTER_SHIFT - 1)
 #define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
 #define PAGEPTR_BLOCK_MASK(bitmap) (PAGEPTR_BLOCK_RATIO(bitmap) - 1)
 
 
-/*
- * on-disk bitmap:
- *
- * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
- * file a page at a time. There's a superblock at the start of the file.
- */
-
-/* map chunks (bits) to file pages - offset by the size of the superblock */
-#define CHUNK_BIT_OFFSET(chunk) ((chunk) + (sizeof(bitmap_super_t) << 3))
-
 #endif
 #endif
 
 
 /*
 /*
@@ -250,6 +240,7 @@ struct bitmap {
 	wait_queue_head_t write_wait;
 	wait_queue_head_t write_wait;
 	wait_queue_head_t overflow_wait;
 	wait_queue_head_t overflow_wait;
 
 
+	struct sysfs_dirent *sysfs_can_clear;
 };
 };
 
 
 /* the bitmap API */
 /* the bitmap API */

+ 1 - 0
drivers/md/md.h

@@ -296,6 +296,7 @@ struct mddev_s
 		unsigned long		chunksize;
 		unsigned long		chunksize;
 		unsigned long		daemon_sleep; /* how many seconds between updates? */
 		unsigned long		daemon_sleep; /* how many seconds between updates? */
 		unsigned long		max_write_behind; /* write-behind mode */
 		unsigned long		max_write_behind; /* write-behind mode */
+		int			external;
 	} bitmap_info;
 	} bitmap_info;
 
 
 	struct list_head		all_mddevs;
 	struct list_head		all_mddevs;