Merge branch 'for-3.8/drivers' of git://git.kernel.dk/linux-block

Pull block driver update from Jens Axboe:
 "Now that the core bits are in, here are the driver bits for 3.8.  The
  branch contains:

   - A huge pile of drbd bits that were dumped from the 3.7 merge
     window.  Following that, it was made perfectly clear both that
     there will be no more over-the-wall pulls and how the situation
     on individual pulls can be improved.

   - A few cleanups from Akinobu Mita for drbd and cciss.

   - Queue improvement for loop from Lukas.  This grew into adding a
     generic interface for waiting on/checking an event with a specific
     lock, allowing this to be pulled out of md; now loop and drbd are
     also using it.

   - A few fixes for xen back/front block driver from Roger Pau Monne.

   - Partition improvements from Stephen Warren, allowing partition
     UUID to be used as an identifier."

* 'for-3.8/drivers' of git://git.kernel.dk/linux-block: (609 commits)
  drbd: update Kconfig to match current dependencies
  drbd: Fix drbdsetup wait-connect, wait-sync etc... commands
  drbd: close race between drbd_set_role and drbd_connect
  drbd: respect no-md-barriers setting also when changed online via disk-options
  drbd: Remove obsolete check
  drbd: fixup after wait_event_lock_irq() addition to generic code
  loop: Limit the number of requests in the bio list
  wait: add wait_event_lock_irq() interface
  xen-blkfront: free allocated page
  xen-blkback: move free persistent grants code
  block: partition: msdos: provide UUIDs for partitions
  init: reduce PARTUUID min length to 1 from 36
  block: store partition_meta_info.uuid as a string
  cciss: use check_signature()
  cciss: cleanup bitops usage
  drbd: use copy_highpage
  drbd: if the replication link breaks during handshake, keep retrying
  drbd: check return of kmalloc in receive_uuids
  drbd: Broadcast sync progress no more often than once per second
  drbd: don't try to clear bits once the disk has failed
  ...
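
The wait_event_lock_irq() interface added here (see include/linux/wait.h in the file list below) lets a caller sleep on a condition that is protected by a spinlock: the macro releases the lock with spin_unlock_irq() around the sleep and re-takes it with spin_lock_irq() before every re-test of the condition, so the condition is always evaluated under the lock. A minimal kernel-style sketch of the pattern; the queue type and names are hypothetical, not from this merge:

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct demo_item {
	struct list_head node;
};

struct demo_queue {
	spinlock_t lock;		/* protects 'list' */
	struct list_head list;
	wait_queue_head_t wait;		/* producers wake_up() this */
};

/* Sleep until the list is non-empty, then pop the head.  The condition
 * !list_empty() is always checked with q->lock held; the macro drops
 * and re-acquires the lock around each schedule(). */
static struct demo_item *demo_dequeue(struct demo_queue *q)
{
	struct demo_item *item;

	spin_lock_irq(&q->lock);
	wait_event_lock_irq(q->wait, !list_empty(&q->list), q->lock);
	item = list_first_entry(&q->list, struct demo_item, node);
	list_del(&item->node);
	spin_unlock_irq(&q->lock);
	return item;
}

This is the shape of the waiting that loop's bio-list limiting and the md/drbd callers need; the wait_event_lock_irq_cmd() variant additionally runs a caller-supplied statement before each sleep.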
Linus Torvalds 12 years ago
parent
commit
9228ff9038
49 changed files with 7317 additions and 4560 deletions
  1. + 1 - 7  block/genhd.c
  2. + 1 - 6  block/partitions/efi.c
  3. + 19 - 2  block/partitions/msdos.c
  4. + 7 - 14  drivers/block/cciss.c
  5. + 6 - 4  drivers/block/drbd/Kconfig
  6. + 2 - 0  drivers/block/drbd/Makefile
  7. + 278 - 410  drivers/block/drbd/drbd_actlog.c
  8. + 133 - 116  drivers/block/drbd/drbd_bitmap.c
  9. + 235 - 334  drivers/block/drbd/drbd_int.h
  10. + 207 - 0  drivers/block/drbd/drbd_interval.c
  11. + 40 - 0  drivers/block/drbd/drbd_interval.h
  12. + 173 - 1593  drivers/block/drbd/drbd_main.c
  13. + 474 - 274  drivers/block/drbd/drbd_nl.c
  14. + 55 - 0  drivers/block/drbd/drbd_nla.c
  15. + 8 - 0  drivers/block/drbd/drbd_nla.h
  16. + 23 - 18  drivers/block/drbd/drbd_proc.c
  17. + 565 - 373  drivers/block/drbd/drbd_receiver.c
  18. + 441 - 426  drivers/block/drbd/drbd_req.c
  19. + 69 - 118  drivers/block/drbd/drbd_req.h
  20. + 1856 - 0  drivers/block/drbd/drbd_state.c
  21. + 161 - 0  drivers/block/drbd/drbd_state.h
  22. + 1 - 0  drivers/block/drbd/drbd_strings.c
  23. + 307 - 267  drivers/block/drbd/drbd_worker.c
  24. + 3 - 8  drivers/block/drbd/drbd_wrappers.h
  25. + 10 - 0  drivers/block/loop.c
  26. + 276 - 25  drivers/block/xen-blkback/blkback.c
  27. + 16 - 0  drivers/block/xen-blkback/common.h
  28. + 21 - 2  drivers/block/xen-blkback/xenbus.c
  29. + 171 - 28  drivers/block/xen-blkfront.c
  30. + 1 - 1  drivers/md/md.c
  31. + 0 - 26  drivers/md/md.h
  32. + 7 - 8  drivers/md/raid1.c
  33. + 7 - 8  drivers/md/raid10.c
  34. + 5 - 7  drivers/md/raid5.c
  35. + 46 - 35  include/linux/drbd.h
  36. + 378 - 0  include/linux/drbd_genl.h
  37. + 55 - 0  include/linux/drbd_genl_api.h
  38. + 68 - 22  include/linux/drbd_limits.h
  39. + 0 - 163  include/linux/drbd_nl.h
  40. + 0 - 84  include/linux/drbd_tag_magic.h
  41. + 6 - 2  include/linux/genhd.h
  42. + 422 - 0  include/linux/genl_magic_func.h
  43. + 277 - 0  include/linux/genl_magic_struct.h
  44. + 11 - 0  include/linux/idr.h
  45. + 3 - 0  include/linux/loop.h
  46. + 43 - 24  include/linux/lru_cache.h
  47. + 164 - 0  include/linux/wait.h
  48. + 40 - 21  init/do_mounts.c
  49. + 225 - 134  lib/lru_cache.c

+ 1 - 7
block/genhd.c

@@ -743,7 +743,6 @@ void __init printk_all_partitions(void)
 		struct hd_struct *part;
 		char name_buf[BDEVNAME_SIZE];
 		char devt_buf[BDEVT_SIZE];
-		char uuid_buf[PARTITION_META_INFO_UUIDLTH * 2 + 5];
 
 		/*
 		 * Don't show empty devices or things that have been
@@ -762,16 +761,11 @@ void __init printk_all_partitions(void)
 		while ((part = disk_part_iter_next(&piter))) {
 			bool is_part0 = part == &disk->part0;
 
-			uuid_buf[0] = '\0';
-			if (part->info)
-				snprintf(uuid_buf, sizeof(uuid_buf), "%pU",
-					 part->info->uuid);
-
 			printk("%s%s %10llu %s %s", is_part0 ? "" : "  ",
 			       bdevt_str(part_devt(part), devt_buf),
 			       (unsigned long long)part_nr_sects_read(part) >> 1
 			       , disk_name(disk, part->partno, name_buf),
-			       uuid_buf);
+			       part->info ? part->info->uuid : "");
 			if (is_part0) {
 				if (disk->driverfs_dev != NULL &&
 				    disk->driverfs_dev->driver != NULL)

+ 1 - 6
block/partitions/efi.c

@@ -620,7 +620,6 @@ int efi_partition(struct parsed_partitions *state)
 	gpt_entry *ptes = NULL;
 	u32 i;
 	unsigned ssz = bdev_logical_block_size(state->bdev) / 512;
-	u8 unparsed_guid[37];
 
 	if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) {
 		kfree(gpt);
@@ -649,11 +648,7 @@ int efi_partition(struct parsed_partitions *state)
 			state->parts[i + 1].flags = ADDPART_FLAG_RAID;
 
 		info = &state->parts[i + 1].info;
-		/* Instead of doing a manual swap to big endian, reuse the
-		 * common ASCII hex format as the interim.
-		 */
-		efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid);
-		part_pack_uuid(unparsed_guid, info->uuid);
+		efi_guid_unparse(&ptes[i].unique_partition_guid, info->uuid);
 
 		/* Naively convert UTF16-LE to 7 bits. */
 		label_max = min(sizeof(info->volname) - 1,

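With this change the partition UUID is kept in its textual form: efi_guid_unparse() writes the GUID string straight into info->uuid, instead of round-tripping through a packed binary UUID. As a reference point, the canonical text layout of an EFI GUID (whose first three fields are stored little-endian on disk) can be reproduced with a standalone userspace sketch; the byte order shown is my reading of the kernel's efi_guid_unparse(), and the example bytes are the well-known Linux filesystem data type GUID, used here purely for illustration:

#include <stdio.h>

/* Canonical text form of an EFI GUID: the first three fields are
 * stored little-endian on disk, so their bytes print reversed. */
static void guid_unparse(const unsigned char b[16], char *out)
{
	sprintf(out,
		"%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
		b[3], b[2], b[1], b[0], b[5], b[4], b[7], b[6],
		b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
}

int main(void)
{
	const unsigned char guid[16] = {
		0xaf, 0x3d, 0xc6, 0x0f, 0x83, 0x84, 0x72, 0x47,
		0x8e, 0x79, 0x3d, 0x69, 0xd8, 0x47, 0x7d, 0xe4 };
	char buf[37];

	guid_unparse(guid, buf);
	printf("%s\n", buf);	/* 0fc63daf-8483-4772-8e79-3d69d8477de4 */
	return 0;
}

The printed string is 36 characters plus the terminating NUL, which matches the 37-byte interim buffer the old code used.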
+ 19 - 2
block/partitions/msdos.c

@@ -94,6 +94,17 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
 	return ret;
 }
 
+static void set_info(struct parsed_partitions *state, int slot,
+		     u32 disksig)
+{
+	struct partition_meta_info *info = &state->parts[slot].info;
+
+	snprintf(info->uuid, sizeof(info->uuid), "%08x-%02x", disksig,
+		 slot);
+	info->volname[0] = 0;
+	state->parts[slot].has_info = true;
+}
+
 /*
  * Create devices for each logical partition in an extended partition.
  * The logical partitions form a linked list, with each entry being
@@ -106,7 +117,8 @@ static int aix_magic_present(struct parsed_partitions *state, unsigned char *p)
  */
 
 static void parse_extended(struct parsed_partitions *state,
-			   sector_t first_sector, sector_t first_size)
+			   sector_t first_sector, sector_t first_size,
+			   u32 disksig)
 {
 	struct partition *p;
 	Sector sect;
@@ -166,6 +178,7 @@ static void parse_extended(struct parsed_partitions *state,
 			}
 
 			put_partition(state, state->next, next, size);
+			set_info(state, state->next, disksig);
 			if (SYS_IND(p) == LINUX_RAID_PARTITION)
 				state->parts[state->next].flags = ADDPART_FLAG_RAID;
 			loopct = 0;
@@ -437,6 +450,7 @@ int msdos_partition(struct parsed_partitions *state)
 	struct partition *p;
 	struct fat_boot_sector *fb;
 	int slot;
+	u32 disksig;
 
 	data = read_part_sector(state, 0, &sect);
 	if (!data)
@@ -491,6 +505,8 @@ int msdos_partition(struct parsed_partitions *state)
 #endif
 	p = (struct partition *) (data + 0x1be);
 
+	disksig = le32_to_cpup((__le32 *)(data + 0x1b8));
+
 	/*
 	 * Look for partitions in two passes:
 	 * First find the primary and DOS-type extended partitions.
@@ -515,11 +531,12 @@ int msdos_partition(struct parsed_partitions *state)
 			put_partition(state, slot, start, n);
 
 			strlcat(state->pp_buf, " <", PAGE_SIZE);
-			parse_extended(state, start, size);
+			parse_extended(state, start, size, disksig);
 			strlcat(state->pp_buf, " >", PAGE_SIZE);
 			continue;
 		}
 		put_partition(state, slot, start, size);
+		set_info(state, slot, disksig);
 		if (SYS_IND(p) == LINUX_RAID_PARTITION)
 			state->parts[slot].flags = ADDPART_FLAG_RAID;
 		if (SYS_IND(p) == DM6_PARTITION)

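
For MBR disks, set_info() above synthesizes an identifier from the 32-bit NT disk signature read at offset 0x1b8 of sector 0, plus the 1-based partition slot number. A runnable sketch of the resulting format, using a made-up signature value:

#include <stdio.h>

int main(void)
{
	unsigned int disksig = 0x0002dd75;	/* hypothetical NT disk signature */
	int slot = 1;				/* first primary partition */
	char uuid[16];				/* "%08x-%02x" needs 12 bytes */

	/* same layout as set_info() above */
	snprintf(uuid, sizeof(uuid), "%08x-%02x", disksig, slot);
	printf("root=PARTUUID=%s\n", uuid);	/* root=PARTUUID=0002dd75-01 */
	return 0;
}

This short form is why the companion patch in init/do_mounts.c reduces the minimum accepted PARTUUID length from 36 to 1.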
+ 7 - 14
drivers/block/cciss.c

@@ -41,8 +41,9 @@
 #include <linux/spinlock.h>
 #include <linux/compat.h>
 #include <linux/mutex.h>
+#include <linux/bitmap.h>
+#include <linux/io.h>
 #include <asm/uaccess.h>
-#include <asm/io.h>
 
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
@@ -978,8 +979,7 @@ static CommandList_struct *cmd_alloc(ctlr_info_t *h)
 		i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
 		if (i == h->nr_cmds)
 			return NULL;
-	} while (test_and_set_bit(i & (BITS_PER_LONG - 1),
-		  h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
+	} while (test_and_set_bit(i, h->cmd_pool_bits) != 0);
 	c = h->cmd_pool + i;
 	memset(c, 0, sizeof(CommandList_struct));
 	cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(CommandList_struct);
@@ -1046,8 +1046,7 @@ static void cmd_free(ctlr_info_t *h, CommandList_struct *c)
 	int i;
 
 	i = c - h->cmd_pool;
-	clear_bit(i & (BITS_PER_LONG - 1),
-		  h->cmd_pool_bits + (i / BITS_PER_LONG));
+	clear_bit(i, h->cmd_pool_bits);
 	h->nr_frees++;
 }
 
@@ -4268,10 +4267,7 @@ static void __devinit cciss_find_board_params(ctlr_info_t *h)
 
 static inline bool CISS_signature_present(ctlr_info_t *h)
 {
-	if ((readb(&h->cfgtable->Signature[0]) != 'C') ||
-	    (readb(&h->cfgtable->Signature[1]) != 'I') ||
-	    (readb(&h->cfgtable->Signature[2]) != 'S') ||
-	    (readb(&h->cfgtable->Signature[3]) != 'S')) {
+	if (!check_signature(h->cfgtable->Signature, "CISS", 4)) {
 		dev_warn(&h->pdev->dev, "not a valid CISS config table\n");
 		return false;
 	}
@@ -4812,8 +4808,7 @@ static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
 
 static __devinit int cciss_allocate_cmd_pool(ctlr_info_t *h)
 {
-	h->cmd_pool_bits = kmalloc(
-		DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG) *
+	h->cmd_pool_bits = kmalloc(BITS_TO_LONGS(h->nr_cmds) *
 		sizeof(unsigned long), GFP_KERNEL);
 	h->cmd_pool = pci_alloc_consistent(h->pdev,
 		h->nr_cmds * sizeof(CommandList_struct),
@@ -5068,9 +5063,7 @@ reinit_after_soft_reset:
 	pci_set_drvdata(pdev, h);
 	/* command and error info recs zeroed out before
 	   they are used */
-	memset(h->cmd_pool_bits, 0,
-	       DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
-			* sizeof(unsigned long));
+	bitmap_zero(h->cmd_pool_bits, h->nr_cmds);
 
 	h->num_luns = 0;
 	h->highest_lun = -1;

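
The cciss cleanups above replace open-coded word/bit arithmetic with the standard bitmap helpers: BITS_TO_LONGS() for sizing, test_and_set_bit()/clear_bit() which take a bit index over the whole array, and bitmap_zero(). A kernel-style sketch of the same allocation and slot-claiming pattern; the function names are illustrative, not from the driver:

#include <linux/bitmap.h>
#include <linux/slab.h>

/* Allocate a bitmap with one bit per command slot.  BITS_TO_LONGS()
 * rounds nr_cmds up to whole unsigned longs, replacing the old
 * DIV_ROUND_UP(nr_cmds, BITS_PER_LONG) computation. */
static unsigned long *alloc_cmd_bitmap(int nr_cmds)
{
	unsigned long *bits;

	bits = kmalloc(BITS_TO_LONGS(nr_cmds) * sizeof(unsigned long),
		       GFP_KERNEL);
	if (bits)
		bitmap_zero(bits, nr_cmds);	/* clear all nr_cmds bits */
	return bits;
}

/* Claim the first free slot, retrying if another CPU races us; the
 * bit helpers index the whole array directly, so no manual
 * word-offset math is needed.  Returns -1 if the pool is full. */
static int claim_slot(unsigned long *bits, int nr_cmds)
{
	int i;

	do {
		i = find_first_zero_bit(bits, nr_cmds);
		if (i == nr_cmds)
			return -1;
	} while (test_and_set_bit(i, bits));
	return i;
}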
+ 6 - 4
drivers/block/drbd/Kconfig

@@ -2,13 +2,14 @@
 # DRBD device driver configuration
 #
 
-comment "DRBD disabled because PROC_FS, INET or CONNECTOR not selected"
-	depends on PROC_FS='n' || INET='n' || CONNECTOR='n'
+comment "DRBD disabled because PROC_FS or INET not selected"
+	depends on PROC_FS='n' || INET='n'
 
 config BLK_DEV_DRBD
 	tristate "DRBD Distributed Replicated Block Device support"
-	depends on PROC_FS && INET && CONNECTOR
+	depends on PROC_FS && INET
 	select LRU_CACHE
+	select LIBCRC32C
 	default n
 	help
 
@@ -58,7 +59,8 @@ config DRBD_FAULT_INJECTION
 	  32	data read
 	  64	read ahead
 	  128	kmalloc of bitmap
-	  256	allocation of EE (epoch_entries)
+	  256	allocation of peer_requests
+	  512	insert data corruption on receiving side
 
 	  fault_devs: bitmask of minor numbers
 	  fault_rate: frequency in percent

+ 2 - 0
drivers/block/drbd/Makefile

@@ -1,5 +1,7 @@
 drbd-y := drbd_bitmap.o drbd_proc.o
 drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
 drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
+drbd-y += drbd_interval.o drbd_state.o
+drbd-y += drbd_nla.o
 
 obj-$(CONFIG_BLK_DEV_DRBD)     += drbd.o

File diff suppressed because it is too large
+ 278 - 410
drivers/block/drbd/drbd_actlog.c


+ 133 - 116
drivers/block/drbd/drbd_bitmap.c

@@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
 	if (!__ratelimit(&drbd_ratelimit_state))
 		return;
 	dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
-	    current == mdev->receiver.task ? "receiver" :
-	    current == mdev->asender.task  ? "asender"  :
-	    current == mdev->worker.task   ? "worker"   : current->comm,
-	    func, b->bm_why ?: "?",
-	    b->bm_task == mdev->receiver.task ? "receiver" :
-	    b->bm_task == mdev->asender.task  ? "asender"  :
-	    b->bm_task == mdev->worker.task   ? "worker"   : "?");
+		drbd_task_to_thread_name(mdev->tconn, current),
+		func, b->bm_why ?: "?",
+		drbd_task_to_thread_name(mdev->tconn, b->bm_task));
 }
 
 void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
@@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
 
 	if (trylock_failed) {
 		dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
-		    current == mdev->receiver.task ? "receiver" :
-		    current == mdev->asender.task  ? "asender"  :
-		    current == mdev->worker.task   ? "worker"   : current->comm,
-		    why, b->bm_why ?: "?",
-		    b->bm_task == mdev->receiver.task ? "receiver" :
-		    b->bm_task == mdev->asender.task  ? "asender"  :
-		    b->bm_task == mdev->worker.task   ? "worker"   : "?");
+			 drbd_task_to_thread_name(mdev->tconn, current),
+			 why, b->bm_why ?: "?",
+			 drbd_task_to_thread_name(mdev->tconn, b->bm_task));
 		mutex_lock(&b->bm_change);
 	}
 	if (BM_LOCKED_MASK & b->bm_flags)
@@ -196,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
 /* to mark for lazy writeout once syncer cleared all clearable bits,
 * we check if bits have been cleared since last IO. */
 #define BM_PAGE_LAZY_WRITEOUT	28
+/* pages marked with this "HINT" will be considered for writeout
+ * on activity log transactions */
+#define BM_PAGE_HINT_WRITEOUT	27
 
 /* store_page_idx uses non-atomic assignment. It is only used directly after
  * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
@@ -227,8 +222,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	void *addr = &page_private(b->bm_pages[page_nr]);
-	clear_bit(BM_PAGE_IO_LOCK, addr);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
 	wake_up(&mdev->bitmap->bm_io_wait);
 }
 
@@ -246,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page)
 	set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
 }
 
+/**
+ * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
+ * @mdev:	DRBD device.
+ * @page_nr:	the bitmap page to mark with the "hint" flag
+ *
+ * From within an activity log transaction, we mark a few pages with these
+ * hints, then call drbd_bm_write_hinted(), which will only write out changed
+ * pages which are flagged with this mark.
+ */
+void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr)
+{
+	struct page *page;
+	if (page_nr >= mdev->bitmap->bm_number_of_pages) {
+		dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n",
+			 page_nr, (int)mdev->bitmap->bm_number_of_pages);
+		return;
+	}
+	page = mdev->bitmap->bm_pages[page_nr];
+	set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
+}
+
 static int bm_test_page_unchanged(struct page *page)
 {
 	volatile const unsigned long *addr = &page_private(page);
@@ -373,14 +388,16 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
 		return old_pages;
 
 	/* Trying kmalloc first, falling back to vmalloc.
-	 * GFP_KERNEL is ok, as this is done when a lower level disk is
-	 * "attached" to the drbd.  Context is receiver thread or cqueue
-	 * thread.  As we have no disk yet, we are not in the IO path,
-	 * not even the IO path of the peer. */
+	 * GFP_NOIO, as this is called while drbd IO is "suspended",
+	 * and during resize or attach on diskless Primary,
+	 * we must not block on IO to ourselves.
+	 * Context is receiver thread or dmsetup. */
 	bytes = sizeof(struct page *)*want;
-	new_pages = kzalloc(bytes, GFP_KERNEL);
+	new_pages = kzalloc(bytes, GFP_NOIO);
 	if (!new_pages) {
-		new_pages = vzalloc(bytes);
+		new_pages = __vmalloc(bytes,
+				GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO,
+				PAGE_KERNEL);
 		if (!new_pages)
 			return NULL;
 		vmalloced = 1;
@@ -390,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
 		for (i = 0; i < have; i++)
 			new_pages[i] = old_pages[i];
 		for (; i < want; i++) {
-			page = alloc_page(GFP_HIGHUSER);
+			page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
 			if (!page) {
 				bm_free_pages(new_pages + have, i - have);
 				bm_vk_free(new_pages, vmalloced);
@@ -439,7 +456,8 @@ int drbd_bm_init(struct drbd_conf *mdev)
 
 sector_t drbd_bm_capacity(struct drbd_conf *mdev)
 {
-	ERR_IF(!mdev->bitmap) return 0;
+	if (!expect(mdev->bitmap))
+		return 0;
 	return mdev->bitmap->bm_dev_capacity;
 }
 
@@ -447,7 +465,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev)
  */
 void drbd_bm_cleanup(struct drbd_conf *mdev)
 {
-	ERR_IF (!mdev->bitmap) return;
+	if (!expect(mdev->bitmap))
+		return;
 	bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
 	bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
 	kfree(mdev->bitmap);
@@ -610,7 +629,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
 	int err = 0, growing;
 	int opages_vmalloced;
 
-	ERR_IF(!b) return -ENOMEM;
+	if (!expect(b))
+		return -ENOMEM;
 
 	drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
 
@@ -732,8 +752,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
 	unsigned long s;
 	unsigned long flags;
 
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 0;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	s = b->bm_set;
@@ -756,8 +778,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
 size_t drbd_bm_words(struct drbd_conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 0;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	return b->bm_words;
 }
@@ -765,7 +789,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev)
 unsigned long drbd_bm_bits(struct drbd_conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	ERR_IF(!b) return 0;
+	if (!expect(b))
+		return 0;
 
 	return b->bm_bits;
 }
@@ -786,8 +811,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,
 
 	end = offset + number;
 
-	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	if (!expect(b))
+		return;
+	if (!expect(b->bm_pages))
+		return;
 	if (number == 0)
 		return;
 	WARN_ON(offset >= b->bm_words);
@@ -831,8 +858,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
 
 	end = offset + number;
 
-	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	if (!expect(b))
+		return;
+	if (!expect(b->bm_pages))
+		return;
 
 	spin_lock_irq(&b->bm_lock);
 	if ((offset >= b->bm_words) ||
@@ -860,8 +889,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
 void drbd_bm_set_all(struct drbd_conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	if (!expect(b))
+		return;
+	if (!expect(b->bm_pages))
+		return;
 
 	spin_lock_irq(&b->bm_lock);
 	bm_memset(b, 0, 0xff, b->bm_words);
@@ -874,8 +905,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev)
 void drbd_bm_clear_all(struct drbd_conf *mdev)
 {
 	struct drbd_bitmap *b = mdev->bitmap;
-	ERR_IF(!b) return;
-	ERR_IF(!b->bm_pages) return;
+	if (!expect(b))
+		return;
+	if (!expect(b->bm_pages))
+		return;
 
 	spin_lock_irq(&b->bm_lock);
 	bm_memset(b, 0, 0, b->bm_words);
@@ -889,7 +922,8 @@ struct bm_aio_ctx {
 	unsigned int done;
 	unsigned flags;
 #define BM_AIO_COPY_PAGES	1
-#define BM_WRITE_ALL_PAGES	2
+#define BM_AIO_WRITE_HINTED	2
+#define BM_WRITE_ALL_PAGES	4
 	int error;
 	struct kref kref;
 };
@@ -977,17 +1011,11 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
 	bm_set_page_unchanged(b->bm_pages[page_nr]);
 
 	if (ctx->flags & BM_AIO_COPY_PAGES) {
-		void *src, *dest;
 		page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);
-		dest = kmap_atomic(page);
-		src = kmap_atomic(b->bm_pages[page_nr]);
-		memcpy(dest, src, PAGE_SIZE);
-		kunmap_atomic(src);
-		kunmap_atomic(dest);
+		copy_highpage(page, b->bm_pages[page_nr]);
 		bm_store_page_idx(page, page_nr);
 	} else
 		page = b->bm_pages[page_nr];
-
 	bio->bi_bdev = mdev->ldev->md_bdev;
 	bio->bi_sector = on_disk_sector;
 	/* bio_add_page of a single page to an empty bio will always succeed,
@@ -1060,6 +1088,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 		if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
 			break;
 		if (rw & WRITE) {
+			if ((flags & BM_AIO_WRITE_HINTED) &&
+			    !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
+				    &page_private(b->bm_pages[i])))
+				continue;
+
 			if (!(flags & BM_WRITE_ALL_PAGES) &&
 			    bm_test_page_unchanged(b->bm_pages[i])) {
 				dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
@@ -1088,13 +1121,15 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 	 * "in_flight reached zero, all done" event.
 	 */
 	if (!atomic_dec_and_test(&ctx->in_flight))
-		wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
+		wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
 	else
 		kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 
-	dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
-			rw == WRITE ? "WRITE" : "READ",
-			count, jiffies - now);
+	/* summary for global bitmap IO */
+	if (flags == 0)
+		dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
+			 rw == WRITE ? "WRITE" : "READ",
+			 count, jiffies - now);
 
 	if (ctx->error) {
 		dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
@@ -1103,7 +1138,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 	}
 
 	if (atomic_read(&ctx->in_flight))
-		err = -EIO; /* Disk failed during IO... */
+		err = -EIO; /* Disk timeout/force-detach during IO... */
 
 	now = jiffies;
 	if (rw == WRITE) {
@@ -1115,8 +1150,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
 	}
 	now = b->bm_set;
 
-	dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
-	     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
+	if (flags == 0)
+		dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
+		     ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
 
 	kref_put(&ctx->kref, &bm_aio_ctx_destroy);
 	return err;
@@ -1179,9 +1215,17 @@ int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
 	return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
 }
 
+/**
+ * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
+ * @mdev:	DRBD device.
+ */
+int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
+{
+	return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
+}
 
 /**
- * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
+ * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
  * @mdev:	DRBD device.
  * @idx:	bitmap page index
  *
@@ -1222,11 +1266,11 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
 	}
 
 	bm_page_io_async(ctx, idx, WRITE_SYNC);
-	wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
+	wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
 
 	if (ctx->error)
 		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
-		/* that should force detach, so the in memory bitmap will be
+		/* that causes us to detach, so the in memory bitmap will be
 		 * gone in a moment as well. */
 
 	mdev->bm_writ_cnt++;
@@ -1289,8 +1333,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long i = DRBD_END_OF_BITMAP;
 
-	ERR_IF(!b) return i;
-	ERR_IF(!b->bm_pages) return i;
+	if (!expect(b))
+		return i;
+	if (!expect(b->bm_pages))
+		return i;
 
 	spin_lock_irq(&b->bm_lock);
 	if (BM_DONT_TEST & b->bm_flags)
@@ -1391,8 +1437,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
 	struct drbd_bitmap *b = mdev->bitmap;
 	int c = 0;
 
-	ERR_IF(!b) return 1;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 1;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
@@ -1423,13 +1471,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
 {
 	int i;
 	int bits;
+	int changed = 0;
 	unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
 	for (i = first_word; i < last_word; i++) {
 		bits = hweight_long(paddr[i]);
 		paddr[i] = ~0UL;
-		b->bm_set += BITS_PER_LONG - bits;
+		changed += BITS_PER_LONG - bits;
 	}
 	kunmap_atomic(paddr);
+	if (changed) {
+		/* We only need lazy writeout, the information is still in the
+		 * remote bitmap as well, and is reconstructed during the next
+		 * bitmap exchange, if lost locally due to a crash. */
+		bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
+		b->bm_set += changed;
+	}
 }
 
 /* Same thing as drbd_bm_set_bits,
@@ -1524,8 +1580,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
 	unsigned long *p_addr;
 	int i;
 
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 0;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	if (BM_DONT_TEST & b->bm_flags)
@@ -1559,8 +1617,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
 	 * robust in case we screwed up elsewhere, in that case pretend there
 	 * was one dirty bit in the requested area, so we won't try to do a
 	 * local read there (no bitmap probably implies no disk) */
-	ERR_IF(!b) return 1;
-	ERR_IF(!b->bm_pages) return 1;
+	if (!expect(b))
+		return 1;
+	if (!expect(b->bm_pages))
+		return 1;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	if (BM_DONT_TEST & b->bm_flags)
@@ -1573,11 +1633,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
 				bm_unmap(p_addr);
 			p_addr = bm_map_pidx(b, idx);
 		}
-		ERR_IF (bitnr >= b->bm_bits) {
-			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
-		} else {
+		if (expect(bitnr < b->bm_bits))
 			c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
-		}
+		else
+			dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
 	}
 	if (p_addr)
 		bm_unmap(p_addr);
@@ -1607,8 +1666,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
 	unsigned long flags;
 	unsigned long *p_addr, *bm;
 
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
+	if (!expect(b))
+		return 0;
+	if (!expect(b->bm_pages))
+		return 0;
 
 	spin_lock_irqsave(&b->bm_lock, flags);
 	if (BM_DONT_TEST & b->bm_flags)
@@ -1630,47 +1691,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
 	spin_unlock_irqrestore(&b->bm_lock, flags);
 	return count;
 }
-
-/* Set all bits covered by the AL-extent al_enr.
- * Returns number of bits changed. */
-unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
-{
-	struct drbd_bitmap *b = mdev->bitmap;
-	unsigned long *p_addr, *bm;
-	unsigned long weight;
-	unsigned long s, e;
-	int count, i, do_now;
-	ERR_IF(!b) return 0;
-	ERR_IF(!b->bm_pages) return 0;
-
-	spin_lock_irq(&b->bm_lock);
-	if (BM_DONT_SET & b->bm_flags)
-		bm_print_lock_info(mdev);
-	weight = b->bm_set;
-
-	s = al_enr * BM_WORDS_PER_AL_EXT;
-	e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
-	/* assert that s and e are on the same page */
-	D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
-	      ==  s    >> (PAGE_SHIFT - LN2_BPL + 3));
-	count = 0;
-	if (s < b->bm_words) {
-		i = do_now = e-s;
-		p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
-		bm = p_addr + MLPP(s);
-		while (i--) {
-			count += hweight_long(*bm);
-			*bm = -1UL;
-			bm++;
-		}
-		bm_unmap(p_addr);
-		b->bm_set += do_now*BITS_PER_LONG - count;
-		if (e == b->bm_words)
-			b->bm_set -= bm_clear_surplus(b);
-	} else {
-		dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
-	}
-	weight = b->bm_set - weight;
-	spin_unlock_irq(&b->bm_lock);
-	return weight;
-}

File diff suppressed because it is too large
+ 235 - 334
drivers/block/drbd/drbd_int.h


+ 207 - 0
drivers/block/drbd/drbd_interval.c

@@ -0,0 +1,207 @@
+#include <asm/bug.h>
+#include <linux/rbtree_augmented.h>
+#include "drbd_interval.h"
+
+/**
+ * interval_end  -  return end of @node
+ */
+static inline
+sector_t interval_end(struct rb_node *node)
+{
+	struct drbd_interval *this = rb_entry(node, struct drbd_interval, rb);
+	return this->end;
+}
+
+/**
+ * compute_subtree_last  -  compute end of @node
+ *
+ * The end of an interval is the highest (start + (size >> 9)) value of this
+ * node and of its children.  Called for @node and its parents whenever the end
+ * may have changed.
+ */
+static inline sector_t
+compute_subtree_last(struct drbd_interval *node)
+{
+	sector_t max = node->sector + (node->size >> 9);
+
+	if (node->rb.rb_left) {
+		sector_t left = interval_end(node->rb.rb_left);
+		if (left > max)
+			max = left;
+	}
+	if (node->rb.rb_right) {
+		sector_t right = interval_end(node->rb.rb_right);
+		if (right > max)
+			max = right;
+	}
+	return max;
+}
+
+static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
+{
+	while (rb != stop) {
+		struct drbd_interval *node = rb_entry(rb, struct drbd_interval, rb);
+		sector_t subtree_last = compute_subtree_last(node);
+		if (node->end == subtree_last)
+			break;
+		node->end = subtree_last;
+		rb = rb_parent(&node->rb);
+	}
+}
+
+static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
+	struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
+
+	new->end = old->end;
+}
+
+static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	struct drbd_interval *old = rb_entry(rb_old, struct drbd_interval, rb);
+	struct drbd_interval *new = rb_entry(rb_new, struct drbd_interval, rb);
+
+	new->end = old->end;
+	old->end = compute_subtree_last(old);
+}
+
+static const struct rb_augment_callbacks augment_callbacks = {
+	augment_propagate,
+	augment_copy,
+	augment_rotate,
+};
+
+/**
+ * drbd_insert_interval  -  insert a new interval into a tree
+ */
+bool
+drbd_insert_interval(struct rb_root *root, struct drbd_interval *this)
+{
+	struct rb_node **new = &root->rb_node, *parent = NULL;
+
+	BUG_ON(!IS_ALIGNED(this->size, 512));
+
+	while (*new) {
+		struct drbd_interval *here =
+			rb_entry(*new, struct drbd_interval, rb);
+
+		parent = *new;
+		if (this->sector < here->sector)
+			new = &(*new)->rb_left;
+		else if (this->sector > here->sector)
+			new = &(*new)->rb_right;
+		else if (this < here)
+			new = &(*new)->rb_left;
+		else if (this > here)
+			new = &(*new)->rb_right;
+		else
+			return false;
+	}
+
+	rb_link_node(&this->rb, parent, new);
+	rb_insert_augmented(&this->rb, root, &augment_callbacks);
+	return true;
+}
+
+/**
+ * drbd_contains_interval  -  check if a tree contains a given interval
+ * @sector:	start sector of @interval
+ * @interval:	may not be a valid pointer
+ *
+ * Returns whether the tree contains the node @interval with start sector @sector.
+ * Does not dereference @interval until @interval is known to be a valid object
+ * in @tree.  Returns %false if @interval is in the tree but with a different
+ * sector number.
+ */
+bool
+drbd_contains_interval(struct rb_root *root, sector_t sector,
+		       struct drbd_interval *interval)
+{
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		struct drbd_interval *here =
+			rb_entry(node, struct drbd_interval, rb);
+
+		if (sector < here->sector)
+			node = node->rb_left;
+		else if (sector > here->sector)
+			node = node->rb_right;
+		else if (interval < here)
+			node = node->rb_left;
+		else if (interval > here)
+			node = node->rb_right;
+		else
+			return true;
+	}
+	return false;
+}
+
+/**
+ * drbd_remove_interval  -  remove an interval from a tree
+ */
+void
+drbd_remove_interval(struct rb_root *root, struct drbd_interval *this)
+{
+	rb_erase_augmented(&this->rb, root, &augment_callbacks);
+}
+
+/**
+ * drbd_find_overlap  - search for an interval overlapping with [sector, sector + size)
+ * @sector:	start sector
+ * @size:	size, aligned to 512 bytes
+ *
+ * Returns an interval overlapping with [sector, sector + size), or NULL if
+ * there is none.  When there is more than one overlapping interval in the
+ * tree, the interval with the lowest start sector is returned, and all other
+ * overlapping intervals will be on the right side of the tree, reachable with
+ * rb_next().
+ */
+struct drbd_interval *
+drbd_find_overlap(struct rb_root *root, sector_t sector, unsigned int size)
+{
+	struct rb_node *node = root->rb_node;
+	struct drbd_interval *overlap = NULL;
+	sector_t end = sector + (size >> 9);
+
+	BUG_ON(!IS_ALIGNED(size, 512));
+
+	while (node) {
+		struct drbd_interval *here =
+			rb_entry(node, struct drbd_interval, rb);
+
+		if (node->rb_left &&
+		    sector < interval_end(node->rb_left)) {
+			/* Overlap if any must be on left side */
+			node = node->rb_left;
+		} else if (here->sector < end &&
+			   sector < here->sector + (here->size >> 9)) {
+			overlap = here;
+			break;
+		} else if (sector >= here->sector) {
+			/* Overlap if any must be on right side */
+			node = node->rb_right;
+		} else
+			break;
+	}
+	return overlap;
+}
+
+struct drbd_interval *
+drbd_next_overlap(struct drbd_interval *i, sector_t sector, unsigned int size)
+{
+	sector_t end = sector + (size >> 9);
+	struct rb_node *node;
+
+	for (;;) {
+		node = rb_next(&i->rb);
+		if (!node)
+			return NULL;
+		i = rb_entry(node, struct drbd_interval, rb);
+		if (i->sector >= end)
+			return NULL;
+		if (sector < i->sector + (i->size >> 9))
+			return i;
+	}
+}

+ 40 - 0
drivers/block/drbd/drbd_interval.h

@@ -0,0 +1,40 @@
+#ifndef __DRBD_INTERVAL_H
+#define __DRBD_INTERVAL_H
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+
+struct drbd_interval {
+	struct rb_node rb;
+	sector_t sector;	/* start sector of the interval */
+	unsigned int size;	/* size in bytes */
+	sector_t end;		/* highest interval end in subtree */
+	int local:1		/* local or remote request? */;
+	int waiting:1;
+};
+
+static inline void drbd_clear_interval(struct drbd_interval *i)
+{
+	RB_CLEAR_NODE(&i->rb);
+}
+
+static inline bool drbd_interval_empty(struct drbd_interval *i)
+{
+	return RB_EMPTY_NODE(&i->rb);
+}
+
+extern bool drbd_insert_interval(struct rb_root *, struct drbd_interval *);
+extern bool drbd_contains_interval(struct rb_root *, sector_t,
+				   struct drbd_interval *);
+extern void drbd_remove_interval(struct rb_root *, struct drbd_interval *);
+extern struct drbd_interval *drbd_find_overlap(struct rb_root *, sector_t,
+					unsigned int);
+extern struct drbd_interval *drbd_next_overlap(struct drbd_interval *, sector_t,
+					unsigned int);
+
+#define drbd_for_each_overlap(i, root, sector, size)		\
+	for (i = drbd_find_overlap(root, sector, size);		\
+	     i;							\
+	     i = drbd_next_overlap(i, sector, size))
+
+#endif  /* __DRBD_INTERVAL_H */

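
drbd_interval.h above exposes a small augmented-rbtree interval tree keyed by start sector, with sizes in bytes at 512-byte granularity. A short sketch of how a caller might walk all intervals overlapping a request, using only what the header provides; the counting function itself is hypothetical:

#include <linux/rbtree.h>
#include "drbd_interval.h"

/* Count intervals overlapping [sector, sector + size).  size is in
 * bytes and must be 512-aligned; drbd_find_overlap() returns the
 * leftmost overlap and drbd_next_overlap() continues via rb_next(),
 * which is exactly what drbd_for_each_overlap() expands to. */
static int count_conflicts(struct rb_root *root, sector_t sector,
			   unsigned int size)
{
	struct drbd_interval *i;
	int n = 0;

	drbd_for_each_overlap(i, root, sector, size)
		n++;
	return n;
}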
File diff suppressed because it is too large
+ 173 - 1593
drivers/block/drbd/drbd_main.c


File diff suppressed because it is too large
+ 474 - 274
drivers/block/drbd/drbd_nl.c


+ 55 - 0
drivers/block/drbd/drbd_nla.c

@@ -0,0 +1,55 @@
+#include "drbd_wrappers.h"
+#include <linux/kernel.h>
+#include <net/netlink.h>
+#include <linux/drbd_genl_api.h>
+#include "drbd_nla.h"
+
+static int drbd_nla_check_mandatory(int maxtype, struct nlattr *nla)
+{
+	struct nlattr *head = nla_data(nla);
+	int len = nla_len(nla);
+	int rem;
+
+	/*
+	 * validate_nla (called from nla_parse_nested) ignores attributes
+	 * beyond maxtype, and does not understand the DRBD_GENLA_F_MANDATORY flag.
+	 * In order to have it validate attributes with the DRBD_GENLA_F_MANDATORY
+	 * flag set also, check and remove that flag before calling
+	 * nla_parse_nested.
+	 */
+
+	nla_for_each_attr(nla, head, len, rem) {
+		if (nla->nla_type & DRBD_GENLA_F_MANDATORY) {
+			nla->nla_type &= ~DRBD_GENLA_F_MANDATORY;
+			if (nla_type(nla) > maxtype)
+				return -EOPNOTSUPP;
+		}
+	}
+	return 0;
+}
+
+int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+			  const struct nla_policy *policy)
+{
+	int err;
+
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (!err)
+		err = nla_parse_nested(tb, maxtype, nla, policy);
+
+	return err;
+}
+
+struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype)
+{
+	int err;
+	/*
+	 * If any nested attribute has the DRBD_GENLA_F_MANDATORY flag set and
+	 * we don't know about that attribute, reject all the nested
+	 * attributes.
+	 */
+	err = drbd_nla_check_mandatory(maxtype, nla);
+	if (err)
+		return ERR_PTR(err);
+	return nla_find_nested(nla, attrtype);
+}

+ 8 - 0
drivers/block/drbd/drbd_nla.h

@@ -0,0 +1,8 @@
+#ifndef __DRBD_NLA_H
+#define __DRBD_NLA_H
+
+extern int drbd_nla_parse_nested(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+				 const struct nla_policy *policy);
+extern struct nlattr *drbd_nla_find_nested(int maxtype, struct nlattr *nla, int attrtype);
+
+#endif  /* __DRBD_NLA_H */

+ 23 - 18
drivers/block/drbd/drbd_proc.c

@@ -167,18 +167,24 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 		 * we convert to sectors in the display below. */
 		unsigned long bm_bits = drbd_bm_bits(mdev);
 		unsigned long bit_pos;
+		unsigned long long stop_sector = 0;
 		if (mdev->state.conn == C_VERIFY_S ||
-		    mdev->state.conn == C_VERIFY_T)
+		    mdev->state.conn == C_VERIFY_T) {
 			bit_pos = bm_bits - mdev->ov_left;
-		else
+			if (verify_can_do_stop_sector(mdev))
+				stop_sector = mdev->ov_stop_sector;
+		} else
 			bit_pos = mdev->bm_resync_fo;
 		/* Total sectors may be slightly off for oddly
 		 * sized devices. So what. */
 		seq_printf(seq,
-			"\t%3d%% sector pos: %llu/%llu\n",
+			"\t%3d%% sector pos: %llu/%llu",
 			(int)(bit_pos / (bm_bits/100+1)),
 			(unsigned long long)bit_pos * BM_SECT_PER_BIT,
 			(unsigned long long)bm_bits * BM_SECT_PER_BIT);
+		if (stop_sector != 0 && stop_sector != ULLONG_MAX)
+			seq_printf(seq, " stop sector: %llu", stop_sector);
+		seq_printf(seq, "\n");
 	}
 }
 
@@ -194,9 +200,11 @@ static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
 
 static int drbd_seq_show(struct seq_file *seq, void *v)
 {
-	int i, hole = 0;
+	int i, prev_i = -1;
 	const char *sn;
 	struct drbd_conf *mdev;
+	struct net_conf *nc;
+	char wp;
 
 	static char write_ordering_chars[] = {
 		[WO_none] = 'n',
@@ -227,16 +235,11 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 	 oos .. known out-of-sync kB
 	*/
 
-	for (i = 0; i < minor_count; i++) {
-		mdev = minor_to_mdev(i);
-		if (!mdev) {
-			hole = 1;
-			continue;
-		}
-		if (hole) {
-			hole = 0;
+	rcu_read_lock();
+	idr_for_each_entry(&minors, mdev, i) {
+		if (prev_i != i - 1)
 			seq_printf(seq, "\n");
-		}
+		prev_i = i;
 
 		sn = drbd_conn_str(mdev->state.conn);
 
@@ -248,6 +251,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			/* reset mdev->congestion_reason */
 			bdi_rw_congested(&mdev->rq_queue->backing_dev_info);
 
+			nc = rcu_dereference(mdev->tconn->net_conf);
+			wp = nc ? nc->wire_protocol - DRBD_PROT_A + 'A' : ' ';
 			seq_printf(seq,
 			   "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
 			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
@@ -257,9 +262,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			   drbd_role_str(mdev->state.peer),
 			   drbd_disk_str(mdev->state.disk),
 			   drbd_disk_str(mdev->state.pdsk),
-			   (mdev->net_conf == NULL ? ' ' :
-			    (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
-			   is_susp(mdev->state) ? 's' : 'r',
+			   wp,
+			   drbd_suspended(mdev) ? 's' : 'r',
 			   mdev->state.aftr_isp ? 'a' : '-',
 			   mdev->state.peer_isp ? 'p' : '-',
 			   mdev->state.user_isp ? 'u' : '-',
@@ -276,8 +280,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			   atomic_read(&mdev->rs_pending_cnt),
 			   atomic_read(&mdev->unacked_cnt),
 			   atomic_read(&mdev->ap_bio_cnt),
-			   mdev->epochs,
-			   write_ordering_chars[mdev->write_ordering]
+			   mdev->tconn->epochs,
+			   write_ordering_chars[mdev->tconn->write_ordering]
 			);
 			seq_printf(seq, " oos:%llu\n",
 				   Bit2KB((unsigned long long)
@@ -302,6 +306,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			}
 		}
 	}
+	rcu_read_unlock();
 
 	return 0;
 }

File diff suppressed because it is too large
+ 565 - 373
drivers/block/drbd/drbd_receiver.c


File diff suppressed because it is too large
+ 441 - 426
drivers/block/drbd/drbd_req.c


+ 69 - 118
drivers/block/drbd/drbd_req.h

@@ -77,40 +77,41 @@
  */
 
 enum drbd_req_event {
-	created,
-	to_be_send,
-	to_be_submitted,
+	CREATED,
+	TO_BE_SENT,
+	TO_BE_SUBMITTED,
 
 	/* XXX yes, now I am inconsistent...
 	 * these are not "events" but "actions"
 	 * oh, well... */
-	queue_for_net_write,
-	queue_for_net_read,
-	queue_for_send_oos,
-
-	send_canceled,
-	send_failed,
-	handed_over_to_network,
-	oos_handed_to_network,
-	connection_lost_while_pending,
-	read_retry_remote_canceled,
-	recv_acked_by_peer,
-	write_acked_by_peer,
-	write_acked_by_peer_and_sis, /* and set_in_sync */
-	conflict_discarded_by_peer,
-	neg_acked,
-	barrier_acked, /* in protocol A and B */
-	data_received, /* (remote read) */
-
-	read_completed_with_error,
-	read_ahead_completed_with_error,
-	write_completed_with_error,
-	abort_disk_io,
-	completed_ok,
-	resend,
-	fail_frozen_disk_io,
-	restart_frozen_disk_io,
-	nothing, /* for tracing only */
+	QUEUE_FOR_NET_WRITE,
+	QUEUE_FOR_NET_READ,
+	QUEUE_FOR_SEND_OOS,
+
+	SEND_CANCELED,
+	SEND_FAILED,
+	HANDED_OVER_TO_NETWORK,
+	OOS_HANDED_TO_NETWORK,
+	CONNECTION_LOST_WHILE_PENDING,
+	READ_RETRY_REMOTE_CANCELED,
+	RECV_ACKED_BY_PEER,
+	WRITE_ACKED_BY_PEER,
+	WRITE_ACKED_BY_PEER_AND_SIS, /* and set_in_sync */
+	CONFLICT_RESOLVED,
+	POSTPONE_WRITE,
+	NEG_ACKED,
+	BARRIER_ACKED, /* in protocol A and B */
+	DATA_RECEIVED, /* (remote read) */
+
+	READ_COMPLETED_WITH_ERROR,
+	READ_AHEAD_COMPLETED_WITH_ERROR,
+	WRITE_COMPLETED_WITH_ERROR,
+	ABORT_DISK_IO,
+	COMPLETED_OK,
+	RESEND,
+	FAIL_FROZEN_DISK_IO,
+	RESTART_FROZEN_DISK_IO,
+	NOTHING,
 };
 
 /* encoding of request states for now.  we don't actually need that many bits.
@@ -142,8 +143,8 @@ enum drbd_req_state_bits {
 	 *        recv_ack (B) or implicit "ack" (A),
 	 *        still waiting for the barrier ack.
 	 *        master_bio may already be completed and invalidated.
-	 * 11100: write_acked (C),
-	 *        data_received (for remote read, any protocol)
+	 * 11100: write acked (C),
+	 *        data received (for remote read, any protocol)
 	 *        or finally the barrier ack has arrived (B,A)...
 	 *        request can be freed
 	 * 01100: neg-acked (write, protocol C)
@@ -198,6 +199,22 @@ enum drbd_req_state_bits {
 
 	/* Should call drbd_al_complete_io() for this request... */
 	__RQ_IN_ACT_LOG,
+
+	/* The peer has sent a retry ACK */
+	__RQ_POSTPONED,
+
+	/* would have been completed,
+	 * but was not, because of drbd_suspended() */
+	__RQ_COMPLETION_SUSP,
+
+	/* We expect a receive ACK (wire proto B) */
+	__RQ_EXP_RECEIVE_ACK,
+
+	/* We expect a write ACK (write proto C) */
+	__RQ_EXP_WRITE_ACK,
+
+	/* waiting for a barrier ack, did an extra kref_get */
+	__RQ_EXP_BARR_ACK,
 };
 
 #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
@@ -219,56 +236,16 @@ enum drbd_req_state_bits {
 
 #define RQ_WRITE           (1UL << __RQ_WRITE)
 #define RQ_IN_ACT_LOG      (1UL << __RQ_IN_ACT_LOG)
+#define RQ_POSTPONED	   (1UL << __RQ_POSTPONED)
+#define RQ_COMPLETION_SUSP (1UL << __RQ_COMPLETION_SUSP)
+#define RQ_EXP_RECEIVE_ACK (1UL << __RQ_EXP_RECEIVE_ACK)
+#define RQ_EXP_WRITE_ACK   (1UL << __RQ_EXP_WRITE_ACK)
+#define RQ_EXP_BARR_ACK    (1UL << __RQ_EXP_BARR_ACK)
 
 /* For waking up the frozen transfer log mod_req() has to return if the request
    should be counted in the epoch object*/
-#define MR_WRITE_SHIFT 0
-#define MR_WRITE       (1 << MR_WRITE_SHIFT)
-#define MR_READ_SHIFT  1
-#define MR_READ        (1 << MR_READ_SHIFT)
-
-/* epoch entries */
-static inline
-struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
-{
-	BUG_ON(mdev->ee_hash_s == 0);
-	return mdev->ee_hash +
-		((unsigned int)(sector>>HT_SHIFT) % mdev->ee_hash_s);
-}
-
-/* transfer log (drbd_request objects) */
-static inline
-struct hlist_head *tl_hash_slot(struct drbd_conf *mdev, sector_t sector)
-{
-	BUG_ON(mdev->tl_hash_s == 0);
-	return mdev->tl_hash +
-		((unsigned int)(sector>>HT_SHIFT) % mdev->tl_hash_s);
-}
-
-/* application reads (drbd_request objects) */
-static struct hlist_head *ar_hash_slot(struct drbd_conf *mdev, sector_t sector)
-{
-	return mdev->app_reads_hash
-		+ ((unsigned int)(sector) % APP_R_HSIZE);
-}
-
-/* when we receive the answer for a read request,
- * verify that we actually know about it */
-static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
-	u64 id, sector_t sector)
-{
-	struct hlist_head *slot = ar_hash_slot(mdev, sector);
-	struct hlist_node *n;
-	struct drbd_request *req;
-
-	hlist_for_each_entry(req, n, slot, collision) {
-		if ((unsigned long)req == (unsigned long)id) {
-			D_ASSERT(req->sector == sector);
-			return req;
-		}
-	}
-	return NULL;
-}
+#define MR_WRITE       1
+#define MR_READ        2
 
 static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
 {
@@ -278,41 +255,10 @@ static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bi
 	req->private_bio = bio;
 
 	bio->bi_private  = req;
-	bio->bi_end_io   = drbd_endio_pri;
+	bio->bi_end_io   = drbd_request_endio;
 	bio->bi_next     = NULL;
 }
 
-static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
-	struct bio *bio_src)
-{
-	struct drbd_request *req =
-		mempool_alloc(drbd_request_mempool, GFP_NOIO);
-	if (likely(req)) {
-		drbd_req_make_private_bio(req, bio_src);
-
-		req->rq_state    = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
-		req->mdev        = mdev;
-		req->master_bio  = bio_src;
-		req->epoch       = 0;
-		req->sector      = bio_src->bi_sector;
-		req->size        = bio_src->bi_size;
-		INIT_HLIST_NODE(&req->collision);
-		INIT_LIST_HEAD(&req->tl_requests);
-		INIT_LIST_HEAD(&req->w.list);
-	}
-	return req;
-}
-
-static inline void drbd_req_free(struct drbd_request *req)
-{
-	mempool_free(req, drbd_request_mempool);
-}
-
-static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
-{
-	return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
-}
-
 /* Short lived temporary struct on the stack.
  * We could squirrel the error to be returned into
  * bio->bi_size, or similar. But that would be too ugly. */
@@ -321,6 +267,7 @@ struct bio_and_error {
 	int error;
 };
 
+extern void drbd_req_destroy(struct kref *kref);
 extern void _req_may_be_done(struct drbd_request *req,
 		struct bio_and_error *m);
 extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
@@ -328,13 +275,17 @@ extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 extern void complete_master_bio(struct drbd_conf *mdev,
 		struct bio_and_error *m);
 extern void request_timer_fn(unsigned long data);
-extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
+extern void tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
+extern void _tl_restart(struct drbd_tconn *tconn, enum drbd_req_event what);
+
+/* this is in drbd_main.c */
+extern void drbd_restart_request(struct drbd_request *req);
 
 /* use this if you don't want to deal with calling complete_master_bio()
  * outside the spinlock, e.g. when walking some list on cleanup. */
 static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
 {
-	struct drbd_conf *mdev = req->mdev;
+	struct drbd_conf *mdev = req->w.mdev;
 	struct bio_and_error m;
 	int rv;
 
@@ -354,13 +305,13 @@ static inline int req_mod(struct drbd_request *req,
 		enum drbd_req_event what)
 {
 	unsigned long flags;
-	struct drbd_conf *mdev = req->mdev;
+	struct drbd_conf *mdev = req->w.mdev;
 	struct bio_and_error m;
 	int rv;
 
-	spin_lock_irqsave(&mdev->req_lock, flags);
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
 	rv = __req_mod(req, what, &m);
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
 
 	if (m.bio)
 		complete_master_bio(mdev, &m);
@@ -368,7 +319,7 @@ static inline int req_mod(struct drbd_request *req,
 	return rv;
 }
 
-static inline bool drbd_should_do_remote(union drbd_state s)
+static inline bool drbd_should_do_remote(union drbd_dev_state s)
 {
 	return s.pdsk == D_UP_TO_DATE ||
 		(s.pdsk >= D_INCONSISTENT &&
@@ -378,7 +329,7 @@ static inline bool drbd_should_do_remote(union drbd_state s)
 	   That is equivalent since before 96 IO was frozen in the C_WF_BITMAP*
 	   states. */
 }
-static inline bool drbd_should_send_oos(union drbd_state s)
+static inline bool drbd_should_send_out_of_sync(union drbd_dev_state s)
 {
 	return s.conn == C_AHEAD || s.conn == C_WF_BITMAP_S;
 	/* pdsk = D_INCONSISTENT as a consequence. Protocol 96 check not necessary

+ 1856 - 0
drivers/block/drbd/drbd_state.c

@@ -0,0 +1,1856 @@
+/*
+   drbd_state.c
+
+   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
+
+   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
+   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
+   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
+
+   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
+   from Logicworks, Inc. for making SDP replication support possible.
+
+   drbd is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   drbd is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with drbd; see the file COPYING.  If not, write to
+   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/drbd_limits.h>
+#include "drbd_int.h"
+#include "drbd_req.h"
+
+/* in drbd_main.c */
+extern void tl_abort_disk_io(struct drbd_conf *mdev);
+
+struct after_state_chg_work {
+	struct drbd_work w;
+	union drbd_state os;
+	union drbd_state ns;
+	enum chg_state_flags flags;
+	struct completion *done;
+};
+
+enum sanitize_state_warnings {
+	NO_WARNING,
+	ABORTED_ONLINE_VERIFY,
+	ABORTED_RESYNC,
+	CONNECTION_LOST_NEGOTIATING,
+	IMPLICITLY_UPGRADED_DISK,
+	IMPLICITLY_UPGRADED_PDSK,
+};
+
+static int w_after_state_ch(struct drbd_work *w, int unused);
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+			   union drbd_state ns, enum chg_state_flags flags);
+static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
+static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_tconn *);
+static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
+				       enum sanitize_state_warnings *warn);
+
+static inline bool is_susp(union drbd_state s)
+{
+        return s.susp || s.susp_nod || s.susp_fen;
+}
+
+bool conn_all_vols_unconf(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	bool rv = true;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		if (mdev->state.disk != D_DISKLESS ||
+		    mdev->state.conn != C_STANDALONE ||
+		    mdev->state.role != R_SECONDARY) {
+			rv = false;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return rv;
+}
+
+/* Unfortunately the states were not correctly ordered when they
+   were defined; therefore we cannot use max_t() here. */
+static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
+{
+	if (role1 == R_PRIMARY || role2 == R_PRIMARY)
+		return R_PRIMARY;
+	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
+		return R_SECONDARY;
+	return R_UNKNOWN;
+}
+static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
+{
+	if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
+		return R_UNKNOWN;
+	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
+		return R_SECONDARY;
+	return R_PRIMARY;
+}
+
+enum drbd_role conn_highest_role(struct drbd_tconn *tconn)
+{
+	enum drbd_role role = R_UNKNOWN;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		role = max_role(role, mdev->state.role);
+	rcu_read_unlock();
+
+	return role;
+}
+
+enum drbd_role conn_highest_peer(struct drbd_tconn *tconn)
+{
+	enum drbd_role peer = R_UNKNOWN;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		peer = max_role(peer, mdev->state.peer);
+	rcu_read_unlock();
+
+	return peer;
+}
+
+enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_DISKLESS;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = max_t(enum drbd_disk_state, ds, mdev->state.disk);
+	rcu_read_unlock();
+
+	return ds;
+}
+
+enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_MASK;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = min_t(enum drbd_disk_state, ds, mdev->state.disk);
+	rcu_read_unlock();
+
+	return ds;
+}
+
+enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn)
+{
+	enum drbd_disk_state ds = D_DISKLESS;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		ds = max_t(enum drbd_disk_state, ds, mdev->state.pdsk);
+	rcu_read_unlock();
+
+	return ds;
+}
+
+enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn)
+{
+	enum drbd_conns conn = C_MASK;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		conn = min_t(enum drbd_conns, conn, mdev->state.conn);
+	rcu_read_unlock();
+
+	return conn;
+}
+
+static bool no_peer_wf_report_params(struct drbd_tconn *tconn)
+{
+	struct drbd_conf *mdev;
+	int vnr;
+	bool rv = true;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr)
+		if (mdev->state.conn == C_WF_REPORT_PARAMS) {
+			rv = false;
+			break;
+		}
+	rcu_read_unlock();
+
+	return rv;
+}
+
+
+/**
+ * cl_wide_st_chg() - true if the state change is a cluster wide one
+ * @mdev:	DRBD device.
+ * @os:		old (current) state.
+ * @ns:		new (wanted) state.
+ */
+static int cl_wide_st_chg(struct drbd_conf *mdev,
+			  union drbd_state os, union drbd_state ns)
+{
+	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
+		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
+		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
+		  (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
+		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
+		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) ||
+		(os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS);
+}
+
+static union drbd_state
+apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val)
+{
+	union drbd_state ns;
+	ns.i = (os.i & ~mask.i) | val.i;
+	return ns;
+}
+
+enum drbd_state_rv
+drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
+		  union drbd_state mask, union drbd_state val)
+{
+	unsigned long flags;
+	union drbd_state ns;
+	enum drbd_state_rv rv;
+
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+	ns = apply_mask_val(drbd_read_state(mdev), mask, val);
+	rv = _drbd_set_state(mdev, ns, f, NULL);
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+
+	return rv;
+}
+
+/**
+ * drbd_force_state() - Impose a change which happens outside our control on our state
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ */
+void drbd_force_state(struct drbd_conf *mdev,
+	union drbd_state mask, union drbd_state val)
+{
+	drbd_change_state(mdev, CS_HARD, mask, val);
+}
+
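For orientation: every request in this file is expressed as a (mask, val) pair, where mask selects the fields of union drbd_state to change and val carries their new contents; apply_mask_val() above merges them onto the current state. A minimal caller sketch (illustrative only, assuming an initialized mdev; NS() is the helper macro added in drbd_state.h below):

	/* Sketch: force the disk state to D_FAILED after a fatal local I/O
	 * error.  NS(disk, D_FAILED) expands to the (mask, val) pair, and
	 * drbd_force_state() applies it with CS_HARD, bypassing soft checks. */
	drbd_force_state(mdev, NS(disk, D_FAILED));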
+static enum drbd_state_rv
+_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
+	     union drbd_state val)
+{
+	union drbd_state os, ns;
+	unsigned long flags;
+	enum drbd_state_rv rv;
+
+	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
+		return SS_CW_SUCCESS;
+
+	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
+		return SS_CW_FAILED_BY_PEER;
+
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+	os = drbd_read_state(mdev);
+	ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
+	rv = is_valid_transition(os, ns);
+	if (rv >= SS_SUCCESS)
+		rv = SS_UNKNOWN_ERROR;  /* continue waiting, otherwise fail. */
+
+	if (!cl_wide_st_chg(mdev, os, ns))
+		rv = SS_CW_NO_NEED;
+	if (rv == SS_UNKNOWN_ERROR) {
+		rv = is_valid_state(mdev, ns);
+		if (rv >= SS_SUCCESS) {
+			rv = is_valid_soft_transition(os, ns, mdev->tconn);
+			if (rv >= SS_SUCCESS)
+				rv = SS_UNKNOWN_ERROR; /* continue waiting, otherwise fail. */
+		}
+	}
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+
+	return rv;
+}
+
+/**
+ * drbd_req_state() - Perform a possibly cluster-wide state change
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ * @f:		flags
+ *
+ * Should not be called directly, use drbd_request_state() or
+ * _drbd_request_state().
+ */
+static enum drbd_state_rv
+drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
+	       union drbd_state val, enum chg_state_flags f)
+{
+	struct completion done;
+	unsigned long flags;
+	union drbd_state os, ns;
+	enum drbd_state_rv rv;
+
+	init_completion(&done);
+
+	if (f & CS_SERIALIZE)
+		mutex_lock(mdev->state_mutex);
+
+	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+	os = drbd_read_state(mdev);
+	ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
+	rv = is_valid_transition(os, ns);
+	if (rv < SS_SUCCESS) {
+		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+		goto abort;
+	}
+
+	if (cl_wide_st_chg(mdev, os, ns)) {
+		rv = is_valid_state(mdev, ns);
+		if (rv == SS_SUCCESS)
+			rv = is_valid_soft_transition(os, ns, mdev->tconn);
+		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+
+		if (rv < SS_SUCCESS) {
+			if (f & CS_VERBOSE)
+				print_st_err(mdev, os, ns, rv);
+			goto abort;
+		}
+
+		if (drbd_send_state_req(mdev, mask, val)) {
+			rv = SS_CW_FAILED_BY_PEER;
+			if (f & CS_VERBOSE)
+				print_st_err(mdev, os, ns, rv);
+			goto abort;
+		}
+
+		wait_event(mdev->state_wait,
+			(rv = _req_st_cond(mdev, mask, val)));
+
+		if (rv < SS_SUCCESS) {
+			if (f & CS_VERBOSE)
+				print_st_err(mdev, os, ns, rv);
+			goto abort;
+		}
+		spin_lock_irqsave(&mdev->tconn->req_lock, flags);
+		ns = apply_mask_val(drbd_read_state(mdev), mask, val);
+		rv = _drbd_set_state(mdev, ns, f, &done);
+	} else {
+		rv = _drbd_set_state(mdev, ns, f, &done);
+	}
+
+	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
+
+	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
+		D_ASSERT(current != mdev->tconn->worker.task);
+		wait_for_completion(&done);
+	}
+
+abort:
+	if (f & CS_SERIALIZE)
+		mutex_unlock(mdev->state_mutex);
+
+	return rv;
+}
+
+/**
+ * _drbd_request_state() - Request a state change (with flags)
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ * @f:		flags
+ *
+ * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
+ * flag, or when logging of failed state change requests is not desired.
+ */
+enum drbd_state_rv
+_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
+		    union drbd_state val, enum chg_state_flags f)
+{
+	enum drbd_state_rv rv;
+
+	wait_event(mdev->state_wait,
+		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);
+
+	return rv;
+}
+
+static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
+{
+	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
+	    name,
+	    drbd_conn_str(ns.conn),
+	    drbd_role_str(ns.role),
+	    drbd_role_str(ns.peer),
+	    drbd_disk_str(ns.disk),
+	    drbd_disk_str(ns.pdsk),
+	    is_susp(ns) ? 's' : 'r',
+	    ns.aftr_isp ? 'a' : '-',
+	    ns.peer_isp ? 'p' : '-',
+	    ns.user_isp ? 'u' : '-',
+	    ns.susp_fen ? 'F' : '-',
+	    ns.susp_nod ? 'N' : '-'
+	    );
+}
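
For reference, a log line produced by this helper might look like this (illustrative values; the leading space aligns " state" with "wanted" in print_st_err() below):

	block drbd0:  state = { cs:Connected ro:Primary/Secondary ds:UpToDate/UpToDate r----- }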
+
+void print_st_err(struct drbd_conf *mdev, union drbd_state os,
+	          union drbd_state ns, enum drbd_state_rv err)
+{
+	if (err == SS_IN_TRANSIENT_STATE)
+		return;
+	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
+	print_st(mdev, " state", os);
+	print_st(mdev, "wanted", ns);
+}
+
+static long print_state_change(char *pb, union drbd_state os, union drbd_state ns,
+			       enum chg_state_flags flags)
+{
+	char *pbp;
+	pbp = pb;
+	*pbp = 0;
+
+	if (ns.role != os.role && flags & CS_DC_ROLE)
+		pbp += sprintf(pbp, "role( %s -> %s ) ",
+			       drbd_role_str(os.role),
+			       drbd_role_str(ns.role));
+	if (ns.peer != os.peer && flags & CS_DC_PEER)
+		pbp += sprintf(pbp, "peer( %s -> %s ) ",
+			       drbd_role_str(os.peer),
+			       drbd_role_str(ns.peer));
+	if (ns.conn != os.conn && flags & CS_DC_CONN)
+		pbp += sprintf(pbp, "conn( %s -> %s ) ",
+			       drbd_conn_str(os.conn),
+			       drbd_conn_str(ns.conn));
+	if (ns.disk != os.disk && flags & CS_DC_DISK)
+		pbp += sprintf(pbp, "disk( %s -> %s ) ",
+			       drbd_disk_str(os.disk),
+			       drbd_disk_str(ns.disk));
+	if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK)
+		pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
+			       drbd_disk_str(os.pdsk),
+			       drbd_disk_str(ns.pdsk));
+
+	return pbp - pb;
+}
+
+static void drbd_pr_state_change(struct drbd_conf *mdev, union drbd_state os, union drbd_state ns,
+				 enum chg_state_flags flags)
+{
+	char pb[300];
+	char *pbp = pb;
+
+	pbp += print_state_change(pbp, os, ns, flags ^ CS_DC_MASK);
+
+	if (ns.aftr_isp != os.aftr_isp)
+		pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
+			       os.aftr_isp,
+			       ns.aftr_isp);
+	if (ns.peer_isp != os.peer_isp)
+		pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
+			       os.peer_isp,
+			       ns.peer_isp);
+	if (ns.user_isp != os.user_isp)
+		pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
+			       os.user_isp,
+			       ns.user_isp);
+
+	if (pbp != pb)
+		dev_info(DEV, "%s\n", pb);
+}
+
+static void conn_pr_state_change(struct drbd_tconn *tconn, union drbd_state os, union drbd_state ns,
+				 enum chg_state_flags flags)
+{
+	char pb[300];
+	char *pbp = pb;
+
+	pbp += print_state_change(pbp, os, ns, flags);
+
+	if (is_susp(ns) != is_susp(os) && flags & CS_DC_SUSP)
+		pbp += sprintf(pbp, "susp( %d -> %d ) ",
+			       is_susp(os),
+			       is_susp(ns));
+
+	if (pbp != pb)
+		conn_info(tconn, "%s\n", pb);
+}
+
+
+/**
+ * is_valid_state() - Returns an SS_ error code if ns is not valid
+ * @mdev:	DRBD device.
+ * @ns:		State to consider.
+ */
+static enum drbd_state_rv
+is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
+{
+	/* See drbd_state_sw_errors in drbd_strings.c */
+
+	enum drbd_fencing_p fp;
+	enum drbd_state_rv rv = SS_SUCCESS;
+	struct net_conf *nc;
+
+	rcu_read_lock();
+	fp = FP_DONT_CARE;
+	if (get_ldev(mdev)) {
+		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
+		put_ldev(mdev);
+	}
+
+	nc = rcu_dereference(mdev->tconn->net_conf);
+	if (nc) {
+		if (!nc->two_primaries && ns.role == R_PRIMARY) {
+			if (ns.peer == R_PRIMARY)
+				rv = SS_TWO_PRIMARIES;
+			else if (conn_highest_peer(mdev->tconn) == R_PRIMARY)
+				rv = SS_O_VOL_PEER_PRI;
+		}
+	}
+
+	if (rv <= 0)
+		/* already found a reason to abort */;
+	else if (ns.role == R_SECONDARY && mdev->open_cnt)
+		rv = SS_DEVICE_IN_USE;
+
+	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
+		rv = SS_NO_UP_TO_DATE_DISK;
+
+	else if (fp >= FP_RESOURCE &&
+		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
+		rv = SS_PRIMARY_NOP;
+
+	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
+		rv = SS_NO_UP_TO_DATE_DISK;
+
+	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
+		rv = SS_NO_LOCAL_DISK;
+
+	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
+		rv = SS_NO_REMOTE_DISK;
+
+	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
+		rv = SS_NO_UP_TO_DATE_DISK;
+
+	else if ((ns.conn == C_CONNECTED ||
+		  ns.conn == C_WF_BITMAP_S ||
+		  ns.conn == C_SYNC_SOURCE ||
+		  ns.conn == C_PAUSED_SYNC_S) &&
+		  ns.disk == D_OUTDATED)
+		rv = SS_CONNECTED_OUTDATES;
+
+	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+		 (nc->verify_alg[0] == 0))
+		rv = SS_NO_VERIFY_ALG;
+
+	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+		  mdev->tconn->agreed_pro_version < 88)
+		rv = SS_NOT_SUPPORTED;
+
+	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
+		rv = SS_CONNECTED_OUTDATES;
+
+	rcu_read_unlock();
+
+	return rv;
+}
+
+/**
+ * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
+ * @os:		old state.
+ * @ns:		new state.
+ * @tconn:	DRBD connection.
+ *
+ * This function limits state transitions that may be declined by DRBD, i.e.
+ * user requests (aka soft transitions).
+ */
+static enum drbd_state_rv
+is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_tconn *tconn)
+{
+	enum drbd_state_rv rv = SS_SUCCESS;
+
+	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
+	    os.conn > C_CONNECTED)
+		rv = SS_RESYNC_RUNNING;
+
+	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
+		rv = SS_ALREADY_STANDALONE;
+
+	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
+		rv = SS_IS_DISKLESS;
+
+	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
+		rv = SS_NO_NET_CONFIG;
+
+	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
+		rv = SS_LOWER_THAN_OUTDATED;
+
+	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
+		rv = SS_IN_TRANSIENT_STATE;
+
+	/* if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
+	   rv = SS_IN_TRANSIENT_STATE; */
+
+	/* While establishing a connection only allow cstate to change.
+	   Delay/refuse role changes, detach attach etc... */
+	if (test_bit(STATE_SENT, &tconn->flags) &&
+	    !(os.conn == C_WF_REPORT_PARAMS ||
+	      (ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION)))
+		rv = SS_IN_TRANSIENT_STATE;
+
+	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
+		rv = SS_NEED_CONNECTION;
+
+	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
+	    ns.conn != os.conn && os.conn > C_CONNECTED)
+		rv = SS_RESYNC_RUNNING;
+
+	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
+	    os.conn < C_CONNECTED)
+		rv = SS_NEED_CONNECTION;
+
+	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
+	    && os.conn < C_WF_REPORT_PARAMS)
+		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */
+
+	return rv;
+}
+
+static enum drbd_state_rv
+is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc)
+{
+	/* no change -> nothing to do, at least for the connection part */
+	if (oc == nc)
+		return SS_NOTHING_TO_DO;
+
+	/* disconnect of an unconfigured connection does not make sense */
+	if (oc == C_STANDALONE && nc == C_DISCONNECTING)
+		return SS_ALREADY_STANDALONE;
+
+	/* from C_STANDALONE, we start with C_UNCONNECTED */
+	if (oc == C_STANDALONE && nc != C_UNCONNECTED)
+		return SS_NEED_CONNECTION;
+
+	/* When establishing a connection we need to go through WF_REPORT_PARAMS!
+	   Necessary to do the right thing upon invalidate-remote on a disconnected resource */
+	if (oc < C_WF_REPORT_PARAMS && nc >= C_CONNECTED)
+		return SS_NEED_CONNECTION;
+
+	/* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
+	if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING)
+		return SS_IN_TRANSIENT_STATE;
+
+	/* After C_DISCONNECTING only C_STANDALONE may follow */
+	if (oc == C_DISCONNECTING && nc != C_STANDALONE)
+		return SS_IN_TRANSIENT_STATE;
+
+	return SS_SUCCESS;
+}
+
+
+/**
+ * is_valid_transition() - Returns an SS_ error code if the state transition is not possible
+ * @os:		old state.
+ * @ns:		new state.
+ *
+ * This limits hard state transitions. Hard state transitions are facts that are
+ * imposed on DRBD by the environment, e.g. the disk broke or the network broke
+ * down. But even those hard state transitions are not allowed to do everything.
+ */
+static enum drbd_state_rv
+is_valid_transition(union drbd_state os, union drbd_state ns)
+{
+	enum drbd_state_rv rv;
+
+	rv = is_valid_conn_transition(os.conn, ns.conn);
+
+	/* we cannot fail (again) if we already detached */
+	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
+		rv = SS_IS_DISKLESS;
+
+	return rv;
+}
+
+static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
+{
+	static const char *msg_table[] = {
+		[NO_WARNING] = "",
+		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
+		[ABORTED_RESYNC] = "Resync aborted.",
+		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
+		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
+		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
+	};
+
+	if (warn != NO_WARNING)
+		dev_warn(DEV, "%s\n", msg_table[warn]);
+}
+
+/**
+ * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
+ * @mdev:	DRBD device.
+ * @ns:		new state.
+ * @warn:	pointer that may receive a warning code; may be NULL.
+ *
+ * When we lose connection, we have to set the state of the peer's disk (pdsk)
+ * to D_UNKNOWN. This rule and many more along those lines are in this function.
+ */
+static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state ns,
+				       enum sanitize_state_warnings *warn)
+{
+	enum drbd_fencing_p fp;
+	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;
+
+	if (warn)
+		*warn = NO_WARNING;
+
+	fp = FP_DONT_CARE;
+	if (get_ldev(mdev)) {
+		rcu_read_lock();
+		fp = rcu_dereference(mdev->ldev->disk_conf)->fencing;
+		rcu_read_unlock();
+		put_ldev(mdev);
+	}
+
+	/* Implications from connection to peer and peer_isp */
+	if (ns.conn < C_CONNECTED) {
+		ns.peer_isp = 0;
+		ns.peer = R_UNKNOWN;
+		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
+			ns.pdsk = D_UNKNOWN;
+	}
+
+	/* Clear the aftr_isp when becoming unconfigured */
+	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
+		ns.aftr_isp = 0;
+
+	/* An implication of the disk states onto the connection state */
+	/* Abort resync if a disk fails/detaches */
+	if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
+		if (warn)
+			*warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
+				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
+		ns.conn = C_CONNECTED;
+	}
+
+	/* Connection breaks down before we finished "Negotiating" */
+	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
+	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
+		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
+			ns.disk = mdev->new_state_tmp.disk;
+			ns.pdsk = mdev->new_state_tmp.pdsk;
+		} else {
+			if (warn)
+				*warn = CONNECTION_LOST_NEGOTIATING;
+			ns.disk = D_DISKLESS;
+			ns.pdsk = D_UNKNOWN;
+		}
+		put_ldev(mdev);
+	}
+
+	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
+	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
+		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
+			ns.disk = D_UP_TO_DATE;
+		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
+			ns.pdsk = D_UP_TO_DATE;
+	}
+
+	/* Implications of the connection state on the disk states */
+	disk_min = D_DISKLESS;
+	disk_max = D_UP_TO_DATE;
+	pdsk_min = D_INCONSISTENT;
+	pdsk_max = D_UNKNOWN;
+	switch ((enum drbd_conns)ns.conn) {
+	case C_WF_BITMAP_T:
+	case C_PAUSED_SYNC_T:
+	case C_STARTING_SYNC_T:
+	case C_WF_SYNC_UUID:
+	case C_BEHIND:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_OUTDATED;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_VERIFY_S:
+	case C_VERIFY_T:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_CONNECTED:
+		disk_min = D_DISKLESS;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_DISKLESS;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_WF_BITMAP_S:
+	case C_PAUSED_SYNC_S:
+	case C_STARTING_SYNC_S:
+	case C_AHEAD:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary */
+		break;
+	case C_SYNC_TARGET:
+		disk_min = D_INCONSISTENT;
+		disk_max = D_INCONSISTENT;
+		pdsk_min = D_UP_TO_DATE;
+		pdsk_max = D_UP_TO_DATE;
+		break;
+	case C_SYNC_SOURCE:
+		disk_min = D_UP_TO_DATE;
+		disk_max = D_UP_TO_DATE;
+		pdsk_min = D_INCONSISTENT;
+		pdsk_max = D_INCONSISTENT;
+		break;
+	case C_STANDALONE:
+	case C_DISCONNECTING:
+	case C_UNCONNECTED:
+	case C_TIMEOUT:
+	case C_BROKEN_PIPE:
+	case C_NETWORK_FAILURE:
+	case C_PROTOCOL_ERROR:
+	case C_TEAR_DOWN:
+	case C_WF_CONNECTION:
+	case C_WF_REPORT_PARAMS:
+	case C_MASK:
+		break;
+	}
+	if (ns.disk > disk_max)
+		ns.disk = disk_max;
+
+	if (ns.disk < disk_min) {
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_DISK;
+		ns.disk = disk_min;
+	}
+	if (ns.pdsk > pdsk_max)
+		ns.pdsk = pdsk_max;
+
+	if (ns.pdsk < pdsk_min) {
+		if (warn)
+			*warn = IMPLICITLY_UPGRADED_PDSK;
+		ns.pdsk = pdsk_min;
+	}
+
+	if (fp == FP_STONITH &&
+	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED))
+		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */
+
+	if (mdev->tconn->res_opts.on_no_data == OND_SUSPEND_IO &&
+	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+		ns.susp_nod = 1; /* Suspend IO while no data is accessible */
+
+	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
+		if (ns.conn == C_SYNC_SOURCE)
+			ns.conn = C_PAUSED_SYNC_S;
+		if (ns.conn == C_SYNC_TARGET)
+			ns.conn = C_PAUSED_SYNC_T;
+	} else {
+		if (ns.conn == C_PAUSED_SYNC_S)
+			ns.conn = C_SYNC_SOURCE;
+		if (ns.conn == C_PAUSED_SYNC_T)
+			ns.conn = C_SYNC_TARGET;
+	}
+
+	return ns;
+}
+
+void drbd_resume_al(struct drbd_conf *mdev)
+{
+	if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
+		dev_info(DEV, "Resumed AL updates\n");
+}
+
+/* helper for __drbd_set_state */
+static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
+{
+	if (mdev->tconn->agreed_pro_version < 90)
+		mdev->ov_start_sector = 0;
+	mdev->rs_total = drbd_bm_bits(mdev);
+	mdev->ov_position = 0;
+	if (cs == C_VERIFY_T) {
+		/* starting online verify from an arbitrary position
+		 * does not fit well into the existing protocol.
+		 * on C_VERIFY_T, we initialize ov_left and friends
+		 * implicitly in receive_DataRequest once the
+		 * first P_OV_REQUEST is received */
+		mdev->ov_start_sector = ~(sector_t)0;
+	} else {
+		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
+		if (bit >= mdev->rs_total) {
+			mdev->ov_start_sector =
+				BM_BIT_TO_SECT(mdev->rs_total - 1);
+			mdev->rs_total = 1;
+		} else
+			mdev->rs_total -= bit;
+		mdev->ov_position = mdev->ov_start_sector;
+	}
+	mdev->ov_left = mdev->rs_total;
+}
+
+/**
+ * __drbd_set_state() - Set a new DRBD state
+ * @mdev:	DRBD device.
+ * @ns:		new state.
+ * @flags:	Flags
+ * @done:	Optional completion that will be completed after after_state_ch() has finished
+ *
+ * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
+ */
+enum drbd_state_rv
+__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
+	         enum chg_state_flags flags, struct completion *done)
+{
+	union drbd_state os;
+	enum drbd_state_rv rv = SS_SUCCESS;
+	enum sanitize_state_warnings ssw;
+	struct after_state_chg_work *ascw;
+
+	os = drbd_read_state(mdev);
+
+	ns = sanitize_state(mdev, ns, &ssw);
+	if (ns.i == os.i)
+		return SS_NOTHING_TO_DO;
+
+	rv = is_valid_transition(os, ns);
+	if (rv < SS_SUCCESS)
+		return rv;
+
+	if (!(flags & CS_HARD)) {
+		/*  pre-state-change checks ; only look at ns  */
+		/* See drbd_state_sw_errors in drbd_strings.c */
+
+		rv = is_valid_state(mdev, ns);
+		if (rv < SS_SUCCESS) {
+			/* If the old state was illegal as well, then let
+			   this happen...*/
+
+			if (is_valid_state(mdev, os) == rv)
+				rv = is_valid_soft_transition(os, ns, mdev->tconn);
+		} else
+			rv = is_valid_soft_transition(os, ns, mdev->tconn);
+	}
+
+	if (rv < SS_SUCCESS) {
+		if (flags & CS_VERBOSE)
+			print_st_err(mdev, os, ns, rv);
+		return rv;
+	}
+
+	print_sanitize_warnings(mdev, ssw);
+
+	drbd_pr_state_change(mdev, os, ns, flags);
+
+	/* Display changes to the susp* flags that were caused by the call to
+	   sanitize_state(). Only display them here if we were not called from
+	   _conn_request_state() */
+	if (!(flags & CS_DC_SUSP))
+		conn_pr_state_change(mdev->tconn, os, ns, (flags & ~CS_DC_MASK) | CS_DC_SUSP);
+
+	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
+	 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
+	 * drbd_ldev_destroy() won't happen before our corresponding
+	 * after_state_ch works run, where we put_ldev again. */
+	if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
+	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
+		atomic_inc(&mdev->local_cnt);
+
+	mdev->state.i = ns.i;
+	mdev->tconn->susp = ns.susp;
+	mdev->tconn->susp_nod = ns.susp_nod;
+	mdev->tconn->susp_fen = ns.susp_fen;
+
+	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+		drbd_print_uuids(mdev, "attached to UUIDs");
+
+	/* Wake up role changes, that were delayed because of connection establishing */
+	if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS &&
+	    no_peer_wf_report_params(mdev->tconn))
+		clear_bit(STATE_SENT, &mdev->tconn->flags);
+
+	wake_up(&mdev->misc_wait);
+	wake_up(&mdev->state_wait);
+	wake_up(&mdev->tconn->ping_wait);
+
+	/* Aborted verify run, or we reached the stop sector.
+	 * Log the last position, unless end-of-device. */
+	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
+	    ns.conn <= C_CONNECTED) {
+		mdev->ov_start_sector =
+			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
+		if (mdev->ov_left)
+			dev_info(DEV, "Online Verify reached sector %llu\n",
+				(unsigned long long)mdev->ov_start_sector);
+	}
+
+	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
+	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
+		dev_info(DEV, "Syncer continues.\n");
+		mdev->rs_paused += (long)jiffies
+				  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
+		if (ns.conn == C_SYNC_TARGET)
+			mod_timer(&mdev->resync_timer, jiffies);
+	}
+
+	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
+	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
+		dev_info(DEV, "Resync suspended\n");
+		mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
+	}
+
+	if (os.conn == C_CONNECTED &&
+	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
+		unsigned long now = jiffies;
+		int i;
+
+		set_ov_position(mdev, ns.conn);
+		mdev->rs_start = now;
+		mdev->rs_last_events = 0;
+		mdev->rs_last_sect_ev = 0;
+		mdev->ov_last_oos_size = 0;
+		mdev->ov_last_oos_start = 0;
+
+		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
+			mdev->rs_mark_left[i] = mdev->ov_left;
+			mdev->rs_mark_time[i] = now;
+		}
+
+		drbd_rs_controller_reset(mdev);
+
+		if (ns.conn == C_VERIFY_S) {
+			dev_info(DEV, "Starting Online Verify from sector %llu\n",
+					(unsigned long long)mdev->ov_position);
+			mod_timer(&mdev->resync_timer, jiffies);
+		}
+	}
+
+	if (get_ldev(mdev)) {
+		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
+						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
+						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);
+
+		mdf &= ~MDF_AL_CLEAN;
+		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
+			mdf |= MDF_CRASHED_PRIMARY;
+		if (mdev->state.role == R_PRIMARY ||
+		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
+			mdf |= MDF_PRIMARY_IND;
+		if (mdev->state.conn > C_WF_REPORT_PARAMS)
+			mdf |= MDF_CONNECTED_IND;
+		if (mdev->state.disk > D_INCONSISTENT)
+			mdf |= MDF_CONSISTENT;
+		if (mdev->state.disk > D_OUTDATED)
+			mdf |= MDF_WAS_UP_TO_DATE;
+		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
+			mdf |= MDF_PEER_OUT_DATED;
+		if (mdf != mdev->ldev->md.flags) {
+			mdev->ldev->md.flags = mdf;
+			drbd_md_mark_dirty(mdev);
+		}
+		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
+			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
+		put_ldev(mdev);
+	}
+
+	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
+	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
+	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
+		set_bit(CONSIDER_RESYNC, &mdev->flags);
+
+	/* Receiver should clean up itself */
+	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
+		drbd_thread_stop_nowait(&mdev->tconn->receiver);
+
+	/* Now the receiver finished cleaning up itself, it should die */
+	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
+		drbd_thread_stop_nowait(&mdev->tconn->receiver);
+
+	/* Upon network failure, we need to restart the receiver. */
+	if (os.conn > C_WF_CONNECTION &&
+	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
+		drbd_thread_restart_nowait(&mdev->tconn->receiver);
+
+	/* Resume AL writing if we get a connection */
+	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+		drbd_resume_al(mdev);
+
+	/* remember last attach time so request_timer_fn() won't
+	 * kill newly established sessions while we are still trying to thaw
+	 * previously frozen IO */
+	if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
+	    ns.disk > D_NEGOTIATING)
+		mdev->last_reattach_jif = jiffies;
+
+	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
+	if (ascw) {
+		ascw->os = os;
+		ascw->ns = ns;
+		ascw->flags = flags;
+		ascw->w.cb = w_after_state_ch;
+		ascw->w.mdev = mdev;
+		ascw->done = done;
+		drbd_queue_work(&mdev->tconn->sender_work, &ascw->w);
+	} else {
+		dev_err(DEV, "Could not kmalloc an ascw\n");
+	}
+
+	return rv;
+}
+
+static int w_after_state_ch(struct drbd_work *w, int unused)
+{
+	struct after_state_chg_work *ascw =
+		container_of(w, struct after_state_chg_work, w);
+	struct drbd_conf *mdev = w->mdev;
+
+	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
+	if (ascw->flags & CS_WAIT_COMPLETE) {
+		D_ASSERT(ascw->done != NULL);
+		complete(ascw->done);
+	}
+	kfree(ascw);
+
+	return 0;
+}
+
+static void abw_start_sync(struct drbd_conf *mdev, int rv)
+{
+	if (rv) {
+		dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
+		_drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
+		return;
+	}
+
+	switch (mdev->state.conn) {
+	case C_STARTING_SYNC_T:
+		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
+		break;
+	case C_STARTING_SYNC_S:
+		drbd_start_resync(mdev, C_SYNC_SOURCE);
+		break;
+	}
+}
+
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+		int (*io_fn)(struct drbd_conf *),
+		char *why, enum bm_flag flags)
+{
+	int rv;
+
+	D_ASSERT(current == mdev->tconn->worker.task);
+
+	/* open coded non-blocking drbd_suspend_io(mdev); */
+	set_bit(SUSPEND_IO, &mdev->flags);
+
+	drbd_bm_lock(mdev, why, flags);
+	rv = io_fn(mdev);
+	drbd_bm_unlock(mdev);
+
+	drbd_resume_io(mdev);
+
+	return rv;
+}
+
+/**
+ * after_state_ch() - Perform after state change actions that may sleep
+ * @mdev:	DRBD device.
+ * @os:		old state.
+ * @ns:		new state.
+ * @flags:	Flags
+ */
+static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
+			   union drbd_state ns, enum chg_state_flags flags)
+{
+	struct sib_info sib;
+
+	sib.sib_reason = SIB_STATE_CHANGE;
+	sib.os = os;
+	sib.ns = ns;
+
+	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
+		clear_bit(CRASHED_PRIMARY, &mdev->flags);
+		if (mdev->p_uuid)
+			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
+	}
+
+	/* Inform userspace about the change... */
+	drbd_bcast_event(mdev, &sib);
+
+	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
+	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
+		drbd_khelper(mdev, "pri-on-incon-degr");
+
+	/* Here we have the actions that are performed after a
+	   state change. This function might sleep */
+
+	if (ns.susp_nod) {
+		struct drbd_tconn *tconn = mdev->tconn;
+		enum drbd_req_event what = NOTHING;
+
+		spin_lock_irq(&tconn->req_lock);
+		if (os.conn < C_CONNECTED && conn_lowest_conn(tconn) >= C_CONNECTED)
+			what = RESEND;
+
+		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
+		    conn_lowest_disk(tconn) > D_NEGOTIATING)
+			what = RESTART_FROZEN_DISK_IO;
+
+		if (tconn->susp_nod && what != NOTHING) {
+			_tl_restart(tconn, what);
+			_conn_request_state(tconn,
+					    (union drbd_state) { { .susp_nod = 1 } },
+					    (union drbd_state) { { .susp_nod = 0 } },
+					    CS_VERBOSE);
+		}
+		spin_unlock_irq(&tconn->req_lock);
+	}
+
+	if (ns.susp_fen) {
+		struct drbd_tconn *tconn = mdev->tconn;
+
+		spin_lock_irq(&tconn->req_lock);
+		if (tconn->susp_fen && conn_lowest_conn(tconn) >= C_CONNECTED) {
+			/* case2: The connection was established again: */
+			struct drbd_conf *odev;
+			int vnr;
+
+			rcu_read_lock();
+			idr_for_each_entry(&tconn->volumes, odev, vnr)
+				clear_bit(NEW_CUR_UUID, &odev->flags);
+			rcu_read_unlock();
+			_tl_restart(tconn, RESEND);
+			_conn_request_state(tconn,
+					    (union drbd_state) { { .susp_fen = 1 } },
+					    (union drbd_state) { { .susp_fen = 0 } },
+					    CS_VERBOSE);
+		}
+		spin_unlock_irq(&tconn->req_lock);
+	}
+
+	/* Became sync source.  With protocol >= 96, we still need to send out
+	 * the sync uuid now. Need to do that before any drbd_send_state, or
+	 * the other side may go "paused sync" before receiving the sync uuids,
+	 * which is unexpected. */
+	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+	    mdev->tconn->agreed_pro_version >= 96 && get_ldev(mdev)) {
+		drbd_gen_and_send_sync_uuid(mdev);
+		put_ldev(mdev);
+	}
+
+	/* Do not change the order of the if above and the two below... */
+	if (os.pdsk == D_DISKLESS &&
+	    ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) {      /* attach on the peer */
+		/* we probably will start a resync soon.
+		 * make sure those things are properly reset. */
+		mdev->rs_total = 0;
+		mdev->rs_failed = 0;
+		atomic_set(&mdev->rs_pending_cnt, 0);
+		drbd_rs_cancel_all(mdev);
+
+		drbd_send_uuids(mdev);
+		drbd_send_state(mdev, ns);
+	}
+	/* No point in queuing send_bitmap if we don't have a connection
+	 * anymore, so check also the _current_ state, not only the new state
+	 * at the time this work was queued. */
+	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+	    mdev->state.conn == C_WF_BITMAP_S)
+		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+				"send_bitmap (WFBitMapS)",
+				BM_LOCKED_TEST_ALLOWED);
+
+	/* Lost contact to peer's copy of the data */
+	if ((os.pdsk >= D_INCONSISTENT &&
+	     os.pdsk != D_UNKNOWN &&
+	     os.pdsk != D_OUTDATED)
+	&&  (ns.pdsk < D_INCONSISTENT ||
+	     ns.pdsk == D_UNKNOWN ||
+	     ns.pdsk == D_OUTDATED)) {
+		if (get_ldev(mdev)) {
+			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
+			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+				if (drbd_suspended(mdev)) {
+					set_bit(NEW_CUR_UUID, &mdev->flags);
+				} else {
+					drbd_uuid_new_current(mdev);
+					drbd_send_uuids(mdev);
+				}
+			}
+			put_ldev(mdev);
+		}
+	}
+
+	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
+		if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
+		    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
+			drbd_uuid_new_current(mdev);
+			drbd_send_uuids(mdev);
+		}
+		/* D_DISKLESS Peer becomes secondary */
+		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
+			/* We may still be Primary ourselves.
+			 * No harm done if the bitmap still changes,
+			 * redirtied pages will follow later. */
+			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
+		put_ldev(mdev);
+	}
+
+	/* Write out all changed bits on demote.
+	 * Though, no need to do that just yet
+	 * if a resync is still going on */
+	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+		mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+		/* No changes to the bitmap expected this time, so assert that,
+		 * even though no harm was done if it did change. */
+		drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+				"demote", BM_LOCKED_TEST_ALLOWED);
+		put_ldev(mdev);
+	}
+
+	/* Last part of the attaching process ... */
+	if (ns.conn >= C_CONNECTED &&
+	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
+		drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
+		drbd_send_uuids(mdev);
+		drbd_send_state(mdev, ns);
+	}
+
+	/* We want to pause/continue resync, tell peer. */
+	if (ns.conn >= C_CONNECTED &&
+	     ((os.aftr_isp != ns.aftr_isp) ||
+	      (os.user_isp != ns.user_isp)))
+		drbd_send_state(mdev, ns);
+
+	/* In case one of the isp bits got set, suspend other devices. */
+	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
+	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
+		suspend_other_sg(mdev);
+
+	/* Make sure the peer gets informed about eventual state
+	   changes (ISP bits) while we were in WFReportParams. */
+	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
+		drbd_send_state(mdev, ns);
+
+	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
+		drbd_send_state(mdev, ns);
+
+	/* We are in the progress to start a full sync... */
+	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
+	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
+		/* no other bitmap changes expected during this phase */
+		drbd_queue_bitmap_io(mdev,
+			&drbd_bmio_set_n_write, &abw_start_sync,
+			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
+
+	/* We are invalidating our self... */
+	if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
+	    os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
+		/* other bitmap operation expected during this phase */
+		drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
+			"set_n_write from invalidate", BM_LOCKED_MASK);
+
+	/* first half of local IO error, failure to attach,
+	 * or administrative detach */
+	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
+		enum drbd_io_error_p eh = EP_PASS_ON;
+		int was_io_error = 0;
+		/* corresponding get_ldev was in __drbd_set_state, to serialize
+		 * our cleanup here with the transition to D_DISKLESS.
+		 * But it is still not safe to dereference ldev here, since
+		 * we might come from a failed Attach before ldev was set. */
+		if (mdev->ldev) {
+			rcu_read_lock();
+			eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
+			rcu_read_unlock();
+
+			was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
+
+			if (was_io_error && eh == EP_CALL_HELPER)
+				drbd_khelper(mdev, "local-io-error");
+
+			/* Immediately allow completion of all application IO,
+			 * that waits for completion from the local disk,
+			 * if this was a force-detach due to disk_timeout
+			 * or administrator request (drbdsetup detach --force).
+			 * Do NOT abort otherwise.
+			 * Aborting local requests may cause serious problems,
+			 * if requests are completed to upper layers already,
+			 * and then later the already submitted local bio completes.
+			 * This can cause DMA into former bio pages that meanwhile
+			 * have been re-used for other things.
+			 * So aborting local requests may cause crashes,
+			 * or even worse, silent data corruption.
+			 */
+			if (test_and_clear_bit(FORCE_DETACH, &mdev->flags))
+				tl_abort_disk_io(mdev);
+
+			/* current state still has to be D_FAILED,
+			 * there is only one way out: to D_DISKLESS,
+			 * and that may only happen after our put_ldev below. */
+			if (mdev->state.disk != D_FAILED)
+				dev_err(DEV,
+					"ASSERT FAILED: disk is %s during detach\n",
+					drbd_disk_str(mdev->state.disk));
+
+			if (ns.conn >= C_CONNECTED)
+				drbd_send_state(mdev, ns);
+
+			drbd_rs_cancel_all(mdev);
+
+			/* In case we want to get something to stable storage still,
+			 * this may be the last chance.
+			 * Following put_ldev may transition to D_DISKLESS. */
+			drbd_md_sync(mdev);
+		}
+		put_ldev(mdev);
+	}
+
+	/* second half of local IO error, failure to attach,
+	 * or administrative detach,
+	 * after local_cnt references have reached zero again */
+	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
+		/* We must still be diskless,
+		 * re-attach has to be serialized with this! */
+		if (mdev->state.disk != D_DISKLESS)
+			dev_err(DEV,
+				"ASSERT FAILED: disk is %s while going diskless\n",
+				drbd_disk_str(mdev->state.disk));
+
+		if (ns.conn >= C_CONNECTED)
+			drbd_send_state(mdev, ns);
+		/* corresponding get_ldev in __drbd_set_state
+		 * this may finally trigger drbd_ldev_destroy. */
+		put_ldev(mdev);
+	}
+
+	/* Notify peer that I had a local IO error and did not detach. */
+	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
+		drbd_send_state(mdev, ns);
+
+	/* Disks got bigger while they were detached */
+	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
+	    test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
+		if (ns.conn == C_CONNECTED)
+			resync_after_online_grow(mdev);
+	}
+
+	/* A resync finished or aborted, wake paused devices... */
+	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
+	    (os.peer_isp && !ns.peer_isp) ||
+	    (os.user_isp && !ns.user_isp))
+		resume_next_sg(mdev);
+
+	/* sync target done with resync.  Explicitly notify peer, even though
+	 * it should (at least for non-empty resyncs) already know itself. */
+	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
+		drbd_send_state(mdev, ns);
+
+	/* Verify finished, or reached stop sector.  Peer did not know about
+	 * the stop sector, and we may even have changed the stop sector during
+	 * verify to interrupt/stop early.  Send the new state. */
+	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
+	&& verify_can_do_stop_sector(mdev))
+		drbd_send_state(mdev, ns);
+
+	/* This triggers bitmap writeout of potentially still unwritten pages
+	 * if the resync finished cleanly, or aborted because of peer disk
+	 * failure, or because of connection loss.
+	 * For resync aborted because of local disk failure, we cannot do
+	 * any bitmap writeout anymore.
+	 * No harm done if some bits change during this phase.
+	 */
+	if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
+		drbd_queue_bitmap_io(mdev, &drbd_bm_write_copy_pages, NULL,
+			"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
+		put_ldev(mdev);
+	}
+
+	if (ns.disk == D_DISKLESS &&
+	    ns.conn == C_STANDALONE &&
+	    ns.role == R_SECONDARY) {
+		if (os.aftr_isp != ns.aftr_isp)
+			resume_next_sg(mdev);
+	}
+
+	drbd_md_sync(mdev);
+}
+
+struct after_conn_state_chg_work {
+	struct drbd_work w;
+	enum drbd_conns oc;
+	union drbd_state ns_min;
+	union drbd_state ns_max; /* new, max state, over all mdevs */
+	enum chg_state_flags flags;
+};
+
+static int w_after_conn_state_ch(struct drbd_work *w, int unused)
+{
+	struct after_conn_state_chg_work *acscw =
+		container_of(w, struct after_conn_state_chg_work, w);
+	struct drbd_tconn *tconn = w->tconn;
+	enum drbd_conns oc = acscw->oc;
+	union drbd_state ns_max = acscw->ns_max;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	kfree(acscw);
+
+	/* Upon network configuration, we need to start the receiver */
+	if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
+		drbd_thread_start(&tconn->receiver);
+
+	if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
+		struct net_conf *old_conf;
+
+		mutex_lock(&tconn->conf_update);
+		old_conf = tconn->net_conf;
+		tconn->my_addr_len = 0;
+		tconn->peer_addr_len = 0;
+		rcu_assign_pointer(tconn->net_conf, NULL);
+		conn_free_crypto(tconn);
+		mutex_unlock(&tconn->conf_update);
+
+		synchronize_rcu();
+		kfree(old_conf);
+	}
+
+	if (ns_max.susp_fen) {
+		/* case1: The outdate peer handler is successful: */
+		if (ns_max.pdsk <= D_OUTDATED) {
+			rcu_read_lock();
+			idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+				if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
+					drbd_uuid_new_current(mdev);
+					clear_bit(NEW_CUR_UUID, &mdev->flags);
+				}
+			}
+			rcu_read_unlock();
+			spin_lock_irq(&tconn->req_lock);
+			_tl_restart(tconn, CONNECTION_LOST_WHILE_PENDING);
+			_conn_request_state(tconn,
+					    (union drbd_state) { { .susp_fen = 1 } },
+					    (union drbd_state) { { .susp_fen = 0 } },
+					    CS_VERBOSE);
+			spin_unlock_irq(&tconn->req_lock);
+		}
+	}
+	kref_put(&tconn->kref, &conn_destroy);
+
+	conn_md_sync(tconn);
+
+	return 0;
+}
+
+void conn_old_common_state(struct drbd_tconn *tconn, union drbd_state *pcs, enum chg_state_flags *pf)
+{
+	enum chg_state_flags flags = ~0;
+	struct drbd_conf *mdev;
+	int vnr, first_vol = 1;
+	union drbd_dev_state os, cs = {
+		{ .role = R_SECONDARY,
+		  .peer = R_UNKNOWN,
+		  .conn = tconn->cstate,
+		  .disk = D_DISKLESS,
+		  .pdsk = D_UNKNOWN,
+		} };
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		os = mdev->state;
+
+		if (first_vol) {
+			cs = os;
+			first_vol = 0;
+			continue;
+		}
+
+		if (cs.role != os.role)
+			flags &= ~CS_DC_ROLE;
+
+		if (cs.peer != os.peer)
+			flags &= ~CS_DC_PEER;
+
+		if (cs.conn != os.conn)
+			flags &= ~CS_DC_CONN;
+
+		if (cs.disk != os.disk)
+			flags &= ~CS_DC_DISK;
+
+		if (cs.pdsk != os.pdsk)
+			flags &= ~CS_DC_PDSK;
+	}
+	rcu_read_unlock();
+
+	*pf |= CS_DC_MASK;
+	*pf &= flags;
+	(*pcs).i = cs.i;
+}
+
+static enum drbd_state_rv
+conn_is_valid_transition(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+			 enum chg_state_flags flags)
+{
+	enum drbd_state_rv rv = SS_SUCCESS;
+	union drbd_state ns, os;
+	struct drbd_conf *mdev;
+	int vnr;
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		os = drbd_read_state(mdev);
+		ns = sanitize_state(mdev, apply_mask_val(os, mask, val), NULL);
+
+		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
+			ns.disk = os.disk;
+
+		if (ns.i == os.i)
+			continue;
+
+		rv = is_valid_transition(os, ns);
+		if (rv < SS_SUCCESS)
+			break;
+
+		if (!(flags & CS_HARD)) {
+			rv = is_valid_state(mdev, ns);
+			if (rv < SS_SUCCESS) {
+				if (is_valid_state(mdev, os) == rv)
+					rv = is_valid_soft_transition(os, ns, tconn);
+			} else
+				rv = is_valid_soft_transition(os, ns, tconn);
+		}
+		if (rv < SS_SUCCESS)
+			break;
+	}
+	rcu_read_unlock();
+
+	if (rv < SS_SUCCESS && flags & CS_VERBOSE)
+		print_st_err(mdev, os, ns, rv);
+
+	return rv;
+}
+
+void
+conn_set_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+	       union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
+{
+	union drbd_state ns, os, ns_max = { };
+	union drbd_state ns_min = {
+		{ .role = R_MASK,
+		  .peer = R_MASK,
+		  .conn = val.conn,
+		  .disk = D_MASK,
+		  .pdsk = D_MASK
+		} };
+	struct drbd_conf *mdev;
+	enum drbd_state_rv rv;
+	int vnr, number_of_volumes = 0;
+
+	if (mask.conn == C_MASK) {
+		/* remember last connect time so request_timer_fn() won't
+		 * kill newly established sessions while we are still trying to thaw
+		 * previously frozen IO */
+		if (tconn->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS)
+			tconn->last_reconnect_jif = jiffies;
+
+		tconn->cstate = val.conn;
+	}
+
+	rcu_read_lock();
+	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
+		number_of_volumes++;
+		os = drbd_read_state(mdev);
+		ns = apply_mask_val(os, mask, val);
+		ns = sanitize_state(mdev, ns, NULL);
+
+		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
+			ns.disk = os.disk;
+
+		rv = __drbd_set_state(mdev, ns, flags, NULL);
+		if (rv < SS_SUCCESS)
+			BUG();
+
+		ns.i = mdev->state.i;
+		ns_max.role = max_role(ns.role, ns_max.role);
+		ns_max.peer = max_role(ns.peer, ns_max.peer);
+		ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn);
+		ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk);
+		ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk);
+
+		ns_min.role = min_role(ns.role, ns_min.role);
+		ns_min.peer = min_role(ns.peer, ns_min.peer);
+		ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn);
+		ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk);
+		ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk);
+	}
+	rcu_read_unlock();
+
+	if (number_of_volumes == 0) {
+		ns_min = ns_max = (union drbd_state) { {
+				.role = R_SECONDARY,
+				.peer = R_UNKNOWN,
+				.conn = val.conn,
+				.disk = D_DISKLESS,
+				.pdsk = D_UNKNOWN
+			} };
+	}
+
+	ns_min.susp = ns_max.susp = tconn->susp;
+	ns_min.susp_nod = ns_max.susp_nod = tconn->susp_nod;
+	ns_min.susp_fen = ns_max.susp_fen = tconn->susp_fen;
+
+	*pns_min = ns_min;
+	*pns_max = ns_max;
+}
+
+static enum drbd_state_rv
+_conn_rq_cond(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val)
+{
+	enum drbd_state_rv rv;
+
+	if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags))
+		return SS_CW_SUCCESS;
+
+	if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags))
+		return SS_CW_FAILED_BY_PEER;
+
+	rv = tconn->cstate != C_WF_REPORT_PARAMS ? SS_CW_NO_NEED : SS_UNKNOWN_ERROR;
+
+	if (rv == SS_UNKNOWN_ERROR)
+		rv = conn_is_valid_transition(tconn, mask, val, 0);
+
+	if (rv == SS_SUCCESS)
+		rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
+
+	return rv;
+}
+
+enum drbd_state_rv
+_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+		    enum chg_state_flags flags)
+{
+	enum drbd_state_rv rv = SS_SUCCESS;
+	struct after_conn_state_chg_work *acscw;
+	enum drbd_conns oc = tconn->cstate;
+	union drbd_state ns_max, ns_min, os;
+	bool have_mutex = false;
+
+	if (mask.conn) {
+		rv = is_valid_conn_transition(oc, val.conn);
+		if (rv < SS_SUCCESS)
+			goto abort;
+	}
+
+	rv = conn_is_valid_transition(tconn, mask, val, flags);
+	if (rv < SS_SUCCESS)
+		goto abort;
+
+	if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING &&
+	    !(flags & (CS_LOCAL_ONLY | CS_HARD))) {
+
+		/* This will be a cluster-wide state change.
+		 * Need to give up the spinlock, grab the mutex,
+		 * then send the state change request, ... */
+		spin_unlock_irq(&tconn->req_lock);
+		mutex_lock(&tconn->cstate_mutex);
+		have_mutex = true;
+
+		set_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
+		if (conn_send_state_req(tconn, mask, val)) {
+			/* sending failed. */
+			clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
+			rv = SS_CW_FAILED_BY_PEER;
+			/* need to re-acquire the spin lock, though */
+			goto abort_unlocked;
+		}
+
+		if (val.conn == C_DISCONNECTING)
+			set_bit(DISCONNECT_SENT, &tconn->flags);
+
+		/* ... and re-acquire the spinlock.
+		 * If _conn_rq_cond() returned >= SS_SUCCESS, we must call
+		 * conn_set_state() within the same spinlock. */
+		spin_lock_irq(&tconn->req_lock);
+		wait_event_lock_irq(tconn->ping_wait,
+				(rv = _conn_rq_cond(tconn, mask, val)),
+				tconn->req_lock);
+		clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags);
+		if (rv < SS_SUCCESS)
+			goto abort;
+	}
+
+	conn_old_common_state(tconn, &os, &flags);
+	flags |= CS_DC_SUSP;
+	conn_set_state(tconn, mask, val, &ns_min, &ns_max, flags);
+	conn_pr_state_change(tconn, os, ns_max, flags);
+
+	acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
+	if (acscw) {
+		acscw->oc = os.conn;
+		acscw->ns_min = ns_min;
+		acscw->ns_max = ns_max;
+		acscw->flags = flags;
+		acscw->w.cb = w_after_conn_state_ch;
+		kref_get(&tconn->kref);
+		acscw->w.tconn = tconn;
+		drbd_queue_work(&tconn->sender_work, &acscw->w);
+	} else {
+		conn_err(tconn, "Could not kmalloc an acscw\n");
+	}
+
+ abort:
+	if (have_mutex) {
+		/* mutex_unlock() "... must not be used in interrupt context.",
+		 * so give up the spinlock, then re-acquire it */
+		spin_unlock_irq(&tconn->req_lock);
+ abort_unlocked:
+		mutex_unlock(&tconn->cstate_mutex);
+		spin_lock_irq(&tconn->req_lock);
+	}
+	if (rv < SS_SUCCESS && flags & CS_VERBOSE) {
+		conn_err(tconn, "State change failed: %s\n", drbd_set_st_err_str(rv));
+		conn_err(tconn, " mask = 0x%x val = 0x%x\n", mask.i, val.i);
+		conn_err(tconn, " old_conn:%s wanted_conn:%s\n", drbd_conn_str(oc), drbd_conn_str(val.conn));
+	}
+	return rv;
+}
+
+enum drbd_state_rv
+conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+		   enum chg_state_flags flags)
+{
+	enum drbd_state_rv rv;
+
+	spin_lock_irq(&tconn->req_lock);
+	rv = _conn_request_state(tconn, mask, val, flags);
+	spin_unlock_irq(&tconn->req_lock);
+
+	return rv;
+}
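
A typical caller sketch (illustrative, assuming a valid tconn; this mirrors how a graceful disconnect would be requested for all volumes of a connection):

	/* Sketch: request C_DISCONNECTING cluster-wide.  CS_ORDERED
	 * (CS_WAIT_COMPLETE + CS_SERIALIZE) serializes the change and waits
	 * for the after-state work to finish. */
	rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_ORDERED);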

+ 161 - 0
drivers/block/drbd/drbd_state.h

@@ -0,0 +1,161 @@
+#ifndef DRBD_STATE_H
+#define DRBD_STATE_H
+
+struct drbd_conf;
+struct drbd_tconn;
+
+/**
+ * DOC: DRBD State macros
+ *
+ * These macros are used to express state changes in easily readable form.
+ *
+ * The NS macros expand to a mask and a value that can be bit-ORed onto the
+ * current state once the spinlock (req_lock) has been taken.
+ *
+ * The _NS macros are used for state functions that get called with the
+ * spinlock held. These macros expand directly to the new state value.
+ *
+ * Besides the basic forms NS() and _NS(), additional _?NS[23] variants are
+ * defined to express state changes that affect more than one aspect of the
+ * state.
+ *
+ * E.g. NS2(conn, C_CONNECTED, peer, R_SECONDARY)
+ * means that the network connection was established and that the peer
+ * is in secondary role.
+ */
+#define role_MASK R_MASK
+#define peer_MASK R_MASK
+#define disk_MASK D_MASK
+#define pdsk_MASK D_MASK
+#define conn_MASK C_MASK
+#define susp_MASK 1
+#define user_isp_MASK 1
+#define aftr_isp_MASK 1
+#define susp_nod_MASK 1
+#define susp_fen_MASK 1
+
+#define NS(T, S) \
+	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
+	({ union drbd_state val; val.i = 0; val.T = (S); val; })
+#define NS2(T1, S1, T2, S2) \
+	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+	  mask.T2 = T2##_MASK; mask; }), \
+	({ union drbd_state val; val.i = 0; val.T1 = (S1); \
+	  val.T2 = (S2); val; })
+#define NS3(T1, S1, T2, S2, T3, S3) \
+	({ union drbd_state mask; mask.i = 0; mask.T1 = T1##_MASK; \
+	  mask.T2 = T2##_MASK; mask.T3 = T3##_MASK; mask; }), \
+	({ union drbd_state val;  val.i = 0; val.T1 = (S1); \
+	  val.T2 = (S2); val.T3 = (S3); val; })
+
+#define _NS(D, T, S) \
+	D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T = (S); __ns; })
+#define _NS2(D, T1, S1, T2, S2) \
+	D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
+	__ns.T2 = (S2); __ns; })
+#define _NS3(D, T1, S1, T2, S2, T3, S3) \
+	D, ({ union drbd_state __ns; __ns = drbd_read_state(D); __ns.T1 = (S1); \
+	__ns.T2 = (S2); __ns.T3 = (S3); __ns; })
+
+enum chg_state_flags {
+	CS_HARD	         = 1 << 0,
+	CS_VERBOSE       = 1 << 1,
+	CS_WAIT_COMPLETE = 1 << 2,
+	CS_SERIALIZE     = 1 << 3,
+	CS_ORDERED       = CS_WAIT_COMPLETE + CS_SERIALIZE,
+	CS_LOCAL_ONLY    = 1 << 4, /* Do not consider a device pair wide state change */
+	CS_DC_ROLE       = 1 << 5, /* DC = display as connection state change */
+	CS_DC_PEER       = 1 << 6,
+	CS_DC_CONN       = 1 << 7,
+	CS_DC_DISK       = 1 << 8,
+	CS_DC_PDSK       = 1 << 9,
+	CS_DC_SUSP       = 1 << 10,
+	CS_DC_MASK       = CS_DC_ROLE + CS_DC_PEER + CS_DC_CONN + CS_DC_DISK + CS_DC_PDSK,
+	CS_IGN_OUTD_FAIL = 1 << 11,
+};
+
+/* drbd_dev_state and drbd_state are different types. This is to stress the
+   small difference: drbd_dev_state has no suspended flag (.susp) and no
+   suspended-while-fence-handler-runs flag (susp_fen). */
+union drbd_dev_state {
+	struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */
+		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */
+		unsigned conn:5 ;   /* 17/32	 cstates */
+		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned _unused:1 ;
+		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
+		unsigned peer_isp:1 ;
+		unsigned user_isp:1 ;
+		unsigned _pad:11;   /* 0	 unused */
+#elif defined(__BIG_ENDIAN_BITFIELD)
+		unsigned _pad:11;
+		unsigned user_isp:1 ;
+		unsigned peer_isp:1 ;
+		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
+		unsigned _unused:1 ;
+		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
+		unsigned conn:5 ;   /* 17/32	 cstates */
+		unsigned peer:2 ;   /* 3/4	 primary/secondary/unknown */
+		unsigned role:2 ;   /* 3/4	 primary/secondary/unknown */
+#else
+# error "this endianess is not supported"
+#endif
+	};
+	unsigned int i;
+};
+
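Because the bitfields overlay a single unsigned int (i), the whole device state can be captured in one assignment and then inspected field by field, as conn_old_common_state() in drbd_state.c above does. A hedged sketch:

	/* Sketch: snapshot the device state, then test fields on the
	 * self-consistent copy instead of re-reading mdev->state. */
	union drbd_dev_state s = mdev->state;
	if (s.role == R_PRIMARY && s.conn >= C_CONNECTED)
		/* we are primary and the peer is reachable ... */;
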
+extern enum drbd_state_rv drbd_change_state(struct drbd_conf *mdev,
+					    enum chg_state_flags f,
+					    union drbd_state mask,
+					    union drbd_state val);
+extern void drbd_force_state(struct drbd_conf *, union drbd_state,
+			union drbd_state);
+extern enum drbd_state_rv _drbd_request_state(struct drbd_conf *,
+					      union drbd_state,
+					      union drbd_state,
+					      enum chg_state_flags);
+extern enum drbd_state_rv __drbd_set_state(struct drbd_conf *, union drbd_state,
+					   enum chg_state_flags,
+					   struct completion *done);
+extern void print_st_err(struct drbd_conf *, union drbd_state,
+			union drbd_state, int);
+
+enum drbd_state_rv
+_conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+		    enum chg_state_flags flags);
+
+enum drbd_state_rv
+conn_request_state(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state val,
+		   enum chg_state_flags flags);
+
+extern void drbd_resume_al(struct drbd_conf *mdev);
+extern bool conn_all_vols_unconf(struct drbd_tconn *tconn);
+
+/**
+ * drbd_request_state() - Request a state change
+ * @mdev:	DRBD device.
+ * @mask:	mask of state bits to change.
+ * @val:	value of new state bits.
+ *
+ * This is the most graceful way of requesting a state change. It is
+ * quite verbose in case the state change is not possible, and all those
+ * state changes are globally serialized.
+ */
+static inline int drbd_request_state(struct drbd_conf *mdev,
+				     union drbd_state mask,
+				     union drbd_state val)
+{
+	return _drbd_request_state(mdev, mask, val, CS_VERBOSE + CS_ORDERED);
+}
+
+enum drbd_role conn_highest_role(struct drbd_tconn *tconn);
+enum drbd_role conn_highest_peer(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_highest_disk(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_lowest_disk(struct drbd_tconn *tconn);
+enum drbd_disk_state conn_highest_pdsk(struct drbd_tconn *tconn);
+enum drbd_conns conn_lowest_conn(struct drbd_tconn *tconn);
+
+#endif

+ 1 - 0
drivers/block/drbd/drbd_strings.c

@@ -89,6 +89,7 @@ static const char *drbd_state_sw_errors[] = {
 	[-SS_LOWER_THAN_OUTDATED] = "Disk state is lower than outdated",
 	[-SS_IN_TRANSIENT_STATE] = "In transient state, retry after next state change",
 	[-SS_CONCURRENT_ST_CHG] = "Concurrent state changes detected and aborted",
+	[-SS_O_VOL_PEER_PRI] = "Other vol primary on peer not allowed by config",
 };
 
 const char *drbd_conn_str(enum drbd_conns s)

These changes were suppressed because the diff is too large
+ 307 - 267
drivers/block/drbd/drbd_worker.c


+ 3 - 8
drivers/block/drbd/drbd_wrappers.h

@@ -3,6 +3,7 @@
 
 #include <linux/ctype.h>
 #include <linux/mm.h>
+#include "drbd_int.h"
 
 /* see get_sb_bdev and bd_claim */
 extern char *drbd_sec_holder;
@@ -20,8 +21,8 @@ static inline void drbd_set_my_capacity(struct drbd_conf *mdev,
 
 /* bi_end_io handlers */
 extern void drbd_md_io_complete(struct bio *bio, int error);
-extern void drbd_endio_sec(struct bio *bio, int error);
-extern void drbd_endio_pri(struct bio *bio, int error);
+extern void drbd_peer_request_endio(struct bio *bio, int error);
+extern void drbd_request_endio(struct bio *bio, int error);
 
 /*
  * used to submit our private bio
@@ -45,12 +46,6 @@ static inline void drbd_generic_make_request(struct drbd_conf *mdev,
 		generic_make_request(bio);
 }
 
-static inline int drbd_crypto_is_hash(struct crypto_tfm *tfm)
-{
-        return (crypto_tfm_alg_type(tfm) & CRYPTO_ALG_TYPE_HASH_MASK)
-                == CRYPTO_ALG_TYPE_HASH;
-}
-
 #ifndef __CHECKER__
 # undef __cond_lock
 # define __cond_lock(x,c) (c)

+ 10 - 0
drivers/block/loop.c

@@ -463,6 +463,7 @@ out:
  */
 static void loop_add_bio(struct loop_device *lo, struct bio *bio)
 {
+	lo->lo_bio_count++;
 	bio_list_add(&lo->lo_bio_list, bio);
 }
 
@@ -471,6 +472,7 @@ static void loop_add_bio(struct loop_device *lo, struct bio *bio)
  */
 static struct bio *loop_get_bio(struct loop_device *lo)
 {
+	lo->lo_bio_count--;
 	return bio_list_pop(&lo->lo_bio_list);
 }
 
@@ -489,6 +491,10 @@ static void loop_make_request(struct request_queue *q, struct bio *old_bio)
 		goto out;
 	if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
 		goto out;
+	if (lo->lo_bio_count >= q->nr_congestion_on)
+		wait_event_lock_irq(lo->lo_req_wait,
+				    lo->lo_bio_count < q->nr_congestion_off,
+				    lo->lo_lock);
 	loop_add_bio(lo, old_bio);
 	wake_up(&lo->lo_event);
 	spin_unlock_irq(&lo->lo_lock);
@@ -546,6 +552,8 @@ static int loop_thread(void *data)
 			continue;
 		spin_lock_irq(&lo->lo_lock);
 		bio = loop_get_bio(lo);
+		if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
+			wake_up(&lo->lo_req_wait);
 		spin_unlock_irq(&lo->lo_lock);
 
 		BUG_ON(!bio);
@@ -873,6 +881,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	lo->transfer = transfer_none;
 	lo->ioctl = NULL;
 	lo->lo_sizelimit = 0;
+	lo->lo_bio_count = 0;
 	lo->old_gfp_mask = mapping_gfp_mask(mapping);
 	mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
 
@@ -1673,6 +1682,7 @@ static int loop_add(struct loop_device **l, int i)
 	lo->lo_number		= i;
 	lo->lo_thread		= NULL;
 	init_waitqueue_head(&lo->lo_event);
+	init_waitqueue_head(&lo->lo_req_wait);
 	spin_lock_init(&lo->lo_lock);
 	disk->major		= LOOP_MAJOR;
 	disk->first_minor	= i << part_shift;
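
The two queue watermarks give the new throttle its hysteresis: submitters
block once lo_bio_count reaches nr_congestion_on and are released only after
the worker has drained the list below nr_congestion_off, so the depth
oscillates between the two marks instead of thrashing around a single
threshold. Condensed from the hunks above (a sketch, not the full functions):

	/* submitter, with lo->lo_lock held */
	if (lo->lo_bio_count >= q->nr_congestion_on)
		wait_event_lock_irq(lo->lo_req_wait,
				    lo->lo_bio_count < q->nr_congestion_off,
				    lo->lo_lock);	/* lock dropped while sleeping */
	loop_add_bio(lo, old_bio);			/* increments lo_bio_count */

	/* worker, with lo->lo_lock held */
	bio = loop_get_bio(lo);				/* decrements lo_bio_count */
	if (lo->lo_bio_count < lo->lo_queue->nr_congestion_off)
		wake_up(&lo->lo_req_wait);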

+ 276 - 25
drivers/block/xen-blkback/blkback.c

@@ -39,6 +39,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/freezer.h>
+#include <linux/bitmap.h>
 
 #include <xen/events.h>
 #include <xen/page.h>
@@ -79,6 +80,7 @@ struct pending_req {
 	unsigned short		operation;
 	int			status;
 	struct list_head	free_list;
+	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
 };
 
 #define BLKBACK_INVALID_HANDLE (~0)
@@ -98,6 +100,36 @@ struct xen_blkbk {
 
 static struct xen_blkbk *blkbk;
 
+/*
+ * Maximum number of grant pages that can be mapped in blkback.
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
+ * pages that blkback will persistently map.
+ * Currently, this is:
+ * RING_SIZE = 32 (for all known ring types)
+ * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
+ * sizeof(struct persistent_gnt) = 48
+ * So the maximum memory used to store the grants is:
+ * 32 * 11 * 48 = 16896 bytes
+ */
+static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+{
+	switch (protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_32:
+		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	case BLKIF_PROTOCOL_X86_64:
+		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
+			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+
 /*
  * Little helpful macro to figure out the index and virtual address of the
  * pending_pages[..]. For each 'pending_req' we have up to
@@ -129,6 +161,90 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 static void make_response(struct xen_blkif *blkif, u64 id,
 			  unsigned short op, int st);
 
+#define foreach_grant(pos, rbtree, node) \
+	for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \
+	     &(pos)->node != NULL; \
+	     (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node))
+
+
+static void add_persistent_gnt(struct rb_root *root,
+			       struct persistent_gnt *persistent_gnt)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct persistent_gnt *this;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		this = container_of(*new, struct persistent_gnt, node);
+
+		parent = *new;
+		if (persistent_gnt->gnt < this->gnt)
+			new = &((*new)->rb_left);
+		else if (persistent_gnt->gnt > this->gnt)
+			new = &((*new)->rb_right);
+		else {
+			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
+			BUG();
+		}
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&(persistent_gnt->node), parent, new);
+	rb_insert_color(&(persistent_gnt->node), root);
+}
+
+static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+						 grant_ref_t gref)
+{
+	struct persistent_gnt *data;
+	struct rb_node *node = root->rb_node;
+
+	while (node) {
+		data = container_of(node, struct persistent_gnt, node);
+
+		if (gref < data->gnt)
+			node = node->rb_left;
+		else if (gref > data->gnt)
+			node = node->rb_right;
+		else
+			return data;
+	}
+	return NULL;
+}
+
+static void free_persistent_gnts(struct rb_root *root, unsigned int num)
+{
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	int ret = 0;
+	int segs_to_unmap = 0;
+
+	foreach_grant(persistent_gnt, root, node) {
+		BUG_ON(persistent_gnt->handle ==
+			BLKBACK_INVALID_HANDLE);
+		gnttab_set_unmap_op(&unmap[segs_to_unmap],
+			(unsigned long) pfn_to_kaddr(page_to_pfn(
+				persistent_gnt->page)),
+			GNTMAP_host_map,
+			persistent_gnt->handle);
+
+		pages[segs_to_unmap] = persistent_gnt->page;
+		rb_erase(&persistent_gnt->node, root);
+		kfree(persistent_gnt);
+		num--;
+
+		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
+			!rb_next(&persistent_gnt->node)) {
+			ret = gnttab_unmap_refs(unmap, NULL, pages,
+				segs_to_unmap);
+			BUG_ON(ret);
+			segs_to_unmap = 0;
+		}
+	}
+	BUG_ON(num != 0);
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
@@ -302,6 +418,14 @@ int xen_blkif_schedule(void *arg)
 			print_stats(blkif);
 	}
 
+	/* Free all persistent grant pages */
+	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
+		free_persistent_gnts(&blkif->persistent_gnts,
+			blkif->persistent_gnt_c);
+
+	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
+	blkif->persistent_gnt_c = 0;
+
 	if (log_stats)
 		print_stats(blkif);
 
@@ -328,6 +452,8 @@ static void xen_blkbk_unmap(struct pending_req *req)
 	int ret;
 
 	for (i = 0; i < req->nr_pages; i++) {
+		if (!test_bit(i, req->unmap_seg))
+			continue;
 		handle = pending_handle(req, i);
 		if (handle == BLKBACK_INVALID_HANDLE)
 			continue;
@@ -344,12 +470,26 @@ static void xen_blkbk_unmap(struct pending_req *req)
 
 static int xen_blkbk_map(struct blkif_request *req,
 			 struct pending_req *pending_req,
-			 struct seg_buf seg[])
+			 struct seg_buf seg[],
+			 struct page *pages[])
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	int i;
+	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt = NULL;
+	struct xen_blkif *blkif = pending_req->blkif;
+	phys_addr_t addr = 0;
+	int i, j;
+	bool new_map;
 	int nseg = req->u.rw.nr_segments;
+	int segs_to_map = 0;
 	int ret = 0;
+	int use_persistent_gnts;
+
+	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
+
+	BUG_ON(blkif->persistent_gnt_c >
+		   max_mapped_grant_pages(pending_req->blkif->blk_protocol));
 
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
@@ -359,36 +499,146 @@ static int xen_blkbk_map(struct blkif_request *req,
 	for (i = 0; i < nseg; i++) {
 		uint32_t flags;
 
-		flags = GNTMAP_host_map;
-		if (pending_req->operation != BLKIF_OP_READ)
-			flags |= GNTMAP_readonly;
-		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
-				  req->u.rw.seg[i].gref,
-				  pending_req->blkif->domid);
+		if (use_persistent_gnts)
+			persistent_gnt = get_persistent_gnt(
+				&blkif->persistent_gnts,
+				req->u.rw.seg[i].gref);
+
+		if (persistent_gnt) {
+			/*
+			 * We are using persistent grants and
+			 * the grant is already mapped
+			 */
+			new_map = false;
+		} else if (use_persistent_gnts &&
+			   blkif->persistent_gnt_c <
+			   max_mapped_grant_pages(blkif->blk_protocol)) {
+			/*
+			 * We are using persistent grants, the grant is
+			 * not mapped but we have room for it
+			 */
+			new_map = true;
+			persistent_gnt = kmalloc(
+				sizeof(struct persistent_gnt),
+				GFP_KERNEL);
+			if (!persistent_gnt)
+				return -ENOMEM;
+			persistent_gnt->page = alloc_page(GFP_KERNEL);
+			if (!persistent_gnt->page) {
+				kfree(persistent_gnt);
+				return -ENOMEM;
+			}
+			persistent_gnt->gnt = req->u.rw.seg[i].gref;
+			persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
+
+			pages_to_gnt[segs_to_map] =
+				persistent_gnt->page;
+			addr = (unsigned long) pfn_to_kaddr(
+				page_to_pfn(persistent_gnt->page));
+
+			add_persistent_gnt(&blkif->persistent_gnts,
+				persistent_gnt);
+			blkif->persistent_gnt_c++;
+			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+				 persistent_gnt->gnt, blkif->persistent_gnt_c,
+				 max_mapped_grant_pages(blkif->blk_protocol));
+		} else {
+			/*
+			 * We are either using persistent grants and
+			 * hit the maximum limit of grants mapped,
+			 * or we are not using persistent grants.
+			 */
+			if (use_persistent_gnts &&
+				!blkif->vbd.overflow_max_grants) {
+				blkif->vbd.overflow_max_grants = 1;
+				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+					 blkif->domid, blkif->vbd.handle);
+			}
+			new_map = true;
+			pages[i] = blkbk->pending_page(pending_req, i);
+			addr = vaddr(pending_req, i);
+			pages_to_gnt[segs_to_map] =
+				blkbk->pending_page(pending_req, i);
+		}
+
+		if (persistent_gnt) {
+			pages[i] = persistent_gnt->page;
+			persistent_gnts[i] = persistent_gnt;
+		} else {
+			persistent_gnts[i] = NULL;
+		}
+
+		if (new_map) {
+			flags = GNTMAP_host_map;
+			if (!persistent_gnt &&
+			    (pending_req->operation != BLKIF_OP_READ))
+				flags |= GNTMAP_readonly;
+			gnttab_set_map_op(&map[segs_to_map++], addr,
+					  flags, req->u.rw.seg[i].gref,
+					  blkif->domid);
+		}
 	}
 
-	ret = gnttab_map_refs(map, NULL, &blkbk->pending_page(pending_req, 0), nseg);
-	BUG_ON(ret);
+	if (segs_to_map) {
+		ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map);
+		BUG_ON(ret);
+	}
 
 	/*
 	 * Now swizzle the MFN in our domain with the MFN from the other domain
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	for (i = 0; i < nseg; i++) {
-		if (unlikely(map[i].status != 0)) {
-			pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-			map[i].handle = BLKBACK_INVALID_HANDLE;
-			ret |= 1;
+	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	for (i = 0, j = 0; i < nseg; i++) {
+		if (!persistent_gnts[i] ||
+		    persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
+			/* This is a newly mapped grant */
+			BUG_ON(j >= segs_to_map);
+			if (unlikely(map[j].status != 0)) {
+				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
+				map[j].handle = BLKBACK_INVALID_HANDLE;
+				ret |= 1;
+				if (persistent_gnts[i]) {
+					rb_erase(&persistent_gnts[i]->node,
+						 &blkif->persistent_gnts);
+					blkif->persistent_gnt_c--;
+					kfree(persistent_gnts[i]);
+					persistent_gnts[i] = NULL;
+				}
+			}
+		}
+		if (persistent_gnts[i]) {
+			if (persistent_gnts[i]->handle ==
+			    BLKBACK_INVALID_HANDLE) {
+				/*
+				 * If this is a new persistent grant,
+				 * save the handle
+				 */
+				persistent_gnts[i]->handle = map[j].handle;
+				persistent_gnts[i]->dev_bus_addr =
+					map[j++].dev_bus_addr;
+			}
+			pending_handle(pending_req, i) =
+				persistent_gnts[i]->handle;
+
+			if (ret)
+				continue;
+
+			seg[i].buf = persistent_gnts[i]->dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
+		} else {
+			pending_handle(pending_req, i) = map[j].handle;
+			bitmap_set(pending_req->unmap_seg, i, 1);
+
+			if (ret) {
+				j++;
+				continue;
+			}
+
+			seg[i].buf = map[j++].dev_bus_addr |
+				(req->u.rw.seg[i].first_sect << 9);
 		}
-
-		pending_handle(pending_req, i) = map[i].handle;
-
-		if (ret)
-			continue;
-
-		seg[i].buf  = map[i].dev_bus_addr |
-			(req->u.rw.seg[i].first_sect << 9);
 	}
 	return ret;
 }
@@ -591,6 +841,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 
 	switch (req->operation) {
 	case BLKIF_OP_READ:
@@ -677,7 +928,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg))
+	if (xen_blkbk_map(req, pending_req, seg, pages))
 		goto fail_flush;
 
 	/*
@@ -689,7 +940,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     blkbk->pending_page(pending_req, i),
+				     pages[i],
 				     seg[i].nsec << 9,
 				     seg[i].buf & ~PAGE_MASK) == 0)) {
 

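For scale, plugging the numbers from the max_mapped_grant_pages() comment back
in: at most 32 ring entries x 11 segments = 352 persistent grants per device,
i.e. 352 x 48 = 16896 bytes of struct persistent_gnt bookkeeping, while the
persistently granted pages themselves pin 352 x 4096 = 1441792 bytes (about
1.4 MiB) per device, assuming 4 KiB pages.
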
+ 16 - 0
drivers/block/xen-blkback/common.h

@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/wait.h>
 #include <linux/io.h>
+#include <linux/rbtree.h>
 #include <asm/setup.h>
 #include <asm/pgalloc.h>
 #include <asm/hypervisor.h>
@@ -160,10 +161,21 @@ struct xen_vbd {
 	sector_t		size;
 	unsigned int		flush_support:1;
 	unsigned int		discard_secure:1;
+	unsigned int		feature_gnt_persistent:1;
+	unsigned int		overflow_max_grants:1;
 };
 
 struct backend_info;
 
+
+struct persistent_gnt {
+	struct page *page;
+	grant_ref_t gnt;
+	grant_handle_t handle;
+	uint64_t dev_bus_addr;
+	struct rb_node node;
+};
+
 struct xen_blkif {
 	/* Unique identifier for this interface. */
 	domid_t			domid;
@@ -190,6 +202,10 @@ struct xen_blkif {
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
 
+	/* tree to store persistent grants */
+	struct rb_root		persistent_gnts;
+	unsigned int		persistent_gnt_c;
+
 	/* statistics */
 	unsigned long		st_print;
 	int			st_rd_req;

+ 21 - 2
drivers/block/xen-blkback/xenbus.c

@@ -117,6 +117,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	atomic_set(&blkif->drain, 0);
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
+	blkif->persistent_gnts.rb_node = NULL;
 
 	return blkif;
 }
@@ -672,6 +673,13 @@ again:
 
 	xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
 
+	err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
+				 dev->nodename);
+		goto abort;
+	}
+
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
 	if (err) {
@@ -720,6 +728,7 @@ static int connect_ring(struct backend_info *be)
 	struct xenbus_device *dev = be->dev;
 	unsigned long ring_ref;
 	unsigned int evtchn;
+	unsigned int pers_grants;
 	char protocol[64] = "";
 	int err;
 
@@ -749,8 +758,18 @@ static int connect_ring(struct backend_info *be)
 		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
 		return -1;
 	}
-	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n",
-		ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+			    "feature-persistent", "%u",
+			    &pers_grants, NULL);
+	if (err)
+		pers_grants = 0;
+
+	be->blkif->vbd.feature_gnt_persistent = pers_grants;
+	be->blkif->vbd.overflow_max_grants = 0;
+
+	pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
+		ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+		pers_grants ? "persistent grants" : "");
 
 	/* Map the shared frame, irq etc. */
 	err = xen_blkif_map(be->blkif, ring_ref, evtchn);

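Persistent grants are negotiated like the other optional features: each end
writes feature-persistent = 1 under its own xenstore directory and enables the
feature only if the peer advertised it too; connect_ring() above defaults
pers_grants to 0 when the frontend key is absent, so older frontends keep
working unchanged. A hypothetical xenstore view once both sides of a vbd have
connected (paths illustrative only):

	/local/domain/0/backend/vbd/1/51712/feature-persistent = "1"
	/local/domain/1/device/vbd/51712/feature-persistent    = "1"
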
+ 171 - 28
drivers/block/xen-blkfront.c

@@ -44,6 +44,7 @@
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
+#include <linux/llist.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -64,10 +65,17 @@ enum blkif_state {
 	BLKIF_STATE_SUSPENDED,
 };
 
+struct grant {
+	grant_ref_t gref;
+	unsigned long pfn;
+	struct llist_node node;
+};
+
 struct blk_shadow {
 	struct blkif_request req;
 	struct request *request;
 	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
@@ -97,6 +105,8 @@ struct blkfront_info
 	struct work_struct work;
 	struct gnttab_free_callback callback;
 	struct blk_shadow shadow[BLK_RING_SIZE];
+	struct llist_head persistent_gnts;
+	unsigned int persistent_gnts_c;
 	unsigned long shadow_free;
 	unsigned int feature_flush;
 	unsigned int flush_op;
@@ -104,6 +114,7 @@ struct blkfront_info
 	unsigned int feature_secdiscard:1;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
+	unsigned int feature_persistent:1;
 	int is_ready;
 };
 
@@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req)
 	unsigned long id;
 	unsigned int fsect, lsect;
 	int i, ref;
+
+	/*
+	 * Tracks whether the request can be queued using only existing
+	 * persistent grants, or whether new grants must be claimed because
+	 * not enough free ones remain.
+	 */
+	bool new_persistent_gnts;
 	grant_ref_t gref_head;
+	struct page *granted_page;
+	struct grant *gnt_list_entry = NULL;
 	struct scatterlist *sg;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	if (gnttab_alloc_grant_references(
-		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
-		gnttab_request_free_callback(
-			&info->callback,
-			blkif_restart_queue_callback,
-			info,
-			BLKIF_MAX_SEGMENTS_PER_REQUEST);
-		return 1;
-	}
+	/* Check if we have enough grants to allocate a request */
+	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+		new_persistent_gnts = 1;
+		if (gnttab_alloc_grant_references(
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+		    &gref_head) < 0) {
+			gnttab_request_free_callback(
+				&info->callback,
+				blkif_restart_queue_callback,
+				info,
+				BLKIF_MAX_SEGMENTS_PER_REQUEST);
+			return 1;
+		}
+	} else
+		new_persistent_gnts = 0;
 
 	/* Fill out a communications ring structure. */
 	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
@@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req)
 		       BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
 		for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
-			buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
-			/* install a grant reference. */
-			ref = gnttab_claim_grant_reference(&gref_head);
-			BUG_ON(ref == -ENOSPC);
 
-			gnttab_grant_foreign_access_ref(
-					ref,
+			if (info->persistent_gnts_c) {
+				BUG_ON(llist_empty(&info->persistent_gnts));
+				gnt_list_entry = llist_entry(
+					llist_del_first(&info->persistent_gnts),
+					struct grant, node);
+
+				ref = gnt_list_entry->gref;
+				buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+				info->persistent_gnts_c--;
+			} else {
+				ref = gnttab_claim_grant_reference(&gref_head);
+				BUG_ON(ref == -ENOSPC);
+
+				gnt_list_entry =
+					kmalloc(sizeof(struct grant),
+							 GFP_ATOMIC);
+				if (!gnt_list_entry)
+					return -ENOMEM;
+
+				granted_page = alloc_page(GFP_ATOMIC);
+				if (!granted_page) {
+					kfree(gnt_list_entry);
+					return -ENOMEM;
+				}
+
+				gnt_list_entry->pfn =
+					page_to_pfn(granted_page);
+				gnt_list_entry->gref = ref;
+
+				buffer_mfn = pfn_to_mfn(page_to_pfn(
+								granted_page));
+				gnttab_grant_foreign_access_ref(ref,
 					info->xbdev->otherend_id,
-					buffer_mfn,
-					rq_data_dir(req));
+					buffer_mfn, 0);
+			}
+
+			info->shadow[id].grants_used[i] = gnt_list_entry;
+
+			if (rq_data_dir(req)) {
+				char *bvec_data;
+				void *shared_data;
+
+				BUG_ON(sg->offset + sg->length > PAGE_SIZE);
+
+				shared_data = kmap_atomic(
+					pfn_to_page(gnt_list_entry->pfn));
+				bvec_data = kmap_atomic(sg_page(sg));
+
+				/*
+				 * This does not wipe data stored outside the
+				 * range sg->offset..sg->offset+sg->length.
+				 * Therefore, blkback *could* see data from
+				 * previous requests. This is OK as long as
+				 * persistent grants are shared with just one
+				 * domain. It may need refactoring if this
+				 * changes.
+				 */
+				memcpy(shared_data + sg->offset,
+				       bvec_data   + sg->offset,
+				       sg->length);
+
+				kunmap_atomic(bvec_data);
+				kunmap_atomic(shared_data);
+			}
 
 			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
 			ring_req->u.rw.seg[i] =
@@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req)
 	/* Keep a private copy so we can reissue requests when recovering. */
 	info->shadow[id].req = *ring_req;
 
-	gnttab_free_grant_references(gref_head);
+	if (new_persistent_gnts)
+		gnttab_free_grant_references(gref_head);
 
 	return 0;
 }
@@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: %s: %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
 	       info->gd->disk_name,
 	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
 		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
 		"flush diskcache" : "barrier or flush"),
-	       info->feature_flush ? "enabled" : "disabled");
+	       info->feature_flush ? "enabled" : "disabled",
+	       info->feature_persistent ? "using persistent grants" : "");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work)
 
 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+	struct llist_node *all_gnts;
+	struct grant *persistent_gnt;
+
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
 	info->connected = suspend ?
@@ -714,6 +800,18 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	/* No more blkif_request(). */
 	if (info->rq)
 		blk_stop_queue(info->rq);
+
+	/* Remove all persistent grants */
+	if (info->persistent_gnts_c) {
+		all_gnts = llist_del_all(&info->persistent_gnts);
+		llist_for_each_entry(persistent_gnt, all_gnts, node) {
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+		info->persistent_gnts_c = 0;
+	}
+
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
 	spin_unlock_irq(&info->io_lock);
@@ -734,13 +832,43 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 
 }
 
-static void blkif_completion(struct blk_shadow *s)
+static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+			     struct blkif_response *bret)
 {
 	int i;
-	/* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
-	 * flag. */
-	for (i = 0; i < s->req.u.rw.nr_segments; i++)
-		gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+	struct bio_vec *bvec;
+	struct req_iterator iter;
+	unsigned long flags;
+	char *bvec_data;
+	void *shared_data;
+	unsigned int offset = 0;
+
+	if (bret->operation == BLKIF_OP_READ) {
+		/*
+		 * Copy the data received from the backend into the bvec.
+		 * Since bv_offset can be different from 0 and bv_len different
+		 * from PAGE_SIZE, we have to keep track of the current offset,
+		 * to be sure we are copying the data from the right shared page.
+		 */
+		rq_for_each_segment(bvec, s->request, iter) {
+			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
+			i = offset >> PAGE_SHIFT;
+			BUG_ON(i >= s->req.u.rw.nr_segments);
+			shared_data = kmap_atomic(
+				pfn_to_page(s->grants_used[i]->pfn));
+			bvec_data = bvec_kmap_irq(bvec, &flags);
+			memcpy(bvec_data, shared_data + bvec->bv_offset,
+				bvec->bv_len);
+			bvec_kunmap_irq(bvec_data, &flags);
+			kunmap_atomic(shared_data);
+			offset += bvec->bv_len;
+		}
+	}
+	/* Add the persistent grant into the list of free grants */
+	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+		llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+		info->persistent_gnts_c++;
+	}
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -783,7 +911,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		req  = info->shadow[id].request;
 
 		if (bret->operation != BLKIF_OP_DISCARD)
-			blkif_completion(&info->shadow[id]);
+			blkif_completion(&info->shadow[id], info, bret);
 
 		if (add_id_to_freelist(info, id)) {
 			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
@@ -942,6 +1070,11 @@ again:
 		message = "writing protocol";
 		goto abort_transaction;
 	}
+	err = xenbus_printf(xbt, dev->nodename,
+			    "feature-persistent", "%u", 1);
+	if (err)
+		dev_warn(&dev->dev,
+			 "writing persistent grants feature to xenbus");
 
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
@@ -1029,6 +1162,8 @@ static int blkfront_probe(struct xenbus_device *dev,
 	spin_lock_init(&info->io_lock);
 	info->xbdev = dev;
 	info->vdevice = vdevice;
+	init_llist_head(&info->persistent_gnts);
+	info->persistent_gnts_c = 0;
 	info->connected = BLKIF_STATE_DISCONNECTED;
 	INIT_WORK(&info->work, blkif_restart_queue);
 
@@ -1093,7 +1228,7 @@ static int blkif_recover(struct blkfront_info *info)
 					req->u.rw.seg[j].gref,
 					info->xbdev->otherend_id,
 					pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
-					rq_data_dir(info->shadow[req->u.rw.id].request));
+					0);
 		}
 		info->shadow[req->u.rw.id].req = *req;
 
@@ -1225,7 +1360,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier, flush, discard;
+	int barrier, flush, discard, persistent;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1303,6 +1438,14 @@ static void blkfront_connect(struct blkfront_info *info)
 	if (!err && discard)
 		blkfront_setup_discard(info);
 
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-persistent", "%u", &persistent,
+			    NULL);
+	if (err)
+		info->feature_persistent = 0;
+	else
+		info->feature_persistent = persistent;
+
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",

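On the frontend the recycled grants live on a lock-less llist:
blkif_queue_request() pops an entry whenever persistent_gnts_c is non-zero,
and blkif_completion() pushes every grant of a finished request back. A
condensed sketch of the pop-or-allocate step, using the struct grant and
blkfront_info fields from the hunks above (get_grant() itself is hypothetical,
factored out here only for illustration):

	static struct grant *get_grant(struct blkfront_info *info,
				       grant_ref_t *gref_head)
	{
		struct grant *gnt;
		struct page *page;

		if (info->persistent_gnts_c) {
			/* reuse a cached page, still granted to the backend */
			gnt = llist_entry(llist_del_first(&info->persistent_gnts),
					  struct grant, node);
			info->persistent_gnts_c--;
			return gnt;
		}
		/* otherwise back a fresh grant reference with a new page; the
		 * caller still calls gnttab_grant_foreign_access_ref() on it */
		gnt = kmalloc(sizeof(*gnt), GFP_ATOMIC);
		if (!gnt)
			return NULL;
		page = alloc_page(GFP_ATOMIC);
		if (!page) {
			kfree(gnt);
			return NULL;
		}
		gnt->pfn  = page_to_pfn(page);
		gnt->gref = gnttab_claim_grant_reference(gref_head);
		return gnt;
	}
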
+ 1 - 1
drivers/md/md.c

@@ -452,7 +452,7 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
 	spin_lock_irq(&mddev->write_lock);
 	wait_event_lock_irq(mddev->sb_wait,
 			    !mddev->flush_bio,
-			    mddev->write_lock, /*nothing*/);
+			    mddev->write_lock);
 	mddev->flush_bio = bio;
 	spin_unlock_irq(&mddev->write_lock);
 

+ 0 - 26
drivers/md/md.h

@@ -551,32 +551,6 @@ struct md_thread {
 
 #define THREAD_WAKEUP  0
 
-#define __wait_event_lock_irq(wq, condition, lock, cmd) 		\
-do {									\
-	wait_queue_t __wait;						\
-	init_waitqueue_entry(&__wait, current);				\
-									\
-	add_wait_queue(&wq, &__wait);					\
-	for (;;) {							\
-		set_current_state(TASK_UNINTERRUPTIBLE);		\
-		if (condition)						\
-			break;						\
-		spin_unlock_irq(&lock);					\
-		cmd;							\
-		schedule();						\
-		spin_lock_irq(&lock);					\
-	}								\
-	current->state = TASK_RUNNING;					\
-	remove_wait_queue(&wq, &__wait);				\
-} while (0)
-
-#define wait_event_lock_irq(wq, condition, lock, cmd) 			\
-do {									\
-	if (condition)	 						\
-		break;							\
-	__wait_event_lock_irq(wq, condition, lock, cmd);		\
-} while (0)
-
 static inline void safe_put_page(struct page *p)
 {
 	if (p) put_page(p);

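The md.h macro removed above survives in generic code with the trailing cmd
argument split into a second variant: wait_event_lock_irq() covers the common
case, and wait_event_lock_irq_cmd() keeps the hook that freeze_array() in
raid1/raid10 uses to flush pending writes before each sleep. The contract is
the one spelled out by the old macro body, sketched with placeholder names:

	spin_lock_irq(&lock);
	/* lock is held on entry and on return; it is dropped around
	 * every schedule() while the condition is still false */
	wait_event_lock_irq(wq, condition, lock);

	/* same, but evaluates cmd after dropping the lock, before sleeping */
	wait_event_lock_irq_cmd(wq, condition, lock, flush_pending_writes(conf));
	spin_unlock_irq(&lock);
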
+ 7 - 8
drivers/md/raid1.c

@@ -822,7 +822,7 @@ static void raise_barrier(struct r1conf *conf)
 
 	/* Wait until no block IO is waiting */
 	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -830,7 +830,7 @@ static void raise_barrier(struct r1conf *conf)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -864,8 +864,7 @@ static void wait_barrier(struct r1conf *conf)
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -898,10 +897,10 @@ static void freeze_array(struct r1conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 	spin_unlock_irq(&conf->resync_lock);
 }
 static void unfreeze_array(struct r1conf *conf)

+ 7 - 8
drivers/md/raid10.c

@@ -952,7 +952,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 
 	/* Wait until no block IO is waiting (unless 'force') */
 	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	/* block any new IO from starting */
 	conf->barrier++;
@@ -960,7 +960,7 @@ static void raise_barrier(struct r10conf *conf, int force)
 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
 			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, );
+			    conf->resync_lock);
 
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -993,8 +993,7 @@ static void wait_barrier(struct r10conf *conf)
 				    (conf->nr_pending &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
-				    conf->resync_lock,
-			);
+				    conf->resync_lock);
 		conf->nr_waiting--;
 	}
 	conf->nr_pending++;
@@ -1027,10 +1026,10 @@ static void freeze_array(struct r10conf *conf)
 	spin_lock_irq(&conf->resync_lock);
 	conf->barrier++;
 	conf->nr_waiting++;
-	wait_event_lock_irq(conf->wait_barrier,
-			    conf->nr_pending == conf->nr_queued+1,
-			    conf->resync_lock,
-			    flush_pending_writes(conf));
+	wait_event_lock_irq_cmd(conf->wait_barrier,
+				conf->nr_pending == conf->nr_queued+1,
+				conf->resync_lock,
+				flush_pending_writes(conf));
 
 	spin_unlock_irq(&conf->resync_lock);
 }

+ 5 - 7
drivers/md/raid5.c

@@ -466,7 +466,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	do {
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0 || noquiesce,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
 			if (!conf->inactive_blocked)
@@ -480,8 +480,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 						    (atomic_read(&conf->active_stripes)
 						     < (conf->max_nr_stripes *3/4)
 						     || !conf->inactive_blocked),
-						    conf->device_lock,
-						    );
+						    conf->device_lock);
 				conf->inactive_blocked = 0;
 			} else
 				init_stripe(sh, sector, previous);
@@ -1646,8 +1645,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    !list_empty(&conf->inactive_list),
-				    conf->device_lock,
-				    );
+				    conf->device_lock);
 		osh = get_free_stripe(conf);
 		spin_unlock_irq(&conf->device_lock);
 		atomic_set(&nsh->count, 1);
@@ -4003,7 +4001,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		atomic_inc(&conf->active_aligned_reads);
 		spin_unlock_irq(&conf->device_lock);
 
@@ -6095,7 +6093,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    atomic_read(&conf->active_stripes) == 0 &&
 				    atomic_read(&conf->active_aligned_reads) == 0,
-				    conf->device_lock, /* nothing */);
+				    conf->device_lock);
 		conf->quiesce = 1;
 		spin_unlock_irq(&conf->device_lock);
 		/* allow reshape to continue */

+ 46 - 35
include/linux/drbd.h

@@ -51,12 +51,11 @@
 
 #endif
 
-
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.13"
-#define API_VERSION 88
+#define REL_VERSION "8.4.2"
+#define API_VERSION 1
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 96
+#define PRO_VERSION_MAX 101
 
 
 enum drbd_io_error_p {
@@ -66,7 +65,8 @@ enum drbd_io_error_p {
 };
 
 enum drbd_fencing_p {
-	FP_DONT_CARE,
+	FP_NOT_AVAIL = -1, /* Not a policy */
+	FP_DONT_CARE = 0,
 	FP_RESOURCE,
 	FP_STONITH
 };
@@ -102,6 +102,20 @@ enum drbd_on_congestion {
 	OC_DISCONNECT,
 };
 
+enum drbd_read_balancing {
+	RB_PREFER_LOCAL,
+	RB_PREFER_REMOTE,
+	RB_ROUND_ROBIN,
+	RB_LEAST_PENDING,
+	RB_CONGESTED_REMOTE,
+	RB_32K_STRIPING,
+	RB_64K_STRIPING,
+	RB_128K_STRIPING,
+	RB_256K_STRIPING,
+	RB_512K_STRIPING,
+	RB_1M_STRIPING,
+};
+
 /* KEEP the order, do not delete or insert. Only append. */
 enum drbd_ret_code {
 	ERR_CODE_BASE		= 100,
@@ -122,7 +136,7 @@ enum drbd_ret_code {
 	ERR_AUTH_ALG		= 120,
 	ERR_AUTH_ALG_ND		= 121,
 	ERR_NOMEM		= 122,
-	ERR_DISCARD		= 123,
+	ERR_DISCARD_IMPOSSIBLE	= 123,
 	ERR_DISK_CONFIGURED	= 124,
 	ERR_NET_CONFIGURED	= 125,
 	ERR_MANDATORY_TAG	= 126,
@@ -130,8 +144,8 @@ enum drbd_ret_code {
 	ERR_INTR		= 129, /* EINTR */
 	ERR_RESIZE_RESYNC	= 130,
 	ERR_NO_PRIMARY		= 131,
-	ERR_SYNC_AFTER		= 132,
-	ERR_SYNC_AFTER_CYCLE	= 133,
+	ERR_RESYNC_AFTER	= 132,
+	ERR_RESYNC_AFTER_CYCLE	= 133,
 	ERR_PAUSE_IS_SET	= 134,
 	ERR_PAUSE_IS_CLEAR	= 135,
 	ERR_PACKET_NR		= 137,
@@ -155,6 +169,14 @@ enum drbd_ret_code {
 	ERR_CONG_NOT_PROTO_A	= 155,
 	ERR_PIC_AFTER_DEP	= 156,
 	ERR_PIC_PEER_DEP	= 157,
+	ERR_RES_NOT_KNOWN	= 158,
+	ERR_RES_IN_USE		= 159,
+	ERR_MINOR_CONFIGURED    = 160,
+	ERR_MINOR_EXISTS	= 161,
+	ERR_INVALID_REQUEST	= 162,
+	ERR_NEED_APV_100	= 163,
+	ERR_NEED_ALLOW_TWO_PRI  = 164,
+	ERR_MD_UNCLEAN          = 165,
 
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
@@ -296,7 +318,8 @@ enum drbd_state_rv {
 	SS_NOT_SUPPORTED = -17,      /* drbd-8.2 only */
 	SS_IN_TRANSIENT_STATE = -18,  /* Retry after the next state change */
 	SS_CONCURRENT_ST_CHG = -19,   /* Concurrent cluster side state change! */
-	SS_AFTER_LAST_ERROR = -20,    /* Keep this at bottom */
+	SS_O_VOL_PEER_PRI = -20,
+	SS_AFTER_LAST_ERROR = -21,    /* Keep this at bottom */
 };
 
 /* from drbd_strings.c */
@@ -313,7 +336,9 @@ extern const char *drbd_set_st_err_str(enum drbd_state_rv);
 #define MDF_FULL_SYNC		(1 << 3)
 #define MDF_WAS_UP_TO_DATE	(1 << 4)
 #define MDF_PEER_OUT_DATED	(1 << 5)
-#define MDF_CRASHED_PRIMARY     (1 << 6)
+#define MDF_CRASHED_PRIMARY	(1 << 6)
+#define MDF_AL_CLEAN		(1 << 7)
+#define MDF_AL_DISABLED		(1 << 8)
 
 enum drbd_uuid_index {
 	UI_CURRENT,
@@ -333,37 +358,23 @@ enum drbd_timeout_flag {
 
 #define UUID_JUST_CREATED ((__u64)4)
 
+/* magic numbers used in meta data and network packets */
 #define DRBD_MAGIC 0x83740267
-#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
 #define DRBD_MAGIC_BIG 0x835a
-#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
+#define DRBD_MAGIC_100 0x8620ec20
+
+#define DRBD_MD_MAGIC_07   (DRBD_MAGIC+3)
+#define DRBD_MD_MAGIC_08   (DRBD_MAGIC+4)
+#define DRBD_MD_MAGIC_84_UNCLEAN	(DRBD_MAGIC+5)
+
+
+/* How did I come up with this magic?
+ * base64 decode "actlog==" ;) */
+#define DRBD_AL_MAGIC 0x69cb65a2
 
 /* these are of type "int" */
 #define DRBD_MD_INDEX_INTERNAL -1
 #define DRBD_MD_INDEX_FLEX_EXT -2
 #define DRBD_MD_INDEX_FLEX_INT -3
 
-/* Start of the new netlink/connector stuff */
-
-#define DRBD_NL_CREATE_DEVICE 0x01
-#define DRBD_NL_SET_DEFAULTS  0x02
-
-
-/* For searching a vacant cn_idx value */
-#define CN_IDX_STEP			6977
-
-struct drbd_nl_cfg_req {
-	int packet_type;
-	unsigned int drbd_minor;
-	int flags;
-	unsigned short tag_list[];
-};
-
-struct drbd_nl_cfg_reply {
-	int packet_type;
-	unsigned int minor;
-	int ret_code; /* enum ret_code or set_st_err_t */
-	unsigned short tag_list[]; /* only used with get_* calls */
-};
-
 #endif
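
The joke in the DRBD_AL_MAGIC comment above checks out: base64-decoding the
string "actlog==" yields exactly the four bytes 0x69 0xcb 0x65 0xa2.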

+ 378 - 0
include/linux/drbd_genl.h

@@ -0,0 +1,378 @@
+/*
+ * General overview:
+ * full generic netlink message:
+ * |nlmsghdr|genlmsghdr|<payload>
+ *
+ * payload:
+ * |optional fixed size family header|<sequence of netlink attributes>
+ *
+ * sequence of netlink attributes:
+ * I chose to have all "top level" attributes NLA_NESTED,
+ * corresponding to some real struct.
+ * So we have a sequence of |tla, len|<nested nla sequence>
+ *
+ * nested nla sequence:
+ * may be empty, or contain a sequence of netlink attributes
+ * representing the struct fields.
+ *
+ * The tag number of any field (regardless of containing struct)
+ * will be available as T_ ## field_name,
+ * so you cannot have the same field name in two different structs.
+ *
+ * The tag numbers themselves are per struct, though,
+ * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type,
+ * which we won't use here).
+ * The tag numbers are used as index in the respective nla_policy array.
+ *
+ * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy
+ *	genl_magic_struct.h
+ *		generates the struct declaration,
+ *		generates an entry in the tla enum,
+ *	genl_magic_func.h
+ *		generates an entry in the static tla policy
+ *		with .type = NLA_NESTED
+ *		generates the static <struct_name>_nl_policy definition,
+ *		and static conversion functions
+ *
+ *	genl_magic_func.h
+ *
+ * GENL_mc_group(group)
+ *	genl_magic_struct.h
+ *		does nothing
+ *	genl_magic_func.h
+ *		defines and registers the mcast group,
+ *		and provides a send helper
+ *
+ * GENL_notification(op_name, op_num, mcast_group, tla list)
+ *	These are notifications to userspace.
+ *
+ *	genl_magic_struct.h
+ *		generates an entry in the genl_ops enum,
+ *	genl_magic_func.h
+ *		does nothing
+ *
+ *	mcast group: the name of the mcast group this notification should be
+ *	expected on
+ *	tla list: the list of expected top level attributes,
+ *	for documentation and sanity checking.
+ *
+ * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations"
+ *	These are requests from userspace.
+ *
+ *	_op and _notification share the same "number space",
+ *	op_nr will be assigned to "genlmsghdr->cmd"
+ *
+ *	genl_magic_struct.h
+ *		generates an entry in the genl_ops enum,
+ *	genl_magic_func.h
+ *		generates an entry in the static genl_ops array,
+ *		and static register/unregister functions to
+ *		genl_register_family_with_ops().
+ *
+ *	flags and handler:
+ *		GENL_op_init( .doit = x, .dumpit = y, .flags = something)
+ *		GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM
+ *	tla list: the list of expected top level attributes,
+ *	for documentation and sanity checking.
+ */
+
+/*
+ * STRUCTS
+ */
+
+/* this is sent kernel -> userland on various error conditions, and contains
+ * informational text, which is supposedly human readable.
+ * The computer relevant return code is in the drbd_genlmsghdr.
+ */
+GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply,
+		/* "arbitrary" size strings, nla_policy.len = 0 */
+	__str_field(1, DRBD_GENLA_F_MANDATORY,	info_text, 0)
+)
+
+/* Configuration requests typically need a context to operate on.
+ * Possible keys are device minor (fits in the drbd_genlmsghdr),
+ * the replication link (aka connection) name,
+ * and/or the replication group (aka resource) name,
+ * and the volume id within the resource. */
+GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context,
+	__u32_field(1, DRBD_GENLA_F_MANDATORY,	ctx_volume)
+	__str_field(2, DRBD_GENLA_F_MANDATORY,	ctx_resource_name, 128)
+	__bin_field(3, DRBD_GENLA_F_MANDATORY,	ctx_my_addr, 128)
+	__bin_field(4, DRBD_GENLA_F_MANDATORY,	ctx_peer_addr, 128)
+)
+
+GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf,
+	__str_field(1, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	backing_dev,	128)
+	__str_field(2, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev,	128)
+	__s32_field(3, DRBD_F_REQUIRED | DRBD_F_INVARIANT,	meta_dev_idx)
+
+	/* use the resize command to try and change the disk_size */
+	__u64_field(4, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	disk_size)
+	/* we could change the max_bio_bvecs,
+	 * but it won't propagate through the stack */
+	__u32_field(5, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	max_bio_bvecs)
+
+	__u32_field_def(6, DRBD_GENLA_F_MANDATORY,	on_io_error, DRBD_ON_IO_ERROR_DEF)
+	__u32_field_def(7, DRBD_GENLA_F_MANDATORY,	fencing, DRBD_FENCING_DEF)
+
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	resync_rate, DRBD_RESYNC_RATE_DEF)
+	__s32_field_def(9,	DRBD_GENLA_F_MANDATORY,	resync_after, DRBD_MINOR_NUMBER_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	al_extents, DRBD_AL_EXTENTS_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	c_plan_ahead, DRBD_C_PLAN_AHEAD_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	c_delay_target, DRBD_C_DELAY_TARGET_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	c_fill_target, DRBD_C_FILL_TARGET_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	c_max_rate, DRBD_C_MAX_RATE_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	c_min_rate, DRBD_C_MIN_RATE_DEF)
+
+	__flg_field_def(16, DRBD_GENLA_F_MANDATORY,	disk_barrier, DRBD_DISK_BARRIER_DEF)
+	__flg_field_def(17, DRBD_GENLA_F_MANDATORY,	disk_flushes, DRBD_DISK_FLUSHES_DEF)
+	__flg_field_def(18, DRBD_GENLA_F_MANDATORY,	disk_drain, DRBD_DISK_DRAIN_DEF)
+	__flg_field_def(19, DRBD_GENLA_F_MANDATORY,	md_flushes, DRBD_MD_FLUSHES_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	disk_timeout, DRBD_DISK_TIMEOUT_DEF)
+	__u32_field_def(21,	0 /* OPTIONAL */,       read_balancing, DRBD_READ_BALANCING_DEF)
+	/* 9: __u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF) */
+	__flg_field_def(23,     0 /* OPTIONAL */,	al_updates, DRBD_AL_UPDATES_DEF)
+)
+
+GENL_struct(DRBD_NLA_RESOURCE_OPTS, 4, res_opts,
+	__str_field_def(1,	DRBD_GENLA_F_MANDATORY,	cpu_mask,       32)
+	__u32_field_def(2,	DRBD_GENLA_F_MANDATORY,	on_no_data, DRBD_ON_NO_DATA_DEF)
+)
+
+GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
+	__str_field_def(1,	DRBD_GENLA_F_MANDATORY | DRBD_F_SENSITIVE,
+						shared_secret,	SHARED_SECRET_MAX)
+	__str_field_def(2,	DRBD_GENLA_F_MANDATORY,	cram_hmac_alg,	SHARED_SECRET_MAX)
+	__str_field_def(3,	DRBD_GENLA_F_MANDATORY,	integrity_alg,	SHARED_SECRET_MAX)
+	__str_field_def(4,	DRBD_GENLA_F_MANDATORY,	verify_alg,     SHARED_SECRET_MAX)
+	__str_field_def(5,	DRBD_GENLA_F_MANDATORY,	csums_alg,	SHARED_SECRET_MAX)
+	__u32_field_def(6,	DRBD_GENLA_F_MANDATORY,	wire_protocol, DRBD_PROTOCOL_DEF)
+	__u32_field_def(7,	DRBD_GENLA_F_MANDATORY,	connect_int, DRBD_CONNECT_INT_DEF)
+	__u32_field_def(8,	DRBD_GENLA_F_MANDATORY,	timeout, DRBD_TIMEOUT_DEF)
+	__u32_field_def(9,	DRBD_GENLA_F_MANDATORY,	ping_int, DRBD_PING_INT_DEF)
+	__u32_field_def(10,	DRBD_GENLA_F_MANDATORY,	ping_timeo, DRBD_PING_TIMEO_DEF)
+	__u32_field_def(11,	DRBD_GENLA_F_MANDATORY,	sndbuf_size, DRBD_SNDBUF_SIZE_DEF)
+	__u32_field_def(12,	DRBD_GENLA_F_MANDATORY,	rcvbuf_size, DRBD_RCVBUF_SIZE_DEF)
+	__u32_field_def(13,	DRBD_GENLA_F_MANDATORY,	ko_count, DRBD_KO_COUNT_DEF)
+	__u32_field_def(14,	DRBD_GENLA_F_MANDATORY,	max_buffers, DRBD_MAX_BUFFERS_DEF)
+	__u32_field_def(15,	DRBD_GENLA_F_MANDATORY,	max_epoch_size, DRBD_MAX_EPOCH_SIZE_DEF)
+	__u32_field_def(16,	DRBD_GENLA_F_MANDATORY,	unplug_watermark, DRBD_UNPLUG_WATERMARK_DEF)
+	__u32_field_def(17,	DRBD_GENLA_F_MANDATORY,	after_sb_0p, DRBD_AFTER_SB_0P_DEF)
+	__u32_field_def(18,	DRBD_GENLA_F_MANDATORY,	after_sb_1p, DRBD_AFTER_SB_1P_DEF)
+	__u32_field_def(19,	DRBD_GENLA_F_MANDATORY,	after_sb_2p, DRBD_AFTER_SB_2P_DEF)
+	__u32_field_def(20,	DRBD_GENLA_F_MANDATORY,	rr_conflict, DRBD_RR_CONFLICT_DEF)
+	__u32_field_def(21,	DRBD_GENLA_F_MANDATORY,	on_congestion, DRBD_ON_CONGESTION_DEF)
+	__u32_field_def(22,	DRBD_GENLA_F_MANDATORY,	cong_fill, DRBD_CONG_FILL_DEF)
+	__u32_field_def(23,	DRBD_GENLA_F_MANDATORY,	cong_extents, DRBD_CONG_EXTENTS_DEF)
+	__flg_field_def(24, DRBD_GENLA_F_MANDATORY,	two_primaries, DRBD_ALLOW_TWO_PRIMARIES_DEF)
+	__flg_field(25, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	discard_my_data)
+	__flg_field_def(26, DRBD_GENLA_F_MANDATORY,	tcp_cork, DRBD_TCP_CORK_DEF)
+	__flg_field_def(27, DRBD_GENLA_F_MANDATORY,	always_asbp, DRBD_ALWAYS_ASBP_DEF)
+	__flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT,	tentative)
+	__flg_field_def(29,	DRBD_GENLA_F_MANDATORY,	use_rle, DRBD_USE_RLE_DEF)
+	/* 9: __u32_field_def(30,	DRBD_GENLA_F_MANDATORY,	fencing_policy, DRBD_FENCING_DEF) */
+)
+
+GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	assume_uptodate)
+)
+
+GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms,
+	__u64_field(1, DRBD_GENLA_F_MANDATORY,	resize_size)
+	__flg_field(2, DRBD_GENLA_F_MANDATORY,	resize_force)
+	__flg_field(3, DRBD_GENLA_F_MANDATORY,	no_resync)
+)
+
+GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
+	/* the reason of the broadcast,
+	 * if this is an event triggered broadcast. */
+	__u32_field(1, DRBD_GENLA_F_MANDATORY,	sib_reason)
+	__u32_field(2, DRBD_F_REQUIRED,	current_state)
+	__u64_field(3, DRBD_GENLA_F_MANDATORY,	capacity)
+	__u64_field(4, DRBD_GENLA_F_MANDATORY,	ed_uuid)
+
+	/* These are for broadcast from after state change work.
+	 * prev_state and new_state are from the moment the state change took
+	 * place, new_state is not necessarily the same as current_state;
+	 * there may have been more state changes since, which will be
+	 * broadcast soon, in their respective after-state-change work.  */
+	__u32_field(5, DRBD_GENLA_F_MANDATORY,	prev_state)
+	__u32_field(6, DRBD_GENLA_F_MANDATORY,	new_state)
+
+	/* if we have a local disk: */
+	__bin_field(7, DRBD_GENLA_F_MANDATORY,	uuids, (UI_SIZE*sizeof(__u64)))
+	__u32_field(8, DRBD_GENLA_F_MANDATORY,	disk_flags)
+	__u64_field(9, DRBD_GENLA_F_MANDATORY,	bits_total)
+	__u64_field(10, DRBD_GENLA_F_MANDATORY,	bits_oos)
+	/* and in case resync or online verify is active */
+	__u64_field(11, DRBD_GENLA_F_MANDATORY,	bits_rs_total)
+	__u64_field(12, DRBD_GENLA_F_MANDATORY,	bits_rs_failed)
+
+	/* for pre and post notifications of helper execution */
+	__str_field(13, DRBD_GENLA_F_MANDATORY,	helper, 32)
+	__u32_field(14, DRBD_GENLA_F_MANDATORY,	helper_exit_code)
+
+	__u64_field(15,                      0, send_cnt)
+	__u64_field(16,                      0, recv_cnt)
+	__u64_field(17,                      0, read_cnt)
+	__u64_field(18,                      0, writ_cnt)
+	__u64_field(19,                      0, al_writ_cnt)
+	__u64_field(20,                      0, bm_writ_cnt)
+	__u32_field(21,                      0, ap_bio_cnt)
+	__u32_field(22,                      0, ap_pending_cnt)
+	__u32_field(23,                      0, rs_pending_cnt)
+)
+
+GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms,
+	__u64_field(1, DRBD_GENLA_F_MANDATORY,	ov_start_sector)
+	__u64_field(2, DRBD_GENLA_F_MANDATORY,	ov_stop_sector)
+)
+
+GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY, clear_bm)
+)
+
+GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms,
+	__u32_field(1,	DRBD_F_REQUIRED,	timeout_type)
+)
+
+GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_disconnect)
+)
+
+GENL_struct(DRBD_NLA_DETACH_PARMS, 13, detach_parms,
+	__flg_field(1, DRBD_GENLA_F_MANDATORY,	force_detach)
+)
+
+/*
+ * Notifications and commands (genlmsghdr->cmd)
+ */
+GENL_mc_group(events)
+
+	/* kernel -> userspace announcement of changes */
+GENL_notification(
+	DRBD_EVENT, 1, events,
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_STATE_INFO, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_GENLA_F_MANDATORY)
+	GENL_tla_expected(DRBD_NLA_SYNCER_CONF, DRBD_GENLA_F_MANDATORY)
+)
+
+	/* query kernel for specific or all info */
+GENL_op(
+	DRBD_ADM_GET_STATUS, 2,
+	GENL_op_init(
+		.doit = drbd_adm_get_status,
+		.dumpit = drbd_adm_get_status_all,
+		/* anyone may ask for the status,
+		 * it is broadcast anyway */
+	),
+	/* To select the object .doit.
+	 * Or a subset of objects in .dumpit. */
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_GENLA_F_MANDATORY)
+)
+
+	/* add DRBD minor devices as volumes to resources */
+GENL_op(DRBD_ADM_NEW_MINOR, 5, GENL_doit(drbd_adm_add_minor),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+	/* add or delete resources */
+GENL_op(DRBD_ADM_NEW_RESOURCE, 7, GENL_doit(drbd_adm_new_resource),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_DEL_RESOURCE, 8, GENL_doit(drbd_adm_del_resource),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+GENL_op(DRBD_ADM_RESOURCE_OPTS, 9,
+	GENL_doit(drbd_adm_resource_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESOURCE_OPTS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_CONNECT, 10,
+	GENL_doit(drbd_adm_connect),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_CHG_NET_OPTS, 29,
+	GENL_doit(drbd_adm_net_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NET_CONF, DRBD_F_REQUIRED)
+)
+
+GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+
+GENL_op(DRBD_ADM_ATTACH, 12,
+	GENL_doit(drbd_adm_attach),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_CONF, DRBD_F_REQUIRED)
+)
+
+GENL_op(DRBD_ADM_CHG_DISK_OPTS, 28,
+	GENL_doit(drbd_adm_disk_opts),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DISK_OPTS, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_RESIZE, 13,
+	GENL_doit(drbd_adm_resize),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_PRIMARY, 14,
+	GENL_doit(drbd_adm_set_role),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_SECONDARY, 15,
+	GENL_doit(drbd_adm_set_role),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, DRBD_F_REQUIRED)
+)
+
+GENL_op(
+	DRBD_ADM_NEW_C_UUID, 16,
+	GENL_doit(drbd_adm_new_c_uuid),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(
+	DRBD_ADM_START_OV, 17,
+	GENL_doit(drbd_adm_start_ov),
+	GENL_tla_expected(DRBD_NLA_START_OV_PARMS, DRBD_GENLA_F_MANDATORY)
+)
+
+GENL_op(DRBD_ADM_DETACH,	18, GENL_doit(drbd_adm_detach),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED)
+	GENL_tla_expected(DRBD_NLA_DETACH_PARMS, DRBD_GENLA_F_MANDATORY))
+
+GENL_op(DRBD_ADM_INVALIDATE,	19, GENL_doit(drbd_adm_invalidate),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_INVAL_PEER,	20, GENL_doit(drbd_adm_invalidate_peer),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_PAUSE_SYNC,	21, GENL_doit(drbd_adm_pause_sync),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_SYNC,	22, GENL_doit(drbd_adm_resume_sync),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_SUSPEND_IO,	23, GENL_doit(drbd_adm_suspend_io),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_RESUME_IO,	24, GENL_doit(drbd_adm_resume_io),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_OUTDATE,	25, GENL_doit(drbd_adm_outdate),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))
+GENL_op(DRBD_ADM_DOWN,		27, GENL_doit(drbd_adm_down),
+	GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, DRBD_F_REQUIRED))

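To make the macro machinery concrete: every GENL_struct() above is expanded
twice, once by genl_magic_struct.h into a plain C struct and once by
genl_magic_func.h into the matching nla_policy and conversion helpers.
Hand-expanding DRBD_NLA_DETACH_PARMS gives roughly this shape (an
illustration, not the literal generated code):

	/* from genl_magic_struct.h: the struct declaration */
	struct detach_parms {
		unsigned force_detach:1;  /* tag 1; also enum entry T_force_detach */
	};

	/* from genl_magic_func.h: the per-struct policy, indexed by tag */
	static struct nla_policy detach_parms_nl_policy[] = {
		[1] = { .type = NLA_U8 },  /* assumption: flag fields travel as u8 */
	};
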
+ 55 - 0
include/linux/drbd_genl_api.h

@@ -0,0 +1,55 @@
+#ifndef DRBD_GENL_STRUCT_H
+#define DRBD_GENL_STRUCT_H
+
+/**
+ * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests
+ * @minor:
+ *     For admin requests (user -> kernel): which minor device to operate on.
+ *     For (unicast) replies or informational (broadcast) messages
+ *     (kernel -> user): which minor device the information is about.
+ *     If we do not operate on minors, but on connections or resources,
+ *     the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT
+ *     is used instead.
+ * @flags: possible operation modifiers (relevant only for user->kernel):
+ *     DRBD_GENL_F_SET_DEFAULTS
+ * @volume:
+ *     When creating a new minor (adding it to a resource), the resource needs
+ *     to know which volume number within the resource this is supposed to be.
+ *     The volume number corresponds to the same volume number on the remote side,
+ *     whereas the minor number on the remote side may be different
+ *     (union with flags).
+ * @ret_code: kernel->userland unicast cfg reply return code (union with flags);
+ */
+struct drbd_genlmsghdr {
+	__u32 minor;
+	union {
+	__u32 flags;
+	__s32 ret_code;
+	};
+};
+
+/* To be used in drbd_genlmsghdr.flags */
+enum {
+	DRBD_GENL_F_SET_DEFAULTS = 1,
+};
+
+enum drbd_state_info_bcast_reason {
+	SIB_GET_STATUS_REPLY = 1,
+	SIB_STATE_CHANGE = 2,
+	SIB_HELPER_PRE = 3,
+	SIB_HELPER_POST = 4,
+	SIB_SYNC_PROGRESS = 5,
+};
+
+/* hack around predefined gcc/cpp "linux=1",
+ * we cannot possibly include <1/drbd_genl.h> */
+#undef linux
+
+#include <linux/drbd.h>
+#define GENL_MAGIC_VERSION	API_VERSION
+#define GENL_MAGIC_FAMILY	drbd
+#define GENL_MAGIC_FAMILY_HDRSZ	sizeof(struct drbd_genlmsghdr)
+#define GENL_MAGIC_INCLUDE_FILE <linux/drbd_genl.h>
+#include <linux/genl_magic_struct.h>
+
+#endif

+ 68 - 22
include/linux/drbd_limits.h

@@ -16,29 +16,37 @@
 #define DEBUG_RANGE_CHECK 0
 
 #define DRBD_MINOR_COUNT_MIN 1
-#define DRBD_MINOR_COUNT_MAX 256
+#define DRBD_MINOR_COUNT_MAX 255
 #define DRBD_MINOR_COUNT_DEF 32
+#define DRBD_MINOR_COUNT_SCALE '1'
+
+#define DRBD_VOLUME_MAX 65535
 
 #define DRBD_DIALOG_REFRESH_MIN 0
 #define DRBD_DIALOG_REFRESH_MAX 600
+#define DRBD_DIALOG_REFRESH_SCALE '1'
 
 /* valid port number */
 #define DRBD_PORT_MIN 1
 #define DRBD_PORT_MAX 0xffff
+#define DRBD_PORT_SCALE '1'
 
 /* startup { */
   /* if you want more than 3.4 days, disable */
 #define DRBD_WFC_TIMEOUT_MIN 0
 #define DRBD_WFC_TIMEOUT_MAX 300000
 #define DRBD_WFC_TIMEOUT_DEF 0
+#define DRBD_WFC_TIMEOUT_SCALE '1'
 
 #define DRBD_DEGR_WFC_TIMEOUT_MIN 0
 #define DRBD_DEGR_WFC_TIMEOUT_MAX 300000
 #define DRBD_DEGR_WFC_TIMEOUT_DEF 0
+#define DRBD_DEGR_WFC_TIMEOUT_SCALE '1'
 
 #define DRBD_OUTDATED_WFC_TIMEOUT_MIN 0
 #define DRBD_OUTDATED_WFC_TIMEOUT_MAX 300000
 #define DRBD_OUTDATED_WFC_TIMEOUT_DEF 0
+#define DRBD_OUTDATED_WFC_TIMEOUT_SCALE '1'
 /* }*/
 
 /* net { */
@@ -47,75 +55,91 @@
 #define DRBD_TIMEOUT_MIN 1
 #define DRBD_TIMEOUT_MAX 600
 #define DRBD_TIMEOUT_DEF 60       /* 6 seconds */
+#define DRBD_TIMEOUT_SCALE '1'
 
  /* If backing disk takes longer than disk_timeout, mark the disk as failed */
 #define DRBD_DISK_TIMEOUT_MIN 0    /* 0 = disabled */
 #define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
 #define DRBD_DISK_TIMEOUT_DEF 0    /* disabled */
+#define DRBD_DISK_TIMEOUT_SCALE '1'
 
   /* active connection retries when C_WF_CONNECTION */
 #define DRBD_CONNECT_INT_MIN 1
 #define DRBD_CONNECT_INT_MAX 120
 #define DRBD_CONNECT_INT_DEF 10   /* seconds */
+#define DRBD_CONNECT_INT_SCALE '1'
 
   /* keep-alive probes when idle */
 #define DRBD_PING_INT_MIN 1
 #define DRBD_PING_INT_MAX 120
 #define DRBD_PING_INT_DEF 10
+#define DRBD_PING_INT_SCALE '1'
 
  /* timeout for the ping packets.*/
 #define DRBD_PING_TIMEO_MIN  1
 #define DRBD_PING_TIMEO_MAX  300
 #define DRBD_PING_TIMEO_DEF  5
+#define DRBD_PING_TIMEO_SCALE '1'
 
   /* max number of write requests between write barriers */
 #define DRBD_MAX_EPOCH_SIZE_MIN 1
 #define DRBD_MAX_EPOCH_SIZE_MAX 20000
 #define DRBD_MAX_EPOCH_SIZE_DEF 2048
+#define DRBD_MAX_EPOCH_SIZE_SCALE '1'
 
   /* I don't think that a tcp send buffer of more than 10M is useful */
 #define DRBD_SNDBUF_SIZE_MIN  0
 #define DRBD_SNDBUF_SIZE_MAX  (10<<20)
 #define DRBD_SNDBUF_SIZE_DEF  0
+#define DRBD_SNDBUF_SIZE_SCALE '1'
 
 #define DRBD_RCVBUF_SIZE_MIN  0
 #define DRBD_RCVBUF_SIZE_MAX  (10<<20)
 #define DRBD_RCVBUF_SIZE_DEF  0
+#define DRBD_RCVBUF_SIZE_SCALE '1'
 
   /* @4k PageSize -> 128kB - 512MB */
 #define DRBD_MAX_BUFFERS_MIN  32
 #define DRBD_MAX_BUFFERS_MAX  131072
 #define DRBD_MAX_BUFFERS_DEF  2048
+#define DRBD_MAX_BUFFERS_SCALE '1'
 
   /* @4k PageSize -> 4kB - 512MB */
 #define DRBD_UNPLUG_WATERMARK_MIN  1
 #define DRBD_UNPLUG_WATERMARK_MAX  131072
 #define DRBD_UNPLUG_WATERMARK_DEF (DRBD_MAX_BUFFERS_DEF/16)
+#define DRBD_UNPLUG_WATERMARK_SCALE '1'
 
   /* 0 is disabled.
    * 200 should be more than enough even for very short timeouts */
 #define DRBD_KO_COUNT_MIN  0
 #define DRBD_KO_COUNT_MAX  200
-#define DRBD_KO_COUNT_DEF  0
+#define DRBD_KO_COUNT_DEF  7
+#define DRBD_KO_COUNT_SCALE '1'
 /* } */
 
 /* syncer { */
   /* FIXME allow rate to be zero? */
-#define DRBD_RATE_MIN 1
+#define DRBD_RESYNC_RATE_MIN 1
 /* channel bonding 10 GbE, or other hardware */
-#define DRBD_RATE_MAX (4 << 20)
-#define DRBD_RATE_DEF 250  /* kb/second */
+#define DRBD_RESYNC_RATE_MAX (4 << 20)
+#define DRBD_RESYNC_RATE_DEF 250
+#define DRBD_RESYNC_RATE_SCALE 'k'  /* kilobytes */
 
   /* less than 7 would hit performance unnecessarily.
-   * 3833 is the largest prime that still does fit
-   * into 64 sectors of activity log */
+   * 919 slots of context information per transaction,
+   * 32k activity log, 4k transaction size,
+   * one transaction in flight:
+   * 919 * 7 = 6433 */
 #define DRBD_AL_EXTENTS_MIN  7
-#define DRBD_AL_EXTENTS_MAX  3833
-#define DRBD_AL_EXTENTS_DEF  127
+#define DRBD_AL_EXTENTS_MAX  6433
+#define DRBD_AL_EXTENTS_DEF  1237
+#define DRBD_AL_EXTENTS_SCALE '1'
 
-#define DRBD_AFTER_MIN  -1
-#define DRBD_AFTER_MAX  255
-#define DRBD_AFTER_DEF  -1
+#define DRBD_MINOR_NUMBER_MIN  -1
+#define DRBD_MINOR_NUMBER_MAX  ((1 << 20) - 1)
+#define DRBD_MINOR_NUMBER_DEF  -1
+#define DRBD_MINOR_NUMBER_SCALE '1'
 
 /* } */
 
@@ -124,11 +148,12 @@
  * the upper limit with 64bit kernel, enough ram and flexible meta data
  * is 1 PiB, currently. */
 /* DRBD_MAX_SECTORS */
-#define DRBD_DISK_SIZE_SECT_MIN  0
-#define DRBD_DISK_SIZE_SECT_MAX  (1 * (2LLU << 40))
-#define DRBD_DISK_SIZE_SECT_DEF  0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_MIN  0
+#define DRBD_DISK_SIZE_MAX  (1 * (2LLU << 40))
+#define DRBD_DISK_SIZE_DEF  0 /* = disabled = no user size... */
+#define DRBD_DISK_SIZE_SCALE 's'  /* sectors */
 
-#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON
+#define DRBD_ON_IO_ERROR_DEF EP_DETACH
 #define DRBD_FENCING_DEF FP_DONT_CARE
 #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT
 #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
@@ -136,38 +161,59 @@
 #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
 #define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
 #define DRBD_ON_CONGESTION_DEF OC_BLOCK
+#define DRBD_READ_BALANCING_DEF RB_PREFER_LOCAL
 
 #define DRBD_MAX_BIO_BVECS_MIN 0
 #define DRBD_MAX_BIO_BVECS_MAX 128
 #define DRBD_MAX_BIO_BVECS_DEF 0
+#define DRBD_MAX_BIO_BVECS_SCALE '1'
 
 #define DRBD_C_PLAN_AHEAD_MIN  0
 #define DRBD_C_PLAN_AHEAD_MAX  300
-#define DRBD_C_PLAN_AHEAD_DEF  0 /* RS rate controller disabled by default */
+#define DRBD_C_PLAN_AHEAD_DEF  20
+#define DRBD_C_PLAN_AHEAD_SCALE '1'
 
 #define DRBD_C_DELAY_TARGET_MIN 1
 #define DRBD_C_DELAY_TARGET_MAX 100
 #define DRBD_C_DELAY_TARGET_DEF 10
+#define DRBD_C_DELAY_TARGET_SCALE '1'
 
 #define DRBD_C_FILL_TARGET_MIN 0
 #define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
-#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
+#define DRBD_C_FILL_TARGET_DEF 100 /* Try to place 50KiB in socket send buffer during resync */
+#define DRBD_C_FILL_TARGET_SCALE 's'  /* sectors */
 
-#define DRBD_C_MAX_RATE_MIN     250 /* kByte/sec */
+#define DRBD_C_MAX_RATE_MIN     250
 #define DRBD_C_MAX_RATE_MAX     (4 << 20)
 #define DRBD_C_MAX_RATE_DEF     102400
+#define DRBD_C_MAX_RATE_SCALE	'k'  /* kilobytes */
 
-#define DRBD_C_MIN_RATE_MIN     0 /* kByte/sec */
+#define DRBD_C_MIN_RATE_MIN     0
 #define DRBD_C_MIN_RATE_MAX     (4 << 20)
-#define DRBD_C_MIN_RATE_DEF     4096
+#define DRBD_C_MIN_RATE_DEF     250
+#define DRBD_C_MIN_RATE_SCALE	'k'  /* kilobytes */
 
 #define DRBD_CONG_FILL_MIN	0
 #define DRBD_CONG_FILL_MAX	(10<<21) /* 10GByte in sectors */
 #define DRBD_CONG_FILL_DEF	0
+#define DRBD_CONG_FILL_SCALE	's'  /* sectors */
 
 #define DRBD_CONG_EXTENTS_MIN	DRBD_AL_EXTENTS_MIN
 #define DRBD_CONG_EXTENTS_MAX	DRBD_AL_EXTENTS_MAX
 #define DRBD_CONG_EXTENTS_DEF	DRBD_AL_EXTENTS_DEF
+#define DRBD_CONG_EXTENTS_SCALE DRBD_AL_EXTENTS_SCALE
+
+#define DRBD_PROTOCOL_DEF DRBD_PROT_C
+
+#define DRBD_DISK_BARRIER_DEF	0
+#define DRBD_DISK_FLUSHES_DEF	1
+#define DRBD_DISK_DRAIN_DEF	1
+#define DRBD_MD_FLUSHES_DEF	1
+#define DRBD_TCP_CORK_DEF	1
+#define DRBD_AL_UPDATES_DEF     1
+
+#define DRBD_ALLOW_TWO_PRIMARIES_DEF	0
+#define DRBD_ALWAYS_ASBP_DEF	0
+#define DRBD_USE_RLE_DEF	1
 
-#undef RANGE
 #endif
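
Each limit now carries a *_SCALE character naming its unit: '1' for plain
counts, 'k' for kilobyte values, 's' for 512-byte sector counts. A sketch of
how a configuration front end might normalize a scaled value to bytes (the
helper is illustrative, not part of this header):

	/* Illustrative: map a DRBD_*_SCALE unit character to bytes.
	 * '1' values are dimensionless counts and pass through unchanged. */
	static unsigned long long scaled_to_bytes(unsigned long long val, char scale)
	{
		switch (scale) {
		case 'k': return val << 10;	/* kilobytes (KiB) to bytes */
		case 's': return val << 9;	/* 512-byte sectors to bytes */
		default:  return val;		/* '1' or unknown: plain count */
		}
	}

	/* e.g. scaled_to_bytes(DRBD_RESYNC_RATE_DEF, DRBD_RESYNC_RATE_SCALE)
	 * yields the default 250 KiB/s resync rate in bytes per second. */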

+ 0 - 163
include/linux/drbd_nl.h

@@ -1,163 +0,0 @@
-/*
-   PAKET( name,
-	  TYPE ( pn, pr, member )
-	  ...
-   )
-
-   You may never reissue one of the pn arguments
-*/
-
-#if !defined(NL_PACKET) || !defined(NL_STRING) || !defined(NL_INTEGER) || !defined(NL_BIT) || !defined(NL_INT64)
-#error "The macros NL_PACKET, NL_STRING, NL_INTEGER, NL_INT64 and NL_BIT needs to be defined"
-#endif
-
-NL_PACKET(primary, 1,
-       NL_BIT(		1,	T_MAY_IGNORE,	primary_force)
-)
-
-NL_PACKET(secondary, 2, )
-
-NL_PACKET(disk_conf, 3,
-	NL_INT64(	2,	T_MAY_IGNORE,	disk_size)
-	NL_STRING(	3,	T_MANDATORY,	backing_dev,	128)
-	NL_STRING(	4,	T_MANDATORY,	meta_dev,	128)
-	NL_INTEGER(	5,	T_MANDATORY,	meta_dev_idx)
-	NL_INTEGER(	6,	T_MAY_IGNORE,	on_io_error)
-	NL_INTEGER(	7,	T_MAY_IGNORE,	fencing)
-	NL_BIT(		37,	T_MAY_IGNORE,	use_bmbv)
-	NL_BIT(		53,	T_MAY_IGNORE,	no_disk_flush)
-	NL_BIT(		54,	T_MAY_IGNORE,	no_md_flush)
-	  /*  55 max_bio_size was available in 8.2.6rc2 */
-	NL_INTEGER(	56,	T_MAY_IGNORE,	max_bio_bvecs)
-	NL_BIT(		57,	T_MAY_IGNORE,	no_disk_barrier)
-	NL_BIT(		58,	T_MAY_IGNORE,	no_disk_drain)
-	NL_INTEGER(	89,	T_MAY_IGNORE,	disk_timeout)
-)
-
-NL_PACKET(detach, 4,
-	NL_BIT(		88,	T_MANDATORY,	detach_force)
-)
-
-NL_PACKET(net_conf, 5,
-	NL_STRING(	8,	T_MANDATORY,	my_addr,	128)
-	NL_STRING(	9,	T_MANDATORY,	peer_addr,	128)
-	NL_STRING(	10,	T_MAY_IGNORE,	shared_secret,	SHARED_SECRET_MAX)
-	NL_STRING(	11,	T_MAY_IGNORE,	cram_hmac_alg,	SHARED_SECRET_MAX)
-	NL_STRING(	44,	T_MAY_IGNORE,	integrity_alg,	SHARED_SECRET_MAX)
-	NL_INTEGER(	14,	T_MAY_IGNORE,	timeout)
-	NL_INTEGER(	15,	T_MANDATORY,	wire_protocol)
-	NL_INTEGER(	16,	T_MAY_IGNORE,	try_connect_int)
-	NL_INTEGER(	17,	T_MAY_IGNORE,	ping_int)
-	NL_INTEGER(	18,	T_MAY_IGNORE,	max_epoch_size)
-	NL_INTEGER(	19,	T_MAY_IGNORE,	max_buffers)
-	NL_INTEGER(	20,	T_MAY_IGNORE,	unplug_watermark)
-	NL_INTEGER(	21,	T_MAY_IGNORE,	sndbuf_size)
-	NL_INTEGER(	22,	T_MAY_IGNORE,	ko_count)
-	NL_INTEGER(	24,	T_MAY_IGNORE,	after_sb_0p)
-	NL_INTEGER(	25,	T_MAY_IGNORE,	after_sb_1p)
-	NL_INTEGER(	26,	T_MAY_IGNORE,	after_sb_2p)
-	NL_INTEGER(	39,	T_MAY_IGNORE,	rr_conflict)
-	NL_INTEGER(	40,	T_MAY_IGNORE,	ping_timeo)
-	NL_INTEGER(	67,	T_MAY_IGNORE,	rcvbuf_size)
-	NL_INTEGER(	81,	T_MAY_IGNORE,	on_congestion)
-	NL_INTEGER(	82,	T_MAY_IGNORE,	cong_fill)
-	NL_INTEGER(	83,	T_MAY_IGNORE,	cong_extents)
-	  /* 59 addr_family was available in GIT, never released */
-	NL_BIT(		60,	T_MANDATORY,	mind_af)
-	NL_BIT(		27,	T_MAY_IGNORE,	want_lose)
-	NL_BIT(		28,	T_MAY_IGNORE,	two_primaries)
-	NL_BIT(		41,	T_MAY_IGNORE,	always_asbp)
-	NL_BIT(		61,	T_MAY_IGNORE,	no_cork)
-	NL_BIT(		62,	T_MANDATORY,	auto_sndbuf_size)
-	NL_BIT(		70,	T_MANDATORY,	dry_run)
-)
-
-NL_PACKET(disconnect, 6,
-	NL_BIT(		84,	T_MAY_IGNORE,	force)
-)
-
-NL_PACKET(resize, 7,
-	NL_INT64(		29,	T_MAY_IGNORE,	resize_size)
-	NL_BIT(			68,	T_MAY_IGNORE,	resize_force)
-	NL_BIT(			69,	T_MANDATORY,	no_resync)
-)
-
-NL_PACKET(syncer_conf, 8,
-	NL_INTEGER(	30,	T_MAY_IGNORE,	rate)
-	NL_INTEGER(	31,	T_MAY_IGNORE,	after)
-	NL_INTEGER(	32,	T_MAY_IGNORE,	al_extents)
-/*	NL_INTEGER(     71,	T_MAY_IGNORE,	dp_volume)
- *	NL_INTEGER(     72,	T_MAY_IGNORE,	dp_interval)
- *	NL_INTEGER(     73,	T_MAY_IGNORE,	throttle_th)
- *	NL_INTEGER(     74,	T_MAY_IGNORE,	hold_off_th)
- * feature will be reimplemented differently with 8.3.9 */
-	NL_STRING(      52,     T_MAY_IGNORE,   verify_alg,     SHARED_SECRET_MAX)
-	NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32)
-	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX)
-	NL_BIT(         65,     T_MAY_IGNORE,   use_rle)
-	NL_INTEGER(	75,	T_MAY_IGNORE,	on_no_data)
-	NL_INTEGER(	76,	T_MAY_IGNORE,	c_plan_ahead)
-	NL_INTEGER(     77,	T_MAY_IGNORE,	c_delay_target)
-	NL_INTEGER(     78,	T_MAY_IGNORE,	c_fill_target)
-	NL_INTEGER(     79,	T_MAY_IGNORE,	c_max_rate)
-	NL_INTEGER(     80,	T_MAY_IGNORE,	c_min_rate)
-)
-
-NL_PACKET(invalidate, 9, )
-NL_PACKET(invalidate_peer, 10, )
-NL_PACKET(pause_sync, 11, )
-NL_PACKET(resume_sync, 12, )
-NL_PACKET(suspend_io, 13, )
-NL_PACKET(resume_io, 14, )
-NL_PACKET(outdate, 15, )
-NL_PACKET(get_config, 16, )
-NL_PACKET(get_state, 17,
-	NL_INTEGER(	33,	T_MAY_IGNORE,	state_i)
-)
-
-NL_PACKET(get_uuids, 18,
-	NL_STRING(	34,	T_MAY_IGNORE,	uuids,	(UI_SIZE*sizeof(__u64)))
-	NL_INTEGER(	35,	T_MAY_IGNORE,	uuids_flags)
-)
-
-NL_PACKET(get_timeout_flag, 19,
-	NL_BIT(		36,	T_MAY_IGNORE,	use_degraded)
-)
-
-NL_PACKET(call_helper, 20,
-	NL_STRING(	38,	T_MAY_IGNORE,	helper,		32)
-)
-
-/* Tag nr 42 already allocated in drbd-8.1 development. */
-
-NL_PACKET(sync_progress, 23,
-	NL_INTEGER(	43,	T_MAY_IGNORE,	sync_progress)
-)
-
-NL_PACKET(dump_ee, 24,
-	NL_STRING(	45,	T_MAY_IGNORE,	dump_ee_reason, 32)
-	NL_STRING(	46,	T_MAY_IGNORE,	seen_digest, SHARED_SECRET_MAX)
-	NL_STRING(	47,	T_MAY_IGNORE,	calc_digest, SHARED_SECRET_MAX)
-	NL_INT64(	48,	T_MAY_IGNORE,	ee_sector)
-	NL_INT64(	49,	T_MAY_IGNORE,	ee_block_id)
-	NL_STRING(	50,	T_MAY_IGNORE,	ee_data,	32 << 10)
-)
-
-NL_PACKET(start_ov, 25,
-	NL_INT64(	66,	T_MAY_IGNORE,	start_sector)
-)
-
-NL_PACKET(new_c_uuid, 26,
-       NL_BIT(		63,	T_MANDATORY,	clear_bm)
-)
-
-#ifdef NL_RESPONSE
-NL_RESPONSE(return_code_only, 27)
-#endif
-
-#undef NL_PACKET
-#undef NL_INTEGER
-#undef NL_INT64
-#undef NL_BIT
-#undef NL_STRING
-#undef NL_RESPONSE

+ 0 - 84
include/linux/drbd_tag_magic.h

@@ -1,84 +0,0 @@
-#ifndef DRBD_TAG_MAGIC_H
-#define DRBD_TAG_MAGIC_H
-
-#define TT_END     0
-#define TT_REMOVED 0xE000
-
-/* declare packet_type enums */
-enum packet_types {
-#define NL_PACKET(name, number, fields) P_ ## name = number,
-#define NL_RESPONSE(name, number) P_ ## name = number,
-#define NL_INTEGER(pn, pr, member)
-#define NL_INT64(pn, pr, member)
-#define NL_BIT(pn, pr, member)
-#define NL_STRING(pn, pr, member, len)
-#include <linux/drbd_nl.h>
-	P_nl_after_last_packet,
-};
-
-/* These struct are used to deduce the size of the tag lists: */
-#define NL_PACKET(name, number, fields)	\
-	struct name ## _tag_len_struct { fields };
-#define NL_INTEGER(pn, pr, member)		\
-	int member; int tag_and_len ## member;
-#define NL_INT64(pn, pr, member)		\
-	__u64 member; int tag_and_len ## member;
-#define NL_BIT(pn, pr, member)		\
-	unsigned char member:1; int tag_and_len ## member;
-#define NL_STRING(pn, pr, member, len)	\
-	unsigned char member[len]; int member ## _len; \
-	int tag_and_len ## member;
-#include <linux/drbd_nl.h>
-
-/* declare tag-list-sizes */
-static const int tag_list_sizes[] = {
-#define NL_PACKET(name, number, fields) 2 fields ,
-#define NL_INTEGER(pn, pr, member)      + 4 + 4
-#define NL_INT64(pn, pr, member)        + 4 + 8
-#define NL_BIT(pn, pr, member)          + 4 + 1
-#define NL_STRING(pn, pr, member, len)  + 4 + (len)
-#include <linux/drbd_nl.h>
-};
-
-/* The two highest bits are used for the tag type */
-#define TT_MASK      0xC000
-#define TT_INTEGER   0x0000
-#define TT_INT64     0x4000
-#define TT_BIT       0x8000
-#define TT_STRING    0xC000
-/* The next bit indicates if processing of the tag is mandatory */
-#define T_MANDATORY  0x2000
-#define T_MAY_IGNORE 0x0000
-#define TN_MASK      0x1fff
-/* The remaining 13 bits are used to enumerate the tags */
-
-#define tag_type(T)   ((T) & TT_MASK)
-#define tag_number(T) ((T) & TN_MASK)
-
-/* declare tag enums */
-#define NL_PACKET(name, number, fields) fields
-enum drbd_tags {
-#define NL_INTEGER(pn, pr, member)     T_ ## member = pn | TT_INTEGER | pr ,
-#define NL_INT64(pn, pr, member)       T_ ## member = pn | TT_INT64   | pr ,
-#define NL_BIT(pn, pr, member)         T_ ## member = pn | TT_BIT     | pr ,
-#define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING  | pr ,
-#include <linux/drbd_nl.h>
-};
-
-struct tag {
-	const char *name;
-	int type_n_flags;
-	int max_len;
-};
-
-/* declare tag names */
-#define NL_PACKET(name, number, fields) fields
-static const struct tag tag_descriptions[] = {
-#define NL_INTEGER(pn, pr, member)     [ pn ] = { #member, TT_INTEGER | pr, sizeof(int)   },
-#define NL_INT64(pn, pr, member)       [ pn ] = { #member, TT_INT64   | pr, sizeof(__u64) },
-#define NL_BIT(pn, pr, member)         [ pn ] = { #member, TT_BIT     | pr, sizeof(int)   },
-#define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING  | pr, (len)         },
-#include <linux/drbd_nl.h>
-};
-
-#endif

+ 6 - 2
include/linux/genhd.h

@@ -88,10 +88,14 @@ struct disk_stats {
 };
 
 #define PARTITION_META_INFO_VOLNAMELTH	64
-#define PARTITION_META_INFO_UUIDLTH	16
+/*
+ * Enough for the string representation of any kind of UUID plus a NUL
+ * terminator.
+ * EFI UUID is 36 characters. MSDOS UUID is 11 characters.
+ */
+#define PARTITION_META_INFO_UUIDLTH	37
 
 struct partition_meta_info {
-	u8 uuid[PARTITION_META_INFO_UUIDLTH];	/* always big endian */
+	char uuid[PARTITION_META_INFO_UUIDLTH];
 	u8 volname[PARTITION_META_INFO_VOLNAMELTH];
 };
 

+ 422 - 0
include/linux/genl_magic_func.h

@@ -0,0 +1,422 @@
+#ifndef GENL_MAGIC_FUNC_H
+#define GENL_MAGIC_FUNC_H
+
+#include <linux/genl_magic_struct.h>
+
+/*
+ * Magic: declare tla policy						{{{1
+ * Magic: declare nested policies
+ *									{{{2
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+	[tag_name] = { .type = NLA_NESTED },
+
+static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static struct nla_policy s_name ## _nl_policy[] __read_mostly =		\
+{ s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, _type, __get,	\
+		 __put, __is_signed)					\
+	[attr_nr] = { .type = nla_type },
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen,	\
+		__get, __put, __is_signed)				\
+	[attr_nr] = { .type = nla_type,					\
+		      .len = maxlen - (nla_type == NLA_NUL_STRING) },
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#ifndef __KERNEL__
+#ifndef pr_info
+#define pr_info(args...)	fprintf(stderr, args);
+#endif
+#endif
+
+#ifdef GENL_MAGIC_DEBUG
+static void dprint_field(const char *dir, int nla_type,
+		const char *name, void *valp)
+{
+	__u64 val = valp ? *(__u32 *)valp : 1;
+	switch (nla_type) {
+	case NLA_U8:  val = (__u8)val;	/* fall through */
+	case NLA_U16: val = (__u16)val;	/* fall through */
+	case NLA_U32: val = (__u32)val;
+		pr_info("%s attr %s: %d 0x%08x\n", dir,
+			name, (int)val, (unsigned)val);
+		break;
+	case NLA_U64:
+		val = *(__u64*)valp;
+		pr_info("%s attr %s: %lld 0x%08llx\n", dir,
+			name, (long long)val, (unsigned long long)val);
+		break;
+	case NLA_FLAG:
+		if (val)
+			pr_info("%s attr %s: set\n", dir, name);
+		break;
+	}
+}
+
+static void dprint_array(const char *dir, int nla_type,
+		const char *name, const char *val, unsigned len)
+{
+	switch (nla_type) {
+	case NLA_NUL_STRING:
+		if (len && val[len-1] == '\0')
+			len--;
+		pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val);
+		break;
+	default:
+		/* we can always show 4 bytes,
+		 * that's what nlattrs are aligned to. */
+		pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n",
+			dir, name, len, val[0], val[1], val[2], val[3]);
+	}
+}
+
+#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b);
+
+/* Name is a member field name of the struct s.
+ * If s is NULL (only parsing, no copy requested in *_from_attrs()),
+ * nla is supposed to point to the attribute containing the information
+ * corresponding to that struct member. */
+#define DPRINT_FIELD(dir, nla_type, name, s, nla)			\
+	do {								\
+		if (s)							\
+			dprint_field(dir, nla_type, #name, &s->name);	\
+		else if (nla)						\
+			dprint_field(dir, nla_type, #name,		\
+				(nla_type == NLA_FLAG) ? NULL		\
+						: nla_data(nla));	\
+	} while (0)
+
+#define	DPRINT_ARRAY(dir, nla_type, name, s, nla)			\
+	do {								\
+		if (s)							\
+			dprint_array(dir, nla_type, #name,		\
+					s->name, s->name ## _len);	\
+		else if (nla)						\
+			dprint_array(dir, nla_type, #name,		\
+					nla_data(nla), nla_len(nla));	\
+	} while (0)
+#else
+#define DPRINT_TLA(a, op, b) do {} while (0)
+#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0)
+#define	DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0)
+#endif
+
+/*
+ * Magic: provide conversion functions					{{{1
+ * populate struct from attribute table:
+ *									{{{2
+ */
+
+/* processing of generic netlink messages is serialized.
+ * use one static buffer for parsing of nested attributes */
+static struct nlattr *nested_attr_tb[128];
+
+#ifndef BUILD_BUG_ON
+/* Force a compilation error if condition is true */
+#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition))
+/* Force a compilation error if condition is true, but also produce a
+   result (of value 0 and type size_t), so the expression can be used
+   e.g. in a structure initializer (or wherever else comma expressions
+   aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
+#endif
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+/* *_from_attrs functions are static, but potentially unused */		\
+static int __ ## s_name ## _from_attrs(struct s_name *s,		\
+		struct genl_info *info, bool exclude_invariants)	\
+{									\
+	const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1;		\
+	struct nlattr *tla = info->attrs[tag_number];			\
+	struct nlattr **ntb = nested_attr_tb;				\
+	struct nlattr *nla;						\
+	int err;							\
+	BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb));	\
+	if (!tla)							\
+		return -ENOMSG;						\
+	DPRINT_TLA(#s_name, "<=-", #tag_name);				\
+	err = drbd_nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy);	\
+	if (err)							\
+		return err;						\
+									\
+	s_fields							\
+	return 0;							\
+}					__attribute__((unused))		\
+static int s_name ## _from_attrs(struct s_name *s,			\
+						struct genl_info *info)	\
+{									\
+	return __ ## s_name ## _from_attrs(s, info, false);		\
+}					__attribute__((unused))		\
+static int s_name ## _from_attrs_for_change(struct s_name *s,		\
+						struct genl_info *info)	\
+{									\
+	return __ ## s_name ## _from_attrs(s, info, true);		\
+}					__attribute__((unused))		\
+
+#define __assign(attr_nr, attr_flag, name, nla_type, type, assignment...)	\
+		nla = ntb[attr_nr];						\
+		if (nla) {						\
+			if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
+				pr_info("<< must not change invariant attr: %s\n", #name);	\
+				return -EEXIST;				\
+			}						\
+			assignment;					\
+		} else if (exclude_invariants && ((attr_flag) & DRBD_F_INVARIANT)) {		\
+			/* attribute missing from payload, */		\
+			/* which was expected */			\
+		} else if ((attr_flag) & DRBD_F_REQUIRED) {		\
+			pr_info("<< missing attr: %s\n", #name);	\
+			return -ENOMSG;					\
+		}
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
+	__assign(attr_nr, attr_flag, name, nla_type, type,		\
+			if (s)						\
+				s->name = __get(nla);			\
+			DPRINT_FIELD("<<", nla_type, name, s, nla))
+
+/* validate_nla() already checked nla_len <= maxlen appropriately. */
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
+	__assign(attr_nr, attr_flag, name, nla_type, type,		\
+			if (s)						\
+				s->name ## _len =			\
+					__get(s->name, nla, maxlen);	\
+			DPRINT_ARRAY("<<", nla_type, name, s, nla))
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+/*
+ * Magic: define op number to op name mapping				{{{1
+ *									{{{2
+ */
+const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd)
+{
+	switch (cmd) {
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)		\
+	case op_num: return #op_name;
+#include GENL_MAGIC_INCLUDE_FILE
+	default:
+		     return "unknown";
+	}
+}
+
+#ifdef __KERNEL__
+#include <linux/stringify.h>
+/*
+ * Magic: define genl_ops						{{{1
+ *									{{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)		\
+{								\
+	handler							\
+	.cmd = op_name,						\
+	.policy	= CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy),	\
+},
+
+#define ZZZ_genl_ops		CONCAT_(GENL_MAGIC_FAMILY, _genl_ops)
+static struct genl_ops ZZZ_genl_ops[] __read_mostly = {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+/*
+ * Define the genl_family, multicast groups,				{{{1
+ * and provide register/unregister functions.
+ *									{{{2
+ */
+#define ZZZ_genl_family		CONCAT_(GENL_MAGIC_FAMILY, _genl_family)
+static struct genl_family ZZZ_genl_family __read_mostly = {
+	.id = GENL_ID_GENERATE,
+	.name = __stringify(GENL_MAGIC_FAMILY),
+	.version = GENL_MAGIC_VERSION,
+#ifdef GENL_MAGIC_FAMILY_HDRSZ
+	.hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ),
+#endif
+	.maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1,
+};
+
+/*
+ * Magic: define multicast groups
+ * Magic: define multicast group registration helper
+ */
+#undef GENL_mc_group
+#define GENL_mc_group(group)						\
+static struct genl_multicast_group					\
+CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = {		\
+	.name = #group,							\
+};									\
+static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)(	\
+	struct sk_buff *skb, gfp_t flags)				\
+{									\
+	unsigned int group_id =						\
+		CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id;	\
+	if (!group_id)							\
+		return -EINVAL;						\
+	return genlmsg_multicast(skb, 0, group_id, flags);		\
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void)
+{
+	int err = genl_register_family_with_ops(&ZZZ_genl_family,
+		ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops));
+	if (err)
+		return err;
+#undef GENL_mc_group
+#define GENL_mc_group(group)						\
+	err = genl_register_mc_group(&ZZZ_genl_family,			\
+		&CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group));		\
+	if (err)							\
+		goto fail;						\
+	else								\
+		pr_info("%s: mcg %s: %u\n", #group,			\
+			__stringify(GENL_MAGIC_FAMILY),			\
+			CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id);
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+	return 0;
+fail:
+	genl_unregister_family(&ZZZ_genl_family);
+	return err;
+}
+
+void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void)
+{
+	genl_unregister_family(&ZZZ_genl_family);
+}
+
+/*
+ * Magic: provide conversion functions					{{{1
+ * populate skb from struct.
+ *									{{{2
+ */
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s,	\
+		const bool exclude_sensitive)				\
+{									\
+	struct nlattr *tla = nla_nest_start(skb, tag_number);		\
+	if (!tla)							\
+		goto nla_put_failure;					\
+	DPRINT_TLA(#s_name, "-=>", #tag_name);				\
+	s_fields							\
+	nla_nest_end(skb, tla);						\
+	return 0;							\
+									\
+nla_put_failure:							\
+	if (tla)							\
+		nla_nest_cancel(skb, tla);				\
+        return -EMSGSIZE;						\
+}									\
+static inline int s_name ## _to_priv_skb(struct sk_buff *skb,		\
+		struct s_name *s)					\
+{									\
+	return s_name ## _to_skb(skb, s, 0);				\
+}									\
+static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb,		\
+		struct s_name *s)					\
+{									\
+	return s_name ## _to_skb(skb, s, 1);				\
+}
+
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
+	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
+		DPRINT_FIELD(">>", nla_type, name, s, NULL);		\
+		if (__put(skb, attr_nr, s->name))			\
+			goto nla_put_failure;				\
+	}
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
+	if (!exclude_sensitive || !((attr_flag) & DRBD_F_SENSITIVE)) {	\
+		DPRINT_ARRAY(">>",nla_type, name, s, NULL);		\
+		if (__put(skb, attr_nr, min_t(int, maxlen,		\
+			s->name ## _len + (nla_type == NLA_NUL_STRING)),\
+						s->name))		\
+			goto nla_put_failure;				\
+	}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+
+/* Functions for initializing structs to default values.  */
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)
+#undef __u32_field_def
+#define __u32_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __s32_field_def
+#define __s32_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __flg_field_def
+#define __flg_field_def(attr_nr, attr_flag, name, default)		\
+	x->name = default;
+#undef __str_field_def
+#define __str_field_def(attr_nr, attr_flag, name, maxlen)		\
+	memset(x->name, 0, sizeof(x->name));				\
+	x->name ## _len = 0;
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static void set_ ## s_name ## _defaults(struct s_name *x) __attribute__((unused)); \
+static void set_ ## s_name ## _defaults(struct s_name *x) {	\
+s_fields								\
+}
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#endif /* __KERNEL__ */
+
+/* }}}1 */
+#endif /* GENL_MAGIC_FUNC_H */
+/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */
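
With GENL_MAGIC_FAMILY defined as drbd (see the drbd header above), the
generated entry points are drbd_genl_register() and drbd_genl_unregister().
A sketch of the expected module hooks (the module name is illustrative):

	/* Sketch: wire the generated genetlink family into module init/exit. */
	static int __init example_init(void)
	{
		return drbd_genl_register();
	}

	static void __exit example_exit(void)
	{
		drbd_genl_unregister();
	}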

+ 277 - 0
include/linux/genl_magic_struct.h

@@ -0,0 +1,277 @@
+#ifndef GENL_MAGIC_STRUCT_H
+#define GENL_MAGIC_STRUCT_H
+
+#ifndef GENL_MAGIC_FAMILY
+# error "you need to define GENL_MAGIC_FAMILY before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_VERSION
+# error "you need to define GENL_MAGIC_VERSION before inclusion"
+#endif
+
+#ifndef GENL_MAGIC_INCLUDE_FILE
+# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion"
+#endif
+
+#include <linux/genetlink.h>
+#include <linux/types.h>
+
+#define CONCAT__(a,b)	a ## b
+#define CONCAT_(a,b)	CONCAT__(a,b)
+
+extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void);
+extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void);
+
+/*
+ * Extension of genl attribute validation policies			{{{2
+ */
+
+/*
+ * @DRBD_GENLA_F_MANDATORY: By default, netlink ignores attributes it does not
+ * know about.  This flag can be set in nlattr->nla_type to indicate that this
+ * attribute must not be ignored.
+ *
+ * We check and remove this flag in drbd_nla_check_mandatory() before
+ * validating the attribute types and lengths via nla_parse_nested().
+ */
+#define DRBD_GENLA_F_MANDATORY (1 << 14)
+
+/*
+ * Flags specific to drbd and not visible at the netlink layer, used in
+ * <struct>_from_attrs and <struct>_to_skb:
+ *
+ * @DRBD_F_REQUIRED: Attribute is required; a request without this attribute is
+ * invalid.
+ *
+ * @DRBD_F_SENSITIVE: Attribute includes sensitive information and must not be
+ * included in unprivileged get requests or broadcasts.
+ *
+ * @DRBD_F_INVARIANT: Attribute is set when an object is initially created, but
+ * cannot subsequently be changed.
+ */
+#define DRBD_F_REQUIRED (1 << 0)
+#define DRBD_F_SENSITIVE (1 << 1)
+#define DRBD_F_INVARIANT (1 << 2)
+
+#define __nla_type(x)	((__u16)((x) & NLA_TYPE_MASK & ~DRBD_GENLA_F_MANDATORY))
+
+/*									}}}1
+ * MAGIC
+ * multi-include macro expansion magic starts here
+ */
+
+/* MAGIC helpers							{{{2 */
+
+/* possible field types */
+#define __flg_field(attr_nr, attr_flag, name) \
+	__field(attr_nr, attr_flag, name, NLA_U8, char, \
+			nla_get_u8, nla_put_u8, false)
+#define __u8_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \
+			nla_get_u8, nla_put_u8, false)
+#define __u16_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U16, __u16, \
+			nla_get_u16, nla_put_u16, false)
+#define __u32_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U32, __u32, \
+			nla_get_u32, nla_put_u32, false)
+#define __s32_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U32, __s32, \
+			nla_get_u32, nla_put_u32, true)
+#define __u64_field(attr_nr, attr_flag, name)	\
+	__field(attr_nr, attr_flag, name, NLA_U64, __u64, \
+			nla_get_u64, nla_put_u64, false)
+#define __str_field(attr_nr, attr_flag, name, maxlen) \
+	__array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \
+			nla_strlcpy, nla_put, false)
+#define __bin_field(attr_nr, attr_flag, name, maxlen) \
+	__array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \
+			nla_memcpy, nla_put, false)
+
+/* fields with default values */
+#define __flg_field_def(attr_nr, attr_flag, name, default) \
+	__flg_field(attr_nr, attr_flag, name)
+#define __u32_field_def(attr_nr, attr_flag, name, default) \
+	__u32_field(attr_nr, attr_flag, name)
+#define __s32_field_def(attr_nr, attr_flag, name, default) \
+	__s32_field(attr_nr, attr_flag, name)
+#define __str_field_def(attr_nr, attr_flag, name, maxlen) \
+	__str_field(attr_nr, attr_flag, name, maxlen)
+
+#define GENL_op_init(args...)	args
+#define GENL_doit(handler)		\
+	.doit = handler,		\
+	.flags = GENL_ADMIN_PERM,
+#define GENL_dumpit(handler)		\
+	.dumpit = handler,		\
+	.flags = GENL_ADMIN_PERM,
+
+/*									}}}1
+ * Magic: define the enum symbols for genl_ops
+ * Magic: define the enum symbols for top level attributes
+ * Magic: define the enum symbols for nested attributes
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_mc_group
+#define GENL_mc_group(group)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)	\
+	op_name = op_num,
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, tla_list)			\
+	op_name = op_num,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields) \
+		tag_name = tag_number,
+
+enum {
+#include GENL_MAGIC_INCLUDE_FILE
+};
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)	\
+enum {								\
+	s_fields						\
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type,	\
+		__get, __put, __is_signed)			\
+	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type,	\
+		maxlen, __get, __put, __is_signed)		\
+	T_ ## name = (__u16)(attr_nr | ((attr_flag) & DRBD_GENLA_F_MANDATORY)),
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/*									}}}1
+ * Magic: compile time assert unique numbers for operations
+ * Magic: -"- unique numbers for top level attributes
+ * Magic: -"- unique numbers for nested attributes
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)	\
+	case op_name:
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)	\
+	case op_name:
+
+static inline void ct_assert_unique_operations(void)
+{
+	switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+		;
+	}
+}
+
+#undef GENL_op
+#define GENL_op(op_name, op_num, handler, attr_list)
+
+#undef GENL_notification
+#define GENL_notification(op_name, op_num, mcast_group, tla_list)
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+		case tag_number:
+
+static inline void ct_assert_unique_top_level_attributes(void)
+{
+	switch (0) {
+#include GENL_MAGIC_INCLUDE_FILE
+		;
+	}
+}
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+static inline void ct_assert_unique_ ## s_name ## _attributes(void)	\
+{									\
+	switch (0) {							\
+		s_fields						\
+			;						\
+	}								\
+}
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
+	case attr_nr:
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
+	case attr_nr:
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/*									}}}1
+ * Magic: declare structs
+ * struct <name> {
+ *	fields
+ * };
+ *									{{{2
+ */
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+struct s_name { s_fields };
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		__is_signed)						\
+	type name;
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, __is_signed)				\
+	type name[maxlen];	\
+	__u32 name ## _len;
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+#undef GENL_struct
+#define GENL_struct(tag_name, tag_number, s_name, s_fields)		\
+enum {									\
+	s_fields							\
+};
+
+#undef __field
+#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put,	\
+		is_signed)						\
+	F_ ## name ## _IS_SIGNED = is_signed,
+
+#undef __array
+#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen,	\
+		__get, __put, is_signed)				\
+	F_ ## name ## _IS_SIGNED = is_signed,
+
+#include GENL_MAGIC_INCLUDE_FILE
+
+/* }}}1 */
+#endif /* GENL_MAGIC_STRUCT_H */
+/* vim: set foldmethod=marker nofoldenable : */
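
All of the artifacts above (the nla_policy tables, the C structs, the
_from_attrs()/_to_skb() converters, the ops array) are generated by
expanding one include file under changing macro definitions. A hedged
sketch of what a single entry in GENL_MAGIC_INCLUDE_FILE might look like
(tag numbers, names, and the handler are made up, and the tla_list
argument is elided; the real entries live in <linux/drbd_genl.h>, which
is not part of this hunk):

	/* Hypothetical GENL_MAGIC_INCLUDE_FILE fragment: one nested
	 * attribute struct and one operation using it. */
	GENL_struct(DRBD_NLA_EXAMPLE_CONF, 42, example_conf,
		__u32_field(1, DRBD_F_REQUIRED, timeout)
		__flg_field(2, DRBD_GENLA_F_MANDATORY, use_feature)
		__str_field(3, 0, name, 32)
	)

	GENL_op(DRBD_ADM_EXAMPLE, 43,
		GENL_doit(example_adm_handler),
		/* tla_list: top-level attributes this op expects */)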

+ 11 - 0
include/linux/idr.h

@@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
 
 void __init idr_init_cache(void);
 
+/**
+ * idr_for_each_entry - iterate over an idr's elements of a given type
+ * @idp:     idr handle
+ * @entry:   the type * to use as cursor
+ * @id:      id entry's key
+ */
+#define idr_for_each_entry(idp, entry, id)				\
+	for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \
+	     entry != NULL;                                             \
+	     ++id, entry = (typeof(entry))idr_get_next((idp), &(id)))
+
 #endif /* __IDR_H__ */
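
A short usage sketch for the new iterator (the device struct is
illustrative; drbd uses this to walk its minor-number idr):

	/* Visit every pointer registered in an idr, in increasing id order. */
	struct my_device {
		const char *name;
	};

	static void print_all(struct idr *devices)
	{
		struct my_device *dev;
		int id;

		idr_for_each_entry(devices, dev, id)
			pr_info("id %d: %s\n", id, dev->name);
	}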

+ 3 - 0
include/linux/loop.h

@@ -53,10 +53,13 @@ struct loop_device {
 
 	spinlock_t		lo_lock;
 	struct bio_list		lo_bio_list;
+	unsigned int		lo_bio_count;
 	int			lo_state;
 	struct mutex		lo_ctl_mutex;
 	struct task_struct	*lo_thread;
 	wait_queue_head_t	lo_event;
+	/* wait queue for incoming requests */
+	wait_queue_head_t	lo_req_wait;
 
 	struct request_queue	*lo_queue;
 	struct gendisk		*lo_disk;

+ 43 - 24
include/linux/lru_cache.h

@@ -166,9 +166,11 @@ struct lc_element {
 	/* if we want to track a larger set of objects,
 	 * it needs to become arch independent u64 */
 	unsigned lc_number;
-
 	/* special label when on free list */
 #define LC_FREE (~0U)
+
+	/* for pending changes */
+	unsigned lc_new_number;
 };
 
 struct lru_cache {
@@ -176,6 +178,7 @@ struct lru_cache {
 	struct list_head lru;
 	struct list_head free;
 	struct list_head in_use;
+	struct list_head to_be_changed;
 
 	/* the pre-created kmem cache to allocate the objects from */
 	struct kmem_cache *lc_cache;
@@ -186,7 +189,7 @@ struct lru_cache {
 	size_t element_off;
 
 	/* number of elements (indices) */
-	unsigned int  nr_elements;
+	unsigned int nr_elements;
 	/* Arbitrary limit on maximum tracked objects. Practical limit is much
 	 * lower due to allocation failures, probably. For typical use cases,
 	 * nr_elements should be a few thousand at most.
@@ -194,18 +197,19 @@ struct lru_cache {
 	 * 8 high bits of .lc_index to be overloaded with flags in the future. */
 #define LC_MAX_ACTIVE	(1<<24)
 
+	/* allow accumulation of a few (index:label) changes,
+	 * but no more than max_pending_changes */
+	unsigned int max_pending_changes;
+	/* number of elements currently on to_be_changed list */
+	unsigned int pending_changes;
+
 	/* statistics */
-	unsigned used; /* number of lelements currently on in_use list */
-	unsigned long hits, misses, starving, dirty, changed;
+	unsigned used; /* number of elements currently on in_use list */
+	unsigned long hits, misses, starving, locked, changed;
 
 	/* see below: flag-bits for lru_cache */
 	unsigned long flags;
 
-	/* when changing the label of an index element */
-	unsigned int  new_number;
-
-	/* for paranoia when changing the label of an index element */
-	struct lc_element *changing_element;
 
 	void  *lc_private;
 	const char *name;
@@ -221,10 +225,15 @@ enum {
 	/* debugging aid, to catch concurrent access early.
 	 * user needs to guarantee exclusive access by proper locking! */
 	__LC_PARANOIA,
-	/* if we need to change the set, but currently there is a changing
-	 * transaction pending, we are "dirty", and must deferr further
-	 * changing requests */
+
+	/* annotate that the set is "dirty", possibly accumulating further
+	 * changes, until a transaction is finally triggered */
 	__LC_DIRTY,
+
+	/* Locked, no further changes allowed.
+	 * Also used to serialize changing transactions. */
+	__LC_LOCKED,
+
 	/* if we need to change the set, but currently there is no free nor
 	 * unused element available, we are "starving", and must not give out
 	 * further references, to guarantee that eventually some refcnt will
@@ -236,9 +245,11 @@ enum {
 };
 #define LC_PARANOIA (1<<__LC_PARANOIA)
 #define LC_DIRTY    (1<<__LC_DIRTY)
+#define LC_LOCKED   (1<<__LC_LOCKED)
 #define LC_STARVING (1<<__LC_STARVING)
 
 extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
 		unsigned e_count, size_t e_size, size_t e_off);
 extern void lc_reset(struct lru_cache *lc);
 extern void lc_destroy(struct lru_cache *lc);
@@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr);
 extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr);
 extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e);
-extern void lc_changed(struct lru_cache *lc, struct lc_element *e);
+extern void lc_committed(struct lru_cache *lc);
 
 struct seq_file;
 extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc);
@@ -258,17 +269,29 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char
 				void (*detail) (struct seq_file *, struct lc_element *));
 
 /**
- * lc_try_lock - can be used to stop lc_get() from changing the tracked set
+ * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set
  * @lc: the lru cache to operate on
  *
- * Note that the reference counts and order on the active and lru lists may
- * still change.  Returns true if we acquired the lock.
+ * Allows (expects) the set to be "dirty".  Note that the reference counts and
+ * order on the active and lru lists may still change.  Used to serialize
+ * changing transactions.  Returns true if we acquired the lock.
  */
-static inline int lc_try_lock(struct lru_cache *lc)
+static inline int lc_try_lock_for_transaction(struct lru_cache *lc)
 {
-	return !test_and_set_bit(__LC_DIRTY, &lc->flags);
+	return !test_and_set_bit(__LC_LOCKED, &lc->flags);
 }
 
+/**
+ * lc_try_lock - variant to stop lc_get() from changing the tracked set
+ * @lc: the lru cache to operate on
+ *
+ * Note that the reference counts and order on the active and lru lists may
+ * still change.  Only works on a "clean" set.  Returns true if we acquired the
+ * lock, which means there are no pending changes, and any further attempt to
+ * change the set will not succeed until the next lc_unlock().
+ */
+extern int lc_try_lock(struct lru_cache *lc);
+
 /**
  * lc_unlock - unlock @lc, allow lc_get() to change the set again
  * @lc: the lru cache to operate on
@@ -276,14 +299,10 @@ static inline int lc_try_lock(struct lru_cache *lc)
 static inline void lc_unlock(struct lru_cache *lc)
 {
 	clear_bit(__LC_DIRTY, &lc->flags);
-	smp_mb__after_clear_bit();
+	clear_bit_unlock(__LC_LOCKED, &lc->flags);
 }
 
-static inline int lc_is_used(struct lru_cache *lc, unsigned int enr)
-{
-	struct lc_element *e = lc_find(lc, enr);
-	return e && e->refcnt;
-}
+extern bool lc_is_used(struct lru_cache *lc, unsigned int enr);
 
 #define lc_entry(ptr, type, member) \
 	container_of(ptr, type, member)
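
The single in-flight change ("changing_element"/"new_number") is replaced
by a to_be_changed list: lc_get() may queue up to max_pending_changes label
changes, the user persists them as one transaction, and lc_committed()
moves them into the active set. A sketch of the intended calling pattern,
modeled on drbd's activity log (the locking discipline and the persistence
step are the caller's business; names are illustrative):

	static void use_label(struct lru_cache *lc, spinlock_t *lock,
			      unsigned int label)
	{
		struct lc_element *e;

		spin_lock_irq(lock);
		e = lc_get(lc, label);	/* may queue a pending label change */
		spin_unlock_irq(lock);
		if (!e)
			return;		/* starving or locked; retry later */

		if (lc->pending_changes && lc_try_lock_for_transaction(lc)) {
			write_transaction(lc);	/* caller-provided: persist changes */
			spin_lock_irq(lock);
			lc_committed(lc);	/* move to_be_changed into the set */
			lc_unlock(lc);		/* allow further changes */
			spin_unlock_irq(lock);
		}
	}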

+ 164 - 0
include/linux/wait.h

@@ -550,6 +550,170 @@ do {									\
 	__ret;								\
 })
 
+
+#define __wait_event_lock_irq(wq, condition, lock, cmd)			\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_lock_irq_cmd - sleep until a condition becomes true. The
+ *			     condition is checked under the lock. This
+ *			     is expected to be called with the lock
+ *			     taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd
+ *	  and schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ */
+#define wait_event_lock_irq_cmd(wq, condition, lock, cmd)		\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, cmd);		\
+} while (0)
+
+/**
+ * wait_event_lock_irq - sleep until a condition becomes true. The
+ *			 condition is checked under the lock. This
+ *			 is expected to be called with the lock
+ *			 taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true. The @condition is checked each time
+ * the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ */
+#define wait_event_lock_irq(wq, condition, lock)			\
+do {									\
+	if (condition)							\
+		break;							\
+	__wait_event_lock_irq(wq, condition, lock, );			\
+} while (0)
+
+
+#define __wait_event_interruptible_lock_irq(wq, condition,		\
+					    lock, ret, cmd)		\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		spin_unlock_irq(&lock);					\
+		cmd;							\
+		schedule();						\
+		spin_lock_irq(&lock);					\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_interruptible_lock_irq_cmd - sleep until a condition becomes true.
+ *		The condition is checked under the lock. This is expected to
+ *		be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before cmd and
+ *	  schedule() and reacquired afterwards.
+ * @cmd: a command which is invoked outside the critical section before
+ *	 sleep
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before invoking the cmd and going to sleep and is reacquired
+ * afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd)	\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, cmd);	\
+	__ret;								\
+})
+
+/**
+ * wait_event_interruptible_lock_irq - sleep until a condition becomes true.
+ *		The condition is checked under the lock. This is expected
+ *		to be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ *
+ * The macro will return -ERESTARTSYS if it was interrupted by a signal
+ * and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_lock_irq(wq, condition, lock)		\
+({									\
+	int __ret = 0;							\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq(wq, condition,	\
+						    lock, __ret, );	\
+	__ret;								\
+})
+
+
 /*
  * These are the old interfaces to sleep waiting for an event.
  * They are racy.  DO NOT use them, use the wait_event* interfaces above.
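
This is the interface the loop changes above build on: a producer can now
sleep under lo_lock until lo_bio_count drops, without open-coding the
prepare_to_wait() loop. A sketch of that pattern (LOOP_BIO_LIMIT is an
illustrative threshold, not a constant from this diff):

	/* Throttle bio submission on a locked counter; mirrors the new
	 * drivers/block/loop.c usage in simplified form. */
	static void add_bio_throttled(struct loop_device *lo, struct bio *bio)
	{
		spin_lock_irq(&lo->lo_lock);
		/* Drops lo_lock around schedule(), reacquires it on wakeup. */
		wait_event_lock_irq(lo->lo_req_wait,
				    lo->lo_bio_count < LOOP_BIO_LIMIT,
				    lo->lo_lock);
		bio_list_add(&lo->lo_bio_list, bio);
		lo->lo_bio_count++;
		spin_unlock_irq(&lo->lo_lock);
	}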

+ 40 - 21
init/do_mounts.c

@@ -69,23 +69,28 @@ __setup("ro", readonly);
 __setup("rw", readwrite);
 
 #ifdef CONFIG_BLOCK
+struct uuidcmp {
+	const char *uuid;
+	int len;
+};
+
 /**
  * match_dev_by_uuid - callback for finding a partition using its uuid
  * @dev:	device passed in by the caller
- * @data:	opaque pointer to a 36 byte char array with a UUID
+ * @data:	opaque pointer to the desired struct uuidcmp to match
  *
  * Returns 1 if the device matches, and 0 otherwise.
  */
 static int match_dev_by_uuid(struct device *dev, void *data)
 {
-	u8 *uuid = data;
+	struct uuidcmp *cmp = data;
 	struct hd_struct *part = dev_to_part(dev);
 
 	if (!part->info)
 		goto no_match;
 
-	if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
-			goto no_match;
+	if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len))
+		goto no_match;
 
 	return 1;
 no_match:
@@ -95,7 +100,7 @@ no_match:
 
 /**
  * devt_from_partuuid - looks up the dev_t of a partition by its UUID
- * @uuid:	min 36 byte char array containing a hex ascii UUID
+ * @uuid:	char array containing ascii UUID
  *
  * The function will return the first partition which contains a matching
  * UUID value in its partition_meta_info struct.  This does not search
@@ -106,38 +111,41 @@ no_match:
  *
  * Returns the matching dev_t on success or 0 on failure.
  */
-static dev_t devt_from_partuuid(char *uuid_str)
+static dev_t devt_from_partuuid(const char *uuid_str)
 {
 	dev_t res = 0;
+	struct uuidcmp cmp;
 	struct device *dev = NULL;
-	u8 uuid[16];
 	struct gendisk *disk;
 	struct hd_struct *part;
 	int offset = 0;
+	bool clear_root_wait = false;
+	char *slash;
 
-	if (strlen(uuid_str) < 36)
-		goto done;
+	cmp.uuid = uuid_str;
 
+	slash = strchr(uuid_str, '/');
 	/* Check for optional partition number offset attributes. */
-	if (uuid_str[36]) {
+	if (slash) {
 		char c = 0;
 		/* Explicitly fail on poor PARTUUID syntax. */
-		if (sscanf(&uuid_str[36],
-			   "/PARTNROFF=%d%c", &offset, &c) != 1) {
-			printk(KERN_ERR "VFS: PARTUUID= is invalid.\n"
-			 "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
-			if (root_wait)
-				printk(KERN_ERR
-				     "Disabling rootwait; root= is invalid.\n");
-			root_wait = 0;
+		if (sscanf(slash + 1,
+			   "PARTNROFF=%d%c", &offset, &c) != 1) {
+			clear_root_wait = true;
 			goto done;
 		}
+		cmp.len = slash - uuid_str;
+	} else {
+		cmp.len = strlen(uuid_str);
 	}
 
-	/* Pack the requested UUID in the expected format. */
-	part_pack_uuid(uuid_str, uuid);
+	if (!cmp.len) {
+		clear_root_wait = true;
+		goto done;
+	}
 
-	dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
+	dev = class_find_device(&block_class, NULL, &cmp,
+				&match_dev_by_uuid);
 	if (!dev)
 		goto done;
 
@@ -158,6 +166,13 @@ static dev_t devt_from_partuuid(char *uuid_str)
 no_offset:
 	put_device(dev);
 done:
+	if (clear_root_wait) {
+		pr_err("VFS: PARTUUID= is invalid.\n"
+		       "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n");
+		if (root_wait)
+			pr_err("Disabling rootwait; root= is invalid.\n");
+		root_wait = 0;
+	}
 	return res;
 }
 #endif
@@ -174,6 +189,10 @@ done:
  *	   used when disk name of partitioned disk ends on a digit.
  *	6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
  *	   unique id of a partition if the partition table provides it.
+ *	   The UUID may be either an EFI/GPT UUID, or refer to an MSDOS
+ *	   partition using the format SSSSSSSS-PP, where SSSSSSSS is a zero-
+ *	   filled hex representation of the 32-bit "NT disk signature", and PP
+ *	   is a zero-filled hex representation of the 1-based partition number.
  *	7) PARTUUID=<UUID>/PARTNROFF=<int> to select a partition in relation to
  *	   a partition with a known unique id.
  *
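
For the new MSDOS form, a root filesystem on the first partition of a disk
whose NT disk signature is 0x0002dd75 would be selected with, for example:

	root=PARTUUID=0002dd75-01

and the PARTNROFF suffix composes with either form, e.g.
root=PARTUUID=0002dd75-01/PARTNROFF=1 for the partition after it. (The
signature value here is illustrative.)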

+ 225 - 134
lib/lru_cache.c

@@ -44,8 +44,8 @@ MODULE_LICENSE("GPL");
 } while (0)
 
 #define RETURN(x...)     do { \
-	clear_bit(__LC_PARANOIA, &lc->flags); \
-	smp_mb__after_clear_bit(); return x ; } while (0)
+	clear_bit_unlock(__LC_PARANOIA, &lc->flags); \
+	return x ; } while (0)
 
 /* BUG() if e is not one of the elements tracked by lc */
 #define PARANOIA_LC_ELEMENT(lc, e) do {	\
@@ -55,9 +55,40 @@ MODULE_LICENSE("GPL");
 	BUG_ON(i >= lc_->nr_elements);	\
 	BUG_ON(lc_->lc_element[i] != e_); } while (0)
 
+
+/* We need to atomically
+ *  - try to grab the lock (set LC_LOCKED)
+ *  - only if there is no pending transaction
+ *    (neither LC_DIRTY nor LC_STARVING is set)
+ * Because of PARANOIA_ENTRY() above abusing lc->flags as well,
+ * it is not sufficient to just say
+ *	return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED);
+ */
+int lc_try_lock(struct lru_cache *lc)
+{
+	unsigned long val;
+	do {
+		val = cmpxchg(&lc->flags, 0, LC_LOCKED);
+	} while (unlikely(val == LC_PARANOIA));
+	/* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */
+	return 0 == val;
+#if 0
+	/* Alternative approach, spin in case someone enters or leaves a
+	 * PARANOIA_ENTRY()/RETURN() section. */
+	unsigned long old, new, val;
+	do {
+		old = lc->flags & LC_PARANOIA;
+		new = old | LC_LOCKED;
+		val = cmpxchg(&lc->flags, old, new);
+	} while (unlikely(val == (old ^ LC_PARANOIA)));
+	return old == val;
+#endif
+}
+
 /**
  * lc_create - prepares to track objects in an active set
  * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details
+ * @max_pending_changes: maximum changes to accumulate until a transaction is required
  * @e_count: number of elements allowed to be active simultaneously
  * @e_size: size of the tracked objects
  * @e_off: offset to the &struct lc_element member in a tracked object
@@ -66,6 +97,7 @@ MODULE_LICENSE("GPL");
  * or NULL on (allocation) failure.
  */
 struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
+		unsigned max_pending_changes,
 		unsigned e_count, size_t e_size, size_t e_off)
 {
 	struct hlist_head *slot = NULL;
@@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
 	INIT_LIST_HEAD(&lc->in_use);
 	INIT_LIST_HEAD(&lc->lru);
 	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
 
 	lc->name = name;
 	lc->element_size = e_size;
 	lc->element_off = e_off;
 	lc->nr_elements = e_count;
-	lc->new_number = LC_FREE;
+	lc->max_pending_changes = max_pending_changes;
 	lc->lc_cache = cache;
 	lc->lc_element = element;
 	lc->lc_slot = slot;
@@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache,
 		e = p + e_off;
 		e->lc_index = i;
 		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
 		list_add(&e->list, &lc->free);
 		element[i] = e;
 	}
@@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc)
 	INIT_LIST_HEAD(&lc->in_use);
 	INIT_LIST_HEAD(&lc->lru);
 	INIT_LIST_HEAD(&lc->free);
+	INIT_LIST_HEAD(&lc->to_be_changed);
 	lc->used = 0;
 	lc->hits = 0;
 	lc->misses = 0;
 	lc->starving = 0;
-	lc->dirty = 0;
+	lc->locked = 0;
 	lc->changed = 0;
+	lc->pending_changes = 0;
 	lc->flags = 0;
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
 	memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements);
 
 	for (i = 0; i < lc->nr_elements; i++) {
@@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc)
 		/* re-init it */
 		e->lc_index = i;
 		e->lc_number = LC_FREE;
+		e->lc_new_number = LC_FREE;
 		list_add(&e->list, &lc->free);
 	}
 }
@@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc)
 	/* NOTE:
 	 * total calls to lc_get are
 	 * (starving + hits + misses)
-	 * misses include "dirty" count (update from an other thread in
+	 * misses include "locked" count (update from another thread in
 	 * progress) and "changed", when this in fact led to a successful
 	 * update of the cache.
 	 */
 	return seq_printf(seq, "\t%s: used:%u/%u "
-		"hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n",
+		"hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n",
 		lc->name, lc->used, lc->nr_elements,
-		lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed);
+		lc->hits, lc->misses, lc->starving, lc->locked, lc->changed);
 }
 
 static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
@@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr)
 }
 
 
-/**
- * lc_find - find element by label, if present in the hash table
- * @lc: The lru_cache object
- * @enr: element number
- *
- * Returns the pointer to an element, if the element with the requested
- * "label" or element number is present in the hash table,
- * or NULL if not found. Does not change the refcnt.
- */
-struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
+static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr,
+		bool include_changing)
 {
 	struct hlist_node *n;
 	struct lc_element *e;
@@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
 	BUG_ON(!lc);
 	BUG_ON(!lc->nr_elements);
 	hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) {
-		if (e->lc_number == enr)
+		/* "about to be changed" elements, pending transaction commit,
+		 * are hashed by their "new number". "Normal" elements have
+		 * lc_number == lc_new_number. */
+		if (e->lc_new_number != enr)
+			continue;
+		if (e->lc_new_number == e->lc_number || include_changing)
 			return e;
+		break;
 	}
 	return NULL;
 }
 
-/* returned element will be "recycled" immediately */
-static struct lc_element *lc_evict(struct lru_cache *lc)
+/**
+ * lc_find - find element by label, if present in the hash table
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns the pointer to an element, if the element with the requested
+ * "label" or element number is present in the hash table,
+ * or NULL if not found. Does not change the refcnt.
+ * Ignores elements that are "about to be used", i.e. not yet in the active
+ * set, but still pending transaction commit.
+ */
+struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr)
 {
-	struct list_head  *n;
-	struct lc_element *e;
-
-	if (list_empty(&lc->lru))
-		return NULL;
-
-	n = lc->lru.prev;
-	e = list_entry(n, struct lc_element, list);
-
-	PARANOIA_LC_ELEMENT(lc, e);
+	return __lc_find(lc, enr, 0);
+}
 
-	list_del(&e->list);
-	hlist_del(&e->colision);
-	return e;
+/**
+ * lc_is_used - find element by label
+ * @lc: The lru_cache object
+ * @enr: element number
+ *
+ * Returns true, if the element with the requested "label" or element number is
+ * present in the hash table, and is used (refcnt > 0).
+ * Also finds elements that are not _currently_ used but only "about to be
+ * used", i.e. on the "to_be_changed" list, pending transaction commit.
+ */
+bool lc_is_used(struct lru_cache *lc, unsigned int enr)
+{
+	struct lc_element *e = __lc_find(lc, enr, 1);
+	return e && e->refcnt;
 }
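
The include_changing flag gives the two lookups different views of the same hash chain. A short sketch of the difference while a change for enr is pending commit (lc and enr are placeholders):

/* After lc_get(lc, enr) prepared a change that is not yet committed: */
struct lc_element *e = lc_find(lc, enr);	/* NULL: not in the active set yet */
bool pending = lc_is_used(lc, enr);		/* true: matched via lc_new_number */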
 
 /**
@@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e)
 	PARANOIA_LC_ELEMENT(lc, e);
 	BUG_ON(e->refcnt);
 
-	e->lc_number = LC_FREE;
+	e->lc_number = e->lc_new_number = LC_FREE;
 	hlist_del_init(&e->colision);
 	list_move(&e->list, &lc->free);
 	RETURN();
 }
 
-static struct lc_element *lc_get_unused_element(struct lru_cache *lc)
+static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number)
 {
 	struct list_head *n;
+	struct lc_element *e;
+
+	if (!list_empty(&lc->free))
+		n = lc->free.next;
+	else if (!list_empty(&lc->lru))
+		n = lc->lru.prev;
+	else
+		return NULL;
+
+	e = list_entry(n, struct lc_element, list);
+	PARANOIA_LC_ELEMENT(lc, e);
 
-	if (list_empty(&lc->free))
-		return lc_evict(lc);
+	e->lc_new_number = new_number;
+	if (!hlist_unhashed(&e->colision))
+		__hlist_del(&e->colision);
+	hlist_add_head(&e->colision, lc_hash_slot(lc, new_number));
+	list_move(&e->list, &lc->to_be_changed);
 
-	n = lc->free.next;
-	list_del(n);
-	return list_entry(n, struct lc_element, list);
+	return e;
 }
 
 static int lc_unused_element_available(struct lru_cache *lc)
@@ -308,45 +366,7 @@ static int lc_unused_element_available(struct lru_cache *lc)
 	return 0;
 }
 
-
-/**
- * lc_get - get element by label, maybe change the active set
- * @lc: the lru cache to operate on
- * @enr: the label to look up
- *
- * Finds an element in the cache, increases its usage count,
- * "touches" and returns it.
- *
- * In case the requested number is not present, it needs to be added to the
- * cache. Therefore it is possible that an other element becomes evicted from
- * the cache. In either case, the user is notified so he is able to e.g. keep
- * a persistent log of the cache changes, and therefore the objects in use.
- *
- * Return values:
- *  NULL
- *     The cache was marked %LC_STARVING,
- *     or the requested label was not in the active set
- *     and a changing transaction is still pending (@lc was marked %LC_DIRTY).
- *     Or no unused or free element could be recycled (@lc will be marked as
- *     %LC_STARVING, blocking further lc_get() operations).
- *
- *  pointer to the element with the REQUESTED element number.
- *     In this case, it can be used right away
- *
- *  pointer to an UNUSED element with some different element number,
- *          where that different number may also be %LC_FREE.
- *
- *          In this case, the cache is marked %LC_DIRTY (blocking further changes),
- *          and the returned element pointer is removed from the lru list and
- *          hash collision chains.  The user now should do whatever housekeeping
- *          is necessary.
- *          Then he must call lc_changed(lc,element_pointer), to finish
- *          the change.
- *
- * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
- *       any cache set change.
- */
-struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
+static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool may_change)
 {
 	struct lc_element *e;
 
@@ -356,8 +376,12 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
 		RETURN(NULL);
 	}
 
-	e = lc_find(lc, enr);
-	if (e) {
+	e = __lc_find(lc, enr, 1);
+	/* if lc_new_number != lc_number,
+	 * this enr is already being pulled in,
+	 * and will be available once the pending transaction
+	 * has been committed. */
+	if (e && e->lc_new_number == e->lc_number) {
 		++lc->hits;
 		if (e->refcnt++ == 0)
 			lc->used++;
@@ -366,6 +390,26 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
 	}
 
 	++lc->misses;
+	if (!may_change)
+		RETURN(NULL);
+
+	/* It has been found above, but on the "to_be_changed" list, not yet
+	 * committed.  Don't pull it in twice, wait for the transaction, then
+	 * try again */
+	if (e)
+		RETURN(NULL);
+
+	/* To avoid races with lc_try_lock(), first, mark us dirty
+	 * (using test_and_set_bit, as it implies memory barriers), ... */
+	test_and_set_bit(__LC_DIRTY, &lc->flags);
+
+	/* ... only then check if it is locked anyway. If lc_unlock clears
+	 * the dirty bit again, that's not a problem, we will come here again.
+	 */
+	if (test_bit(__LC_LOCKED, &lc->flags)) {
+		++lc->locked;
+		RETURN(NULL);
+	}
 
 	/* In case there is nothing available and we cannot kick out
 	 * the LRU element, we have to wait ...
@@ -375,71 +419,109 @@ struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
 		RETURN(NULL);
 	}
 
-	/* it was not present in the active set.
-	 * we are going to recycle an unused (or even "free") element.
-	 * user may need to commit a transaction to record that change.
-	 * we serialize on flags & TF_DIRTY */
-	if (test_and_set_bit(__LC_DIRTY, &lc->flags)) {
-		++lc->dirty;
+	/* It was not present in the active set.  We are going to recycle an
+	 * unused (or even "free") element, but we won't accumulate more than
+	 * max_pending_changes changes.  */
+	if (lc->pending_changes >= lc->max_pending_changes)
 		RETURN(NULL);
-	}
 
-	e = lc_get_unused_element(lc);
+	e = lc_prepare_for_change(lc, enr);
 	BUG_ON(!e);
 
 	clear_bit(__LC_STARVING, &lc->flags);
 	BUG_ON(++e->refcnt != 1);
 	lc->used++;
-
-	lc->changing_element = e;
-	lc->new_number = enr;
+	lc->pending_changes++;
 
 	RETURN(e);
 }
 
-/* similar to lc_get,
- * but only gets a new reference on an existing element.
- * you either get the requested element, or NULL.
- * will be consolidated into one function.
+/**
+ * lc_get - get element by label, maybe change the active set
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
+ * "touches" and returns it.
+ *
+ * In case the requested number is not present, it needs to be added to the
+ * cache. Therefore it is possible that another element becomes evicted from
+ * the cache. In either case, the user is notified so he is able to e.g. keep
+ * a persistent log of the cache changes, and therefore the objects in use.
+ *
+ * Return values:
+ *  NULL
+ *     The cache was marked %LC_STARVING,
+ *     or the requested label was not in the active set
+ *     and a changing transaction is still pending (@lc was marked %LC_DIRTY).
+ *     Or no unused or free element could be recycled (@lc will be marked as
+ *     %LC_STARVING, blocking further lc_get() operations).
+ *
+ *  pointer to the element with the REQUESTED element number.
+ *     In this case, it can be used right away
+ *
+ *  pointer to an UNUSED element with some different element number,
+ *          where that different number may also be %LC_FREE.
+ *
+ *          In this case, the cache is marked %LC_DIRTY,
+ *          so lc_try_lock() will no longer succeed.
+ *          The returned element pointer is moved to the "to_be_changed" list,
+ *          and registered with the new element number on the hash collision chains,
+ *          so it is possible to pick it up from lc_is_used().
+ *          Up to "max_pending_changes" (see lc_create()) can be accumulated.
+ *          The user now should do whatever housekeeping is necessary,
+ *          typically serialize on lc_try_lock_for_transaction(), then call
+ *          lc_committed(lc) and lc_unlock(), to finish the change.
+ *
+ * NOTE: The user needs to check the lc_number on EACH use, so he recognizes
+ *       any cache set change.
  */
-struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
+struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr)
 {
-	struct lc_element *e;
-
-	PARANOIA_ENTRY();
-	if (lc->flags & LC_STARVING) {
-		++lc->starving;
-		RETURN(NULL);
-	}
+	return __lc_get(lc, enr, 1);
+}
 
-	e = lc_find(lc, enr);
-	if (e) {
-		++lc->hits;
-		if (e->refcnt++ == 0)
-			lc->used++;
-		list_move(&e->list, &lc->in_use); /* Not evictable... */
-	}
-	RETURN(e);
+/**
+ * lc_try_get - get element by label, if present; do not change the active set
+ * @lc: the lru cache to operate on
+ * @enr: the label to look up
+ *
+ * Finds an element in the cache, increases its usage count,
+ * "touches" and returns it.
+ *
+ * Return values:
+ *  NULL
+ *     The cache was marked %LC_STARVING,
+ *     or the requested label was not in the active set
+ *
+ *  pointer to the element with the REQUESTED element number.
+ *     In this case, it can be used right away
+ */
+struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr)
+{
+	return __lc_get(lc, enr, 0);
 }
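
Both wrappers are now thin fronts for __lc_get(); only may_change differs. A hedged sketch of what that means at a call site, not drbd's actual usage:

/* Sketch: prefer a cheap hit, fall back to starting a change. */
static struct lc_element *lookup_or_change(struct lru_cache *lc, unsigned int enr)
{
	struct lc_element *e = lc_try_get(lc, enr);	/* never alters the set */

	if (!e) {
		e = lc_get(lc, enr);	/* may recycle an element for enr */
		if (e && e->lc_number != e->lc_new_number) {
			/* we started a change; the new label becomes valid
			 * only after the transaction commit (see below) */
		}
	}
	return e;
}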
 
 /**
- * lc_changed - tell @lc that the change has been recorded
+ * lc_committed - tell @lc that pending changes have been recorded
  * @lc: the lru cache to operate on
- * @e: the element pending label change
+ *
+ * The user is expected to serialize on an explicit
+ * lc_try_lock_for_transaction() before the transaction is started, and
+ * later needs to call lc_unlock() explicitly as well.
  */
-void lc_changed(struct lru_cache *lc, struct lc_element *e)
+void lc_committed(struct lru_cache *lc)
 {
+	struct lc_element *e, *tmp;
+
 	PARANOIA_ENTRY();
-	BUG_ON(e != lc->changing_element);
-	PARANOIA_LC_ELEMENT(lc, e);
-	++lc->changed;
-	e->lc_number = lc->new_number;
-	list_add(&e->list, &lc->in_use);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number));
-	lc->changing_element = NULL;
-	lc->new_number = LC_FREE;
-	clear_bit(__LC_DIRTY, &lc->flags);
-	smp_mb__after_clear_bit();
+	list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) {
+		/* count number of changes, not number of transactions */
+		++lc->changed;
+		e->lc_number = e->lc_new_number;
+		list_move(&e->list, &lc->in_use);
+	}
+	lc->pending_changes = 0;
 	RETURN();
 }
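
Taken together, the per-element lc_changed() protocol becomes a batched one: accumulate changes via lc_get(), serialize, persist, then commit them all at once. A minimal sketch of that cycle under the locking contract described above; write_transaction() is a hypothetical caller hook, not part of this API:

#include <linux/errno.h>
#include <linux/lru_cache.h>

/* write_transaction() is a hypothetical caller hook that persists the
 * elements currently on lc->to_be_changed (e.g. an on-disk activity log). */
void write_transaction(struct lru_cache *lc);

static int get_and_commit(struct lru_cache *lc, unsigned int enr)
{
	struct lc_element *e = lc_get(lc, enr);

	if (!e)
		return -EBUSY;	/* starving, locked, or too many pending changes */

	if (e->lc_number != e->lc_new_number) {
		/* we extended the change set: serialize, persist, commit */
		if (lc_try_lock_for_transaction(lc)) {
			write_transaction(lc);
			lc_committed(lc);
			lc_unlock(lc);
		}
		/* else: an in-flight commit will pick this change up */
	}
	return 0;
}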
 
@@ -458,13 +540,12 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e)
 	PARANOIA_ENTRY();
 	PARANOIA_LC_ELEMENT(lc, e);
 	BUG_ON(e->refcnt == 0);
-	BUG_ON(e == lc->changing_element);
+	BUG_ON(e->lc_number != e->lc_new_number);
 	if (--e->refcnt == 0) {
 		/* move it to the front of LRU. */
 		list_move(&e->list, &lc->lru);
 		lc->used--;
-		clear_bit(__LC_STARVING, &lc->flags);
-		smp_mb__after_clear_bit();
+		clear_bit_unlock(__LC_STARVING, &lc->flags);
 	}
 	RETURN(e->refcnt);
 }
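
lc_put() now also asserts that no change is pending on the element. The usual release pattern, sketched; the wait queue is a stand-in for whatever synchronization the caller uses against LC_STARVING:

/* Drop the reference taken by lc_get(); at refcnt 0 the element
 * moves back to the lru and becomes evictable again. */
if (lc_put(lc, e) == 0)
	wake_up(&my_wait);	/* hypothetical: unblock starving lc_get() callers */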
@@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e)
 void lc_set(struct lru_cache *lc, unsigned int enr, int index)
 {
 	struct lc_element *e;
+	struct list_head *lh;
 
 	if (index < 0 || index >= lc->nr_elements)
 		return;
 
 	e = lc_element_by_index(lc, index);
-	e->lc_number = enr;
+	BUG_ON(e->lc_number != e->lc_new_number);
+	BUG_ON(e->refcnt != 0);
 
+	e->lc_number = e->lc_new_number = enr;
 	hlist_del_init(&e->colision);
-	hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
-	list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru);
+	if (enr == LC_FREE)
+		lh = &lc->free;
+	else {
+		hlist_add_head(&e->colision, lc_hash_slot(lc, enr));
+		lh = &lc->lru;
+	}
+	list_move(&e->list, lh);
 }
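
lc_set() now parks LC_FREE entries on the free list instead of the lru. A sketch of pre-seeding the element table, e.g. when reading an activity log back from disk; read_label() is a hypothetical caller helper that returns LC_FREE for empty slots:

unsigned int i;

for (i = 0; i < lc->nr_elements; i++)
	lc_set(lc, read_label(i), i);	/* LC_FREE lands on the free list */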
 
 /**
@@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get);
 EXPORT_SYMBOL(lc_find);
 EXPORT_SYMBOL(lc_get);
 EXPORT_SYMBOL(lc_put);
-EXPORT_SYMBOL(lc_changed);
+EXPORT_SYMBOL(lc_committed);
 EXPORT_SYMBOL(lc_element_by_index);
 EXPORT_SYMBOL(lc_index_of);
 EXPORT_SYMBOL(lc_seq_printf_stats);
 EXPORT_SYMBOL(lc_seq_dump_details);
+EXPORT_SYMBOL(lc_try_lock);
+EXPORT_SYMBOL(lc_is_used);
