Browse Source

Merge branch 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.37/drivers' of git://git.kernel.dk/linux-2.6-block: (95 commits)
  cciss: fix PCI IDs for new Smart Array controllers
  drbd: add race-breaker to drbd_go_diskless
  drbd: use dynamic_dev_dbg to optionally log uuid changes
  dynamic_debug.h: Fix dynamic_dev_dbg() macro if CONFIG_DYNAMIC_DEBUG not set
  drbd: cleanup: change "<= 0" to "== 0"
  drbd: relax the grace period of the md_sync timer again
  drbd: add some more explicit drbd_md_sync
  drbd: drop wrong debug asserts, fix recently introduced race
  drbd: cleanup useless leftover warn/error printk's
  drbd: add explicit drbd_md_sync to drbd_resync_finished
  drbd: Do not log an ASSERT for P_OV_REQUEST packets while C_CONNECTED
  drbd: fix for possible deadlock on IO error during resync
  drbd: fix unlikely access after free and list corruption
  drbd: fix for spurious fullsync (uuids rotated too fast)
  drbd: allow for explicit resync-finished notifications
  drbd: preparation commit, using full state in receive_state()
  drbd: drbd_send_ack_dp must not rely on header information
  drbd: Fix regression in recv_bm_rle_bits (compressed bitmap)
  drbd: Fixed a stupid copy and paste error
  drbd: Allow larger values for c-fill-target.
  ...

Fix up trivial conflict in drivers/block/ataflop.c due to BKL removal
Linus Torvalds 14 years ago
parent
commit
8abfc6e7a4

+ 48 - 12
drivers/block/amiflop.c

@@ -115,8 +115,6 @@ static unsigned long int fd_def_df0 = FD_DD_3;     /* default for df0 if it does
 module_param(fd_def_df0, ulong, 0);
 module_param(fd_def_df0, ulong, 0);
 MODULE_LICENSE("GPL");
 MODULE_LICENSE("GPL");
 
 
-static struct request_queue *floppy_queue;
-
 /*
 /*
  *  Macros
  *  Macros
  */
  */
@@ -165,6 +163,7 @@ static volatile int selected = -1;	/* currently selected drive */
 static int writepending;
 static int writepending;
 static int writefromint;
 static int writefromint;
 static char *raw_buf;
 static char *raw_buf;
+static int fdc_queue;
 
 
 static DEFINE_SPINLOCK(amiflop_lock);
 static DEFINE_SPINLOCK(amiflop_lock);
 
 
@@ -1335,6 +1334,42 @@ static int get_track(int drive, int track)
 	return -1;
 	return -1;
 }
 }
 
 
+/*
+ * Round-robin between our available drives, doing one request from each
+ */
+static struct request *set_next_request(void)
+{
+	struct request_queue *q;
+	int cnt = FD_MAX_UNITS;
+	struct request *rq;
+
+	/* Find next queue we can dispatch from */
+	fdc_queue = fdc_queue + 1;
+	if (fdc_queue == FD_MAX_UNITS)
+		fdc_queue = 0;
+
+	for(cnt = FD_MAX_UNITS; cnt > 0; cnt--) {
+
+		if (unit[fdc_queue].type->code == FD_NODRIVE) {
+			if (++fdc_queue == FD_MAX_UNITS)
+				fdc_queue = 0;
+			continue;
+		}
+
+		q = unit[fdc_queue].gendisk->queue;
+		if (q) {
+			rq = blk_fetch_request(q);
+			if (rq)
+				break;
+		}
+
+		if (++fdc_queue == FD_MAX_UNITS)
+			fdc_queue = 0;
+	}
+
+	return rq;
+}
+
 static void redo_fd_request(void)
 static void redo_fd_request(void)
 {
 {
 	struct request *rq;
 	struct request *rq;
@@ -1346,7 +1381,7 @@ static void redo_fd_request(void)
 	int err;
 	int err;
 
 
 next_req:
 next_req:
-	rq = blk_fetch_request(floppy_queue);
+	rq = set_next_request();
 	if (!rq) {
 	if (!rq) {
 		/* Nothing left to do */
 		/* Nothing left to do */
 		return;
 		return;
@@ -1683,6 +1718,13 @@ static int __init fd_probe_drives(void)
 			continue;
 			continue;
 		}
 		}
 		unit[drive].gendisk = disk;
 		unit[drive].gendisk = disk;
+
+		disk->queue = blk_init_queue(do_fd_request, &amiflop_lock);
+		if (!disk->queue) {
+			unit[drive].type->code = FD_NODRIVE;
+			continue;
+		}
+
 		drives++;
 		drives++;
 		if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) {
 		if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) {
 			printk("no mem for ");
 			printk("no mem for ");
@@ -1696,7 +1738,6 @@ static int __init fd_probe_drives(void)
 		disk->fops = &floppy_fops;
 		disk->fops = &floppy_fops;
 		sprintf(disk->disk_name, "fd%d", drive);
 		sprintf(disk->disk_name, "fd%d", drive);
 		disk->private_data = &unit[drive];
 		disk->private_data = &unit[drive];
-		disk->queue = floppy_queue;
 		set_capacity(disk, 880*2);
 		set_capacity(disk, 880*2);
 		add_disk(disk);
 		add_disk(disk);
 	}
 	}
@@ -1744,11 +1785,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev)
 		goto out_irq2;
 		goto out_irq2;
 	}
 	}
 
 
-	ret = -ENOMEM;
-	floppy_queue = blk_init_queue(do_fd_request, &amiflop_lock);
-	if (!floppy_queue)
-		goto out_queue;
-
 	ret = -ENODEV;
 	ret = -ENODEV;
 	if (fd_probe_drives() < 1) /* No usable drives */
 	if (fd_probe_drives() < 1) /* No usable drives */
 		goto out_probe;
 		goto out_probe;
@@ -1792,8 +1828,6 @@ static int __init amiga_floppy_probe(struct platform_device *pdev)
 	return 0;
 	return 0;
 
 
 out_probe:
 out_probe:
-	blk_cleanup_queue(floppy_queue);
-out_queue:
 	free_irq(IRQ_AMIGA_CIAA_TB, NULL);
 	free_irq(IRQ_AMIGA_CIAA_TB, NULL);
 out_irq2:
 out_irq2:
 	free_irq(IRQ_AMIGA_DSKBLK, NULL);
 	free_irq(IRQ_AMIGA_DSKBLK, NULL);
@@ -1811,9 +1845,12 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev)
 
 
 	for( i = 0; i < FD_MAX_UNITS; i++) {
 	for( i = 0; i < FD_MAX_UNITS; i++) {
 		if (unit[i].type->code != FD_NODRIVE) {
 		if (unit[i].type->code != FD_NODRIVE) {
+			struct request_queue *q = unit[i].gendisk->queue;
 			del_gendisk(unit[i].gendisk);
 			del_gendisk(unit[i].gendisk);
 			put_disk(unit[i].gendisk);
 			put_disk(unit[i].gendisk);
 			kfree(unit[i].trackbuf);
 			kfree(unit[i].trackbuf);
+			if (q)
+				blk_cleanup_queue(q);
 		}
 		}
 	}
 	}
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
@@ -1821,7 +1858,6 @@ static int __exit amiga_floppy_remove(struct platform_device *pdev)
 	free_irq(IRQ_AMIGA_DSKBLK, NULL);
 	free_irq(IRQ_AMIGA_DSKBLK, NULL);
 	custom.dmacon = DMAF_DISK; /* disable DMA */
 	custom.dmacon = DMAF_DISK; /* disable DMA */
 	amiga_chip_free(raw_buf);
 	amiga_chip_free(raw_buf);
-	blk_cleanup_queue(floppy_queue);
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 }
 }
 #endif
 #endif

+ 39 - 11
drivers/block/ataflop.c

@@ -80,8 +80,8 @@
 #undef DEBUG
 #undef DEBUG
 
 
 static DEFINE_MUTEX(ataflop_mutex);
 static DEFINE_MUTEX(ataflop_mutex);
-static struct request_queue *floppy_queue;
 static struct request *fd_request;
 static struct request *fd_request;
+static int fdc_queue;
 
 
 /* Disk types: DD, HD, ED */
 /* Disk types: DD, HD, ED */
 static struct atari_disk_type {
 static struct atari_disk_type {
@@ -1392,6 +1392,29 @@ static void setup_req_params( int drive )
 			ReqTrack, ReqSector, (unsigned long)ReqData ));
 			ReqTrack, ReqSector, (unsigned long)ReqData ));
 }
 }
 
 
+/*
+ * Round-robin between our available drives, doing one request from each
+ */
+static struct request *set_next_request(void)
+{
+	struct request_queue *q;
+	int old_pos = fdc_queue;
+	struct request *rq;
+
+	do {
+		q = unit[fdc_queue].disk->queue;
+		if (++fdc_queue == FD_MAX_UNITS)
+			fdc_queue = 0;
+		if (q) {
+			rq = blk_fetch_request(q);
+			if (rq)
+				break;
+		}
+	} while (fdc_queue != old_pos);
+
+	return rq;
+}
+
 
 
 static void redo_fd_request(void)
 static void redo_fd_request(void)
 {
 {
@@ -1406,7 +1429,7 @@ static void redo_fd_request(void)
 
 
 repeat:
 repeat:
 	if (!fd_request) {
 	if (!fd_request) {
-		fd_request = blk_fetch_request(floppy_queue);
+		fd_request = set_next_request();
 		if (!fd_request)
 		if (!fd_request)
 			goto the_end;
 			goto the_end;
 	}
 	}
@@ -1933,10 +1956,6 @@ static int __init atari_floppy_init (void)
 	PhysTrackBuffer = virt_to_phys(TrackBuffer);
 	PhysTrackBuffer = virt_to_phys(TrackBuffer);
 	BufferDrive = BufferSide = BufferTrack = -1;
 	BufferDrive = BufferSide = BufferTrack = -1;
 
 
-	floppy_queue = blk_init_queue(do_fd_request, &ataflop_lock);
-	if (!floppy_queue)
-		goto Enomem;
-
 	for (i = 0; i < FD_MAX_UNITS; i++) {
 	for (i = 0; i < FD_MAX_UNITS; i++) {
 		unit[i].track = -1;
 		unit[i].track = -1;
 		unit[i].flags = 0;
 		unit[i].flags = 0;
@@ -1945,7 +1964,10 @@ static int __init atari_floppy_init (void)
 		sprintf(unit[i].disk->disk_name, "fd%d", i);
 		sprintf(unit[i].disk->disk_name, "fd%d", i);
 		unit[i].disk->fops = &floppy_fops;
 		unit[i].disk->fops = &floppy_fops;
 		unit[i].disk->private_data = &unit[i];
 		unit[i].disk->private_data = &unit[i];
-		unit[i].disk->queue = floppy_queue;
+		unit[i].disk->queue = blk_init_queue(do_fd_request,
+					&ataflop_lock);
+		if (!unit[i].disk->queue)
+			goto Enomem;
 		set_capacity(unit[i].disk, MAX_DISK_SIZE * 2);
 		set_capacity(unit[i].disk, MAX_DISK_SIZE * 2);
 		add_disk(unit[i].disk);
 		add_disk(unit[i].disk);
 	}
 	}
@@ -1960,10 +1982,14 @@ static int __init atari_floppy_init (void)
 
 
 	return 0;
 	return 0;
 Enomem:
 Enomem:
-	while (i--)
+	while (i--) {
+		struct request_queue *q = unit[i].disk->queue;
+
 		put_disk(unit[i].disk);
 		put_disk(unit[i].disk);
-	if (floppy_queue)
-		blk_cleanup_queue(floppy_queue);
+		if (q)
+			blk_cleanup_queue(q);
+	}
+
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 	return -ENOMEM;
 	return -ENOMEM;
 }
 }
@@ -2012,12 +2038,14 @@ static void __exit atari_floppy_exit(void)
 	int i;
 	int i;
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
 	for (i = 0; i < FD_MAX_UNITS; i++) {
 	for (i = 0; i < FD_MAX_UNITS; i++) {
+		struct request_queue *q = unit[i].disk->queue;
+
 		del_gendisk(unit[i].disk);
 		del_gendisk(unit[i].disk);
 		put_disk(unit[i].disk);
 		put_disk(unit[i].disk);
+		blk_cleanup_queue(q);
 	}
 	}
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 
 
-	blk_cleanup_queue(floppy_queue);
 	del_timer_sync(&fd_timer);
 	del_timer_sync(&fd_timer);
 	atari_stram_free( DMABuffer );
 	atari_stram_free( DMABuffer );
 }
 }

+ 424 - 440
drivers/block/cciss.c

@@ -105,11 +105,12 @@ static const struct pci_device_id cciss_pci_device_id[] = {
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324A},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324A},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324B},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324B},
-	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3250},
-	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3251},
-	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3252},
-	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3253},
-	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3254},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3350},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3351},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3352},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3353},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3354},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3355},
 	{0,}
 	{0,}
 };
 };
 
 
@@ -149,11 +150,12 @@ static struct board_type products[] = {
 	{0x3249103C, "Smart Array P812", &SA5_access},
 	{0x3249103C, "Smart Array P812", &SA5_access},
 	{0x324A103C, "Smart Array P712m", &SA5_access},
 	{0x324A103C, "Smart Array P712m", &SA5_access},
 	{0x324B103C, "Smart Array P711m", &SA5_access},
 	{0x324B103C, "Smart Array P711m", &SA5_access},
-	{0x3250103C, "Smart Array", &SA5_access},
-	{0x3251103C, "Smart Array", &SA5_access},
-	{0x3252103C, "Smart Array", &SA5_access},
-	{0x3253103C, "Smart Array", &SA5_access},
-	{0x3254103C, "Smart Array", &SA5_access},
+	{0x3350103C, "Smart Array", &SA5_access},
+	{0x3351103C, "Smart Array", &SA5_access},
+	{0x3352103C, "Smart Array", &SA5_access},
+	{0x3353103C, "Smart Array", &SA5_access},
+	{0x3354103C, "Smart Array", &SA5_access},
+	{0x3355103C, "Smart Array", &SA5_access},
 };
 };
 
 
 /* How long to wait (in milliseconds) for board to go into simple mode */
 /* How long to wait (in milliseconds) for board to go into simple mode */
@@ -1232,470 +1234,452 @@ static void check_ioctl_unit_attention(ctlr_info_t *h, CommandList_struct *c)
 			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
 			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
 		(void)check_for_unit_attention(h, c);
 		(void)check_for_unit_attention(h, c);
 }
 }
-/*
- * ioctl
- */
-static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
-		       unsigned int cmd, unsigned long arg)
+
+static int cciss_getpciinfo(ctlr_info_t *h, void __user *argp)
 {
 {
-	struct gendisk *disk = bdev->bd_disk;
-	ctlr_info_t *h = get_host(disk);
-	drive_info_struct *drv = get_drv(disk);
-	void __user *argp = (void __user *)arg;
+	cciss_pci_info_struct pciinfo;
 
 
-	dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n",
-		cmd, arg);
-	switch (cmd) {
-	case CCISS_GETPCIINFO:
-		{
-			cciss_pci_info_struct pciinfo;
-
-			if (!arg)
-				return -EINVAL;
-			pciinfo.domain = pci_domain_nr(h->pdev->bus);
-			pciinfo.bus = h->pdev->bus->number;
-			pciinfo.dev_fn = h->pdev->devfn;
-			pciinfo.board_id = h->board_id;
-			if (copy_to_user
-			    (argp, &pciinfo, sizeof(cciss_pci_info_struct)))
-				return -EFAULT;
-			return 0;
-		}
-	case CCISS_GETINTINFO:
-		{
-			cciss_coalint_struct intinfo;
-			if (!arg)
-				return -EINVAL;
-			intinfo.delay =
-			    readl(&h->cfgtable->HostWrite.CoalIntDelay);
-			intinfo.count =
-			    readl(&h->cfgtable->HostWrite.CoalIntCount);
-			if (copy_to_user
-			    (argp, &intinfo, sizeof(cciss_coalint_struct)))
-				return -EFAULT;
-			return 0;
-		}
-	case CCISS_SETINTINFO:
-		{
-			cciss_coalint_struct intinfo;
-			unsigned long flags;
-			int i;
-
-			if (!arg)
-				return -EINVAL;
-			if (!capable(CAP_SYS_ADMIN))
-				return -EPERM;
-			if (copy_from_user
-			    (&intinfo, argp, sizeof(cciss_coalint_struct)))
-				return -EFAULT;
-			if ((intinfo.delay == 0) && (intinfo.count == 0))
-				return -EINVAL;
-			spin_lock_irqsave(&h->lock, flags);
-			/* Update the field, and then ring the doorbell */
-			writel(intinfo.delay,
-			       &(h->cfgtable->HostWrite.CoalIntDelay));
-			writel(intinfo.count,
-			       &(h->cfgtable->HostWrite.CoalIntCount));
-			writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
-
-			for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
-				if (!(readl(h->vaddr + SA5_DOORBELL)
-				      & CFGTBL_ChangeReq))
-					break;
-				/* delay and try again */
-				udelay(1000);
-			}
-			spin_unlock_irqrestore(&h->lock, flags);
-			if (i >= MAX_IOCTL_CONFIG_WAIT)
-				return -EAGAIN;
-			return 0;
-		}
-	case CCISS_GETNODENAME:
-		{
-			NodeName_type NodeName;
-			int i;
-
-			if (!arg)
-				return -EINVAL;
-			for (i = 0; i < 16; i++)
-				NodeName[i] =
-				    readb(&h->cfgtable->ServerName[i]);
-			if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
-				return -EFAULT;
-			return 0;
-		}
-	case CCISS_SETNODENAME:
-		{
-			NodeName_type NodeName;
-			unsigned long flags;
-			int i;
+	if (!argp)
+		return -EINVAL;
+	pciinfo.domain = pci_domain_nr(h->pdev->bus);
+	pciinfo.bus = h->pdev->bus->number;
+	pciinfo.dev_fn = h->pdev->devfn;
+	pciinfo.board_id = h->board_id;
+	if (copy_to_user(argp, &pciinfo, sizeof(cciss_pci_info_struct)))
+		return -EFAULT;
+	return 0;
+}
 
 
-			if (!arg)
-				return -EINVAL;
-			if (!capable(CAP_SYS_ADMIN))
-				return -EPERM;
+static int cciss_getintinfo(ctlr_info_t *h, void __user *argp)
+{
+	cciss_coalint_struct intinfo;
 
 
-			if (copy_from_user
-			    (NodeName, argp, sizeof(NodeName_type)))
-				return -EFAULT;
+	if (!argp)
+		return -EINVAL;
+	intinfo.delay = readl(&h->cfgtable->HostWrite.CoalIntDelay);
+	intinfo.count = readl(&h->cfgtable->HostWrite.CoalIntCount);
+	if (copy_to_user
+	    (argp, &intinfo, sizeof(cciss_coalint_struct)))
+		return -EFAULT;
+	return 0;
+}
 
 
-			spin_lock_irqsave(&h->lock, flags);
+static int cciss_setintinfo(ctlr_info_t *h, void __user *argp)
+{
+	cciss_coalint_struct intinfo;
+	unsigned long flags;
+	int i;
 
 
-			/* Update the field, and then ring the doorbell */
-			for (i = 0; i < 16; i++)
-				writeb(NodeName[i],
-				       &h->cfgtable->ServerName[i]);
+	if (!argp)
+		return -EINVAL;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (copy_from_user(&intinfo, argp, sizeof(intinfo)))
+		return -EFAULT;
+	if ((intinfo.delay == 0) && (intinfo.count == 0))
+		return -EINVAL;
+	spin_lock_irqsave(&h->lock, flags);
+	/* Update the field, and then ring the doorbell */
+	writel(intinfo.delay, &(h->cfgtable->HostWrite.CoalIntDelay));
+	writel(intinfo.count, &(h->cfgtable->HostWrite.CoalIntCount));
+	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
 
 
-			writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+	for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
+		if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
+			break;
+		udelay(1000); /* delay and try again */
+	}
+	spin_unlock_irqrestore(&h->lock, flags);
+	if (i >= MAX_IOCTL_CONFIG_WAIT)
+		return -EAGAIN;
+	return 0;
+}
 
 
-			for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
-				if (!(readl(h->vaddr + SA5_DOORBELL)
-				      & CFGTBL_ChangeReq))
-					break;
-				/* delay and try again */
-				udelay(1000);
-			}
-			spin_unlock_irqrestore(&h->lock, flags);
-			if (i >= MAX_IOCTL_CONFIG_WAIT)
-				return -EAGAIN;
-			return 0;
-		}
+static int cciss_getnodename(ctlr_info_t *h, void __user *argp)
+{
+	NodeName_type NodeName;
+	int i;
 
 
-	case CCISS_GETHEARTBEAT:
-		{
-			Heartbeat_type heartbeat;
-
-			if (!arg)
-				return -EINVAL;
-			heartbeat = readl(&h->cfgtable->HeartBeat);
-			if (copy_to_user
-			    (argp, &heartbeat, sizeof(Heartbeat_type)))
-				return -EFAULT;
-			return 0;
-		}
-	case CCISS_GETBUSTYPES:
-		{
-			BusTypes_type BusTypes;
-
-			if (!arg)
-				return -EINVAL;
-			BusTypes = readl(&h->cfgtable->BusTypes);
-			if (copy_to_user
-			    (argp, &BusTypes, sizeof(BusTypes_type)))
-				return -EFAULT;
-			return 0;
-		}
-	case CCISS_GETFIRMVER:
-		{
-			FirmwareVer_type firmware;
+	if (!argp)
+		return -EINVAL;
+	for (i = 0; i < 16; i++)
+		NodeName[i] = readb(&h->cfgtable->ServerName[i]);
+	if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
+		return -EFAULT;
+	return 0;
+}
 
 
-			if (!arg)
-				return -EINVAL;
-			memcpy(firmware, h->firm_ver, 4);
+static int cciss_setnodename(ctlr_info_t *h, void __user *argp)
+{
+	NodeName_type NodeName;
+	unsigned long flags;
+	int i;
 
 
-			if (copy_to_user
-			    (argp, firmware, sizeof(FirmwareVer_type)))
-				return -EFAULT;
-			return 0;
-		}
-	case CCISS_GETDRIVVER:
-		{
-			DriverVer_type DriverVer = DRIVER_VERSION;
+	if (!argp)
+		return -EINVAL;
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	if (copy_from_user(NodeName, argp, sizeof(NodeName_type)))
+		return -EFAULT;
+	spin_lock_irqsave(&h->lock, flags);
+	/* Update the field, and then ring the doorbell */
+	for (i = 0; i < 16; i++)
+		writeb(NodeName[i], &h->cfgtable->ServerName[i]);
+	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+	for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
+		if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
+			break;
+		udelay(1000); /* delay and try again */
+	}
+	spin_unlock_irqrestore(&h->lock, flags);
+	if (i >= MAX_IOCTL_CONFIG_WAIT)
+		return -EAGAIN;
+	return 0;
+}
 
 
-			if (!arg)
-				return -EINVAL;
+static int cciss_getheartbeat(ctlr_info_t *h, void __user *argp)
+{
+	Heartbeat_type heartbeat;
 
 
-			if (copy_to_user
-			    (argp, &DriverVer, sizeof(DriverVer_type)))
-				return -EFAULT;
-			return 0;
-		}
+	if (!argp)
+		return -EINVAL;
+	heartbeat = readl(&h->cfgtable->HeartBeat);
+	if (copy_to_user(argp, &heartbeat, sizeof(Heartbeat_type)))
+		return -EFAULT;
+	return 0;
+}
 
 
-	case CCISS_DEREGDISK:
-	case CCISS_REGNEWD:
-	case CCISS_REVALIDVOLS:
-		return rebuild_lun_table(h, 0, 1);
+static int cciss_getbustypes(ctlr_info_t *h, void __user *argp)
+{
+	BusTypes_type BusTypes;
+
+	if (!argp)
+		return -EINVAL;
+	BusTypes = readl(&h->cfgtable->BusTypes);
+	if (copy_to_user(argp, &BusTypes, sizeof(BusTypes_type)))
+		return -EFAULT;
+	return 0;
+}
 
 
-	case CCISS_GETLUNINFO:{
-			LogvolInfo_struct luninfo;
+static int cciss_getfirmver(ctlr_info_t *h, void __user *argp)
+{
+	FirmwareVer_type firmware;
 
 
-			memcpy(&luninfo.LunID, drv->LunID,
-				sizeof(luninfo.LunID));
-			luninfo.num_opens = drv->usage_count;
-			luninfo.num_parts = 0;
-			if (copy_to_user(argp, &luninfo,
-					 sizeof(LogvolInfo_struct)))
-				return -EFAULT;
-			return 0;
+	if (!argp)
+		return -EINVAL;
+	memcpy(firmware, h->firm_ver, 4);
+
+	if (copy_to_user
+	    (argp, firmware, sizeof(FirmwareVer_type)))
+		return -EFAULT;
+	return 0;
+}
+
+static int cciss_getdrivver(ctlr_info_t *h, void __user *argp)
+{
+	DriverVer_type DriverVer = DRIVER_VERSION;
+
+	if (!argp)
+		return -EINVAL;
+	if (copy_to_user(argp, &DriverVer, sizeof(DriverVer_type)))
+		return -EFAULT;
+	return 0;
+}
+
+static int cciss_getluninfo(ctlr_info_t *h,
+	struct gendisk *disk, void __user *argp)
+{
+	LogvolInfo_struct luninfo;
+	drive_info_struct *drv = get_drv(disk);
+
+	if (!argp)
+		return -EINVAL;
+	memcpy(&luninfo.LunID, drv->LunID, sizeof(luninfo.LunID));
+	luninfo.num_opens = drv->usage_count;
+	luninfo.num_parts = 0;
+	if (copy_to_user(argp, &luninfo, sizeof(LogvolInfo_struct)))
+		return -EFAULT;
+	return 0;
+}
+
+static int cciss_passthru(ctlr_info_t *h, void __user *argp)
+{
+	IOCTL_Command_struct iocommand;
+	CommandList_struct *c;
+	char *buff = NULL;
+	u64bit temp64;
+	DECLARE_COMPLETION_ONSTACK(wait);
+
+	if (!argp)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	if (copy_from_user
+	    (&iocommand, argp, sizeof(IOCTL_Command_struct)))
+		return -EFAULT;
+	if ((iocommand.buf_size < 1) &&
+	    (iocommand.Request.Type.Direction != XFER_NONE)) {
+		return -EINVAL;
+	}
+	if (iocommand.buf_size > 0) {
+		buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
+		if (buff == NULL)
+			return -EFAULT;
+	}
+	if (iocommand.Request.Type.Direction == XFER_WRITE) {
+		/* Copy the data into the buffer we created */
+		if (copy_from_user(buff, iocommand.buf, iocommand.buf_size)) {
+			kfree(buff);
+			return -EFAULT;
 		}
 		}
-	case CCISS_PASSTHRU:
-		{
-			IOCTL_Command_struct iocommand;
-			CommandList_struct *c;
-			char *buff = NULL;
-			u64bit temp64;
-			DECLARE_COMPLETION_ONSTACK(wait);
-
-			if (!arg)
-				return -EINVAL;
-
-			if (!capable(CAP_SYS_RAWIO))
-				return -EPERM;
-
-			if (copy_from_user
-			    (&iocommand, argp, sizeof(IOCTL_Command_struct)))
-				return -EFAULT;
-			if ((iocommand.buf_size < 1) &&
-			    (iocommand.Request.Type.Direction != XFER_NONE)) {
-				return -EINVAL;
-			}
-#if 0				/* 'buf_size' member is 16-bits, and always smaller than kmalloc limit */
-			/* Check kmalloc limits */
-			if (iocommand.buf_size > 128000)
-				return -EINVAL;
-#endif
-			if (iocommand.buf_size > 0) {
-				buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
-				if (buff == NULL)
-					return -EFAULT;
-			}
-			if (iocommand.Request.Type.Direction == XFER_WRITE) {
-				/* Copy the data into the buffer we created */
-				if (copy_from_user
-				    (buff, iocommand.buf, iocommand.buf_size)) {
-					kfree(buff);
-					return -EFAULT;
-				}
-			} else {
-				memset(buff, 0, iocommand.buf_size);
-			}
-			c = cmd_special_alloc(h);
-			if (!c) {
-				kfree(buff);
-				return -ENOMEM;
-			}
-			/* Fill in the command type */
-			c->cmd_type = CMD_IOCTL_PEND;
-			/* Fill in Command Header */
-			c->Header.ReplyQueue = 0;   /* unused in simple mode */
-			if (iocommand.buf_size > 0) /* buffer to fill */
-			{
-				c->Header.SGList = 1;
-				c->Header.SGTotal = 1;
-			} else /* no buffers to fill */
-			{
-				c->Header.SGList = 0;
-				c->Header.SGTotal = 0;
-			}
-			c->Header.LUN = iocommand.LUN_info;
-			/* use the kernel address the cmd block for tag */
-			c->Header.Tag.lower = c->busaddr;
-
-			/* Fill in Request block */
-			c->Request = iocommand.Request;
-
-			/* Fill in the scatter gather information */
-			if (iocommand.buf_size > 0) {
-				temp64.val = pci_map_single(h->pdev, buff,
-					iocommand.buf_size,
-					PCI_DMA_BIDIRECTIONAL);
-				c->SG[0].Addr.lower = temp64.val32.lower;
-				c->SG[0].Addr.upper = temp64.val32.upper;
-				c->SG[0].Len = iocommand.buf_size;
-				c->SG[0].Ext = 0;  /* we are not chaining */
-			}
-			c->waiting = &wait;
+	} else {
+		memset(buff, 0, iocommand.buf_size);
+	}
+	c = cmd_special_alloc(h);
+	if (!c) {
+		kfree(buff);
+		return -ENOMEM;
+	}
+	/* Fill in the command type */
+	c->cmd_type = CMD_IOCTL_PEND;
+	/* Fill in Command Header */
+	c->Header.ReplyQueue = 0;   /* unused in simple mode */
+	if (iocommand.buf_size > 0) { /* buffer to fill */
+		c->Header.SGList = 1;
+		c->Header.SGTotal = 1;
+	} else { /* no buffers to fill */
+		c->Header.SGList = 0;
+		c->Header.SGTotal = 0;
+	}
+	c->Header.LUN = iocommand.LUN_info;
+	/* use the kernel address the cmd block for tag */
+	c->Header.Tag.lower = c->busaddr;
 
 
-			enqueue_cmd_and_start_io(h, c);
-			wait_for_completion(&wait);
+	/* Fill in Request block */
+	c->Request = iocommand.Request;
 
 
-			/* unlock the buffers from DMA */
-			temp64.val32.lower = c->SG[0].Addr.lower;
-			temp64.val32.upper = c->SG[0].Addr.upper;
-			pci_unmap_single(h->pdev, (dma_addr_t) temp64.val,
-					 iocommand.buf_size,
-					 PCI_DMA_BIDIRECTIONAL);
+	/* Fill in the scatter gather information */
+	if (iocommand.buf_size > 0) {
+		temp64.val = pci_map_single(h->pdev, buff,
+			iocommand.buf_size, PCI_DMA_BIDIRECTIONAL);
+		c->SG[0].Addr.lower = temp64.val32.lower;
+		c->SG[0].Addr.upper = temp64.val32.upper;
+		c->SG[0].Len = iocommand.buf_size;
+		c->SG[0].Ext = 0;  /* we are not chaining */
+	}
+	c->waiting = &wait;
 
 
-			check_ioctl_unit_attention(h, c);
+	enqueue_cmd_and_start_io(h, c);
+	wait_for_completion(&wait);
 
 
-			/* Copy the error information out */
-			iocommand.error_info = *(c->err_info);
-			if (copy_to_user
-			    (argp, &iocommand, sizeof(IOCTL_Command_struct))) {
-				kfree(buff);
-				cmd_special_free(h, c);
-				return -EFAULT;
-			}
+	/* unlock the buffers from DMA */
+	temp64.val32.lower = c->SG[0].Addr.lower;
+	temp64.val32.upper = c->SG[0].Addr.upper;
+	pci_unmap_single(h->pdev, (dma_addr_t) temp64.val, iocommand.buf_size,
+			 PCI_DMA_BIDIRECTIONAL);
+	check_ioctl_unit_attention(h, c);
+
+	/* Copy the error information out */
+	iocommand.error_info = *(c->err_info);
+	if (copy_to_user(argp, &iocommand, sizeof(IOCTL_Command_struct))) {
+		kfree(buff);
+		cmd_special_free(h, c);
+		return -EFAULT;
+	}
 
 
-			if (iocommand.Request.Type.Direction == XFER_READ) {
-				/* Copy the data out of the buffer we created */
-				if (copy_to_user
-				    (iocommand.buf, buff, iocommand.buf_size)) {
-					kfree(buff);
-					cmd_special_free(h, c);
-					return -EFAULT;
-				}
-			}
+	if (iocommand.Request.Type.Direction == XFER_READ) {
+		/* Copy the data out of the buffer we created */
+		if (copy_to_user(iocommand.buf, buff, iocommand.buf_size)) {
 			kfree(buff);
 			kfree(buff);
 			cmd_special_free(h, c);
 			cmd_special_free(h, c);
-			return 0;
+			return -EFAULT;
 		}
 		}
-	case CCISS_BIG_PASSTHRU:{
-			BIG_IOCTL_Command_struct *ioc;
-			CommandList_struct *c;
-			unsigned char **buff = NULL;
-			int *buff_size = NULL;
-			u64bit temp64;
-			BYTE sg_used = 0;
-			int status = 0;
-			int i;
-			DECLARE_COMPLETION_ONSTACK(wait);
-			__u32 left;
-			__u32 sz;
-			BYTE __user *data_ptr;
-
-			if (!arg)
-				return -EINVAL;
-			if (!capable(CAP_SYS_RAWIO))
-				return -EPERM;
-			ioc = (BIG_IOCTL_Command_struct *)
-			    kmalloc(sizeof(*ioc), GFP_KERNEL);
-			if (!ioc) {
-				status = -ENOMEM;
-				goto cleanup1;
-			}
-			if (copy_from_user(ioc, argp, sizeof(*ioc))) {
+	}
+	kfree(buff);
+	cmd_special_free(h, c);
+	return 0;
+}
+
+static int cciss_bigpassthru(ctlr_info_t *h, void __user *argp)
+{
+	BIG_IOCTL_Command_struct *ioc;
+	CommandList_struct *c;
+	unsigned char **buff = NULL;
+	int *buff_size = NULL;
+	u64bit temp64;
+	BYTE sg_used = 0;
+	int status = 0;
+	int i;
+	DECLARE_COMPLETION_ONSTACK(wait);
+	__u32 left;
+	__u32 sz;
+	BYTE __user *data_ptr;
+
+	if (!argp)
+		return -EINVAL;
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+	ioc = (BIG_IOCTL_Command_struct *)
+	    kmalloc(sizeof(*ioc), GFP_KERNEL);
+	if (!ioc) {
+		status = -ENOMEM;
+		goto cleanup1;
+	}
+	if (copy_from_user(ioc, argp, sizeof(*ioc))) {
+		status = -EFAULT;
+		goto cleanup1;
+	}
+	if ((ioc->buf_size < 1) &&
+	    (ioc->Request.Type.Direction != XFER_NONE)) {
+		status = -EINVAL;
+		goto cleanup1;
+	}
+	/* Check kmalloc limits  using all SGs */
+	if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
+		status = -EINVAL;
+		goto cleanup1;
+	}
+	if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
+		status = -EINVAL;
+		goto cleanup1;
+	}
+	buff = kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
+	if (!buff) {
+		status = -ENOMEM;
+		goto cleanup1;
+	}
+	buff_size = kmalloc(MAXSGENTRIES * sizeof(int), GFP_KERNEL);
+	if (!buff_size) {
+		status = -ENOMEM;
+		goto cleanup1;
+	}
+	left = ioc->buf_size;
+	data_ptr = ioc->buf;
+	while (left) {
+		sz = (left > ioc->malloc_size) ? ioc->malloc_size : left;
+		buff_size[sg_used] = sz;
+		buff[sg_used] = kmalloc(sz, GFP_KERNEL);
+		if (buff[sg_used] == NULL) {
+			status = -ENOMEM;
+			goto cleanup1;
+		}
+		if (ioc->Request.Type.Direction == XFER_WRITE) {
+			if (copy_from_user(buff[sg_used], data_ptr, sz)) {
 				status = -EFAULT;
 				status = -EFAULT;
 				goto cleanup1;
 				goto cleanup1;
 			}
 			}
-			if ((ioc->buf_size < 1) &&
-			    (ioc->Request.Type.Direction != XFER_NONE)) {
-				status = -EINVAL;
-				goto cleanup1;
-			}
-			/* Check kmalloc limits  using all SGs */
-			if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
-				status = -EINVAL;
-				goto cleanup1;
-			}
-			if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
-				status = -EINVAL;
-				goto cleanup1;
-			}
-			buff =
-			    kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
-			if (!buff) {
-				status = -ENOMEM;
-				goto cleanup1;
-			}
-			buff_size = kmalloc(MAXSGENTRIES * sizeof(int),
-						   GFP_KERNEL);
-			if (!buff_size) {
-				status = -ENOMEM;
-				goto cleanup1;
-			}
-			left = ioc->buf_size;
-			data_ptr = ioc->buf;
-			while (left) {
-				sz = (left >
-				      ioc->malloc_size) ? ioc->
-				    malloc_size : left;
-				buff_size[sg_used] = sz;
-				buff[sg_used] = kmalloc(sz, GFP_KERNEL);
-				if (buff[sg_used] == NULL) {
-					status = -ENOMEM;
-					goto cleanup1;
-				}
-				if (ioc->Request.Type.Direction == XFER_WRITE) {
-					if (copy_from_user
-					    (buff[sg_used], data_ptr, sz)) {
-						status = -EFAULT;
-						goto cleanup1;
-					}
-				} else {
-					memset(buff[sg_used], 0, sz);
-				}
-				left -= sz;
-				data_ptr += sz;
-				sg_used++;
-			}
-			c = cmd_special_alloc(h);
-			if (!c) {
-				status = -ENOMEM;
-				goto cleanup1;
-			}
-			c->cmd_type = CMD_IOCTL_PEND;
-			c->Header.ReplyQueue = 0;
+		} else {
+			memset(buff[sg_used], 0, sz);
+		}
+		left -= sz;
+		data_ptr += sz;
+		sg_used++;
+	}
+	c = cmd_special_alloc(h);
+	if (!c) {
+		status = -ENOMEM;
+		goto cleanup1;
+	}
+	c->cmd_type = CMD_IOCTL_PEND;
+	c->Header.ReplyQueue = 0;
+	c->Header.SGList = sg_used;
+	c->Header.SGTotal = sg_used;
+	c->Header.LUN = ioc->LUN_info;
+	c->Header.Tag.lower = c->busaddr;
 
 
-			if (ioc->buf_size > 0) {
-				c->Header.SGList = sg_used;
-				c->Header.SGTotal = sg_used;
-			} else {
-				c->Header.SGList = 0;
-				c->Header.SGTotal = 0;
-			}
-			c->Header.LUN = ioc->LUN_info;
-			c->Header.Tag.lower = c->busaddr;
-
-			c->Request = ioc->Request;
-			if (ioc->buf_size > 0) {
-				for (i = 0; i < sg_used; i++) {
-					temp64.val =
-					    pci_map_single(h->pdev, buff[i],
-						    buff_size[i],
-						    PCI_DMA_BIDIRECTIONAL);
-					c->SG[i].Addr.lower =
-					    temp64.val32.lower;
-					c->SG[i].Addr.upper =
-					    temp64.val32.upper;
-					c->SG[i].Len = buff_size[i];
-					c->SG[i].Ext = 0;	/* we are not chaining */
-				}
-			}
-			c->waiting = &wait;
-			enqueue_cmd_and_start_io(h, c);
-			wait_for_completion(&wait);
-			/* unlock the buffers from DMA */
-			for (i = 0; i < sg_used; i++) {
-				temp64.val32.lower = c->SG[i].Addr.lower;
-				temp64.val32.upper = c->SG[i].Addr.upper;
-				pci_unmap_single(h->pdev,
-					(dma_addr_t) temp64.val, buff_size[i],
-					PCI_DMA_BIDIRECTIONAL);
-			}
-			check_ioctl_unit_attention(h, c);
-			/* Copy the error information out */
-			ioc->error_info = *(c->err_info);
-			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
+	c->Request = ioc->Request;
+	for (i = 0; i < sg_used; i++) {
+		temp64.val = pci_map_single(h->pdev, buff[i], buff_size[i],
+				    PCI_DMA_BIDIRECTIONAL);
+		c->SG[i].Addr.lower = temp64.val32.lower;
+		c->SG[i].Addr.upper = temp64.val32.upper;
+		c->SG[i].Len = buff_size[i];
+		c->SG[i].Ext = 0;	/* we are not chaining */
+	}
+	c->waiting = &wait;
+	enqueue_cmd_and_start_io(h, c);
+	wait_for_completion(&wait);
+	/* unlock the buffers from DMA */
+	for (i = 0; i < sg_used; i++) {
+		temp64.val32.lower = c->SG[i].Addr.lower;
+		temp64.val32.upper = c->SG[i].Addr.upper;
+		pci_unmap_single(h->pdev,
+			(dma_addr_t) temp64.val, buff_size[i],
+			PCI_DMA_BIDIRECTIONAL);
+	}
+	check_ioctl_unit_attention(h, c);
+	/* Copy the error information out */
+	ioc->error_info = *(c->err_info);
+	if (copy_to_user(argp, ioc, sizeof(*ioc))) {
+		cmd_special_free(h, c);
+		status = -EFAULT;
+		goto cleanup1;
+	}
+	if (ioc->Request.Type.Direction == XFER_READ) {
+		/* Copy the data out of the buffer we created */
+		BYTE __user *ptr = ioc->buf;
+		for (i = 0; i < sg_used; i++) {
+			if (copy_to_user(ptr, buff[i], buff_size[i])) {
 				cmd_special_free(h, c);
 				cmd_special_free(h, c);
 				status = -EFAULT;
 				status = -EFAULT;
 				goto cleanup1;
 				goto cleanup1;
 			}
 			}
-			if (ioc->Request.Type.Direction == XFER_READ) {
-				/* Copy the data out of the buffer we created */
-				BYTE __user *ptr = ioc->buf;
-				for (i = 0; i < sg_used; i++) {
-					if (copy_to_user
-					    (ptr, buff[i], buff_size[i])) {
-						cmd_special_free(h, c);
-						status = -EFAULT;
-						goto cleanup1;
-					}
-					ptr += buff_size[i];
-				}
-			}
-			cmd_special_free(h, c);
-			status = 0;
-		      cleanup1:
-			if (buff) {
-				for (i = 0; i < sg_used; i++)
-					kfree(buff[i]);
-				kfree(buff);
-			}
-			kfree(buff_size);
-			kfree(ioc);
-			return status;
+			ptr += buff_size[i];
 		}
 		}
+	}
+	cmd_special_free(h, c);
+	status = 0;
+cleanup1:
+	if (buff) {
+		for (i = 0; i < sg_used; i++)
+			kfree(buff[i]);
+		kfree(buff);
+	}
+	kfree(buff_size);
+	kfree(ioc);
+	return status;
+}
+
+static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
+	unsigned int cmd, unsigned long arg)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	ctlr_info_t *h = get_host(disk);
+	void __user *argp = (void __user *)arg;
+
+	dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n",
+		cmd, arg);
+	switch (cmd) {
+	case CCISS_GETPCIINFO:
+		return cciss_getpciinfo(h, argp);
+	case CCISS_GETINTINFO:
+		return cciss_getintinfo(h, argp);
+	case CCISS_SETINTINFO:
+		return cciss_setintinfo(h, argp);
+	case CCISS_GETNODENAME:
+		return cciss_getnodename(h, argp);
+	case CCISS_SETNODENAME:
+		return cciss_setnodename(h, argp);
+	case CCISS_GETHEARTBEAT:
+		return cciss_getheartbeat(h, argp);
+	case CCISS_GETBUSTYPES:
+		return cciss_getbustypes(h, argp);
+	case CCISS_GETFIRMVER:
+		return cciss_getfirmver(h, argp);
+	case CCISS_GETDRIVVER:
+		return cciss_getdrivver(h, argp);
+	case CCISS_DEREGDISK:
+	case CCISS_REGNEWD:
+	case CCISS_REVALIDVOLS:
+		return rebuild_lun_table(h, 0, 1);
+	case CCISS_GETLUNINFO:
+		return cciss_getluninfo(h, disk, argp);
+	case CCISS_PASSTHRU:
+		return cciss_passthru(h, argp);
+	case CCISS_BIG_PASSTHRU:
+		return cciss_bigpassthru(h, argp);
 
 
 	/* scsi_cmd_ioctl handles these, below, though some are not */
 	/* scsi_cmd_ioctl handles these, below, though some are not */
 	/* very meaningful for cciss.  SG_IO is the main one people want. */
 	/* very meaningful for cciss.  SG_IO is the main one people want. */

+ 20 - 21
drivers/block/drbd/drbd_actlog.c

@@ -965,29 +965,30 @@ void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
 	 * ok, (capacity & 7) != 0 sometimes, but who cares...
 	 * ok, (capacity & 7) != 0 sometimes, but who cares...
 	 * we count rs_{total,left} in bits, not sectors.
 	 * we count rs_{total,left} in bits, not sectors.
 	 */
 	 */
-	spin_lock_irqsave(&mdev->al_lock, flags);
 	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
 	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
-	if (count) {
-		/* we need the lock for drbd_try_clear_on_disk_bm */
-		if (jiffies - mdev->rs_mark_time > HZ*10) {
-			/* should be rolling marks,
-			 * but we estimate only anyways. */
-			if (mdev->rs_mark_left != drbd_bm_total_weight(mdev) &&
+	if (count && get_ldev(mdev)) {
+		unsigned long now = jiffies;
+		unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
+		int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
+		if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
+			unsigned long tw = drbd_bm_total_weight(mdev);
+			if (mdev->rs_mark_left[mdev->rs_last_mark] != tw &&
 			    mdev->state.conn != C_PAUSED_SYNC_T &&
 			    mdev->state.conn != C_PAUSED_SYNC_T &&
 			    mdev->state.conn != C_PAUSED_SYNC_S) {
 			    mdev->state.conn != C_PAUSED_SYNC_S) {
-				mdev->rs_mark_time = jiffies;
-				mdev->rs_mark_left = drbd_bm_total_weight(mdev);
+				mdev->rs_mark_time[next] = now;
+				mdev->rs_mark_left[next] = tw;
+				mdev->rs_last_mark = next;
 			}
 			}
 		}
 		}
-		if (get_ldev(mdev)) {
-			drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
-			put_ldev(mdev);
-		}
+		spin_lock_irqsave(&mdev->al_lock, flags);
+		drbd_try_clear_on_disk_bm(mdev, sector, count, TRUE);
+		spin_unlock_irqrestore(&mdev->al_lock, flags);
+
 		/* just wake_up unconditional now, various lc_chaged(),
 		/* just wake_up unconditional now, various lc_chaged(),
 		 * lc_put() in drbd_try_clear_on_disk_bm(). */
 		 * lc_put() in drbd_try_clear_on_disk_bm(). */
 		wake_up = 1;
 		wake_up = 1;
+		put_ldev(mdev);
 	}
 	}
-	spin_unlock_irqrestore(&mdev->al_lock, flags);
 	if (wake_up)
 	if (wake_up)
 		wake_up(&mdev->al_wait);
 		wake_up(&mdev->al_wait);
 }
 }
@@ -1118,7 +1119,7 @@ static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
  * @mdev:	DRBD device.
  * @mdev:	DRBD device.
  * @sector:	The sector number.
  * @sector:	The sector number.
  *
  *
- * This functions sleeps on al_wait. Returns 1 on success, 0 if interrupted.
+ * This functions sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
  */
  */
 int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 {
 {
@@ -1129,10 +1130,10 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 	sig = wait_event_interruptible(mdev->al_wait,
 	sig = wait_event_interruptible(mdev->al_wait,
 			(bm_ext = _bme_get(mdev, enr)));
 			(bm_ext = _bme_get(mdev, enr)));
 	if (sig)
 	if (sig)
-		return 0;
+		return -EINTR;
 
 
 	if (test_bit(BME_LOCKED, &bm_ext->flags))
 	if (test_bit(BME_LOCKED, &bm_ext->flags))
-		return 1;
+		return 0;
 
 
 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
 	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
 		sig = wait_event_interruptible(mdev->al_wait,
 		sig = wait_event_interruptible(mdev->al_wait,
@@ -1145,13 +1146,11 @@ int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
 				wake_up(&mdev->al_wait);
 				wake_up(&mdev->al_wait);
 			}
 			}
 			spin_unlock_irq(&mdev->al_lock);
 			spin_unlock_irq(&mdev->al_lock);
-			return 0;
+			return -EINTR;
 		}
 		}
 	}
 	}
-
 	set_bit(BME_LOCKED, &bm_ext->flags);
 	set_bit(BME_LOCKED, &bm_ext->flags);
-
-	return 1;
+	return 0;
 }
 }
 
 
 /**
 /**

+ 1 - 1
drivers/block/drbd/drbd_bitmap.c

@@ -569,7 +569,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
  *
  *
  * maybe bm_set should be atomic_t ?
  * maybe bm_set should be atomic_t ?
  */
  */
-static unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
+unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
 {
 {
 	struct drbd_bitmap *b = mdev->bitmap;
 	struct drbd_bitmap *b = mdev->bitmap;
 	unsigned long s;
 	unsigned long s;

+ 152 - 64
drivers/block/drbd/drbd_int.h

@@ -337,13 +337,25 @@ static inline void bm_xfer_ctx_bit_to_word_offset(struct bm_xfer_ctx *c)
  * NOTE that the payload starts at a long aligned offset,
  * NOTE that the payload starts at a long aligned offset,
  * regardless of 32 or 64 bit arch!
  * regardless of 32 or 64 bit arch!
  */
  */
-struct p_header {
+struct p_header80 {
 	u32	  magic;
 	u32	  magic;
 	u16	  command;
 	u16	  command;
 	u16	  length;	/* bytes of data after this header */
 	u16	  length;	/* bytes of data after this header */
 	u8	  payload[0];
 	u8	  payload[0];
 } __packed;
 } __packed;
-/* 8 bytes. packet FIXED for the next century! */
+
+/* Header for big packets, Used for data packets exceeding 64kB */
+struct p_header95 {
+	u16	  magic;	/* use DRBD_MAGIC_BIG here */
+	u16	  command;
+	u32	  length;	/* Use only 24 bits of that. Ignore the highest 8 bit. */
+	u8	  payload[0];
+} __packed;
+
+union p_header {
+	struct p_header80 h80;
+	struct p_header95 h95;
+};
 
 
 /*
 /*
  * short commands, packets without payload, plain p_header:
  * short commands, packets without payload, plain p_header:
@@ -362,12 +374,16 @@ struct p_header {
  */
  */
 
 
 /* these defines must not be changed without changing the protocol version */
 /* these defines must not be changed without changing the protocol version */
-#define DP_HARDBARRIER	      1
-#define DP_RW_SYNC	      2
+#define DP_HARDBARRIER	      1 /* depricated */
+#define DP_RW_SYNC	      2 /* equals REQ_SYNC    */
 #define DP_MAY_SET_IN_SYNC    4
 #define DP_MAY_SET_IN_SYNC    4
+#define DP_UNPLUG             8 /* equals REQ_UNPLUG  */
+#define DP_FUA               16 /* equals REQ_FUA     */
+#define DP_FLUSH             32 /* equals REQ_FLUSH   */
+#define DP_DISCARD           64 /* equals REQ_DISCARD */
 
 
 struct p_data {
 struct p_data {
-	struct p_header head;
+	union p_header head;
 	u64	    sector;    /* 64 bits sector number */
 	u64	    sector;    /* 64 bits sector number */
 	u64	    block_id;  /* to identify the request in protocol B&C */
 	u64	    block_id;  /* to identify the request in protocol B&C */
 	u32	    seq_num;
 	u32	    seq_num;
@@ -383,7 +399,7 @@ struct p_data {
  *   P_DATA_REQUEST, P_RS_DATA_REQUEST
  *   P_DATA_REQUEST, P_RS_DATA_REQUEST
  */
  */
 struct p_block_ack {
 struct p_block_ack {
-	struct p_header head;
+	struct p_header80 head;
 	u64	    sector;
 	u64	    sector;
 	u64	    block_id;
 	u64	    block_id;
 	u32	    blksize;
 	u32	    blksize;
@@ -392,7 +408,7 @@ struct p_block_ack {
 
 
 
 
 struct p_block_req {
 struct p_block_req {
-	struct p_header head;
+	struct p_header80 head;
 	u64 sector;
 	u64 sector;
 	u64 block_id;
 	u64 block_id;
 	u32 blksize;
 	u32 blksize;
@@ -409,7 +425,7 @@ struct p_block_req {
  */
  */
 
 
 struct p_handshake {
 struct p_handshake {
-	struct p_header head;	/* 8 bytes */
+	struct p_header80 head;	/* 8 bytes */
 	u32 protocol_min;
 	u32 protocol_min;
 	u32 feature_flags;
 	u32 feature_flags;
 	u32 protocol_max;
 	u32 protocol_max;
@@ -424,19 +440,19 @@ struct p_handshake {
 /* 80 bytes, FIXED for the next century */
 /* 80 bytes, FIXED for the next century */
 
 
 struct p_barrier {
 struct p_barrier {
-	struct p_header head;
+	struct p_header80 head;
 	u32 barrier;	/* barrier number _handle_ only */
 	u32 barrier;	/* barrier number _handle_ only */
 	u32 pad;	/* to multiple of 8 Byte */
 	u32 pad;	/* to multiple of 8 Byte */
 } __packed;
 } __packed;
 
 
 struct p_barrier_ack {
 struct p_barrier_ack {
-	struct p_header head;
+	struct p_header80 head;
 	u32 barrier;
 	u32 barrier;
 	u32 set_size;
 	u32 set_size;
 } __packed;
 } __packed;
 
 
 struct p_rs_param {
 struct p_rs_param {
-	struct p_header head;
+	struct p_header80 head;
 	u32 rate;
 	u32 rate;
 
 
 	      /* Since protocol version 88 and higher. */
 	      /* Since protocol version 88 and higher. */
@@ -444,20 +460,31 @@ struct p_rs_param {
 } __packed;
 } __packed;
 
 
 struct p_rs_param_89 {
 struct p_rs_param_89 {
-	struct p_header head;
+	struct p_header80 head;
 	u32 rate;
 	u32 rate;
         /* protocol version 89: */
         /* protocol version 89: */
 	char verify_alg[SHARED_SECRET_MAX];
 	char verify_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 	char csums_alg[SHARED_SECRET_MAX];
 } __packed;
 } __packed;
 
 
+struct p_rs_param_95 {
+	struct p_header80 head;
+	u32 rate;
+	char verify_alg[SHARED_SECRET_MAX];
+	char csums_alg[SHARED_SECRET_MAX];
+	u32 c_plan_ahead;
+	u32 c_delay_target;
+	u32 c_fill_target;
+	u32 c_max_rate;
+} __packed;
+
 enum drbd_conn_flags {
 enum drbd_conn_flags {
 	CF_WANT_LOSE = 1,
 	CF_WANT_LOSE = 1,
 	CF_DRY_RUN = 2,
 	CF_DRY_RUN = 2,
 };
 };
 
 
 struct p_protocol {
 struct p_protocol {
-	struct p_header head;
+	struct p_header80 head;
 	u32 protocol;
 	u32 protocol;
 	u32 after_sb_0p;
 	u32 after_sb_0p;
 	u32 after_sb_1p;
 	u32 after_sb_1p;
@@ -471,17 +498,17 @@ struct p_protocol {
 } __packed;
 } __packed;
 
 
 struct p_uuids {
 struct p_uuids {
-	struct p_header head;
+	struct p_header80 head;
 	u64 uuid[UI_EXTENDED_SIZE];
 	u64 uuid[UI_EXTENDED_SIZE];
 } __packed;
 } __packed;
 
 
 struct p_rs_uuid {
 struct p_rs_uuid {
-	struct p_header head;
+	struct p_header80 head;
 	u64	    uuid;
 	u64	    uuid;
 } __packed;
 } __packed;
 
 
 struct p_sizes {
 struct p_sizes {
-	struct p_header head;
+	struct p_header80 head;
 	u64	    d_size;  /* size of disk */
 	u64	    d_size;  /* size of disk */
 	u64	    u_size;  /* user requested size */
 	u64	    u_size;  /* user requested size */
 	u64	    c_size;  /* current exported size */
 	u64	    c_size;  /* current exported size */
@@ -491,18 +518,18 @@ struct p_sizes {
 } __packed;
 } __packed;
 
 
 struct p_state {
 struct p_state {
-	struct p_header head;
+	struct p_header80 head;
 	u32	    state;
 	u32	    state;
 } __packed;
 } __packed;
 
 
 struct p_req_state {
 struct p_req_state {
-	struct p_header head;
+	struct p_header80 head;
 	u32	    mask;
 	u32	    mask;
 	u32	    val;
 	u32	    val;
 } __packed;
 } __packed;
 
 
 struct p_req_state_reply {
 struct p_req_state_reply {
-	struct p_header head;
+	struct p_header80 head;
 	u32	    retcode;
 	u32	    retcode;
 } __packed;
 } __packed;
 
 
@@ -517,7 +544,7 @@ struct p_drbd06_param {
 } __packed;
 } __packed;
 
 
 struct p_discard {
 struct p_discard {
-	struct p_header head;
+	struct p_header80 head;
 	u64	    block_id;
 	u64	    block_id;
 	u32	    seq_num;
 	u32	    seq_num;
 	u32	    pad;
 	u32	    pad;
@@ -533,7 +560,7 @@ enum drbd_bitmap_code {
 };
 };
 
 
 struct p_compressed_bm {
 struct p_compressed_bm {
-	struct p_header head;
+	struct p_header80 head;
 	/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
 	/* (encoding & 0x0f): actual encoding, see enum drbd_bitmap_code
 	 * (encoding & 0x80): polarity (set/unset) of first runlength
 	 * (encoding & 0x80): polarity (set/unset) of first runlength
 	 * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
 	 * ((encoding >> 4) & 0x07): pad_bits, number of trailing zero bits
@@ -544,10 +571,10 @@ struct p_compressed_bm {
 	u8 code[0];
 	u8 code[0];
 } __packed;
 } __packed;
 
 
-struct p_delay_probe {
-	struct p_header head;
-	u32	seq_num; /* sequence number to match the two probe packets */
-	u32	offset;	 /* usecs the probe got sent after the reference time point */
+struct p_delay_probe93 {
+	struct p_header80 head;
+	u32     seq_num; /* sequence number to match the two probe packets */
+	u32     offset;  /* usecs the probe got sent after the reference time point */
 } __packed;
 } __packed;
 
 
 /* DCBP: Drbd Compressed Bitmap Packet ... */
 /* DCBP: Drbd Compressed Bitmap Packet ... */
@@ -594,7 +621,7 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
  * so we need to use the fixed size 4KiB page size
  * so we need to use the fixed size 4KiB page size
  * most architechtures have used for a long time.
  * most architechtures have used for a long time.
  */
  */
-#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header))
+#define BM_PACKET_PAYLOAD_BYTES (4096 - sizeof(struct p_header80))
 #define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
 #define BM_PACKET_WORDS (BM_PACKET_PAYLOAD_BYTES/sizeof(long))
 #define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
 #define BM_PACKET_VLI_BYTES_MAX (4096 - sizeof(struct p_compressed_bm))
 #if (PAGE_SIZE < 4096)
 #if (PAGE_SIZE < 4096)
@@ -603,13 +630,14 @@ DCBP_set_pad_bits(struct p_compressed_bm *p, int n)
 #endif
 #endif
 
 
 union p_polymorph {
 union p_polymorph {
-        struct p_header          header;
+        union p_header           header;
         struct p_handshake       handshake;
         struct p_handshake       handshake;
         struct p_data            data;
         struct p_data            data;
         struct p_block_ack       block_ack;
         struct p_block_ack       block_ack;
         struct p_barrier         barrier;
         struct p_barrier         barrier;
         struct p_barrier_ack     barrier_ack;
         struct p_barrier_ack     barrier_ack;
         struct p_rs_param_89     rs_param_89;
         struct p_rs_param_89     rs_param_89;
+        struct p_rs_param_95     rs_param_95;
         struct p_protocol        protocol;
         struct p_protocol        protocol;
         struct p_sizes           sizes;
         struct p_sizes           sizes;
         struct p_uuids           uuids;
         struct p_uuids           uuids;
@@ -617,6 +645,8 @@ union p_polymorph {
         struct p_req_state       req_state;
         struct p_req_state       req_state;
         struct p_req_state_reply req_state_reply;
         struct p_req_state_reply req_state_reply;
         struct p_block_req       block_req;
         struct p_block_req       block_req;
+	struct p_delay_probe93   delay_probe93;
+	struct p_rs_uuid         rs_uuid;
 } __packed;
 } __packed;
 
 
 /**********************************************************************/
 /**********************************************************************/
@@ -697,7 +727,7 @@ struct drbd_tl_epoch {
 	struct list_head requests; /* requests before */
 	struct list_head requests; /* requests before */
 	struct drbd_tl_epoch *next; /* pointer to the next barrier */
 	struct drbd_tl_epoch *next; /* pointer to the next barrier */
 	unsigned int br_number;  /* the barriers identifier. */
 	unsigned int br_number;  /* the barriers identifier. */
-	int n_req;	/* number of requests attached before this barrier */
+	int n_writes;	/* number of requests attached before this barrier */
 };
 };
 
 
 struct drbd_request;
 struct drbd_request;
@@ -747,7 +777,7 @@ struct digest_info {
 struct drbd_epoch_entry {
 struct drbd_epoch_entry {
 	struct drbd_work w;
 	struct drbd_work w;
 	struct hlist_node colision;
 	struct hlist_node colision;
-	struct drbd_epoch *epoch;
+	struct drbd_epoch *epoch; /* for writes */
 	struct drbd_conf *mdev;
 	struct drbd_conf *mdev;
 	struct page *pages;
 	struct page *pages;
 	atomic_t pending_bios;
 	atomic_t pending_bios;
@@ -755,7 +785,10 @@ struct drbd_epoch_entry {
 	/* see comments on ee flag bits below */
 	/* see comments on ee flag bits below */
 	unsigned long flags;
 	unsigned long flags;
 	sector_t sector;
 	sector_t sector;
-	u64 block_id;
+	union {
+		u64 block_id;
+		struct digest_info *digest;
+	};
 };
 };
 
 
 /* ee flag bits.
 /* ee flag bits.
@@ -781,12 +814,16 @@ enum {
 	 * if any of those fail, we set this flag atomically
 	 * if any of those fail, we set this flag atomically
 	 * from the endio callback */
 	 * from the endio callback */
 	__EE_WAS_ERROR,
 	__EE_WAS_ERROR,
+
+	/* This ee has a pointer to a digest instead of a block id */
+	__EE_HAS_DIGEST,
 };
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
 #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
 #define EE_IS_BARRIER          (1<<__EE_IS_BARRIER)
 #define EE_IS_BARRIER          (1<<__EE_IS_BARRIER)
 #define	EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
 #define	EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
 #define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
+#define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
 
 
 /* global flag bits */
 /* global flag bits */
 enum {
 enum {
@@ -794,7 +831,6 @@ enum {
 	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
 	SIGNAL_ASENDER,		/* whether asender wants to be interrupted */
 	SEND_PING,		/* whether asender should send a ping asap */
 	SEND_PING,		/* whether asender should send a ping asap */
 
 
-	STOP_SYNC_TIMER,	/* tell timer to cancel itself */
 	UNPLUG_QUEUED,		/* only relevant with kernel 2.4 */
 	UNPLUG_QUEUED,		/* only relevant with kernel 2.4 */
 	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
 	UNPLUG_REMOTE,		/* sending a "UnplugRemote" could help */
 	MD_DIRTY,		/* current uuids and flags not yet on disk */
 	MD_DIRTY,		/* current uuids and flags not yet on disk */
@@ -816,6 +852,7 @@ enum {
 	BITMAP_IO,		/* suspend application io;
 	BITMAP_IO,		/* suspend application io;
 				   once no more io in flight, start bitmap io */
 				   once no more io in flight, start bitmap io */
 	BITMAP_IO_QUEUED,       /* Started bitmap IO */
 	BITMAP_IO_QUEUED,       /* Started bitmap IO */
+	GO_DISKLESS,		/* Disk failed, local_cnt reached zero, we are going diskless */
 	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
 	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
 	NET_CONGESTED,		/* The data socket is congested */
 	NET_CONGESTED,		/* The data socket is congested */
 
 
@@ -829,6 +866,8 @@ enum {
 				 * the peer, if it changed there as well. */
 				 * the peer, if it changed there as well. */
 	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
 	CONN_DRY_RUN,		/* Expect disconnect after resync handshake. */
 	GOT_PING_ACK,		/* set when we receive a ping_ack packet, misc wait gets woken */
 	GOT_PING_ACK,		/* set when we receive a ping_ack packet, misc wait gets woken */
+	NEW_CUR_UUID,		/* Create new current UUID when thawing IO */
+	AL_SUSPENDED,		/* Activity logging is currently suspended. */
 };
 };
 
 
 struct drbd_bitmap; /* opaque for drbd_conf */
 struct drbd_bitmap; /* opaque for drbd_conf */
@@ -838,10 +877,6 @@ struct drbd_bitmap; /* opaque for drbd_conf */
 
 
 /* THINK maybe we actually want to use the default "event/%s" worker threads
 /* THINK maybe we actually want to use the default "event/%s" worker threads
  * or similar in linux 2.6, which uses per cpu data and threads.
  * or similar in linux 2.6, which uses per cpu data and threads.
- *
- * To be general, this might need a spin_lock member.
- * For now, please use the mdev->req_lock to protect list_head,
- * see drbd_queue_work below.
  */
  */
 struct drbd_work_queue {
 struct drbd_work_queue {
 	struct list_head q;
 	struct list_head q;
@@ -915,6 +950,12 @@ enum write_ordering_e {
 	WO_bio_barrier
 	WO_bio_barrier
 };
 };
 
 
+struct fifo_buffer {
+	int *values;
+	unsigned int head_index;
+	unsigned int size;
+};
+
 struct drbd_conf {
 struct drbd_conf {
 	/* things that are stored as / read from meta data on disk */
 	/* things that are stored as / read from meta data on disk */
 	unsigned long flags;
 	unsigned long flags;
@@ -936,9 +977,16 @@ struct drbd_conf {
 	unsigned int ko_count;
 	unsigned int ko_count;
 	struct drbd_work  resync_work,
 	struct drbd_work  resync_work,
 			  unplug_work,
 			  unplug_work,
+			  go_diskless,
 			  md_sync_work;
 			  md_sync_work;
 	struct timer_list resync_timer;
 	struct timer_list resync_timer;
 	struct timer_list md_sync_timer;
 	struct timer_list md_sync_timer;
+#ifdef DRBD_DEBUG_MD_SYNC
+	struct {
+		unsigned int line;
+		const char* func;
+	} last_md_mark_dirty;
+#endif
 
 
 	/* Used after attach while negotiating new disk state. */
 	/* Used after attach while negotiating new disk state. */
 	union drbd_state new_state_tmp;
 	union drbd_state new_state_tmp;
@@ -946,6 +994,7 @@ struct drbd_conf {
 	union drbd_state state;
 	union drbd_state state;
 	wait_queue_head_t misc_wait;
 	wait_queue_head_t misc_wait;
 	wait_queue_head_t state_wait;  /* upon each state change. */
 	wait_queue_head_t state_wait;  /* upon each state change. */
+	wait_queue_head_t net_cnt_wait;
 	unsigned int send_cnt;
 	unsigned int send_cnt;
 	unsigned int recv_cnt;
 	unsigned int recv_cnt;
 	unsigned int read_cnt;
 	unsigned int read_cnt;
@@ -974,12 +1023,16 @@ struct drbd_conf {
 	unsigned long rs_start;
 	unsigned long rs_start;
 	/* cumulated time in PausedSyncX state [unit jiffies] */
 	/* cumulated time in PausedSyncX state [unit jiffies] */
 	unsigned long rs_paused;
 	unsigned long rs_paused;
+	/* skipped because csum was equal [unit BM_BLOCK_SIZE] */
+	unsigned long rs_same_csum;
+#define DRBD_SYNC_MARKS 8
+#define DRBD_SYNC_MARK_STEP (3*HZ)
 	/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
 	/* block not up-to-date at mark [unit BM_BLOCK_SIZE] */
-	unsigned long rs_mark_left;
+	unsigned long rs_mark_left[DRBD_SYNC_MARKS];
 	/* marks's time [unit jiffies] */
 	/* marks's time [unit jiffies] */
-	unsigned long rs_mark_time;
-	/* skipped because csum was equeal [unit BM_BLOCK_SIZE] */
-	unsigned long rs_same_csum;
+	unsigned long rs_mark_time[DRBD_SYNC_MARKS];
+	/* current index into rs_mark_{left,time} */
+	int rs_last_mark;
 
 
 	/* where does the admin want us to start? (sector) */
 	/* where does the admin want us to start? (sector) */
 	sector_t ov_start_sector;
 	sector_t ov_start_sector;
@@ -1012,10 +1065,10 @@ struct drbd_conf {
 	spinlock_t epoch_lock;
 	spinlock_t epoch_lock;
 	unsigned int epochs;
 	unsigned int epochs;
 	enum write_ordering_e write_ordering;
 	enum write_ordering_e write_ordering;
-	struct list_head active_ee; /* IO in progress */
-	struct list_head sync_ee;   /* IO in progress */
+	struct list_head active_ee; /* IO in progress (P_DATA gets written to disk) */
+	struct list_head sync_ee;   /* IO in progress (P_RS_DATA_REPLY gets written to disk) */
 	struct list_head done_ee;   /* send ack */
 	struct list_head done_ee;   /* send ack */
-	struct list_head read_ee;   /* IO in progress */
+	struct list_head read_ee;   /* IO in progress (any read) */
 	struct list_head net_ee;    /* zero-copy network send in progress */
 	struct list_head net_ee;    /* zero-copy network send in progress */
 	struct hlist_head *ee_hash; /* is proteced by req_lock! */
 	struct hlist_head *ee_hash; /* is proteced by req_lock! */
 	unsigned int ee_hash_s;
 	unsigned int ee_hash_s;
@@ -1026,7 +1079,8 @@ struct drbd_conf {
 	int next_barrier_nr;
 	int next_barrier_nr;
 	struct hlist_head *app_reads_hash; /* is proteced by req_lock */
 	struct hlist_head *app_reads_hash; /* is proteced by req_lock */
 	struct list_head resync_reads;
 	struct list_head resync_reads;
-	atomic_t pp_in_use;
+	atomic_t pp_in_use;		/* allocated from page pool */
+	atomic_t pp_in_use_by_net;	/* sendpage()d, still referenced by tcp */
 	wait_queue_head_t ee_wait;
 	wait_queue_head_t ee_wait;
 	struct page *md_io_page;	/* one page buffer for md_io */
 	struct page *md_io_page;	/* one page buffer for md_io */
 	struct page *md_io_tmpp;	/* for logical_block_size != 512 */
 	struct page *md_io_tmpp;	/* for logical_block_size != 512 */
@@ -1054,6 +1108,15 @@ struct drbd_conf {
 	u64 ed_uuid; /* UUID of the exposed data */
 	u64 ed_uuid; /* UUID of the exposed data */
 	struct mutex state_mutex;
 	struct mutex state_mutex;
 	char congestion_reason;  /* Why we where congested... */
 	char congestion_reason;  /* Why we where congested... */
+	atomic_t rs_sect_in; /* for incoming resync data rate, SyncTarget */
+	atomic_t rs_sect_ev; /* for submitted resync data rate, both */
+	int rs_last_sect_ev; /* counter to compare with */
+	int rs_last_events;  /* counter of read or write "events" (unit sectors)
+			      * on the lower level device when we last looked. */
+	int c_sync_rate; /* current resync rate after syncer throttle magic */
+	struct fifo_buffer rs_plan_s; /* correction values of resync planer */
+	int rs_in_flight; /* resync sectors in flight (to proxy, in proxy and from proxy) */
+	int rs_planed;    /* resync sectors already planed */
 };
 };
 
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -1138,6 +1201,8 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
 extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
 extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
 		       unsigned int set_size);
 		       unsigned int set_size);
 extern void tl_clear(struct drbd_conf *mdev);
 extern void tl_clear(struct drbd_conf *mdev);
+enum drbd_req_event;
+extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
 extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
 extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
 extern void drbd_free_sock(struct drbd_conf *mdev);
 extern void drbd_free_sock(struct drbd_conf *mdev);
 extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
 extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
@@ -1150,12 +1215,12 @@ extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_f
 extern int _drbd_send_state(struct drbd_conf *mdev);
 extern int _drbd_send_state(struct drbd_conf *mdev);
 extern int drbd_send_state(struct drbd_conf *mdev);
 extern int drbd_send_state(struct drbd_conf *mdev);
 extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
 extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
-			enum drbd_packets cmd, struct p_header *h,
+			enum drbd_packets cmd, struct p_header80 *h,
 			size_t size, unsigned msg_flags);
 			size_t size, unsigned msg_flags);
 #define USE_DATA_SOCKET 1
 #define USE_DATA_SOCKET 1
 #define USE_META_SOCKET 0
 #define USE_META_SOCKET 0
 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
 extern int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
-			enum drbd_packets cmd, struct p_header *h,
+			enum drbd_packets cmd, struct p_header80 *h,
 			size_t size);
 			size_t size);
 extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
 extern int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd,
 			char *data, size_t size);
 			char *data, size_t size);
@@ -1167,7 +1232,7 @@ extern int drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
 extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
 extern int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
 			struct p_block_req *rp);
 			struct p_block_req *rp);
 extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
 extern int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
-			struct p_data *dp);
+			struct p_data *dp, int data_size);
 extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
 extern int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
 			    sector_t sector, int blksize, u64 block_id);
 			    sector_t sector, int blksize, u64 block_id);
 extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
 extern int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
@@ -1201,7 +1266,13 @@ extern void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
 extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
 extern void drbd_md_set_flag(struct drbd_conf *mdev, int flags) __must_hold(local);
 extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
 extern void drbd_md_clear_flag(struct drbd_conf *mdev, int flags)__must_hold(local);
 extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
 extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
+#ifndef DRBD_DEBUG_MD_SYNC
 extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
 extern void drbd_md_mark_dirty(struct drbd_conf *mdev);
+#else
+#define drbd_md_mark_dirty(m)	drbd_md_mark_dirty_(m, __LINE__ , __func__ )
+extern void drbd_md_mark_dirty_(struct drbd_conf *mdev,
+		unsigned int line, const char *func);
+#endif
 extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 				 int (*io_fn)(struct drbd_conf *),
 				 int (*io_fn)(struct drbd_conf *),
 				 void (*done)(struct drbd_conf *, int),
 				 void (*done)(struct drbd_conf *, int),
@@ -1209,6 +1280,7 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
 extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
 extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
 extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
 extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
 extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
 extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
+extern void drbd_go_diskless(struct drbd_conf *mdev);
 
 
 
 
 /* Meta data layout
 /* Meta data layout
@@ -1264,6 +1336,8 @@ struct bm_extent {
  * Bit 1 ==> local node thinks this block needs to be synced.
  * Bit 1 ==> local node thinks this block needs to be synced.
  */
  */
 
 
+#define SLEEP_TIME (HZ/10)
+
 #define BM_BLOCK_SHIFT  12			 /* 4k per bit */
 #define BM_BLOCK_SHIFT  12			 /* 4k per bit */
 #define BM_BLOCK_SIZE	 (1<<BM_BLOCK_SHIFT)
 #define BM_BLOCK_SIZE	 (1<<BM_BLOCK_SHIFT)
 /* (9+3) : 512 bytes @ 8 bits; representing 16M storage
 /* (9+3) : 512 bytes @ 8 bits; representing 16M storage
@@ -1335,11 +1409,13 @@ struct bm_extent {
 #endif
 #endif
 
 
 /* Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
 /* Sector shift value for the "hash" functions of tl_hash and ee_hash tables.
- * With a value of 6 all IO in one 32K block make it to the same slot of the
+ * With a value of 8 all IO in one 128K block make it to the same slot of the
  * hash table. */
  * hash table. */
-#define HT_SHIFT 6
+#define HT_SHIFT 8
 #define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT))
 #define DRBD_MAX_SEGMENT_SIZE (1U<<(9+HT_SHIFT))
 
 
+#define DRBD_MAX_SIZE_H80_PACKET (1 << 15) /* The old header only allows packets up to 32Kib data */
+
 /* Number of elements in the app_reads_hash */
 /* Number of elements in the app_reads_hash */
 #define APP_R_HSIZE 15
 #define APP_R_HSIZE 15
 
 
@@ -1369,6 +1445,7 @@ extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_
 /* bm_find_next variants for use while you hold drbd_bm_lock() */
 /* bm_find_next variants for use while you hold drbd_bm_lock() */
 extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
 extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
 extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
 extern unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo);
+extern unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev);
 extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
 extern unsigned long drbd_bm_total_weight(struct drbd_conf *mdev);
 extern int drbd_bm_rs_done(struct drbd_conf *mdev);
 extern int drbd_bm_rs_done(struct drbd_conf *mdev);
 /* for receive_bitmap */
 /* for receive_bitmap */
@@ -1421,7 +1498,8 @@ extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
 extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local);
 extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
 extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role,
 		int force);
 		int force);
-enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
+extern enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev);
+extern void drbd_try_outdate_peer_async(struct drbd_conf *mdev);
 extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 extern int drbd_khelper(struct drbd_conf *mdev, char *cmd);
 
 
 /* drbd_worker.c */
 /* drbd_worker.c */
@@ -1467,10 +1545,12 @@ extern int w_send_barrier(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_send_read_req(struct drbd_conf *, struct drbd_work *, int);
 extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
 extern int w_prev_work_done(struct drbd_conf *, struct drbd_work *, int);
 extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
 extern int w_e_reissue(struct drbd_conf *, struct drbd_work *, int);
+extern int w_restart_disk_io(struct drbd_conf *, struct drbd_work *, int);
 
 
 extern void resync_timer_fn(unsigned long data);
 extern void resync_timer_fn(unsigned long data);
 
 
 /* drbd_receiver.c */
 /* drbd_receiver.c */
+extern int drbd_rs_should_slow_down(struct drbd_conf *mdev);
 extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
 extern int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
 		const unsigned rw, const int fault_type);
 		const unsigned rw, const int fault_type);
 extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
 extern int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list);
@@ -1479,7 +1559,10 @@ extern struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
 					    sector_t sector,
 					    sector_t sector,
 					    unsigned int data_size,
 					    unsigned int data_size,
 					    gfp_t gfp_mask) __must_hold(local);
 					    gfp_t gfp_mask) __must_hold(local);
-extern void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e);
+extern void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
+		int is_net);
+#define drbd_free_ee(m,e)	drbd_free_some_ee(m, e, 0)
+#define drbd_free_net_ee(m,e)	drbd_free_some_ee(m, e, 1)
 extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
 extern void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
 		struct list_head *head);
 		struct list_head *head);
 extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
 extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
@@ -1487,6 +1570,7 @@ extern void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
 extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
 extern void drbd_set_recv_tcq(struct drbd_conf *mdev, int tcq_enabled);
 extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
 extern void _drbd_clear_done_ee(struct drbd_conf *mdev, struct list_head *to_be_freed);
 extern void drbd_flush_workqueue(struct drbd_conf *mdev);
 extern void drbd_flush_workqueue(struct drbd_conf *mdev);
+extern void drbd_free_tl_hash(struct drbd_conf *mdev);
 
 
 /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
 /* yes, there is kernel_setsockopt, but only since 2.6.18. we don't need to
  * mess with get_fs/set_fs, we know we are KERNEL_DS always. */
  * mess with get_fs/set_fs, we know we are KERNEL_DS always. */
@@ -1600,6 +1684,8 @@ void drbd_bcast_ee(struct drbd_conf *mdev,
 #define susp_MASK 1
 #define susp_MASK 1
 #define user_isp_MASK 1
 #define user_isp_MASK 1
 #define aftr_isp_MASK 1
 #define aftr_isp_MASK 1
+#define susp_nod_MASK 1
+#define susp_fen_MASK 1
 
 
 #define NS(T, S) \
 #define NS(T, S) \
 	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
 	({ union drbd_state mask; mask.i = 0; mask.T = T##_MASK; mask; }), \
@@ -1855,13 +1941,6 @@ static inline sector_t drbd_md_ss__(struct drbd_conf *mdev,
 	}
 	}
 }
 }
 
 
-static inline void
-_drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
-{
-	list_add_tail(&w->list, &q->q);
-	up(&q->s);
-}
-
 static inline void
 static inline void
 drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
 drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w)
 {
 {
@@ -1899,19 +1978,19 @@ static inline void request_ping(struct drbd_conf *mdev)
 static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
 static inline int drbd_send_short_cmd(struct drbd_conf *mdev,
 	enum drbd_packets cmd)
 	enum drbd_packets cmd)
 {
 {
-	struct p_header h;
+	struct p_header80 h;
 	return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
 	return drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd, &h, sizeof(h));
 }
 }
 
 
 static inline int drbd_send_ping(struct drbd_conf *mdev)
 static inline int drbd_send_ping(struct drbd_conf *mdev)
 {
 {
-	struct p_header h;
+	struct p_header80 h;
 	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
 	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING, &h, sizeof(h));
 }
 }
 
 
 static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
 static inline int drbd_send_ping_ack(struct drbd_conf *mdev)
 {
 {
-	struct p_header h;
+	struct p_header80 h;
 	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
 	return drbd_send_cmd(mdev, USE_META_SOCKET, P_PING_ACK, &h, sizeof(h));
 }
 }
 
 
@@ -2013,7 +2092,7 @@ static inline void inc_unacked(struct drbd_conf *mdev)
 static inline void put_net_conf(struct drbd_conf *mdev)
 static inline void put_net_conf(struct drbd_conf *mdev)
 {
 {
 	if (atomic_dec_and_test(&mdev->net_cnt))
 	if (atomic_dec_and_test(&mdev->net_cnt))
-		wake_up(&mdev->misc_wait);
+		wake_up(&mdev->net_cnt_wait);
 }
 }
 
 
 /**
 /**
@@ -2044,10 +2123,14 @@ static inline int get_net_conf(struct drbd_conf *mdev)
 
 
 static inline void put_ldev(struct drbd_conf *mdev)
 static inline void put_ldev(struct drbd_conf *mdev)
 {
 {
+	int i = atomic_dec_return(&mdev->local_cnt);
 	__release(local);
 	__release(local);
-	if (atomic_dec_and_test(&mdev->local_cnt))
+	D_ASSERT(i >= 0);
+	if (i == 0) {
+		if (mdev->state.disk == D_FAILED)
+			drbd_go_diskless(mdev);
 		wake_up(&mdev->misc_wait);
 		wake_up(&mdev->misc_wait);
-	D_ASSERT(atomic_read(&mdev->local_cnt) >= 0);
+	}
 }
 }
 
 
 #ifndef __CHECKER__
 #ifndef __CHECKER__
@@ -2179,11 +2262,16 @@ static inline int drbd_state_is_stable(union drbd_state s)
 	return 1;
 	return 1;
 }
 }
 
 
+static inline int is_susp(union drbd_state s)
+{
+	return s.susp || s.susp_nod || s.susp_fen;
+}
+
 static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
 static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
 {
 {
 	int mxb = drbd_get_max_buffers(mdev);
 	int mxb = drbd_get_max_buffers(mdev);
 
 
-	if (mdev->state.susp)
+	if (is_susp(mdev->state))
 		return 0;
 		return 0;
 	if (test_bit(SUSPEND_IO, &mdev->flags))
 	if (test_bit(SUSPEND_IO, &mdev->flags))
 		return 0;
 		return 0;

文件差异内容过多而无法显示
+ 329 - 174
drivers/block/drbd/drbd_main.c


+ 218 - 52
drivers/block/drbd/drbd_nl.c

@@ -33,10 +33,13 @@
 #include <linux/blkpg.h>
 #include <linux/blkpg.h>
 #include <linux/cpumask.h>
 #include <linux/cpumask.h>
 #include "drbd_int.h"
 #include "drbd_int.h"
+#include "drbd_req.h"
 #include "drbd_wrappers.h"
 #include "drbd_wrappers.h"
 #include <asm/unaligned.h>
 #include <asm/unaligned.h>
 #include <linux/drbd_tag_magic.h>
 #include <linux/drbd_tag_magic.h>
 #include <linux/drbd_limits.h>
 #include <linux/drbd_limits.h>
+#include <linux/compiler.h>
+#include <linux/kthread.h>
 
 
 static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
 static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
 static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
 static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
@@ -169,6 +172,10 @@ int drbd_khelper(struct drbd_conf *mdev, char *cmd)
 		put_net_conf(mdev);
 		put_net_conf(mdev);
 	}
 	}
 
 
+	/* The helper may take some time.
+	 * write out any unsynced meta data changes now */
+	drbd_md_sync(mdev);
+
 	dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
 	dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
 
 
 	drbd_bcast_ev_helper(mdev, cmd);
 	drbd_bcast_ev_helper(mdev, cmd);
@@ -202,12 +209,10 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
 		put_ldev(mdev);
 		put_ldev(mdev);
 	} else {
 	} else {
 		dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
 		dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
-		return mdev->state.pdsk;
+		nps = mdev->state.pdsk;
+		goto out;
 	}
 	}
 
 
-	if (fp == FP_STONITH)
-		_drbd_request_state(mdev, NS(susp, 1), CS_WAIT_COMPLETE);
-
 	r = drbd_khelper(mdev, "fence-peer");
 	r = drbd_khelper(mdev, "fence-peer");
 
 
 	switch ((r>>8) & 0xff) {
 	switch ((r>>8) & 0xff) {
@@ -252,9 +257,36 @@ enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
 
 
 	dev_info(DEV, "fence-peer helper returned %d (%s)\n",
 	dev_info(DEV, "fence-peer helper returned %d (%s)\n",
 			(r>>8) & 0xff, ex_to_string);
 			(r>>8) & 0xff, ex_to_string);
+
+out:
+	if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
+		/* The handler was not successful... unfreeze here, the
+		   state engine can not unfreeze... */
+		_drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
+	}
+
 	return nps;
 	return nps;
 }
 }
 
 
+static int _try_outdate_peer_async(void *data)
+{
+	struct drbd_conf *mdev = (struct drbd_conf *)data;
+	enum drbd_disk_state nps;
+
+	nps = drbd_try_outdate_peer(mdev);
+	drbd_request_state(mdev, NS(pdsk, nps));
+
+	return 0;
+}
+
+void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
+{
+	struct task_struct *opa;
+
+	opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
+	if (IS_ERR(opa))
+		dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
+}
 
 
 int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 {
 {
@@ -394,6 +426,39 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
 	return r;
 	return r;
 }
 }
 
 
+static struct drbd_conf *ensure_mdev(int minor, int create)
+{
+	struct drbd_conf *mdev;
+
+	if (minor >= minor_count)
+		return NULL;
+
+	mdev = minor_to_mdev(minor);
+
+	if (!mdev && create) {
+		struct gendisk *disk = NULL;
+		mdev = drbd_new_device(minor);
+
+		spin_lock_irq(&drbd_pp_lock);
+		if (minor_table[minor] == NULL) {
+			minor_table[minor] = mdev;
+			disk = mdev->vdisk;
+			mdev = NULL;
+		} /* else: we lost the race */
+		spin_unlock_irq(&drbd_pp_lock);
+
+		if (disk) /* we won the race above */
+			/* in case we ever add a drbd_delete_device(),
+			 * don't forget the del_gendisk! */
+			add_disk(disk);
+		else /* we lost the race above */
+			drbd_free_mdev(mdev);
+
+		mdev = minor_to_mdev(minor);
+	}
+
+	return mdev;
+}
 
 
 static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 			   struct drbd_nl_cfg_reply *reply)
 			   struct drbd_nl_cfg_reply *reply)
@@ -494,6 +559,8 @@ char *ppsize(char *buf, unsigned long long size)
 void drbd_suspend_io(struct drbd_conf *mdev)
 void drbd_suspend_io(struct drbd_conf *mdev)
 {
 {
 	set_bit(SUSPEND_IO, &mdev->flags);
 	set_bit(SUSPEND_IO, &mdev->flags);
+	if (is_susp(mdev->state))
+		return;
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
 	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
 }
 }
 
 
@@ -713,9 +780,6 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
 	blk_queue_segment_boundary(q, PAGE_SIZE-1);
 	blk_queue_segment_boundary(q, PAGE_SIZE-1);
 	blk_stack_limits(&q->limits, &b->limits, 0);
 	blk_stack_limits(&q->limits, &b->limits, 0);
 
 
-	if (b->merge_bvec_fn)
-		dev_warn(DEV, "Backing device's merge_bvec_fn() = %p\n",
-		     b->merge_bvec_fn);
 	dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
 	dev_info(DEV, "max_segment_size ( = BIO size ) = %u\n", queue_max_segment_size(q));
 
 
 	if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
 	if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
@@ -729,14 +793,16 @@ void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_seg_s) __mu
 /* serialize deconfig (worker exiting, doing cleanup)
 /* serialize deconfig (worker exiting, doing cleanup)
  * and reconfig (drbdsetup disk, drbdsetup net)
  * and reconfig (drbdsetup disk, drbdsetup net)
  *
  *
- * wait for a potentially exiting worker, then restart it,
- * or start a new one.
+ * Wait for a potentially exiting worker, then restart it,
+ * or start a new one.  Flush any pending work, there may still be an
+ * after_state_change queued.
  */
  */
 static void drbd_reconfig_start(struct drbd_conf *mdev)
 static void drbd_reconfig_start(struct drbd_conf *mdev)
 {
 {
 	wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
 	wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
 	wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
 	wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
 	drbd_thread_start(&mdev->worker);
 	drbd_thread_start(&mdev->worker);
+	drbd_flush_workqueue(mdev);
 }
 }
 
 
 /* if still unconfigured, stops worker again.
 /* if still unconfigured, stops worker again.
@@ -756,6 +822,29 @@ static void drbd_reconfig_done(struct drbd_conf *mdev)
 	wake_up(&mdev->state_wait);
 	wake_up(&mdev->state_wait);
 }
 }
 
 
+/* Make sure IO is suspended before calling this function(). */
+static void drbd_suspend_al(struct drbd_conf *mdev)
+{
+	int s = 0;
+
+	if (lc_try_lock(mdev->act_log)) {
+		drbd_al_shrink(mdev);
+		lc_unlock(mdev->act_log);
+	} else {
+		dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
+		return;
+	}
+
+	spin_lock_irq(&mdev->req_lock);
+	if (mdev->state.conn < C_CONNECTED)
+		s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
+
+	spin_unlock_irq(&mdev->req_lock);
+
+	if (s)
+		dev_info(DEV, "Suspended AL updates\n");
+}
+
 /* does always return 0;
 /* does always return 0;
  * interesting return code is in reply->ret_code */
  * interesting return code is in reply->ret_code */
 static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
@@ -769,6 +858,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	struct inode *inode, *inode2;
 	struct inode *inode, *inode2;
 	struct lru_cache *resync_lru = NULL;
 	struct lru_cache *resync_lru = NULL;
 	union drbd_state ns, os;
 	union drbd_state ns, os;
+	unsigned int max_seg_s;
 	int rv;
 	int rv;
 	int cp_discovered = 0;
 	int cp_discovered = 0;
 	int logical_block_size;
 	int logical_block_size;
@@ -803,6 +893,15 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 		goto fail;
 		goto fail;
 	}
 	}
 
 
+	if (get_net_conf(mdev)) {
+		int prot = mdev->net_conf->wire_protocol;
+		put_net_conf(mdev);
+		if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
+			retcode = ERR_STONITH_AND_PROT_A;
+			goto fail;
+		}
+	}
+
 	nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
 	nbc->lo_file = filp_open(nbc->dc.backing_dev, O_RDWR, 0);
 	if (IS_ERR(nbc->lo_file)) {
 	if (IS_ERR(nbc->lo_file)) {
 		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
 		dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
@@ -924,7 +1023,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 
 	drbd_suspend_io(mdev);
 	drbd_suspend_io(mdev);
 	/* also wait for the last barrier ack. */
 	/* also wait for the last barrier ack. */
-	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt));
+	wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
 	/* and for any other previously queued work */
 	/* and for any other previously queued work */
 	drbd_flush_workqueue(mdev);
 	drbd_flush_workqueue(mdev);
 
 
@@ -1021,7 +1120,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	else
 	else
 		clear_bit(CRASHED_PRIMARY, &mdev->flags);
 		clear_bit(CRASHED_PRIMARY, &mdev->flags);
 
 
-	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND)) {
+	if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
+	    !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
 		set_bit(CRASHED_PRIMARY, &mdev->flags);
 		set_bit(CRASHED_PRIMARY, &mdev->flags);
 		cp_discovered = 1;
 		cp_discovered = 1;
 	}
 	}
@@ -1031,7 +1131,20 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 	mdev->read_cnt = 0;
 	mdev->read_cnt = 0;
 	mdev->writ_cnt = 0;
 	mdev->writ_cnt = 0;
 
 
-	drbd_setup_queue_param(mdev, DRBD_MAX_SEGMENT_SIZE);
+	max_seg_s = DRBD_MAX_SEGMENT_SIZE;
+	if (mdev->state.conn == C_CONNECTED) {
+		/* We are Primary, Connected, and now attach a new local
+		 * backing store. We must not increase the user visible maximum
+		 * bio size on this device to something the peer may not be
+		 * able to handle. */
+		if (mdev->agreed_pro_version < 94)
+			max_seg_s = queue_max_segment_size(mdev->rq_queue);
+		else if (mdev->agreed_pro_version == 94)
+			max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
+		/* else: drbd 8.3.9 and later, stay with default */
+	}
+
+	drbd_setup_queue_param(mdev, max_seg_s);
 
 
 	/* If I am currently not R_PRIMARY,
 	/* If I am currently not R_PRIMARY,
 	 * but meta data primary indicator is set,
 	 * but meta data primary indicator is set,
@@ -1079,6 +1192,9 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 		drbd_al_to_on_disk_bm(mdev);
 		drbd_al_to_on_disk_bm(mdev);
 	}
 	}
 
 
+	if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
+		drbd_suspend_al(mdev); /* IO is still suspended here... */
+
 	spin_lock_irq(&mdev->req_lock);
 	spin_lock_irq(&mdev->req_lock);
 	os = mdev->state;
 	os = mdev->state;
 	ns.i = os.i;
 	ns.i = os.i;
@@ -1235,7 +1351,16 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 	    && (new_conf->wire_protocol != DRBD_PROT_C)) {
 	    && (new_conf->wire_protocol != DRBD_PROT_C)) {
 		retcode = ERR_NOT_PROTO_C;
 		retcode = ERR_NOT_PROTO_C;
 		goto fail;
 		goto fail;
-	};
+	}
+
+	if (get_ldev(mdev)) {
+		enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
+		put_ldev(mdev);
+		if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
+			retcode = ERR_STONITH_AND_PROT_A;
+			goto fail;
+		}
+	}
 
 
 	if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
 	if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
 		retcode = ERR_DISCARD;
 		retcode = ERR_DISCARD;
@@ -1350,6 +1475,7 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 		}
 		}
 	}
 	}
 
 
+	drbd_flush_workqueue(mdev);
 	spin_lock_irq(&mdev->req_lock);
 	spin_lock_irq(&mdev->req_lock);
 	if (mdev->net_conf != NULL) {
 	if (mdev->net_conf != NULL) {
 		retcode = ERR_NET_CONFIGURED;
 		retcode = ERR_NET_CONFIGURED;
@@ -1388,10 +1514,9 @@ static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 	mdev->int_dig_out=int_dig_out;
 	mdev->int_dig_out=int_dig_out;
 	mdev->int_dig_in=int_dig_in;
 	mdev->int_dig_in=int_dig_in;
 	mdev->int_dig_vv=int_dig_vv;
 	mdev->int_dig_vv=int_dig_vv;
+	retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL);
 	spin_unlock_irq(&mdev->req_lock);
 	spin_unlock_irq(&mdev->req_lock);
 
 
-	retcode = _drbd_request_state(mdev, NS(conn, C_UNCONNECTED), CS_VERBOSE);
-
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 	reply->ret_code = retcode;
 	reply->ret_code = retcode;
 	drbd_reconfig_done(mdev);
 	drbd_reconfig_done(mdev);
@@ -1546,6 +1671,8 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 	struct crypto_hash *csums_tfm = NULL;
 	struct crypto_hash *csums_tfm = NULL;
 	struct syncer_conf sc;
 	struct syncer_conf sc;
 	cpumask_var_t new_cpu_mask;
 	cpumask_var_t new_cpu_mask;
+	int *rs_plan_s = NULL;
+	int fifo_size;
 
 
 	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
 	if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
 		retcode = ERR_NOMEM;
 		retcode = ERR_NOMEM;
@@ -1557,6 +1684,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 		sc.rate       = DRBD_RATE_DEF;
 		sc.rate       = DRBD_RATE_DEF;
 		sc.after      = DRBD_AFTER_DEF;
 		sc.after      = DRBD_AFTER_DEF;
 		sc.al_extents = DRBD_AL_EXTENTS_DEF;
 		sc.al_extents = DRBD_AL_EXTENTS_DEF;
+		sc.on_no_data  = DRBD_ON_NO_DATA_DEF;
+		sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
+		sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
+		sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
+		sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
+		sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
 	} else
 	} else
 		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
 		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
 
 
@@ -1634,6 +1767,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 	}
 	}
 #undef AL_MAX
 #undef AL_MAX
 
 
+	/* to avoid spurious errors when configuring minors before configuring
+	 * the minors they depend on: if necessary, first create the minor we
+	 * depend on */
+	if (sc.after >= 0)
+		ensure_mdev(sc.after, 1);
+
 	/* most sanity checks done, try to assign the new sync-after
 	/* most sanity checks done, try to assign the new sync-after
 	 * dependency.  need to hold the global lock in there,
 	 * dependency.  need to hold the global lock in there,
 	 * to avoid a race in the dependency loop check. */
 	 * to avoid a race in the dependency loop check. */
@@ -1641,6 +1780,16 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 	if (retcode != NO_ERROR)
 	if (retcode != NO_ERROR)
 		goto fail;
 		goto fail;
 
 
+	fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
+	if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
+		rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
+		if (!rs_plan_s) {
+			dev_err(DEV, "kmalloc of fifo_buffer failed");
+			retcode = ERR_NOMEM;
+			goto fail;
+		}
+	}
+
 	/* ok, assign the rest of it as well.
 	/* ok, assign the rest of it as well.
 	 * lock against receive_SyncParam() */
 	 * lock against receive_SyncParam() */
 	spin_lock(&mdev->peer_seq_lock);
 	spin_lock(&mdev->peer_seq_lock);
@@ -1657,6 +1806,15 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 		mdev->verify_tfm = verify_tfm;
 		mdev->verify_tfm = verify_tfm;
 		verify_tfm = NULL;
 		verify_tfm = NULL;
 	}
 	}
+
+	if (fifo_size != mdev->rs_plan_s.size) {
+		kfree(mdev->rs_plan_s.values);
+		mdev->rs_plan_s.values = rs_plan_s;
+		mdev->rs_plan_s.size   = fifo_size;
+		mdev->rs_planed = 0;
+		rs_plan_s = NULL;
+	}
+
 	spin_unlock(&mdev->peer_seq_lock);
 	spin_unlock(&mdev->peer_seq_lock);
 
 
 	if (get_ldev(mdev)) {
 	if (get_ldev(mdev)) {
@@ -1688,6 +1846,7 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n
 
 
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 	kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
 fail:
 fail:
+	kfree(rs_plan_s);
 	free_cpumask_var(new_cpu_mask);
 	free_cpumask_var(new_cpu_mask);
 	crypto_free_hash(csums_tfm);
 	crypto_free_hash(csums_tfm);
 	crypto_free_hash(verify_tfm);
 	crypto_free_hash(verify_tfm);
@@ -1721,12 +1880,38 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
 	return 0;
 	return 0;
 }
 }
 
 
+static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
+{
+	int rv;
+
+	rv = drbd_bmio_set_n_write(mdev);
+	drbd_suspend_al(mdev);
+	return rv;
+}
+
 static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 				   struct drbd_nl_cfg_reply *reply)
 				   struct drbd_nl_cfg_reply *reply)
 {
 {
+	int retcode;
 
 
-	reply->ret_code = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+	retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
+
+	if (retcode < SS_SUCCESS) {
+		if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
+			/* The peer will get a resync upon connect anyways. Just make that
+			   into a full resync. */
+			retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
+			if (retcode >= SS_SUCCESS) {
+				/* open coded drbd_bitmap_io() */
+				if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
+						   "set_n_write from invalidate_peer"))
+					retcode = ERR_IO_MD_DISK;
+			}
+		} else
+			retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
+	}
 
 
+	reply->ret_code = retcode;
 	return 0;
 	return 0;
 }
 }
 
 
@@ -1765,7 +1950,21 @@ static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
 static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
 			     struct drbd_nl_cfg_reply *reply)
 			     struct drbd_nl_cfg_reply *reply)
 {
 {
-	reply->ret_code = drbd_request_state(mdev, NS(susp, 0));
+	if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
+		drbd_uuid_new_current(mdev);
+		clear_bit(NEW_CUR_UUID, &mdev->flags);
+		drbd_md_sync(mdev);
+	}
+	drbd_suspend_io(mdev);
+	reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
+	if (reply->ret_code == SS_SUCCESS) {
+		if (mdev->state.conn < C_CONNECTED)
+			tl_clear(mdev);
+		if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
+			tl_restart(mdev, fail_frozen_disk_io);
+	}
+	drbd_resume_io(mdev);
+
 	return 0;
 	return 0;
 }
 }
 
 
@@ -1941,40 +2140,6 @@ out:
 	return 0;
 	return 0;
 }
 }
 
 
-static struct drbd_conf *ensure_mdev(struct drbd_nl_cfg_req *nlp)
-{
-	struct drbd_conf *mdev;
-
-	if (nlp->drbd_minor >= minor_count)
-		return NULL;
-
-	mdev = minor_to_mdev(nlp->drbd_minor);
-
-	if (!mdev && (nlp->flags & DRBD_NL_CREATE_DEVICE)) {
-		struct gendisk *disk = NULL;
-		mdev = drbd_new_device(nlp->drbd_minor);
-
-		spin_lock_irq(&drbd_pp_lock);
-		if (minor_table[nlp->drbd_minor] == NULL) {
-			minor_table[nlp->drbd_minor] = mdev;
-			disk = mdev->vdisk;
-			mdev = NULL;
-		} /* else: we lost the race */
-		spin_unlock_irq(&drbd_pp_lock);
-
-		if (disk) /* we won the race above */
-			/* in case we ever add a drbd_delete_device(),
-			 * don't forget the del_gendisk! */
-			add_disk(disk);
-		else /* we lost the race above */
-			drbd_free_mdev(mdev);
-
-		mdev = minor_to_mdev(nlp->drbd_minor);
-	}
-
-	return mdev;
-}
-
 struct cn_handler_struct {
 struct cn_handler_struct {
 	int (*function)(struct drbd_conf *,
 	int (*function)(struct drbd_conf *,
 			 struct drbd_nl_cfg_req *,
 			 struct drbd_nl_cfg_req *,
@@ -2035,7 +2200,8 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms
 		goto fail;
 		goto fail;
 	}
 	}
 
 
-	mdev = ensure_mdev(nlp);
+	mdev = ensure_mdev(nlp->drbd_minor,
+			(nlp->flags & DRBD_NL_CREATE_DEVICE));
 	if (!mdev) {
 	if (!mdev) {
 		retcode = ERR_MINOR_INVALID;
 		retcode = ERR_MINOR_INVALID;
 		goto fail;
 		goto fail;

+ 21 - 13
drivers/block/drbd/drbd_proc.c

@@ -57,6 +57,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 	unsigned long db, dt, dbdt, rt, rs_left;
 	unsigned long db, dt, dbdt, rt, rs_left;
 	unsigned int res;
 	unsigned int res;
 	int i, x, y;
 	int i, x, y;
+	int stalled = 0;
 
 
 	drbd_get_syncer_progress(mdev, &rs_left, &res);
 	drbd_get_syncer_progress(mdev, &rs_left, &res);
 
 
@@ -90,18 +91,17 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 	 * db: blocks written from mark until now
 	 * db: blocks written from mark until now
 	 * rt: remaining time
 	 * rt: remaining time
 	 */
 	 */
-	dt = (jiffies - mdev->rs_mark_time) / HZ;
-
-	if (dt > 20) {
-		/* if we made no update to rs_mark_time for too long,
-		 * we are stalled. show that. */
-		seq_printf(seq, "stalled\n");
-		return;
-	}
+	/* Rolling marks. last_mark+1 may just now be modified.  last_mark+2 is
+	 * at least (DRBD_SYNC_MARKS-2)*DRBD_SYNC_MARK_STEP old, and has at
+	 * least DRBD_SYNC_MARK_STEP time before it will be modified. */
+	i = (mdev->rs_last_mark + 2) % DRBD_SYNC_MARKS;
+	dt = (jiffies - mdev->rs_mark_time[i]) / HZ;
+	if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS))
+		stalled = 1;
 
 
 	if (!dt)
 	if (!dt)
 		dt++;
 		dt++;
-	db = mdev->rs_mark_left - rs_left;
+	db = mdev->rs_mark_left[i] - rs_left;
 	rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
 	rt = (dt * (rs_left / (db/100+1)))/100; /* seconds */
 
 
 	seq_printf(seq, "finish: %lu:%02lu:%02lu",
 	seq_printf(seq, "finish: %lu:%02lu:%02lu",
@@ -118,7 +118,7 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 	/* mean speed since syncer started
 	/* mean speed since syncer started
 	 * we do account for PausedSync periods */
 	 * we do account for PausedSync periods */
 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
 	dt = (jiffies - mdev->rs_start - mdev->rs_paused) / HZ;
-	if (dt <= 0)
+	if (dt == 0)
 		dt = 1;
 		dt = 1;
 	db = mdev->rs_total - rs_left;
 	db = mdev->rs_total - rs_left;
 	dbdt = Bit2KB(db/dt);
 	dbdt = Bit2KB(db/dt);
@@ -128,7 +128,14 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
 	else
 	else
 		seq_printf(seq, " (%ld)", dbdt);
 		seq_printf(seq, " (%ld)", dbdt);
 
 
-	seq_printf(seq, " K/sec\n");
+	if (mdev->state.conn == C_SYNC_TARGET) {
+		if (mdev->c_sync_rate > 1000)
+			seq_printf(seq, " want: %d,%03d",
+				   mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
+		else
+			seq_printf(seq, " want: %d", mdev->c_sync_rate);
+	}
+	seq_printf(seq, " K/sec%s\n", stalled ? " (stalled)" : "");
 }
 }
 
 
 static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
 static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
@@ -196,7 +203,7 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			seq_printf(seq, "%2d: cs:Unconfigured\n", i);
 			seq_printf(seq, "%2d: cs:Unconfigured\n", i);
 		} else {
 		} else {
 			seq_printf(seq,
 			seq_printf(seq,
-			   "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c\n"
+			   "%2d: cs:%s ro:%s/%s ds:%s/%s %c %c%c%c%c%c%c\n"
 			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
 			   "    ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
 			   "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
 			   "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
 			   i, sn,
 			   i, sn,
@@ -206,11 +213,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
 			   drbd_disk_str(mdev->state.pdsk),
 			   drbd_disk_str(mdev->state.pdsk),
 			   (mdev->net_conf == NULL ? ' ' :
 			   (mdev->net_conf == NULL ? ' ' :
 			    (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
 			    (mdev->net_conf->wire_protocol - DRBD_PROT_A+'A')),
-			   mdev->state.susp ? 's' : 'r',
+			   is_susp(mdev->state) ? 's' : 'r',
 			   mdev->state.aftr_isp ? 'a' : '-',
 			   mdev->state.aftr_isp ? 'a' : '-',
 			   mdev->state.peer_isp ? 'p' : '-',
 			   mdev->state.peer_isp ? 'p' : '-',
 			   mdev->state.user_isp ? 'u' : '-',
 			   mdev->state.user_isp ? 'u' : '-',
 			   mdev->congestion_reason ?: '-',
 			   mdev->congestion_reason ?: '-',
+			   test_bit(AL_SUSPENDED, &mdev->flags) ? 's' : '-',
 			   mdev->send_cnt/2,
 			   mdev->send_cnt/2,
 			   mdev->recv_cnt/2,
 			   mdev->recv_cnt/2,
 			   mdev->writ_cnt/2,
 			   mdev->writ_cnt/2,

文件差异内容过多而无法显示
+ 339 - 207
drivers/block/drbd/drbd_receiver.c


+ 99 - 66
drivers/block/drbd/drbd_req.c

@@ -59,17 +59,19 @@ static void _drbd_end_io_acct(struct drbd_conf *mdev, struct drbd_request *req)
 static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
 static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
 {
 {
 	const unsigned long s = req->rq_state;
 	const unsigned long s = req->rq_state;
+
+	/* remove it from the transfer log.
+	 * well, only if it had been there in the first
+	 * place... if it had not (local only or conflicting
+	 * and never sent), it should still be "empty" as
+	 * initialized in drbd_req_new(), so we can list_del() it
+	 * here unconditionally */
+	list_del(&req->tl_requests);
+
 	/* if it was a write, we may have to set the corresponding
 	/* if it was a write, we may have to set the corresponding
 	 * bit(s) out-of-sync first. If it had a local part, we need to
 	 * bit(s) out-of-sync first. If it had a local part, we need to
 	 * release the reference to the activity log. */
 	 * release the reference to the activity log. */
 	if (rw == WRITE) {
 	if (rw == WRITE) {
-		/* remove it from the transfer log.
-		 * well, only if it had been there in the first
-		 * place... if it had not (local only or conflicting
-		 * and never sent), it should still be "empty" as
-		 * initialized in drbd_req_new(), so we can list_del() it
-		 * here unconditionally */
-		list_del(&req->tl_requests);
 		/* Set out-of-sync unless both OK flags are set
 		/* Set out-of-sync unless both OK flags are set
 		 * (local only or remote failed).
 		 * (local only or remote failed).
 		 * Other places where we set out-of-sync:
 		 * Other places where we set out-of-sync:
@@ -92,7 +94,8 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const
 		 */
 		 */
 		if (s & RQ_LOCAL_MASK) {
 		if (s & RQ_LOCAL_MASK) {
 			if (get_ldev_if_state(mdev, D_FAILED)) {
 			if (get_ldev_if_state(mdev, D_FAILED)) {
-				drbd_al_complete_io(mdev, req->sector);
+				if (s & RQ_IN_ACT_LOG)
+					drbd_al_complete_io(mdev, req->sector);
 				put_ldev(mdev);
 				put_ldev(mdev);
 			} else if (__ratelimit(&drbd_ratelimit_state)) {
 			} else if (__ratelimit(&drbd_ratelimit_state)) {
 				dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
 				dev_warn(DEV, "Should have called drbd_al_complete_io(, %llu), "
@@ -280,6 +283,14 @@ void _req_may_be_done(struct drbd_request *req, struct bio_and_error *m)
 	 * protocol A or B, barrier ack still pending... */
 	 * protocol A or B, barrier ack still pending... */
 }
 }
 
 
+static void _req_may_be_done_not_susp(struct drbd_request *req, struct bio_and_error *m)
+{
+	struct drbd_conf *mdev = req->mdev;
+
+	if (!is_susp(mdev->state))
+		_req_may_be_done(req, m);
+}
+
 /*
 /*
  * checks whether there was an overlapping request
  * checks whether there was an overlapping request
  * or ee already registered.
  * or ee already registered.
@@ -380,10 +391,11 @@ out_conflict:
  *  and it enforces that we have to think in a very structured manner
  *  and it enforces that we have to think in a very structured manner
  *  about the "events" that may happen to a request during its life time ...
  *  about the "events" that may happen to a request during its life time ...
  */
  */
-void __req_mod(struct drbd_request *req, enum drbd_req_event what,
+int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		struct bio_and_error *m)
 		struct bio_and_error *m)
 {
 {
 	struct drbd_conf *mdev = req->mdev;
 	struct drbd_conf *mdev = req->mdev;
+	int rv = 0;
 	m->bio = NULL;
 	m->bio = NULL;
 
 
 	switch (what) {
 	switch (what) {
@@ -420,7 +432,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
 		req->rq_state |= (RQ_LOCAL_COMPLETED|RQ_LOCAL_OK);
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
 
-		_req_may_be_done(req, m);
+		_req_may_be_done_not_susp(req, m);
 		put_ldev(mdev);
 		put_ldev(mdev);
 		break;
 		break;
 
 
@@ -429,7 +441,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
 
 		__drbd_chk_io_error(mdev, FALSE);
 		__drbd_chk_io_error(mdev, FALSE);
-		_req_may_be_done(req, m);
+		_req_may_be_done_not_susp(req, m);
 		put_ldev(mdev);
 		put_ldev(mdev);
 		break;
 		break;
 
 
@@ -437,7 +449,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		/* it is legal to fail READA */
 		/* it is legal to fail READA */
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
-		_req_may_be_done(req, m);
+		_req_may_be_done_not_susp(req, m);
 		put_ldev(mdev);
 		put_ldev(mdev);
 		break;
 		break;
 
 
@@ -455,7 +467,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		/* no point in retrying if there is no good remote data,
 		/* no point in retrying if there is no good remote data,
 		 * or we have no connection. */
 		 * or we have no connection. */
 		if (mdev->state.pdsk != D_UP_TO_DATE) {
 		if (mdev->state.pdsk != D_UP_TO_DATE) {
-			_req_may_be_done(req, m);
+			_req_may_be_done_not_susp(req, m);
 			break;
 			break;
 		}
 		}
 
 
@@ -517,11 +529,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
 		D_ASSERT(test_bit(CREATE_BARRIER, &mdev->flags) == 0);
 
 
 		req->epoch = mdev->newest_tle->br_number;
 		req->epoch = mdev->newest_tle->br_number;
-		list_add_tail(&req->tl_requests,
-				&mdev->newest_tle->requests);
 
 
 		/* increment size of current epoch */
 		/* increment size of current epoch */
-		mdev->newest_tle->n_req++;
+		mdev->newest_tle->n_writes++;
 
 
 		/* queue work item to send data */
 		/* queue work item to send data */
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
@@ -530,7 +540,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		drbd_queue_work(&mdev->data.work, &req->w);
 		drbd_queue_work(&mdev->data.work, &req->w);
 
 
 		/* close the epoch, in case it outgrew the limit */
 		/* close the epoch, in case it outgrew the limit */
-		if (mdev->newest_tle->n_req >= mdev->net_conf->max_epoch_size)
+		if (mdev->newest_tle->n_writes >= mdev->net_conf->max_epoch_size)
 			queue_barrier(mdev);
 			queue_barrier(mdev);
 
 
 		break;
 		break;
@@ -543,7 +553,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state &= ~RQ_NET_QUEUED;
 		req->rq_state &= ~RQ_NET_QUEUED;
 		/* if we did it right, tl_clear should be scheduled only after
 		/* if we did it right, tl_clear should be scheduled only after
 		 * this, so this should not be necessary! */
 		 * this, so this should not be necessary! */
-		_req_may_be_done(req, m);
+		_req_may_be_done_not_susp(req, m);
 		break;
 		break;
 
 
 	case handed_over_to_network:
 	case handed_over_to_network:
@@ -568,7 +578,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		 * "completed_ok" events came in, once we return from
 		 * "completed_ok" events came in, once we return from
 		 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
 		 * _drbd_send_zc_bio (drbd_send_dblock), we have to check
 		 * whether it is done already, and end it.  */
 		 * whether it is done already, and end it.  */
-		_req_may_be_done(req, m);
+		_req_may_be_done_not_susp(req, m);
 		break;
 		break;
 
 
 	case read_retry_remote_canceled:
 	case read_retry_remote_canceled:
@@ -584,7 +594,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		/* if it is still queued, we may not complete it here.
 		/* if it is still queued, we may not complete it here.
 		 * it will be canceled soon. */
 		 * it will be canceled soon. */
 		if (!(req->rq_state & RQ_NET_QUEUED))
 		if (!(req->rq_state & RQ_NET_QUEUED))
-			_req_may_be_done(req, m);
+			_req_may_be_done(req, m); /* Allowed while state.susp */
 		break;
 		break;
 
 
 	case write_acked_by_peer_and_sis:
 	case write_acked_by_peer_and_sis:
@@ -619,7 +629,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
 		D_ASSERT(req->rq_state & RQ_NET_PENDING);
 		dec_ap_pending(mdev);
 		dec_ap_pending(mdev);
 		req->rq_state &= ~RQ_NET_PENDING;
 		req->rq_state &= ~RQ_NET_PENDING;
-		_req_may_be_done(req, m);
+		_req_may_be_done_not_susp(req, m);
 		break;
 		break;
 
 
 	case neg_acked:
 	case neg_acked:
@@ -629,11 +639,50 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
 		req->rq_state &= ~(RQ_NET_OK|RQ_NET_PENDING);
 
 
 		req->rq_state |= RQ_NET_DONE;
 		req->rq_state |= RQ_NET_DONE;
-		_req_may_be_done(req, m);
+		_req_may_be_done_not_susp(req, m);
 		/* else: done by handed_over_to_network */
 		/* else: done by handed_over_to_network */
 		break;
 		break;
 
 
+	case fail_frozen_disk_io:
+		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
+			break;
+
+		_req_may_be_done(req, m); /* Allowed while state.susp */
+		break;
+
+	case restart_frozen_disk_io:
+		if (!(req->rq_state & RQ_LOCAL_COMPLETED))
+			break;
+
+		req->rq_state &= ~RQ_LOCAL_COMPLETED;
+
+		rv = MR_READ;
+		if (bio_data_dir(req->master_bio) == WRITE)
+			rv = MR_WRITE;
+
+		get_ldev(mdev);
+		req->w.cb = w_restart_disk_io;
+		drbd_queue_work(&mdev->data.work, &req->w);
+		break;
+
+	case resend:
+		/* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
+		   before the connection loss (B&C only); only P_BARRIER_ACK was missing.
+		   Trowing them out of the TL here by pretending we got a BARRIER_ACK
+		   We ensure that the peer was not rebooted */
+		if (!(req->rq_state & RQ_NET_OK)) {
+			if (req->w.cb) {
+				drbd_queue_work(&mdev->data.work, &req->w);
+				rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
+			}
+			break;
+		}
+		/* else, fall through to barrier_acked */
+
 	case barrier_acked:
 	case barrier_acked:
+		if (!(req->rq_state & RQ_WRITE))
+			break;
+
 		if (req->rq_state & RQ_NET_PENDING) {
 		if (req->rq_state & RQ_NET_PENDING) {
 			/* barrier came in before all requests have been acked.
 			/* barrier came in before all requests have been acked.
 			 * this is bad, because if the connection is lost now,
 			 * this is bad, because if the connection is lost now,
@@ -643,7 +692,7 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		}
 		}
 		D_ASSERT(req->rq_state & RQ_NET_SENT);
 		D_ASSERT(req->rq_state & RQ_NET_SENT);
 		req->rq_state |= RQ_NET_DONE;
 		req->rq_state |= RQ_NET_DONE;
-		_req_may_be_done(req, m);
+		_req_may_be_done(req, m); /* Allowed while state.susp */
 		break;
 		break;
 
 
 	case data_received:
 	case data_received:
@@ -651,9 +700,11 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		dec_ap_pending(mdev);
 		dec_ap_pending(mdev);
 		req->rq_state &= ~RQ_NET_PENDING;
 		req->rq_state &= ~RQ_NET_PENDING;
 		req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
 		req->rq_state |= (RQ_NET_OK|RQ_NET_DONE);
-		_req_may_be_done(req, m);
+		_req_may_be_done_not_susp(req, m);
 		break;
 		break;
 	};
 	};
+
+	return rv;
 }
 }
 
 
 /* we may do a local read if:
 /* we may do a local read if:
@@ -752,14 +803,16 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
 	 * resync extent to finish, and, if necessary, pulls in the target
 	 * resync extent to finish, and, if necessary, pulls in the target
 	 * extent into the activity log, which involves further disk io because
 	 * extent into the activity log, which involves further disk io because
 	 * of transactional on-disk meta data updates. */
 	 * of transactional on-disk meta data updates. */
-	if (rw == WRITE && local)
+	if (rw == WRITE && local && !test_bit(AL_SUSPENDED, &mdev->flags)) {
+		req->rq_state |= RQ_IN_ACT_LOG;
 		drbd_al_begin_io(mdev, sector);
 		drbd_al_begin_io(mdev, sector);
+	}
 
 
 	remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
 	remote = remote && (mdev->state.pdsk == D_UP_TO_DATE ||
 			    (mdev->state.pdsk == D_INCONSISTENT &&
 			    (mdev->state.pdsk == D_INCONSISTENT &&
 			     mdev->state.conn >= C_CONNECTED));
 			     mdev->state.conn >= C_CONNECTED));
 
 
-	if (!(local || remote) && !mdev->state.susp) {
+	if (!(local || remote) && !is_susp(mdev->state)) {
 		dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
 		dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
 		goto fail_free_complete;
 		goto fail_free_complete;
 	}
 	}
@@ -785,7 +838,7 @@ allocate_barrier:
 	/* GOOD, everything prepared, grab the spin_lock */
 	/* GOOD, everything prepared, grab the spin_lock */
 	spin_lock_irq(&mdev->req_lock);
 	spin_lock_irq(&mdev->req_lock);
 
 
-	if (mdev->state.susp) {
+	if (is_susp(mdev->state)) {
 		/* If we got suspended, use the retry mechanism of
 		/* If we got suspended, use the retry mechanism of
 		   generic_make_request() to restart processing of this
 		   generic_make_request() to restart processing of this
 		   bio. In the next call to drbd_make_request_26
 		   bio. In the next call to drbd_make_request_26
@@ -867,30 +920,10 @@ allocate_barrier:
 	/* check this request on the collision detection hash tables.
 	/* check this request on the collision detection hash tables.
 	 * if we have a conflict, just complete it here.
 	 * if we have a conflict, just complete it here.
 	 * THINK do we want to check reads, too? (I don't think so...) */
 	 * THINK do we want to check reads, too? (I don't think so...) */
-	if (rw == WRITE && _req_conflicts(req)) {
-		/* this is a conflicting request.
-		 * even though it may have been only _partially_
-		 * overlapping with one of the currently pending requests,
-		 * without even submitting or sending it, we will
-		 * pretend that it was successfully served right now.
-		 */
-		if (local) {
-			bio_put(req->private_bio);
-			req->private_bio = NULL;
-			drbd_al_complete_io(mdev, req->sector);
-			put_ldev(mdev);
-			local = 0;
-		}
-		if (remote)
-			dec_ap_pending(mdev);
-		_drbd_end_io_acct(mdev, req);
-		/* THINK: do we want to fail it (-EIO), or pretend success? */
-		bio_endio(req->master_bio, 0);
-		req->master_bio = NULL;
-		dec_ap_bio(mdev);
-		drbd_req_free(req);
-		remote = 0;
-	}
+	if (rw == WRITE && _req_conflicts(req))
+		goto fail_conflicting;
+
+	list_add_tail(&req->tl_requests, &mdev->newest_tle->requests);
 
 
 	/* NOTE remote first: to get the concurrent write detection right,
 	/* NOTE remote first: to get the concurrent write detection right,
 	 * we must register the request before start of local IO.  */
 	 * we must register the request before start of local IO.  */
@@ -923,6 +956,21 @@ allocate_barrier:
 
 
 	return 0;
 	return 0;
 
 
+fail_conflicting:
+	/* this is a conflicting request.
+	 * even though it may have been only _partially_
+	 * overlapping with one of the currently pending requests,
+	 * without even submitting or sending it, we will
+	 * pretend that it was successfully served right now.
+	 */
+	_drbd_end_io_acct(mdev, req);
+	spin_unlock_irq(&mdev->req_lock);
+	if (remote)
+		dec_ap_pending(mdev);
+	/* THINK: do we want to fail it (-EIO), or pretend success?
+	 * this pretends success. */
+	err = 0;
+
 fail_free_complete:
 fail_free_complete:
 	if (rw == WRITE && local)
 	if (rw == WRITE && local)
 		drbd_al_complete_io(mdev, sector);
 		drbd_al_complete_io(mdev, sector);
@@ -961,21 +1009,6 @@ static int drbd_fail_request_early(struct drbd_conf *mdev, int is_write)
 		return 1;
 		return 1;
 	}
 	}
 
 
-	/*
-	 * Paranoia: we might have been primary, but sync target, or
-	 * even diskless, then lost the connection.
-	 * This should have been handled (panic? suspend?) somewhere
-	 * else. But maybe it was not, so check again here.
-	 * Caution: as long as we do not have a read/write lock on mdev,
-	 * to serialize state changes, this is racy, since we may lose
-	 * the connection *after* we test for the cstate.
-	 */
-	if (mdev->state.disk < D_UP_TO_DATE && mdev->state.pdsk < D_UP_TO_DATE) {
-		if (__ratelimit(&drbd_ratelimit_state))
-			dev_err(DEV, "Sorry, I have no access to good data anymore.\n");
-		return 1;
-	}
-
 	return 0;
 	return 0;
 }
 }
 
 

+ 47 - 15
drivers/block/drbd/drbd_req.h

@@ -104,6 +104,9 @@ enum drbd_req_event {
 	read_ahead_completed_with_error,
 	read_ahead_completed_with_error,
 	write_completed_with_error,
 	write_completed_with_error,
 	completed_ok,
 	completed_ok,
+	resend,
+	fail_frozen_disk_io,
+	restart_frozen_disk_io,
 	nothing, /* for tracing only */
 	nothing, /* for tracing only */
 };
 };
 
 
@@ -183,6 +186,12 @@ enum drbd_req_state_bits {
 
 
 	/* keep this last, its for the RQ_NET_MASK */
 	/* keep this last, its for the RQ_NET_MASK */
 	__RQ_NET_MAX,
 	__RQ_NET_MAX,
+
+	/* Set when this is a write, clear for a read */
+	__RQ_WRITE,
+
+	/* Should call drbd_al_complete_io() for this request... */
+	__RQ_IN_ACT_LOG,
 };
 };
 
 
 #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
 #define RQ_LOCAL_PENDING   (1UL << __RQ_LOCAL_PENDING)
@@ -201,6 +210,16 @@ enum drbd_req_state_bits {
 /* 0x1f8 */
 /* 0x1f8 */
 #define RQ_NET_MASK        (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
 #define RQ_NET_MASK        (((1UL << __RQ_NET_MAX)-1) & ~RQ_LOCAL_MASK)
 
 
+#define RQ_WRITE           (1UL << __RQ_WRITE)
+#define RQ_IN_ACT_LOG      (1UL << __RQ_IN_ACT_LOG)
+
+/* For waking up the frozen transfer log mod_req() has to return if the request
+   should be counted in the epoch object*/
+#define MR_WRITE_SHIFT 0
+#define MR_WRITE       (1 << MR_WRITE_SHIFT)
+#define MR_READ_SHIFT  1
+#define MR_READ        (1 << MR_READ_SHIFT)
+
 /* epoch entries */
 /* epoch entries */
 static inline
 static inline
 struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
 struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
@@ -244,30 +263,36 @@ static inline struct drbd_request *_ar_id_to_req(struct drbd_conf *mdev,
 	return NULL;
 	return NULL;
 }
 }
 
 
+static inline void drbd_req_make_private_bio(struct drbd_request *req, struct bio *bio_src)
+{
+	struct bio *bio;
+	bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+
+	req->private_bio = bio;
+
+	bio->bi_private  = req;
+	bio->bi_end_io   = drbd_endio_pri;
+	bio->bi_next     = NULL;
+}
+
 static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
 static inline struct drbd_request *drbd_req_new(struct drbd_conf *mdev,
 	struct bio *bio_src)
 	struct bio *bio_src)
 {
 {
-	struct bio *bio;
 	struct drbd_request *req =
 	struct drbd_request *req =
 		mempool_alloc(drbd_request_mempool, GFP_NOIO);
 		mempool_alloc(drbd_request_mempool, GFP_NOIO);
 	if (likely(req)) {
 	if (likely(req)) {
-		bio = bio_clone(bio_src, GFP_NOIO); /* XXX cannot fail?? */
+		drbd_req_make_private_bio(req, bio_src);
 
 
-		req->rq_state    = 0;
+		req->rq_state    = bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0;
 		req->mdev        = mdev;
 		req->mdev        = mdev;
 		req->master_bio  = bio_src;
 		req->master_bio  = bio_src;
-		req->private_bio = bio;
 		req->epoch       = 0;
 		req->epoch       = 0;
-		req->sector      = bio->bi_sector;
-		req->size        = bio->bi_size;
+		req->sector      = bio_src->bi_sector;
+		req->size        = bio_src->bi_size;
 		req->start_time  = jiffies;
 		req->start_time  = jiffies;
 		INIT_HLIST_NODE(&req->colision);
 		INIT_HLIST_NODE(&req->colision);
 		INIT_LIST_HEAD(&req->tl_requests);
 		INIT_LIST_HEAD(&req->tl_requests);
 		INIT_LIST_HEAD(&req->w.list);
 		INIT_LIST_HEAD(&req->w.list);
-
-		bio->bi_private  = req;
-		bio->bi_end_io   = drbd_endio_pri;
-		bio->bi_next     = NULL;
 	}
 	}
 	return req;
 	return req;
 }
 }
@@ -292,36 +317,43 @@ struct bio_and_error {
 
 
 extern void _req_may_be_done(struct drbd_request *req,
 extern void _req_may_be_done(struct drbd_request *req,
 		struct bio_and_error *m);
 		struct bio_and_error *m);
-extern void __req_mod(struct drbd_request *req, enum drbd_req_event what,
+extern int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		struct bio_and_error *m);
 		struct bio_and_error *m);
 extern void complete_master_bio(struct drbd_conf *mdev,
 extern void complete_master_bio(struct drbd_conf *mdev,
 		struct bio_and_error *m);
 		struct bio_and_error *m);
 
 
 /* use this if you don't want to deal with calling complete_master_bio()
 /* use this if you don't want to deal with calling complete_master_bio()
  * outside the spinlock, e.g. when walking some list on cleanup. */
  * outside the spinlock, e.g. when walking some list on cleanup. */
-static inline void _req_mod(struct drbd_request *req, enum drbd_req_event what)
+static inline int _req_mod(struct drbd_request *req, enum drbd_req_event what)
 {
 {
 	struct drbd_conf *mdev = req->mdev;
 	struct drbd_conf *mdev = req->mdev;
 	struct bio_and_error m;
 	struct bio_and_error m;
+	int rv;
 
 
 	/* __req_mod possibly frees req, do not touch req after that! */
 	/* __req_mod possibly frees req, do not touch req after that! */
-	__req_mod(req, what, &m);
+	rv = __req_mod(req, what, &m);
 	if (m.bio)
 	if (m.bio)
 		complete_master_bio(mdev, &m);
 		complete_master_bio(mdev, &m);
+
+	return rv;
 }
 }
 
 
 /* completion of master bio is outside of spinlock.
 /* completion of master bio is outside of spinlock.
  * If you need it irqsave, do it your self! */
  * If you need it irqsave, do it your self! */
-static inline void req_mod(struct drbd_request *req,
+static inline int req_mod(struct drbd_request *req,
 		enum drbd_req_event what)
 		enum drbd_req_event what)
 {
 {
 	struct drbd_conf *mdev = req->mdev;
 	struct drbd_conf *mdev = req->mdev;
 	struct bio_and_error m;
 	struct bio_and_error m;
+	int rv;
+
 	spin_lock_irq(&mdev->req_lock);
 	spin_lock_irq(&mdev->req_lock);
-	__req_mod(req, what, &m);
+	rv = __req_mod(req, what, &m);
 	spin_unlock_irq(&mdev->req_lock);
 	spin_unlock_irq(&mdev->req_lock);
 
 
 	if (m.bio)
 	if (m.bio)
 		complete_master_bio(mdev, &m);
 		complete_master_bio(mdev, &m);
+
+	return rv;
 }
 }
 #endif
 #endif

+ 229 - 63
drivers/block/drbd/drbd_worker.c

@@ -39,8 +39,6 @@
 #include "drbd_int.h"
 #include "drbd_int.h"
 #include "drbd_req.h"
 #include "drbd_req.h"
 
 
-#define SLEEP_TIME (HZ/10)
-
 static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
 static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
 
 
 
 
@@ -217,10 +215,8 @@ void drbd_endio_sec(struct bio *bio, int error)
  */
  */
 void drbd_endio_pri(struct bio *bio, int error)
 void drbd_endio_pri(struct bio *bio, int error)
 {
 {
-	unsigned long flags;
 	struct drbd_request *req = bio->bi_private;
 	struct drbd_request *req = bio->bi_private;
 	struct drbd_conf *mdev = req->mdev;
 	struct drbd_conf *mdev = req->mdev;
-	struct bio_and_error m;
 	enum drbd_req_event what;
 	enum drbd_req_event what;
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 	int uptodate = bio_flagged(bio, BIO_UPTODATE);
 
 
@@ -246,12 +242,7 @@ void drbd_endio_pri(struct bio *bio, int error)
 	bio_put(req->private_bio);
 	bio_put(req->private_bio);
 	req->private_bio = ERR_PTR(error);
 	req->private_bio = ERR_PTR(error);
 
 
-	spin_lock_irqsave(&mdev->req_lock, flags);
-	__req_mod(req, what, &m);
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
-
-	if (m.bio)
-		complete_master_bio(mdev, &m);
+	req_mod(req, what);
 }
 }
 
 
 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
@@ -376,54 +367,145 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
 	struct drbd_epoch_entry *e;
 	struct drbd_epoch_entry *e;
 
 
 	if (!get_ldev(mdev))
 	if (!get_ldev(mdev))
-		return 0;
+		return -EIO;
+
+	if (drbd_rs_should_slow_down(mdev))
+		goto defer;
 
 
 	/* GFP_TRY, because if there is no memory available right now, this may
 	/* GFP_TRY, because if there is no memory available right now, this may
 	 * be rescheduled for later. It is "only" background resync, after all. */
 	 * be rescheduled for later. It is "only" background resync, after all. */
 	e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
 	e = drbd_alloc_ee(mdev, DRBD_MAGIC+0xbeef, sector, size, GFP_TRY);
 	if (!e)
 	if (!e)
-		goto fail;
+		goto defer;
 
 
+	e->w.cb = w_e_send_csum;
 	spin_lock_irq(&mdev->req_lock);
 	spin_lock_irq(&mdev->req_lock);
 	list_add(&e->w.list, &mdev->read_ee);
 	list_add(&e->w.list, &mdev->read_ee);
 	spin_unlock_irq(&mdev->req_lock);
 	spin_unlock_irq(&mdev->req_lock);
 
 
-	e->w.cb = w_e_send_csum;
+	atomic_add(size >> 9, &mdev->rs_sect_ev);
 	if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
 	if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
-		return 1;
+		return 0;
+
+	/* drbd_submit_ee currently fails for one reason only:
+	 * not being able to allocate enough bios.
+	 * Is dropping the connection going to help? */
+	spin_lock_irq(&mdev->req_lock);
+	list_del(&e->w.list);
+	spin_unlock_irq(&mdev->req_lock);
 
 
 	drbd_free_ee(mdev, e);
 	drbd_free_ee(mdev, e);
-fail:
+defer:
 	put_ldev(mdev);
 	put_ldev(mdev);
-	return 2;
+	return -EAGAIN;
 }
 }
 
 
 void resync_timer_fn(unsigned long data)
 void resync_timer_fn(unsigned long data)
 {
 {
-	unsigned long flags;
 	struct drbd_conf *mdev = (struct drbd_conf *) data;
 	struct drbd_conf *mdev = (struct drbd_conf *) data;
 	int queue;
 	int queue;
 
 
-	spin_lock_irqsave(&mdev->req_lock, flags);
-
-	if (likely(!test_and_clear_bit(STOP_SYNC_TIMER, &mdev->flags))) {
-		queue = 1;
-		if (mdev->state.conn == C_VERIFY_S)
-			mdev->resync_work.cb = w_make_ov_request;
-		else
-			mdev->resync_work.cb = w_make_resync_request;
-	} else {
+	queue = 1;
+	switch (mdev->state.conn) {
+	case C_VERIFY_S:
+		mdev->resync_work.cb = w_make_ov_request;
+		break;
+	case C_SYNC_TARGET:
+		mdev->resync_work.cb = w_make_resync_request;
+		break;
+	default:
 		queue = 0;
 		queue = 0;
 		mdev->resync_work.cb = w_resync_inactive;
 		mdev->resync_work.cb = w_resync_inactive;
 	}
 	}
 
 
-	spin_unlock_irqrestore(&mdev->req_lock, flags);
-
 	/* harmless race: list_empty outside data.work.q_lock */
 	/* harmless race: list_empty outside data.work.q_lock */
 	if (list_empty(&mdev->resync_work.list) && queue)
 	if (list_empty(&mdev->resync_work.list) && queue)
 		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
 		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
 }
 }
 
 
+static void fifo_set(struct fifo_buffer *fb, int value)
+{
+	int i;
+
+	for (i = 0; i < fb->size; i++)
+		fb->values[i] = value;
+}
+
+static int fifo_push(struct fifo_buffer *fb, int value)
+{
+	int ov;
+
+	ov = fb->values[fb->head_index];
+	fb->values[fb->head_index++] = value;
+
+	if (fb->head_index >= fb->size)
+		fb->head_index = 0;
+
+	return ov;
+}
+
+static void fifo_add_val(struct fifo_buffer *fb, int value)
+{
+	int i;
+
+	for (i = 0; i < fb->size; i++)
+		fb->values[i] += value;
+}
+
+int drbd_rs_controller(struct drbd_conf *mdev)
+{
+	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
+	unsigned int want;     /* The number of sectors we want in the proxy */
+	int req_sect; /* Number of sectors to request in this turn */
+	int correction; /* Number of sectors more we need in the proxy*/
+	int cps; /* correction per invocation of drbd_rs_controller() */
+	int steps; /* Number of time steps to plan ahead */
+	int curr_corr;
+	int max_sect;
+
+	sect_in = atomic_xchg(&mdev->rs_sect_in, 0); /* Number of sectors that came in */
+	mdev->rs_in_flight -= sect_in;
+
+	spin_lock(&mdev->peer_seq_lock); /* get an atomic view on mdev->rs_plan_s */
+
+	steps = mdev->rs_plan_s.size; /* (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; */
+
+	if (mdev->rs_in_flight + sect_in == 0) { /* At start of resync */
+		want = ((mdev->sync_conf.rate * 2 * SLEEP_TIME) / HZ) * steps;
+	} else { /* normal path */
+		want = mdev->sync_conf.c_fill_target ? mdev->sync_conf.c_fill_target :
+			sect_in * mdev->sync_conf.c_delay_target * HZ / (SLEEP_TIME * 10);
+	}
+
+	correction = want - mdev->rs_in_flight - mdev->rs_planed;
+
+	/* Plan ahead */
+	cps = correction / steps;
+	fifo_add_val(&mdev->rs_plan_s, cps);
+	mdev->rs_planed += cps * steps;
+
+	/* What we do in this step */
+	curr_corr = fifo_push(&mdev->rs_plan_s, 0);
+	spin_unlock(&mdev->peer_seq_lock);
+	mdev->rs_planed -= curr_corr;
+
+	req_sect = sect_in + curr_corr;
+	if (req_sect < 0)
+		req_sect = 0;
+
+	max_sect = (mdev->sync_conf.c_max_rate * 2 * SLEEP_TIME) / HZ;
+	if (req_sect > max_sect)
+		req_sect = max_sect;
+
+	/*
+	dev_warn(DEV, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
+		 sect_in, mdev->rs_in_flight, want, correction,
+		 steps, cps, mdev->rs_planed, curr_corr, req_sect);
+	*/
+
+	return req_sect;
+}
+
 int w_make_resync_request(struct drbd_conf *mdev,
 int w_make_resync_request(struct drbd_conf *mdev,
 		struct drbd_work *w, int cancel)
 		struct drbd_work *w, int cancel)
 {
 {
@@ -431,8 +513,9 @@ int w_make_resync_request(struct drbd_conf *mdev,
 	sector_t sector;
 	sector_t sector;
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
 	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
 	int max_segment_size;
 	int max_segment_size;
-	int number, i, size, pe, mx;
+	int number, rollback_i, size, pe, mx;
 	int align, queued, sndbuf;
 	int align, queued, sndbuf;
+	int i = 0;
 
 
 	if (unlikely(cancel))
 	if (unlikely(cancel))
 		return 1;
 		return 1;
@@ -446,6 +529,12 @@ int w_make_resync_request(struct drbd_conf *mdev,
 		dev_err(DEV, "%s in w_make_resync_request\n",
 		dev_err(DEV, "%s in w_make_resync_request\n",
 			drbd_conn_str(mdev->state.conn));
 			drbd_conn_str(mdev->state.conn));
 
 
+	if (mdev->rs_total == 0) {
+		/* empty resync? */
+		drbd_resync_finished(mdev);
+		return 1;
+	}
+
 	if (!get_ldev(mdev)) {
 	if (!get_ldev(mdev)) {
 		/* Since we only need to access mdev->rsync a
 		/* Since we only need to access mdev->rsync a
 		   get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
 		   get_ldev_if_state(mdev,D_FAILED) would be sufficient, but
@@ -458,11 +547,25 @@ int w_make_resync_request(struct drbd_conf *mdev,
 
 
 	/* starting with drbd 8.3.8, we can handle multi-bio EEs,
 	/* starting with drbd 8.3.8, we can handle multi-bio EEs,
 	 * if it should be necessary */
 	 * if it should be necessary */
-	max_segment_size = mdev->agreed_pro_version < 94 ?
-		queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
+	max_segment_size =
+		mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) :
+		mdev->agreed_pro_version < 95 ?	DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE;
 
 
-	number = SLEEP_TIME * mdev->sync_conf.rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
-	pe = atomic_read(&mdev->rs_pending_cnt);
+	if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
+		number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
+		mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
+	} else {
+		mdev->c_sync_rate = mdev->sync_conf.rate;
+		number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
+	}
+
+	/* Throttle resync on lower level disk activity, which may also be
+	 * caused by application IO on Primary/SyncTarget.
+	 * Keep this after the call to drbd_rs_controller, as that assumes
+	 * to be called as precisely as possible every SLEEP_TIME,
+	 * and would be confused otherwise. */
+	if (drbd_rs_should_slow_down(mdev))
+		goto requeue;
 
 
 	mutex_lock(&mdev->data.mutex);
 	mutex_lock(&mdev->data.mutex);
 	if (mdev->data.socket)
 	if (mdev->data.socket)
@@ -476,6 +579,7 @@ int w_make_resync_request(struct drbd_conf *mdev,
 		mx = number;
 		mx = number;
 
 
 	/* Limit the number of pending RS requests to no more than the peer's receive buffer */
 	/* Limit the number of pending RS requests to no more than the peer's receive buffer */
+	pe = atomic_read(&mdev->rs_pending_cnt);
 	if ((pe + number) > mx) {
 	if ((pe + number) > mx) {
 		number = mx - pe;
 		number = mx - pe;
 	}
 	}
@@ -526,6 +630,7 @@ next_sector:
 		 * be prepared for all stripe sizes of software RAIDs.
 		 * be prepared for all stripe sizes of software RAIDs.
 		 */
 		 */
 		align = 1;
 		align = 1;
+		rollback_i = i;
 		for (;;) {
 		for (;;) {
 			if (size + BM_BLOCK_SIZE > max_segment_size)
 			if (size + BM_BLOCK_SIZE > max_segment_size)
 				break;
 				break;
@@ -561,14 +666,19 @@ next_sector:
 			size = (capacity-sector)<<9;
 			size = (capacity-sector)<<9;
 		if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
 		if (mdev->agreed_pro_version >= 89 && mdev->csums_tfm) {
 			switch (read_for_csum(mdev, sector, size)) {
 			switch (read_for_csum(mdev, sector, size)) {
-			case 0: /* Disk failure*/
+			case -EIO: /* Disk failure */
 				put_ldev(mdev);
 				put_ldev(mdev);
 				return 0;
 				return 0;
-			case 2: /* Allocation failed */
+			case -EAGAIN: /* allocation failed, or ldev busy */
 				drbd_rs_complete_io(mdev, sector);
 				drbd_rs_complete_io(mdev, sector);
 				mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
 				mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
+				i = rollback_i;
 				goto requeue;
 				goto requeue;
-			/* case 1: everything ok */
+			case 0:
+				/* everything ok */
+				break;
+			default:
+				BUG();
 			}
 			}
 		} else {
 		} else {
 			inc_rs_pending(mdev);
 			inc_rs_pending(mdev);
@@ -595,6 +705,7 @@ next_sector:
 	}
 	}
 
 
  requeue:
  requeue:
+	mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
 	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
 	mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
 	put_ldev(mdev);
 	put_ldev(mdev);
 	return 1;
 	return 1;
@@ -670,6 +781,14 @@ static int w_resync_finished(struct drbd_conf *mdev, struct drbd_work *w, int ca
 	return 1;
 	return 1;
 }
 }
 
 
+static void ping_peer(struct drbd_conf *mdev)
+{
+	clear_bit(GOT_PING_ACK, &mdev->flags);
+	request_ping(mdev);
+	wait_event(mdev->misc_wait,
+		   test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
+}
+
 int drbd_resync_finished(struct drbd_conf *mdev)
 int drbd_resync_finished(struct drbd_conf *mdev)
 {
 {
 	unsigned long db, dt, dbdt;
 	unsigned long db, dt, dbdt;
@@ -709,6 +828,8 @@ int drbd_resync_finished(struct drbd_conf *mdev)
 	if (!get_ldev(mdev))
 	if (!get_ldev(mdev))
 		goto out;
 		goto out;
 
 
+	ping_peer(mdev);
+
 	spin_lock_irq(&mdev->req_lock);
 	spin_lock_irq(&mdev->req_lock);
 	os = mdev->state;
 	os = mdev->state;
 
 
@@ -801,6 +922,8 @@ out:
 	mdev->rs_paused = 0;
 	mdev->rs_paused = 0;
 	mdev->ov_start_sector = 0;
 	mdev->ov_start_sector = 0;
 
 
+	drbd_md_sync(mdev);
+
 	if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
 	if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
 		dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
 		dev_warn(DEV, "Writing the whole bitmap, due to failed kmalloc\n");
 		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
 		drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
@@ -817,9 +940,13 @@ static void move_to_net_ee_or_free(struct drbd_conf *mdev, struct drbd_epoch_ent
 {
 {
 	if (drbd_ee_has_active_page(e)) {
 	if (drbd_ee_has_active_page(e)) {
 		/* This might happen if sendpage() has not finished */
 		/* This might happen if sendpage() has not finished */
+		int i = (e->size + PAGE_SIZE -1) >> PAGE_SHIFT;
+		atomic_add(i, &mdev->pp_in_use_by_net);
+		atomic_sub(i, &mdev->pp_in_use);
 		spin_lock_irq(&mdev->req_lock);
 		spin_lock_irq(&mdev->req_lock);
 		list_add_tail(&e->w.list, &mdev->net_ee);
 		list_add_tail(&e->w.list, &mdev->net_ee);
 		spin_unlock_irq(&mdev->req_lock);
 		spin_unlock_irq(&mdev->req_lock);
+		wake_up(&drbd_pp_wait);
 	} else
 	} else
 		drbd_free_ee(mdev, e);
 		drbd_free_ee(mdev, e);
 }
 }
@@ -926,9 +1053,12 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 		return 1;
 		return 1;
 	}
 	}
 
 
-	drbd_rs_complete_io(mdev, e->sector);
+	if (get_ldev(mdev)) {
+		drbd_rs_complete_io(mdev, e->sector);
+		put_ldev(mdev);
+	}
 
 
-	di = (struct digest_info *)(unsigned long)e->block_id;
+	di = e->digest;
 
 
 	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 		/* quick hack to try to avoid a race against reconfiguration.
 		/* quick hack to try to avoid a race against reconfiguration.
@@ -952,7 +1082,9 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 			ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
 			ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
 		} else {
 		} else {
 			inc_rs_pending(mdev);
 			inc_rs_pending(mdev);
-			e->block_id = ID_SYNCER;
+			e->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
+			e->flags &= ~EE_HAS_DIGEST; /* This e no longer has a digest pointer */
+			kfree(di);
 			ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
 			ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
 		}
 		}
 	} else {
 	} else {
@@ -962,9 +1094,6 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	}
 	}
 
 
 	dec_unacked(mdev);
 	dec_unacked(mdev);
-
-	kfree(di);
-
 	move_to_net_ee_or_free(mdev, e);
 	move_to_net_ee_or_free(mdev, e);
 
 
 	if (unlikely(!ok))
 	if (unlikely(!ok))
@@ -1034,9 +1163,12 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 
 
 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
 	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
 	 * the resync lru has been cleaned up already */
 	 * the resync lru has been cleaned up already */
-	drbd_rs_complete_io(mdev, e->sector);
+	if (get_ldev(mdev)) {
+		drbd_rs_complete_io(mdev, e->sector);
+		put_ldev(mdev);
+	}
 
 
-	di = (struct digest_info *)(unsigned long)e->block_id;
+	di = e->digest;
 
 
 	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 		digest_size = crypto_hash_digestsize(mdev->verify_tfm);
 		digest_size = crypto_hash_digestsize(mdev->verify_tfm);
@@ -1055,9 +1187,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	}
 	}
 
 
 	dec_unacked(mdev);
 	dec_unacked(mdev);
-
-	kfree(di);
-
 	if (!eq)
 	if (!eq)
 		drbd_ov_oos_found(mdev, e->sector, e->size);
 		drbd_ov_oos_found(mdev, e->sector, e->size);
 	else
 	else
@@ -1108,7 +1237,7 @@ int w_send_barrier(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	 * dec_ap_pending will be done in got_BarrierAck
 	 * dec_ap_pending will be done in got_BarrierAck
 	 * or (on connection loss) in w_clear_epoch.  */
 	 * or (on connection loss) in w_clear_epoch.  */
 	ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
 	ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BARRIER,
-				(struct p_header *)p, sizeof(*p), 0);
+				(struct p_header80 *)p, sizeof(*p), 0);
 	drbd_put_data_sock(mdev);
 	drbd_put_data_sock(mdev);
 
 
 	return ok;
 	return ok;
@@ -1173,6 +1302,24 @@ int w_send_read_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 	return ok;
 	return ok;
 }
 }
 
 
+int w_restart_disk_io(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+	struct drbd_request *req = container_of(w, struct drbd_request, w);
+
+	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
+		drbd_al_begin_io(mdev, req->sector);
+	/* Calling drbd_al_begin_io() out of the worker might deadlocks
+	   theoretically. Practically it can not deadlock, since this is
+	   only used when unfreezing IOs. All the extents of the requests
+	   that made it into the TL are already active */
+
+	drbd_req_make_private_bio(req, req->master_bio);
+	req->private_bio->bi_bdev = mdev->ldev->backing_bdev;
+	generic_make_request(req->private_bio);
+
+	return 1;
+}
+
 static int _drbd_may_sync_now(struct drbd_conf *mdev)
 static int _drbd_may_sync_now(struct drbd_conf *mdev)
 {
 {
 	struct drbd_conf *odev = mdev;
 	struct drbd_conf *odev = mdev;
@@ -1298,14 +1445,6 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
 	return retcode;
 	return retcode;
 }
 }
 
 
-static void ping_peer(struct drbd_conf *mdev)
-{
-	clear_bit(GOT_PING_ACK, &mdev->flags);
-	request_ping(mdev);
-	wait_event(mdev->misc_wait,
-		   test_bit(GOT_PING_ACK, &mdev->flags) || mdev->state.conn < C_CONNECTED);
-}
-
 /**
 /**
  * drbd_start_resync() - Start the resync process
  * drbd_start_resync() - Start the resync process
  * @mdev:	DRBD device.
  * @mdev:	DRBD device.
@@ -1379,13 +1518,21 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 		r = SS_UNKNOWN_ERROR;
 		r = SS_UNKNOWN_ERROR;
 
 
 	if (r == SS_SUCCESS) {
 	if (r == SS_SUCCESS) {
-		mdev->rs_total     =
-		mdev->rs_mark_left = drbd_bm_total_weight(mdev);
+		unsigned long tw = drbd_bm_total_weight(mdev);
+		unsigned long now = jiffies;
+		int i;
+
 		mdev->rs_failed    = 0;
 		mdev->rs_failed    = 0;
 		mdev->rs_paused    = 0;
 		mdev->rs_paused    = 0;
-		mdev->rs_start     =
-		mdev->rs_mark_time = jiffies;
 		mdev->rs_same_csum = 0;
 		mdev->rs_same_csum = 0;
+		mdev->rs_last_events = 0;
+		mdev->rs_last_sect_ev = 0;
+		mdev->rs_total     = tw;
+		mdev->rs_start     = now;
+		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
+			mdev->rs_mark_left[i] = tw;
+			mdev->rs_mark_time[i] = now;
+		}
 		_drbd_pause_after(mdev);
 		_drbd_pause_after(mdev);
 	}
 	}
 	write_unlock_irq(&global_state_lock);
 	write_unlock_irq(&global_state_lock);
@@ -1397,12 +1544,31 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
 		     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
 		     (unsigned long) mdev->rs_total);
 		     (unsigned long) mdev->rs_total);
 
 
-		if (mdev->rs_total == 0) {
-			/* Peer still reachable? Beware of failing before-resync-target handlers! */
-			ping_peer(mdev);
+		if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
+			/* This still has a race (about when exactly the peers
+			 * detect connection loss) that can lead to a full sync
+			 * on next handshake. In 8.3.9 we fixed this with explicit
+			 * resync-finished notifications, but the fix
+			 * introduces a protocol change.  Sleeping for some
+			 * time longer than the ping interval + timeout on the
+			 * SyncSource, to give the SyncTarget the chance to
+			 * detect connection loss, then waiting for a ping
+			 * response (implicit in drbd_resync_finished) reduces
+			 * the race considerably, but does not solve it. */
+			if (side == C_SYNC_SOURCE)
+				schedule_timeout_interruptible(
+					mdev->net_conf->ping_int * HZ +
+					mdev->net_conf->ping_timeo*HZ/9);
 			drbd_resync_finished(mdev);
 			drbd_resync_finished(mdev);
 		}
 		}
 
 
+		atomic_set(&mdev->rs_sect_in, 0);
+		atomic_set(&mdev->rs_sect_ev, 0);
+		mdev->rs_in_flight = 0;
+		mdev->rs_planed = 0;
+		spin_lock(&mdev->peer_seq_lock);
+		fifo_set(&mdev->rs_plan_s, 0);
+		spin_unlock(&mdev->peer_seq_lock);
 		/* ns.conn may already be != mdev->state.conn,
 		/* ns.conn may already be != mdev->state.conn,
 		 * we may have been paused in between, or become paused until
 		 * we may have been paused in between, or become paused until
 		 * the timer triggers.
 		 * the timer triggers.

+ 45 - 21
drivers/block/floppy.c

@@ -258,8 +258,8 @@ static int irqdma_allocated;
 #include <linux/completion.h>
 #include <linux/completion.h>
 
 
 static struct request *current_req;
 static struct request *current_req;
-static struct request_queue *floppy_queue;
 static void do_fd_request(struct request_queue *q);
 static void do_fd_request(struct request_queue *q);
+static int set_next_request(void);
 
 
 #ifndef fd_get_dma_residue
 #ifndef fd_get_dma_residue
 #define fd_get_dma_residue() get_dma_residue(FLOPPY_DMA)
 #define fd_get_dma_residue() get_dma_residue(FLOPPY_DMA)
@@ -413,6 +413,7 @@ static struct gendisk *disks[N_DRIVE];
 static struct block_device *opened_bdev[N_DRIVE];
 static struct block_device *opened_bdev[N_DRIVE];
 static DEFINE_MUTEX(open_lock);
 static DEFINE_MUTEX(open_lock);
 static struct floppy_raw_cmd *raw_cmd, default_raw_cmd;
 static struct floppy_raw_cmd *raw_cmd, default_raw_cmd;
+static int fdc_queue;
 
 
 /*
 /*
  * This struct defines the different floppy types.
  * This struct defines the different floppy types.
@@ -890,8 +891,8 @@ static void unlock_fdc(void)
 	del_timer(&fd_timeout);
 	del_timer(&fd_timeout);
 	cont = NULL;
 	cont = NULL;
 	clear_bit(0, &fdc_busy);
 	clear_bit(0, &fdc_busy);
-	if (current_req || blk_peek_request(floppy_queue))
-		do_fd_request(floppy_queue);
+	if (current_req || set_next_request())
+		do_fd_request(current_req->q);
 	spin_unlock_irqrestore(&floppy_lock, flags);
 	spin_unlock_irqrestore(&floppy_lock, flags);
 	wake_up(&fdc_wait);
 	wake_up(&fdc_wait);
 }
 }
@@ -2243,8 +2244,8 @@ static void floppy_end_request(struct request *req, int error)
  * logical buffer */
  * logical buffer */
 static void request_done(int uptodate)
 static void request_done(int uptodate)
 {
 {
-	struct request_queue *q = floppy_queue;
 	struct request *req = current_req;
 	struct request *req = current_req;
+	struct request_queue *q;
 	unsigned long flags;
 	unsigned long flags;
 	int block;
 	int block;
 	char msg[sizeof("request done ") + sizeof(int) * 3];
 	char msg[sizeof("request done ") + sizeof(int) * 3];
@@ -2258,6 +2259,8 @@ static void request_done(int uptodate)
 		return;
 		return;
 	}
 	}
 
 
+	q = req->q;
+
 	if (uptodate) {
 	if (uptodate) {
 		/* maintain values for invalidation on geometry
 		/* maintain values for invalidation on geometry
 		 * change */
 		 * change */
@@ -2811,6 +2814,28 @@ static int make_raw_rw_request(void)
 	return 2;
 	return 2;
 }
 }
 
 
+/*
+ * Round-robin between our available drives, doing one request from each
+ */
+static int set_next_request(void)
+{
+	struct request_queue *q;
+	int old_pos = fdc_queue;
+
+	do {
+		q = disks[fdc_queue]->queue;
+		if (++fdc_queue == N_DRIVE)
+			fdc_queue = 0;
+		if (q) {
+			current_req = blk_fetch_request(q);
+			if (current_req)
+				break;
+		}
+	} while (fdc_queue != old_pos);
+
+	return current_req != NULL;
+}
+
 static void redo_fd_request(void)
 static void redo_fd_request(void)
 {
 {
 	int drive;
 	int drive;
@@ -2822,17 +2847,17 @@ static void redo_fd_request(void)
 
 
 do_request:
 do_request:
 	if (!current_req) {
 	if (!current_req) {
-		struct request *req;
+		int pending;
+
+		spin_lock_irq(&floppy_lock);
+		pending = set_next_request();
+		spin_unlock_irq(&floppy_lock);
 
 
-		spin_lock_irq(floppy_queue->queue_lock);
-		req = blk_fetch_request(floppy_queue);
-		spin_unlock_irq(floppy_queue->queue_lock);
-		if (!req) {
+		if (!pending) {
 			do_floppy = NULL;
 			do_floppy = NULL;
 			unlock_fdc();
 			unlock_fdc();
 			return;
 			return;
 		}
 		}
-		current_req = req;
 	}
 	}
 	drive = (long)current_req->rq_disk->private_data;
 	drive = (long)current_req->rq_disk->private_data;
 	set_fdc(drive);
 	set_fdc(drive);
@@ -4165,6 +4190,13 @@ static int __init floppy_init(void)
 			goto out_put_disk;
 			goto out_put_disk;
 		}
 		}
 
 
+		disks[dr]->queue = blk_init_queue(do_fd_request, &floppy_lock);
+		if (!disks[dr]->queue) {
+			err = -ENOMEM;
+			goto out_put_disk;
+		}
+
+		blk_queue_max_hw_sectors(disks[dr]->queue, 64);
 		disks[dr]->major = FLOPPY_MAJOR;
 		disks[dr]->major = FLOPPY_MAJOR;
 		disks[dr]->first_minor = TOMINOR(dr);
 		disks[dr]->first_minor = TOMINOR(dr);
 		disks[dr]->fops = &floppy_fops;
 		disks[dr]->fops = &floppy_fops;
@@ -4183,13 +4215,6 @@ static int __init floppy_init(void)
 	if (err)
 	if (err)
 		goto out_unreg_blkdev;
 		goto out_unreg_blkdev;
 
 
-	floppy_queue = blk_init_queue(do_fd_request, &floppy_lock);
-	if (!floppy_queue) {
-		err = -ENOMEM;
-		goto out_unreg_driver;
-	}
-	blk_queue_max_hw_sectors(floppy_queue, 64);
-
 	blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
 	blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE,
 			    floppy_find, NULL, NULL);
 			    floppy_find, NULL, NULL);
 
 
@@ -4317,7 +4342,6 @@ static int __init floppy_init(void)
 
 
 		/* to be cleaned up... */
 		/* to be cleaned up... */
 		disks[drive]->private_data = (void *)(long)drive;
 		disks[drive]->private_data = (void *)(long)drive;
-		disks[drive]->queue = floppy_queue;
 		disks[drive]->flags |= GENHD_FL_REMOVABLE;
 		disks[drive]->flags |= GENHD_FL_REMOVABLE;
 		disks[drive]->driverfs_dev = &floppy_device[drive].dev;
 		disks[drive]->driverfs_dev = &floppy_device[drive].dev;
 		add_disk(disks[drive]);
 		add_disk(disks[drive]);
@@ -4333,8 +4357,6 @@ out_flush_work:
 		floppy_release_irq_and_dma();
 		floppy_release_irq_and_dma();
 out_unreg_region:
 out_unreg_region:
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
-	blk_cleanup_queue(floppy_queue);
-out_unreg_driver:
 	platform_driver_unregister(&floppy_driver);
 	platform_driver_unregister(&floppy_driver);
 out_unreg_blkdev:
 out_unreg_blkdev:
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
 	unregister_blkdev(FLOPPY_MAJOR, "fd");
@@ -4342,6 +4364,8 @@ out_put_disk:
 	while (dr--) {
 	while (dr--) {
 		del_timer(&motor_off_timer[dr]);
 		del_timer(&motor_off_timer[dr]);
 		put_disk(disks[dr]);
 		put_disk(disks[dr]);
+		if (disks[dr]->queue)
+			blk_cleanup_queue(disks[dr]->queue);
 	}
 	}
 	return err;
 	return err;
 }
 }
@@ -4550,11 +4574,11 @@ static void __exit floppy_module_exit(void)
 			platform_device_unregister(&floppy_device[drive]);
 			platform_device_unregister(&floppy_device[drive]);
 		}
 		}
 		put_disk(disks[drive]);
 		put_disk(disks[drive]);
+		blk_cleanup_queue(disks[drive]->queue);
 	}
 	}
 
 
 	del_timer_sync(&fd_timeout);
 	del_timer_sync(&fd_timeout);
 	del_timer_sync(&fd_timer);
 	del_timer_sync(&fd_timer);
-	blk_cleanup_queue(floppy_queue);
 
 
 	if (atomic_read(&usage_count))
 	if (atomic_read(&usage_count))
 		floppy_release_irq_and_dma();
 		floppy_release_irq_and_dma();

+ 101 - 0
drivers/block/loop.c

@@ -74,6 +74,7 @@
 #include <linux/highmem.h>
 #include <linux/highmem.h>
 #include <linux/kthread.h>
 #include <linux/kthread.h>
 #include <linux/splice.h>
 #include <linux/splice.h>
+#include <linux/sysfs.h>
 
 
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
 
 
@@ -738,6 +739,103 @@ static inline int is_loop_device(struct file *file)
 	return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
 	return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
 }
 }
 
 
+/* loop sysfs attributes */
+
+static ssize_t loop_attr_show(struct device *dev, char *page,
+			      ssize_t (*callback)(struct loop_device *, char *))
+{
+	struct loop_device *l, *lo = NULL;
+
+	mutex_lock(&loop_devices_mutex);
+	list_for_each_entry(l, &loop_devices, lo_list)
+		if (disk_to_dev(l->lo_disk) == dev) {
+			lo = l;
+			break;
+		}
+	mutex_unlock(&loop_devices_mutex);
+
+	return lo ? callback(lo, page) : -EIO;
+}
+
+#define LOOP_ATTR_RO(_name)						\
+static ssize_t loop_attr_##_name##_show(struct loop_device *, char *);	\
+static ssize_t loop_attr_do_show_##_name(struct device *d,		\
+				struct device_attribute *attr, char *b)	\
+{									\
+	return loop_attr_show(d, b, loop_attr_##_name##_show);		\
+}									\
+static struct device_attribute loop_attr_##_name =			\
+	__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
+
+static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
+{
+	ssize_t ret;
+	char *p = NULL;
+
+	mutex_lock(&lo->lo_ctl_mutex);
+	if (lo->lo_backing_file)
+		p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
+	mutex_unlock(&lo->lo_ctl_mutex);
+
+	if (IS_ERR_OR_NULL(p))
+		ret = PTR_ERR(p);
+	else {
+		ret = strlen(p);
+		memmove(buf, p, ret);
+		buf[ret++] = '\n';
+		buf[ret] = 0;
+	}
+
+	return ret;
+}
+
+static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
+{
+	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
+}
+
+static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
+{
+	return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
+}
+
+static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
+{
+	int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
+
+	return sprintf(buf, "%s\n", autoclear ? "1" : "0");
+}
+
+LOOP_ATTR_RO(backing_file);
+LOOP_ATTR_RO(offset);
+LOOP_ATTR_RO(sizelimit);
+LOOP_ATTR_RO(autoclear);
+
+static struct attribute *loop_attrs[] = {
+	&loop_attr_backing_file.attr,
+	&loop_attr_offset.attr,
+	&loop_attr_sizelimit.attr,
+	&loop_attr_autoclear.attr,
+	NULL,
+};
+
+static struct attribute_group loop_attribute_group = {
+	.name = "loop",
+	.attrs= loop_attrs,
+};
+
+static int loop_sysfs_init(struct loop_device *lo)
+{
+	return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
+				  &loop_attribute_group);
+}
+
+static void loop_sysfs_exit(struct loop_device *lo)
+{
+	sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
+			   &loop_attribute_group);
+}
+
 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 		       struct block_device *bdev, unsigned int arg)
 		       struct block_device *bdev, unsigned int arg)
 {
 {
@@ -837,6 +935,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 
 
 	set_capacity(lo->lo_disk, size);
 	set_capacity(lo->lo_disk, size);
 	bd_set_size(bdev, size << 9);
 	bd_set_size(bdev, size << 9);
+	loop_sysfs_init(lo);
 	/* let user-space know about the new size */
 	/* let user-space know about the new size */
 	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
 	kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
 
 
@@ -855,6 +954,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
 	return 0;
 	return 0;
 
 
 out_clr:
 out_clr:
+	loop_sysfs_exit(lo);
 	lo->lo_thread = NULL;
 	lo->lo_thread = NULL;
 	lo->lo_device = NULL;
 	lo->lo_device = NULL;
 	lo->lo_backing_file = NULL;
 	lo->lo_backing_file = NULL;
@@ -951,6 +1051,7 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
 	set_capacity(lo->lo_disk, 0);
 	set_capacity(lo->lo_disk, 0);
 	if (bdev) {
 	if (bdev) {
 		bd_set_size(bdev, 0);
 		bd_set_size(bdev, 0);
+		loop_sysfs_exit(lo);
 		/* let user-space know about this change */
 		/* let user-space know about this change */
 		kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
 		kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
 	}
 	}

+ 17 - 5
include/linux/drbd.h

@@ -53,10 +53,10 @@
 
 
 
 
 extern const char *drbd_buildtag(void);
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.8.1"
+#define REL_VERSION "8.3.9rc2"
 #define API_VERSION 88
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MIN 86
-#define PRO_VERSION_MAX 94
+#define PRO_VERSION_MAX 95
 
 
 
 
 enum drbd_io_error_p {
 enum drbd_io_error_p {
@@ -91,6 +91,11 @@ enum drbd_after_sb_p {
 	ASB_VIOLENTLY
 	ASB_VIOLENTLY
 };
 };
 
 
+enum drbd_on_no_data {
+	OND_IO_ERROR,
+	OND_SUSPEND_IO
+};
+
 /* KEEP the order, do not delete or insert. Only append. */
 /* KEEP the order, do not delete or insert. Only append. */
 enum drbd_ret_codes {
 enum drbd_ret_codes {
 	ERR_CODE_BASE		= 100,
 	ERR_CODE_BASE		= 100,
@@ -140,6 +145,7 @@ enum drbd_ret_codes {
 	ERR_CONNECTED		= 151, /* DRBD 8.3 only */
 	ERR_CONNECTED		= 151, /* DRBD 8.3 only */
 	ERR_PERM		= 152,
 	ERR_PERM		= 152,
 	ERR_NEED_APV_93		= 153,
 	ERR_NEED_APV_93		= 153,
+	ERR_STONITH_AND_PROT_A  = 154,
 
 
 	/* insert new ones above this line */
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
 	AFTER_LAST_ERR_CODE
@@ -226,13 +232,17 @@ union drbd_state {
 		unsigned conn:5 ;   /* 17/32	 cstates */
 		unsigned conn:5 ;   /* 17/32	 cstates */
 		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
 		unsigned disk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
 		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
 		unsigned pdsk:4 ;   /* 8/16	 from D_DISKLESS to D_UP_TO_DATE */
-		unsigned susp:1 ;   /* 2/2	 IO suspended  no/yes */
+		unsigned susp:1 ;   /* 2/2	 IO suspended no/yes (by user) */
 		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
 		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
 		unsigned peer_isp:1 ;
 		unsigned peer_isp:1 ;
 		unsigned user_isp:1 ;
 		unsigned user_isp:1 ;
-		unsigned _pad:11;   /* 0	 unused */
+		unsigned susp_nod:1 ; /* IO suspended because no data */
+		unsigned susp_fen:1 ; /* IO suspended because fence peer handler runs*/
+		unsigned _pad:9;   /* 0	 unused */
 #elif defined(__BIG_ENDIAN_BITFIELD)
 #elif defined(__BIG_ENDIAN_BITFIELD)
-		unsigned _pad:11;   /* 0	 unused */
+		unsigned _pad:9;
+		unsigned susp_fen:1 ;
+		unsigned susp_nod:1 ;
 		unsigned user_isp:1 ;
 		unsigned user_isp:1 ;
 		unsigned peer_isp:1 ;
 		unsigned peer_isp:1 ;
 		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
 		unsigned aftr_isp:1 ; /* isp .. imposed sync pause */
@@ -312,6 +322,8 @@ enum drbd_timeout_flag {
 
 
 #define DRBD_MAGIC 0x83740267
 #define DRBD_MAGIC 0x83740267
 #define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
 #define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC)
+#define DRBD_MAGIC_BIG 0x835a
+#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG)
 
 
 /* these are of type "int" */
 /* these are of type "int" */
 #define DRBD_MD_INDEX_INTERNAL -1
 #define DRBD_MD_INDEX_INTERNAL -1

+ 17 - 12
include/linux/drbd_limits.h

@@ -128,26 +128,31 @@
 #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
 #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT
 #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT
 #define DRBD_AFTER_SB_2P_DEF ASB_DISCONNECT
 #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
 #define DRBD_RR_CONFLICT_DEF ASB_DISCONNECT
+#define DRBD_ON_NO_DATA_DEF OND_IO_ERROR
 
 
 #define DRBD_MAX_BIO_BVECS_MIN 0
 #define DRBD_MAX_BIO_BVECS_MIN 0
 #define DRBD_MAX_BIO_BVECS_MAX 128
 #define DRBD_MAX_BIO_BVECS_MAX 128
 #define DRBD_MAX_BIO_BVECS_DEF 0
 #define DRBD_MAX_BIO_BVECS_DEF 0
 
 
-#define DRBD_DP_VOLUME_MIN 4
-#define DRBD_DP_VOLUME_MAX 1048576
-#define DRBD_DP_VOLUME_DEF 16384
+#define DRBD_C_PLAN_AHEAD_MIN  0
+#define DRBD_C_PLAN_AHEAD_MAX  300
+#define DRBD_C_PLAN_AHEAD_DEF  0 /* RS rate controller disabled by default */
 
 
-#define DRBD_DP_INTERVAL_MIN 1
-#define DRBD_DP_INTERVAL_MAX 600
-#define DRBD_DP_INTERVAL_DEF 5
+#define DRBD_C_DELAY_TARGET_MIN 1
+#define DRBD_C_DELAY_TARGET_MAX 100
+#define DRBD_C_DELAY_TARGET_DEF 10
 
 
-#define DRBD_RS_THROTTLE_TH_MIN 1
-#define DRBD_RS_THROTTLE_TH_MAX 600
-#define DRBD_RS_THROTTLE_TH_DEF 20
+#define DRBD_C_FILL_TARGET_MIN 0
+#define DRBD_C_FILL_TARGET_MAX (1<<20) /* 500MByte in sec */
+#define DRBD_C_FILL_TARGET_DEF 0 /* By default disabled -> controlled by delay_target */
 
 
-#define DRBD_RS_HOLD_OFF_TH_MIN 1
-#define DRBD_RS_HOLD_OFF_TH_MAX 6000
-#define DRBD_RS_HOLD_OFF_TH_DEF 100
+#define DRBD_C_MAX_RATE_MIN     250 /* kByte/sec */
+#define DRBD_C_MAX_RATE_MAX     (4 << 20)
+#define DRBD_C_MAX_RATE_DEF     102400
+
+#define DRBD_C_MIN_RATE_MIN     0 /* kByte/sec */
+#define DRBD_C_MIN_RATE_MAX     (4 << 20)
+#define DRBD_C_MIN_RATE_DEF     4096
 
 
 #undef RANGE
 #undef RANGE
 #endif
 #endif

+ 6 - 0
include/linux/drbd_nl.h

@@ -87,6 +87,12 @@ NL_PACKET(syncer_conf, 8,
 	NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32)
 	NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32)
 	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX)
 	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX)
 	NL_BIT(         65,     T_MAY_IGNORE,   use_rle)
 	NL_BIT(         65,     T_MAY_IGNORE,   use_rle)
+	NL_INTEGER(	75,	T_MAY_IGNORE,	on_no_data)
+	NL_INTEGER(	76,	T_MAY_IGNORE,	c_plan_ahead)
+	NL_INTEGER(     77,	T_MAY_IGNORE,	c_delay_target)
+	NL_INTEGER(     78,	T_MAY_IGNORE,	c_fill_target)
+	NL_INTEGER(     79,	T_MAY_IGNORE,	c_max_rate)
+	NL_INTEGER(     80,	T_MAY_IGNORE,	c_min_rate)
 )
 )
 
 
 NL_PACKET(invalidate, 9, )
 NL_PACKET(invalidate, 9, )

+ 1 - 1
include/linux/dynamic_debug.h

@@ -83,7 +83,7 @@ static inline int ddebug_remove_module(const char *mod)
 
 
 #define dynamic_pr_debug(fmt, ...)					\
 #define dynamic_pr_debug(fmt, ...)					\
 	do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
 	do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
-#define dynamic_dev_dbg(dev, format, ...)				\
+#define dynamic_dev_dbg(dev, fmt, ...)					\
 	do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0)
 	do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0)
 #endif
 #endif
 
 

+ 1 - 0
include/linux/pci_ids.h

@@ -743,6 +743,7 @@
 #define PCI_DEVICE_ID_HP_CISSC		0x3230
 #define PCI_DEVICE_ID_HP_CISSC		0x3230
 #define PCI_DEVICE_ID_HP_CISSD		0x3238
 #define PCI_DEVICE_ID_HP_CISSD		0x3238
 #define PCI_DEVICE_ID_HP_CISSE		0x323a
 #define PCI_DEVICE_ID_HP_CISSE		0x323a
+#define PCI_DEVICE_ID_HP_CISSF		0x323b
 #define PCI_DEVICE_ID_HP_ZX2_IOC	0x4031
 #define PCI_DEVICE_ID_HP_ZX2_IOC	0x4031
 
 
 #define PCI_VENDOR_ID_PCTECH		0x1042
 #define PCI_VENDOR_ID_PCTECH		0x1042

部分文件因为文件数量过多而无法显示