Browse Source

[SCSI] fusion: hold off error recovery while alternate ioc is initializing

After discussing this patch with LSI, I am resubmitting it with a recommended
40 second wait for the alternate ioc's initialization to complete.
--
Fusion FC chips are two-function devices with some shared resources.  During
initialization of one function its driver inhibits the ability of the
other function's driver to allocate message frames by clearing its
"active" flag.  Should mid-layer error recovery be initiated for a
SCSI command during this initialization (which can take up to 40 seconds),
error recovery will escalate to the level of host reset.  This host
reset might fail (as the other function is resetting) resulting in
all connected targets being taken offline.

This patch holds off mid-layer error recovery for up to 40 seconds
to permit initialization of the other function to complete.

Signed-off-by: Michael Reed <mdr@sgi.com>
Acked-by: "Desai, Kashyap" <Kashyap.Desai@lsi.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Michael Reed 15 years ago
parent
commit
03cb3829e0
1 changed files with 11 additions and 6 deletions
  1. 11 6
      drivers/message/fusion/mptfc.c

+ 11 - 6
drivers/message/fusion/mptfc.c

@@ -195,29 +195,34 @@ mptfc_block_error_handler(struct scsi_cmnd *SCpnt,
 	unsigned long		flags;
 	unsigned long		flags;
 	int			ready;
 	int			ready;
 	MPT_ADAPTER 		*ioc;
 	MPT_ADAPTER 		*ioc;
+	int			loops = 40;	/* seconds */
 
 
 	hd = shost_priv(SCpnt->device->host);
 	hd = shost_priv(SCpnt->device->host);
 	ioc = hd->ioc;
 	ioc = hd->ioc;
 	spin_lock_irqsave(shost->host_lock, flags);
 	spin_lock_irqsave(shost->host_lock, flags);
-	while ((ready = fc_remote_port_chkready(rport) >> 16) == DID_IMM_RETRY) {
+	while ((ready = fc_remote_port_chkready(rport) >> 16) == DID_IMM_RETRY
+	 || (loops > 0 && ioc->active == 0)) {
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT
 		dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT
 			"mptfc_block_error_handler.%d: %d:%d, port status is "
 			"mptfc_block_error_handler.%d: %d:%d, port status is "
-			"DID_IMM_RETRY, deferring %s recovery.\n",
+			"%x, active flag %d, deferring %s recovery.\n",
 			ioc->name, ioc->sh->host_no,
 			ioc->name, ioc->sh->host_no,
-			SCpnt->device->id, SCpnt->device->lun, caller));
+			SCpnt->device->id, SCpnt->device->lun,
+			ready, ioc->active, caller));
 		msleep(1000);
 		msleep(1000);
 		spin_lock_irqsave(shost->host_lock, flags);
 		spin_lock_irqsave(shost->host_lock, flags);
+		loops --;
 	}
 	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
 
-	if (ready == DID_NO_CONNECT || !SCpnt->device->hostdata) {
+	if (ready == DID_NO_CONNECT || !SCpnt->device->hostdata
+	 || ioc->active == 0) {
 		dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT
 		dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT
 			"%s.%d: %d:%d, failing recovery, "
 			"%s.%d: %d:%d, failing recovery, "
-			"port state %d, vdevice %p.\n", caller,
+			"port state %x, active %d, vdevice %p.\n", caller,
 			ioc->name, ioc->sh->host_no,
 			ioc->name, ioc->sh->host_no,
 			SCpnt->device->id, SCpnt->device->lun, ready,
 			SCpnt->device->id, SCpnt->device->lun, ready,
-			SCpnt->device->hostdata));
+			ioc->active, SCpnt->device->hostdata));
 		return FAILED;
 		return FAILED;
 	}
 	}
 	dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT
 	dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT