
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband:
  RDMA/nes: Formatting cleanup
  RDMA/nes: Add support for SFP+ PHY
  RDMA/nes: Use LRO
  IPoIB: Copy child MTU from parent
  IB/mthca: Avoid changing userspace ABI to handle DMA write barrier attribute
  IB/mthca: Avoid recycling old FMR R_Keys too soon
  mlx4_core: Avoid recycling old FMR R_Keys too soon
  IB/ehca: Allocate event queue size depending on max number of CQs and QPs
  IPoIB: Use separate CQ for UD send completions
  IB/iser: Count FMR alignment violations per session
  IB/iser: Move high-volume debug output to higher debug level
  IB/ehca: handle negative return value from ibmebus_request_irq() properly
  RDMA/cxgb3: Support peer-2-peer connection setup
  RDMA/cxgb3: Set the max_mr_size device attribute correctly
  RDMA/cxgb3: Correctly serialize peer abort path
  mlx4_core: Add a way to set the "collapsed" CQ flag
Linus Torvalds committed 17 years ago
commit f12c037220
44 changed files with 845 additions and 327 deletions
  1. drivers/infiniband/hw/cxgb3/cxio_hal.c (+16 -2)
  2. drivers/infiniband/hw/cxgb3/cxio_hal.h (+1 -0)
  3. drivers/infiniband/hw/cxgb3/cxio_wr.h (+17 -4)
  4. drivers/infiniband/hw/cxgb3/iwch.c (+1 -0)
  5. drivers/infiniband/hw/cxgb3/iwch.h (+1 -0)
  6. drivers/infiniband/hw/cxgb3/iwch_cm.c (+115 -52)
  7. drivers/infiniband/hw/cxgb3/iwch_cm.h (+2 -0)
  8. drivers/infiniband/hw/cxgb3/iwch_provider.c (+1 -1)
  9. drivers/infiniband/hw/cxgb3/iwch_provider.h (+3 -0)
  10. drivers/infiniband/hw/cxgb3/iwch_qp.c (+55 -5)
  11. drivers/infiniband/hw/ehca/ehca_classes.h (+5 -0)
  12. drivers/infiniband/hw/ehca/ehca_cq.c (+11 -0)
  13. drivers/infiniband/hw/ehca/ehca_eq.c (+17 -18)
  14. drivers/infiniband/hw/ehca/ehca_main.c (+34 -2)
  15. drivers/infiniband/hw/ehca/ehca_qp.c (+24 -2)
  16. drivers/infiniband/hw/mlx4/cq.c (+1 -1)
  17. drivers/infiniband/hw/mthca/mthca_mr.c (+0 -13)
  18. drivers/infiniband/hw/mthca/mthca_provider.c (+13 -1)
  19. drivers/infiniband/hw/mthca/mthca_provider.h (+1 -0)
  20. drivers/infiniband/hw/mthca/mthca_user.h (+6 -4)
  21. drivers/infiniband/hw/nes/Kconfig (+1 -0)
  22. drivers/infiniband/hw/nes/nes.c (+4 -0)
  23. drivers/infiniband/hw/nes/nes.h (+3 -2)
  24. drivers/infiniband/hw/nes/nes_cm.c (+4 -4)
  25. drivers/infiniband/hw/nes/nes_hw.c (+293 -78)
  26. drivers/infiniband/hw/nes/nes_hw.h (+14 -5)
  27. drivers/infiniband/hw/nes/nes_nic.c (+107 -73)
  28. drivers/infiniband/hw/nes/nes_utils.c (+3 -7)
  29. drivers/infiniband/hw/nes/nes_verbs.c (+1 -1)
  30. drivers/infiniband/ulp/ipoib/ipoib.h (+5 -2)
  31. drivers/infiniband/ulp/ipoib/ipoib_cm.c (+4 -4)
  32. drivers/infiniband/ulp/ipoib/ipoib_ethtool.c (+1 -1)
  33. drivers/infiniband/ulp/ipoib/ipoib_ib.c (+26 -19)
  34. drivers/infiniband/ulp/ipoib/ipoib_main.c (+2 -1)
  35. drivers/infiniband/ulp/ipoib/ipoib_verbs.c (+26 -13)
  36. drivers/infiniband/ulp/ipoib/ipoib_vlan.c (+3 -0)
  37. drivers/infiniband/ulp/iser/iscsi_iser.c (+3 -1)
  38. drivers/infiniband/ulp/iser/iscsi_iser.h (+7 -0)
  39. drivers/infiniband/ulp/iser/iser_memory.c (+7 -2)
  40. drivers/net/cxgb3/version.h (+1 -1)
  41. drivers/net/mlx4/cq.c (+3 -1)
  42. drivers/net/mlx4/mr.c (+0 -6)
  43. include/linux/mlx4/device.h (+2 -1)
  44. include/scsi/libiscsi.h (+1 -0)

+ 16 - 2
drivers/infiniband/hw/cxgb3/cxio_hal.c

@@ -456,7 +456,8 @@ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count)
 	ptr = cq->sw_rptr;
 	while (!Q_EMPTY(ptr, cq->sw_wptr)) {
 		cqe = cq->sw_queue + (Q_PTR2IDX(ptr, cq->size_log2));
-		if ((SQ_TYPE(*cqe) || (CQE_OPCODE(*cqe) == T3_READ_RESP)) &&
+		if ((SQ_TYPE(*cqe) ||
+		     ((CQE_OPCODE(*cqe) == T3_READ_RESP) && wq->oldest_read)) &&
 		    (CQE_QPID(*cqe) == wq->qpid))
 			(*count)++;
 		ptr++;
@@ -829,7 +830,8 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
 	wqe->mpaattrs = attr->mpaattrs;
 	wqe->qpcaps = attr->qpcaps;
 	wqe->ulpdu_size = cpu_to_be16(attr->tcp_emss);
-	wqe->flags = cpu_to_be32(attr->flags);
+	wqe->rqe_count = cpu_to_be16(attr->rqe_count);
+	wqe->flags_rtr_type = cpu_to_be16(attr->flags|V_RTR_TYPE(attr->rtr_type));
 	wqe->ord = cpu_to_be32(attr->ord);
 	wqe->ird = cpu_to_be32(attr->ird);
 	wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
@@ -1134,6 +1136,18 @@ int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
 	 */
 	if (RQ_TYPE(*hw_cqe) && (CQE_OPCODE(*hw_cqe) == T3_READ_RESP)) {
 
+		/*
+		 * If this is an unsolicited read response, then the read
+		 * was generated by the kernel driver as part of peer-2-peer
+		 * connection setup.  So ignore the completion.
+		 */
+		if (!wq->oldest_read) {
+			if (CQE_STATUS(*hw_cqe))
+				wq->error = 1;
+			ret = -1;
+			goto skip_cqe;
+		}
+
 		/*
 		 * Don't write to the HWCQ, so create a new read req CQE
 		 * in local memory.
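
The check on wq->oldest_read above is what distinguishes a driver-generated zero-byte read completion (posted during peer-2-peer connection setup) from one the application is actually waiting on. A minimal stand-alone sketch of that filtering logic; the struct fields here are simplified stand-ins, not the real cxgb3 types:

    struct cqe { int is_read_resp; int status; };
    struct wq  { void *oldest_read; int error; };

    /* Return 0 to deliver the CQE to the caller, -1 to drop it. */
    static int filter_read_resp(struct wq *wq, const struct cqe *cqe)
    {
        if (cqe->is_read_resp && !wq->oldest_read) {
            /*
             * No application read is outstanding, so this completion
             * belongs to the driver's internal 0B read: hide it, but
             * still latch any error status it carried.
             */
            if (cqe->status)
                wq->error = 1;
            return -1;
        }
        return 0;
    }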

+ 1 - 0
drivers/infiniband/hw/cxgb3/cxio_hal.h

@@ -53,6 +53,7 @@
 #define T3_MAX_PBL_SIZE 256
 #define T3_MAX_RQ_SIZE 1024
 #define T3_MAX_NUM_STAG (1<<15)
+#define T3_MAX_MR_SIZE 0x100000000ULL
 
 #define T3_STAG_UNSET 0xffffffff
 

+ 17 - 4
drivers/infiniband/hw/cxgb3/cxio_wr.h

@@ -278,6 +278,17 @@ enum t3_qp_caps {
 	uP_RI_QP_STAG0_ENABLE = 0x10
 } __attribute__ ((packed));
 
+enum rdma_init_rtr_types {
+	RTR_READ = 1,
+	RTR_WRITE = 2,
+	RTR_SEND = 3,
+};
+
+#define S_RTR_TYPE	2
+#define M_RTR_TYPE	0x3
+#define V_RTR_TYPE(x)	((x) << S_RTR_TYPE)
+#define G_RTR_TYPE(x)	((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)
+
 struct t3_rdma_init_attr {
 	u32 tid;
 	u32 qpid;
@@ -293,7 +304,9 @@ struct t3_rdma_init_attr {
 	u32 ird;
 	u64 qp_dma_addr;
 	u32 qp_dma_size;
-	u32 flags;
+	enum rdma_init_rtr_types rtr_type;
+	u16 flags;
+	u16 rqe_count;
 	u32 irs;
 };
 
@@ -309,8 +322,8 @@ struct t3_rdma_init_wr {
 	u8 mpaattrs;		/* 5 */
 	u8 qpcaps;
 	__be16 ulpdu_size;
-	__be32 flags;		/* bits 31-1 - reservered */
-				/* bit     0 - set if RECV posted */
+	__be16 flags_rtr_type;
+	__be16 rqe_count;
 	__be32 ord;		/* 6 */
 	__be32 ird;
 	__be64 qp_dma_addr;	/* 7 */
@@ -324,7 +337,7 @@ struct t3_genbit {
 };
 
 enum rdma_init_wr_flags {
-	RECVS_POSTED = (1<<0),
+	MPA_INITIATOR = (1<<0),
 	PRIV_QP = (1<<1),
 };
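
The S_/M_/V_/G_ quartet above is the usual cxgb3 convention for packing a sub-field into a word: shift, mask, value-insert, and value-extract. A tiny self-checking example of how rtr_type and the WR flags share the 16-bit flags_rtr_type word (user-space C, reusing the macro definitions from the hunk):

    #include <assert.h>
    #include <stdint.h>

    #define S_RTR_TYPE    2
    #define M_RTR_TYPE    0x3
    #define V_RTR_TYPE(x) ((x) << S_RTR_TYPE)
    #define G_RTR_TYPE(x) ((((x) >> S_RTR_TYPE)) & M_RTR_TYPE)

    int main(void)
    {
        uint16_t flags = 0x1;                   /* MPA_INITIATOR */
        uint16_t word  = flags | V_RTR_TYPE(1); /* RTR_READ = 1 */

        assert(G_RTR_TYPE(word) == 1);  /* field extracts cleanly */
        assert((word & 0x3) == flags);  /* flag bits are untouched */
        return 0;
    }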
 

+ 1 - 0
drivers/infiniband/hw/cxgb3/iwch.c

@@ -83,6 +83,7 @@ static void rnic_init(struct iwch_dev *rnicp)
 	rnicp->attr.max_phys_buf_entries = T3_MAX_PBL_SIZE;
 	rnicp->attr.max_pds = T3_MAX_NUM_PD - 1;
 	rnicp->attr.mem_pgsizes_bitmask = 0x7FFF;	/* 4KB-128MB */
+	rnicp->attr.max_mr_size = T3_MAX_MR_SIZE;
 	rnicp->attr.can_resize_wq = 0;
 	rnicp->attr.max_rdma_reads_per_qp = 8;
 	rnicp->attr.max_rdma_read_resources =

+ 1 - 0
drivers/infiniband/hw/cxgb3/iwch.h

@@ -66,6 +66,7 @@ struct iwch_rnic_attributes {
 	 * size (4k)^i.  Phys block list mode unsupported.
 	 */
 	u32 mem_pgsizes_bitmask;
+	u64 max_mr_size;
 	u8 can_resize_wq;
 
 	/*

+ 115 - 52
drivers/infiniband/hw/cxgb3/iwch_cm.c

@@ -63,6 +63,10 @@ static char *states[] = {
 	NULL,
 };
 
+int peer2peer = 0;
+module_param(peer2peer, int, 0644);
+MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+
 static int ep_timeout_secs = 10;
 module_param(ep_timeout_secs, int, 0644);
 MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
@@ -125,6 +129,12 @@ static void start_ep_timer(struct iwch_ep *ep)
 static void stop_ep_timer(struct iwch_ep *ep)
 {
 	PDBG("%s ep %p\n", __func__, ep);
+	if (!timer_pending(&ep->timer)) {
+		printk(KERN_ERR "%s timer stopped when it's not running!  ep %p state %u\n",
+			__func__, ep, ep->com.state);
+		WARN_ON(1);
+		return;
+	}
 	del_timer_sync(&ep->timer);
 	put_ep(&ep->com);
 }
@@ -508,7 +518,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 	req->len = htonl(len);
 	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -559,7 +569,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 	set_arp_failure_handler(skb, arp_failure_discard);
 	skb_reset_transport_header(skb);
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 	req->len = htonl(mpalen);
 	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -611,7 +621,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 	skb_reset_transport_header(skb);
 	len = skb->len;
 	req = (struct tx_data_wr *) skb_push(skb, sizeof(*req));
-	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
+	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA)|F_WR_COMPL);
 	req->wr_lo = htonl(V_WR_TID(ep->hwtid));
 	req->len = htonl(len);
 	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
@@ -879,6 +889,7 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
 	 * the MPA header is valid.
 	 */
 	state_set(&ep->com, FPDU_MODE);
+	ep->mpa_attr.initiator = 1;
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -901,8 +912,14 @@ static void process_mpa_reply(struct iwch_ep *ep, struct sk_buff *skb)
 	/* bind QP and TID with INIT_WR */
 	err = iwch_modify_qp(ep->com.qp->rhp,
 			     ep->com.qp, mask, &attrs, 1);
-	if (!err)
-		goto out;
+	if (err)
+		goto err;
+
+	if (peer2peer && iwch_rqes_posted(ep->com.qp) == 0) {
+		iwch_post_zb_read(ep->com.qp);
+	}
+
+	goto out;
 err:
 	abort_connection(ep, skb, GFP_KERNEL);
 out:
@@ -995,6 +1012,7 @@ static void process_mpa_request(struct iwch_ep *ep, struct sk_buff *skb)
 	 * If we get here we have accumulated the entire mpa
 	 * start reply message including private data.
 	 */
+	ep->mpa_attr.initiator = 0;
 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
@@ -1065,17 +1083,33 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 
 	PDBG("%s ep %p credits %u\n", __func__, ep, credits);
 
-	if (credits == 0)
+	if (credits == 0) {
+		PDBG("%s 0 credit ack  ep %p state %u\n",
+			__func__, ep, state_read(&ep->com));
 		return CPL_RET_BUF_DONE;
+	}
+
 	BUG_ON(credits != 1);
-	BUG_ON(ep->mpa_skb == NULL);
-	kfree_skb(ep->mpa_skb);
-	ep->mpa_skb = NULL;
 	dst_confirm(ep->dst);
-	if (state_read(&ep->com) == MPA_REP_SENT) {
-		ep->com.rpl_done = 1;
-		PDBG("waking up ep %p\n", ep);
-		wake_up(&ep->com.waitq);
+	if (!ep->mpa_skb) {
+		PDBG("%s rdma_init wr_ack ep %p state %u\n",
+			__func__, ep, state_read(&ep->com));
+		if (ep->mpa_attr.initiator) {
+			PDBG("%s initiator ep %p state %u\n",
+				__func__, ep, state_read(&ep->com));
+			if (peer2peer)
+				iwch_post_zb_read(ep->com.qp);
+		} else {
+			PDBG("%s responder ep %p state %u\n",
+				__func__, ep, state_read(&ep->com));
+			ep->com.rpl_done = 1;
+			wake_up(&ep->com.waitq);
+		}
+	} else {
+		PDBG("%s lsm ack ep %p state %u freeing skb\n",
+			__func__, ep, state_read(&ep->com));
+		kfree_skb(ep->mpa_skb);
+		ep->mpa_skb = NULL;
 	}
 	return CPL_RET_BUF_DONE;
 }
@@ -1083,8 +1117,11 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 {
 	struct iwch_ep *ep = ctx;
+	unsigned long flags;
+	int release = 0;
 
 	PDBG("%s ep %p\n", __func__, ep);
+	BUG_ON(!ep);
 
 	/*
 	 * We get 2 abort replies from the HW.  The first one must
@@ -1095,9 +1132,22 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		return CPL_RET_BUF_DONE;
 	}
 
-	close_complete_upcall(ep);
-	state_set(&ep->com, DEAD);
-	release_ep_resources(ep);
+	spin_lock_irqsave(&ep->com.lock, flags);
+	switch (ep->com.state) {
+	case ABORTING:
+		close_complete_upcall(ep);
+		__state_set(&ep->com, DEAD);
+		release = 1;
+		break;
+	default:
+		printk(KERN_ERR "%s ep %p state %d\n",
+		     __func__, ep, ep->com.state);
+		break;
+	}
+	spin_unlock_irqrestore(&ep->com.lock, flags);
+
+	if (release)
+		release_ep_resources(ep);
 	return CPL_RET_BUF_DONE;
 }
 
@@ -1470,7 +1520,8 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	struct sk_buff *rpl_skb;
 	struct iwch_qp_attributes attrs;
 	int ret;
-	int state;
+	int release = 0;
+	unsigned long flags;
 
 	if (is_neg_adv_abort(req->status)) {
 		PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep,
@@ -1488,9 +1539,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		return CPL_RET_BUF_DONE;
 	}
 
-	state = state_read(&ep->com);
-	PDBG("%s ep %p state %u\n", __func__, ep, state);
-	switch (state) {
+	spin_lock_irqsave(&ep->com.lock, flags);
+	PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state);
+	switch (ep->com.state) {
 	case CONNECTING:
 		break;
 	case MPA_REQ_WAIT:
@@ -1536,21 +1587,25 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		break;
 	case DEAD:
 		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
+		spin_unlock_irqrestore(&ep->com.lock, flags);
 		return CPL_RET_BUF_DONE;
 	default:
 		BUG_ON(1);
 		break;
 	}
 	dst_confirm(ep->dst);
+	if (ep->com.state != ABORTING) {
+		__state_set(&ep->com, DEAD);
+		release = 1;
+	}
+	spin_unlock_irqrestore(&ep->com.lock, flags);
 
 	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
 	if (!rpl_skb) {
 		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
 		       __func__);
-		dst_release(ep->dst);
-		l2t_release(L2DATA(ep->com.tdev), ep->l2t);
-		put_ep(&ep->com);
-		return CPL_RET_BUF_DONE;
+		release = 1;
+		goto out;
 	}
 	rpl_skb->priority = CPL_PRIORITY_DATA;
 	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
@@ -1559,10 +1614,9 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
 	rpl->cmd = CPL_ABORT_NO_RST;
 	cxgb3_ofld_send(ep->com.tdev, rpl_skb);
-	if (state != ABORTING) {
-		state_set(&ep->com, DEAD);
+out:
+	if (release)
 		release_ep_resources(ep);
-	}
 	return CPL_RET_BUF_DONE;
 }
 
@@ -1661,15 +1715,18 @@ static void ep_timeout(unsigned long arg)
 	struct iwch_ep *ep = (struct iwch_ep *)arg;
 	struct iwch_qp_attributes attrs;
 	unsigned long flags;
+	int abort = 1;
 
 	spin_lock_irqsave(&ep->com.lock, flags);
 	PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
 	     ep->com.state);
 	switch (ep->com.state) {
 	case MPA_REQ_SENT:
+		__state_set(&ep->com, ABORTING);
 		connect_reply_upcall(ep, -ETIMEDOUT);
 		break;
 	case MPA_REQ_WAIT:
+		__state_set(&ep->com, ABORTING);
 		break;
 	case CLOSING:
 	case MORIBUND:
@@ -1679,13 +1736,17 @@ static void ep_timeout(unsigned long arg)
 				     ep->com.qp, IWCH_QP_ATTR_NEXT_STATE,
 				     &attrs, 1);
 		}
+		__state_set(&ep->com, ABORTING);
 		break;
 	default:
-		BUG();
+		printk(KERN_ERR "%s unexpected state ep %p state %u\n",
+			__func__, ep, ep->com.state);
+		WARN_ON(1);
+		abort = 0;
 	}
-	__state_set(&ep->com, CLOSING);
 	spin_unlock_irqrestore(&ep->com.lock, flags);
-	abort_connection(ep, NULL, GFP_ATOMIC);
+	if (abort)
+		abort_connection(ep, NULL, GFP_ATOMIC);
 	put_ep(&ep->com);
 }
 
@@ -1762,16 +1823,19 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	if (err)
 		goto err;
 
+	/* if needed, wait for wr_ack */
+	if (iwch_rqes_posted(qp)) {
+		wait_event(ep->com.waitq, ep->com.rpl_done);
+		err = ep->com.rpl_err;
+		if (err)
+			goto err;
+	}
+
 	err = send_mpa_reply(ep, conn_param->private_data,
 			     conn_param->private_data_len);
 	if (err)
 		goto err;
 
-	/* wait for wr_ack */
-	wait_event(ep->com.waitq, ep->com.rpl_done);
-	err = ep->com.rpl_err;
-	if (err)
-		goto err;
 
 	state_set(&ep->com, FPDU_MODE);
 	established_upcall(ep);
@@ -1968,40 +2032,39 @@ int iwch_ep_disconnect(struct iwch_ep *ep, int abrupt, gfp_t gfp)
 	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
 	     states[ep->com.state], abrupt);
 
-	if (ep->com.state == DEAD) {
-		PDBG("%s already dead ep %p\n", __func__, ep);
-		goto out;
-	}
-
-	if (abrupt) {
-		if (ep->com.state != ABORTING) {
-			ep->com.state = ABORTING;
-			close = 1;
-		}
-		goto out;
-	}
-
 	switch (ep->com.state) {
 	case MPA_REQ_WAIT:
 	case MPA_REQ_SENT:
 	case MPA_REQ_RCVD:
 	case MPA_REP_SENT:
 	case FPDU_MODE:
-		start_ep_timer(ep);
-		ep->com.state = CLOSING;
 		close = 1;
+		if (abrupt)
+			ep->com.state = ABORTING;
+		else {
+			ep->com.state = CLOSING;
+			start_ep_timer(ep);
+		}
 		break;
 	case CLOSING:
-		ep->com.state = MORIBUND;
 		close = 1;
+		if (abrupt) {
+			stop_ep_timer(ep);
+			ep->com.state = ABORTING;
+		} else
+			ep->com.state = MORIBUND;
 		break;
 	case MORIBUND:
+	case ABORTING:
+	case DEAD:
+		PDBG("%s ignoring disconnect ep %p state %u\n",
+		     __func__, ep, ep->com.state);
 		break;
 	default:
 		BUG();
 		break;
 	}
-out:
+
 	spin_unlock_irqrestore(&ep->com.lock, flags);
 	if (close) {
 		if (abrupt)
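
The rewritten paths in this file (tx_ack, abort_rpl, peer_abort, ep_timeout, iwch_ep_disconnect) all follow the same shape: pick the state transition while holding ep->com.lock, remember the work to do in a local flag (close, abort, release), then drop the lock and do it. A simplified user-space model of that pattern, with a pthread mutex standing in for spin_lock_irqsave():

    #include <pthread.h>

    enum state { FPDU_MODE, CLOSING, ABORTING, DEAD };

    struct ep {
        pthread_mutex_t lock;
        enum state state;
    };

    static void disconnect(struct ep *ep, int abrupt)
    {
        int close = 0;

        pthread_mutex_lock(&ep->lock);
        switch (ep->state) {
        case FPDU_MODE:
            ep->state = abrupt ? ABORTING : CLOSING;
            close = 1;
            break;
        default:        /* already closing/aborting/dead: ignore */
            break;
        }
        pthread_mutex_unlock(&ep->lock);

        if (close) {
            /* send the close or abort message here, outside the
             * lock, where sleeping and re-entry are safe */
        }
    }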

+ 2 - 0
drivers/infiniband/hw/cxgb3/iwch_cm.h

@@ -56,6 +56,7 @@
 #define put_ep(ep) { \
 	PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__,  \
 	     ep, atomic_read(&((ep)->kref.refcount))); \
+	WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
 	kref_put(&((ep)->kref), __free_ep); \
 }
 
@@ -225,5 +226,6 @@ int iwch_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, st
 
 int __init iwch_cm_init(void);
 void __exit iwch_cm_term(void);
+extern int peer2peer;
 
 #endif				/* _IWCH_CM_H_ */
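
The WARN_ON added to put_ep() catches double-put bugs before kref_put() frees an endpoint some other path still uses. The same guard in plain C11 atomics, as a sketch with assert standing in for WARN_ON:

    #include <assert.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    struct ep { atomic_int refcnt; };

    static void put_ep(struct ep *ep)
    {
        /* A count below 1 means someone already dropped our reference. */
        assert(atomic_load(&ep->refcnt) >= 1);
        if (atomic_fetch_sub(&ep->refcnt, 1) == 1)
            free(ep);               /* last reference gone */
    }

    int main(void)
    {
        struct ep *ep = calloc(1, sizeof(*ep));
        if (!ep)
            return 1;
        atomic_init(&ep->refcnt, 2);    /* two holders */
        put_ep(ep);
        put_ep(ep);                     /* second put frees */
        return 0;
    }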

+ 1 - 1
drivers/infiniband/hw/cxgb3/iwch_provider.c

@@ -998,7 +998,7 @@ static int iwch_query_device(struct ib_device *ibdev,
 	props->device_cap_flags = dev->device_cap_flags;
 	props->vendor_id = (u32)dev->rdev.rnic_info.pdev->vendor;
 	props->vendor_part_id = (u32)dev->rdev.rnic_info.pdev->device;
-	props->max_mr_size = ~0ull;
+	props->max_mr_size = dev->attr.max_mr_size;
 	props->max_qp = dev->attr.max_qps;
 	props->max_qp_wr = dev->attr.max_wrs;
 	props->max_sge = dev->attr.max_sge_per_wr;
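
T3_MAX_MR_SIZE is 0x100000000, i.e. 4 GB, so the device now reports an honest per-MR limit instead of the previous ~0ull ("unlimited") answer that userspace could not actually rely on.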

+ 3 - 0
drivers/infiniband/hw/cxgb3/iwch_provider.h

@@ -118,6 +118,7 @@ enum IWCH_QP_FLAGS {
 };
 
 struct iwch_mpa_attributes {
+	u8 initiator;
 	u8 recv_marker_enabled;
 	u8 xmit_marker_enabled;	/* iWARP: enable inbound Read Resp. */
 	u8 crc_enabled;
@@ -322,6 +323,7 @@ enum iwch_qp_query_flags {
 	IWCH_QP_QUERY_TEST_USERWRITE = 0x32	/* Test special */
 };
 
+u16 iwch_rqes_posted(struct iwch_qp *qhp);
 int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		      struct ib_send_wr **bad_wr);
 int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
@@ -331,6 +333,7 @@ int iwch_bind_mw(struct ib_qp *qp,
 			     struct ib_mw_bind *mw_bind);
 int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
+int iwch_post_zb_read(struct iwch_qp *qhp);
 int iwch_register_device(struct iwch_dev *dev);
 void iwch_unregister_device(struct iwch_dev *dev);
 int iwch_quiesce_qps(struct iwch_cq *chp);

+ 55 - 5
drivers/infiniband/hw/cxgb3/iwch_qp.c

@@ -586,6 +586,36 @@ static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
 	}
 }
 
+int iwch_post_zb_read(struct iwch_qp *qhp)
+{
+	union t3_wr *wqe;
+	struct sk_buff *skb;
+	u8 flit_cnt = sizeof(struct t3_rdma_read_wr) >> 3;
+
+	PDBG("%s enter\n", __func__);
+	skb = alloc_skb(40, GFP_KERNEL);
+	if (!skb) {
+		printk(KERN_ERR "%s cannot send zb_read!!\n", __func__);
+		return -ENOMEM;
+	}
+	wqe = (union t3_wr *)skb_put(skb, sizeof(struct t3_rdma_read_wr));
+	memset(wqe, 0, sizeof(struct t3_rdma_read_wr));
+	wqe->read.rdmaop = T3_READ_REQ;
+	wqe->read.reserved[0] = 0;
+	wqe->read.reserved[1] = 0;
+	wqe->read.reserved[2] = 0;
+	wqe->read.rem_stag = cpu_to_be32(1);
+	wqe->read.rem_to = cpu_to_be64(1);
+	wqe->read.local_stag = cpu_to_be32(1);
+	wqe->read.local_len = cpu_to_be32(0);
+	wqe->read.local_to = cpu_to_be64(1);
+	wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_READ));
+	wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
+						V_FW_RIWR_LEN(flit_cnt));
+	skb->priority = CPL_PRIORITY_DATA;
+	return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+}
+
 /*
  * This posts a TERMINATE with layer=RDMA, type=catastrophic.
  */
@@ -671,11 +701,18 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
 
 
 /*
- * Return non zero if at least one RECV was pre-posted.
+ * Return count of RECV WRs posted
  */
-static int rqes_posted(struct iwch_qp *qhp)
+u16 iwch_rqes_posted(struct iwch_qp *qhp)
 {
-	return fw_riwrh_opcode((struct fw_riwrh *)qhp->wq.queue) == T3_WR_RCV;
+	union t3_wr *wqe = qhp->wq.queue;
+	u16 count = 0;
+	while ((count+1) != 0 && fw_riwrh_opcode((struct fw_riwrh *)wqe) == T3_WR_RCV) {
+		count++;
+		wqe++;
+	}
+	PDBG("%s qhp %p count %u\n", __func__, qhp, count);
+	return count;
 }
 
 static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
@@ -716,8 +753,17 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
 	init_attr.ird = qhp->attr.max_ird;
 	init_attr.qp_dma_addr = qhp->wq.dma_addr;
 	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
-	init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+	init_attr.rqe_count = iwch_rqes_posted(qhp);
+	init_attr.flags = qhp->attr.mpa_attr.initiator ? MPA_INITIATOR : 0;
 	init_attr.flags |= capable(CAP_NET_BIND_SERVICE) ? PRIV_QP : 0;
+	if (peer2peer) {
+		init_attr.rtr_type = RTR_READ;
+		if (init_attr.ord == 0 && qhp->attr.mpa_attr.initiator)
+			init_attr.ord = 1;
+		if (init_attr.ird == 0 && !qhp->attr.mpa_attr.initiator)
+			init_attr.ird = 1;
+	} else
+		init_attr.rtr_type = 0;
 	init_attr.irs = qhp->ep->rcv_seq;
 	PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
 	     "flags 0x%x qpcaps 0x%x\n", __func__,
@@ -832,6 +878,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
 				abort=0;
 				disconnect = 1;
 				ep = qhp->ep;
+				get_ep(&ep->com);
 			}
 			flush_qp(qhp, &flag);
 			break;
@@ -848,6 +895,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
 				abort=1;
 				disconnect = 1;
 				ep = qhp->ep;
+				get_ep(&ep->com);
 			}
 			goto err;
 			break;
@@ -929,8 +977,10 @@ out:
 	 * on the EP.  This can be a normal close (RTS->CLOSING) or
 	 * an abnormal close (RTS/CLOSING->ERROR).
 	 */
-	if (disconnect)
+	if (disconnect) {
 		iwch_ep_disconnect(ep, abort, GFP_KERNEL);
+		put_ep(&ep->com);
+	}
 
 	/*
 	 * If free is 1, then we've disassociated the EP from the QP
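
iwch_rqes_posted() above replaces a boolean "was anything posted?" test with an actual count of consecutive receive WRs at the head of the work queue, which then feeds rqe_count in the RDMA init WR. The counting loop, modeled stand-alone with an array of opcodes standing in for the WQ memory:

    #include <stddef.h>
    #include <stdint.h>

    enum { WR_RCV = 1, WR_SEND = 2 };

    static uint16_t rqes_posted(const uint8_t *opcode, size_t qlen)
    {
        uint16_t count = 0;

        /* Stop at the first non-RECV entry; the kernel version's
         * (count + 1) != 0 is the same u16 overflow guard as here. */
        while (count < qlen && count != UINT16_MAX &&
               opcode[count] == WR_RCV)
            count++;
        return count;
    }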

+ 5 - 0
drivers/infiniband/hw/ehca/ehca_classes.h

@@ -66,6 +66,7 @@ struct ehca_av;
 #include "ehca_irq.h"
 
 #define EHCA_EQE_CACHE_SIZE 20
+#define EHCA_MAX_NUM_QUEUES 0xffff
 
 struct ehca_eqe_cache_entry {
 	struct ehca_eqe *eqe;
@@ -127,6 +128,8 @@ struct ehca_shca {
 	/* MR pgsize: bit 0-3 means 4K, 64K, 1M, 16M respectively */
 	u32 hca_cap_mr_pgsize;
 	int max_mtu;
+	atomic_t num_cqs;
+	atomic_t num_qps;
 };
 
 struct ehca_pd {
@@ -344,6 +347,8 @@ extern int ehca_use_hp_mr;
 extern int ehca_scaling_code;
 extern int ehca_lock_hcalls;
 extern int ehca_nr_ports;
+extern int ehca_max_cq;
+extern int ehca_max_qp;
 
 struct ipzu_queue_resp {
 	u32 qe_size;      /* queue entry size */

+ 11 - 0
drivers/infiniband/hw/ehca/ehca_cq.c

@@ -132,10 +132,19 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
 	if (cqe >= 0xFFFFFFFF - 64 - additional_cqe)
 		return ERR_PTR(-EINVAL);
 
+	if (!atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)) {
+		ehca_err(device, "Unable to create CQ, max number of %i "
+			"CQs reached.", ehca_max_cq);
+		ehca_err(device, "To increase the maximum number of CQs "
+			"use the number_of_cqs module parameter.\n");
+		return ERR_PTR(-ENOSPC);
+	}
+
 	my_cq = kmem_cache_zalloc(cq_cache, GFP_KERNEL);
 	if (!my_cq) {
 		ehca_err(device, "Out of memory for ehca_cq struct device=%p",
 			 device);
+		atomic_dec(&shca->num_cqs);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -305,6 +314,7 @@ create_cq_exit2:
 create_cq_exit1:
 	kmem_cache_free(cq_cache, my_cq);
 
+	atomic_dec(&shca->num_cqs);
 	return cq;
 }
 
@@ -359,6 +369,7 @@ int ehca_destroy_cq(struct ib_cq *cq)
 	ipz_queue_dtor(NULL, &my_cq->ipz_queue);
 	kmem_cache_free(cq_cache, my_cq);
 
+	atomic_dec(&shca->num_cqs);
 	return 0;
 }
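
atomic_add_unless() makes the limit check and the increment a single atomic step, so two concurrent ehca_create_cq() calls cannot both slip past the cap. The equivalent compare-and-swap loop in C11 atomics, as a sketch:

    #include <stdatomic.h>

    /* Returns 1 and takes a slot if below max, 0 if the cap is hit
     * (models atomic_add_unless(&shca->num_cqs, 1, ehca_max_cq)). */
    static int reserve_slot(atomic_int *nr, int max)
    {
        int cur = atomic_load(nr);

        while (cur < max) {
            /* On failure, cur is reloaded and the bound re-checked. */
            if (atomic_compare_exchange_weak(nr, &cur, cur + 1))
                return 1;
        }
        return 0;
    }

Every error path after a successful reservation must give the slot back, which is why the patch adds an atomic_dec() to each early return in the create paths.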
 

+ 17 - 18
drivers/infiniband/hw/ehca/ehca_eq.c

@@ -54,7 +54,8 @@ int ehca_create_eq(struct ehca_shca *shca,
 		   struct ehca_eq *eq,
 		   const enum ehca_eq_type type, const u32 length)
 {
-	u64 ret;
+	int ret;
+	u64 h_ret;
 	u32 nr_pages;
 	u32 i;
 	void *vpage;
@@ -73,15 +74,15 @@ int ehca_create_eq(struct ehca_shca *shca,
 		return -EINVAL;
 	}
 
-	ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
-				       &eq->pf,
-				       type,
-				       length,
-				       &eq->ipz_eq_handle,
-				       &eq->length,
-				       &nr_pages, &eq->ist);
+	h_ret = hipz_h_alloc_resource_eq(shca->ipz_hca_handle,
+					 &eq->pf,
+					 type,
+					 length,
+					 &eq->ipz_eq_handle,
+					 &eq->length,
+					 &nr_pages, &eq->ist);
 
-	if (ret != H_SUCCESS) {
+	if (h_ret != H_SUCCESS) {
 		ehca_err(ib_dev, "Can't allocate EQ/NEQ. eq=%p", eq);
 		return -EINVAL;
 	}
@@ -97,24 +98,22 @@ int ehca_create_eq(struct ehca_shca *shca,
 		u64 rpage;
 
 		vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-		if (!vpage) {
-			ret = H_RESOURCE;
+		if (!vpage)
 			goto create_eq_exit2;
-		}
 
 		rpage = virt_to_abs(vpage);
-		ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
-					       eq->ipz_eq_handle,
-					       &eq->pf,
-					       0, 0, rpage, 1);
+		h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
+						 eq->ipz_eq_handle,
+						 &eq->pf,
+						 0, 0, rpage, 1);
 
 		if (i == (nr_pages - 1)) {
 			/* last page */
 			vpage = ipz_qpageit_get_inc(&eq->ipz_queue);
-			if (ret != H_SUCCESS || vpage)
+			if (h_ret != H_SUCCESS || vpage)
 				goto create_eq_exit2;
 		} else {
-			if (ret != H_PAGE_REGISTERED || !vpage)
+			if (h_ret != H_PAGE_REGISTERED || !vpage)
 				goto create_eq_exit2;
 		}
 	}
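
Splitting int ret (a Linux errno) from u64 h_ret (a hypervisor status such as H_SUCCESS or H_PAGE_REGISTERED) keeps the two value spaces from being mixed, which was easy to do with a single u64 ret. The shape of the fix, reduced to a sketch with a stand-in hcall:

    #include <errno.h>
    #include <stdint.h>

    #define H_SUCCESS 0ULL

    static uint64_t hcall_alloc_eq(void) { return H_SUCCESS; }

    static int create_eq(void)
    {
        uint64_t h_ret = hcall_alloc_eq();  /* firmware status space */

        if (h_ret != H_SUCCESS)
            return -EINVAL;     /* translate once, at the boundary */
        return 0;               /* errno space from here on */
    }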

+ 34 - 2
drivers/infiniband/hw/ehca/ehca_main.c

@@ -68,6 +68,8 @@ int ehca_port_act_time = 30;
 int ehca_static_rate   = -1;
 int ehca_scaling_code  = 0;
 int ehca_lock_hcalls   = -1;
+int ehca_max_cq        = -1;
+int ehca_max_qp        = -1;
 
 module_param_named(open_aqp1,     ehca_open_aqp1,     bool, S_IRUGO);
 module_param_named(debug_level,   ehca_debug_level,   int,  S_IRUGO);
@@ -79,6 +81,8 @@ module_param_named(poll_all_eqs,  ehca_poll_all_eqs,  bool, S_IRUGO);
 module_param_named(static_rate,   ehca_static_rate,   int,  S_IRUGO);
 module_param_named(scaling_code,  ehca_scaling_code,  bool, S_IRUGO);
 module_param_named(lock_hcalls,   ehca_lock_hcalls,   bool, S_IRUGO);
+module_param_named(number_of_cqs, ehca_max_cq,        int,  S_IRUGO);
+module_param_named(number_of_qps, ehca_max_qp,        int,  S_IRUGO);
 
 MODULE_PARM_DESC(open_aqp1,
 		 "Open AQP1 on startup (default: no)");
@@ -104,6 +108,12 @@ MODULE_PARM_DESC(scaling_code,
 MODULE_PARM_DESC(lock_hcalls,
 		 "Serialize all hCalls made by the driver "
 		 "(default: autodetect)");
+MODULE_PARM_DESC(number_of_cqs,
+		"Max number of CQs which can be allocated "
+		"(default: autodetect)");
+MODULE_PARM_DESC(number_of_qps,
+		"Max number of QPs which can be allocated "
+		"(default: autodetect)");
 
 DEFINE_RWLOCK(ehca_qp_idr_lock);
 DEFINE_RWLOCK(ehca_cq_idr_lock);
@@ -355,6 +365,25 @@ static int ehca_sense_attributes(struct ehca_shca *shca)
 		if (rblock->memory_page_size_supported & pgsize_map[i])
 			shca->hca_cap_mr_pgsize |= pgsize_map[i + 1];
 
+	/* Set maximum number of CQs and QPs to calculate EQ size */
+	if (ehca_max_qp == -1)
+		ehca_max_qp = min_t(int, rblock->max_qp, EHCA_MAX_NUM_QUEUES);
+	else if (ehca_max_qp < 1 || ehca_max_qp > rblock->max_qp) {
+		ehca_gen_err("Requested number of QPs is out of range (1 - %i) "
+			"specified by HW", rblock->max_qp);
+		ret = -EINVAL;
+		goto sense_attributes1;
+	}
+
+	if (ehca_max_cq == -1)
+		ehca_max_cq = min_t(int, rblock->max_cq, EHCA_MAX_NUM_QUEUES);
+	else if (ehca_max_cq < 1 || ehca_max_cq > rblock->max_cq) {
+		ehca_gen_err("Requested number of CQs is out of range (1 - %i) "
+			"specified by HW", rblock->max_cq);
+		ret = -EINVAL;
+		goto sense_attributes1;
+	}
+
 	/* query max MTU from first port -- it's the same for all ports */
 	port = (struct hipz_query_port *)rblock;
 	h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
@@ -684,7 +713,7 @@ static int __devinit ehca_probe(struct of_device *dev,
 	struct ehca_shca *shca;
 	const u64 *handle;
 	struct ib_pd *ibpd;
-	int ret, i;
+	int ret, i, eq_size;
 
 	handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
 	if (!handle) {
@@ -705,6 +734,8 @@ static int __devinit ehca_probe(struct of_device *dev,
 		return -ENOMEM;
 	}
 	mutex_init(&shca->modify_mutex);
+	atomic_set(&shca->num_cqs, 0);
+	atomic_set(&shca->num_qps, 0);
 	for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
 		spin_lock_init(&shca->sport[i].mod_sqp_lock);
 
@@ -724,8 +755,9 @@ static int __devinit ehca_probe(struct of_device *dev,
 		goto probe1;
 	}
 
+	eq_size = 2 * ehca_max_cq + 4 * ehca_max_qp;
 	/* create event queues */
-	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, 2048);
+	ret = ehca_create_eq(shca, &shca->eq, EHCA_EQ, eq_size);
 	if (ret) {
 		ehca_err(&shca->ib_device, "Cannot create EQ.");
 		goto probe1;
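
With this sizing rule the event queue grows with the configured limits instead of staying at the old fixed 2048 entries: two EQ entries are reserved per CQ and four per QP, so for example number_of_cqs=100 and number_of_qps=200 gives eq_size = 2 * 100 + 4 * 200 = 1000, while the autodetected caps (up to EHCA_MAX_NUM_QUEUES = 0xffff each) can make it far larger.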

+ 24 - 2
drivers/infiniband/hw/ehca/ehca_qp.c

@@ -421,8 +421,18 @@ static struct ehca_qp *internal_create_qp(
 	u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
 	unsigned long flags;
 
-	if (init_attr->create_flags)
+	if (!atomic_add_unless(&shca->num_qps, 1, ehca_max_qp)) {
+		ehca_err(pd->device, "Unable to create QP, max number of %i "
+			 "QPs reached.", ehca_max_qp);
+		ehca_err(pd->device, "To increase the maximum number of QPs "
+			 "use the number_of_qps module parameter.\n");
+		return ERR_PTR(-ENOSPC);
+	}
+
+	if (init_attr->create_flags) {
+		atomic_dec(&shca->num_qps);
 		return ERR_PTR(-EINVAL);
+	}
 
 	memset(&parms, 0, sizeof(parms));
 	qp_type = init_attr->qp_type;
@@ -431,6 +441,7 @@ static struct ehca_qp *internal_create_qp(
 		init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
 		ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
 			 init_attr->sq_sig_type);
+		atomic_dec(&shca->num_qps);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -455,6 +466,7 @@ static struct ehca_qp *internal_create_qp(
 
 	if (is_llqp && has_srq) {
 		ehca_err(pd->device, "LLQPs can't have an SRQ");
+		atomic_dec(&shca->num_qps);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -466,6 +478,7 @@ static struct ehca_qp *internal_create_qp(
 			ehca_err(pd->device, "no more than three SGEs "
 				 "supported for SRQ  pd=%p  max_sge=%x",
 				 pd, init_attr->cap.max_recv_sge);
+			atomic_dec(&shca->num_qps);
 			return ERR_PTR(-EINVAL);
 		}
 	}
@@ -477,6 +490,7 @@ static struct ehca_qp *internal_create_qp(
 	    qp_type != IB_QPT_SMI &&
 	    qp_type != IB_QPT_GSI) {
 		ehca_err(pd->device, "wrong QP Type=%x", qp_type);
+		atomic_dec(&shca->num_qps);
 		return ERR_PTR(-EINVAL);
 	}
 
@@ -490,6 +504,7 @@ static struct ehca_qp *internal_create_qp(
 					 "or max_rq_wr=%x for RC LLQP",
 					 init_attr->cap.max_send_wr,
 					 init_attr->cap.max_recv_wr);
+				atomic_dec(&shca->num_qps);
 				return ERR_PTR(-EINVAL);
 			}
 			break;
@@ -497,6 +512,7 @@ static struct ehca_qp *internal_create_qp(
 			if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
 				ehca_err(pd->device, "UD LLQP not supported "
 					 "by this adapter");
+				atomic_dec(&shca->num_qps);
 				return ERR_PTR(-ENOSYS);
 			}
 			if (!(init_attr->cap.max_send_sge <= 5
@@ -508,20 +524,22 @@ static struct ehca_qp *internal_create_qp(
 					 "or max_recv_sge=%x for UD LLQP",
 					 init_attr->cap.max_send_sge,
 					 init_attr->cap.max_recv_sge);
+				atomic_dec(&shca->num_qps);
 				return ERR_PTR(-EINVAL);
 			} else if (init_attr->cap.max_send_wr > 255) {
 				ehca_err(pd->device,
 					 "Invalid Number of "
 					 "max_send_wr=%x for UD QP_TYPE=%x",
 					 init_attr->cap.max_send_wr, qp_type);
+				atomic_dec(&shca->num_qps);
 				return ERR_PTR(-EINVAL);
 			}
 			break;
 		default:
 			ehca_err(pd->device, "unsupported LL QP Type=%x",
 				 qp_type);
+			atomic_dec(&shca->num_qps);
 			return ERR_PTR(-EINVAL);
-			break;
 		}
 	} else {
 		int max_sge = (qp_type == IB_QPT_UD || qp_type == IB_QPT_SMI
@@ -533,6 +551,7 @@ static struct ehca_qp *internal_create_qp(
 				 "send_sge=%x recv_sge=%x max_sge=%x",
 				 init_attr->cap.max_send_sge,
 				 init_attr->cap.max_recv_sge, max_sge);
+			atomic_dec(&shca->num_qps);
 			return ERR_PTR(-EINVAL);
 		}
 	}
@@ -543,6 +562,7 @@ static struct ehca_qp *internal_create_qp(
 	my_qp = kmem_cache_zalloc(qp_cache, GFP_KERNEL);
 	if (!my_qp) {
 		ehca_err(pd->device, "pd=%p not enough memory to alloc qp", pd);
+		atomic_dec(&shca->num_qps);
 		return ERR_PTR(-ENOMEM);
 	}
 
@@ -823,6 +843,7 @@ create_qp_exit1:
 
 create_qp_exit0:
 	kmem_cache_free(qp_cache, my_qp);
+	atomic_dec(&shca->num_qps);
 	return ERR_PTR(ret);
 }
 
@@ -1948,6 +1969,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
 	if (HAS_SQ(my_qp))
 		ipz_queue_dtor(my_pd, &my_qp->ipz_squeue);
 	kmem_cache_free(qp_cache, my_qp);
+	atomic_dec(&shca->num_qps);
 	return 0;
 }
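
internal_create_qp() has roughly a dozen early returns, and each now needs its own atomic_dec(&shca->num_qps). A common alternative is a single undo point behind a goto, sketched below; the patch keeps the inline decrements, a smaller diff but one that the next added error path can silently miss:

    #include <errno.h>
    #include <stdatomic.h>

    static atomic_int num_qps;

    static int create_qp(int bad_attr, int no_memory)
    {
        int ret = 0;

        atomic_fetch_add(&num_qps, 1);  /* stand-in for the cap check */
        if (bad_attr) {
            ret = -EINVAL;
            goto out_dec;
        }
        if (no_memory) {
            ret = -ENOMEM;
            goto out_dec;
        }
        return 0;

    out_dec:
        atomic_fetch_sub(&num_qps, 1);  /* one place to undo the count */
        return ret;
    }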
 

+ 1 - 1
drivers/infiniband/hw/mlx4/cq.c

@@ -221,7 +221,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 	}
 
 	err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
-			    cq->db.dma, &cq->mcq);
+			    cq->db.dma, &cq->mcq, 0);
 	if (err)
 		goto err_dbmap;
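
The new trailing 0 is the "collapsed" flag that the mlx4_core patch in this merge adds to mlx4_cq_alloc() (see the drivers/net/mlx4/cq.c and include/linux/mlx4/device.h entries in the file list); CQs created through the IB verbs path are never collapsed, so the flag is passed as 0 here.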
 

+ 0 - 13
drivers/infiniband/hw/mthca/mthca_mr.c

@@ -818,15 +818,9 @@ int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
 
 void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
 {
-	u32 key;
-
 	if (!fmr->maps)
 		return;
 
-	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
-	key &= dev->limits.num_mpts - 1;
-	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
-
 	fmr->maps = 0;
 
 	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
@@ -834,16 +828,9 @@ void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
 
 void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
 {
-	u32 key;
-
 	if (!fmr->maps)
 		return;
 
-	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
-	key &= dev->limits.num_mpts - 1;
-	key = adjust_key(dev, key);
-	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
-
 	fmr->maps = 0;
 
 	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
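
The deleted lines rewound the FMR's R_Key to its original value on every unmap, so the very next mapping reused a key a remote peer might still hold. With them gone, the key advanced at map time keeps advancing, and a stale R_Key no longer matches the region's next mapping; that is the point of the two "Avoid recycling old FMR R_Keys too soon" commits in this merge, and the matching mlx4_core change (drivers/net/mlx4/mr.c in the file list) does the same.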

+ 13 - 1
drivers/infiniband/hw/mthca/mthca_provider.c

@@ -39,6 +39,8 @@
 #include <rdma/ib_smi.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
+
+#include <linux/sched.h>
 #include <linux/mm.h>
 
 #include "mthca_dev.h"
@@ -367,6 +369,8 @@ static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
 		return ERR_PTR(-EFAULT);
 	}
 
+	context->reg_mr_warned = 0;
+
 	return &context->ibucontext;
 }
 
@@ -1013,7 +1017,15 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	int err = 0;
 	int write_mtt_size;
 
-	if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
+	if (udata->inlen - sizeof (struct ib_uverbs_cmd_hdr) < sizeof ucmd) {
+		if (!to_mucontext(pd->uobject->context)->reg_mr_warned) {
+			mthca_warn(dev, "Process '%s' did not pass in MR attrs.\n",
+				   current->comm);
+			mthca_warn(dev, "  Update libmthca to fix this.\n");
+		}
+		++to_mucontext(pd->uobject->context)->reg_mr_warned;
+		ucmd.mr_attrs = 0;
+	} else if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
 		return ERR_PTR(-EFAULT);
 
 	mr = kmalloc(sizeof *mr, GFP_KERNEL);
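
The inlen test above is what lets the kernel accept commands from an older libmthca that sends no struct mthca_reg_mr payload, instead of bumping the userspace ABI version. The same idea as a stand-alone sketch (hypothetical struct and field names, not the real uverbs types, and without the command-header accounting the kernel does):

    #include <string.h>

    struct udata      { const void *inbuf; size_t inlen; };
    struct reg_mr_cmd { unsigned int mr_attrs; unsigned int reserved; };

    static void parse_reg_mr(const struct udata *udata,
                             struct reg_mr_cmd *cmd)
    {
        if (udata->inlen < sizeof(*cmd)) {
            /* Old library: no payload sent, default everything. */
            memset(cmd, 0, sizeof(*cmd));
            return;
        }
        memcpy(cmd, udata->inbuf, sizeof(*cmd));
    }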

+ 1 - 0
drivers/infiniband/hw/mthca/mthca_provider.h

@@ -67,6 +67,7 @@ struct mthca_ucontext {
 	struct ib_ucontext          ibucontext;
 	struct mthca_uar            uar;
 	struct mthca_user_db_table *db_tab;
+	int			    reg_mr_warned;
 };
 
 struct mthca_mtt;

+ 6 - 4
drivers/infiniband/hw/mthca/mthca_user.h

@@ -41,7 +41,7 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MTHCA_UVERBS_ABI_VERSION	2
+#define MTHCA_UVERBS_ABI_VERSION	1
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -62,10 +62,12 @@ struct mthca_alloc_pd_resp {
 };
 
 struct mthca_reg_mr {
+/*
+ * Mark the memory region with a DMA attribute that causes
+ * in-flight DMA to be flushed when the region is written to:
+ */
+#define MTHCA_MR_DMASYNC	0x1
 	__u32 mr_attrs;
-#define MTHCA_MR_DMASYNC 0x1
-/* mark the memory region with a DMA attribute that causes
- * in-flight DMA to be flushed when the region is written to */
 	__u32 reserved;
 };
 

+ 1 - 0
drivers/infiniband/hw/nes/Kconfig

@@ -2,6 +2,7 @@ config INFINIBAND_NES
 	tristate "NetEffect RNIC Driver"
 	depends on PCI && INET && INFINIBAND
 	select LIBCRC32C
+	select INET_LRO
 	---help---
 	  This is a low-level driver for NetEffect RDMA enabled
 	  Network Interface Cards (RNIC).

+ 4 - 0
drivers/infiniband/hw/nes/nes.c

@@ -91,6 +91,10 @@ unsigned int nes_debug_level = 0;
 module_param_named(debug_level, nes_debug_level, uint, 0644);
 MODULE_PARM_DESC(debug_level, "Enable debug output level");
 
+unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
+module_param(nes_lro_max_aggr, uint, 0444);
+MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
+
 LIST_HEAD(nes_adapter_list);
 static LIST_HEAD(nes_dev_list);
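
For reference, module_param() takes the variable name, a type keyword matching it (uint here, for the unsigned int variable), and an octal sysfs permission such as 0444 or 0644; MODULE_PARM_DESC() must be given that same parameter name for the description to attach to it.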
 

+ 3 - 2
drivers/infiniband/hw/nes/nes.h

@@ -173,6 +173,7 @@ extern int disable_mpa_crc;
 extern unsigned int send_first;
 extern unsigned int nes_drv_opt;
 extern unsigned int nes_debug_level;
+extern unsigned int nes_lro_max_aggr;
 
 extern struct list_head nes_adapter_list;
 
@@ -535,8 +536,8 @@ int nes_register_ofa_device(struct nes_ib_device *);
 int nes_read_eeprom_values(struct nes_device *, struct nes_adapter *);
 void nes_write_1G_phy_reg(struct nes_device *, u8, u8, u16);
 void nes_read_1G_phy_reg(struct nes_device *, u8, u8, u16 *);
-void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16);
-void nes_read_10G_phy_reg(struct nes_device *, u16, u8);
+void nes_write_10G_phy_reg(struct nes_device *, u16, u8, u16, u16);
+void nes_read_10G_phy_reg(struct nes_device *, u8, u8, u16);
 struct nes_cqp_request *nes_get_cqp_request(struct nes_device *);
 void nes_post_cqp_request(struct nes_device *, struct nes_cqp_request *, int);
 int nes_arp_table(struct nes_device *, u32, u8 *, u32);

+ 4 - 4
drivers/infiniband/hw/nes/nes_cm.c

@@ -594,7 +594,7 @@ static void nes_cm_timer_tick(unsigned long pass)
 				continue;
 			}
 			/* this seems like the correct place, but leave send entry unprotected */
-			// spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+			/* spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags); */
 			atomic_inc(&send_entry->skb->users);
 			cm_packets_retrans++;
 			nes_debug(NES_DBG_CM, "Retransmitting send_entry %p for node %p,"
@@ -1335,7 +1335,7 @@ static int process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
 							cm_node->loc_addr, cm_node->loc_port,
 							cm_node->rem_addr, cm_node->rem_port,
 							cm_node->state, atomic_read(&cm_node->ref_count));
-				// create event
+				/* create event */
 				cm_node->state = NES_CM_STATE_CLOSED;
 
 				create_event(cm_node, NES_CM_EVENT_ABORTED);
@@ -1669,7 +1669,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
 	if (!cm_node)
 		return NULL;
 
-	// set our node side to client (active) side
+	/* set our node side to client (active) side */
 	cm_node->tcp_cntxt.client = 1;
 	cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
 
@@ -1694,7 +1694,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
 			loopbackremotenode->mpa_frame_size = mpa_frame_size -
 					sizeof(struct ietf_mpa_frame);
 
-			// we are done handling this state, set node to a TSA state
+			/* we are done handling this state, set node to a TSA state */
 			cm_node->state = NES_CM_STATE_TSA;
 			cm_node->tcp_cntxt.rcv_nxt = loopbackremotenode->tcp_cntxt.loc_seq_num;
 			loopbackremotenode->tcp_cntxt.rcv_nxt = cm_node->tcp_cntxt.loc_seq_num;

+ 293 - 78
drivers/infiniband/hw/nes/nes_hw.c

@@ -38,6 +38,7 @@
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
+#include <linux/inet_lro.h>
 
 #include "nes.h"
 
@@ -832,7 +833,7 @@ static void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_cou
 	nes_write_indexed(nesdev, 0x00000900, 0x20000001);
 	nes_write_indexed(nesdev, 0x000060C0, 0x0000028e);
 	nes_write_indexed(nesdev, 0x000060C8, 0x00000020);
-														//
+
 	nes_write_indexed(nesdev, 0x000001EC, 0x7b2625a0);
 	/* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */
 
@@ -1207,11 +1208,16 @@ int nes_init_phy(struct nes_device *nesdev)
 {
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	u32 counter = 0;
+	u32 sds_common_control0;
 	u32 mac_index = nesdev->mac_index;
-	u32 tx_config;
+	u32 tx_config = 0;
 	u16 phy_data;
+	u32 temp_phy_data = 0;
+	u32 temp_phy_data2 = 0;
+	u32 i = 0;
 
-	if (nesadapter->OneG_Mode) {
+	if ((nesadapter->OneG_Mode) &&
+	    (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
 		nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
 		if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) {
 			printk(PFX "%s: Programming mdc config for 1G\n", __func__);
@@ -1223,7 +1229,7 @@ int nes_init_phy(struct nes_device *nesdev)
 		nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data);
 		nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n",
 				nesadapter->phy_index[mac_index], phy_data);
-		nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index],  0xb000);
+		nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000);
 
 		/* Reset the PHY */
 		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000);
@@ -1277,12 +1283,126 @@ int nes_init_phy(struct nes_device *nesdev)
 		nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
 		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300);
 	} else {
-		if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) {
+		if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) ||
+		    (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
 			/* setup 10G MDIO operation */
 			tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
 			tx_config |= 0x14;
 			nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
 		}
+		if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
+			nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+
+			temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+			mdelay(10);
+			nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+			temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+			/*
+			 * If firmware is already running (e.g. after a
+			 * driver unload/reload), don't do anything.
+			 */
+			if (temp_phy_data == temp_phy_data2) {
+				/* configure QT2505 AMCC PHY */
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000);
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000);
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044);
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052);
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008);
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098);
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00);
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0000);
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528);
+
+				/*
+				 * remove micro from reset; chip boots from ROM,
+				 * uploads EEPROM f/w image, uC executes f/w
+				 */
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002);
+
+				/*
+				 * wait for heart beat to start to
+				 * know loading is done
+				 */
+				counter = 0;
+				do {
+					nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+					temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+					if (counter++ > 1000) {
+						nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check <this is bad!!!> \n");
+						break;
+					}
+					mdelay(100);
+					nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+					temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+				} while ((temp_phy_data2 == temp_phy_data));
+
+				/*
+				 * wait for tracking to start to know
+				 * f/w is good to go
+				 */
+				counter = 0;
+				do {
+					nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd);
+					temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+					if (counter++ > 1000) {
+						nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check <this is bad!!!> \n");
+						break;
+					}
+					mdelay(1000);
+					/*
+					 * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n",
+					 *			temp_phy_data);
+					 */
+				} while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
+
+				/* set LOS Control invert RXLOSB_I_PADINV */
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000);
+				/* set LOS Control to mask of RXLOSB_I */
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042);
+				/* set LED1 to input mode (LED1 and LED2 share same LED) */
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007);
+				/* set LED2 to RX link_status and activity */
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A);
+				/* set LED3 to RX link_status */
+				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009);
+
+				/*
+				 * reset the res-calibration on t2
+				 * serdes; ensures it is stable after
+				 * the amcc phy is stable
+				 */
+
+				sds_common_control0  = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
+				sds_common_control0 |= 0x1;
+				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+
+				/* release the res-calibration reset */
+				sds_common_control0 &= 0xfffffffe;
+				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
+
+				i = 0;
+				while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
+						&& (i++ < 5000)) {
+					/* mdelay(1); */
+				}
+
+				/*
+				 * wait for link train done before moving on,
+				 * or we will get an interrupt storm
+				 */
+				counter = 0;
+				do {
+					temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+								(0x200 * (nesdev->mac_index & 1)));
+					if (counter++ > 1000) {
+						nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait <this is bad, link didn't train!!!>\n");
+						break;
+					}
+					mdelay(1);
+				} while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000));
+			}
+		}
 	}
 	return 0;
 }
@@ -1375,6 +1495,25 @@ static void nes_rq_wqes_timeout(unsigned long parm)
 }
 
 
+static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr,
+			       void **tcph, u64 *hdr_flags, void *priv)
+{
+	unsigned int ip_len;
+	struct iphdr *iph;
+	skb_reset_network_header(skb);
+	iph = ip_hdr(skb);
+	if (iph->protocol != IPPROTO_TCP)
+		return -1;
+	ip_len = ip_hdrlen(skb);
+	skb_set_transport_header(skb, ip_len);
+	*tcph = tcp_hdr(skb);
+
+	*hdr_flags = LRO_IPV4 | LRO_TCP;
+	*iphdr = iph;
+	return 0;
+}
+
+
 /**
  * nes_init_nic_qp
  */
@@ -1520,10 +1659,10 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
 	}
 
 	u64temp = (u64)nesvnic->nic.sq_pbase;
-	nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+	nic_context->context_words[NES_NIC_CTX_SQ_LOW_IDX]  = cpu_to_le32((u32)u64temp);
 	nic_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
 	u64temp = (u64)nesvnic->nic.rq_pbase;
-	nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
+	nic_context->context_words[NES_NIC_CTX_RQ_LOW_IDX]  = cpu_to_le32((u32)u64temp);
 	nic_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
 
 	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
@@ -1575,7 +1714,7 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
 		nic_rqe = &nesvnic->nic.rq_vbase[counter];
 		nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
 		nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
-		nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
+		nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]  = cpu_to_le32((u32)pmem);
 		nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
 		nesvnic->nic.rx_skb[counter] = skb;
 	}
@@ -1592,15 +1731,21 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
 	nesvnic->rq_wqes_timer.function = nes_rq_wqes_timeout;
 	nesvnic->rq_wqes_timer.data = (unsigned long)nesvnic;
 	nes_debug(NES_DBG_INIT, "NAPI support Enabled\n");
-
 	if (nesdev->nesadapter->et_use_adaptive_rx_coalesce)
 	{
 		nes_nic_init_timer(nesdev);
 		if (netdev->mtu > 1500)
 			jumbomode = 1;
-                nes_nic_init_timer_defaults(nesdev, jumbomode);
-	}
-
+		nes_nic_init_timer_defaults(nesdev, jumbomode);
+	}
+	nesvnic->lro_mgr.max_aggr       = NES_LRO_MAX_AGGR;
+	nesvnic->lro_mgr.max_desc       = NES_MAX_LRO_DESCRIPTORS;
+	nesvnic->lro_mgr.lro_arr        = nesvnic->lro_desc;
+	nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;
+	nesvnic->lro_mgr.features       = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID;
+	nesvnic->lro_mgr.dev            = netdev;
+	nesvnic->lro_mgr.ip_summed      = CHECKSUM_UNNECESSARY;
+	nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
 	return 0;
 }
 
@@ -1620,8 +1765,8 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
 
 	/* Free remaining NIC receive buffers */
 	while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
-		nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
-		wqe_frag = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
+		nic_rqe   = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
+		wqe_frag  = (u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
 		wqe_frag |= ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
 		pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag,
 				nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
@@ -1704,17 +1849,17 @@ int nes_napi_isr(struct nes_device *nesdev)
 	/* iff NIC, process here, else wait for DPC */
 	if ((int_stat) && ((int_stat & 0x0000ff00) == int_stat)) {
 		nesdev->napi_isr_ran = 0;
-		nes_write32(nesdev->regs+NES_INT_STAT,
-				(int_stat &
-				~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)));
+		nes_write32(nesdev->regs + NES_INT_STAT,
+			(int_stat &
+			~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 | NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
 
 		/* Process the CEQs */
 		nes_process_ceq(nesdev, &nesdev->nesadapter->ceq[nesdev->nic_ceq_index]);
 
 		if (unlikely((((nesadapter->et_rx_coalesce_usecs_irq) &&
-					   (!nesadapter->et_use_adaptive_rx_coalesce)) ||
-					  ((nesadapter->et_use_adaptive_rx_coalesce) &&
-					   (nesdev->deepcq_count > nesadapter->et_pkt_rate_low)))) ) {
+					(!nesadapter->et_use_adaptive_rx_coalesce)) ||
+					((nesadapter->et_use_adaptive_rx_coalesce) &&
+					 (nesdev->deepcq_count > nesadapter->et_pkt_rate_low))))) {
 			if ((nesdev->int_req & NES_INT_TIMER) == 0) {
 				/* Enable Periodic timer interrupts */
 				nesdev->int_req |= NES_INT_TIMER;
@@ -1792,12 +1937,12 @@ void nes_dpc(unsigned long param)
 		}
 
 		if (int_stat) {
-			if (int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
-					NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)) {
+			if (int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
+					NES_INT_MAC1|NES_INT_MAC2 | NES_INT_MAC3)) {
 				/* Ack the interrupts */
 				nes_write32(nesdev->regs+NES_INT_STAT,
-						(int_stat & ~(NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
-						NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3)));
+					(int_stat & ~(NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0|
+					NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3)));
 			}
 
 			temp_int_stat = int_stat;
@@ -1862,8 +2007,8 @@ void nes_dpc(unsigned long param)
 			}
 		}
 		/* Don't use the interface interrupt bit stay in loop */
-		int_stat &= ~NES_INT_INTF|NES_INT_TIMER|NES_INT_MAC0|
-				NES_INT_MAC1|NES_INT_MAC2|NES_INT_MAC3;
+		int_stat &= ~NES_INT_INTF | NES_INT_TIMER | NES_INT_MAC0 |
+				NES_INT_MAC1 | NES_INT_MAC2 | NES_INT_MAC3;
 	} while ((int_stat != 0) && (loop_counter++ < MAX_DPC_ITERATIONS));
 
 	if (timer_ints == 1) {
@@ -1874,9 +2019,9 @@ void nes_dpc(unsigned long param)
 					nesdev->timer_only_int_count = 0;
 					nesdev->int_req &= ~NES_INT_TIMER;
 					nes_write32(nesdev->regs + NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
-					nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+					nes_write32(nesdev->regs + NES_INT_MASK, ~nesdev->int_req);
 				} else {
-					nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req));
+					nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
 				}
 			} else {
 				if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
@@ -1884,7 +2029,7 @@ void nes_dpc(unsigned long param)
 					nes_nic_init_timer(nesdev);
 				}
 				nesdev->timer_only_int_count = 0;
-				nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff|(~nesdev->int_req));
+				nes_write32(nesdev->regs+NES_INT_MASK, 0x0000ffff | (~nesdev->int_req));
 			}
 		} else {
 			nesdev->timer_only_int_count = 0;
@@ -1933,7 +2078,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
 	do {
 		if (le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]) &
 				NES_CEQE_VALID) {
-			u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX])))<<32) |
+			u64temp = (((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_HIGH_IDX]))) << 32) |
 						((u64)(le32_to_cpu(ceq->ceq_vbase[head].ceqe_words[NES_CEQE_CQ_CTX_LOW_IDX])));
 			u64temp <<= 1;
 			cq = *((struct nes_hw_cq **)&u64temp);
@@ -1961,7 +2106,7 @@ static void nes_process_ceq(struct nes_device *nesdev, struct nes_hw_ceq *ceq)
  */
 static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
 {
-//	u64 u64temp;
+	/* u64 u64temp; */
 	u32 head;
 	u32 aeq_size;
 	u32 aeqe_misc;
@@ -1980,8 +2125,10 @@ static void nes_process_aeq(struct nes_device *nesdev, struct nes_hw_aeq *aeq)
 		if (aeqe_misc & (NES_AEQE_QP|NES_AEQE_CQ)) {
 			if (aeqe_cq_id >= NES_FIRST_QPN) {
 				/* dealing with an accelerated QP related AE */
-//				u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])))<<32) |
-//					((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
+				/*
+				 * u64temp = (((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX]))) << 32) |
+				 *	     ((u64)(le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX])));
+				 */
 				nes_process_iwarp_aeqe(nesdev, (struct nes_hw_aeqe *)aeqe);
 			} else {
 				/* TODO: dealing with a CQP related AE */
@@ -2081,6 +2228,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 	u32 u32temp;
 	u16 phy_data;
 	u16 temp_phy_data;
+	u32 pcs_val  = 0x0f0f0000;
+	u32 pcs_mask = 0x0f1f0000;
 
 	spin_lock_irqsave(&nesadapter->phy_lock, flags);
 	if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) {
@@ -2144,13 +2293,30 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 		nes_debug(NES_DBG_PHY, "Eth SERDES Common Status: 0=0x%08X, 1=0x%08X\n",
 				nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0),
 				nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0+0x200));
-		pcs_control_status = nes_read_indexed(nesdev,
-				NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200));
-		pcs_control_status = nes_read_indexed(nesdev,
-				NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index&1)*0x200));
+
+		if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G) {
+			switch (mac_index) {
+			case 1:
+			case 3:
+				pcs_control_status = nes_read_indexed(nesdev,
+						NES_IDX_PHY_PCS_CONTROL_STATUS0 + 0x200);
+				break;
+			default:
+				pcs_control_status = nes_read_indexed(nesdev,
+						NES_IDX_PHY_PCS_CONTROL_STATUS0);
+				break;
+			}
+		} else {
+			pcs_control_status = nes_read_indexed(nesdev,
+					NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
+			pcs_control_status = nes_read_indexed(nesdev,
+					NES_IDX_PHY_PCS_CONTROL_STATUS0 + ((mac_index & 1) * 0x200));
+		}
+
 		nes_debug(NES_DBG_PHY, "PCS PHY Control/Status%u: 0x%08X\n",
 				mac_index, pcs_control_status);
-		if (nesadapter->OneG_Mode) {
+		if ((nesadapter->OneG_Mode) &&
+				(nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
 			u32temp = 0x01010000;
 			if (nesadapter->port_count > 2) {
 				u32temp |= 0x02020000;
@@ -2159,24 +2325,59 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 				phy_data = 0;
 				nes_debug(NES_DBG_PHY, "PCS says the link is down\n");
 			}
-		} else if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) {
-			nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]);
-			temp_phy_data = (u16)nes_read_indexed(nesdev,
-								NES_IDX_MAC_MDIO_CONTROL);
-			u32temp = 20;
-			do {
-				nes_read_10G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index]);
-				phy_data = (u16)nes_read_indexed(nesdev,
-								NES_IDX_MAC_MDIO_CONTROL);
-				if ((phy_data == temp_phy_data) || (!(--u32temp)))
-					break;
-				temp_phy_data = phy_data;
-			} while (1);
-			nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
-				__func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
-
 		} else {
-			phy_data = (0x0f0f0000 == (pcs_control_status & 0x0f1f0000)) ? 4 : 0;
+			switch (nesadapter->phy_type[mac_index]) {
+			case NES_PHY_TYPE_IRIS:
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+				temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+				u32temp = 20;
+				do {
+					nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+					phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+					if ((phy_data == temp_phy_data) || (!(--u32temp)))
+						break;
+					temp_phy_data = phy_data;
+				} while (1);
+				nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+					__func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
+				break;
+
+			case NES_PHY_TYPE_ARGUS:
+				/* clear the alarms */
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc002);
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc005);
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc006);
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004);
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005);
+				/* check link status */
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+				temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+				u32temp = 100;
+				do {
+					nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+
+					phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+					if ((phy_data == temp_phy_data) || (!(--u32temp)))
+						break;
+					temp_phy_data = phy_data;
+				} while (1);
+				nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
+					__func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
+				break;
+
+			case NES_PHY_TYPE_PUMA_1G:
+				if (mac_index < 2)
+					pcs_val = pcs_mask = 0x01010000;
+				else
+					pcs_val = pcs_mask = 0x02020000;
+				/* fall through */
+			default:
+				phy_data = (pcs_val == (pcs_control_status & pcs_mask)) ? 0x4 : 0x0;
+				break;
+			}
 		}
 
 		if (phy_data & 0x0004) {
@@ -2185,8 +2386,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 				nes_debug(NES_DBG_PHY, "The Link is UP!!.  linkup was %d\n",
 						nesvnic->linkup);
 				if (nesvnic->linkup == 0) {
-					printk(PFX "The Link is now up for port %u, netdev %p.\n",
-							mac_index, nesvnic->netdev);
+					printk(PFX "The Link is now up for port %s, netdev %p.\n",
+							nesvnic->netdev->name, nesvnic->netdev);
 					if (netif_queue_stopped(nesvnic->netdev))
 						netif_start_queue(nesvnic->netdev);
 					nesvnic->linkup = 1;
@@ -2199,8 +2400,8 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
 				nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n",
 						nesvnic->linkup);
 				if (nesvnic->linkup == 1) {
-					printk(PFX "The Link is now down for port %u, netdev %p.\n",
-							mac_index, nesvnic->netdev);
+					printk(PFX "The Link is now down for port %s, netdev %p.\n",
+							nesvnic->netdev->name, nesvnic->netdev);
 					if (!(netif_queue_stopped(nesvnic->netdev)))
 						netif_stop_queue(nesvnic->netdev);
 					nesvnic->linkup = 0;
@@ -2254,10 +2455,13 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 	u16 pkt_type;
 	u16 rqes_processed = 0;
 	u8 sq_cqes = 0;
+	u8 nes_use_lro = 0;
 
 	head = cq->cq_head;
 	cq_size = cq->cq_size;
 	cq->cqes_pending = 1;
+	if (nesvnic->netdev->features & NETIF_F_LRO)
+		nes_use_lro = 1;
 	do {
 		if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) &
 				NES_NIC_CQE_VALID) {
@@ -2272,8 +2476,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 				/* bump past the vlan tag */
 				wqe_fragment_length++;
 				if (le16_to_cpu(wqe_fragment_length[wqe_fragment_index]) != 0) {
-					u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]);
-					u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32;
+					u64temp = (u64) le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
+							wqe_fragment_index * 2]);
+					u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX +
+							wqe_fragment_index * 2])) << 32;
 					bus_address = (dma_addr_t)u64temp;
 					if (test_and_clear_bit(nesnic->sq_tail, nesnic->first_frag_overflow)) {
 						pci_unmap_single(nesdev->pcidev,
@@ -2283,8 +2489,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 					}
 					for (; wqe_fragment_index < 5; wqe_fragment_index++) {
 						if (wqe_fragment_length[wqe_fragment_index]) {
-							u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX+wqe_fragment_index*2]);
-							u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX+wqe_fragment_index*2]))<<32;
+							u64temp = le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX +
+										wqe_fragment_index * 2]);
+							u64temp += ((u64)le32_to_cpu(nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_HIGH_IDX +
+										wqe_fragment_index * 2])) << 32;
 							bus_address = (dma_addr_t)u64temp;
 							pci_unmap_page(nesdev->pcidev,
 									bus_address,
@@ -2331,7 +2539,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 				if (atomic_read(&nesvnic->rx_skbs_needed) > (nesvnic->nic.rq_size>>1)) {
 					nes_write32(nesdev->regs+NES_CQE_ALLOC,
 							cq->cq_number | (cqe_count << 16));
-//					nesadapter->tune_timer.cq_count += cqe_count;
+					/* nesadapter->tune_timer.cq_count += cqe_count; */
 					nesdev->currcq_count += cqe_count;
 					cqe_count = 0;
 					nes_replenish_nic_rq(nesvnic);
@@ -2379,9 +2587,16 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 								>> 16);
 						nes_debug(NES_DBG_CQ, "%s: Reporting stripped VLAN packet. Tag = 0x%04X\n",
 								nesvnic->netdev->name, vlan_tag);
-						nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
+						if (nes_use_lro)
+							lro_vlan_hwaccel_receive_skb(&nesvnic->lro_mgr, rx_skb,
+									nesvnic->vlan_grp, vlan_tag, NULL);
+						else
+							nes_vlan_rx(rx_skb, nesvnic->vlan_grp, vlan_tag);
 					} else {
-						nes_netif_rx(rx_skb);
+						if (nes_use_lro)
+							lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
+						else
+							nes_netif_rx(rx_skb);
 					}
 				}
 
@@ -2399,7 +2614,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 				/* Replenish Nic CQ */
 				nes_write32(nesdev->regs+NES_CQE_ALLOC,
 						cq->cq_number | (cqe_count << 16));
-//				nesdev->nesadapter->tune_timer.cq_count += cqe_count;
+				/* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
 				nesdev->currcq_count += cqe_count;
 				cqe_count = 0;
 			}
@@ -2413,26 +2628,27 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 
 	} while (1);
 
+	if (nes_use_lro)
+		lro_flush_all(&nesvnic->lro_mgr);
 	if (sq_cqes) {
 		barrier();
 		/* restart the queue if it had been stopped */
 		if (netif_queue_stopped(nesvnic->netdev))
 			netif_wake_queue(nesvnic->netdev);
 	}
-
 	cq->cq_head = head;
 	/* nes_debug(NES_DBG_CQ, "CQ%u Processed = %u cqes, new head = %u.\n",
 			cq->cq_number, cqe_count, cq->cq_head); */
 	cq->cqe_allocs_pending = cqe_count;
 	if (unlikely(nesadapter->et_use_adaptive_rx_coalesce))
 	{
-//		nesdev->nesadapter->tune_timer.cq_count += cqe_count;
+		/* nesdev->nesadapter->tune_timer.cq_count += cqe_count; */
 		nesdev->currcq_count += cqe_count;
 		nes_nic_tune_timer(nesdev);
 	}
 	if (atomic_read(&nesvnic->rx_skbs_needed))
 		nes_replenish_nic_rq(nesvnic);
-	}
+}
 
 
 /**
@@ -2461,7 +2677,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
 
 		if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
 			u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
-					cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX])))<<32) |
+					cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
 					((u64)(le32_to_cpu(cq->cq_vbase[head].
 					cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
 			cqp = *((struct nes_hw_cqp **)&u64temp);
@@ -2478,7 +2694,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
 			}
 
 			u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
-					wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX])))<<32) |
+					wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
 					((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
 					wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX])));
 			cqp_request = *((struct nes_cqp_request **)&u64temp);
@@ -2515,7 +2731,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
 				} else {
 					nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) freed.\n",
 							cqp_request,
-							le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f);
+							le32_to_cpu(cqp_request->cqp_wqe.wqe_words[NES_CQP_WQE_OPCODE_IDX]) & 0x3f);
 					if (cqp_request->dynamic) {
 						kfree(cqp_request);
 					} else {
@@ -2529,7 +2745,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
 			}
 
 			cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX] = 0;
-			nes_write32(nesdev->regs+NES_CQE_ALLOC, cq->cq_number | (1 << 16));
+			nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (1 << 16));
 			if (++cqp->sq_tail >= cqp->sq_size)
 				cqp->sq_tail = 0;
 
@@ -2598,13 +2814,13 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 	nes_debug(NES_DBG_AEQ, "\n");
 	aeq_info = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_MISC_IDX]);
 	if ((NES_AEQE_INBOUND_RDMA&aeq_info) || (!(NES_AEQE_QP&aeq_info))) {
-		context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
+		context  = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
 		context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
 	} else {
 		aeqe_context = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_LOW_IDX]);
 		aeqe_context += ((u64)le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_CTXT_HIGH_IDX])) << 32;
 		context = (unsigned long)nesadapter->qp_table[le32_to_cpu(
-						aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX])-NES_FIRST_QPN];
+						aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]) - NES_FIRST_QPN];
 		BUG_ON(!context);
 	}
 
@@ -2617,7 +2833,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 			le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
 			nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
 
-
 	switch (async_event_id) {
 		case NES_AEQE_AEID_LLP_FIN_RECEIVED:
 			nesqp = *((struct nes_qp **)&context);
@@ -3021,7 +3236,7 @@ void nes_manage_arp_cache(struct net_device *netdev, unsigned char *mac_addr,
 		cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_ARP_VALID);
 		cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_ADDR_LOW_IDX] = cpu_to_le32(
 				(((u32)mac_addr[2]) << 24) | (((u32)mac_addr[3]) << 16) |
-				(((u32)mac_addr[4]) << 8) | (u32)mac_addr[5]);
+				(((u32)mac_addr[4]) << 8)  | (u32)mac_addr[5]);
 		cqp_wqe->wqe_words[NES_CQP_ARP_WQE_MAC_HIGH_IDX] = cpu_to_le32(
 				(((u32)mac_addr[0]) << 16) | (u32)mac_addr[1]);
 	} else {

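A note on the link-check rework in nes_process_mac_intr() above: IRIS and ARGUS now poll link status over MDIO, while the default and PUMA 1G cases compare the PCS status word against a (pcs_val, pcs_mask) pair, PUMA 1G falling through to the generic compare. Condensed, the resulting logic is (names as in the driver; a reading aid, not code from the patch):

	u32 pcs_val  = 0x0f0f0000;	/* pattern the 10G path treats as link up */
	u32 pcs_mask = 0x0f1f0000;

	if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_PUMA_1G)
		/* PUMA 1G exposes one status bit per port pair instead */
		pcs_val = pcs_mask = (mac_index < 2) ? 0x01010000 : 0x02020000;

	/* bit 2 of phy_data is what the link up/down handling keys off */
	phy_data = ((pcs_control_status & pcs_mask) == pcs_val) ? 0x4 : 0x0;
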
+ 14 - 5
drivers/infiniband/hw/nes/nes_hw.h

@@ -33,8 +33,12 @@
 #ifndef __NES_HW_H
 #define __NES_HW_H
 
-#define NES_PHY_TYPE_1G   2
-#define NES_PHY_TYPE_IRIS 3
+#include <linux/inet_lro.h>
+
+#define NES_PHY_TYPE_1G        2
+#define NES_PHY_TYPE_IRIS      3
+#define NES_PHY_TYPE_ARGUS     4
+#define NES_PHY_TYPE_PUMA_1G   5
 #define NES_PHY_TYPE_PUMA_10G  6
 
 #define NES_MULTICAST_PF_MAX 8
@@ -965,7 +969,7 @@ struct nes_arp_entry {
 #define NES_NIC_CQ_DOWNWARD_TREND   16
 
 struct nes_hw_tune_timer {
-    //u16 cq_count;
+    /* u16 cq_count; */
     u16 threshold_low;
     u16 threshold_target;
     u16 threshold_high;
@@ -982,8 +986,10 @@ struct nes_hw_tune_timer {
 #define NES_TIMER_INT_LIMIT         2
 #define NES_TIMER_INT_LIMIT_DYNAMIC 10
 #define NES_TIMER_ENABLE_LIMIT      4
-#define NES_MAX_LINK_INTERRUPTS		128
-#define NES_MAX_LINK_CHECK		200
+#define NES_MAX_LINK_INTERRUPTS     128
+#define NES_MAX_LINK_CHECK          200
+#define NES_MAX_LRO_DESCRIPTORS     32
+#define NES_LRO_MAX_AGGR            64
 
 struct nes_adapter {
 	u64              fw_ver;
@@ -1183,6 +1189,9 @@ struct nes_vnic {
 	u8  of_device_registered;
 	u8  rdma_enabled;
 	u8  rx_checksum_disabled;
+	u32 lro_max_aggr;
+	struct net_lro_mgr lro_mgr;
+	struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS];
 };
 
 struct nes_ib_device {

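The three new nes_vnic members hold the inet_lro state used by nes_nic_ce_handler() above. For orientation, roughly how they get wired up at netdev init time; this is a sketch against the standard net_lro_mgr fields, and nes_lro_get_skb_hdr stands in for the driver's header-parse callback, which is not part of the hunks shown here:

	nesvnic->lro_mgr.max_aggr       = NES_LRO_MAX_AGGR;        /* 64 */
	nesvnic->lro_mgr.max_desc       = NES_MAX_LRO_DESCRIPTORS; /* 32 */
	nesvnic->lro_mgr.lro_arr        = nesvnic->lro_desc;
	nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;     /* assumed name */
	nesvnic->lro_mgr.features       = LRO_F_NAPI;
	nesvnic->lro_mgr.dev            = netdev;
	nesvnic->lro_mgr.ip_summed      = CHECKSUM_UNNECESSARY;
	nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
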
+ 107 - 73
drivers/infiniband/hw/nes/nes_nic.c

@@ -185,12 +185,13 @@ static int nes_netdev_open(struct net_device *netdev)
 	nic_active |= nic_active_bit;
 	nes_write_indexed(nesdev, NES_IDX_NIC_BROADCAST_ON, nic_active);
 
-	macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+	macaddr_high  = ((u16)netdev->dev_addr[0]) << 8;
 	macaddr_high += (u16)netdev->dev_addr[1];
-	macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
-	macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
-	macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
-	macaddr_low += (u32)netdev->dev_addr[5];
+
+	macaddr_low   = ((u32)netdev->dev_addr[2]) << 24;
+	macaddr_low  += ((u32)netdev->dev_addr[3]) << 16;
+	macaddr_low  += ((u32)netdev->dev_addr[4]) << 8;
+	macaddr_low  += (u32)netdev->dev_addr[5];
 
 	/* Program the various MAC regs */
 	for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
@@ -451,7 +452,7 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 	__le16 *wqe_fragment_length;
 	u32 nr_frags;
 	u32 original_first_length;
-//	u64 *wqe_fragment_address;
+	/* u64 *wqe_fragment_address; */
 	/* first fragment (0) is used by copy buffer */
 	u16 wqe_fragment_index=1;
 	u16 hoffset;
@@ -461,11 +462,12 @@ static int nes_netdev_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 	u32 old_head;
 	u32 wqe_misc;
 
-	/* nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
-			" (%u frags), tso_size=%u\n",
-			netdev->name, skb->len, skb_headlen(skb),
-			skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
-	*/
+	/*
+	 * nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
+	 *		" (%u frags), tso_size=%u\n",
+	 *		netdev->name, skb->len, skb_headlen(skb),
+	 *		skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
+	 */
 
 	if (!netif_carrier_ok(netdev))
 		return NETDEV_TX_OK;
@@ -795,12 +797,12 @@ static int nes_netdev_set_mac_address(struct net_device *netdev, void *p)
 	memcpy(netdev->dev_addr, mac_addr->sa_data, netdev->addr_len);
 	printk(PFX "%s: Address length = %d, Address = %s\n",
 	       __func__, netdev->addr_len, print_mac(mac, mac_addr->sa_data));
-	macaddr_high = ((u16)netdev->dev_addr[0]) << 8;
+	macaddr_high  = ((u16)netdev->dev_addr[0]) << 8;
 	macaddr_high += (u16)netdev->dev_addr[1];
-	macaddr_low = ((u32)netdev->dev_addr[2]) << 24;
-	macaddr_low += ((u32)netdev->dev_addr[3]) << 16;
-	macaddr_low += ((u32)netdev->dev_addr[4]) << 8;
-	macaddr_low += (u32)netdev->dev_addr[5];
+	macaddr_low   = ((u32)netdev->dev_addr[2]) << 24;
+	macaddr_low  += ((u32)netdev->dev_addr[3]) << 16;
+	macaddr_low  += ((u32)netdev->dev_addr[4]) << 8;
+	macaddr_low  += (u32)netdev->dev_addr[5];
 
 	for (i = 0; i < NES_MAX_PORT_COUNT; i++) {
 		if (nesvnic->qp_nic_index[i] == 0xf) {
@@ -881,12 +883,12 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 					  print_mac(mac, multicast_addr->dmi_addr),
 					  perfect_filter_register_address+(mc_index * 8),
 					  mc_nic_index);
-				macaddr_high = ((u16)multicast_addr->dmi_addr[0]) << 8;
+				macaddr_high  = ((u16)multicast_addr->dmi_addr[0]) << 8;
 				macaddr_high += (u16)multicast_addr->dmi_addr[1];
-				macaddr_low = ((u32)multicast_addr->dmi_addr[2]) << 24;
-				macaddr_low += ((u32)multicast_addr->dmi_addr[3]) << 16;
-				macaddr_low += ((u32)multicast_addr->dmi_addr[4]) << 8;
-				macaddr_low += (u32)multicast_addr->dmi_addr[5];
+				macaddr_low   = ((u32)multicast_addr->dmi_addr[2]) << 24;
+				macaddr_low  += ((u32)multicast_addr->dmi_addr[3]) << 16;
+				macaddr_low  += ((u32)multicast_addr->dmi_addr[4]) << 8;
+				macaddr_low  += (u32)multicast_addr->dmi_addr[5];
 				nes_write_indexed(nesdev,
 						perfect_filter_register_address+(mc_index * 8),
 						macaddr_low);
@@ -910,23 +912,23 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 /**
  * nes_netdev_change_mtu
  */
-static int nes_netdev_change_mtu(struct	net_device *netdev,	int	new_mtu)
+static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct nes_vnic	*nesvnic = netdev_priv(netdev);
-	struct nes_device *nesdev =	nesvnic->nesdev;
-	int	ret	= 0;
-	u8 jumbomode=0;
+	struct nes_device *nesdev = nesvnic->nesdev;
+	int ret = 0;
+	u8 jumbomode = 0;
 
-	if ((new_mtu < ETH_ZLEN) ||	(new_mtu > max_mtu))
+	if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
 		return -EINVAL;
 
-	netdev->mtu	= new_mtu;
+	netdev->mtu = new_mtu;
 	nesvnic->max_frame_size	= new_mtu + VLAN_ETH_HLEN;
 
 	if (netdev->mtu	> 1500)	{
 		jumbomode=1;
 	}
-	nes_nic_init_timer_defaults(nesdev,	jumbomode);
+	nes_nic_init_timer_defaults(nesdev, jumbomode);
 
 	if (netif_running(netdev)) {
 		nes_netdev_stop(netdev);
@@ -936,8 +938,7 @@ static int nes_netdev_change_mtu(struct	net_device *netdev,	int	new_mtu)
 	return ret;
 }
 
-#define NES_ETHTOOL_STAT_COUNT 55
-static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN] = {
+static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
 	"Link Change Interrupts",
 	"Linearized SKBs",
 	"T/GSO Requests",
@@ -993,8 +994,12 @@ static const char nes_ethtool_stringset[NES_ETHTOOL_STAT_COUNT][ETH_GSTRING_LEN]
 	"CQ Depth 32",
 	"CQ Depth 128",
 	"CQ Depth 256",
+	"LRO aggregated",
+	"LRO flushed",
+	"LRO no_desc",
 };
 
+#define NES_ETHTOOL_STAT_COUNT  ARRAY_SIZE(nes_ethtool_stringset)
 
 /**
  * nes_netdev_get_rx_csum
@@ -1189,6 +1194,9 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
 	target_stat_values[52] = int_mod_cq_depth_32;
 	target_stat_values[53] = int_mod_cq_depth_128;
 	target_stat_values[54] = int_mod_cq_depth_256;
+	target_stat_values[55] = nesvnic->lro_mgr.stats.aggregated;
+	target_stat_values[56] = nesvnic->lro_mgr.stats.flushed;
+	target_stat_values[57] = nesvnic->lro_mgr.stats.no_desc;
 
 }
 
@@ -1219,14 +1227,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev,
 		struct ethtool_coalesce	*et_coalesce)
 {
 	struct nes_vnic	*nesvnic = netdev_priv(netdev);
-	struct nes_device *nesdev =	nesvnic->nesdev;
+	struct nes_device *nesdev = nesvnic->nesdev;
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
 	unsigned long flags;
 
-	spin_lock_irqsave(&nesadapter->periodic_timer_lock,	flags);
+	spin_lock_irqsave(&nesadapter->periodic_timer_lock, flags);
 	if (et_coalesce->rx_max_coalesced_frames_low) {
-		shared_timer->threshold_low	 = et_coalesce->rx_max_coalesced_frames_low;
+		shared_timer->threshold_low = et_coalesce->rx_max_coalesced_frames_low;
 	}
 	if (et_coalesce->rx_max_coalesced_frames_irq) {
 		shared_timer->threshold_target = et_coalesce->rx_max_coalesced_frames_irq;
@@ -1246,14 +1254,14 @@ static int nes_netdev_set_coalesce(struct net_device *netdev,
 	nesadapter->et_rx_coalesce_usecs_irq = et_coalesce->rx_coalesce_usecs_irq;
 	if (et_coalesce->use_adaptive_rx_coalesce) {
 		nesadapter->et_use_adaptive_rx_coalesce	= 1;
-		nesadapter->timer_int_limit	= NES_TIMER_INT_LIMIT_DYNAMIC;
+		nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT_DYNAMIC;
 		nesadapter->et_rx_coalesce_usecs_irq = 0;
 		if (et_coalesce->pkt_rate_low) {
-			nesadapter->et_pkt_rate_low	= et_coalesce->pkt_rate_low;
+			nesadapter->et_pkt_rate_low = et_coalesce->pkt_rate_low;
 		}
 	} else {
 		nesadapter->et_use_adaptive_rx_coalesce	= 0;
-		nesadapter->timer_int_limit	= NES_TIMER_INT_LIMIT;
+		nesadapter->timer_int_limit = NES_TIMER_INT_LIMIT;
 		if (nesadapter->et_rx_coalesce_usecs_irq) {
 			nes_write32(nesdev->regs+NES_PERIODIC_CONTROL,
 					0x80000000 | ((u32)(nesadapter->et_rx_coalesce_usecs_irq*8)));
@@ -1270,28 +1278,28 @@ static int nes_netdev_get_coalesce(struct net_device *netdev,
 		struct ethtool_coalesce	*et_coalesce)
 {
 	struct nes_vnic	*nesvnic = netdev_priv(netdev);
-	struct nes_device *nesdev =	nesvnic->nesdev;
+	struct nes_device *nesdev = nesvnic->nesdev;
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	struct ethtool_coalesce	temp_et_coalesce;
 	struct nes_hw_tune_timer *shared_timer = &nesadapter->tune_timer;
 	unsigned long flags;
 
 	memset(&temp_et_coalesce, 0, sizeof(temp_et_coalesce));
-	temp_et_coalesce.rx_coalesce_usecs_irq = nesadapter->et_rx_coalesce_usecs_irq;
-	temp_et_coalesce.use_adaptive_rx_coalesce =	nesadapter->et_use_adaptive_rx_coalesce;
-	temp_et_coalesce.rate_sample_interval =	nesadapter->et_rate_sample_interval;
+	temp_et_coalesce.rx_coalesce_usecs_irq    = nesadapter->et_rx_coalesce_usecs_irq;
+	temp_et_coalesce.use_adaptive_rx_coalesce = nesadapter->et_use_adaptive_rx_coalesce;
+	temp_et_coalesce.rate_sample_interval     = nesadapter->et_rate_sample_interval;
 	temp_et_coalesce.pkt_rate_low =	nesadapter->et_pkt_rate_low;
 	spin_lock_irqsave(&nesadapter->periodic_timer_lock,	flags);
-	temp_et_coalesce.rx_max_coalesced_frames_low =	shared_timer->threshold_low;
-	temp_et_coalesce.rx_max_coalesced_frames_irq =	shared_timer->threshold_target;
+	temp_et_coalesce.rx_max_coalesced_frames_low  = shared_timer->threshold_low;
+	temp_et_coalesce.rx_max_coalesced_frames_irq  = shared_timer->threshold_target;
 	temp_et_coalesce.rx_max_coalesced_frames_high = shared_timer->threshold_high;
-	temp_et_coalesce.rx_coalesce_usecs_low = shared_timer->timer_in_use_min;
+	temp_et_coalesce.rx_coalesce_usecs_low  = shared_timer->timer_in_use_min;
 	temp_et_coalesce.rx_coalesce_usecs_high = shared_timer->timer_in_use_max;
 	if (nesadapter->et_use_adaptive_rx_coalesce) {
 		temp_et_coalesce.rx_coalesce_usecs_irq = shared_timer->timer_in_use;
 	}
 	spin_unlock_irqrestore(&nesadapter->periodic_timer_lock, flags);
-	memcpy(et_coalesce,	&temp_et_coalesce, sizeof(*et_coalesce));
+	memcpy(et_coalesce, &temp_et_coalesce, sizeof(*et_coalesce));
 	return 0;
 }
 
@@ -1370,30 +1378,38 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd
 	u16 phy_data;
 
 	et_cmd->duplex = DUPLEX_FULL;
-	et_cmd->port = PORT_MII;
+	et_cmd->port   = PORT_MII;
+
 	if (nesadapter->OneG_Mode) {
-		et_cmd->supported = SUPPORTED_1000baseT_Full|SUPPORTED_Autoneg;
-		et_cmd->advertising = ADVERTISED_1000baseT_Full|ADVERTISED_Autoneg;
 		et_cmd->speed = SPEED_1000;
-		nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
-				&phy_data);
-		if (phy_data&0x1000) {
-			et_cmd->autoneg = AUTONEG_ENABLE;
+		if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+			et_cmd->supported   = SUPPORTED_1000baseT_Full;
+			et_cmd->advertising = ADVERTISED_1000baseT_Full;
+			et_cmd->autoneg     = AUTONEG_DISABLE;
+			et_cmd->transceiver = XCVR_INTERNAL;
+			et_cmd->phy_address = nesdev->mac_index;
 		} else {
-			et_cmd->autoneg = AUTONEG_DISABLE;
+			et_cmd->supported   = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg;
+			et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg;
+			nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data);
+			if (phy_data & 0x1000)
+				et_cmd->autoneg = AUTONEG_ENABLE;
+			else
+				et_cmd->autoneg = AUTONEG_DISABLE;
+			et_cmd->transceiver = XCVR_EXTERNAL;
+			et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
 		}
-		et_cmd->transceiver = XCVR_EXTERNAL;
-		et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
 	} else {
-		if (nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) {
+		if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) ||
+		    (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) {
 			et_cmd->transceiver = XCVR_EXTERNAL;
-			et_cmd->port = PORT_FIBRE;
-			et_cmd->supported = SUPPORTED_FIBRE;
+			et_cmd->port        = PORT_FIBRE;
+			et_cmd->supported   = SUPPORTED_FIBRE;
 			et_cmd->advertising = ADVERTISED_FIBRE;
 			et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
 		} else {
 			et_cmd->transceiver = XCVR_INTERNAL;
-			et_cmd->supported = SUPPORTED_10000baseT_Full;
+			et_cmd->supported   = SUPPORTED_10000baseT_Full;
 			et_cmd->advertising = ADVERTISED_10000baseT_Full;
 			et_cmd->phy_address = nesdev->mac_index;
 		}
@@ -1416,14 +1432,15 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	u16 phy_data;
 
-	if (nesadapter->OneG_Mode) {
+	if ((nesadapter->OneG_Mode) &&
+	    (nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G)) {
 		nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
 				&phy_data);
 		if (et_cmd->autoneg) {
 			/* Turn on Full duplex, Autoneg, and restart autonegotiation */
 			phy_data |= 0x1300;
 		} else {
-			// Turn off autoneg
+			/* Turn off autoneg */
 			phy_data &= ~0x1000;
 		}
 		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index],
@@ -1454,6 +1471,8 @@ static struct ethtool_ops nes_ethtool_ops = {
 	.set_sg = ethtool_op_set_sg,
 	.get_tso = ethtool_op_get_tso,
 	.set_tso = ethtool_op_set_tso,
+	.get_flags = ethtool_op_get_flags,
+	.set_flags = ethtool_op_set_flags,
 };
 
 
@@ -1607,27 +1626,34 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
 	list_add_tail(&nesvnic->list, &nesdev->nesadapter->nesvnic_list[nesdev->mac_index]);
 
 	if ((nesdev->netdev_count == 0) &&
-	    (PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index)) {
-		nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
-				NES_IDX_PHY_PCS_CONTROL_STATUS0+(0x200*(nesvnic->logical_port&1)));
+	    ((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
+	     ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) &&
+	      (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
+	       ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
+		/*
+		 * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
+		 *		NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1)));
+		 */
 		u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
-				(0x200*(nesvnic->logical_port&1)));
-		u32temp |= 0x00200000;
-		nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
-				(0x200*(nesvnic->logical_port&1)), u32temp);
+				(0x200 * (nesdev->mac_index & 1)));
+		if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) {
+			u32temp |= 0x00200000;
+			nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
+				(0x200 * (nesdev->mac_index & 1)), u32temp);
+		}
+
 		u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
-				(0x200*(nesvnic->logical_port&1)) );
+				(0x200 * (nesdev->mac_index & 1)));
+
 		if ((u32temp&0x0f1f0000) == 0x0f0f0000) {
-			if (nesdev->nesadapter->phy_type[nesvnic->logical_port] == NES_PHY_TYPE_IRIS) {
+			if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) {
 				nes_init_phy(nesdev);
-				nes_read_10G_phy_reg(nesdev, 1,
-						nesdev->nesadapter->phy_index[nesvnic->logical_port]);
+				nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
 				temp_phy_data = (u16)nes_read_indexed(nesdev,
 									NES_IDX_MAC_MDIO_CONTROL);
 				u32temp = 20;
 				do {
-					nes_read_10G_phy_reg(nesdev, 1,
-							nesdev->nesadapter->phy_index[nesvnic->logical_port]);
+					nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
 					phy_data = (u16)nes_read_indexed(nesdev,
 									NES_IDX_MAC_MDIO_CONTROL);
 					if ((phy_data == temp_phy_data) || (!(--u32temp)))
@@ -1644,6 +1670,14 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
 				nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
 				nesvnic->linkup = 1;
 			}
+		} else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+			nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n",
+				nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn));
+			if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) ||
+			    ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000)))  {
+				nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
+				nesvnic->linkup = 1;
+			}
 		}
 		/* clear the MAC interrupt status, assumes direct logical to physical mapping */
 		u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));

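Worth flagging in the nes_nic.c ethtool changes: NES_ETHTOOL_STAT_COUNT is now derived from the string table rather than hardcoded, so the three LRO strings grow it from 55 to 58 automatically and the count can no longer drift out of sync with target_stat_values[55..57]. The idiom, for anyone grepping for the old constant:

	static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
		/* ... 58 entries after this patch ... */
	};
	#define NES_ETHTOOL_STAT_COUNT  ARRAY_SIZE(nes_ethtool_stringset)
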
+ 3 - 7
drivers/infiniband/hw/nes/nes_utils.c

@@ -444,15 +444,13 @@ void nes_read_1G_phy_reg(struct nes_device *nesdev, u8 phy_reg, u8 phy_addr, u16
 /**
  * nes_write_10G_phy_reg
  */
-void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg,
-		u8 phy_addr, u16 data)
+void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_addr, u8 dev_addr, u16 phy_reg,
+		u16 data)
 {
-	u32 dev_addr;
 	u32 port_addr;
 	u32 u32temp;
 	u32 counter;
 
-	dev_addr = 1;
 	port_addr = phy_addr;
 
 	/* set address */
@@ -492,14 +490,12 @@ void nes_write_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg,
  * This routine only issues the read, the data must be read
  * separately.
  */
-void nes_read_10G_phy_reg(struct nes_device *nesdev, u16 phy_reg, u8 phy_addr)
+void nes_read_10G_phy_reg(struct nes_device *nesdev, u8 phy_addr, u8 dev_addr, u16 phy_reg)
 {
-	u32 dev_addr;
 	u32 port_addr;
 	u32 u32temp;
 	u32 counter;
 
-	dev_addr = 1;
 	port_addr = phy_addr;
 
 	/* set address */

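The 10G PHY helpers gain an explicit MDIO device address (previously hardcoded to 1) and a reordered argument list: (nesdev, phy_addr, dev_addr, phy_reg). That is what lets the Argus/SFP+ code in nes_hw.c reach device 4 for the alarm registers as well as device 1 for status. Usage, as at the updated call sites; note the helper only issues the transaction, and the data is fetched separately:

	/* start a read of register 1 on MDIO device 1 of this PHY */
	nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
	/* then pick up the returned value from the MDIO control register */
	phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
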
+ 1 - 1
drivers/infiniband/hw/nes/nes_verbs.c

@@ -1266,7 +1266,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
 			sq_size = init_attr->cap.max_send_wr;
 			rq_size = init_attr->cap.max_recv_wr;
 
-			// check if the encoded sizes are OK or not...
+			/* check if the encoded sizes are OK or not... */
 			sq_encoded_size = nes_get_encoded_size(&sq_size);
 			rq_encoded_size = nes_get_encoded_size(&rq_size);
 

+ 5 - 2
drivers/infiniband/ulp/ipoib/ipoib.h

@@ -95,6 +95,8 @@ enum {
 	IPOIB_MCAST_FLAG_SENDONLY = 1,
 	IPOIB_MCAST_FLAG_BUSY	  = 2,	/* joining or already joined */
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
+
+	MAX_SEND_CQE		  = 16,
 };
 
 #define	IPOIB_OP_RECV   (1ul << 31)
@@ -285,7 +287,8 @@ struct ipoib_dev_priv {
 	u16		  pkey_index;
 	struct ib_pd	 *pd;
 	struct ib_mr	 *mr;
-	struct ib_cq	 *cq;
+	struct ib_cq	 *recv_cq;
+	struct ib_cq	 *send_cq;
 	struct ib_qp	 *qp;
 	u32		  qkey;
 
@@ -305,6 +308,7 @@ struct ipoib_dev_priv {
 	struct ib_sge	     tx_sge[MAX_SKB_FRAGS + 1];
 	struct ib_send_wr    tx_wr;
 	unsigned	     tx_outstanding;
+	struct ib_wc	     send_wc[MAX_SEND_CQE];
 
 	struct ib_recv_wr    rx_wr;
 	struct ib_sge	     rx_sge[IPOIB_UD_RX_SG];
@@ -662,7 +666,6 @@ static inline int ipoib_register_debugfs(void) { return 0; }
 static inline void ipoib_unregister_debugfs(void) { }
 #endif
 
-
 #define ipoib_printk(level, priv, format, arg...)	\
 	printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
 #define ipoib_warn(priv, format, arg...)		\

+ 4 - 4
drivers/infiniband/ulp/ipoib/ipoib_cm.c

@@ -249,8 +249,8 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_init_attr attr = {
 		.event_handler = ipoib_cm_rx_event_handler,
-		.send_cq = priv->cq, /* For drain WR */
-		.recv_cq = priv->cq,
+		.send_cq = priv->recv_cq, /* For drain WR */
+		.recv_cq = priv->recv_cq,
 		.srq = priv->cm.srq,
 		.cap.max_send_wr = 1, /* For drain WR */
 		.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
@@ -951,8 +951,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_init_attr attr = {
-		.send_cq		= priv->cq,
-		.recv_cq		= priv->cq,
+		.send_cq		= priv->recv_cq,
+		.recv_cq		= priv->recv_cq,
 		.srq			= priv->cm.srq,
 		.cap.max_send_wr	= ipoib_sendq_size,
 		.cap.max_send_sge	= 1,

+ 1 - 1
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c

@@ -71,7 +71,7 @@ static int ipoib_set_coalesce(struct net_device *dev,
 	    coal->rx_max_coalesced_frames > 0xffff)
 		return -EINVAL;
 
-	ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames,
+	ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
 			   coal->rx_coalesce_usecs);
 	if (ret && ret != -ENOSYS) {
 		ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);

+ 26 - 19
drivers/infiniband/ulp/ipoib/ipoib_ib.c

@@ -364,7 +364,6 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	unsigned int wr_id = wc->wr_id;
 	struct ipoib_tx_buf *tx_req;
-	unsigned long flags;
 
 	ipoib_dbg_data(priv, "send completion: id %d, status: %d\n",
 		       wr_id, wc->status);
@@ -384,13 +383,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
 	dev_kfree_skb_any(tx_req->skb);
 
-	spin_lock_irqsave(&priv->tx_lock, flags);
 	++priv->tx_tail;
 	if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
 	    netif_queue_stopped(dev) &&
 	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
 		netif_wake_queue(dev);
-	spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 	if (wc->status != IB_WC_SUCCESS &&
 	    wc->status != IB_WC_WR_FLUSH_ERR)
@@ -399,6 +396,17 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 			   wc->status, wr_id, wc->vendor_err);
 }
 
+static int poll_tx(struct ipoib_dev_priv *priv)
+{
+	int n, i;
+
+	n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
+	for (i = 0; i < n; ++i)
+		ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i);
+
+	return n == MAX_SEND_CQE;
+}
+
 int ipoib_poll(struct napi_struct *napi, int budget)
 {
 	struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
@@ -414,7 +422,7 @@ poll_more:
 		int max = (budget - done);
 
 		t = min(IPOIB_NUM_WC, max);
-		n = ib_poll_cq(priv->cq, t, priv->ibwc);
+		n = ib_poll_cq(priv->recv_cq, t, priv->ibwc);
 
 		for (i = 0; i < n; i++) {
 			struct ib_wc *wc = priv->ibwc + i;
@@ -425,12 +433,8 @@ poll_more:
 					ipoib_cm_handle_rx_wc(dev, wc);
 				else
 					ipoib_ib_handle_rx_wc(dev, wc);
-			} else {
-				if (wc->wr_id & IPOIB_OP_CM)
-					ipoib_cm_handle_tx_wc(dev, wc);
-				else
-					ipoib_ib_handle_tx_wc(dev, wc);
-			}
+			} else
+				ipoib_cm_handle_tx_wc(priv->dev, wc);
 		}
 
 		if (n != t)
@@ -439,7 +443,7 @@ poll_more:
 
 	if (done < budget) {
 		netif_rx_complete(dev, napi);
-		if (unlikely(ib_req_notify_cq(priv->cq,
+		if (unlikely(ib_req_notify_cq(priv->recv_cq,
 					      IB_CQ_NEXT_COMP |
 					      IB_CQ_REPORT_MISSED_EVENTS)) &&
 		    netif_rx_reschedule(dev, napi))
@@ -562,12 +566,16 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 
 		address->last_send = priv->tx_head;
 		++priv->tx_head;
+		skb_orphan(skb);
 
 		if (++priv->tx_outstanding == ipoib_sendq_size) {
 			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
 			netif_stop_queue(dev);
 		}
 	}
+
+	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+		poll_tx(priv);
 }
 
 static void __ipoib_reap_ah(struct net_device *dev)
@@ -714,7 +722,7 @@ void ipoib_drain_cq(struct net_device *dev)
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int i, n;
 	do {
-		n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
+		n = ib_poll_cq(priv->recv_cq, IPOIB_NUM_WC, priv->ibwc);
 		for (i = 0; i < n; ++i) {
 			/*
 			 * Convert any successful completions to flush
@@ -729,14 +737,13 @@ void ipoib_drain_cq(struct net_device *dev)
 					ipoib_cm_handle_rx_wc(dev, priv->ibwc + i);
 				else
 					ipoib_ib_handle_rx_wc(dev, priv->ibwc + i);
-			} else {
-				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
-					ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
-				else
-					ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
-			}
+			} else
+				ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
 		}
 	} while (n == IPOIB_NUM_WC);
+
+	while (poll_tx(priv))
+		; /* nothing */
 }
 
 int ipoib_ib_dev_stop(struct net_device *dev, int flush)
@@ -826,7 +833,7 @@ timeout:
 		msleep(1);
 	}
 
-	ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
+	ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
 
 	return 0;
 }

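Pulling the UD send-completion pieces together: the new send CQ is created with no completion handler (see the ipoib_verbs.c hunk below) and is never armed, so it raises no interrupts; poll_tx() reaps up to MAX_SEND_CQE completions per call and returns nonzero when the batch was full, meaning more may be pending. That one return value serves both call sites added in this file:

	/* transmit path: reap opportunistically once enough sends are queued */
	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
		poll_tx(priv);

	/* drain path: a full batch means there may be more, so keep looping */
	while (poll_tx(priv))
		; /* nothing */
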
+ 2 - 1
drivers/infiniband/ulp/ipoib/ipoib_main.c

@@ -1298,7 +1298,8 @@ static int __init ipoib_init_module(void)
 
 	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
 	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
-	ipoib_sendq_size = max(ipoib_sendq_size, IPOIB_MIN_QUEUE_SIZE);
+	ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
+						     IPOIB_MIN_QUEUE_SIZE));
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
 	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
 #endif

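The extra max() keeps the send ring at least twice MAX_SEND_CQE deep, so a full poll_tx() batch always fits comfortably in the ring. A worked example, assuming the usual ipoib.h limits (IPOIB_MIN_QUEUE_SIZE of 2, IPOIB_MAX_QUEUE_SIZE of 8192) and the send_queue_size module parameter set to 4:

	/* roundup_pow_of_two(4)          -> 4
	 * min(4, 8192)                   -> 4
	 * max(4, max(2 * 16, 2))         -> 32, i.e. 2 * MAX_SEND_CQE
	 *
	 * ipoib_sendq_size therefore never drops below 32.
	 */
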
+ 26 - 13
drivers/infiniband/ulp/ipoib/ipoib_verbs.c

@@ -171,26 +171,33 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 		goto out_free_pd;
 	}
 
-	size = ipoib_sendq_size + ipoib_recvq_size + 1;
+	size = ipoib_recvq_size + 1;
 	ret = ipoib_cm_dev_init(dev);
 	if (!ret) {
+		size += ipoib_sendq_size;
 		if (ipoib_cm_has_srq(dev))
 			size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
 		else
 			size += ipoib_recvq_size * ipoib_max_conn_qp;
 	}
 
-	priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
-	if (IS_ERR(priv->cq)) {
-		printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
+	priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
+	if (IS_ERR(priv->recv_cq)) {
+		printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
 		goto out_free_mr;
 	}
 
-	if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
-		goto out_free_cq;
+	priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0);
+	if (IS_ERR(priv->send_cq)) {
+		printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
+		goto out_free_recv_cq;
+	}
+
+	if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP))
+		goto out_free_send_cq;
 
-	init_attr.send_cq = priv->cq;
-	init_attr.recv_cq = priv->cq;
+	init_attr.send_cq = priv->send_cq;
+	init_attr.recv_cq = priv->recv_cq;
 
 	if (priv->hca_caps & IB_DEVICE_UD_TSO)
 		init_attr.create_flags = IB_QP_CREATE_IPOIB_UD_LSO;
@@ -201,7 +208,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	priv->qp = ib_create_qp(priv->pd, &init_attr);
 	if (IS_ERR(priv->qp)) {
 		printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
-		goto out_free_cq;
+		goto out_free_send_cq;
 	}
 
 	priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -230,8 +237,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 
 	return 0;
 
-out_free_cq:
-	ib_destroy_cq(priv->cq);
+out_free_send_cq:
+	ib_destroy_cq(priv->send_cq);
+
+out_free_recv_cq:
+	ib_destroy_cq(priv->recv_cq);
 
 out_free_mr:
 	ib_dereg_mr(priv->mr);
@@ -254,8 +264,11 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
 		clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
 	}
 
-	if (ib_destroy_cq(priv->cq))
-		ipoib_warn(priv, "ib_cq_destroy failed\n");
+	if (ib_destroy_cq(priv->send_cq))
+		ipoib_warn(priv, "ib_cq_destroy (send) failed\n");
+
+	if (ib_destroy_cq(priv->recv_cq))
+		ipoib_warn(priv, "ib_cq_destroy (recv) failed\n");
 
 	ipoib_cm_dev_cleanup(dev);
 

+ 3 - 0
drivers/infiniband/ulp/ipoib/ipoib_vlan.c

@@ -90,6 +90,9 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 	}
 
 	priv->max_ib_mtu = ppriv->max_ib_mtu;
+	/* MTU will be reset when mcast join happens */
+	priv->dev->mtu   = IPOIB_UD_MTU(priv->max_ib_mtu);
+	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
 	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
 
 	priv->pkey = pkey;

+ 3 - 1
drivers/infiniband/ulp/iser/iscsi_iser.c

@@ -473,13 +473,15 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
 	stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
 	stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
 	stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
-	stats->custom_length = 3;
+	stats->custom_length = 4;
 	strcpy(stats->custom[0].desc, "qp_tx_queue_full");
 	stats->custom[0].value = 0; /* TB iser_conn->qp_tx_queue_full; */
 	strcpy(stats->custom[1].desc, "fmr_map_not_avail");
 	stats->custom[1].value = 0; /* TB iser_conn->fmr_map_not_avail */;
 	strcpy(stats->custom[2].desc, "eh_abort_cnt");
 	stats->custom[2].value = conn->eh_abort_cnt;
+	strcpy(stats->custom[3].desc, "fmr_unalign_cnt");
+	stats->custom[3].value = conn->fmr_unalign_cnt;
 }
 
 static int

+ 7 - 0
drivers/infiniband/ulp/iser/iscsi_iser.h

@@ -70,6 +70,13 @@
 #define DRV_DATE	"May 7th, 2006"
 
 #define iser_dbg(fmt, arg...)				\
+	do {						\
+		if (iser_debug_level > 1)		\
+			printk(KERN_DEBUG PFX "%s:" fmt,\
+				__func__ , ## arg);	\
+	} while (0)
+
+#define iser_warn(fmt, arg...)				\
 	do {						\
 		if (iser_debug_level > 0)		\
 			printk(KERN_DEBUG PFX "%s:" fmt,\

+ 7 - 2
drivers/infiniband/ulp/iser/iser_memory.c

@@ -334,8 +334,11 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
 	struct scatterlist *sg;
 	int i;
 
+	if (iser_debug_level == 0)
+		return;
+
 	for_each_sg(sgl, sg, data->dma_nents, i)
-		iser_err("sg[%d] dma_addr:0x%lX page:0x%p "
+		iser_warn("sg[%d] dma_addr:0x%lX page:0x%p "
 			 "off:0x%x sz:0x%x dma_len:0x%x\n",
 			 i, (unsigned long)ib_sg_dma_address(ibdev, sg),
 			 sg_page(sg), sg->offset,
@@ -420,6 +423,7 @@ void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask)
 int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
 		      enum   iser_data_dir        cmd_dir)
 {
+	struct iscsi_conn    *iscsi_conn = iser_ctask->iser_conn->iscsi_conn;
 	struct iser_conn     *ib_conn = iser_ctask->iser_conn->ib_conn;
 	struct iser_device   *device = ib_conn->device;
 	struct ib_device     *ibdev = device->ib_device;
@@ -434,7 +438,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask,
 
 	aligned_len = iser_data_buf_aligned_len(mem, ibdev);
 	if (aligned_len != mem->dma_nents) {
-		iser_err("rdma alignment violation %d/%d aligned\n",
+		iscsi_conn->fmr_unalign_cnt++;
+		iser_warn("rdma alignment violation %d/%d aligned\n",
 			 aligned_len, mem->size);
 		iser_data_buf_dump(mem, ibdev);
 

+ 1 - 1
drivers/net/cxgb3/version.h

@@ -38,7 +38,7 @@
 #define DRV_VERSION "1.0-ko"
 
 /* Firmware version */
-#define FW_VERSION_MAJOR 5
+#define FW_VERSION_MAJOR 6
 #define FW_VERSION_MINOR 0
 #define FW_VERSION_MICRO 0
 #endif				/* __CHELSIO_VERSION_H */

+ 3 - 1
drivers/net/mlx4/cq.c

@@ -188,7 +188,8 @@ int mlx4_cq_resize(struct mlx4_dev *dev, struct mlx4_cq *cq,
 EXPORT_SYMBOL_GPL(mlx4_cq_resize);
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
-		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq)
+		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
+		  int collapsed)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cq_table *cq_table = &priv->cq_table;
@@ -224,6 +225,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
 	cq_context = mailbox->buf;
 	memset(cq_context, 0, sizeof *cq_context);
 
+	cq_context->flags	    = cpu_to_be32(!!collapsed << 18);
 	cq_context->logsize_usrpage = cpu_to_be32((ilog2(nent) << 24) | uar->index);
 	cq_context->comp_eqn        = priv->eq_table.eq[MLX4_EQ_COMP].eqn;
 	cq_context->log_page_size   = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;

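The new collapsed parameter ends up in bit 18 of the CQ context flags; judging by the name, a collapsed CQ has the HCA overwrite a single CQE slot rather than advance through the ring. Existing callers keep today's behaviour by passing 0, which is presumably all the one-line mlx4_ib cq.c change in this merge does. A hypothetical call against the new prototype from include/linux/mlx4/device.h (every name but mlx4_cq_alloc is illustrative):

	err = mlx4_cq_alloc(dev, nent, &mtt, &uar, db_rec, &cq,
			    0 /* collapsed: off, preserve old behaviour */);
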
+ 0 - 6
drivers/net/mlx4/mr.c

@@ -607,15 +607,9 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_enable);
 void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
 		    u32 *lkey, u32 *rkey)
 {
-	u32 key;
-
 	if (!fmr->maps)
 		return;
 
-	key = key_to_hw_index(fmr->mr.key);
-	key &= dev->caps.num_mpts - 1;
-	*lkey = *rkey = fmr->mr.key = hw_index_to_key(key);
-
 	fmr->maps = 0;
 
 	*(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;

+ 2 - 1
include/linux/mlx4/device.h

@@ -382,7 +382,8 @@ void mlx4_free_hwq_res(struct mlx4_dev *mdev, struct mlx4_hwq_resources *wqres,
 		       int size);
 
 int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
-		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq);
+		  struct mlx4_uar *uar, u64 db_rec, struct mlx4_cq *cq,
+		  int collapsed);
 void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 
 int mlx4_qp_alloc(struct mlx4_dev *dev, int sqpn, struct mlx4_qp *qp);

+ 1 - 0
include/scsi/libiscsi.h

@@ -225,6 +225,7 @@ struct iscsi_conn {
 
 	/* custom statistics */
 	uint32_t		eh_abort_cnt;
+	uint32_t		fmr_unalign_cnt;
 };
 
 struct iscsi_pool {