Просмотр исходного кода

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull infiniband upate from Roland Dreier:
 "First batch of InfiniBand/RDMA changes for the 3.8 merge window:
   - A good chunk of Bart Van Assche's SRP fixes
   - UAPI disintegration from David Howells
   - mlx4 support for "64-byte CQE" hardware feature from Or Gerlitz
   - Other miscellaneous fixes"

Fix up trivial conflict in mellanox/mlx4 driver.

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (33 commits)
  RDMA/nes: Fix for crash when registering zero length MR for CQ
  RDMA/nes: Fix for terminate timer crash
  RDMA/nes: Fix for BUG_ON due to adding already-pending timer
  IB/srp: Allow SRP disconnect through sysfs
  srp_transport: Document sysfs attributes
  srp_transport: Simplify attribute initialization code
  srp_transport: Fix attribute registration
  IB/srp: Document sysfs attributes
  IB/srp: send disconnect request without waiting for CM timewait exit
  IB/srp: destroy and recreate QP and CQs when reconnecting
  IB/srp: Eliminate state SRP_TARGET_DEAD
  IB/srp: Introduce the helper function srp_remove_target()
  IB/srp: Suppress superfluous error messages
  IB/srp: Process all error completions
  IB/srp: Introduce srp_handle_qp_err()
  IB/srp: Simplify SCSI error handling
  IB/srp: Keep processing commands during host removal
  IB/srp: Eliminate state SRP_TARGET_CONNECTING
  IB/srp: Increase block layer timeout
  RDMA/cm: Change return value from find_gid_port()
  ...
Linus Torvalds 12 лет назад
Родитель
Сommit
f132c54e3a
42 измененных файлов с 689 добавлено и 322 удалено
  1. 156 0
      Documentation/ABI/stable/sysfs-driver-ib_srp
  2. 19 0
      Documentation/ABI/stable/sysfs-transport-srp
  3. 4 5
      drivers/infiniband/core/cma.c
  4. 1 0
      drivers/infiniband/hw/amso1100/c2_ae.c
  5. 2 4
      drivers/infiniband/hw/cxgb3/iwch_cm.c
  6. 2 4
      drivers/infiniband/hw/cxgb4/cm.c
  7. 0 10
      drivers/infiniband/hw/ipath/ipath_init_chip.c
  8. 2 2
      drivers/infiniband/hw/mlx4/cm.c
  9. 26 8
      drivers/infiniband/hw/mlx4/cq.c
  10. 22 5
      drivers/infiniband/hw/mlx4/main.c
  11. 1 0
      drivers/infiniband/hw/mlx4/mlx4_ib.h
  12. 11 1
      drivers/infiniband/hw/mlx4/user.h
  13. 1 0
      drivers/infiniband/hw/nes/nes.h
  14. 9 23
      drivers/infiniband/hw/nes/nes_cm.c
  15. 2 7
      drivers/infiniband/hw/nes/nes_hw.c
  16. 20 22
      drivers/infiniband/hw/nes/nes_mgt.c
  17. 7 6
      drivers/infiniband/hw/nes/nes_nic.c
  18. 8 1
      drivers/infiniband/hw/nes/nes_verbs.c
  19. 185 129
      drivers/infiniband/ulp/srp/ib_srp.c
  20. 6 5
      drivers/infiniband/ulp/srp/ib_srp.h
  21. 6 5
      drivers/net/ethernet/mellanox/mlx4/cmd.c
  22. 1 1
      drivers/net/ethernet/mellanox/mlx4/en_cq.c
  23. 1 0
      drivers/net/ethernet/mellanox/mlx4/en_netdev.c
  24. 3 2
      drivers/net/ethernet/mellanox/mlx4/en_rx.c
  25. 3 2
      drivers/net/ethernet/mellanox/mlx4/en_tx.c
  26. 23 13
      drivers/net/ethernet/mellanox/mlx4/eq.c
  27. 29 1
      drivers/net/ethernet/mellanox/mlx4/fw.c
  28. 1 0
      drivers/net/ethernet/mellanox/mlx4/fw.h
  29. 37 1
      drivers/net/ethernet/mellanox/mlx4/main.c
  30. 1 0
      drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
  31. 27 24
      drivers/scsi/scsi_transport_srp.c
  32. 21 0
      include/linux/mlx4/device.h
  33. 0 6
      include/rdma/Kbuild
  34. 1 35
      include/rdma/rdma_netlink.h
  35. 8 0
      include/scsi/scsi_transport_srp.h
  36. 6 0
      include/uapi/rdma/Kbuild
  37. 0 0
      include/uapi/rdma/ib_user_cm.h
  38. 0 0
      include/uapi/rdma/ib_user_mad.h
  39. 0 0
      include/uapi/rdma/ib_user_sa.h
  40. 0 0
      include/uapi/rdma/ib_user_verbs.h
  41. 37 0
      include/uapi/rdma/rdma_netlink.h
  42. 0 0
      include/uapi/rdma/rdma_user_cm.h

+ 156 - 0
Documentation/ABI/stable/sysfs-driver-ib_srp

@@ -0,0 +1,156 @@
+What:		/sys/class/infiniband_srp/srp-<hca>-<port_number>/add_target
+Date:		January 2, 2006
+KernelVersion:	2.6.15
+Contact:	linux-rdma@vger.kernel.org
+Description:	Interface for making ib_srp connect to a new target.
+		One can request ib_srp to connect to a new target by writing
+		a comma-separated list of login parameters to this sysfs
+		attribute. The supported parameters are:
+		* id_ext, a 16-digit hexadecimal number specifying the eight
+		  byte identifier extension in the 16-byte SRP target port
+		  identifier. The target port identifier is sent by ib_srp
+		  to the target in the SRP_LOGIN_REQ request.
+		* ioc_guid, a 16-digit hexadecimal number specifying the eight
+		  byte I/O controller GUID portion of the 16-byte target port
+		  identifier.
+		* dgid, a 32-digit hexadecimal number specifying the
+		  destination GID.
+		* pkey, a four-digit hexadecimal number specifying the
+		  InfiniBand partition key.
+		* service_id, a 16-digit hexadecimal number specifying the
+		  InfiniBand service ID used to establish communication with
+		  the SRP target. How to find out the value of the service ID
+		  is specified in the documentation of the SRP target.
+		* max_sect, a decimal number specifying the maximum number of
+		  512-byte sectors to be transferred via a single SCSI command.
+		* max_cmd_per_lun, a decimal number specifying the maximum
+		  number of outstanding commands for a single LUN.
+		* io_class, a hexadecimal number specifying the SRP I/O class.
+		  Must be either 0xff00 (rev 10) or 0x0100 (rev 16a). The I/O
+		  class defines the format of the SRP initiator and target
+		  port identifiers.
+		* initiator_ext, a 16-digit hexadecimal number specifying the
+		  identifier extension portion of the SRP initiator port
+		  identifier. This data is sent by the initiator to the target
+		  in the SRP_LOGIN_REQ request.
+		* cmd_sg_entries, a number in the range 1..255 that specifies
+		  the maximum number of data buffer descriptors stored in the
+		  SRP_CMD information unit itself. With allow_ext_sg=0 the
+		  parameter cmd_sg_entries defines the maximum S/G list length
+		  for a single SRP_CMD, and commands whose S/G list length
+		  exceeds this limit after S/G list collapsing will fail.
+		* allow_ext_sg, whether ib_srp is allowed to include a partial
+		  memory descriptor list in an SRP_CMD instead of the entire
+		  list. If a partial memory descriptor list has been included
+		  in an SRP_CMD the remaining memory descriptors are
+		  communicated from initiator to target via an additional RDMA
+		  transfer. Setting allow_ext_sg to 1 increases the maximum
+		  amount of data that can be transferred between initiator and
+		  target via a single SCSI command. Since not all SRP target
+		  implementations support partial memory descriptor lists the
+		  default value for this option is 0.
+		* sg_tablesize, a number in the range 1..2048 specifying the
+		  maximum S/G list length the SCSI layer is allowed to pass to
+		  ib_srp. Specifying a value that exceeds cmd_sg_entries is
+		  only safe with partial memory descriptor list support enabled
+		  (allow_ext_sg=1).
+
+What:		/sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev
+Date:		January 2, 2006
+KernelVersion:	2.6.15
+Contact:	linux-rdma@vger.kernel.org
+Description:	HCA name (<hca>).
+
+What:		/sys/class/infiniband_srp/srp-<hca>-<port_number>/port
+Date:		January 2, 2006
+KernelVersion:	2.6.15
+Contact:	linux-rdma@vger.kernel.org
+Description:	HCA port number (<port_number>).
+
+What:		/sys/class/scsi_host/host<n>/allow_ext_sg
+Date:		May 19, 2011
+KernelVersion:	2.6.39
+Contact:	linux-rdma@vger.kernel.org
+Description:	Whether ib_srp is allowed to include a partial memory
+		descriptor list in an SRP_CMD when communicating with an SRP
+		target.
+
+What:		/sys/class/scsi_host/host<n>/cmd_sg_entries
+Date:		May 19, 2011
+KernelVersion:	2.6.39
+Contact:	linux-rdma@vger.kernel.org
+Description:	Maximum number of data buffer descriptors that may be sent to
+		the target in a single SRP_CMD request.
+
+What:		/sys/class/scsi_host/host<n>/dgid
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	InfiniBand destination GID used for communication with the SRP
+		target. Differs from orig_dgid if port redirection has happened.
+
+What:		/sys/class/scsi_host/host<n>/id_ext
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	Eight-byte identifier extension portion of the 16-byte target
+		port identifier.
+
+What:		/sys/class/scsi_host/host<n>/ioc_guid
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	Eight-byte I/O controller GUID portion of the 16-byte target
+		port identifier.
+
+What:		/sys/class/scsi_host/host<n>/local_ib_device
+Date:		November 29, 2006
+KernelVersion:	2.6.19
+Contact:	linux-rdma@vger.kernel.org
+Description:	Name of the InfiniBand HCA used for communicating with the
+		SRP target.
+
+What:		/sys/class/scsi_host/host<n>/local_ib_port
+Date:		November 29, 2006
+KernelVersion:	2.6.19
+Contact:	linux-rdma@vger.kernel.org
+Description:	Number of the HCA port used for communicating with the
+		SRP target.
+
+What:		/sys/class/scsi_host/host<n>/orig_dgid
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	InfiniBand destination GID specified in the parameters
+		written to the add_target sysfs attribute.
+
+What:		/sys/class/scsi_host/host<n>/pkey
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	A 16-bit number representing the InfiniBand partition key used
+		for communication with the SRP target.
+
+What:		/sys/class/scsi_host/host<n>/req_lim
+Date:		October 20, 2010
+KernelVersion:	2.6.36
+Contact:	linux-rdma@vger.kernel.org
+Description:	Number of requests ib_srp can send to the target before it has
+		to wait for more credits. For more information see also the
+		SRP credit algorithm in the SRP specification.
+
+What:		/sys/class/scsi_host/host<n>/service_id
+Date:		June 17, 2006
+KernelVersion:	2.6.17
+Contact:	linux-rdma@vger.kernel.org
+Description:	InfiniBand service ID used for establishing communication with
+		the SRP	target.
+
+What:		/sys/class/scsi_host/host<n>/zero_req_lim
+Date:		September 20, 2006
+KernelVersion:	2.6.18
+Contact:	linux-rdma@vger.kernel.org
+Description:	Number of times the initiator had to wait before sending a
+		request to the target because it ran out of credits. For more
+		information see also the SRP credit algorithm in the SRP
+		specification.

+ 19 - 0
Documentation/ABI/stable/sysfs-transport-srp

@@ -0,0 +1,19 @@
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/delete
+Date:		June 1, 2012
+KernelVersion:	3.7
+Contact:	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description:	Instructs an SRP initiator to disconnect from a target and to
+		remove all LUNs imported from that target.
+
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/port_id
+Date:		June 27, 2007
+KernelVersion:	2.6.24
+Contact:	linux-scsi@vger.kernel.org
+Description:	16-byte local SRP port identifier in hexadecimal format. An
+		example: 4c:49:4e:55:58:20:56:49:4f:00:00:00:00:00:00:00.
+
+What:		/sys/class/srp_remote_ports/port-<h>:<n>/roles
+Date:		June 27, 2007
+KernelVersion:	2.6.24
+Contact:	linux-scsi@vger.kernel.org
+Description:	Role of the remote port. Either "SRP Initiator" or "SRP Target".

+ 4 - 5
drivers/infiniband/core/cma.c

@@ -345,17 +345,17 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
 
 	err = ib_query_port(device, port_num, &props);
 	if (err)
-		return 1;
+		return err;
 
 	for (i = 0; i < props.gid_tbl_len; ++i) {
 		err = ib_query_gid(device, port_num, i, &tmp);
 		if (err)
-			return 1;
+			return err;
 		if (!memcmp(&tmp, gid, sizeof tmp))
 			return 0;
 	}
 
-	return -EAGAIN;
+	return -EADDRNOTAVAIL;
 }
 
 static int cma_acquire_dev(struct rdma_id_private *id_priv)
@@ -388,8 +388,7 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
 				if (!ret) {
 					id_priv->id.port_num = port;
 					goto out;
-				} else if (ret == 1)
-					break;
+				}
 			}
 		}
 	}

+ 1 - 0
drivers/infiniband/hw/amso1100/c2_ae.c

@@ -311,6 +311,7 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
 		if (cq->ibcq.event_handler)
 			cq->ibcq.event_handler(&ib_event,
 					       cq->ibcq.cq_context);
+		break;
 	}
 
 	default:

+ 2 - 4
drivers/infiniband/hw/cxgb3/iwch_cm.c

@@ -128,9 +128,8 @@ static void stop_ep_timer(struct iwch_ep *ep)
 {
 	PDBG("%s ep %p\n", __func__, ep);
 	if (!timer_pending(&ep->timer)) {
-		printk(KERN_ERR "%s timer stopped when its not running!  ep %p state %u\n",
+		WARN(1, "%s timer stopped when its not running!  ep %p state %u\n",
 			__func__, ep, ep->com.state);
-		WARN_ON(1);
 		return;
 	}
 	del_timer_sync(&ep->timer);
@@ -1756,9 +1755,8 @@ static void ep_timeout(unsigned long arg)
 		__state_set(&ep->com, ABORTING);
 		break;
 	default:
-		printk(KERN_ERR "%s unexpected state ep %p state %u\n",
+		WARN(1, "%s unexpected state ep %p state %u\n",
 			__func__, ep, ep->com.state);
-		WARN_ON(1);
 		abort = 0;
 	}
 	spin_unlock_irqrestore(&ep->com.lock, flags);

+ 2 - 4
drivers/infiniband/hw/cxgb4/cm.c

@@ -151,9 +151,8 @@ static void stop_ep_timer(struct c4iw_ep *ep)
 {
 	PDBG("%s ep %p\n", __func__, ep);
 	if (!timer_pending(&ep->timer)) {
-		printk(KERN_ERR "%s timer stopped when its not running! "
+		WARN(1, "%s timer stopped when its not running! "
 		       "ep %p state %u\n", __func__, ep, ep->com.state);
-		WARN_ON(1);
 		return;
 	}
 	del_timer_sync(&ep->timer);
@@ -2551,9 +2550,8 @@ static void process_timeout(struct c4iw_ep *ep)
 		__state_set(&ep->com, ABORTING);
 		break;
 	default:
-		printk(KERN_ERR "%s unexpected state ep %p tid %u state %u\n",
+		WARN(1, "%s unexpected state ep %p tid %u state %u\n",
 			__func__, ep, ep->hwtid, ep->com.state);
-		WARN_ON(1);
 		abort = 0;
 	}
 	mutex_unlock(&ep->com.mutex);

+ 0 - 10
drivers/infiniband/hw/ipath/ipath_init_chip.c

@@ -718,16 +718,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	if (ret)
 		goto done;
 
-	/*
-	 * we ignore most issues after reporting them, but have to specially
-	 * handle hardware-disabled chips.
-	 */
-	if (ret == 2) {
-		/* unique error, known to ipath_init_one */
-		ret = -EPERM;
-		goto done;
-	}
-
 	/*
 	 * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
 	 * but then it no longer nicely fits power of two, and since

+ 2 - 2
drivers/infiniband/hw/mlx4/cm.c

@@ -268,15 +268,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
 	struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
 	unsigned long flags;
 
-	spin_lock_irqsave(&sriov->going_down_lock, flags);
 	spin_lock(&sriov->id_map_lock);
+	spin_lock_irqsave(&sriov->going_down_lock, flags);
 	/*make sure that there is no schedule inside the scheduled work.*/
 	if (!sriov->is_going_down) {
 		id->scheduled_delete = 1;
 		schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
 	}
-	spin_unlock(&sriov->id_map_lock);
 	spin_unlock_irqrestore(&sriov->going_down_lock, flags);
+	spin_unlock(&sriov->id_map_lock);
 }
 
 int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,

+ 26 - 8
drivers/infiniband/hw/mlx4/cq.c

@@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
 
 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
 {
-	return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
+	return mlx4_buf_offset(&buf->buf, n * buf->entry_size);
 }
 
 static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -77,8 +77,9 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
 static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
 {
 	struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
+	struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
 
-	return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
+	return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
 		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
 }
 
@@ -99,12 +100,13 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 {
 	int err;
 
-	err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
+	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
 			     PAGE_SIZE * 2, &buf->buf);
 
 	if (err)
 		goto out;
 
+	buf->entry_size = dev->dev->caps.cqe_size;
 	err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
 				    &buf->mtt);
 	if (err)
@@ -120,8 +122,7 @@ err_mtt:
 	mlx4_mtt_cleanup(dev->dev, &buf->mtt);
 
 err_buf:
-	mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
-			      &buf->buf);
+	mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf);
 
 out:
 	return err;
@@ -129,7 +130,7 @@ out:
 
 static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
 {
-	mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
+	mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf);
 }
 
 static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -137,8 +138,9 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
 			       u64 buf_addr, int cqe)
 {
 	int err;
+	int cqe_size = dev->dev->caps.cqe_size;
 
-	*umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
+	*umem = ib_umem_get(context, buf_addr, cqe * cqe_size,
 			    IB_ACCESS_LOCAL_WRITE, 1);
 	if (IS_ERR(*umem))
 		return PTR_ERR(*umem);
@@ -331,16 +333,23 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
 {
 	struct mlx4_cqe *cqe, *new_cqe;
 	int i;
+	int cqe_size = cq->buf.entry_size;
+	int cqe_inc = cqe_size == 64 ? 1 : 0;
 
 	i = cq->mcq.cons_index;
 	cqe = get_cqe(cq, i & cq->ibcq.cqe);
+	cqe += cqe_inc;
+
 	while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
 		new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
 					   (i + 1) & cq->resize_buf->cqe);
-		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
+		memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size);
+		new_cqe += cqe_inc;
+
 		new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
 			(((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
 		cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
+		cqe += cqe_inc;
 	}
 	++cq->mcq.cons_index;
 }
@@ -438,6 +447,7 @@ err_buf:
 
 out:
 	mutex_unlock(&cq->resize_mutex);
+
 	return err;
 }
 
@@ -586,6 +596,9 @@ repoll:
 	if (!cqe)
 		return -EAGAIN;
 
+	if (cq->buf.entry_size == 64)
+		cqe++;
+
 	++cq->mcq.cons_index;
 
 	/*
@@ -807,6 +820,7 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	int nfreed = 0;
 	struct mlx4_cqe *cqe, *dest;
 	u8 owner_bit;
+	int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
 
 	/*
 	 * First we need to find the current producer index, so we
@@ -825,12 +839,16 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
 	 */
 	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
+		cqe += cqe_inc;
+
 		if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
 			if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
 				mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
 			++nfreed;
 		} else if (nfreed) {
 			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
+			dest += cqe_inc;
+
 			owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
 			memcpy(dest, cqe, sizeof *cqe);
 			dest->owner_sr_opcode = owner_bit |

+ 22 - 5
drivers/infiniband/hw/mlx4/main.c

@@ -563,15 +563,24 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 {
 	struct mlx4_ib_dev *dev = to_mdev(ibdev);
 	struct mlx4_ib_ucontext *context;
+	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
 	struct mlx4_ib_alloc_ucontext_resp resp;
 	int err;
 
 	if (!dev->ib_active)
 		return ERR_PTR(-EAGAIN);
 
-	resp.qp_tab_size      = dev->dev->caps.num_qps;
-	resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
-	resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
+		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
+		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+	} else {
+		resp.dev_caps	      = dev->dev->caps.userspace_caps;
+		resp.qp_tab_size      = dev->dev->caps.num_qps;
+		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
+		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
+		resp.cqe_size	      = dev->dev->caps.cqe_size;
+	}
 
 	context = kmalloc(sizeof *context, GFP_KERNEL);
 	if (!context)
@@ -586,7 +595,11 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
-	err = ib_copy_to_udata(udata, &resp, sizeof resp);
+	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
+		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
+	else
+		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+
 	if (err) {
 		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
 		kfree(context);
@@ -1342,7 +1355,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
 	ibdev->ib_dev.dma_device	= &dev->pdev->dev;
 
-	ibdev->ib_dev.uverbs_abi_ver	= MLX4_IB_UVERBS_ABI_VERSION;
+	if (dev->caps.userspace_caps)
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
+	else
+		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
+
 	ibdev->ib_dev.uverbs_cmd_mask	=
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|

+ 1 - 0
drivers/infiniband/hw/mlx4/mlx4_ib.h

@@ -90,6 +90,7 @@ struct mlx4_ib_xrcd {
 struct mlx4_ib_cq_buf {
 	struct mlx4_buf		buf;
 	struct mlx4_mtt		mtt;
+	int			entry_size;
 };
 
 struct mlx4_ib_cq_resize {

+ 11 - 1
drivers/infiniband/hw/mlx4/user.h

@@ -40,7 +40,9 @@
  * Increment this value if any changes that break userspace ABI
  * compatibility are made.
  */
-#define MLX4_IB_UVERBS_ABI_VERSION	3
+
+#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION	3
+#define MLX4_IB_UVERBS_ABI_VERSION		4
 
 /*
  * Make sure that all structs defined in this file remain laid out so
@@ -50,10 +52,18 @@
  * instead.
  */
 
+struct mlx4_ib_alloc_ucontext_resp_v3 {
+	__u32	qp_tab_size;
+	__u16	bf_reg_size;
+	__u16	bf_regs_per_page;
+};
+
 struct mlx4_ib_alloc_ucontext_resp {
+	__u32	dev_caps;
 	__u32	qp_tab_size;
 	__u16	bf_reg_size;
 	__u16	bf_regs_per_page;
+	__u32	cqe_size;
 };
 
 struct mlx4_ib_alloc_pd_resp {

+ 1 - 0
drivers/infiniband/hw/nes/nes.h

@@ -532,6 +532,7 @@ void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
 int nes_destroy_cqp(struct nes_device *);
 int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
 void nes_recheck_link_status(struct work_struct *work);
+void nes_terminate_timeout(unsigned long context);
 
 /* nes_nic.c */
 struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);

+ 9 - 23
drivers/infiniband/hw/nes/nes_cm.c

@@ -629,11 +629,9 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a
 
 	case SEND_RDMA_READ_ZERO:
 	default:
-		if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO) {
-			printk(KERN_ERR "%s[%u]: Unsupported RDMA0 len operation=%u\n",
-				 __func__, __LINE__, cm_node->send_rdma0_op);
-			WARN_ON(1);
-		}
+		if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO)
+			WARN(1, "Unsupported RDMA0 len operation=%u\n",
+			     cm_node->send_rdma0_op);
 		nes_debug(NES_DBG_CM, "Sending first rdma operation.\n");
 		wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
 			cpu_to_le32(NES_IWARP_SQ_OP_RDMAR);
@@ -671,7 +669,6 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
 	struct nes_cm_core *cm_core = cm_node->cm_core;
 	struct nes_timer_entry *new_send;
 	int ret = 0;
-	u32 was_timer_set;
 
 	new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
 	if (!new_send)
@@ -723,12 +720,8 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
 		}
 	}
 
-	was_timer_set = timer_pending(&cm_core->tcp_timer);
-
-	if (!was_timer_set) {
-		cm_core->tcp_timer.expires = new_send->timetosend;
-		add_timer(&cm_core->tcp_timer);
-	}
+	if (!timer_pending(&cm_core->tcp_timer))
+		mod_timer(&cm_core->tcp_timer, new_send->timetosend);
 
 	return ret;
 }
@@ -946,10 +939,8 @@ static void nes_cm_timer_tick(unsigned long pass)
 	}
 
 	if (settimer) {
-		if (!timer_pending(&cm_core->tcp_timer)) {
-			cm_core->tcp_timer.expires = nexttimeout;
-			add_timer(&cm_core->tcp_timer);
-		}
+		if (!timer_pending(&cm_core->tcp_timer))
+			mod_timer(&cm_core->tcp_timer, nexttimeout);
 	}
 }
 
@@ -1314,8 +1305,6 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core,
 static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
 				      struct nes_cm_node *cm_node)
 {
-	u32 was_timer_set;
-
 	cm_node->accelerated = 1;
 
 	if (cm_node->accept_pend) {
@@ -1325,11 +1314,8 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
 		BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
 	}
 
-	was_timer_set = timer_pending(&cm_core->tcp_timer);
-	if (!was_timer_set) {
-		cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME;
-		add_timer(&cm_core->tcp_timer);
-	}
+	if (!timer_pending(&cm_core->tcp_timer))
+		mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME));
 
 	return 0;
 }

+ 2 - 7
drivers/infiniband/hw/nes/nes_hw.c

@@ -75,7 +75,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
 static void process_critical_error(struct nes_device *nesdev);
 static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
 static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
-static void nes_terminate_timeout(unsigned long context);
 static void nes_terminate_start_timer(struct nes_qp *nesqp);
 
 #ifdef CONFIG_INFINIBAND_NES_DEBUG
@@ -3520,7 +3519,7 @@ static void nes_terminate_received(struct nes_device *nesdev,
 }
 
 /* Timeout routine in case terminate fails to complete */
-static void nes_terminate_timeout(unsigned long context)
+void nes_terminate_timeout(unsigned long context)
 {
 	struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context;
 
@@ -3530,11 +3529,7 @@ static void nes_terminate_timeout(unsigned long context)
 /* Set a timer in case hw cannot complete the terminate sequence */
 static void nes_terminate_start_timer(struct nes_qp *nesqp)
 {
-	init_timer(&nesqp->terminate_timer);
-	nesqp->terminate_timer.function = nes_terminate_timeout;
-	nesqp->terminate_timer.expires = jiffies + HZ;
-	nesqp->terminate_timer.data = (unsigned long)nesqp;
-	add_timer(&nesqp->terminate_timer);
+	mod_timer(&nesqp->terminate_timer, (jiffies + HZ));
 }
 
 /**

+ 20 - 22
drivers/infiniband/hw/nes/nes_mgt.c

@@ -210,6 +210,9 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp
 	}
 
 	while (1) {
+		if (skb_queue_empty(&nesqp->pau_list))
+			goto out;
+
 		seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd);
 		if (seq == nextseq) {
 			if (skb->len || processacks)
@@ -218,14 +221,13 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp
 			goto out;
 		}
 
-		if (skb->next == (struct sk_buff *)&nesqp->pau_list)
-			goto out;
-
 		old_skb = skb;
 		skb = skb->next;
 		skb_unlink(old_skb, &nesqp->pau_list);
 		nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE);
 		nes_rem_ref_cm_node(nesqp->cm_node);
+		if (skb == (struct sk_buff *)&nesqp->pau_list)
+			goto out;
 	}
 	return skb;
 
@@ -245,7 +247,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 	struct nes_rskb_cb *cb;
 	struct pau_fpdu_info *fpdu_info = NULL;
 	struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
-	unsigned long flags;
 	u32 fpdu_len = 0;
 	u32 tmp_len;
 	int frag_cnt = 0;
@@ -260,12 +261,10 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 
 	*pau_fpdu_info = NULL;
 
-	spin_lock_irqsave(&nesqp->pau_lock, flags);
 	skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd);
-	if (!skb) {
-		spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+	if (!skb)
 		goto out;
-	}
+
 	cb = (struct nes_rskb_cb *)&skb->cb[0];
 	if (skb->len) {
 		fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING;
@@ -290,10 +289,9 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 
 			skb = nes_get_next_skb(nesdev, nesqp, skb,
 					       nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd);
-			if (!skb) {
-				spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+			if (!skb)
 				goto out;
-			} else if (rst_rcvd) {
+			if (rst_rcvd) {
 				/* rst received in the middle of fpdu */
 				for (; i >= 0; i--) {
 					skb_unlink(frags[i].skb, &nesqp->pau_list);
@@ -320,8 +318,6 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 		frag_cnt = 1;
 	}
 
-	spin_unlock_irqrestore(&nesqp->pau_lock, flags);
-
 	/* Found one */
 	fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
 	if (fpdu_info == NULL) {
@@ -383,9 +379,8 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
 
 		if (frags[i].skb->len == 0) {
 			/* Pull skb off the list - it will be freed in the callback */
-			spin_lock_irqsave(&nesqp->pau_lock, flags);
-			skb_unlink(frags[i].skb, &nesqp->pau_list);
-			spin_unlock_irqrestore(&nesqp->pau_lock, flags);
+			if (!skb_queue_empty(&nesqp->pau_list))
+				skb_unlink(frags[i].skb, &nesqp->pau_list);
 		} else {
 			/* Last skb still has data so update the seq */
 			iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
@@ -414,14 +409,18 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
 	struct pau_fpdu_info *fpdu_info;
 	struct nes_hw_cqp_wqe *cqp_wqe;
 	struct nes_cqp_request *cqp_request;
+	unsigned long flags;
 	u64 u64tmp;
 	u32 u32tmp;
 	int rc;
 
 	while (1) {
+		spin_lock_irqsave(&nesqp->pau_lock, flags);
 		rc = get_fpdu_info(nesdev, nesqp, &fpdu_info);
-		if (fpdu_info == NULL)
+		if (rc || (fpdu_info == NULL)) {
+			spin_unlock_irqrestore(&nesqp->pau_lock, flags);
 			return rc;
+		}
 
 		cqp_request = fpdu_info->cqp_request;
 		cqp_wqe = &cqp_request->cqp_wqe;
@@ -447,7 +446,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
 		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX,
 				    lower_32_bits(u64tmp));
 		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX,
-				    upper_32_bits(u64tmp >> 32));
+				    upper_32_bits(u64tmp));
 
 		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
 				    lower_32_bits(fpdu_info->frags[0].physaddr));
@@ -475,6 +474,7 @@ static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
 
 		atomic_set(&cqp_request->refcount, 1);
 		nes_post_cqp_request(nesdev, cqp_request);
+		spin_unlock_irqrestore(&nesqp->pau_lock, flags);
 	}
 
 	return 0;
@@ -649,11 +649,9 @@ static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request
 	nesqp = qh_chg->nesqp;
 
 	/* Should we handle the bad completion */
-	if (cqp_request->major_code) {
-		printk(KERN_ERR PFX "Invalid cqp_request major_code=0x%x\n",
+	if (cqp_request->major_code)
+		WARN(1, PFX "Invalid cqp_request major_code=0x%x\n",
 		       cqp_request->major_code);
-		WARN_ON(1);
-	}
 
 	switch (nesqp->pau_state) {
 	case PAU_DEL_QH:

+ 7 - 6
drivers/infiniband/hw/nes/nes_nic.c

@@ -944,12 +944,13 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 					  addr,
 					  perfect_filter_register_address+(mc_index * 8),
 					  mc_nic_index);
-				macaddr_high  = ((u16) addr[0]) << 8;
-				macaddr_high += (u16) addr[1];
-				macaddr_low   = ((u32) addr[2]) << 24;
-				macaddr_low  += ((u32) addr[3]) << 16;
-				macaddr_low  += ((u32) addr[4]) << 8;
-				macaddr_low  += (u32) addr[5];
+				macaddr_high  = ((u8) addr[0]) << 8;
+				macaddr_high += (u8) addr[1];
+				macaddr_low   = ((u8) addr[2]) << 24;
+				macaddr_low  += ((u8) addr[3]) << 16;
+				macaddr_low  += ((u8) addr[4]) << 8;
+				macaddr_low  += (u8) addr[5];
+
 				nes_write_indexed(nesdev,
 						perfect_filter_register_address+(mc_index * 8),
 						macaddr_low);

+ 8 - 1
drivers/infiniband/hw/nes/nes_verbs.c

@@ -1404,6 +1404,9 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
 	}
 
 	nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
+	init_timer(&nesqp->terminate_timer);
+	nesqp->terminate_timer.function = nes_terminate_timeout;
+	nesqp->terminate_timer.data = (unsigned long)nesqp;
 
 	/* update the QP table */
 	nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
@@ -1413,7 +1416,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
 	return &nesqp->ibqp;
 }
 
-
 /**
  * nes_clean_cq
  */
@@ -2559,6 +2561,11 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			return ibmr;
 		case IWNES_MEMREG_TYPE_QP:
 		case IWNES_MEMREG_TYPE_CQ:
+			if (!region->length) {
+				nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n");
+				ib_umem_release(region);
+				return ERR_PTR(-EINVAL);
+			}
 			nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
 			if (!nespbl) {
 				nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");

+ 185 - 129
drivers/infiniband/ulp/srp/ib_srp.c

@@ -222,27 +222,29 @@ static int srp_new_cm_id(struct srp_target_port *target)
 static int srp_create_target_ib(struct srp_target_port *target)
 {
 	struct ib_qp_init_attr *init_attr;
+	struct ib_cq *recv_cq, *send_cq;
+	struct ib_qp *qp;
 	int ret;
 
 	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
 	if (!init_attr)
 		return -ENOMEM;
 
-	target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-				       srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
-	if (IS_ERR(target->recv_cq)) {
-		ret = PTR_ERR(target->recv_cq);
+	recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
+			       srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
+	if (IS_ERR(recv_cq)) {
+		ret = PTR_ERR(recv_cq);
 		goto err;
 	}
 
-	target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
-				       srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
-	if (IS_ERR(target->send_cq)) {
-		ret = PTR_ERR(target->send_cq);
+	send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
+			       srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
+	if (IS_ERR(send_cq)) {
+		ret = PTR_ERR(send_cq);
 		goto err_recv_cq;
 	}
 
-	ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP);
+	ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
 
 	init_attr->event_handler       = srp_qp_event;
 	init_attr->cap.max_send_wr     = SRP_SQ_SIZE;
@@ -251,30 +253,41 @@ static int srp_create_target_ib(struct srp_target_port *target)
 	init_attr->cap.max_send_sge    = 1;
 	init_attr->sq_sig_type         = IB_SIGNAL_ALL_WR;
 	init_attr->qp_type             = IB_QPT_RC;
-	init_attr->send_cq             = target->send_cq;
-	init_attr->recv_cq             = target->recv_cq;
+	init_attr->send_cq             = send_cq;
+	init_attr->recv_cq             = recv_cq;
 
-	target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
-	if (IS_ERR(target->qp)) {
-		ret = PTR_ERR(target->qp);
+	qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
+	if (IS_ERR(qp)) {
+		ret = PTR_ERR(qp);
 		goto err_send_cq;
 	}
 
-	ret = srp_init_qp(target, target->qp);
+	ret = srp_init_qp(target, qp);
 	if (ret)
 		goto err_qp;
 
+	if (target->qp)
+		ib_destroy_qp(target->qp);
+	if (target->recv_cq)
+		ib_destroy_cq(target->recv_cq);
+	if (target->send_cq)
+		ib_destroy_cq(target->send_cq);
+
+	target->qp = qp;
+	target->recv_cq = recv_cq;
+	target->send_cq = send_cq;
+
 	kfree(init_attr);
 	return 0;
 
 err_qp:
-	ib_destroy_qp(target->qp);
+	ib_destroy_qp(qp);
 
 err_send_cq:
-	ib_destroy_cq(target->send_cq);
+	ib_destroy_cq(send_cq);
 
 err_recv_cq:
-	ib_destroy_cq(target->recv_cq);
+	ib_destroy_cq(recv_cq);
 
 err:
 	kfree(init_attr);
@@ -289,6 +302,9 @@ static void srp_free_target_ib(struct srp_target_port *target)
 	ib_destroy_cq(target->send_cq);
 	ib_destroy_cq(target->recv_cq);
 
+	target->qp = NULL;
+	target->send_cq = target->recv_cq = NULL;
+
 	for (i = 0; i < SRP_RQ_SIZE; ++i)
 		srp_free_iu(target->srp_host, target->rx_ring[i]);
 	for (i = 0; i < SRP_SQ_SIZE; ++i)
@@ -428,34 +444,50 @@ static int srp_send_req(struct srp_target_port *target)
 	return status;
 }
 
-static void srp_disconnect_target(struct srp_target_port *target)
+static bool srp_queue_remove_work(struct srp_target_port *target)
 {
-	/* XXX should send SRP_I_LOGOUT request */
+	bool changed = false;
 
-	init_completion(&target->done);
-	if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
-		shost_printk(KERN_DEBUG, target->scsi_host,
-			     PFX "Sending CM DREQ failed\n");
-		return;
+	spin_lock_irq(&target->lock);
+	if (target->state != SRP_TARGET_REMOVED) {
+		target->state = SRP_TARGET_REMOVED;
+		changed = true;
 	}
-	wait_for_completion(&target->done);
+	spin_unlock_irq(&target->lock);
+
+	if (changed)
+		queue_work(system_long_wq, &target->remove_work);
+
+	return changed;
 }
 
-static bool srp_change_state(struct srp_target_port *target,
-			    enum srp_target_state old,
-			    enum srp_target_state new)
+static bool srp_change_conn_state(struct srp_target_port *target,
+				  bool connected)
 {
 	bool changed = false;
 
 	spin_lock_irq(&target->lock);
-	if (target->state == old) {
-		target->state = new;
+	if (target->connected != connected) {
+		target->connected = connected;
 		changed = true;
 	}
 	spin_unlock_irq(&target->lock);
+
 	return changed;
 }
 
+static void srp_disconnect_target(struct srp_target_port *target)
+{
+	if (srp_change_conn_state(target, false)) {
+		/* XXX should send SRP_I_LOGOUT request */
+
+		if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
+			shost_printk(KERN_DEBUG, target->scsi_host,
+				     PFX "Sending CM DREQ failed\n");
+		}
+	}
+}
+
 static void srp_free_req_data(struct srp_target_port *target)
 {
 	struct ib_device *ibdev = target->srp_host->srp_dev->dev;
@@ -489,32 +521,50 @@ static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
 		device_remove_file(&shost->shost_dev, *attr);
 }
 
-static void srp_remove_work(struct work_struct *work)
+static void srp_remove_target(struct srp_target_port *target)
 {
-	struct srp_target_port *target =
-		container_of(work, struct srp_target_port, work);
-
-	if (!srp_change_state(target, SRP_TARGET_DEAD, SRP_TARGET_REMOVED))
-		return;
-
-	spin_lock(&target->srp_host->target_lock);
-	list_del(&target->list);
-	spin_unlock(&target->srp_host->target_lock);
+	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
 
 	srp_del_scsi_host_attr(target->scsi_host);
 	srp_remove_host(target->scsi_host);
 	scsi_remove_host(target->scsi_host);
+	srp_disconnect_target(target);
 	ib_destroy_cm_id(target->cm_id);
 	srp_free_target_ib(target);
 	srp_free_req_data(target);
 	scsi_host_put(target->scsi_host);
 }
 
+static void srp_remove_work(struct work_struct *work)
+{
+	struct srp_target_port *target =
+		container_of(work, struct srp_target_port, remove_work);
+
+	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
+
+	spin_lock(&target->srp_host->target_lock);
+	list_del(&target->list);
+	spin_unlock(&target->srp_host->target_lock);
+
+	srp_remove_target(target);
+}
+
+static void srp_rport_delete(struct srp_rport *rport)
+{
+	struct srp_target_port *target = rport->lld_data;
+
+	srp_queue_remove_work(target);
+}
+
 static int srp_connect_target(struct srp_target_port *target)
 {
 	int retries = 3;
 	int ret;
 
+	WARN_ON_ONCE(target->connected);
+
+	target->qp_in_error = false;
+
 	ret = srp_lookup_path(target);
 	if (ret)
 		return ret;
@@ -534,6 +584,7 @@ static int srp_connect_target(struct srp_target_port *target)
 		 */
 		switch (target->status) {
 		case 0:
+			srp_change_conn_state(target, true);
 			return 0;
 
 		case SRP_PORT_REDIRECT:
@@ -646,13 +697,14 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re
 
 static int srp_reconnect_target(struct srp_target_port *target)
 {
-	struct ib_qp_attr qp_attr;
-	struct ib_wc wc;
+	struct Scsi_Host *shost = target->scsi_host;
 	int i, ret;
 
-	if (!srp_change_state(target, SRP_TARGET_LIVE, SRP_TARGET_CONNECTING))
+	if (target->state != SRP_TARGET_LIVE)
 		return -EAGAIN;
 
+	scsi_target_block(&shost->shost_gendev);
+
 	srp_disconnect_target(target);
 	/*
 	 * Now get a new local CM ID so that we avoid confusing the
@@ -660,21 +712,11 @@ static int srp_reconnect_target(struct srp_target_port *target)
 	 */
 	ret = srp_new_cm_id(target);
 	if (ret)
-		goto err;
+		goto unblock;
 
-	qp_attr.qp_state = IB_QPS_RESET;
-	ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE);
-	if (ret)
-		goto err;
-
-	ret = srp_init_qp(target, target->qp);
+	ret = srp_create_target_ib(target);
 	if (ret)
-		goto err;
-
-	while (ib_poll_cq(target->recv_cq, 1, &wc) > 0)
-		; /* nothing */
-	while (ib_poll_cq(target->send_cq, 1, &wc) > 0)
-		; /* nothing */
+		goto unblock;
 
 	for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
 		struct srp_request *req = &target->req_ring[i];
@@ -686,13 +728,16 @@ static int srp_reconnect_target(struct srp_target_port *target)
 	for (i = 0; i < SRP_SQ_SIZE; ++i)
 		list_add(&target->tx_ring[i]->list, &target->free_tx);
 
-	target->qp_in_error = 0;
 	ret = srp_connect_target(target);
+
+unblock:
+	scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
+			    SDEV_TRANSPORT_OFFLINE);
+
 	if (ret)
 		goto err;
 
-	if (!srp_change_state(target, SRP_TARGET_CONNECTING, SRP_TARGET_LIVE))
-		ret = -EAGAIN;
+	shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n");
 
 	return ret;
 
@@ -705,17 +750,8 @@ err:
 	 * However, we have to defer the real removal because we
 	 * are in the context of the SCSI error handler now, which
 	 * will deadlock if we call scsi_remove_host().
-	 *
-	 * Schedule our work inside the lock to avoid a race with
-	 * the flush_scheduled_work() in srp_remove_one().
 	 */
-	spin_lock_irq(&target->lock);
-	if (target->state == SRP_TARGET_CONNECTING) {
-		target->state = SRP_TARGET_DEAD;
-		INIT_WORK(&target->work, srp_remove_work);
-		queue_work(ib_wq, &target->work);
-	}
-	spin_unlock_irq(&target->lock);
+	srp_queue_remove_work(target);
 
 	return ret;
 }
@@ -1262,6 +1298,19 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
 			     PFX "Recv failed with error code %d\n", res);
 }
 
+static void srp_handle_qp_err(enum ib_wc_status wc_status,
+			      enum ib_wc_opcode wc_opcode,
+			      struct srp_target_port *target)
+{
+	if (target->connected && !target->qp_in_error) {
+		shost_printk(KERN_ERR, target->scsi_host,
+			     PFX "failed %s status %d\n",
+			     wc_opcode & IB_WC_RECV ? "receive" : "send",
+			     wc_status);
+	}
+	target->qp_in_error = true;
+}
+
 static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
 {
 	struct srp_target_port *target = target_ptr;
@@ -1269,15 +1318,11 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
 
 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 	while (ib_poll_cq(cq, 1, &wc) > 0) {
-		if (wc.status) {
-			shost_printk(KERN_ERR, target->scsi_host,
-				     PFX "failed receive status %d\n",
-				     wc.status);
-			target->qp_in_error = 1;
-			break;
+		if (likely(wc.status == IB_WC_SUCCESS)) {
+			srp_handle_recv(target, &wc);
+		} else {
+			srp_handle_qp_err(wc.status, wc.opcode, target);
 		}
-
-		srp_handle_recv(target, &wc);
 	}
 }
 
@@ -1288,16 +1333,12 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
 	struct srp_iu *iu;
 
 	while (ib_poll_cq(cq, 1, &wc) > 0) {
-		if (wc.status) {
-			shost_printk(KERN_ERR, target->scsi_host,
-				     PFX "failed send status %d\n",
-				     wc.status);
-			target->qp_in_error = 1;
-			break;
+		if (likely(wc.status == IB_WC_SUCCESS)) {
+			iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
+			list_add(&iu->list, &target->free_tx);
+		} else {
+			srp_handle_qp_err(wc.status, wc.opcode, target);
 		}
-
-		iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
-		list_add(&iu->list, &target->free_tx);
 	}
 }
 
@@ -1311,16 +1352,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 	unsigned long flags;
 	int len;
 
-	if (target->state == SRP_TARGET_CONNECTING)
-		goto err;
-
-	if (target->state == SRP_TARGET_DEAD ||
-	    target->state == SRP_TARGET_REMOVED) {
-		scmnd->result = DID_BAD_TARGET << 16;
-		scmnd->scsi_done(scmnd);
-		return 0;
-	}
-
 	spin_lock_irqsave(&target->lock, flags);
 	iu = __srp_get_tx_iu(target, SRP_IU_CMD);
 	if (!iu)
@@ -1377,7 +1408,6 @@ err_iu:
 err_unlock:
 	spin_unlock_irqrestore(&target->lock, flags);
 
-err:
 	return SCSI_MLQUEUE_HOST_BUSY;
 }
 
@@ -1419,6 +1449,33 @@ err:
 	return -ENOMEM;
 }
 
+static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
+{
+	uint64_t T_tr_ns, max_compl_time_ms;
+	uint32_t rq_tmo_jiffies;
+
+	/*
+	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
+	 * table 91), both the QP timeout and the retry count have to be set
+	 * for RC QP's during the RTR to RTS transition.
+	 */
+	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
+		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
+
+	/*
+	 * Set target->rq_tmo_jiffies to one second more than the largest time
+	 * it can take before an error completion is generated. See also
+	 * C9-140..142 in the IBTA spec for more information about how to
+	 * convert the QP Local ACK Timeout value to nanoseconds.
+	 */
+	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
+	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
+	do_div(max_compl_time_ms, NSEC_PER_MSEC);
+	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
+
+	return rq_tmo_jiffies;
+}
+
 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
 			       struct srp_login_rsp *lrsp,
 			       struct srp_target_port *target)
@@ -1478,6 +1535,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
 	if (ret)
 		goto error_free;
 
+	target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
+
 	ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
 	if (ret)
 		goto error_free;
@@ -1599,6 +1658,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 	case IB_CM_DREQ_RECEIVED:
 		shost_printk(KERN_WARNING, target->scsi_host,
 			     PFX "DREQ received - connection closed\n");
+		srp_change_conn_state(target, false);
 		if (ib_send_cm_drep(cm_id, NULL, 0))
 			shost_printk(KERN_ERR, target->scsi_host,
 				     PFX "Sending CM DREP failed\n");
@@ -1608,7 +1668,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 		shost_printk(KERN_ERR, target->scsi_host,
 			     PFX "connection closed\n");
 
-		comp = 1;
 		target->status = 0;
 		break;
 
@@ -1636,10 +1695,6 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 	struct srp_iu *iu;
 	struct srp_tsk_mgmt *tsk_mgmt;
 
-	if (target->state == SRP_TARGET_DEAD ||
-	    target->state == SRP_TARGET_REMOVED)
-		return -1;
-
 	init_completion(&target->tsk_mgmt_done);
 
 	spin_lock_irq(&target->lock);
@@ -1729,6 +1784,21 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
 	return ret;
 }
 
+static int srp_slave_configure(struct scsi_device *sdev)
+{
+	struct Scsi_Host *shost = sdev->host;
+	struct srp_target_port *target = host_to_target(shost);
+	struct request_queue *q = sdev->request_queue;
+	unsigned long timeout;
+
+	if (sdev->type == TYPE_DISK) {
+		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
+		blk_queue_rq_timeout(q, timeout);
+	}
+
+	return 0;
+}
+
 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
 			   char *buf)
 {
@@ -1861,6 +1931,7 @@ static struct scsi_host_template srp_template = {
 	.module				= THIS_MODULE,
 	.name				= "InfiniBand SRP initiator",
 	.proc_name			= DRV_NAME,
+	.slave_configure		= srp_slave_configure,
 	.info				= srp_target_info,
 	.queuecommand			= srp_queuecommand,
 	.eh_abort_handler		= srp_abort,
@@ -1894,11 +1965,14 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
 		return PTR_ERR(rport);
 	}
 
+	rport->lld_data = target;
+
 	spin_lock(&host->target_lock);
 	list_add_tail(&target->list, &host->target_list);
 	spin_unlock(&host->target_lock);
 
 	target->state = SRP_TARGET_LIVE;
+	target->connected = false;
 
 	scsi_scan_target(&target->scsi_host->shost_gendev,
 			 0, target->scsi_id, SCAN_WILD_CARD, 0);
@@ -2188,6 +2262,7 @@ static ssize_t srp_create_target(struct device *dev,
 			     sizeof (struct srp_indirect_buf) +
 			     target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
 
+	INIT_WORK(&target->remove_work, srp_remove_work);
 	spin_lock_init(&target->lock);
 	INIT_LIST_HEAD(&target->free_tx);
 	INIT_LIST_HEAD(&target->free_reqs);
@@ -2232,7 +2307,6 @@ static ssize_t srp_create_target(struct device *dev,
 	if (ret)
 		goto err_free_ib;
 
-	target->qp_in_error = 0;
 	ret = srp_connect_target(target);
 	if (ret) {
 		shost_printk(KERN_ERR, target->scsi_host,
@@ -2422,8 +2496,7 @@ static void srp_remove_one(struct ib_device *device)
 {
 	struct srp_device *srp_dev;
 	struct srp_host *host, *tmp_host;
-	LIST_HEAD(target_list);
-	struct srp_target_port *target, *tmp_target;
+	struct srp_target_port *target;
 
 	srp_dev = ib_get_client_data(device, &srp_client);
 
@@ -2436,35 +2509,17 @@ static void srp_remove_one(struct ib_device *device)
 		wait_for_completion(&host->released);
 
 		/*
-		 * Mark all target ports as removed, so we stop queueing
-		 * commands and don't try to reconnect.
+		 * Remove all target ports.
 		 */
 		spin_lock(&host->target_lock);
-		list_for_each_entry(target, &host->target_list, list) {
-			spin_lock_irq(&target->lock);
-			target->state = SRP_TARGET_REMOVED;
-			spin_unlock_irq(&target->lock);
-		}
+		list_for_each_entry(target, &host->target_list, list)
+			srp_queue_remove_work(target);
 		spin_unlock(&host->target_lock);
 
 		/*
-		 * Wait for any reconnection tasks that may have
-		 * started before we marked our target ports as
-		 * removed, and any target port removal tasks.
+		 * Wait for target port removal tasks.
 		 */
-		flush_workqueue(ib_wq);
-
-		list_for_each_entry_safe(target, tmp_target,
-					 &host->target_list, list) {
-			srp_del_scsi_host_attr(target->scsi_host);
-			srp_remove_host(target->scsi_host);
-			scsi_remove_host(target->scsi_host);
-			srp_disconnect_target(target);
-			ib_destroy_cm_id(target->cm_id);
-			srp_free_target_ib(target);
-			srp_free_req_data(target);
-			scsi_host_put(target->scsi_host);
-		}
+		flush_workqueue(system_long_wq);
 
 		kfree(host);
 	}
@@ -2478,6 +2533,7 @@ static void srp_remove_one(struct ib_device *device)
 }
 
 static struct srp_function_template ib_srp_transport_functions = {
+	.rport_delete		 = srp_rport_delete,
 };
 
 static int __init srp_init_module(void)

+ 6 - 5
drivers/infiniband/ulp/srp/ib_srp.h

@@ -80,9 +80,7 @@ enum {
 
 enum srp_target_state {
 	SRP_TARGET_LIVE,
-	SRP_TARGET_CONNECTING,
-	SRP_TARGET_DEAD,
-	SRP_TARGET_REMOVED
+	SRP_TARGET_REMOVED,
 };
 
 enum srp_iu_type {
@@ -163,6 +161,9 @@ struct srp_target_port {
 	struct ib_sa_query     *path_query;
 	int			path_query_id;
 
+	u32			rq_tmo_jiffies;
+	bool			connected;
+
 	struct ib_cm_id	       *cm_id;
 
 	int			max_ti_iu_len;
@@ -173,12 +174,12 @@ struct srp_target_port {
 	struct srp_iu	       *rx_ring[SRP_RQ_SIZE];
 	struct srp_request	req_ring[SRP_CMD_SQ_SIZE];
 
-	struct work_struct	work;
+	struct work_struct	remove_work;
 
 	struct list_head	list;
 	struct completion	done;
 	int			status;
-	int			qp_in_error;
+	bool			qp_in_error;
 
 	struct completion	tsk_mgmt_done;
 	u8			tsk_mgmt_status;

+ 6 - 5
drivers/net/ethernet/mellanox/mlx4/cmd.c

@@ -1498,6 +1498,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
 	u32 reply;
 	u8 is_going_down = 0;
 	int i;
+	unsigned long flags;
 
 	slave_state[slave].comm_toggle ^= 1;
 	reply = (u32) slave_state[slave].comm_toggle << 31;
@@ -1576,12 +1577,12 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
 		mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave);
 		goto reset_slave;
 	}
-	spin_lock(&priv->mfunc.master.slave_state_lock);
+	spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
 	if (!slave_state[slave].is_slave_going_down)
 		slave_state[slave].last_cmd = cmd;
 	else
 		is_going_down = 1;
-	spin_unlock(&priv->mfunc.master.slave_state_lock);
+	spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
 	if (is_going_down) {
 		mlx4_warn(dev, "Slave is going down aborting command(%d)"
 			  " executing from slave:%d\n",
@@ -1597,10 +1598,10 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
 reset_slave:
 	/* cleanup any slave resources */
 	mlx4_delete_all_resources_for_slave(dev, slave);
-	spin_lock(&priv->mfunc.master.slave_state_lock);
+	spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
 	if (!slave_state[slave].is_slave_going_down)
 		slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
-	spin_unlock(&priv->mfunc.master.slave_state_lock);
+	spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
 	/*with slave in the middle of flr, no need to clean resources again.*/
 inform_slave_state:
 	memset(&slave_state[slave].event_eq, 0,
@@ -1755,7 +1756,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
 			spin_lock_init(&s_state->lock);
 		}
 
-		memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
+		memset(&priv->mfunc.master.cmd_eqe, 0, dev->caps.eqe_size);
 		priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
 		INIT_WORK(&priv->mfunc.master.comm_work,
 			  mlx4_master_comm_channel);

+ 1 - 1
drivers/net/ethernet/mellanox/mlx4/en_cq.c

@@ -51,7 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	int err;
 
 	cq->size = entries;
-	cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
+	cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
 
 	cq->ring = ring;
 	cq->is_tx = mode;

+ 1 - 0
drivers/net/ethernet/mellanox/mlx4/en_netdev.c

@@ -1604,6 +1604,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		goto out;
 	}
 	priv->rx_ring_num = prof->rx_ring_num;
+	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
 	priv->mac_index = -1;
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 	spin_lock_init(&priv->stats_lock);

+ 3 - 2
drivers/net/ethernet/mellanox/mlx4/en_rx.c

@@ -566,6 +566,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	struct ethhdr *ethh;
 	dma_addr_t dma;
 	u64 s_mac;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return 0;
@@ -574,7 +575,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	 * descriptor offset can be deduced from the CQE index instead of
 	 * reading 'cqe->index' */
 	index = cq->mcq.cons_index & ring->size_mask;
-	cqe = &cq->buf[index];
+	cqe = &cq->buf[(index << factor) + factor];
 
 	/* Process all completed CQEs */
 	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
@@ -709,7 +710,7 @@ next:
 
 		++cq->mcq.cons_index;
 		index = (cq->mcq.cons_index) & ring->size_mask;
-		cqe = &cq->buf[index];
+		cqe = &cq->buf[(index << factor) + factor];
 		if (++polled == budget)
 			goto out;
 	}

+ 3 - 2
drivers/net/ethernet/mellanox/mlx4/en_tx.c

@@ -315,12 +315,13 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 	struct mlx4_cqe *buf = cq->buf;
 	u32 packets = 0;
 	u32 bytes = 0;
+	int factor = priv->cqe_factor;
 
 	if (!priv->port_up)
 		return;
 
 	index = cons_index & size_mask;
-	cqe = &buf[index];
+	cqe = &buf[(index << factor) + factor];
 	ring_index = ring->cons & size_mask;
 
 	/* Process all completed CQEs */
@@ -349,7 +350,7 @@ static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
 
 		++cons_index;
 		index = cons_index & size_mask;
-		cqe = &buf[index];
+		cqe = &buf[(index << factor) + factor];
 	}
 
 

+ 23 - 13
drivers/net/ethernet/mellanox/mlx4/eq.c

@@ -101,15 +101,21 @@ static void eq_set_ci(struct mlx4_eq *eq, int req_not)
 	mb();
 }
 
-static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry)
+static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
 {
-	unsigned long off = (entry & (eq->nent - 1)) * MLX4_EQ_ENTRY_SIZE;
-	return eq->page_list[off / PAGE_SIZE].buf + off % PAGE_SIZE;
+	/* (entry & (eq->nent - 1)) gives us a cyclic array */
+	unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
+	/* CX3 is capable of extending the EQE from 32 to 64 bytes.
+	 * When this feature is enabled, the first (in the lower addresses)
+	 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
+	 * contain the legacy EQE information.
+	 */
+	return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
 }
 
-static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq)
+static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
 {
-	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index);
+	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
 	return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
 }
 
@@ -177,7 +183,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
 		return;
 	}
 
-	memcpy(s_eqe, eqe, sizeof(struct mlx4_eqe) - 1);
+	memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
 	s_eqe->slave_id = slave;
 	/* ensure all information is written before setting the ownersip bit */
 	wmb();
@@ -401,6 +407,7 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
 	struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
 	int i;
 	int err;
+	unsigned long flags;
 
 	mlx4_dbg(dev, "mlx4_handle_slave_flr\n");
 
@@ -412,10 +419,10 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
 
 			mlx4_delete_all_resources_for_slave(dev, i);
 			/*return the slave to running mode*/
-			spin_lock(&priv->mfunc.master.slave_state_lock);
+			spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
 			slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
 			slave_state[i].is_slave_going_down = 0;
-			spin_unlock(&priv->mfunc.master.slave_state_lock);
+			spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
 			/*notify the FW:*/
 			err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE,
 				       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
@@ -440,8 +447,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 	u8 update_slave_state;
 	int i;
 	enum slave_port_gen_event gen_event;
+	unsigned long flags;
 
-	while ((eqe = next_eqe_sw(eq))) {
+	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
 		/*
 		 * Make sure we read EQ entry contents after we've
 		 * checked the ownership bit.
@@ -647,13 +655,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
 			} else
 				update_slave_state = 1;
 
-			spin_lock(&priv->mfunc.master.slave_state_lock);
+			spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
 			if (update_slave_state) {
 				priv->mfunc.master.slave_state[flr_slave].active = false;
 				priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR;
 				priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
 			}
-			spin_unlock(&priv->mfunc.master.slave_state_lock);
+			spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
 			queue_work(priv->mfunc.master.comm_wq,
 				   &priv->mfunc.master.slave_flr_event_work);
 			break;
@@ -864,7 +872,8 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
 
 	eq->dev   = dev;
 	eq->nent  = roundup_pow_of_two(max(nent, 2));
-	npages = PAGE_ALIGN(eq->nent * MLX4_EQ_ENTRY_SIZE) / PAGE_SIZE;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
 
 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);
@@ -966,8 +975,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_cmd_mailbox *mailbox;
 	int err;
-	int npages = PAGE_ALIGN(MLX4_EQ_ENTRY_SIZE * eq->nent) / PAGE_SIZE;
 	int i;
+	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
+	int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))

+ 29 - 1
drivers/net/ethernet/mellanox/mlx4/fw.c

@@ -110,6 +110,8 @@ static void dump_dev_cap_flags(struct mlx4_dev *dev, u64 flags)
 		[42] = "Multicast VEP steering support",
 		[48] = "Counters support",
 		[59] = "Port management change event support",
+		[61] = "64 byte EQE support",
+		[62] = "64 byte CQE support",
 	};
 	int i;
 
@@ -235,7 +237,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		field = dev->caps.num_ports;
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
 
-		size = 0; /* no PF behaviour is set for now */
+		size = dev->caps.function_caps; /* set PF behaviours */
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
 
 		field = 0; /* protected FMR support not available as yet */
@@ -1237,6 +1239,24 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS)
 		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 4);
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_EQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 29);
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_64B_CQE) {
+		*(inbox + INIT_HCA_EQE_CQE_OFFSETS / 4) |= cpu_to_be32(1 << 30);
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
@@ -1319,6 +1339,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 	struct mlx4_cmd_mailbox *mailbox;
 	__be32 *outbox;
 	int err;
+	u8 byte_field;
 
 #define QUERY_HCA_GLOBAL_CAPS_OFFSET	0x04
 
@@ -1370,6 +1391,13 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
 			 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
 	}
 
+	/* CX3 is capable of extending CQEs/EQEs from 32 to 64 bytes */
+	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_OFFSETS);
+	if (byte_field & 0x20) /* 64-bytes eqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
+	if (byte_field & 0x40) /* 64-bytes cqe enabled */
+		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+
 	/* TPT attributes */
 
 	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);

+ 1 - 0
drivers/net/ethernet/mellanox/mlx4/fw.h

@@ -172,6 +172,7 @@ struct mlx4_init_hca_param {
 	u8  log_uar_sz;
 	u8  uar_page_sz; /* log pg sz in 4k chunks */
 	u8  fs_hash_enable_bits;
+	u64 dev_cap_enabled;
 };
 
 struct mlx4_init_ib_param {

+ 37 - 1
drivers/net/ethernet/mellanox/mlx4/main.c

@@ -95,8 +95,14 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
 					 " Not in use with device managed"
 					 " flow steering");
 
+static bool enable_64b_cqe_eqe;
+module_param(enable_64b_cqe_eqe, bool, 0444);
+MODULE_PARM_DESC(enable_64b_cqe_eqe,
+		 "Enable 64 byte CQEs/EQEs when the the FW supports this");
+
 #define HCA_GLOBAL_CAP_MASK            0
-#define PF_CONTEXT_BEHAVIOUR_MASK      0
+
+#define PF_CONTEXT_BEHAVIOUR_MASK	MLX4_FUNC_CAP_64B_EQE_CQE
 
 static char mlx4_version[] =
 	DRV_NAME ": Mellanox ConnectX core driver v"
@@ -386,6 +392,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
 
 	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
+
+	if (!enable_64b_cqe_eqe) {
+		if (dev_cap->flags &
+		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
+			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
+			dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
+		}
+	}
+
+	if ((dev_cap->flags &
+	    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
+	    mlx4_is_master(dev))
+		dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;
+
 	return 0;
 }
 /*The function checks if there are live vf, return the num of them*/
@@ -599,6 +620,21 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		goto err_mem;
 	}
 
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
+		dev->caps.eqe_size   = 64;
+		dev->caps.eqe_factor = 1;
+	} else {
+		dev->caps.eqe_size   = 32;
+		dev->caps.eqe_factor = 0;
+	}
+
+	if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
+		dev->caps.cqe_size   = 64;
+		dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_64B_CQE;
+	} else {
+		dev->caps.cqe_size   = 32;
+	}
+
 	return 0;
 
 err_mem:

+ 1 - 0
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

@@ -473,6 +473,7 @@ struct mlx4_en_priv {
 	int mac_index;
 	unsigned max_mtu;
 	int base_qpn;
+	int cqe_factor;
 
 	struct mlx4_en_rss_map rss_map;
 	__be32 ctrl_flags;

+ 27 - 24
drivers/scsi/scsi_transport_srp.c

@@ -38,7 +38,7 @@ struct srp_host_attrs {
 #define to_srp_host_attrs(host)	((struct srp_host_attrs *)(host)->shost_data)
 
 #define SRP_HOST_ATTRS 0
-#define SRP_RPORT_ATTRS 2
+#define SRP_RPORT_ATTRS 3
 
 struct srp_internal {
 	struct scsi_transport_template t;
@@ -47,7 +47,6 @@ struct srp_internal {
 	struct device_attribute *host_attrs[SRP_HOST_ATTRS + 1];
 
 	struct device_attribute *rport_attrs[SRP_RPORT_ATTRS + 1];
-	struct device_attribute private_rport_attrs[SRP_RPORT_ATTRS];
 	struct transport_container rport_attr_cont;
 };
 
@@ -72,24 +71,6 @@ static DECLARE_TRANSPORT_CLASS(srp_host_class, "srp_host", srp_host_setup,
 static DECLARE_TRANSPORT_CLASS(srp_rport_class, "srp_remote_ports",
 			       NULL, NULL, NULL);
 
-#define SETUP_TEMPLATE(attrb, field, perm, test, ro_test, ro_perm)	\
-	i->private_##attrb[count] = dev_attr_##field;		\
-	i->private_##attrb[count].attr.mode = perm;			\
-	if (ro_test) {							\
-		i->private_##attrb[count].attr.mode = ro_perm;		\
-		i->private_##attrb[count].store = NULL;			\
-	}								\
-	i->attrb[count] = &i->private_##attrb[count];			\
-	if (test)							\
-		count++
-
-#define SETUP_RPORT_ATTRIBUTE_RD(field)					\
-	SETUP_TEMPLATE(rport_attrs, field, S_IRUGO, 1, 0, 0)
-
-#define SETUP_RPORT_ATTRIBUTE_RW(field)					\
-	SETUP_TEMPLATE(rport_attrs, field, S_IRUGO | S_IWUSR,		\
-		       1, 1, S_IRUGO)
-
 #define SRP_PID(p) \
 	(p)->port_id[0], (p)->port_id[1], (p)->port_id[2], (p)->port_id[3], \
 	(p)->port_id[4], (p)->port_id[5], (p)->port_id[6], (p)->port_id[7], \
@@ -135,6 +116,24 @@ show_srp_rport_roles(struct device *dev, struct device_attribute *attr,
 
 static DEVICE_ATTR(roles, S_IRUGO, show_srp_rport_roles, NULL);
 
+static ssize_t store_srp_rport_delete(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct srp_rport *rport = transport_class_to_srp_rport(dev);
+	struct Scsi_Host *shost = dev_to_shost(dev);
+	struct srp_internal *i = to_srp_internal(shost->transportt);
+
+	if (i->f->rport_delete) {
+		i->f->rport_delete(rport);
+		return count;
+	} else {
+		return -ENOSYS;
+	}
+}
+
+static DEVICE_ATTR(delete, S_IWUSR, NULL, store_srp_rport_delete);
+
 static void srp_rport_release(struct device *dev)
 {
 	struct srp_rport *rport = dev_to_rport(dev);
@@ -324,12 +323,16 @@ srp_attach_transport(struct srp_function_template *ft)
 	i->rport_attr_cont.ac.attrs = &i->rport_attrs[0];
 	i->rport_attr_cont.ac.class = &srp_rport_class.class;
 	i->rport_attr_cont.ac.match = srp_rport_match;
-	transport_container_register(&i->rport_attr_cont);
 
 	count = 0;
-	SETUP_RPORT_ATTRIBUTE_RD(port_id);
-	SETUP_RPORT_ATTRIBUTE_RD(roles);
-	i->rport_attrs[count] = NULL;
+	i->rport_attrs[count++] = &dev_attr_port_id;
+	i->rport_attrs[count++] = &dev_attr_roles;
+	if (ft->rport_delete)
+		i->rport_attrs[count++] = &dev_attr_delete;
+	i->rport_attrs[count++] = NULL;
+	BUG_ON(count > ARRAY_SIZE(i->rport_attrs));
+
+	transport_container_register(&i->rport_attr_cont);
 
 	i->f = ft;
 

+ 21 - 0
include/linux/mlx4/device.h

@@ -142,6 +142,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48,
 	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT	= 1LL << 55,
 	MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV = 1LL << 59,
+	MLX4_DEV_CAP_FLAG_64B_EQE	= 1LL << 61,
+	MLX4_DEV_CAP_FLAG_64B_CQE	= 1LL << 62
 };
 
 enum {
@@ -151,6 +153,20 @@ enum {
 	MLX4_DEV_CAP_FLAG2_FS_EN		= 1LL <<  3
 };
 
+enum {
+	MLX4_DEV_CAP_64B_EQE_ENABLED	= 1LL << 0,
+	MLX4_DEV_CAP_64B_CQE_ENABLED	= 1LL << 1
+};
+
+enum {
+	MLX4_USER_DEV_CAP_64B_CQE	= 1L << 0
+};
+
+enum {
+	MLX4_FUNC_CAP_64B_EQE_CQE	= 1L << 0
+};
+
+
 #define MLX4_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
 
 enum {
@@ -419,6 +435,11 @@ struct mlx4_caps {
 	u32			max_counters;
 	u8			port_ib_mtu[MLX4_MAX_PORTS + 1];
 	u16			sqp_demux;
+	u32			eqe_size;
+	u32			cqe_size;
+	u8			eqe_factor;
+	u32			userspace_caps; /* userspace must be aware of these */
+	u32			function_caps;  /* VFs must be aware of these */
 };
 
 struct mlx4_buf_list {

+ 0 - 6
include/rdma/Kbuild

@@ -1,6 +0,0 @@
-header-y += ib_user_cm.h
-header-y += ib_user_mad.h
-header-y += ib_user_sa.h
-header-y += ib_user_verbs.h
-header-y += rdma_netlink.h
-header-y += rdma_user_cm.h

+ 1 - 35
include/rdma/rdma_netlink.h

@@ -1,41 +1,9 @@
 #ifndef _RDMA_NETLINK_H
 #define _RDMA_NETLINK_H
 
-#include <linux/types.h>
-
-enum {
-	RDMA_NL_RDMA_CM = 1
-};
-
-#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
-#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1))
-#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op)
-
-enum {
-	RDMA_NL_RDMA_CM_ID_STATS = 0,
-	RDMA_NL_RDMA_CM_NUM_OPS
-};
-
-enum {
-	RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1,
-	RDMA_NL_RDMA_CM_ATTR_DST_ADDR,
-	RDMA_NL_RDMA_CM_NUM_ATTR,
-};
-
-struct rdma_cm_id_stats {
-	__u32	qp_num;
-	__u32	bound_dev_if;
-	__u32	port_space;
-	__s32	pid;
-	__u8	cm_state;
-	__u8	node_type;
-	__u8	port_num;
-	__u8	qp_type;
-};
-
-#ifdef __KERNEL__
 
 #include <linux/netlink.h>
+#include <uapi/rdma/rdma_netlink.h>
 
 struct ibnl_client_cbs {
 	int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb);
@@ -88,6 +56,4 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
 int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
 		  int len, void *data, int type);
 
-#endif /* __KERNEL__ */
-
 #endif /* _RDMA_NETLINK_H */

+ 8 - 0
include/scsi/scsi_transport_srp.h

@@ -14,13 +14,21 @@ struct srp_rport_identifiers {
 };
 
 struct srp_rport {
+	/* for initiator and target drivers */
+
 	struct device dev;
 
 	u8 port_id[16];
 	u8 roles;
+
+	/* for initiator drivers */
+
+	void *lld_data;	/* LLD private data */
 };
 
 struct srp_function_template {
+	/* for initiator drivers */
+	void (*rport_delete)(struct srp_rport *rport);
 	/* for target drivers */
 	int (* tsk_mgmt_response)(struct Scsi_Host *, u64, u64, int);
 	int (* it_nexus_response)(struct Scsi_Host *, u64, int);

+ 6 - 0
include/uapi/rdma/Kbuild

@@ -1 +1,7 @@
 # UAPI Header export list
+header-y += ib_user_cm.h
+header-y += ib_user_mad.h
+header-y += ib_user_sa.h
+header-y += ib_user_verbs.h
+header-y += rdma_netlink.h
+header-y += rdma_user_cm.h

+ 0 - 0
include/rdma/ib_user_cm.h → include/uapi/rdma/ib_user_cm.h


+ 0 - 0
include/rdma/ib_user_mad.h → include/uapi/rdma/ib_user_mad.h


+ 0 - 0
include/rdma/ib_user_sa.h → include/uapi/rdma/ib_user_sa.h


+ 0 - 0
include/rdma/ib_user_verbs.h → include/uapi/rdma/ib_user_verbs.h


+ 37 - 0
include/uapi/rdma/rdma_netlink.h

@@ -0,0 +1,37 @@
+#ifndef _UAPI_RDMA_NETLINK_H
+#define _UAPI_RDMA_NETLINK_H
+
+#include <linux/types.h>
+
+enum {
+	RDMA_NL_RDMA_CM = 1
+};
+
+#define RDMA_NL_GET_CLIENT(type) ((type & (((1 << 6) - 1) << 10)) >> 10)
+#define RDMA_NL_GET_OP(type) (type & ((1 << 10) - 1))
+#define RDMA_NL_GET_TYPE(client, op) ((client << 10) + op)
+
+enum {
+	RDMA_NL_RDMA_CM_ID_STATS = 0,
+	RDMA_NL_RDMA_CM_NUM_OPS
+};
+
+enum {
+	RDMA_NL_RDMA_CM_ATTR_SRC_ADDR = 1,
+	RDMA_NL_RDMA_CM_ATTR_DST_ADDR,
+	RDMA_NL_RDMA_CM_NUM_ATTR,
+};
+
+struct rdma_cm_id_stats {
+	__u32	qp_num;
+	__u32	bound_dev_if;
+	__u32	port_space;
+	__s32	pid;
+	__u8	cm_state;
+	__u8	node_type;
+	__u8	port_num;
+	__u8	qp_type;
+};
+
+
+#endif /* _UAPI_RDMA_NETLINK_H */

+ 0 - 0
include/rdma/rdma_user_cm.h → include/uapi/rdma/rdma_user_cm.h