Bladeren bron

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Linus Torvalds 19 jaren geleden
bovenliggende
commit
5367f2d67c

+ 11 - 5
drivers/infiniband/core/cm.c

@@ -308,10 +308,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
 	unsigned long flags;
 	int ret;
+	static int next_id;
 
 	do {
 		spin_lock_irqsave(&cm.lock, flags);
-		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1,
+		ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++,
 					(__force int *) &cm_id_priv->id.local_id);
 		spin_unlock_irqrestore(&cm.lock, flags);
 	} while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
@@ -684,6 +685,13 @@ retest:
 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
 		break;
 	case IB_CM_REQ_SENT:
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
+			       &cm_id_priv->av.port->cm_dev->ca_guid,
+			       sizeof cm_id_priv->av.port->cm_dev->ca_guid,
+			       NULL, 0);
+		break;
 	case IB_CM_MRA_REQ_RCVD:
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
@@ -694,10 +702,8 @@ retest:
 	case IB_CM_REP_RCVD:
 	case IB_CM_MRA_REP_SENT:
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
-			       &cm_id_priv->av.port->cm_dev->ca_guid,
-			       sizeof cm_id_priv->av.port->cm_dev->ca_guid,
-			       NULL, 0);
+		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+			       NULL, 0, NULL, 0);
 		break;
 	case IB_CM_ESTABLISHED:
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);

+ 2 - 2
drivers/infiniband/core/user_mad.c

@@ -197,8 +197,8 @@ static void send_handler(struct ib_mad_agent *agent,
 		memcpy(timeout->mad.data, packet->mad.data,
 		       sizeof (struct ib_mad_hdr));
 
-		if (!queue_packet(file, agent, timeout))
-				return;
+		if (queue_packet(file, agent, timeout))
+			kfree(timeout);
 	}
 out:
 	kfree(packet);

+ 18 - 3
drivers/infiniband/core/uverbs_cmd.c

@@ -489,6 +489,7 @@ err_idr:
 
 err_unreg:
 	ib_dereg_mr(mr);
+	atomic_dec(&pd->usecnt);
 
 err_up:
 	up(&ib_uverbs_idr_mutex);
@@ -593,13 +594,18 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 	if (cmd.comp_vector >= file->device->num_comp_vectors)
 		return -EINVAL;
 
-	if (cmd.comp_channel >= 0)
-		ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
-
 	uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
 	if (!uobj)
 		return -ENOMEM;
 
+	if (cmd.comp_channel >= 0) {
+		ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+		if (!ev_file) {
+			ret = -EINVAL;
+			goto err;
+		}
+	}
+
 	uobj->uobject.user_handle   = cmd.user_handle;
 	uobj->uobject.context       = file->ucontext;
 	uobj->uverbs_file	    = file;
@@ -663,6 +669,8 @@ err_up:
 	ib_destroy_cq(cq);
 
 err:
+	if (ev_file)
+		ib_uverbs_release_ucq(file, ev_file, uobj);
 	kfree(uobj);
 	return ret;
 }
@@ -935,6 +943,11 @@ err_idr:
 
 err_destroy:
 	ib_destroy_qp(qp);
+	atomic_dec(&pd->usecnt);
+	atomic_dec(&attr.send_cq->usecnt);
+	atomic_dec(&attr.recv_cq->usecnt);
+	if (attr.srq)
+		atomic_dec(&attr.srq->usecnt);
 
 err_up:
 	up(&ib_uverbs_idr_mutex);
@@ -1448,6 +1461,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	attr.sl 	       = cmd.attr.sl;
 	attr.src_path_bits     = cmd.attr.src_path_bits;
 	attr.static_rate       = cmd.attr.static_rate;
+	attr.ah_flags          = cmd.attr.is_global ? IB_AH_GRH : 0;
 	attr.port_num 	       = cmd.attr.port_num;
 	attr.grh.flow_label    = cmd.attr.grh.flow_label;
 	attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
@@ -1729,6 +1743,7 @@ err_idr:
 
 err_destroy:
 	ib_destroy_srq(srq);
+	atomic_dec(&pd->usecnt);
 
 err_up:
 	up(&ib_uverbs_idr_mutex);

+ 2 - 2
drivers/infiniband/core/verbs.c

@@ -107,9 +107,9 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
 
 	if (wc->wc_flags & IB_WC_GRH) {
 		ah_attr.ah_flags = IB_AH_GRH;
-		ah_attr.grh.dgid = grh->dgid;
+		ah_attr.grh.dgid = grh->sgid;
 
-		ret = ib_find_cached_gid(pd->device, &grh->sgid, &port_num,
+		ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num,
 					 &gid_index);
 		if (ret)
 			return ERR_PTR(ret);

+ 8 - 4
drivers/infiniband/hw/mthca/mthca_cmd.c

@@ -937,10 +937,6 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	if (err)
 		goto out;
 
-	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
-	dev_lim->max_srq_sz = (1 << field) - 1;
-	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
-	dev_lim->max_qp_sz = (1 << field) - 1;
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
 	dev_lim->reserved_qps = 1 << (field & 0xf);
 	MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
@@ -1056,6 +1052,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 	mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
 
 	if (mthca_is_memfree(dev)) {
+		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+		dev_lim->max_srq_sz = 1 << field;
+		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+		dev_lim->max_qp_sz = 1 << field;
 		MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
 		dev_lim->hca.arbel.resize_srq = field & 1;
 		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
@@ -1087,6 +1087,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
 		mthca_dbg(dev, "Max ICM size %lld MB\n",
 			  (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20);
 	} else {
+		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+		dev_lim->max_srq_sz = (1 << field) - 1;
+		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+		dev_lim->max_qp_sz = (1 << field) - 1;
 		MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET);
 		dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f);
 		dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE;

+ 17 - 6
drivers/infiniband/hw/mthca/mthca_cq.c

@@ -128,12 +128,12 @@ struct mthca_err_cqe {
 	__be32 my_qpn;
 	u32    reserved1[3];
 	u8     syndrome;
-	u8     reserved2;
+	u8     vendor_err;
 	__be16 db_cnt;
-	u32    reserved3;
+	u32    reserved2;
 	__be32 wqe;
 	u8     opcode;
-	u8     reserved4[2];
+	u8     reserved3[2];
 	u8     owner;
 };
 
@@ -253,6 +253,15 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
 		wake_up(&cq->wait);
 }
 
+static inline int is_recv_cqe(struct mthca_cqe *cqe)
+{
+	if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
+	    MTHCA_ERROR_CQE_OPCODE_MASK)
+		return !(cqe->opcode & 0x01);
+	else
+		return !(cqe->is_send & 0x80);
+}
+
 void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
 		    struct mthca_srq *srq)
 {
@@ -296,7 +305,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
 	while ((int) --prod_index - (int) cq->cons_index >= 0) {
 		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
 		if (cqe->my_qpn == cpu_to_be32(qpn)) {
-			if (srq)
+			if (srq && is_recv_cqe(cqe))
 				mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
 			++nfreed;
 		} else if (nfreed)
@@ -333,8 +342,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
 	}
 
 	/*
-	 * For completions in error, only work request ID, status (and
-	 * freed resource count for RD) have to be set.
+	 * For completions in error, only work request ID, status, vendor error
+	 * (and freed resource count for RD) have to be set.
 	 */
 	switch (cqe->syndrome) {
 	case SYNDROME_LOCAL_LENGTH_ERR:
@@ -396,6 +405,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
 		break;
 	}
 
+	entry->vendor_err = cqe->vendor_err;
+
 	/*
 	 * Mem-free HCAs always generate one CQE per WQE, even in the
 	 * error case, so we don't have to check the doorbell count, etc.

+ 2 - 2
drivers/infiniband/hw/mthca/mthca_eq.c

@@ -484,8 +484,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 				     u8 intr,
 				     struct mthca_eq *eq)
 {
-	int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
-		PAGE_SIZE;
+	int npages;
 	u64 *dma_list = NULL;
 	dma_addr_t t;
 	struct mthca_mailbox *mailbox;
@@ -496,6 +495,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 
 	eq->dev  = dev;
 	eq->nent = roundup_pow_of_two(max(nent, 2));
+ 	npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
 
 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
 				GFP_KERNEL);

+ 4 - 0
drivers/infiniband/hw/mthca/mthca_main.c

@@ -261,6 +261,10 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
 	}
 
 	err = mthca_dev_lim(mdev, &dev_lim);
+	if (err) {
+		mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+		goto err_disable;
+	}
 
 	profile = default_profile;
 	profile.num_uar   = dev_lim.uar_size / PAGE_SIZE;

+ 35 - 19
drivers/infiniband/hw/mthca/mthca_mcg.c

@@ -111,7 +111,8 @@ static int find_mgm(struct mthca_dev *dev,
 			goto out;
 		if (status) {
 			mthca_err(dev, "READ_MGM returned status %02x\n", status);
-			return -EINVAL;
+			err = -EINVAL;
+			goto out;
 		}
 
 		if (!memcmp(mgm->gid, zero_gid, 16)) {
@@ -126,7 +127,7 @@ static int find_mgm(struct mthca_dev *dev,
 			goto out;
 
 		*prev = *index;
-		*index = be32_to_cpu(mgm->next_gid_index) >> 5;
+		*index = be32_to_cpu(mgm->next_gid_index) >> 6;
 	} while (*index);
 
 	*index = -1;
@@ -153,8 +154,10 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		return PTR_ERR(mailbox);
 	mgm = mailbox->buf;
 
-	if (down_interruptible(&dev->mcg_table.sem))
-		return -EINTR;
+	if (down_interruptible(&dev->mcg_table.sem)) {
+		err = -EINTR;
+		goto err_sem;
+	}
 
 	err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
 	if (err)
@@ -181,9 +184,8 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 			err = -EINVAL;
 			goto out;
 		}
-
+		memset(mgm, 0, sizeof *mgm);
 		memcpy(mgm->gid, gid->raw, 16);
-		mgm->next_gid_index = 0;
 	}
 
 	for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
@@ -209,6 +211,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	if (status) {
 		mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
 		err = -EINVAL;
+		goto out;
 	}
 
 	if (!link)
@@ -223,7 +226,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		goto out;
 	}
 
-	mgm->next_gid_index = cpu_to_be32(index << 5);
+	mgm->next_gid_index = cpu_to_be32(index << 6);
 
 	err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
 	if (err)
@@ -234,7 +237,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	}
 
  out:
+	if (err && link && index != -1) {
+		BUG_ON(index < dev->limits.num_mgms);
+		mthca_free(&dev->mcg_table.alloc, index);
+	}
 	up(&dev->mcg_table.sem);
+ err_sem:
 	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
@@ -255,8 +263,10 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		return PTR_ERR(mailbox);
 	mgm = mailbox->buf;
 
-	if (down_interruptible(&dev->mcg_table.sem))
-		return -EINTR;
+	if (down_interruptible(&dev->mcg_table.sem)) {
+		err = -EINTR;
+		goto err_sem;
+	}
 
 	err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
 	if (err)
@@ -305,13 +315,11 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	if (i != 1)
 		goto out;
 
-	goto out;
-
 	if (prev == -1) {
 		/* Remove entry from MGM */
-		if (be32_to_cpu(mgm->next_gid_index) >> 5) {
-			err = mthca_READ_MGM(dev,
-					     be32_to_cpu(mgm->next_gid_index) >> 5,
+		int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6;
+		if (amgm_index_to_free) {
+			err = mthca_READ_MGM(dev, amgm_index_to_free,
 					     mailbox, &status);
 			if (err)
 				goto out;
@@ -332,9 +340,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 			err = -EINVAL;
 			goto out;
 		}
+		if (amgm_index_to_free) {
+			BUG_ON(amgm_index_to_free < dev->limits.num_mgms);
+			mthca_free(&dev->mcg_table.alloc, amgm_index_to_free);
+		}
 	} else {
 		/* Remove entry from AMGM */
-		index = be32_to_cpu(mgm->next_gid_index) >> 5;
+		int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
 		err = mthca_READ_MGM(dev, prev, mailbox, &status);
 		if (err)
 			goto out;
@@ -344,7 +356,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 			goto out;
 		}
 
-		mgm->next_gid_index = cpu_to_be32(index << 5);
+		mgm->next_gid_index = cpu_to_be32(curr_next_index << 6);
 
 		err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
 		if (err)
@@ -354,10 +366,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 			err = -EINVAL;
 			goto out;
 		}
+		BUG_ON(index < dev->limits.num_mgms);
+		mthca_free(&dev->mcg_table.alloc, index);
 	}
 
  out:
 	up(&dev->mcg_table.sem);
+ err_sem:
 	mthca_free_mailbox(dev, mailbox);
 	return err;
 }
@@ -365,11 +380,12 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 int __devinit mthca_init_mcg_table(struct mthca_dev *dev)
 {
 	int err;
+	int table_size = dev->limits.num_mgms + dev->limits.num_amgms;
 
 	err = mthca_alloc_init(&dev->mcg_table.alloc,
-			       dev->limits.num_amgms,
-			       dev->limits.num_amgms - 1,
-			       0);
+			       table_size,
+			       table_size - 1,
+			       dev->limits.num_mgms);
 	if (err)
 		return err;
 

+ 3 - 1
drivers/infiniband/hw/mthca/mthca_memfree.c

@@ -233,7 +233,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj)
 		for (i = 0; i < chunk->npages; ++i) {
 			if (chunk->mem[i].length >= offset) {
 				page = chunk->mem[i].page;
-				break;
+				goto out;
 			}
 			offset -= chunk->mem[i].length;
 		}
@@ -485,6 +485,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
 			put_page(db_tab->page[i].mem.page);
 		}
 	}
+
+	kfree(db_tab);
 }
 
 int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,

+ 147 - 118
drivers/infiniband/hw/mthca/mthca_qp.c

@@ -383,12 +383,10 @@ static const struct {
 				[UC]  = (IB_QP_CUR_STATE             |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ACCESS_FLAGS          |
-					 IB_QP_PKEY_INDEX            |
 					 IB_QP_PATH_MIG_STATE),
 				[RC]  = (IB_QP_CUR_STATE             |
 					 IB_QP_ALT_PATH              |
 					 IB_QP_ACCESS_FLAGS          |
-					 IB_QP_PKEY_INDEX            |
 					 IB_QP_MIN_RNR_TIMER         |
 					 IB_QP_PATH_MIG_STATE),
 				[MLX] = (IB_QP_CUR_STATE             |
@@ -476,9 +474,8 @@ static const struct {
 			.opt_param = {
 				[UD]  = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
-				[UC]  = IB_QP_CUR_STATE,
-				[RC]  = (IB_QP_CUR_STATE             |
-					 IB_QP_MIN_RNR_TIMER),
+				[UC]  = (IB_QP_CUR_STATE             |
+					 IB_QP_ACCESS_FLAGS),
 				[MLX] = (IB_QP_CUR_STATE             |
 					 IB_QP_QKEY),
 			}
@@ -522,6 +519,55 @@ static void init_port(struct mthca_dev *dev, int port)
 		mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
 }
 
+static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
+				  int attr_mask)
+{
+	u8 dest_rd_atomic;
+	u32 access_flags;
+	u32 hw_access_flags = 0;
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+		dest_rd_atomic = attr->max_dest_rd_atomic;
+	else
+		dest_rd_atomic = qp->resp_depth;
+
+	if (attr_mask & IB_QP_ACCESS_FLAGS)
+		access_flags = attr->qp_access_flags;
+	else
+		access_flags = qp->atomic_rd_en;
+
+	if (!dest_rd_atomic)
+		access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+	if (access_flags & IB_ACCESS_REMOTE_READ)
+		hw_access_flags |= MTHCA_QP_BIT_RRE;
+	if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+		hw_access_flags |= MTHCA_QP_BIT_RAE;
+	if (access_flags & IB_ACCESS_REMOTE_WRITE)
+		hw_access_flags |= MTHCA_QP_BIT_RWE;
+
+	return cpu_to_be32(hw_access_flags);
+}
+
+static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path)
+{
+	path->g_mylmc     = ah->src_path_bits & 0x7f;
+	path->rlid        = cpu_to_be16(ah->dlid);
+	path->static_rate = !!ah->static_rate;
+
+	if (ah->ah_flags & IB_AH_GRH) {
+		path->g_mylmc   |= 1 << 7;
+		path->mgid_index = ah->grh.sgid_index;
+		path->hop_limit  = ah->grh.hop_limit;
+		path->sl_tclass_flowlabel = 
+			cpu_to_be32((ah->sl << 28)                |
+				    (ah->grh.traffic_class << 20) | 
+				    (ah->grh.flow_label));
+		memcpy(path->rgid, ah->grh.dgid.raw, 16);
+	} else
+		path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
+}
+
 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
@@ -591,6 +637,26 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		return -EINVAL;
 	}
 
+	if ((attr_mask & IB_QP_PORT) &&
+	    (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
+		mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
+		return -EINVAL;
+	}
+
+	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+	    attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
+		mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
+			  attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
+		return -EINVAL;
+	}
+
+	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+	    attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
+		mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
+			  attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
+		return -EINVAL;
+	}
+
 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
 	if (IS_ERR(mailbox))
 		return PTR_ERR(mailbox);
@@ -665,28 +731,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	}
 
 	if (attr_mask & IB_QP_RNR_RETRY) {
-		qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
-		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY);
+		qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
+			attr->rnr_retry << 5;
+		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY | 
+							MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
 	}
 
 	if (attr_mask & IB_QP_AV) {
-		qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
-		qp_context->pri_path.rlid        = cpu_to_be16(attr->ah_attr.dlid);
-		qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate;
-		if (attr->ah_attr.ah_flags & IB_AH_GRH) {
-			qp_context->pri_path.g_mylmc |= 1 << 7;
-			qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
-			qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
-			qp_context->pri_path.sl_tclass_flowlabel =
-				cpu_to_be32((attr->ah_attr.sl << 28)                |
-					    (attr->ah_attr.grh.traffic_class << 20) |
-					    (attr->ah_attr.grh.flow_label));
-			memcpy(qp_context->pri_path.rgid,
-			       attr->ah_attr.grh.dgid.raw, 16);
-		} else {
-			qp_context->pri_path.sl_tclass_flowlabel =
-				cpu_to_be32(attr->ah_attr.sl << 28);
-		}
+		mthca_path_set(&attr->ah_attr, &qp_context->pri_path);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
 	}
 
@@ -695,7 +747,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
 	}
 
-	/* XXX alt_path */
+	if (attr_mask & IB_QP_ALT_PATH) {
+		if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
+			mthca_dbg(dev, "Alternate port number (%u) is invalid\n", 
+				attr->alt_port_num);
+			return -EINVAL;
+		}
+
+		mthca_path_set(&attr->alt_ah_attr, &qp_context->alt_path);
+		qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | 
+							      attr->alt_port_num << 24);
+		qp_context->alt_path.ackto = attr->alt_timeout << 3;
+		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
+	}
 
 	/* leave rdd as 0 */
 	qp_context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
@@ -703,9 +767,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	qp_context->wqe_lkey   = cpu_to_be32(qp->mr.ibmr.lkey);
 	qp_context->params1    = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
 					     (MTHCA_FLIGHT_LIMIT << 24) |
-					     MTHCA_QP_BIT_SRE           |
-					     MTHCA_QP_BIT_SWE           |
-					     MTHCA_QP_BIT_SAE);
+					     MTHCA_QP_BIT_SWE);
 	if (qp->sq_policy == IB_SIGNAL_ALL_WR)
 		qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
 	if (attr_mask & IB_QP_RETRY_CNT) {
@@ -714,9 +776,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	}
 
 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
-		qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ?
-						       ffs(attr->max_rd_atomic) - 1 : 0,
-						       7) << 21);
+		if (attr->max_rd_atomic) {
+			qp_context->params1 |=
+				cpu_to_be32(MTHCA_QP_BIT_SRE |
+					    MTHCA_QP_BIT_SAE);
+			qp_context->params1 |=
+				cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
+		}
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
 	}
 
@@ -729,71 +795,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		qp_context->snd_db_index   = cpu_to_be32(qp->sq.db_index);
 	}
 
-	if (attr_mask & IB_QP_ACCESS_FLAGS) {
-		qp_context->params2 |=
-			cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
-				    MTHCA_QP_BIT_RWE : 0);
-
-		/*
-		 * Only enable RDMA reads and atomics if we have
-		 * responder resources set to a non-zero value.
-		 */
-		if (qp->resp_depth) {
-			qp_context->params2 |=
-				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
-					    MTHCA_QP_BIT_RRE : 0);
-			qp_context->params2 |=
-				cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ?
-					    MTHCA_QP_BIT_RAE : 0);
-		}
-
-		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
-							MTHCA_QP_OPTPAR_RRE |
-							MTHCA_QP_OPTPAR_RAE);
-
-		qp->atomic_rd_en = attr->qp_access_flags;
-	}
-
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-		u8 rra_max;
-
-		if (qp->resp_depth && !attr->max_dest_rd_atomic) {
-			/*
-			 * Lowering our responder resources to zero.
-			 * Turn off reads RDMA and atomics as responder.
-			 * (RRE/RAE in params2 already zero)
-			 */
-			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE |
-								MTHCA_QP_OPTPAR_RAE);
-		}
-
-		if (!qp->resp_depth && attr->max_dest_rd_atomic) {
-			/*
-			 * Increasing our responder resources from
-			 * zero.  Turn on RDMA reads and atomics as
-			 * appropriate.
-			 */
+		if (attr->max_dest_rd_atomic)
 			qp_context->params2 |=
-				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ?
-					    MTHCA_QP_BIT_RRE : 0);
-			qp_context->params2 |=
-				cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
-					    MTHCA_QP_BIT_RAE : 0);
-
-			qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE |
-								MTHCA_QP_OPTPAR_RAE);
-		}
+				cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
 
-		for (rra_max = 0;
-		     1 << rra_max < attr->max_dest_rd_atomic &&
-			     rra_max < dev->qp_table.rdb_shift;
-		     ++rra_max)
-			; /* nothing */
-
-		qp_context->params2      |= cpu_to_be32(rra_max << 21);
 		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
+	}
 
-		qp->resp_depth = attr->max_dest_rd_atomic;
+	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+		qp_context->params2      |= get_hw_access_flags(qp, attr, attr_mask);
+		qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
+							MTHCA_QP_OPTPAR_RRE |
+							MTHCA_QP_OPTPAR_RAE);
 	}
 
 	qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
@@ -835,8 +849,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 		err = -EINVAL;
 	}
 
-	if (!err)
+	if (!err) {
 		qp->state = new_state;
+		if (attr_mask & IB_QP_ACCESS_FLAGS)
+			qp->atomic_rd_en = attr->qp_access_flags;
+		if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+			qp->resp_depth = attr->max_dest_rd_atomic;
+	}
 
 	mthca_free_mailbox(dev, mailbox);
 
@@ -885,18 +904,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 	return err;
 }
 
-static void mthca_adjust_qp_caps(struct mthca_dev *dev,
-				 struct mthca_pd *pd,
-				 struct mthca_qp *qp)
+static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
 {
-	int max_data_size;
-
 	/*
 	 * Calculate the maximum size of WQE s/g segments, excluding
 	 * the next segment and other non-data segments.
 	 */
-	max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) -
-		sizeof (struct mthca_next_seg);
+	int max_data_size = desc_sz - sizeof (struct mthca_next_seg);
 
 	switch (qp->transport) {
 	case MLX:
@@ -915,11 +929,24 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev,
 		break;
 	}
 
+	return max_data_size;
+}
+
+static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
+{
 	/* We don't support inline data for kernel QPs (yet). */
-	if (!pd->ibpd.uobject)
-		qp->max_inline_data = 0;
-        else
-		qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE;
+	return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0;
+}
+
+static void mthca_adjust_qp_caps(struct mthca_dev *dev,
+				 struct mthca_pd *pd,
+				 struct mthca_qp *qp)
+{
+	int max_data_size = mthca_max_data_size(dev, qp,
+						min(dev->limits.max_desc_sz,
+						    1 << qp->sq.wqe_shift));
+
+	qp->max_inline_data = mthca_max_inline_data(pd, max_data_size);
 
 	qp->sq.max_gs = min_t(int, dev->limits.max_sg,
 			      max_data_size / sizeof (struct mthca_data_seg));
@@ -1186,13 +1213,23 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
 }
 
 static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
-			     struct mthca_qp *qp)
+			     struct mthca_pd *pd, struct mthca_qp *qp)
 {
+	int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);
+
 	/* Sanity check QP size before proceeding */
-	if (cap->max_send_wr  > dev->limits.max_wqes ||
-	    cap->max_recv_wr  > dev->limits.max_wqes ||
-	    cap->max_send_sge > dev->limits.max_sg   ||
-	    cap->max_recv_sge > dev->limits.max_sg)
+	if (cap->max_send_wr  	 > dev->limits.max_wqes ||
+	    cap->max_recv_wr  	 > dev->limits.max_wqes ||
+	    cap->max_send_sge 	 > dev->limits.max_sg   ||
+	    cap->max_recv_sge 	 > dev->limits.max_sg   ||
+	    cap->max_inline_data > mthca_max_inline_data(pd, max_data_size))
+		return -EINVAL;
+
+	/*
+	 * For MLX transport we need 2 extra S/G entries:
+	 * one for the header and one for the checksum at the end
+	 */
+	if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg)
 		return -EINVAL;
 
 	if (mthca_is_memfree(dev)) {
@@ -1211,14 +1248,6 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
 				    MTHCA_INLINE_CHUNK_SIZE) /
 			      sizeof (struct mthca_data_seg));
 
-	/*
-	 * For MLX transport we need 2 extra S/G entries:
-	 * one for the header and one for the checksum at the end
-	 */
-	if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) ||
-	    qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg)
-		return -EINVAL;
-
 	return 0;
 }
 
@@ -1233,7 +1262,7 @@ int mthca_alloc_qp(struct mthca_dev *dev,
 {
 	int err;
 
-	err = mthca_set_qp_size(dev, cap, qp);
+	err = mthca_set_qp_size(dev, cap, pd, qp);
 	if (err)
 		return err;
 
@@ -1276,7 +1305,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
 	u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
 	int err;
 
-	err = mthca_set_qp_size(dev, cap, &sqp->qp);
+	err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
 	if (err)
 		return err;
 

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_srq.c

@@ -201,7 +201,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
 	if (mthca_is_memfree(dev))
 		srq->max = roundup_pow_of_two(srq->max + 1);
 
-	ds = min(64UL,
+	ds = max(64UL,
 		 roundup_pow_of_two(sizeof (struct mthca_next_seg) +
 				    srq->max_gs * sizeof (struct mthca_data_seg)));
 	srq->wqe_shift = long_log2(ds);