
IB/mlx4: Add support for masked atomic operations

Add support for masked atomic operations (masked compare and swap,
masked fetch and add).

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Vladimir Sokolovsky, 15 years ago
commit 6fa8f71984
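
For context, a minimal userspace sketch of the intended semantics of the two operations on the 64-bit target, as I read the masked-atomic verbs extensions this patch wires up; the helper names are illustrative only, not any driver API:

#include <stdint.h>

/* Masked compare & swap: only bits under compare_mask take part in the
 * comparison, and only bits under swap_mask are replaced on a match. */
static uint64_t masked_cmp_swap(uint64_t *target, uint64_t compare,
				uint64_t compare_mask, uint64_t swap,
				uint64_t swap_mask)
{
	uint64_t old = *target;

	if ((old & compare_mask) == (compare & compare_mask))
		*target = (old & ~swap_mask) | (swap & swap_mask);
	return old;	/* pre-operation value goes back to the initiator */
}

/* Masked fetch & add: bits set in field_mask mark field boundaries; an
 * addition carry out of a boundary bit is discarded rather than
 * propagated, so each field is added independently. */
static uint64_t masked_fetch_add(uint64_t *target, uint64_t add,
				 uint64_t field_mask)
{
	uint64_t old = *target, sum = 0;
	unsigned int carry = 0;
	int i;

	for (i = 0; i < 64; i++) {
		unsigned int a = (old >> i) & 1, b = (add >> i) & 1;

		sum |= (uint64_t)(a ^ b ^ carry) << i;
		carry = (a & b) | (a & carry) | (b & carry);
		if ((field_mask >> i) & 1)
			carry = 0;	/* kill the carry at a field boundary */
	}
	*target = sum;
	return old;
}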

+ 8 - 0
drivers/infiniband/hw/mlx4/cq.c

@@ -661,6 +661,14 @@ repoll:
 			wc->opcode    = IB_WC_FETCH_ADD;
 			wc->byte_len  = 8;
 			break;
+		case MLX4_OPCODE_MASKED_ATOMIC_CS:
+			wc->opcode    = IB_WC_MASKED_COMP_SWAP;
+			wc->byte_len  = 8;
+			break;
+		case MLX4_OPCODE_MASKED_ATOMIC_FA:
+			wc->opcode    = IB_WC_MASKED_FETCH_ADD;
+			wc->byte_len  = 8;
+			break;
 		case MLX4_OPCODE_BIND_MW:
 			wc->opcode    = IB_WC_BIND_MW;
 			break;
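
On the completion path, a consumer polling the CQ will now see the two new work-completion opcodes. A minimal sketch, assuming cq was created elsewhere by the consumer:

#include <rdma/ib_verbs.h>

/* Illustrative poll loop handling the new masked-atomic completions. */
static void drain_cq(struct ib_cq *cq)
{
	struct ib_wc wc;

	while (ib_poll_cq(cq, 1, &wc) > 0) {
		if (wc.status != IB_WC_SUCCESS)
			continue;

		switch (wc.opcode) {
		case IB_WC_MASKED_COMP_SWAP:
		case IB_WC_MASKED_FETCH_ADD:
			/* byte_len is 8: the 64-bit pre-operation value
			 * was written to the request's local buffer. */
			break;
		default:
			break;
		}
	}
}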

+ 1 - 0
drivers/infiniband/hw/mlx4/main.c

@@ -139,6 +139,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
 	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
 		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+	props->masked_atomic_cap   = IB_ATOMIC_HCA;
 	props->max_pkeys	   = dev->dev->caps.pkey_table_len[1];
 	props->max_mcast_grp	   = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
 	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
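
Consumers can discover the capability through the usual device query; a sketch assuming an ib_device obtained from the client's add() callback (the masked_atomic_cap attribute itself comes from the companion IB core change):

#include <rdma/ib_verbs.h>

/* Illustrative capability check before issuing masked atomics. */
static bool supports_masked_atomics(struct ib_device *device)
{
	struct ib_device_attr attr;

	if (ib_query_device(device, &attr))
		return false;

	return attr.masked_atomic_cap != IB_ATOMIC_NONE;
}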

+ 39 - 11
drivers/infiniband/hw/mlx4/qp.c

@@ -74,17 +74,19 @@ enum {
 };
 
 static const __be32 mlx4_ib_opcode[] = {
-	[IB_WR_SEND]			= cpu_to_be32(MLX4_OPCODE_SEND),
-	[IB_WR_LSO]			= cpu_to_be32(MLX4_OPCODE_LSO),
-	[IB_WR_SEND_WITH_IMM]		= cpu_to_be32(MLX4_OPCODE_SEND_IMM),
-	[IB_WR_RDMA_WRITE]		= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
-	[IB_WR_RDMA_WRITE_WITH_IMM]	= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
-	[IB_WR_RDMA_READ]		= cpu_to_be32(MLX4_OPCODE_RDMA_READ),
-	[IB_WR_ATOMIC_CMP_AND_SWP]	= cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
-	[IB_WR_ATOMIC_FETCH_AND_ADD]	= cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
-	[IB_WR_SEND_WITH_INV]		= cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
-	[IB_WR_LOCAL_INV]		= cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
-	[IB_WR_FAST_REG_MR]		= cpu_to_be32(MLX4_OPCODE_FMR),
+	[IB_WR_SEND]				= cpu_to_be32(MLX4_OPCODE_SEND),
+	[IB_WR_LSO]				= cpu_to_be32(MLX4_OPCODE_LSO),
+	[IB_WR_SEND_WITH_IMM]			= cpu_to_be32(MLX4_OPCODE_SEND_IMM),
+	[IB_WR_RDMA_WRITE]			= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
+	[IB_WR_RDMA_WRITE_WITH_IMM]		= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
+	[IB_WR_RDMA_READ]			= cpu_to_be32(MLX4_OPCODE_RDMA_READ),
+	[IB_WR_ATOMIC_CMP_AND_SWP]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
+	[IB_WR_ATOMIC_FETCH_AND_ADD]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+	[IB_WR_SEND_WITH_INV]			= cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+	[IB_WR_LOCAL_INV]			= cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+	[IB_WR_FAST_REG_MR]			= cpu_to_be32(MLX4_OPCODE_FMR),
+	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
+	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
 	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 		aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
 		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
+	} else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask);
 	} else {
 		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
 		aseg->compare  = 0;
@@ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
 
 }
 
+static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
+				  struct ib_send_wr *wr)
+{
+	aseg->swap_add		= cpu_to_be64(wr->wr.atomic.swap);
+	aseg->swap_add_mask	= cpu_to_be64(wr->wr.atomic.swap_mask);
+	aseg->compare		= cpu_to_be64(wr->wr.atomic.compare_add);
+	aseg->compare_mask	= cpu_to_be64(wr->wr.atomic.compare_add_mask);
+}
+
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 			     struct ib_send_wr *wr)
 {
@@ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			switch (wr->opcode) {
 			case IB_WR_ATOMIC_CMP_AND_SWP:
 			case IB_WR_ATOMIC_FETCH_AND_ADD:
+			case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
 				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
 					      wr->wr.atomic.rkey);
 				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
@@ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 				break;
 
+			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+					      wr->wr.atomic.rkey);
+				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
+
+				set_masked_atomic_seg(wqe, wr);
+				wqe  += sizeof (struct mlx4_wqe_masked_atomic_seg);
+
+				size += (sizeof (struct mlx4_wqe_raddr_seg) +
+					 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
+
+				break;
+
 			case IB_WR_RDMA_READ:
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
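
With the opcode table and segment builders above in place, posting a masked operation looks like any other atomic work request. A sketch, with qp, sge (pointing at an 8-byte local buffer), remote_addr, and rkey assumed to be set up by the caller:

#include <linux/string.h>
#include <rdma/ib_verbs.h>

/* Illustrative post of a masked compare & swap on the low byte only. */
static int post_masked_cmp_swap(struct ib_qp *qp, struct ib_sge *sge,
				u64 remote_addr, u32 rkey)
{
	struct ib_send_wr wr, *bad_wr;

	memset(&wr, 0, sizeof(wr));
	wr.opcode     = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
	wr.send_flags = IB_SEND_SIGNALED;
	wr.sg_list    = sge;
	wr.num_sge    = 1;

	wr.wr.atomic.remote_addr      = remote_addr;
	wr.wr.atomic.rkey             = rkey;
	wr.wr.atomic.compare_add      = 0x00;	/* compare value */
	wr.wr.atomic.compare_add_mask = 0xff;	/* compare low byte only */
	wr.wr.atomic.swap             = 0x01;	/* swap value */
	wr.wr.atomic.swap_mask        = 0xff;	/* replace low byte only */

	return ib_post_send(qp, &wr, &bad_wr);
}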

+ 2 - 2
include/linux/mlx4/device.h

@@ -123,8 +123,8 @@ enum {
 	MLX4_OPCODE_RDMA_READ		= 0x10,
 	MLX4_OPCODE_ATOMIC_CS		= 0x11,
 	MLX4_OPCODE_ATOMIC_FA		= 0x12,
-	MLX4_OPCODE_ATOMIC_MASK_CS	= 0x14,
-	MLX4_OPCODE_ATOMIC_MASK_FA	= 0x15,
+	MLX4_OPCODE_MASKED_ATOMIC_CS	= 0x14,
+	MLX4_OPCODE_MASKED_ATOMIC_FA	= 0x15,
 	MLX4_OPCODE_BIND_MW		= 0x18,
 	MLX4_OPCODE_FMR			= 0x19,
 	MLX4_OPCODE_LOCAL_INVAL		= 0x1b,

+ 7 - 0
include/linux/mlx4/qp.h

@@ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg {
 	__be64			compare;
 };
 
+struct mlx4_wqe_masked_atomic_seg {
+	__be64			swap_add;
+	__be64			compare;
+	__be64			swap_add_mask;
+	__be64			compare_mask;
+};
+
 struct mlx4_wqe_data_seg {
 	__be32			byte_count;
 	__be32			lkey;
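
The masked segment is twice the size of the plain atomic segment (32 vs. 16 bytes), which is why the post-send path above charges (raddr seg + masked atomic seg) / 16 WQE units. An illustrative compile-time sanity check of the wire sizes that arithmetic relies on:

#include <linux/kernel.h>
#include <linux/mlx4/qp.h>

static inline void mlx4_masked_atomic_size_check(void)
{
	BUILD_BUG_ON(sizeof(struct mlx4_wqe_raddr_seg) != 16);
	BUILD_BUG_ON(sizeof(struct mlx4_wqe_atomic_seg) != 16);
	BUILD_BUG_ON(sizeof(struct mlx4_wqe_masked_atomic_seg) != 32);
}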