|
@@ -192,6 +192,8 @@ static int send_wqe_overhead(enum ib_qp_type type)
|
|
|
case IB_QPT_GSI:
|
|
|
return sizeof (struct mlx4_wqe_ctrl_seg) +
|
|
|
ALIGN(MLX4_IB_UD_HEADER_SIZE +
|
|
|
+ DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
|
|
|
+ MLX4_INLINE_ALIGN) *
|
|
|
sizeof (struct mlx4_wqe_inline_seg),
|
|
|
sizeof (struct mlx4_wqe_data_seg)) +
|
|
|
ALIGN(4 +
|
|
@@ -1049,6 +1051,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
|
|
|
u16 pkey;
|
|
|
int send_size;
|
|
|
int header_size;
|
|
|
+ int spc;
|
|
|
int i;
|
|
|
|
|
|
send_size = 0;
|
|
@@ -1124,10 +1127,43 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
|
|
|
printk("\n");
|
|
|
}
|
|
|
|
|
|
- inl->byte_count = cpu_to_be32(1 << 31 | header_size);
|
|
|
- memcpy(inl + 1, sqp->header_buf, header_size);
|
|
|
+ /*
|
|
|
+ * Inline data segments may not cross a 64 byte boundary. If
|
|
|
+ * our UD header is bigger than the space available up to the
|
|
|
+ * next 64 byte boundary in the WQE, use two inline data
|
|
|
+ * segments to hold the UD header.
|
|
|
+ */
|
|
|
+ spc = MLX4_INLINE_ALIGN -
|
|
|
+ ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
|
|
|
+ if (header_size <= spc) {
|
|
|
+ inl->byte_count = cpu_to_be32(1 << 31 | header_size);
|
|
|
+ memcpy(inl + 1, sqp->header_buf, header_size);
|
|
|
+ i = 1;
|
|
|
+ } else {
|
|
|
+ inl->byte_count = cpu_to_be32(1 << 31 | spc);
|
|
|
+ memcpy(inl + 1, sqp->header_buf, spc);
|
|
|
+
|
|
|
+ inl = (void *) (inl + 1) + spc;
|
|
|
+ memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
|
|
|
+ /*
|
|
|
+ * Need a barrier here to make sure all the data is
|
|
|
+ * visible before the byte_count field is set.
|
|
|
+ * Otherwise the HCA prefetcher could grab the 64-byte
|
|
|
+ * chunk with this inline segment and get a valid (!=
|
|
|
+ * 0xffffffff) byte count but stale data, and end up
|
|
|
+ * generating a packet with bad headers.
|
|
|
+ *
|
|
|
+ * The first inline segment's byte_count field doesn't
|
|
|
+ * need a barrier, because it comes after a
|
|
|
+ * control/MLX segment and therefore is at an offset
|
|
|
+ * of 16 mod 64.
|
|
|
+ */
|
|
|
+ wmb();
|
|
|
+ inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
|
|
|
+ i = 2;
|
|
|
+ }
|
|
|
|
|
|
- return ALIGN(sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
|
|
|
+ return ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
|
|
|
}
|
|
|
|
|
|
static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
|