@@ -183,6 +183,145 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
 	}
 }
 
+#ifdef EFX_USE_PIO
+
+struct efx_short_copy_buffer {
+	int used;
+	u8 buf[L1_CACHE_BYTES];
+};
+
+/* Copy to PIO, respecting that writes to PIO buffers must be dword aligned.
+ * Advances piobuf pointer. Leaves additional data in the copy buffer.
+ */
+static void efx_memcpy_toio_aligned(struct efx_nic *efx, u8 __iomem **piobuf,
+				    u8 *data, int len,
+				    struct efx_short_copy_buffer *copy_buf)
+{
+	int block_len = len & ~(sizeof(copy_buf->buf) - 1);
+
+	memcpy_toio(*piobuf, data, block_len);
+	*piobuf += block_len;
+	len -= block_len;
+
+	if (len) {
+		data += block_len;
+		BUG_ON(copy_buf->used);
+		BUG_ON(len > sizeof(copy_buf->buf));
+		memcpy(copy_buf->buf, data, len);
+		copy_buf->used = len;
+	}
+}
+
+/* Copy to PIO, respecting dword alignment, popping data from copy buffer first.
+ * Advances piobuf pointer. Leaves additional data in the copy buffer.
+ */
+static void efx_memcpy_toio_aligned_cb(struct efx_nic *efx, u8 __iomem **piobuf,
+				       u8 *data, int len,
+				       struct efx_short_copy_buffer *copy_buf)
+{
+	if (copy_buf->used) {
+		/* if the copy buffer is partially full, fill it up and write */
+		int copy_to_buf =
+			min_t(int, sizeof(copy_buf->buf) - copy_buf->used, len);
+
+		memcpy(copy_buf->buf + copy_buf->used, data, copy_to_buf);
+		copy_buf->used += copy_to_buf;
+
+		/* if we didn't fill it up then we're done for now */
+		if (copy_buf->used < sizeof(copy_buf->buf))
+			return;
+
+		memcpy_toio(*piobuf, copy_buf->buf, sizeof(copy_buf->buf));
+		*piobuf += sizeof(copy_buf->buf);
+		data += copy_to_buf;
+		len -= copy_to_buf;
+		copy_buf->used = 0;
+	}
+
+	efx_memcpy_toio_aligned(efx, piobuf, data, len, copy_buf);
+}
+
+static void efx_flush_copy_buffer(struct efx_nic *efx, u8 __iomem *piobuf,
+				  struct efx_short_copy_buffer *copy_buf)
+{
+	/* if there's anything in it, write the whole buffer, including junk */
+	if (copy_buf->used)
+		memcpy_toio(piobuf, copy_buf->buf, sizeof(copy_buf->buf));
+}
+
+/* Traverse skb structure and copy fragments in to PIO buffer.
+ * Advances piobuf pointer.
+ */
+static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
+				     u8 __iomem **piobuf,
+				     struct efx_short_copy_buffer *copy_buf)
+{
+	int i;
+
+	efx_memcpy_toio_aligned(efx, piobuf, skb->data, skb_headlen(skb),
+				copy_buf);
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
+		u8 *vaddr;
+
+		vaddr = kmap_atomic(skb_frag_page(f));
+
+		efx_memcpy_toio_aligned_cb(efx, piobuf, vaddr + f->page_offset,
+					   skb_frag_size(f), copy_buf);
+		kunmap_atomic(vaddr);
+	}
+
+	EFX_BUG_ON_PARANOID(skb_shinfo(skb)->frag_list);
+}
+
+static struct efx_tx_buffer *
+efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+{
+	struct efx_tx_buffer *buffer =
+		efx_tx_queue_get_insert_buffer(tx_queue);
+	u8 __iomem *piobuf = tx_queue->piobuf;
+
+	/* Copy to PIO buffer. Ensure the writes are padded to the end
+	 * of a cache line, as this is required for write-combining to be
+	 * effective on at least x86.
+	 */
+
+	if (skb_shinfo(skb)->nr_frags) {
+		/* The size of the copy buffer will ensure all writes
+		 * are the size of a cache line.
+		 */
+		struct efx_short_copy_buffer copy_buf;
+
+		copy_buf.used = 0;
+
+		efx_skb_copy_bits_to_pio(tx_queue->efx, skb,
+					 &piobuf, &copy_buf);
+		efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
+	} else {
+		/* Pad the write to the size of a cache line.
+		 * We can do this because we know the skb_shared_info struct is
+		 * after the source, and the destination buffer is big enough.
+		 */
+		BUILD_BUG_ON(L1_CACHE_BYTES >
+			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+		memcpy_toio(tx_queue->piobuf, skb->data,
+			    ALIGN(skb->len, L1_CACHE_BYTES));
+	}
+
+	EFX_POPULATE_QWORD_5(buffer->option,
+			     ESF_DZ_TX_DESC_IS_OPT, 1,
+			     ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
+			     ESF_DZ_TX_PIO_CONT, 0,
+			     ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
+			     ESF_DZ_TX_PIO_BUF_ADDR,
+			     tx_queue->piobuf_offset);
+	++tx_queue->pio_packets;
+	++tx_queue->insert_count;
+	return buffer;
+}
+#endif /* EFX_USE_PIO */
+
 /*
  * Add a socket buffer to a TX queue
  *
@@ -227,6 +366,17 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		return NETDEV_TX_OK;
 	}
 
+	/* Consider using PIO for short packets */
+#ifdef EFX_USE_PIO
+	if (skb->len <= efx_piobuf_size && tx_queue->piobuf &&
+	    efx_nic_tx_is_empty(tx_queue) &&
+	    efx_nic_tx_is_empty(efx_tx_queue_partner(tx_queue))) {
+		buffer = efx_enqueue_skb_pio(tx_queue, skb);
+		dma_flags = EFX_TX_BUF_OPTION;
+		goto finish_packet;
+	}
+#endif
+
 	/* Map for DMA. Use dma_map_single rather than dma_map_page
 	 * since this is more efficient on machines with sparse
 	 * memory.
@@ -279,6 +429,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	}
 
 	/* Transfer ownership of the skb to the final buffer */
+finish_packet:
 	buffer->skb = skb;
 	buffer->flags = EFX_TX_BUF_SKB | dma_flags;
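
For readers who want to experiment with the copy-buffer scheme outside the driver, the stand-alone C sketch below mimics it in user space: fragments are written to the destination only in whole cache-line-sized blocks, and any sub-block tail is parked in a small bounce buffer until it is topped up or flushed (padded) at the end. This is an illustration of the technique only, not sfc code: the 64-byte CACHE_LINE constant and all names are assumptions made for the example, and plain memcpy() stands in for memcpy_toio().

/* Illustrative user-space sketch of the cache-line copy-buffer scheme
 * used in the patch above.  Hypothetical names; not driver code.
 */
#include <assert.h>
#include <stdio.h>
#include <string.h>

#define CACHE_LINE 64			/* stand-in for L1_CACHE_BYTES */

struct short_copy_buffer {
	size_t used;
	unsigned char buf[CACHE_LINE];
};

/* Copy whole CACHE_LINE blocks straight to *dst; stash the tail in cb. */
static void copy_aligned(unsigned char **dst, const unsigned char *data,
			 size_t len, struct short_copy_buffer *cb)
{
	size_t block_len = len & ~(sizeof(cb->buf) - 1);

	memcpy(*dst, data, block_len);
	*dst += block_len;
	len -= block_len;

	if (len) {
		assert(cb->used == 0);
		memcpy(cb->buf, data + block_len, len);
		cb->used = len;
	}
}

/* Like copy_aligned(), but first top up and drain any buffered tail. */
static void copy_aligned_cb(unsigned char **dst, const unsigned char *data,
			    size_t len, struct short_copy_buffer *cb)
{
	if (cb->used) {
		size_t fill = sizeof(cb->buf) - cb->used;

		if (fill > len)
			fill = len;
		memcpy(cb->buf + cb->used, data, fill);
		cb->used += fill;
		if (cb->used < sizeof(cb->buf))
			return;		/* still not a full block */

		memcpy(*dst, cb->buf, sizeof(cb->buf));
		*dst += sizeof(cb->buf);
		data += fill;
		len -= fill;
		cb->used = 0;
	}
	copy_aligned(dst, data, len, cb);
}

/* Write out any leftover bytes, padded to a full block with junk. */
static void flush_copy_buffer(unsigned char *dst, struct short_copy_buffer *cb)
{
	if (cb->used)
		memcpy(dst, cb->buf, sizeof(cb->buf));
}

int main(void)
{
	unsigned char pio[4 * CACHE_LINE];	/* stand-in for the PIO aperture */
	unsigned char frag1[100], frag2[70];
	unsigned char *dst = pio;
	struct short_copy_buffer cb = { .used = 0 };

	memset(frag1, 'A', sizeof(frag1));
	memset(frag2, 'B', sizeof(frag2));

	copy_aligned(&dst, frag1, sizeof(frag1), &cb);	  /* linear header */
	copy_aligned_cb(&dst, frag2, sizeof(frag2), &cb); /* paged fragment */
	flush_copy_buffer(dst, &cb);			  /* pad the last block */

	/* The 170 payload bytes land contiguously at the start of pio[]. */
	assert(pio[0] == 'A' && pio[99] == 'A' && pio[100] == 'B' && pio[169] == 'B');
	printf("copied 170 payload bytes in whole %d-byte blocks\n", CACHE_LINE);
	return 0;
}

It should build with any C99 compiler (e.g. cc -Wall). The point mirrored from the patch is that the destination is only ever touched in full, aligned blocks, which is what keeps write-combining effective on the real PIO aperture.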