Browse Source

bna: Tx and Rx Optimizations

Change details:
 -      Have contiguous queue pages for TxQ, RxQ and CQ. Data structure and
        QPT changes related to contiguous queue pages
 -      Optimized Tx and Rx unmap structures. Tx and Rx fast path changes due to
        unmap data structure changes
 -      Re-factored Tx and Rx fastpath routines as per the new queue data structures
 -      Implemented bnad_txq_wi_prepare() to program the opcode, flags, frame_len
        and num_vectors in the work item
 -      Reduced Max TxQ and RxQ depth to 2048 while default value for Tx/Rx queue
        depth is unaltered (512)

Signed-off-by: Rasesh Mody <rmody@brocade.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Rasesh Mody 12 years ago
parent
commit
5216562a2c

+ 2 - 0
drivers/net/ethernet/brocade/bna/bna.h

@@ -138,6 +138,8 @@ do {								\
 #define BNA_QE_INDX_ADD(_qe_idx, _qe_num, _q_depth)			\
 	((_qe_idx) = ((_qe_idx) + (_qe_num)) & ((_q_depth) - 1))
 
+#define BNA_QE_INDX_INC(_idx, _q_depth) BNA_QE_INDX_ADD(_idx, 1, _q_depth)
+
 #define BNA_Q_INDEX_CHANGE(_old_idx, _updated_idx, _q_depth)		\
 	(((_updated_idx) - (_old_idx)) & ((_q_depth) - 1))
 

+ 62 - 47
drivers/net/ethernet/brocade/bna/bna_tx_rx.c

@@ -1908,6 +1908,9 @@ bna_rxq_qpt_setup(struct bna_rxq *rxq,
 		struct bna_mem_descr *swqpt_mem,
 		struct bna_mem_descr *page_mem)
 {
+	u8 *kva;
+	u64 dma;
+	struct bna_dma_addr bna_dma;
 	int	i;
 
 	rxq->qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb;
@@ -1917,13 +1920,21 @@ bna_rxq_qpt_setup(struct bna_rxq *rxq,
 	rxq->qpt.page_size = page_size;
 
 	rxq->rcb->sw_qpt = (void **) swqpt_mem->kva;
+	rxq->rcb->sw_q = page_mem->kva;
+
+	kva = page_mem->kva;
+	BNA_GET_DMA_ADDR(&page_mem->dma, dma);
 
 	for (i = 0; i < rxq->qpt.page_count; i++) {
-		rxq->rcb->sw_qpt[i] = page_mem[i].kva;
+		rxq->rcb->sw_qpt[i] = kva;
+		kva += PAGE_SIZE;
+
+		BNA_SET_DMA_ADDR(dma, &bna_dma);
 		((struct bna_dma_addr *)rxq->qpt.kv_qpt_ptr)[i].lsb =
-			page_mem[i].dma.lsb;
+			bna_dma.lsb;
 		((struct bna_dma_addr *)rxq->qpt.kv_qpt_ptr)[i].msb =
-			page_mem[i].dma.msb;
+			bna_dma.msb;
+		dma += PAGE_SIZE;
 	}
 }
 
@@ -1935,6 +1946,9 @@ bna_rxp_cqpt_setup(struct bna_rxp *rxp,
 		struct bna_mem_descr *swqpt_mem,
 		struct bna_mem_descr *page_mem)
 {
+	u8 *kva;
+	u64 dma;
+	struct bna_dma_addr bna_dma;
 	int	i;
 
 	rxp->cq.qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb;
@@ -1944,14 +1958,21 @@ bna_rxp_cqpt_setup(struct bna_rxp *rxp,
 	rxp->cq.qpt.page_size = page_size;
 
 	rxp->cq.ccb->sw_qpt = (void **) swqpt_mem->kva;
+	rxp->cq.ccb->sw_q = page_mem->kva;
+
+	kva = page_mem->kva;
+	BNA_GET_DMA_ADDR(&page_mem->dma, dma);
 
 	for (i = 0; i < rxp->cq.qpt.page_count; i++) {
-		rxp->cq.ccb->sw_qpt[i] = page_mem[i].kva;
+		rxp->cq.ccb->sw_qpt[i] = kva;
+		kva += PAGE_SIZE;
 
+		BNA_SET_DMA_ADDR(dma, &bna_dma);
 		((struct bna_dma_addr *)rxp->cq.qpt.kv_qpt_ptr)[i].lsb =
-			page_mem[i].dma.lsb;
+			bna_dma.lsb;
 		((struct bna_dma_addr *)rxp->cq.qpt.kv_qpt_ptr)[i].msb =
-			page_mem[i].dma.msb;
+			bna_dma.msb;
+		dma += PAGE_SIZE;
 	}
 }
 
@@ -2250,8 +2271,8 @@ bna_rx_res_req(struct bna_rx_config *q_cfg, struct bna_res_info *res_info)
 	res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_type = BNA_RES_T_MEM;
 	mem_info = &res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info;
 	mem_info->mem_type = BNA_MEM_T_DMA;
-	mem_info->len = PAGE_SIZE;
-	mem_info->num = cpage_count * q_cfg->num_paths;
+	mem_info->len = PAGE_SIZE * cpage_count;
+	mem_info->num = q_cfg->num_paths;
 
 	res_info[BNA_RX_RES_MEM_T_DQPT].res_type = BNA_RES_T_MEM;
 	mem_info = &res_info[BNA_RX_RES_MEM_T_DQPT].res_u.mem_info;
@@ -2268,8 +2289,8 @@ bna_rx_res_req(struct bna_rx_config *q_cfg, struct bna_res_info *res_info)
 	res_info[BNA_RX_RES_MEM_T_DPAGE].res_type = BNA_RES_T_MEM;
 	mem_info = &res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info;
 	mem_info->mem_type = BNA_MEM_T_DMA;
-	mem_info->len = PAGE_SIZE;
-	mem_info->num = dpage_count * q_cfg->num_paths;
+	mem_info->len = PAGE_SIZE * dpage_count;
+	mem_info->num = q_cfg->num_paths;
 
 	res_info[BNA_RX_RES_MEM_T_HQPT].res_type = BNA_RES_T_MEM;
 	mem_info = &res_info[BNA_RX_RES_MEM_T_HQPT].res_u.mem_info;
@@ -2286,8 +2307,8 @@ bna_rx_res_req(struct bna_rx_config *q_cfg, struct bna_res_info *res_info)
 	res_info[BNA_RX_RES_MEM_T_HPAGE].res_type = BNA_RES_T_MEM;
 	mem_info = &res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info;
 	mem_info->mem_type = BNA_MEM_T_DMA;
-	mem_info->len = (hpage_count ? PAGE_SIZE : 0);
-	mem_info->num = (hpage_count ? (hpage_count * q_cfg->num_paths) : 0);
+	mem_info->len = PAGE_SIZE * hpage_count;
+	mem_info->num = (hpage_count ? q_cfg->num_paths : 0);
 
 	res_info[BNA_RX_RES_MEM_T_IBIDX].res_type = BNA_RES_T_MEM;
 	mem_info = &res_info[BNA_RX_RES_MEM_T_IBIDX].res_u.mem_info;
@@ -2332,7 +2353,7 @@ bna_rx_create(struct bna *bna, struct bnad *bnad,
 	struct bna_mem_descr *dsqpt_mem;
 	struct bna_mem_descr *hpage_mem;
 	struct bna_mem_descr *dpage_mem;
-	int i, cpage_idx = 0, dpage_idx = 0, hpage_idx = 0;
+	int i;
 	int dpage_count, hpage_count, rcb_idx;
 
 	if (!bna_rx_res_check(rx_mod, rx_cfg))
@@ -2352,14 +2373,14 @@ bna_rx_create(struct bna *bna, struct bnad *bnad,
 	hpage_mem = &res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info.mdl[0];
 	dpage_mem = &res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info.mdl[0];
 
-	page_count = res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info.num /
-			rx_cfg->num_paths;
+	page_count = res_info[BNA_RX_RES_MEM_T_CQPT_PAGE].res_u.mem_info.len /
+			PAGE_SIZE;
 
-	dpage_count = res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info.num /
-			rx_cfg->num_paths;
+	dpage_count = res_info[BNA_RX_RES_MEM_T_DPAGE].res_u.mem_info.len /
+			PAGE_SIZE;
 
-	hpage_count = res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info.num /
-			rx_cfg->num_paths;
+	hpage_count = res_info[BNA_RX_RES_MEM_T_HPAGE].res_u.mem_info.len /
+			PAGE_SIZE;
 
 	rx = bna_rx_get(rx_mod, rx_cfg->rx_type);
 	rx->bna = bna;
@@ -2446,10 +2467,7 @@ bna_rx_create(struct bna *bna, struct bnad *bnad,
 		q0->rx_packets_with_error = q0->rxbuf_alloc_failed = 0;
 
 		bna_rxq_qpt_setup(q0, rxp, dpage_count, PAGE_SIZE,
-			&dqpt_mem[i], &dsqpt_mem[i], &dpage_mem[dpage_idx]);
-		q0->rcb->page_idx = dpage_idx;
-		q0->rcb->page_count = dpage_count;
-		dpage_idx += dpage_count;
+			&dqpt_mem[i], &dsqpt_mem[i], &dpage_mem[i]);
 
 		if (rx->rcb_setup_cbfn)
 			rx->rcb_setup_cbfn(bnad, q0->rcb);
@@ -2475,10 +2493,7 @@ bna_rx_create(struct bna *bna, struct bnad *bnad,
 
 			bna_rxq_qpt_setup(q1, rxp, hpage_count, PAGE_SIZE,
 				&hqpt_mem[i], &hsqpt_mem[i],
-				&hpage_mem[hpage_idx]);
-			q1->rcb->page_idx = hpage_idx;
-			q1->rcb->page_count = hpage_count;
-			hpage_idx += hpage_count;
+				&hpage_mem[i]);
 
 			if (rx->rcb_setup_cbfn)
 				rx->rcb_setup_cbfn(bnad, q1->rcb);
@@ -2510,10 +2525,7 @@ bna_rx_create(struct bna *bna, struct bnad *bnad,
 		rxp->cq.ccb->id = i;
 
 		bna_rxp_cqpt_setup(rxp, page_count, PAGE_SIZE,
-			&cqpt_mem[i], &cswqpt_mem[i], &cpage_mem[cpage_idx]);
-		rxp->cq.ccb->page_idx = cpage_idx;
-		rxp->cq.ccb->page_count = page_count;
-		cpage_idx += page_count;
+			&cqpt_mem[i], &cswqpt_mem[i], &cpage_mem[i]);
 
 		if (rx->ccb_setup_cbfn)
 			rx->ccb_setup_cbfn(bnad, rxp->cq.ccb);
@@ -3230,6 +3242,9 @@ bna_txq_qpt_setup(struct bna_txq *txq, int page_count, int page_size,
 		struct bna_mem_descr *swqpt_mem,
 		struct bna_mem_descr *page_mem)
 {
+	u8 *kva;
+	u64 dma;
+	struct bna_dma_addr bna_dma;
 	int i;
 
 	txq->qpt.hw_qpt_ptr.lsb = qpt_mem->dma.lsb;
@@ -3239,14 +3254,21 @@ bna_txq_qpt_setup(struct bna_txq *txq, int page_count, int page_size,
 	txq->qpt.page_size = page_size;
 
 	txq->tcb->sw_qpt = (void **) swqpt_mem->kva;
+	txq->tcb->sw_q = page_mem->kva;
+
+	kva = page_mem->kva;
+	BNA_GET_DMA_ADDR(&page_mem->dma, dma);
 
 	for (i = 0; i < page_count; i++) {
-		txq->tcb->sw_qpt[i] = page_mem[i].kva;
+		txq->tcb->sw_qpt[i] = kva;
+		kva += PAGE_SIZE;
 
+		BNA_SET_DMA_ADDR(dma, &bna_dma);
 		((struct bna_dma_addr *)txq->qpt.kv_qpt_ptr)[i].lsb =
-			page_mem[i].dma.lsb;
+			bna_dma.lsb;
 		((struct bna_dma_addr *)txq->qpt.kv_qpt_ptr)[i].msb =
-			page_mem[i].dma.msb;
+			bna_dma.msb;
+		dma += PAGE_SIZE;
 	}
 }
 
@@ -3430,8 +3452,8 @@ bna_tx_res_req(int num_txq, int txq_depth, struct bna_res_info *res_info)
 	res_info[BNA_TX_RES_MEM_T_PAGE].res_type = BNA_RES_T_MEM;
 	mem_info = &res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info;
 	mem_info->mem_type = BNA_MEM_T_DMA;
-	mem_info->len = PAGE_SIZE;
-	mem_info->num = num_txq * page_count;
+	mem_info->len = PAGE_SIZE * page_count;
+	mem_info->num = num_txq;
 
 	res_info[BNA_TX_RES_MEM_T_IBIDX].res_type = BNA_RES_T_MEM;
 	mem_info = &res_info[BNA_TX_RES_MEM_T_IBIDX].res_u.mem_info;
@@ -3457,14 +3479,11 @@ bna_tx_create(struct bna *bna, struct bnad *bnad,
 	struct bna_txq *txq;
 	struct list_head *qe;
 	int page_count;
-	int page_size;
-	int page_idx;
 	int i;
 
 	intr_info = &res_info[BNA_TX_RES_INTR_T_TXCMPL].res_u.intr_info;
-	page_count = (res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info.num) /
-			tx_cfg->num_txq;
-	page_size = res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info.len;
+	page_count = (res_info[BNA_TX_RES_MEM_T_PAGE].res_u.mem_info.len) /
+					PAGE_SIZE;
 
 	/**
 	 * Get resources
@@ -3529,7 +3548,6 @@ bna_tx_create(struct bna *bna, struct bnad *bnad,
 	/* TxQ */
 
 	i = 0;
-	page_idx = 0;
 	list_for_each(qe, &tx->txq_q) {
 		txq = (struct bna_txq *)qe;
 		txq->tcb = (struct bna_tcb *)
@@ -3569,14 +3587,11 @@ bna_tx_create(struct bna *bna, struct bnad *bnad,
 		txq->tcb->id = i;
 
 		/* QPT, SWQPT, Pages */
-		bna_txq_qpt_setup(txq, page_count, page_size,
+		bna_txq_qpt_setup(txq, page_count, PAGE_SIZE,
 			&res_info[BNA_TX_RES_MEM_T_QPT].res_u.mem_info.mdl[i],
 			&res_info[BNA_TX_RES_MEM_T_SWQPT].res_u.mem_info.mdl[i],
 			&res_info[BNA_TX_RES_MEM_T_PAGE].
-				  res_u.mem_info.mdl[page_idx]);
-		txq->tcb->page_idx = page_idx;
-		txq->tcb->page_count = page_count;
-		page_idx += page_count;
+				  res_u.mem_info.mdl[i]);
 
 		/* Callback to bnad for setting up TCB */
 		if (tx->tcb_setup_cbfn)

+ 3 - 6
drivers/net/ethernet/brocade/bna/bna_types.h

@@ -430,6 +430,7 @@ struct bna_ib {
 struct bna_tcb {
 	/* Fast path */
 	void			**sw_qpt;
+	void			*sw_q;
 	void			*unmap_q;
 	u32		producer_index;
 	u32		consumer_index;
@@ -437,8 +438,6 @@ struct bna_tcb {
 	u32		q_depth;
 	void __iomem   *q_dbell;
 	struct bna_ib_dbell *i_dbell;
-	int			page_idx;
-	int			page_count;
 	/* Control path */
 	struct bna_txq *txq;
 	struct bnad *bnad;
@@ -563,13 +562,12 @@ struct bna_tx_mod {
 struct bna_rcb {
 	/* Fast path */
 	void			**sw_qpt;
+	void			*sw_q;
 	void			*unmap_q;
 	u32		producer_index;
 	u32		consumer_index;
 	u32		q_depth;
 	void __iomem   *q_dbell;
-	int			page_idx;
-	int			page_count;
 	/* Control path */
 	struct bna_rxq *rxq;
 	struct bna_ccb *ccb;
@@ -626,6 +624,7 @@ struct bna_pkt_rate {
 struct bna_ccb {
 	/* Fast path */
 	void			**sw_qpt;
+	void			*sw_q;
 	u32		producer_index;
 	volatile u32	*hw_producer_index;
 	u32		q_depth;
@@ -633,8 +632,6 @@ struct bna_ccb {
 	struct bna_rcb *rcb[2];
 	void			*ctrl; /* For bnad */
 	struct bna_pkt_rate pkt_rate;
-	int			page_idx;
-	int			page_count;
 
 	/* Control path */
 	struct bna_cq *cq;

File diff suppressed because it is too large
+ 294 - 393
drivers/net/ethernet/brocade/bna/bnad.c


+ 19 - 22
drivers/net/ethernet/brocade/bna/bnad.h

@@ -83,12 +83,9 @@ struct bnad_rx_ctrl {
 
 #define BNAD_IOCETH_TIMEOUT	     10000
 
-#define BNAD_MAX_Q_DEPTH		0x10000
-#define BNAD_MIN_Q_DEPTH		0x200
-
-#define BNAD_MAX_RXQ_DEPTH		(BNAD_MAX_Q_DEPTH / bnad_rxqs_per_cq)
-/* keeping MAX TX and RX Q depth equal */
-#define BNAD_MAX_TXQ_DEPTH		BNAD_MAX_RXQ_DEPTH
+#define BNAD_MIN_Q_DEPTH		512
+#define BNAD_MAX_RXQ_DEPTH		2048
+#define BNAD_MAX_TXQ_DEPTH		2048
 
 #define BNAD_JUMBO_MTU			9000
 
@@ -101,9 +98,8 @@ struct bnad_rx_ctrl {
 #define BNAD_TXQ_TX_STARTED		1
 
 /* Bit positions for rcb->flags */
-#define BNAD_RXQ_REFILL			0
-#define BNAD_RXQ_STARTED		1
-#define BNAD_RXQ_POST_OK		2
+#define BNAD_RXQ_STARTED		0
+#define BNAD_RXQ_POST_OK		1
 
 /* Resource limits */
 #define BNAD_NUM_TXQ			(bnad->num_tx * bnad->num_txq_per_tx)
@@ -221,18 +217,24 @@ struct bnad_rx_info {
 	struct work_struct rx_cleanup_work;
 } ____cacheline_aligned;
 
-/* Unmap queues for Tx / Rx cleanup */
-struct bnad_skb_unmap {
+struct bnad_tx_vector {
+	DEFINE_DMA_UNMAP_ADDR(dma_addr);
+};
+
+struct bnad_tx_unmap {
 	struct sk_buff		*skb;
+	u32			nvecs;
+	struct bnad_tx_vector	vectors[BFI_TX_MAX_VECTORS_PER_WI];
+};
+
+struct bnad_rx_vector {
 	DEFINE_DMA_UNMAP_ADDR(dma_addr);
+	u32			len;
 };
 
-struct bnad_unmap_q {
-	u32		producer_index;
-	u32		consumer_index;
-	u32		q_depth;
-	/* This should be the last one */
-	struct bnad_skb_unmap unmap_array[1];
+struct bnad_rx_unmap {
+	struct sk_buff		*skb;
+	struct bnad_rx_vector	vector;
 };
 
 /* Bit mask values for bnad->cfg_flags */
@@ -252,11 +254,6 @@ struct bnad_unmap_q {
 #define BNAD_RF_STATS_TIMER_RUNNING	5
 #define BNAD_RF_TX_PRIO_SET		6
 
-
-/* Define for Fast Path flags */
-/* Defined as bit positions */
-#define BNAD_FP_IN_RX_PATH	      0
-
 struct bnad {
 	struct net_device	*netdev;
 	u32			id;

Some files were not shown because too many files changed in this diff