/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/mlx4/cq.h>
#include <linux/slab.h>
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>

#include "mlx4_en.h"

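/*
 * LRO callback: given the first fragment of a received packet, return
 * pointers to its Ethernet, IP and TCP headers so the inet_lro engine
 * can aggregate the packet (IPv4/TCP only).
 */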
static int mlx4_en_get_frag_header(struct skb_frag_struct *frags, void **mac_hdr,
				   void **ip_hdr, void **tcpudp_hdr,
				   u64 *hdr_flags, void *priv)
{
	*mac_hdr = page_address(frags->page) + frags->page_offset;
	*ip_hdr = *mac_hdr + ETH_HLEN;
	*tcpudp_hdr = (struct tcphdr *)(*ip_hdr + sizeof(struct iphdr));
	*hdr_flags = LRO_IPV4 | LRO_TCP;

	return 0;
}

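/*
 * Refill one fragment slot of an RX descriptor from the per-ring page
 * allocator: either carve the next chunk out of the current page or, once
 * the page is exhausted, hand the current page over to the skb fragment and
 * install a freshly allocated page in the allocator.  The buffer is then
 * DMA-mapped and its address written into the descriptor.
 */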
static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_desc *rx_desc,
			      struct skb_frag_struct *skb_frags,
			      struct mlx4_en_rx_alloc *ring_alloc,
			      int i)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
	struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
	struct page *page;
	dma_addr_t dma;

	if (page_alloc->offset == frag_info->last_offset) {
		/* Allocate new page */
		page = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER);
		if (!page)
			return -ENOMEM;

		skb_frags[i].page = page_alloc->page;
		skb_frags[i].page_offset = page_alloc->offset;
		page_alloc->page = page;
		page_alloc->offset = frag_info->frag_align;
	} else {
		page = page_alloc->page;
		get_page(page);

		skb_frags[i].page = page;
		skb_frags[i].page_offset = page_alloc->offset;
		page_alloc->offset += frag_info->frag_stride;
	}
	dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) +
			     skb_frags[i].page_offset, frag_info->frag_size,
			     PCI_DMA_FROMDEVICE);
	rx_desc->data[i].addr = cpu_to_be64(dma);
	return 0;
}

static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
				  struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_rx_alloc *page_alloc;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		page_alloc = &ring->page_alloc[i];
		page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
					       MLX4_EN_ALLOC_ORDER);
		if (!page_alloc->page)
			goto out;

		page_alloc->offset = priv->frag_info[i].frag_align;
		en_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
		       i, page_alloc->page);
	}
	return 0;

out:
	while (i--) {
		page_alloc = &ring->page_alloc[i];
		put_page(page_alloc->page);
		page_alloc->page = NULL;
	}
	return -ENOMEM;
}

static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_rx_alloc *page_alloc;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		page_alloc = &ring->page_alloc[i];
		en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
		       i, page_count(page_alloc->page));

		put_page(page_alloc->page);
		page_alloc->page = NULL;
	}
}

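/*
 * Write the static part of an RX descriptor: the per-fragment byte counts
 * and memory key.  Scatter entries beyond the configured number of
 * fragments are padded with zero address/size and the special padding
 * memory key.
 */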
static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
	struct skb_frag_struct *skb_frags = ring->rx_info +
					    (index << priv->log_rx_info);
	int possible_frags;
	int i;

	/* Set size and memtype fields */
	for (i = 0; i < priv->num_frags; i++) {
		skb_frags[i].size = priv->frag_info[i].frag_size;
		rx_desc->data[i].byte_count =
			cpu_to_be32(priv->frag_info[i].frag_size);
		rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
	}

	/* If the number of used fragments does not fill up the ring stride,
	 * remaining (unused) fragments must be padded with null address/size
	 * and a special memory key */
	possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
	for (i = priv->num_frags; i < possible_frags; i++) {
		rx_desc->data[i].byte_count = 0;
		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
		rx_desc->data[i].addr = 0;
	}
}

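/*
 * Populate all data fragments of one RX descriptor with freshly allocated
 * buffers; on failure the pages already taken for this descriptor are
 * released.
 */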
static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
				   struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
	struct skb_frag_struct *skb_frags = ring->rx_info +
					    (index << priv->log_rx_info);
	int i;

	for (i = 0; i < priv->num_frags; i++)
		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i))
			goto err;

	return 0;

err:
	while (i--)
		put_page(skb_frags[i].page);
	return -ENOMEM;
}

static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
{
	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
}

static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring,
				 int index)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct skb_frag_struct *skb_frags;
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
	dma_addr_t dma;
	int nr;

	skb_frags = ring->rx_info + (index << priv->log_rx_info);
	for (nr = 0; nr < priv->num_frags; nr++) {
		en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
		dma = be64_to_cpu(rx_desc->data[nr].addr);

		en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
				 PCI_DMA_FROMDEVICE);
		put_page(skb_frags[nr].page);
	}
}

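/*
 * Fill all RX rings with receive buffers.  Rings are filled in lock-step,
 * one descriptor per ring per iteration, so a buffer shortage degrades all
 * rings evenly.  If allocation fails after the minimum ring size has been
 * reached, every ring is shrunk to the largest power of two not exceeding
 * the fill level reached when the failure occurred.
 */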
static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
	int ring_ind;
	int buf_ind;
	int new_size;

	for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
		for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
			ring = &priv->rx_ring[ring_ind];

			if (mlx4_en_prepare_rx_desc(priv, ring,
						    ring->actual_size)) {
				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
					en_err(priv, "Failed to allocate "
						     "enough rx buffers\n");
					return -ENOMEM;
				} else {
					new_size = rounddown_pow_of_two(ring->actual_size);
					en_warn(priv, "Only %d buffers allocated, "
						      "reducing ring size to %d\n",
						ring->actual_size, new_size);
					goto reduce_rings;
				}
			}
			ring->actual_size++;
			ring->prod++;
		}
	}
	return 0;

reduce_rings:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];
		while (ring->actual_size > new_size) {
			ring->actual_size--;
			ring->prod--;
			mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
		}
		ring->size_mask = ring->actual_size - 1;
	}
	return 0;
}

static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	int index;

	en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
	       ring->cons, ring->prod);

	/* Unmap and free Rx buffers */
	BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size);
	while (ring->cons != ring->prod) {
		index = ring->cons & ring->size_mask;
		en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
		mlx4_en_free_rx_desc(priv, ring, index);
		++ring->cons;
	}
}

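/*
 * Allocate the software and hardware resources of one RX ring: the rx_info
 * shadow array, the HW work queue resources (descriptor buffer and doorbell
 * record) and the per-ring LRO manager.
 */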
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;
	int tmp;

	ring->prod = 0;
	ring->cons = 0;
	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;
	ring->log_stride = ffs(ring->stride) - 1;
	ring->buf_size = ring->size * ring->stride + TXBB_SIZE;

	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
					sizeof(struct skb_frag_struct));
	ring->rx_info = vmalloc(tmp);
	if (!ring->rx_info) {
		en_err(priv, "Failed allocating rx_info ring\n");
		return -ENOMEM;
	}
	en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
	       ring->rx_info, tmp);

	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
				 ring->buf_size, 2 * PAGE_SIZE);
	if (err)
		goto err_ring;

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		en_err(priv, "Failed to map RX buffer\n");
		goto err_hwq;
	}
	ring->buf = ring->wqres.buf.direct.buf;

	/* Configure lro mngr */
	memset(&ring->lro, 0, sizeof(struct net_lro_mgr));
	ring->lro.dev = priv->dev;
	ring->lro.features = LRO_F_NAPI;
	ring->lro.frag_align_pad = NET_IP_ALIGN;
	ring->lro.ip_summed = CHECKSUM_UNNECESSARY;
	ring->lro.ip_summed_aggr = CHECKSUM_UNNECESSARY;
	ring->lro.max_desc = mdev->profile.num_lro;
	ring->lro.max_aggr = MAX_SKB_FRAGS;
	ring->lro.lro_arr = kzalloc(mdev->profile.num_lro *
				    sizeof(struct net_lro_desc),
				    GFP_KERNEL);
	if (!ring->lro.lro_arr) {
		en_err(priv, "Failed to allocate lro array\n");
		/* err is still 0 here; report the allocation failure */
		err = -ENOMEM;
		goto err_map;
	}
	ring->lro.get_frag_header = mlx4_en_get_frag_header;

	return 0;

err_map:
	mlx4_en_unmap_buffer(&ring->wqres.buf);
err_hwq:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_ring:
	vfree(ring->rx_info);
	ring->rx_info = NULL;
	return err;
}

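/*
 * Bring all RX rings to an operational state: size the descriptor stride
 * for the configured number of fragments, initialize every descriptor and
 * the per-ring page allocators, fill the rings with buffers and ring the
 * producer doorbells.
 */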
int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
{
	struct mlx4_en_rx_ring *ring;
	int i;
	int ring_ind;
	int err;
	int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
					DS_SIZE * priv->num_frags);

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		ring->prod = 0;
		ring->cons = 0;
		ring->actual_size = 0;
		ring->cqn = priv->rx_cq[ring_ind].mcq.cqn;

		ring->stride = stride;
		if (ring->stride <= TXBB_SIZE)
			ring->buf += TXBB_SIZE;

		ring->log_stride = ffs(ring->stride) - 1;
		ring->buf_size = ring->size * ring->stride;

		memset(ring->buf, 0, ring->buf_size);
		mlx4_en_update_rx_prod_db(ring);

		/* Initialize all descriptors */
		for (i = 0; i < ring->size; i++)
			mlx4_en_init_rx_desc(priv, ring, i);

		/* Initialize page allocators */
		err = mlx4_en_init_allocator(priv, ring);
		if (err) {
			en_err(priv, "Failed initializing ring allocator\n");
			ring_ind--;
			goto err_allocator;
		}
	}
	err = mlx4_en_fill_rx_buffers(priv);
	if (err)
		goto err_buffers;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		mlx4_en_update_rx_prod_db(ring);
	}

	return 0;

err_buffers:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
		mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]);

	ring_ind = priv->rx_ring_num - 1;
err_allocator:
	while (ring_ind >= 0) {
		mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]);
		ring_ind--;
	}
	return err;
}

void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	kfree(ring->lro.lro_arr);
	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size + TXBB_SIZE);
	vfree(ring->rx_info);
	ring->rx_info = NULL;
}

void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	mlx4_en_free_rx_buf(priv, ring);
	if (ring->stride <= TXBB_SIZE)
		ring->buf -= TXBB_SIZE;
	mlx4_en_destroy_allocator(priv, ring);
}

/* Unmap a completed descriptor and free unused pages */
static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
				    struct mlx4_en_rx_desc *rx_desc,
				    struct skb_frag_struct *skb_frags,
				    struct skb_frag_struct *skb_frags_rx,
				    struct mlx4_en_rx_alloc *page_alloc,
				    int length)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info;
	int nr;
	dma_addr_t dma;

	/* Collect used fragments while replacing them in the HW descriptors */
	for (nr = 0; nr < priv->num_frags; nr++) {
		frag_info = &priv->frag_info[nr];
		if (length <= frag_info->frag_prefix_size)
			break;

		/* Save page reference in skb */
		skb_frags_rx[nr].page = skb_frags[nr].page;
		skb_frags_rx[nr].size = skb_frags[nr].size;
		skb_frags_rx[nr].page_offset = skb_frags[nr].page_offset;
		dma = be64_to_cpu(rx_desc->data[nr].addr);

		/* Allocate a replacement page */
		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr))
			goto fail;

		/* Unmap buffer */
		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
				 PCI_DMA_FROMDEVICE);
	}
	/* Adjust size of last fragment to match actual length */
	if (nr > 0)
		skb_frags_rx[nr - 1].size = length -
			priv->frag_info[nr - 1].frag_prefix_size;
	return nr;

fail:
	/* Drop all accumulated fragments (which have already been replaced in
	 * the descriptor) of this packet; remaining fragments are reused... */
	while (nr > 0) {
		nr--;
		put_page(skb_frags_rx[nr].page);
	}
	return 0;
}

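/*
 * Build an sk_buff for a received packet.  Short packets (up to
 * SMALL_PACKET_SIZE) are copied entirely into the skb's linear area;
 * larger packets keep their pages as fragments, with only the headers
 * copied into the linear part.
 */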
static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_desc *rx_desc,
				      struct skb_frag_struct *skb_frags,
				      struct mlx4_en_rx_alloc *page_alloc,
				      unsigned int length)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct sk_buff *skb;
	void *va;
	int used_frags;
	dma_addr_t dma;

	skb = dev_alloc_skb(SMALL_PACKET_SIZE + NET_IP_ALIGN);
	if (!skb) {
		en_dbg(RX_ERR, priv, "Failed allocating skb\n");
		return NULL;
	}
	skb->dev = priv->dev;
	skb_reserve(skb, NET_IP_ALIGN);
	skb->len = length;
	skb->truesize = length + sizeof(struct sk_buff);

	/* Get pointer to first fragment so we can copy the headers into the
	 * (linear part of the) skb */
	va = page_address(skb_frags[0].page) + skb_frags[0].page_offset;

	if (length <= SMALL_PACKET_SIZE) {
		/* We are copying all relevant data to the skb - temporarily
		 * sync buffers for the copy */
		dma = be64_to_cpu(rx_desc->data[0].addr);
		dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length,
					DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, va, length);
		dma_sync_single_for_device(&mdev->pdev->dev, dma, length,
					   DMA_FROM_DEVICE);
		skb->tail += length;
	} else {
		/* Move relevant fragments to skb */
		used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags,
						      skb_shinfo(skb)->frags,
						      page_alloc, length);
		if (unlikely(!used_frags)) {
			kfree_skb(skb);
			return NULL;
		}
		skb_shinfo(skb)->nr_frags = used_frags;

		/* Copy headers into the skb linear buffer */
		memcpy(skb->data, va, HEADER_COPY_SIZE);
		skb->tail += HEADER_COPY_SIZE;

		/* Skip headers in first fragment */
		skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE;

		/* Adjust size of first fragment */
		skb_shinfo(skb)->frags[0].size -= HEADER_COPY_SIZE;
		skb->data_len = length - HEADER_COPY_SIZE;
	}
	return skb;
}

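/*
 * Main RX completion handler, called from NAPI poll.  Walks the completion
 * queue for up to 'budget' entries, validates each completion, feeds
 * eligible packets to LRO or builds an skb and passes it up the stack, and
 * finally updates the CQ consumer index and the RX producer doorbell.
 */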
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
	struct skb_frag_struct *skb_frags;
	struct skb_frag_struct lro_frags[MLX4_EN_MAX_RX_FRAGS];
	struct mlx4_en_rx_desc *rx_desc;
	struct sk_buff *skb;
	int index;
	int nr;
	unsigned int length;
	int polled = 0;
	int ip_summed;

	if (!priv->port_up)
		return 0;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = &cq->buf[index];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		skb_frags = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		/* Drop packet on bad receive or bad checksum */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
						MLX4_CQE_OPCODE_ERROR)) {
			en_err(priv, "CQE completed in error - vendor "
				     "syndrome:%d syndrome:%d\n",
			       ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
			       ((struct mlx4_err_cqe *) cqe)->syndrome);
			goto next;
		}
		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
			en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
			goto next;
		}

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		ring->bytes += length;
		ring->packets++;

		if (likely(priv->rx_csum)) {
			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
			    (cqe->checksum == cpu_to_be16(0xffff))) {
				priv->port_stats.rx_chksum_good++;
				/* This packet is eligible for LRO if it is:
				 * - DIX Ethernet (type interpretation)
				 * - TCP/IP (v4)
				 * - without IP options
				 * - not an IP fragment */
				if (mlx4_en_can_lro(cqe->status) &&
				    dev->features & NETIF_F_LRO) {

					nr = mlx4_en_complete_rx_desc(
						priv, rx_desc,
						skb_frags, lro_frags,
						ring->page_alloc, length);
					if (!nr)
						goto next;

					if (priv->vlgrp && (cqe->vlan_my_qpn &
						cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK))) {
						lro_vlan_hwaccel_receive_frags(
						       &ring->lro, lro_frags,
						       length, length,
						       priv->vlgrp,
						       be16_to_cpu(cqe->sl_vid),
						       NULL, 0);
					} else
						lro_receive_frags(&ring->lro,
								  lro_frags,
								  length,
								  length,
								  NULL, 0);

					goto next;
				}

				/* LRO not possible, complete processing here */
				ip_summed = CHECKSUM_UNNECESSARY;
				INC_PERF_COUNTER(priv->pstats.lro_misses);
			} else {
				ip_summed = CHECKSUM_NONE;
				priv->port_stats.rx_chksum_none++;
			}
		} else {
			ip_summed = CHECKSUM_NONE;
			priv->port_stats.rx_chksum_none++;
		}

		skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags,
				     ring->page_alloc, length);
		if (!skb) {
			priv->stats.rx_dropped++;
			goto next;
		}

		skb->ip_summed = ip_summed;
		skb->protocol = eth_type_trans(skb, dev);
		skb_record_rx_queue(skb, cq->ring);

		/* Push it up the stack */
		if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) &
				    MLX4_CQE_VLAN_PRESENT_MASK)) {
			vlan_hwaccel_receive_skb(skb, priv->vlgrp,
						 be16_to_cpu(cqe->sl_vid));
		} else
			netif_receive_skb(skb);

next:
		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = &cq->buf[index];
		if (++polled == budget) {
			/* We are here because we reached the NAPI budget -
			 * flush only pending LRO sessions */
			lro_flush_all(&ring->lro);
			goto out;
		}
	}

	/* If CQ is empty flush all LRO sessions unconditionally */
	lro_flush_all(&ring->lro);

out:
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	ring->prod += polled; /* Polled descriptors were reallocated in place */
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}

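/* RX completion interrupt handler: schedule NAPI while the port is up,
 * otherwise just re-arm the CQ. */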
void mlx4_en_rx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);

	if (priv->port_up)
		napi_schedule(&cq->napi);
	else
		mlx4_en_arm_cq(priv, cq);
}

/* Rx CQ polling - called by NAPI */
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
{
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int done;

	done = mlx4_en_process_rx_cq(dev, cq, budget);

	/* If we used up all the quota - we're probably not done yet... */
	if (done == budget)
		INC_PERF_COUNTER(priv->pstats.napi_quota);
	else {
		/* Done for now */
		napi_complete(napi);
		mlx4_en_arm_cq(priv, cq);
	}
	return done;
}

/* Calculate the last offset position that accommodates a full fragment
 * (assuming fragment size = stride-align) */
static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
{
	u16 res = MLX4_EN_ALLOC_SIZE % stride;
	u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align;

	en_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
			  "res:%d offset:%d\n", stride, align, res, offset);
	return offset;
}

static int frag_sizes[] = {
	FRAG_SZ0,
	FRAG_SZ1,
	FRAG_SZ2,
	FRAG_SZ3
};

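/*
 * Compute the RX scatter-list layout for the current MTU: split the
 * effective frame size (MTU plus Ethernet/VLAN/LLC-SNAP overhead) across
 * up to four fragments (frag_sizes[]) and record size, prefix, alignment
 * and stride for each one.
 */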
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE;
	int buf_size = 0;
	int i = 0;

	while (buf_size < eff_mtu) {
		priv->frag_info[i].frag_size =
			(eff_mtu > buf_size + frag_sizes[i]) ?
				frag_sizes[i] : eff_mtu - buf_size;
		priv->frag_info[i].frag_prefix_size = buf_size;
		if (!i)	{
			priv->frag_info[i].frag_align = NET_IP_ALIGN;
			priv->frag_info[i].frag_stride =
				ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES);
		} else {
			priv->frag_info[i].frag_align = 0;
			priv->frag_info[i].frag_stride =
				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
		}
		priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
						priv, priv->frag_info[i].frag_stride,
						priv->frag_info[i].frag_align);
		buf_size += priv->frag_info[i].frag_size;
		i++;
	}

	priv->num_frags = i;
	priv->rx_skb_size = eff_mtu;
	priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct));

	en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
			  "num_frags:%d):\n", eff_mtu, priv->num_frags);
	for (i = 0; i < priv->num_frags; i++) {
		en_dbg(DRV, priv, "  frag:%d - size:%d prefix:%d align:%d "
				  "stride:%d last_offset:%d\n", i,
		       priv->frag_info[i].frag_size,
		       priv->frag_info[i].frag_prefix_size,
		       priv->frag_info[i].frag_align,
		       priv->frag_info[i].frag_stride,
		       priv->frag_info[i].last_offset);
	}
}

/* RSS related functions */

static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
				 struct mlx4_en_rx_ring *ring,
				 enum mlx4_qp_state *state,
				 struct mlx4_qp *qp)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_qp_context *context;
	int err = 0;

	context = kmalloc(sizeof *context, GFP_KERNEL);
	if (!context) {
		en_err(priv, "Failed to allocate qp context\n");
		return -ENOMEM;
	}

	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
	if (err) {
		en_err(priv, "Failed to allocate qp #%x\n", qpn);
		goto out;
	}
	qp->event = mlx4_en_sqp_event;

	memset(context, 0, sizeof *context);
	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 0, 0,
				qpn, ring->cqn, context);
	context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);

	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
	if (err) {
		mlx4_qp_remove(mdev->dev, qp);
		mlx4_qp_free(mdev->dev, qp);
	}
	mlx4_en_update_rx_prod_db(ring);
out:
	kfree(context);
	return err;
}

/* Allocate rx qp's and configure them according to rss map */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	struct mlx4_qp_context context;
	struct mlx4_en_rss_context *rss_context;
	void *ptr;
	int rss_xor = mdev->profile.rss_xor;
	u8 rss_mask = mdev->profile.rss_mask;
	int i, qpn;
	int err = 0;
	int good_qps = 0;

	en_dbg(DRV, priv, "Configuring rss steering\n");
	err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
				    priv->rx_ring_num,
				    &rss_map->base_qpn);
	if (err) {
		en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
		return err;
	}

	for (i = 0; i < priv->rx_ring_num; i++) {
		qpn = rss_map->base_qpn + i;
		err = mlx4_en_config_rss_qp(priv, qpn, &priv->rx_ring[i],
					    &rss_map->state[i],
					    &rss_map->qps[i]);
		if (err)
			goto rss_err;

		++good_qps;
	}

	/* Configure RSS indirection qp */
	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn);
	if (err) {
		en_err(priv, "Failed to reserve range for RSS "
			     "indirection qp\n");
		goto rss_err;
	}
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
	if (err) {
		en_err(priv, "Failed to allocate RSS indirection QP\n");
		goto reserve_err;
	}
	rss_map->indir_qp.event = mlx4_en_sqp_event;
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
				priv->rx_ring[0].cqn, &context);

	ptr = ((void *) &context) + 0x3c;
	rss_context = (struct mlx4_en_rss_context *) ptr;
	rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 |
					    (rss_map->base_qpn));
	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
	rss_context->hash_fn = rss_xor & 0x3;
	rss_context->flags = rss_mask << 2;

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
			       &rss_map->indir_qp, &rss_map->indir_state);
	if (err)
		goto indir_err;

	return 0;

indir_err:
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
reserve_err:
	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
rss_err:
	for (i = 0; i < good_qps; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
	return err;
}

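/*
 * Tear down RSS steering: reset and free the indirection QP and every
 * per-ring RX QP, then release the reserved QP number ranges.
 */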
void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	int i;

	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
	for (i = 0; i < priv->rx_ring_num; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
}