/*
 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/mlx4/cq.h>
#include <linux/mlx4/qp.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>

#include "mlx4_en.h"

static void *get_wqe(struct mlx4_en_rx_ring *ring, int n)
{
	int offset = n << ring->srq.wqe_shift;
	return ring->buf + offset;
}

static void mlx4_en_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
{
	return;
}

static int mlx4_en_get_frag_header(struct skb_frag_struct *frags, void **mac_hdr,
				   void **ip_hdr, void **tcpudp_hdr,
				   u64 *hdr_flags, void *priv)
{
	*mac_hdr = page_address(frags->page) + frags->page_offset;
	*ip_hdr = *mac_hdr + ETH_HLEN;
	*tcpudp_hdr = (struct tcphdr *)(*ip_hdr + sizeof(struct iphdr));
	*hdr_flags = LRO_IPV4 | LRO_TCP;
	return 0;
}

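/*
 * Hand out one receive fragment of type 'i' from the ring's page allocator:
 * either the next chunk of the current page (taking an extra page reference)
 * or, when the page's last chunk is used, swap in a freshly allocated page
 * for subsequent requests.  The chunk is DMA-mapped and its bus address is
 * written into the Rx descriptor.
 */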
static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_desc *rx_desc,
			      struct skb_frag_struct *skb_frags,
			      struct mlx4_en_rx_alloc *ring_alloc,
			      int i)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
	struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
	struct page *page;
	dma_addr_t dma;

	if (page_alloc->offset == frag_info->last_offset) {
		/* Allocate new page */
		page = alloc_pages(GFP_ATOMIC | __GFP_COMP, MLX4_EN_ALLOC_ORDER);
		if (!page)
			return -ENOMEM;

		skb_frags[i].page = page_alloc->page;
		skb_frags[i].page_offset = page_alloc->offset;
		page_alloc->page = page;
		page_alloc->offset = frag_info->frag_align;
	} else {
		page = page_alloc->page;
		get_page(page);

		skb_frags[i].page = page;
		skb_frags[i].page_offset = page_alloc->offset;
		page_alloc->offset += frag_info->frag_stride;
	}
	dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) +
			     skb_frags[i].page_offset, frag_info->frag_size,
			     PCI_DMA_FROMDEVICE);
	rx_desc->data[i].addr = cpu_to_be64(dma);
	return 0;
}

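/*
 * Set up the per-ring page allocators: one page per fragment type, with the
 * initial offset set to that fragment's alignment.
 */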
static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
				  struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_rx_alloc *page_alloc;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		page_alloc = &ring->page_alloc[i];
		page_alloc->page = alloc_pages(GFP_ATOMIC | __GFP_COMP,
					       MLX4_EN_ALLOC_ORDER);
		if (!page_alloc->page)
			goto out;

		page_alloc->offset = priv->frag_info[i].frag_align;
		mlx4_dbg(DRV, priv, "Initialized allocator:%d with page:%p\n",
			 i, page_alloc->page);
	}
	return 0;

out:
	while (i--) {
		page_alloc = &ring->page_alloc[i];
		put_page(page_alloc->page);
		page_alloc->page = NULL;
	}
	return -ENOMEM;
}

static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_rx_alloc *page_alloc;
	int i;

	for (i = 0; i < priv->num_frags; i++) {
		page_alloc = &ring->page_alloc[i];
		mlx4_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
			 i, page_count(page_alloc->page));

		put_page(page_alloc->page);
		page_alloc->page = NULL;
	}
}

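/*
 * Write the static parts of an Rx descriptor: link it to the next WQE, set
 * the size and memory key of every used fragment, and pad unused scatter
 * entries with a zero length and the special PAD memory key.
 */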
static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
	struct skb_frag_struct *skb_frags = ring->rx_info +
					    (index << priv->log_rx_info);
	int possible_frags;
	int i;

	/* Pre-link descriptor */
	rx_desc->next.next_wqe_index = cpu_to_be16((index + 1) & ring->size_mask);

	/* Set size and memtype fields */
	for (i = 0; i < priv->num_frags; i++) {
		skb_frags[i].size = priv->frag_info[i].frag_size;
		rx_desc->data[i].byte_count =
			cpu_to_be32(priv->frag_info[i].frag_size);
		rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
	}

	/* If the number of used fragments does not fill up the ring stride,
	 * remaining (unused) fragments must be padded with null address/size
	 * and a special memory key */
	possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
	for (i = priv->num_frags; i < possible_frags; i++) {
		rx_desc->data[i].byte_count = 0;
		rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
		rx_desc->data[i].addr = 0;
	}
}

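/* Attach freshly allocated, DMA-mapped fragments to the descriptor at 'index'. */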
static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
				   struct mlx4_en_rx_ring *ring, int index)
{
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
	struct skb_frag_struct *skb_frags = ring->rx_info +
					    (index << priv->log_rx_info);
	int i;

	for (i = 0; i < priv->num_frags; i++)
		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, ring->page_alloc, i))
			goto err;

	return 0;

err:
	while (i--)
		put_page(skb_frags[i].page);
	return -ENOMEM;
}

static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
{
	*ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
}

static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
				 struct mlx4_en_rx_ring *ring,
				 int index)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct skb_frag_struct *skb_frags;
	struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
	dma_addr_t dma;
	int nr;

	skb_frags = ring->rx_info + (index << priv->log_rx_info);
	for (nr = 0; nr < priv->num_frags; nr++) {
		mlx4_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
		dma = be64_to_cpu(rx_desc->data[nr].addr);

		mlx4_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
				 PCI_DMA_FROMDEVICE);
		put_page(skb_frags[nr].page);
	}
}

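/*
 * Fill every Rx ring with receive buffers.  Failing before MLX4_EN_MIN_RX_SIZE
 * descriptors are posted is fatal; otherwise all rings are shrunk to the
 * largest power of two that could actually be filled.
 */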
static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rx_ring *ring;
	int ring_ind;
	int buf_ind;
	int new_size;

	for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
		for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
			ring = &priv->rx_ring[ring_ind];

			if (mlx4_en_prepare_rx_desc(priv, ring,
						    ring->actual_size)) {
				if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
					mlx4_err(mdev, "Failed to allocate "
						       "enough rx buffers\n");
					return -ENOMEM;
				} else {
					new_size = rounddown_pow_of_two(ring->actual_size);
					mlx4_warn(mdev, "Only %d buffers allocated "
							"reducing ring size to %d",
						  ring->actual_size, new_size);
					goto reduce_rings;
				}
			}
			ring->actual_size++;
			ring->prod++;
		}
	}
	return 0;

reduce_rings:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];
		while (ring->actual_size > new_size) {
			ring->actual_size--;
			ring->prod--;
			mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
		}
		ring->size_mask = ring->actual_size - 1;
	}
	return 0;
}

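/*
 * Top up a ring with receive buffers until it reaches its actual size and
 * return the number of descriptors added; mark the ring full when no free
 * slots remain.
 */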
static int mlx4_en_fill_rx_buf(struct net_device *dev,
			       struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int num = 0;
	int err;

	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
		err = mlx4_en_prepare_rx_desc(priv, ring, ring->prod &
					      ring->size_mask);
		if (err) {
			if (netif_msg_rx_err(priv))
				mlx4_warn(priv->mdev,
					  "Failed preparing rx descriptor\n");
			priv->port_stats.rx_alloc_failed++;
			break;
		}
		++num;
		++ring->prod;
	}
	if ((u32) (ring->prod - ring->cons) == ring->actual_size)
		ring->full = 1;

	return num;
}

static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	int index;

	mlx4_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
		 ring->cons, ring->prod);

	/* Unmap and free Rx buffers */
	BUG_ON((u32) (ring->prod - ring->cons) > ring->actual_size);
	while (ring->cons != ring->prod) {
		index = ring->cons & ring->size_mask;
		mlx4_dbg(DRV, priv, "Processing descriptor:%d\n", index);
		mlx4_en_free_rx_desc(priv, ring, index);
		++ring->cons;
	}
}

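/*
 * Deferred work that retries buffer allocation for rings that could not be
 * refilled from the Rx path; it re-queues itself every HZ jiffies until all
 * rings are replenished.
 */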
void mlx4_en_rx_refill(struct work_struct *work)
{
	struct delayed_work *delay = to_delayed_work(work);
	struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv,
						 refill_task);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct net_device *dev = priv->dev;
	struct mlx4_en_rx_ring *ring;
	int need_refill = 0;
	int i;

	mutex_lock(&mdev->state_lock);
	if (!mdev->device_up || !priv->port_up)
		goto out;

	/* We only get here if there are no receive buffers, so we can't race
	 * with Rx interrupts while filling buffers */
	for (i = 0; i < priv->rx_ring_num; i++) {
		ring = &priv->rx_ring[i];
		if (ring->need_refill) {
			if (mlx4_en_fill_rx_buf(dev, ring)) {
				ring->need_refill = 0;
				mlx4_en_update_rx_prod_db(ring);
			} else
				need_refill = 1;
		}
	}
	if (need_refill)
		queue_delayed_work(mdev->workqueue, &priv->refill_task, HZ);

out:
	mutex_unlock(&mdev->state_lock);
}

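/*
 * Allocate the software and hardware resources for one Rx ring: the
 * per-descriptor fragment bookkeeping (rx_info), the HW queue buffer, and
 * the LRO manager with its descriptor array.
 */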
int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
			   struct mlx4_en_rx_ring *ring, u32 size, u16 stride)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;
	int tmp;

	/* Sanity check SRQ size before proceeding */
	if (size >= mdev->dev->caps.max_srq_wqes)
		return -EINVAL;

	ring->prod = 0;
	ring->cons = 0;
	ring->size = size;
	ring->size_mask = size - 1;
	ring->stride = stride;
	ring->log_stride = ffs(ring->stride) - 1;
	ring->buf_size = ring->size * ring->stride;

	tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
					sizeof(struct skb_frag_struct));
	ring->rx_info = vmalloc(tmp);
	if (!ring->rx_info) {
		mlx4_err(mdev, "Failed allocating rx_info ring\n");
		return -ENOMEM;
	}
	mlx4_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
		 ring->rx_info, tmp);

	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
				 ring->buf_size, 2 * PAGE_SIZE);
	if (err)
		goto err_ring;

	err = mlx4_en_map_buffer(&ring->wqres.buf);
	if (err) {
		mlx4_err(mdev, "Failed to map RX buffer\n");
		goto err_hwq;
	}
	ring->buf = ring->wqres.buf.direct.buf;

	/* Configure lro mngr */
	memset(&ring->lro, 0, sizeof(struct net_lro_mgr));
	ring->lro.dev = priv->dev;
	ring->lro.features = LRO_F_NAPI;
	ring->lro.frag_align_pad = NET_IP_ALIGN;
	ring->lro.ip_summed = CHECKSUM_UNNECESSARY;
	ring->lro.ip_summed_aggr = CHECKSUM_UNNECESSARY;
	ring->lro.max_desc = mdev->profile.num_lro;
	ring->lro.max_aggr = MAX_SKB_FRAGS;
	ring->lro.lro_arr = kzalloc(mdev->profile.num_lro *
				    sizeof(struct net_lro_desc),
				    GFP_KERNEL);
	if (!ring->lro.lro_arr) {
		mlx4_err(mdev, "Failed to allocate lro array\n");
		err = -ENOMEM;
		goto err_map;
	}
	ring->lro.get_frag_header = mlx4_en_get_frag_header;

	return 0;

err_map:
	mlx4_en_unmap_buffer(&ring->wqres.buf);
err_hwq:
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
err_ring:
	vfree(ring->rx_info);
	ring->rx_info = NULL;
	return err;
}

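/*
 * Bring all Rx rings into operation: initialize their descriptors and page
 * allocators, fill them with buffers, then allocate and wire up the SRQ that
 * represents each ring to the hardware.
 */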
int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_wqe_srq_next_seg *next;
	struct mlx4_en_rx_ring *ring;
	int i;
	int ring_ind;
	int err;
	int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
					DS_SIZE * priv->num_frags);
	int max_gs = (stride - sizeof(struct mlx4_wqe_srq_next_seg)) / DS_SIZE;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		ring->prod = 0;
		ring->cons = 0;
		ring->actual_size = 0;
		ring->cqn = priv->rx_cq[ring_ind].mcq.cqn;

		ring->stride = stride;
		ring->log_stride = ffs(ring->stride) - 1;
		ring->buf_size = ring->size * ring->stride;

		memset(ring->buf, 0, ring->buf_size);
		mlx4_en_update_rx_prod_db(ring);

		/* Initialize all descriptors */
		for (i = 0; i < ring->size; i++)
			mlx4_en_init_rx_desc(priv, ring, i);

		/* Initialize page allocators */
		err = mlx4_en_init_allocator(priv, ring);
		if (err) {
			mlx4_err(mdev, "Failed initializing ring allocator\n");
			ring_ind--;
			goto err_allocator;
		}

		/* Fill Rx buffers */
		ring->full = 0;
	}
	err = mlx4_en_fill_rx_buffers(priv);
	if (err)
		goto err_buffers;

	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
		ring = &priv->rx_ring[ring_ind];

		mlx4_en_update_rx_prod_db(ring);

		/* Configure SRQ representing the ring */
		ring->srq.max = ring->actual_size;
		ring->srq.max_gs = max_gs;
		ring->srq.wqe_shift = ilog2(ring->stride);

		for (i = 0; i < ring->srq.max; ++i) {
			next = get_wqe(ring, i);
			next->next_wqe_index =
				cpu_to_be16((i + 1) & (ring->srq.max - 1));
		}

		err = mlx4_srq_alloc(mdev->dev, mdev->priv_pdn, &ring->wqres.mtt,
				     ring->wqres.db.dma, &ring->srq);
		if (err) {
			mlx4_err(mdev, "Failed to allocate srq\n");
			ring_ind--;
			goto err_srq;
		}
		ring->srq.event = mlx4_en_srq_event;
	}

	return 0;

err_srq:
	while (ring_ind >= 0) {
		ring = &priv->rx_ring[ring_ind];
		mlx4_srq_free(mdev->dev, &ring->srq);
		ring_ind--;
	}

err_buffers:
	for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
		mlx4_en_free_rx_buf(priv, &priv->rx_ring[ring_ind]);

	ring_ind = priv->rx_ring_num - 1;
err_allocator:
	while (ring_ind >= 0) {
		mlx4_en_destroy_allocator(priv, &priv->rx_ring[ring_ind]);
		ring_ind--;
	}
	return err;
}

void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
			     struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	kfree(ring->lro.lro_arr);
	mlx4_en_unmap_buffer(&ring->wqres.buf);
	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
	vfree(ring->rx_info);
	ring->rx_info = NULL;
}

void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
				struct mlx4_en_rx_ring *ring)
{
	struct mlx4_en_dev *mdev = priv->mdev;

	mlx4_srq_free(mdev->dev, &ring->srq);
	mlx4_en_free_rx_buf(priv, ring);
	mlx4_en_destroy_allocator(priv, ring);
}

/* Unmap a completed descriptor and free unused pages */
static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
				    struct mlx4_en_rx_desc *rx_desc,
				    struct skb_frag_struct *skb_frags,
				    struct skb_frag_struct *skb_frags_rx,
				    struct mlx4_en_rx_alloc *page_alloc,
				    int length)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_frag_info *frag_info;
	int nr;
	dma_addr_t dma;

	/* Collect used fragments while replacing them in the HW descriptors */
	for (nr = 0; nr < priv->num_frags; nr++) {
		frag_info = &priv->frag_info[nr];
		if (length <= frag_info->frag_prefix_size)
			break;

		/* Save page reference in skb */
		skb_frags_rx[nr].page = skb_frags[nr].page;
		skb_frags_rx[nr].size = skb_frags[nr].size;
		skb_frags_rx[nr].page_offset = skb_frags[nr].page_offset;
		dma = be64_to_cpu(rx_desc->data[nr].addr);

		/* Allocate a replacement page */
		if (mlx4_en_alloc_frag(priv, rx_desc, skb_frags, page_alloc, nr))
			goto fail;

		/* Unmap buffer */
		pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
				 PCI_DMA_FROMDEVICE);
	}
	/* Adjust size of last fragment to match actual length */
	skb_frags_rx[nr - 1].size = length -
		priv->frag_info[nr - 1].frag_prefix_size;
	return nr;

fail:
	/* Drop all accumulated fragments (which have already been replaced in
	 * the descriptor) of this packet; remaining fragments are reused... */
	while (nr > 0) {
		nr--;
		put_page(skb_frags_rx[nr].page);
	}
	return 0;
}

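/*
 * Build an skb for a completed receive.  Short packets are copied entirely
 * into the linear part of the skb; larger packets keep their data in page
 * fragments and only the headers are copied into the linear part.
 */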
static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
				      struct mlx4_en_rx_desc *rx_desc,
				      struct skb_frag_struct *skb_frags,
				      struct mlx4_en_rx_alloc *page_alloc,
				      unsigned int length)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct sk_buff *skb;
	void *va;
	int used_frags;
	dma_addr_t dma;

	skb = dev_alloc_skb(SMALL_PACKET_SIZE + NET_IP_ALIGN);
	if (!skb) {
		mlx4_dbg(RX_ERR, priv, "Failed allocating skb\n");
		return NULL;
	}
	skb->dev = priv->dev;
	skb_reserve(skb, NET_IP_ALIGN);
	skb->len = length;
	skb->truesize = length + sizeof(struct sk_buff);

	/* Get pointer to first fragment so we could copy the headers into the
	 * (linear part of the) skb */
	va = page_address(skb_frags[0].page) + skb_frags[0].page_offset;

	if (length <= SMALL_PACKET_SIZE) {
		/* We are copying all relevant data to the skb - temporarily
		 * synch buffers for the copy */
		dma = be64_to_cpu(rx_desc->data[0].addr);
		dma_sync_single_range_for_cpu(&mdev->pdev->dev, dma, 0,
					      length, DMA_FROM_DEVICE);
		skb_copy_to_linear_data(skb, va, length);
		dma_sync_single_range_for_device(&mdev->pdev->dev, dma, 0,
						 length, DMA_FROM_DEVICE);
		skb->tail += length;
	} else {
		/* Move relevant fragments to skb */
		used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, skb_frags,
						      skb_shinfo(skb)->frags,
						      page_alloc, length);
		if (unlikely(!used_frags)) {
			kfree_skb(skb);
			return NULL;
		}
		skb_shinfo(skb)->nr_frags = used_frags;

		/* Copy headers into the skb linear buffer */
		memcpy(skb->data, va, HEADER_COPY_SIZE);
		skb->tail += HEADER_COPY_SIZE;

		/* Skip headers in first fragment */
		skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE;

		/* Adjust size of first fragment */
		skb_shinfo(skb)->frags[0].size -= HEADER_COPY_SIZE;
		skb->data_len = length - HEADER_COPY_SIZE;
	}
	return skb;
}

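/*
 * Copy 'num' descriptors (page references and DMA addresses) from position
 * 'from' to position 'to' in the ring; used on the Rx completion path when
 * the ring is not full and descriptors refilled in place must be moved up to
 * the producer index.
 */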
static void mlx4_en_copy_desc(struct mlx4_en_priv *priv,
			      struct mlx4_en_rx_ring *ring,
			      int from, int to, int num)
{
	struct skb_frag_struct *skb_frags_from;
	struct skb_frag_struct *skb_frags_to;
	struct mlx4_en_rx_desc *rx_desc_from;
	struct mlx4_en_rx_desc *rx_desc_to;
	int from_index, to_index;
	int nr, i;

	for (i = 0; i < num; i++) {
		from_index = (from + i) & ring->size_mask;
		to_index = (to + i) & ring->size_mask;
		skb_frags_from = ring->rx_info + (from_index << priv->log_rx_info);
		skb_frags_to = ring->rx_info + (to_index << priv->log_rx_info);
		rx_desc_from = ring->buf + (from_index << ring->log_stride);
		rx_desc_to = ring->buf + (to_index << ring->log_stride);

		for (nr = 0; nr < priv->num_frags; nr++) {
			skb_frags_to[nr].page = skb_frags_from[nr].page;
			skb_frags_to[nr].page_offset = skb_frags_from[nr].page_offset;
			rx_desc_to->data[nr].addr = rx_desc_from->data[nr].addr;
		}
	}
}

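/*
 * Rx completion processing: walk the CQ, validate each completion, hand good
 * packets to LRO or the network stack, and replenish the ring.  Returns the
 * number of completions processed, bounded by the NAPI 'budget'.
 */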
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_cqe *cqe;
	struct mlx4_en_rx_ring *ring = &priv->rx_ring[cq->ring];
	struct skb_frag_struct *skb_frags;
	struct skb_frag_struct lro_frags[MLX4_EN_MAX_RX_FRAGS];
	struct mlx4_en_rx_desc *rx_desc;
	struct sk_buff *skb;
	int index;
	int nr;
	unsigned int length;
	int polled = 0;
	int ip_summed;

	if (!priv->port_up)
		return 0;

	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
	 * descriptor offset can be deduced from the CQE index instead of
	 * reading 'cqe->index' */
	index = cq->mcq.cons_index & ring->size_mask;
	cqe = &cq->buf[index];

	/* Process all completed CQEs */
	while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
		    cq->mcq.cons_index & cq->size)) {

		skb_frags = ring->rx_info + (index << priv->log_rx_info);
		rx_desc = ring->buf + (index << ring->log_stride);

		/*
		 * make sure we read the CQE after we read the ownership bit
		 */
		rmb();

		/* Drop packet on bad receive or bad checksum */
		if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
						MLX4_CQE_OPCODE_ERROR)) {
			mlx4_err(mdev, "CQE completed in error - vendor "
				       "syndrome:%d syndrome:%d\n",
				 ((struct mlx4_err_cqe *) cqe)->vendor_err_syndrome,
				 ((struct mlx4_err_cqe *) cqe)->syndrome);
			goto next;
		}
		if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
			mlx4_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
			goto next;
		}

		/*
		 * Packet is OK - process it.
		 */
		length = be32_to_cpu(cqe->byte_cnt);
		ring->bytes += length;
		ring->packets++;

		if (likely(priv->rx_csum)) {
			if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
			    (cqe->checksum == cpu_to_be16(0xffff))) {
				priv->port_stats.rx_chksum_good++;
				/* This packet is eligible for LRO if it is:
				 * - DIX Ethernet (type interpretation)
				 * - TCP/IP (v4)
				 * - without IP options
				 * - not an IP fragment */
				if (mlx4_en_can_lro(cqe->status) &&
				    dev->features & NETIF_F_LRO) {

					nr = mlx4_en_complete_rx_desc(
						priv, rx_desc,
						skb_frags, lro_frags,
						ring->page_alloc, length);
					if (!nr)
						goto next;

					if (priv->vlgrp && (cqe->vlan_my_qpn &
						cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK))) {
						lro_vlan_hwaccel_receive_frags(
							&ring->lro, lro_frags,
							length, length,
							priv->vlgrp,
							be16_to_cpu(cqe->sl_vid),
							NULL, 0);
					} else
						lro_receive_frags(&ring->lro,
								  lro_frags,
								  length,
								  length,
								  NULL, 0);

					goto next;
				}

				/* LRO not possible, complete processing here */
				ip_summed = CHECKSUM_UNNECESSARY;
				INC_PERF_COUNTER(priv->pstats.lro_misses);
			} else {
				ip_summed = CHECKSUM_NONE;
				priv->port_stats.rx_chksum_none++;
			}
		} else {
			ip_summed = CHECKSUM_NONE;
			priv->port_stats.rx_chksum_none++;
		}

		skb = mlx4_en_rx_skb(priv, rx_desc, skb_frags,
				     ring->page_alloc, length);
		if (!skb) {
			priv->stats.rx_dropped++;
			goto next;
		}

		skb->ip_summed = ip_summed;
		skb->protocol = eth_type_trans(skb, dev);
		skb_record_rx_queue(skb, cq->ring);

		/* Push it up the stack */
		if (priv->vlgrp && (be32_to_cpu(cqe->vlan_my_qpn) &
				    MLX4_CQE_VLAN_PRESENT_MASK)) {
			vlan_hwaccel_receive_skb(skb, priv->vlgrp,
						 be16_to_cpu(cqe->sl_vid));
		} else
			netif_receive_skb(skb);

next:
		++cq->mcq.cons_index;
		index = (cq->mcq.cons_index) & ring->size_mask;
		cqe = &cq->buf[index];
		if (++polled == budget) {
			/* We are here because we reached the NAPI budget -
			 * flush only pending LRO sessions */
			lro_flush_all(&ring->lro);
			goto out;
		}
	}

	/* If CQ is empty flush all LRO sessions unconditionally */
	lro_flush_all(&ring->lro);

out:
	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
	mlx4_cq_set_ci(&cq->mcq);
	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
	ring->cons = cq->mcq.cons_index;
	ring->prod += polled; /* Polled descriptors were reallocated in place */
	if (unlikely(!ring->full)) {
		mlx4_en_copy_desc(priv, ring, ring->cons - polled,
				  ring->prod - polled, polled);
		mlx4_en_fill_rx_buf(dev, ring);
	}
	mlx4_en_update_rx_prod_db(ring);
	return polled;
}

void mlx4_en_rx_irq(struct mlx4_cq *mcq)
{
	struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
	struct mlx4_en_priv *priv = netdev_priv(cq->dev);

	if (priv->port_up)
		napi_schedule(&cq->napi);
	else
		mlx4_en_arm_cq(priv, cq);
}

/* Rx CQ polling - called by NAPI */
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
{
	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
	struct net_device *dev = cq->dev;
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int done;

	done = mlx4_en_process_rx_cq(dev, cq, budget);

	/* If we used up all the quota - we're probably not done yet... */
	if (done == budget)
		INC_PERF_COUNTER(priv->pstats.napi_quota);
	else {
		/* Done for now */
		napi_complete(napi);
		mlx4_en_arm_cq(priv, cq);
	}
	return done;
}

/* Calculate the last offset position that accommodates a full fragment
 * (assuming fragment size = stride-align) */
static int mlx4_en_last_alloc_offset(struct mlx4_en_priv *priv, u16 stride, u16 align)
{
	u16 res = MLX4_EN_ALLOC_SIZE % stride;
	u16 offset = MLX4_EN_ALLOC_SIZE - stride - res + align;

	mlx4_dbg(DRV, priv, "Calculated last offset for stride:%d align:%d "
			    "res:%d offset:%d\n", stride, align, res, offset);
	return offset;
}

static int frag_sizes[] = {
	FRAG_SZ0,
	FRAG_SZ1,
	FRAG_SZ2,
	FRAG_SZ3
};

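/*
 * Split the effective MTU (MTU plus Ethernet, VLAN and LLC/SNAP headers)
 * across the frag_sizes[] fragment types, recording each fragment's size,
 * alignment and stride for the Rx page allocators.
 */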
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN + ETH_LLC_SNAP_SIZE;
	int buf_size = 0;
	int i = 0;

	while (buf_size < eff_mtu) {
		priv->frag_info[i].frag_size =
			(eff_mtu > buf_size + frag_sizes[i]) ?
				frag_sizes[i] : eff_mtu - buf_size;
		priv->frag_info[i].frag_prefix_size = buf_size;
		if (!i) {
			priv->frag_info[i].frag_align = NET_IP_ALIGN;
			priv->frag_info[i].frag_stride =
				ALIGN(frag_sizes[i] + NET_IP_ALIGN, SMP_CACHE_BYTES);
		} else {
			priv->frag_info[i].frag_align = 0;
			priv->frag_info[i].frag_stride =
				ALIGN(frag_sizes[i], SMP_CACHE_BYTES);
		}
		priv->frag_info[i].last_offset = mlx4_en_last_alloc_offset(
						priv, priv->frag_info[i].frag_stride,
						priv->frag_info[i].frag_align);
		buf_size += priv->frag_info[i].frag_size;
		i++;
	}

	priv->num_frags = i;
	priv->rx_skb_size = eff_mtu;
	priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct skb_frag_struct));

	mlx4_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d "
			    "num_frags:%d):\n", eff_mtu, priv->num_frags);
	for (i = 0; i < priv->num_frags; i++) {
		mlx4_dbg(DRV, priv, " frag:%d - size:%d prefix:%d align:%d "
				    "stride:%d last_offset:%d\n", i,
			 priv->frag_info[i].frag_size,
			 priv->frag_info[i].frag_prefix_size,
			 priv->frag_info[i].frag_align,
			 priv->frag_info[i].frag_stride,
			 priv->frag_info[i].last_offset);
	}
}

/* RSS related functions */

/* Calculate rss size and map each entry in rss table to rx ring */
void mlx4_en_set_default_rss_map(struct mlx4_en_priv *priv,
				 struct mlx4_en_rss_map *rss_map,
				 int num_entries, int num_rings)
{
	int i;

	rss_map->size = roundup_pow_of_two(num_entries);
	mlx4_dbg(DRV, priv, "Setting default RSS map of %d entries\n",
		 rss_map->size);

	for (i = 0; i < rss_map->size; i++) {
		rss_map->map[i] = i % num_rings;
		mlx4_dbg(DRV, priv, "Entry %d ---> ring %d\n", i, rss_map->map[i]);
	}
}

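/*
 * Allocate a single Rx QP, attach it to the given CQ and SRQ, and bring it
 * to the ready state.
 */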
static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv,
				 int qpn, int srqn, int cqn,
				 enum mlx4_qp_state *state,
				 struct mlx4_qp *qp)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_qp_context *context;
	int err = 0;

	context = kmalloc(sizeof *context, GFP_KERNEL);
	if (!context) {
		mlx4_err(mdev, "Failed to allocate qp context\n");
		return -ENOMEM;
	}

	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
	if (err) {
		mlx4_err(mdev, "Failed to allocate qp #%d\n", qpn);
		goto out;
	}
	qp->event = mlx4_en_sqp_event;

	memset(context, 0, sizeof *context);
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 0, qpn, cqn, srqn, context);

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, context, qp, state);
	if (err) {
		mlx4_qp_remove(mdev->dev, qp);
		mlx4_qp_free(mdev->dev, qp);
	}
out:
	kfree(context);
	return err;
}

/* Allocate rx qp's and configure them according to rss map */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	struct mlx4_qp_context context;
	struct mlx4_en_rss_context *rss_context;
	void *ptr;
	int rss_xor = mdev->profile.rss_xor;
	u8 rss_mask = mdev->profile.rss_mask;
	int i, srqn, qpn, cqn;
	int err = 0;
	int good_qps = 0;

	mlx4_dbg(DRV, priv, "Configuring rss steering for port %u\n", priv->port);
	err = mlx4_qp_reserve_range(mdev->dev, rss_map->size,
				    rss_map->size, &rss_map->base_qpn);
	if (err) {
		mlx4_err(mdev, "Failed reserving %d qps for port %u\n",
			 rss_map->size, priv->port);
		return err;
	}

	for (i = 0; i < rss_map->size; i++) {
		cqn = priv->rx_ring[rss_map->map[i]].cqn;
		srqn = priv->rx_ring[rss_map->map[i]].srq.srqn;
		qpn = rss_map->base_qpn + i;
		err = mlx4_en_config_rss_qp(priv, qpn, srqn, cqn,
					    &rss_map->state[i],
					    &rss_map->qps[i]);
		if (err)
			goto rss_err;

		++good_qps;
	}

	/* Configure RSS indirection qp */
	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &priv->base_qpn);
	if (err) {
		mlx4_err(mdev, "Failed to reserve range for RSS "
			       "indirection qp\n");
		goto rss_err;
	}
	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
	if (err) {
		mlx4_err(mdev, "Failed to allocate RSS indirection QP\n");
		goto reserve_err;
	}
	rss_map->indir_qp.event = mlx4_en_sqp_event;
	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
				priv->rx_ring[0].cqn, 0, &context);

	ptr = ((void *) &context) + 0x3c;
	rss_context = (struct mlx4_en_rss_context *) ptr;
	rss_context->base_qpn = cpu_to_be32(ilog2(rss_map->size) << 24 |
					    (rss_map->base_qpn));
	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
	rss_context->hash_fn = rss_xor & 0x3;
	rss_context->flags = rss_mask << 2;

	err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
			       &rss_map->indir_qp, &rss_map->indir_state);
	if (err)
		goto indir_err;

	return 0;

indir_err:
	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
reserve_err:
	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
rss_err:
	for (i = 0; i < good_qps; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, rss_map->size);
	return err;
}

void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
{
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
	int i;

	mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
		       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
	mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
	mlx4_qp_release_range(mdev->dev, priv->base_qpn, 1);
	for (i = 0; i < rss_map->size; i++) {
		mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
			       MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
		mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
		mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
	}
	mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, rss_map->size);
}