/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2009 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/pci.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/ipv6.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
#include "workarounds.h"

/*
 * TX descriptor ring full threshold
 *
 * The tx_queue descriptor ring fill-level must fall below this value
 * before we restart the netif queue
 */
#define EFX_TXQ_THRESHOLD (EFX_TXQ_MASK / 2u)

/* We need to be able to nest calls to netif_tx_stop_queue(), partly
 * because of the 2 hardware queues associated with each core queue,
 * but also so that we can inhibit TX for reasons other than a full
 * hardware queue. */
void efx_stop_queue(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;

	if (!channel->tx_queue)
		return;

	spin_lock_bh(&channel->tx_stop_lock);
	EFX_TRACE(efx, "stop TX queue\n");

	atomic_inc(&channel->tx_stop_count);
	netif_tx_stop_queue(
		netdev_get_tx_queue(
			efx->net_dev,
			channel->tx_queue->queue / EFX_TXQ_TYPES));

	spin_unlock_bh(&channel->tx_stop_lock);
}

/* Decrement core TX queue stop count and wake it if the count is 0 */
void efx_wake_queue(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;

	if (!channel->tx_queue)
		return;

	local_bh_disable();
	if (atomic_dec_and_lock(&channel->tx_stop_count,
				&channel->tx_stop_lock)) {
		EFX_TRACE(efx, "waking TX queue\n");
		netif_tx_wake_queue(
			netdev_get_tx_queue(
				efx->net_dev,
				channel->tx_queue->queue / EFX_TXQ_TYPES));
		spin_unlock(&channel->tx_stop_lock);
	}
	local_bh_enable();
}

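/* Release a single TX buffer: unmap its DMA mapping (if it owns one) and
 * free its skb (if it is the final buffer of a packet), clearing the
 * state so the ring slot can be reused. */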
static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
			       struct efx_tx_buffer *buffer)
{
	if (buffer->unmap_len) {
		struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
		dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
					 buffer->unmap_len);
		if (buffer->unmap_single)
			pci_unmap_single(pci_dev, unmap_addr, buffer->unmap_len,
					 PCI_DMA_TODEVICE);
		else
			pci_unmap_page(pci_dev, unmap_addr, buffer->unmap_len,
				       PCI_DMA_TODEVICE);
		buffer->unmap_len = 0;
		buffer->unmap_single = false;
	}

	if (buffer->skb) {
		dev_kfree_skb_any((struct sk_buff *) buffer->skb);
		buffer->skb = NULL;
		EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
			  "complete\n", tx_queue->queue, tx_queue->read_count);
	}
}

/**
 * struct efx_tso_header - a DMA mapped buffer for packet headers
 * @next: Linked list of free ones.
 *	The list is protected by the TX queue lock.
 * @unmap_len: Length to unmap for an oversize buffer, or 0.
 * @dma_addr: The DMA address of the header below.
 *
 * This controls the memory used for a TSO header.  Use TSOH_BUFFER()
 * to find the packet header data.  Use TSOH_SIZE() to calculate the
 * total size required for a given packet header length.  TSO headers
 * in the free list are exactly %TSOH_STD_SIZE bytes in size.
 */
struct efx_tso_header {
	union {
		struct efx_tso_header *next;
		size_t unmap_len;
	};
	dma_addr_t dma_addr;
};

static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb);

static void efx_fini_tso(struct efx_tx_queue *tx_queue);
static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
			       struct efx_tso_header *tsoh);

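/* Return a buffer's TSO header to the free list, or release a
 * heap-allocated (oversize) header outright. */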
static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
			  struct efx_tx_buffer *buffer)
{
	if (buffer->tsoh) {
		if (likely(!buffer->tsoh->unmap_len)) {
			buffer->tsoh->next = tx_queue->tso_headers_free;
			tx_queue->tso_headers_free = buffer->tsoh;
		} else {
			efx_tsoh_heap_free(tx_queue, buffer->tsoh);
		}
		buffer->tsoh = NULL;
	}
}

static inline unsigned
efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
{
	/* Depending on the NIC revision, we can use descriptor
	 * lengths up to 8K or 8K-1.  However, since PCI Express
	 * devices must split read requests at 4K boundaries, there is
	 * little benefit from using descriptors that cross those
	 * boundaries and we keep things simple by not doing so.
	 */
	unsigned len = (~dma_addr & 0xfff) + 1;

	/* Work around hardware bug for unaligned buffers. */
	if (EFX_WORKAROUND_5391(efx) && (dma_addr & 0xf))
		len = min_t(unsigned, len, 512 - (dma_addr & 0xf));

	return len;
}

/*
 * Add a socket buffer to a TX queue
 *
 * This maps all fragments of a socket buffer for DMA and adds them to
 * the TX queue.  The queue's insert pointer will be incremented by
 * the number of fragments in the socket buffer.
 *
 * If any DMA mapping fails, any mapped fragments will be unmapped
 * and the queue's insert pointer will be restored to its original value.
 *
 * This function is split out from efx_hard_start_xmit to allow the
 * loopback test to direct packets via specific TX queues.
 *
 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
 * You must hold netif_tx_lock() to call this function.
 */
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	struct pci_dev *pci_dev = efx->pci_dev;
	struct efx_tx_buffer *buffer;
	skb_frag_t *fragment;
	struct page *page;
	int page_offset;
	unsigned int len, unmap_len = 0, fill_level, insert_ptr;
	dma_addr_t dma_addr, unmap_addr = 0;
	unsigned int dma_len;
	bool unmap_single;
	int q_space, i = 0;
	netdev_tx_t rc = NETDEV_TX_OK;

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	if (skb_shinfo(skb)->gso_size)
		return efx_enqueue_skb_tso(tx_queue, skb);

	/* Get size of the initial fragment */
	len = skb_headlen(skb);

	/* Pad if necessary */
	if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) {
		EFX_BUG_ON_PARANOID(skb->data_len);
		len = 32 + 1;
		if (skb_pad(skb, len - skb->len))
			return NETDEV_TX_OK;
	}

	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
	q_space = EFX_TXQ_MASK - 1 - fill_level;

	/* Map for DMA.  Use pci_map_single rather than pci_map_page
	 * since this is more efficient on machines with sparse
	 * memory.
	 */
	unmap_single = true;
	dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);

	/* Process all fragments */
	while (1) {
		if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr)))
			goto pci_err;

		/* Store fields for marking in the per-fragment final
		 * descriptor */
		unmap_len = len;
		unmap_addr = dma_addr;

		/* Add to TX queue, splitting across DMA boundaries */
		do {
			if (unlikely(q_space-- <= 0)) {
				/* It might be that completions have
				 * happened since the xmit path last
				 * checked.  Update the xmit path's
				 * copy of read_count.
				 */
				++tx_queue->stopped;
				/* This memory barrier protects the
				 * change of stopped from the access
				 * of read_count. */
				smp_mb();
				tx_queue->old_read_count =
					*(volatile unsigned *)
					&tx_queue->read_count;
				fill_level = (tx_queue->insert_count
					      - tx_queue->old_read_count);
				q_space = EFX_TXQ_MASK - 1 - fill_level;
				if (unlikely(q_space-- <= 0))
					goto stop;
				smp_mb();
				--tx_queue->stopped;
			}

			insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
			buffer = &tx_queue->buffer[insert_ptr];
			efx_tsoh_free(tx_queue, buffer);
			EFX_BUG_ON_PARANOID(buffer->tsoh);
			EFX_BUG_ON_PARANOID(buffer->skb);
			EFX_BUG_ON_PARANOID(buffer->len);
			EFX_BUG_ON_PARANOID(!buffer->continuation);
			EFX_BUG_ON_PARANOID(buffer->unmap_len);

			dma_len = efx_max_tx_len(efx, dma_addr);
			if (likely(dma_len >= len))
				dma_len = len;

			/* Fill out per descriptor fields */
			buffer->len = dma_len;
			buffer->dma_addr = dma_addr;
			len -= dma_len;
			dma_addr += dma_len;
			++tx_queue->insert_count;
		} while (len);

		/* Transfer ownership of the unmapping to the final buffer */
		buffer->unmap_single = unmap_single;
		buffer->unmap_len = unmap_len;
		unmap_len = 0;

		/* Get address and size of next fragment */
		if (i >= skb_shinfo(skb)->nr_frags)
			break;
		fragment = &skb_shinfo(skb)->frags[i];
		len = fragment->size;
		page = fragment->page;
		page_offset = fragment->page_offset;
		i++;

		/* Map for DMA */
		unmap_single = false;
		dma_addr = pci_map_page(pci_dev, page, page_offset, len,
					PCI_DMA_TODEVICE);
	}

	/* Transfer ownership of the skb to the final buffer */
	buffer->skb = skb;
	buffer->continuation = false;

	/* Pass off to hardware */
	efx_nic_push_buffers(tx_queue);

	return NETDEV_TX_OK;

 pci_err:
	EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
		   "fragments for DMA\n", tx_queue->queue, skb->len,
		   skb_shinfo(skb)->nr_frags + 1);

	/* Mark the packet as transmitted, and free the SKB ourselves */
	dev_kfree_skb_any(skb);
	goto unwind;

 stop:
	rc = NETDEV_TX_BUSY;

	if (tx_queue->stopped == 1)
		efx_stop_queue(tx_queue->channel);

 unwind:
	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		--tx_queue->insert_count;
		insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
		buffer = &tx_queue->buffer[insert_ptr];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->len = 0;
	}

	/* Free the fragment we were mid-way through pushing */
	if (unmap_len) {
		if (unmap_single)
			pci_unmap_single(pci_dev, unmap_addr, unmap_len,
					 PCI_DMA_TODEVICE);
		else
			pci_unmap_page(pci_dev, unmap_addr, unmap_len,
				       PCI_DMA_TODEVICE);
	}

	return rc;
}

/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
				unsigned int index)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int stop_index, read_ptr;

	stop_index = (index + 1) & EFX_TXQ_MASK;
	read_ptr = tx_queue->read_count & EFX_TXQ_MASK;

	while (read_ptr != stop_index) {
		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
		if (unlikely(buffer->len == 0)) {
			EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
				"completion id %x\n", tx_queue->queue,
				read_ptr);
			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
			return;
		}

		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = true;
		buffer->len = 0;

		++tx_queue->read_count;
		read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
	}
}

/* Initiate a packet transmission.  We use one channel per CPU
 * (sharing when we have more CPUs than channels).  On Falcon, the TX
 * completion events will be directed back to the CPU that transmitted
 * the packet, which should be cache-efficient.
 *
 * Context: non-blocking.
 * Note that returning anything other than NETDEV_TX_OK will cause the
 * OS to free the skb.
 */
netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
				struct net_device *net_dev)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_tx_queue *tx_queue;

	if (unlikely(efx->port_inhibited))
		return NETDEV_TX_BUSY;

	tx_queue = &efx->tx_queue[EFX_TXQ_TYPES * skb_get_queue_mapping(skb)];
	if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
		tx_queue += EFX_TXQ_TYPE_OFFLOAD;

	return efx_enqueue_skb(tx_queue, skb);
}

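/* Handle a TX completion: release all buffers up to and including @index,
 * then restart the netif queue if it was stopped and the fill level has
 * dropped below the threshold. */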
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
	unsigned fill_level;
	struct efx_nic *efx = tx_queue->efx;

	EFX_BUG_ON_PARANOID(index > EFX_TXQ_MASK);

	efx_dequeue_buffers(tx_queue, index);

	/* See if we need to restart the netif queue.  This barrier
	 * separates the update of read_count from the test of
	 * stopped. */
	smp_mb();
	if (unlikely(tx_queue->stopped) && likely(efx->port_enabled)) {
		fill_level = tx_queue->insert_count - tx_queue->read_count;
		if (fill_level < EFX_TXQ_THRESHOLD) {
			EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));

			/* Do this under netif_tx_lock(), to avoid racing
			 * with efx_xmit(). */
			netif_tx_lock(efx->net_dev);
			if (tx_queue->stopped) {
				tx_queue->stopped = 0;
				efx_wake_queue(tx_queue->channel);
			}
			netif_tx_unlock(efx->net_dev);
		}
	}
}

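/* Allocate the software and hardware rings for a TX queue. */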
int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int txq_size;
	int i, rc;

	EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);

	/* Allocate software ring */
	txq_size = EFX_TXQ_SIZE * sizeof(*tx_queue->buffer);
	tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
	if (!tx_queue->buffer)
		return -ENOMEM;
	for (i = 0; i <= EFX_TXQ_MASK; ++i)
		tx_queue->buffer[i].continuation = true;

	/* Allocate hardware ring */
	rc = efx_nic_probe_tx(tx_queue);
	if (rc)
		goto fail;

	return 0;

 fail:
	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
	return rc;
}

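/* Reset the software state of a TX queue and set up its descriptor ring. */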
void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);

	tx_queue->insert_count = 0;
	tx_queue->write_count = 0;
	tx_queue->read_count = 0;
	tx_queue->old_read_count = 0;
	BUG_ON(tx_queue->stopped);

	/* Set up TX descriptor ring */
	efx_nic_init_tx(tx_queue);
}

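/* Release any buffers still held in the software ring, e.g. when the
 * queue is torn down with transmissions outstanding. */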
void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;

	if (!tx_queue->buffer)
		return;

	/* Free any buffers left in the ring */
	while (tx_queue->read_count != tx_queue->write_count) {
		buffer = &tx_queue->buffer[tx_queue->read_count & EFX_TXQ_MASK];
		efx_dequeue_buffer(tx_queue, buffer);
		buffer->continuation = true;
		buffer->len = 0;

		++tx_queue->read_count;
	}
}

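/* Shut down a TX queue: flush it, release remaining buffers and the TSO
 * header cache, and drop any stop it holds on the core queue. */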
void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);

	/* Flush TX queue, remove descriptor ring */
	efx_nic_fini_tx(tx_queue);

	efx_release_tx_buffers(tx_queue);

	/* Free up TSO header cache */
	efx_fini_tso(tx_queue);

	/* Release queue's stop on port, if any */
	if (tx_queue->stopped) {
		tx_queue->stopped = 0;
		efx_wake_queue(tx_queue->channel);
	}
}

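/* Free the hardware and software rings for a TX queue. */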
void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
	EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);

	efx_nic_remove_tx(tx_queue);

	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
}

/* Efx TCP segmentation acceleration.
 *
 * Why?  Because by doing it here in the driver we can go significantly
 * faster than the GSO.
 *
 * Requires TX checksum offload support.
 */

/* Number of bytes inserted at the start of a TSO header buffer,
 * similar to NET_IP_ALIGN.
 */
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
#define TSOH_OFFSET	0
#else
#define TSOH_OFFSET	NET_IP_ALIGN
#endif

#define TSOH_BUFFER(tsoh)	((u8 *)(tsoh + 1) + TSOH_OFFSET)

/* Total size of struct efx_tso_header, buffer and padding */
#define TSOH_SIZE(hdr_len)					\
	(sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)

/* Size of blocks on free list.  Larger blocks must be allocated from
 * the heap.
 */
#define TSOH_STD_SIZE		128

#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
#define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
#define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
#define SKB_IPV6_OFF(skb) PTR_DIFF(ipv6_hdr(skb), (skb)->data)

/**
 * struct tso_state - TSO state for an SKB
 * @out_len: Remaining length in current segment
 * @seqnum: Current sequence number
 * @ipv4_id: Current IPv4 ID, host endian
 * @packet_space: Remaining space in current packet
 * @dma_addr: DMA address of current position
 * @in_len: Remaining length in current SKB fragment
 * @unmap_len: Length of SKB fragment
 * @unmap_addr: DMA address of SKB fragment
 * @unmap_single: DMA single vs page mapping flag
 * @protocol: Network protocol (after any VLAN header)
 * @header_len: Number of bytes of header
 * @full_packet_size: Number of bytes to put in each outgoing segment
 *
 * The state used during segmentation.  It is put into this data structure
 * just to make it easy to pass into inline functions.
 */
struct tso_state {
	/* Output position */
	unsigned out_len;
	unsigned seqnum;
	unsigned ipv4_id;
	unsigned packet_space;

	/* Input position */
	dma_addr_t dma_addr;
	unsigned in_len;
	unsigned unmap_len;
	dma_addr_t unmap_addr;
	bool unmap_single;

	__be16 protocol;
	unsigned header_len;
	int full_packet_size;
};

/*
 * Verify that our various assumptions about sk_buffs and the conditions
 * under which TSO will be attempted hold true.  Return the protocol number.
 */
static __be16 efx_tso_check_protocol(struct sk_buff *skb)
{
	__be16 protocol = skb->protocol;

	EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
			    protocol);
	if (protocol == htons(ETH_P_8021Q)) {
		/* Find the encapsulated protocol; reset network header
		 * and transport header based on that. */
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
		skb_set_network_header(skb, sizeof(*veh));
		if (protocol == htons(ETH_P_IP))
			skb_set_transport_header(skb, sizeof(*veh) +
						 4 * ip_hdr(skb)->ihl);
		else if (protocol == htons(ETH_P_IPV6))
			skb_set_transport_header(skb, sizeof(*veh) +
						 sizeof(struct ipv6hdr));
	}

	if (protocol == htons(ETH_P_IP)) {
		EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
	} else {
		EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IPV6));
		EFX_BUG_ON_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP);
	}
	EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
			     + (tcp_hdr(skb)->doff << 2u)) >
			    skb_headlen(skb));

	return protocol;
}

/*
 * Allocate a page worth of efx_tso_header structures, and string them
 * into the tx_queue->tso_headers_free linked list.  Return 0 or -ENOMEM.
 */
static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
{
	struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
	struct efx_tso_header *tsoh;
	dma_addr_t dma_addr;
	u8 *base_kva, *kva;

	base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
	if (base_kva == NULL) {
		EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
			" headers\n");
		return -ENOMEM;
	}

	/* pci_alloc_consistent() allocates pages. */
	EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));

	for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
		tsoh = (struct efx_tso_header *)kva;
		tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
		tsoh->next = tx_queue->tso_headers_free;
		tx_queue->tso_headers_free = tsoh;
	}

	return 0;
}

/* Free up a TSO header, and all others in the same page. */
static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
				struct efx_tso_header *tsoh,
				struct pci_dev *pci_dev)
{
	struct efx_tso_header **p;
	unsigned long base_kva;
	dma_addr_t base_dma;

	base_kva = (unsigned long)tsoh & PAGE_MASK;
	base_dma = tsoh->dma_addr & PAGE_MASK;

	p = &tx_queue->tso_headers_free;
	while (*p != NULL) {
		if (((unsigned long)*p & PAGE_MASK) == base_kva)
			*p = (*p)->next;
		else
			p = &(*p)->next;
	}

	pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
}

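/* Allocate and DMA-map a TSO header that is too large for the standard
 * free-list blocks. */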
static struct efx_tso_header *
efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
{
	struct efx_tso_header *tsoh;

	tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
	if (unlikely(!tsoh))
		return NULL;

	tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
					TSOH_BUFFER(tsoh), header_len,
					PCI_DMA_TODEVICE);
	if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev,
					   tsoh->dma_addr))) {
		kfree(tsoh);
		return NULL;
	}

	tsoh->unmap_len = header_len;
	return tsoh;
}

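/* Unmap and free a heap-allocated TSO header. */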
static void
efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
{
	pci_unmap_single(tx_queue->efx->pci_dev,
			 tsoh->dma_addr, tsoh->unmap_len,
			 PCI_DMA_TODEVICE);
	kfree(tsoh);
}

/**
 * efx_tx_queue_insert - push descriptors onto the TX queue
 * @tx_queue: Efx TX queue
 * @dma_addr: DMA address of fragment
 * @len: Length of fragment
 * @final_buffer: The final buffer inserted into the queue
 *
 * Push descriptors onto the TX queue.  Return 0 on success or 1 if
 * @tx_queue full.
 */
static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
			       dma_addr_t dma_addr, unsigned len,
			       struct efx_tx_buffer **final_buffer)
{
	struct efx_tx_buffer *buffer;
	struct efx_nic *efx = tx_queue->efx;
	unsigned dma_len, fill_level, insert_ptr;
	int q_space;

	EFX_BUG_ON_PARANOID(len <= 0);

	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
	/* -1 as there is no way to represent all descriptors used */
	q_space = EFX_TXQ_MASK - 1 - fill_level;

	while (1) {
		if (unlikely(q_space-- <= 0)) {
			/* It might be that completions have happened
			 * since the xmit path last checked.  Update
			 * the xmit path's copy of read_count.
			 */
			++tx_queue->stopped;
			/* This memory barrier protects the change of
			 * stopped from the access of read_count. */
			smp_mb();
			tx_queue->old_read_count =
				*(volatile unsigned *)&tx_queue->read_count;
			fill_level = (tx_queue->insert_count
				      - tx_queue->old_read_count);
			q_space = EFX_TXQ_MASK - 1 - fill_level;
			if (unlikely(q_space-- <= 0)) {
				*final_buffer = NULL;
				return 1;
			}
			smp_mb();
			--tx_queue->stopped;
		}

		insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
		buffer = &tx_queue->buffer[insert_ptr];
		++tx_queue->insert_count;

		EFX_BUG_ON_PARANOID(tx_queue->insert_count -
				    tx_queue->read_count >
				    EFX_TXQ_MASK);

		efx_tsoh_free(tx_queue, buffer);
		EFX_BUG_ON_PARANOID(buffer->len);
		EFX_BUG_ON_PARANOID(buffer->unmap_len);
		EFX_BUG_ON_PARANOID(buffer->skb);
		EFX_BUG_ON_PARANOID(!buffer->continuation);
		EFX_BUG_ON_PARANOID(buffer->tsoh);

		buffer->dma_addr = dma_addr;

		dma_len = efx_max_tx_len(efx, dma_addr);

		/* If there is enough space to send then do so */
		if (dma_len >= len)
			break;

		buffer->len = dma_len; /* Don't set the other members */
		dma_addr += dma_len;
		len -= dma_len;
	}

	EFX_BUG_ON_PARANOID(!len);
	buffer->len = len;
	*final_buffer = buffer;
	return 0;
}

/*
 * Put a TSO header into the TX queue.
 *
 * This is special-cased because we know that it is small enough to fit in
 * a single fragment, and we know it doesn't cross a page boundary.  It
 * also allows us to not worry about end-of-packet etc.
 */
static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
			       struct efx_tso_header *tsoh, unsigned len)
{
	struct efx_tx_buffer *buffer;

	buffer = &tx_queue->buffer[tx_queue->insert_count & EFX_TXQ_MASK];
	efx_tsoh_free(tx_queue, buffer);
	EFX_BUG_ON_PARANOID(buffer->len);
	EFX_BUG_ON_PARANOID(buffer->unmap_len);
	EFX_BUG_ON_PARANOID(buffer->skb);
	EFX_BUG_ON_PARANOID(!buffer->continuation);
	EFX_BUG_ON_PARANOID(buffer->tsoh);
	buffer->len = len;
	buffer->dma_addr = tsoh->dma_addr;
	buffer->tsoh = tsoh;

	++tx_queue->insert_count;
}

/* Remove descriptors put into a tx_queue. */
static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;
	dma_addr_t unmap_addr;

	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != tx_queue->write_count) {
		--tx_queue->insert_count;
		buffer = &tx_queue->buffer[tx_queue->insert_count &
					   EFX_TXQ_MASK];
		efx_tsoh_free(tx_queue, buffer);
		EFX_BUG_ON_PARANOID(buffer->skb);
		if (buffer->unmap_len) {
			unmap_addr = (buffer->dma_addr + buffer->len -
				      buffer->unmap_len);
			if (buffer->unmap_single)
				pci_unmap_single(tx_queue->efx->pci_dev,
						 unmap_addr, buffer->unmap_len,
						 PCI_DMA_TODEVICE);
			else
				pci_unmap_page(tx_queue->efx->pci_dev,
					       unmap_addr, buffer->unmap_len,
					       PCI_DMA_TODEVICE);
			buffer->unmap_len = 0;
		}
		buffer->len = 0;
		buffer->continuation = true;
	}
}

/* Parse the SKB header and initialise state. */
static void tso_start(struct tso_state *st, const struct sk_buff *skb)
{
	/* All ethernet/IP/TCP headers combined size is TCP header size
	 * plus offset of TCP header relative to start of packet.
	 */
	st->header_len = ((tcp_hdr(skb)->doff << 2u)
			  + PTR_DIFF(tcp_hdr(skb), skb->data));
	st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;

	if (st->protocol == htons(ETH_P_IP))
		st->ipv4_id = ntohs(ip_hdr(skb)->id);
	else
		st->ipv4_id = 0;
	st->seqnum = ntohl(tcp_hdr(skb)->seq);

	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);

	st->packet_space = st->full_packet_size;
	st->out_len = skb->len - st->header_len;
	st->unmap_len = 0;
	st->unmap_single = false;
}

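/* DMA-map the next paged fragment of the skb and record it as the
 * current input position. */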
static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
			    skb_frag_t *frag)
{
	st->unmap_addr = pci_map_page(efx->pci_dev, frag->page,
				      frag->page_offset, frag->size,
				      PCI_DMA_TODEVICE);
	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
		st->unmap_single = false;
		st->unmap_len = frag->size;
		st->in_len = frag->size;
		st->dma_addr = st->unmap_addr;
		return 0;
	}
	return -ENOMEM;
}

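/* DMA-map the payload that follows the headers in the skb's linear area
 * and record it as the current input position. */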
static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
				 const struct sk_buff *skb)
{
	int hl = st->header_len;
	int len = skb_headlen(skb) - hl;

	st->unmap_addr = pci_map_single(efx->pci_dev, skb->data + hl,
					len, PCI_DMA_TODEVICE);
	if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
		st->unmap_single = true;
		st->unmap_len = len;
		st->in_len = len;
		st->dma_addr = st->unmap_addr;
		return 0;
	}
	return -ENOMEM;
}

/**
 * tso_fill_packet_with_fragment - form descriptors for the current fragment
 * @tx_queue: Efx TX queue
 * @skb: Socket buffer
 * @st: TSO state
 *
 * Form descriptors for the current fragment, until we reach the end
 * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
 * space in @tx_queue.
 */
static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
					 const struct sk_buff *skb,
					 struct tso_state *st)
{
	struct efx_tx_buffer *buffer;
	int n, end_of_packet, rc;

	if (st->in_len == 0)
		return 0;
	if (st->packet_space == 0)
		return 0;

	EFX_BUG_ON_PARANOID(st->in_len <= 0);
	EFX_BUG_ON_PARANOID(st->packet_space <= 0);

	n = min(st->in_len, st->packet_space);

	st->packet_space -= n;
	st->out_len -= n;
	st->in_len -= n;

	rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
	if (likely(rc == 0)) {
		if (st->out_len == 0)
			/* Transfer ownership of the skb */
			buffer->skb = skb;

		end_of_packet = st->out_len == 0 || st->packet_space == 0;
		buffer->continuation = !end_of_packet;

		if (st->in_len == 0) {
			/* Transfer ownership of the pci mapping */
			buffer->unmap_len = st->unmap_len;
			buffer->unmap_single = st->unmap_single;
			st->unmap_len = 0;
		}
	}

	st->dma_addr += n;
	return rc;
}

/**
 * tso_start_new_packet - generate a new header and prepare for the new packet
 * @tx_queue: Efx TX queue
 * @skb: Socket buffer
 * @st: TSO state
 *
 * Generate a new header and prepare for the new packet.  Return 0 on
 * success, or -1 if failed to alloc header.
 */
static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
				const struct sk_buff *skb,
				struct tso_state *st)
{
	struct efx_tso_header *tsoh;
	struct tcphdr *tsoh_th;
	unsigned ip_length;
	u8 *header;

	/* Allocate a DMA-mapped header buffer. */
	if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
		if (tx_queue->tso_headers_free == NULL) {
			if (efx_tsoh_block_alloc(tx_queue))
				return -1;
		}
		EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
		tsoh = tx_queue->tso_headers_free;
		tx_queue->tso_headers_free = tsoh->next;
		tsoh->unmap_len = 0;
	} else {
		tx_queue->tso_long_headers++;
		tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
		if (unlikely(!tsoh))
			return -1;
	}

	header = TSOH_BUFFER(tsoh);
	tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));

	/* Copy and update the headers. */
	memcpy(header, skb->data, st->header_len);

	tsoh_th->seq = htonl(st->seqnum);
	st->seqnum += skb_shinfo(skb)->gso_size;
	if (st->out_len > skb_shinfo(skb)->gso_size) {
		/* This packet will not finish the TSO burst. */
		ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
		tsoh_th->fin = 0;
		tsoh_th->psh = 0;
	} else {
		/* This packet will be the last in the TSO burst. */
		ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
		tsoh_th->fin = tcp_hdr(skb)->fin;
		tsoh_th->psh = tcp_hdr(skb)->psh;
	}

	if (st->protocol == htons(ETH_P_IP)) {
		struct iphdr *tsoh_iph =
			(struct iphdr *)(header + SKB_IPV4_OFF(skb));

		tsoh_iph->tot_len = htons(ip_length);

		/* Linux leaves suitable gaps in the IP ID space for us to fill. */
		tsoh_iph->id = htons(st->ipv4_id);
		st->ipv4_id++;
	} else {
		struct ipv6hdr *tsoh_iph =
			(struct ipv6hdr *)(header + SKB_IPV6_OFF(skb));

		tsoh_iph->payload_len = htons(ip_length - sizeof(*tsoh_iph));
	}

	st->packet_space = skb_shinfo(skb)->gso_size;
	++tx_queue->tso_packets;

	/* Form a descriptor for this header. */
	efx_tso_put_header(tx_queue, tsoh, st->header_len);

	return 0;
}

/**
 * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
 * @tx_queue: Efx TX queue
 * @skb: Socket buffer
 *
 * Context: You must hold netif_tx_lock() to call this function.
 *
 * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
 * @skb was not enqueued.  In all cases @skb is consumed.  Return
 * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
 */
static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
			       struct sk_buff *skb)
{
	struct efx_nic *efx = tx_queue->efx;
	int frag_i, rc, rc2 = NETDEV_TX_OK;
	struct tso_state state;

	/* Find the packet protocol and sanity-check it */
	state.protocol = efx_tso_check_protocol(skb);

	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);

	tso_start(&state, skb);

	/* Assume that skb header area contains exactly the headers, and
	 * all payload is in the frag list.
	 */
	if (skb_headlen(skb) == state.header_len) {
		/* Grab the first payload fragment. */
		EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
		frag_i = 0;
		rc = tso_get_fragment(&state, efx,
				      skb_shinfo(skb)->frags + frag_i);
		if (rc)
			goto mem_err;
	} else {
		rc = tso_get_head_fragment(&state, efx, skb);
		if (rc)
			goto mem_err;
		frag_i = -1;
	}

	if (tso_start_new_packet(tx_queue, skb, &state) < 0)
		goto mem_err;

	while (1) {
		rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
		if (unlikely(rc))
			goto stop;

		/* Move onto the next fragment? */
		if (state.in_len == 0) {
			if (++frag_i >= skb_shinfo(skb)->nr_frags)
				/* End of payload reached. */
				break;
			rc = tso_get_fragment(&state, efx,
					      skb_shinfo(skb)->frags + frag_i);
			if (rc)
				goto mem_err;
		}

		/* Start at new packet? */
		if (state.packet_space == 0 &&
		    tso_start_new_packet(tx_queue, skb, &state) < 0)
			goto mem_err;
	}

	/* Pass off to hardware */
	efx_nic_push_buffers(tx_queue);

	tx_queue->tso_bursts++;
	return NETDEV_TX_OK;

 mem_err:
	EFX_ERR(efx, "Out of memory for TSO headers, or PCI mapping error\n");
	dev_kfree_skb_any(skb);
	goto unwind;

 stop:
	rc2 = NETDEV_TX_BUSY;

	/* Stop the queue if it wasn't stopped before. */
	if (tx_queue->stopped == 1)
		efx_stop_queue(tx_queue->channel);

 unwind:
	/* Free the DMA mapping we were in the process of writing out */
	if (state.unmap_len) {
		if (state.unmap_single)
			pci_unmap_single(efx->pci_dev, state.unmap_addr,
					 state.unmap_len, PCI_DMA_TODEVICE);
		else
			pci_unmap_page(efx->pci_dev, state.unmap_addr,
				       state.unmap_len, PCI_DMA_TODEVICE);
	}

	efx_enqueue_unwind(tx_queue);
	return rc2;
}

/*
 * Free up all TSO datastructures associated with tx_queue.  This
 * routine should be called only once the tx_queue is both empty and
 * will no longer be used.
 */
static void efx_fini_tso(struct efx_tx_queue *tx_queue)
{
	unsigned i;

	if (tx_queue->buffer) {
		for (i = 0; i <= EFX_TXQ_MASK; ++i)
			efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
	}

	while (tx_queue->tso_headers_free != NULL)
		efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
				    tx_queue->efx->pci_dev);
}