tx.c

  1. /****************************************************************************
  2. * Driver for Solarflare Solarstorm network controllers and boards
  3. * Copyright 2005-2006 Fen Systems Ltd.
  4. * Copyright 2005-2008 Solarflare Communications Inc.
  5. *
  6. * This program is free software; you can redistribute it and/or modify it
  7. * under the terms of the GNU General Public License version 2 as published
  8. * by the Free Software Foundation, incorporated herein by reference.
  9. */
  10. #include <linux/pci.h>
  11. #include <linux/tcp.h>
  12. #include <linux/ip.h>
  13. #include <linux/in.h>
  14. #include <linux/if_ether.h>
  15. #include <linux/highmem.h>
  16. #include "net_driver.h"
  17. #include "tx.h"
  18. #include "efx.h"
  19. #include "falcon.h"
  20. #include "workarounds.h"
  21. /*
  22. * TX descriptor ring full threshold
  23. *
  24. * The tx_queue descriptor ring fill-level must fall below this value
  25. * before we restart the netif queue
  26. */
  27. #define EFX_TXQ_THRESHOLD (EFX_TXQ_MASK / 2u)
  28. /* We want to be able to nest calls to netif_stop_queue(), since each
  29. * channel can have an individual stop on the queue.
  30. */
  31. void efx_stop_queue(struct efx_nic *efx)
  32. {
  33. spin_lock_bh(&efx->netif_stop_lock);
  34. EFX_TRACE(efx, "stop TX queue\n");
  35. atomic_inc(&efx->netif_stop_count);
  36. netif_stop_queue(efx->net_dev);
  37. spin_unlock_bh(&efx->netif_stop_lock);
  38. }
  39. /* Wake netif's TX queue
  40. * We want to be able to nest calls to netif_stop_queue(), since each
  41. * channel can have an individual stop on the queue.
  42. */
  43. void efx_wake_queue(struct efx_nic *efx)
  44. {
  45. local_bh_disable();
  46. if (atomic_dec_and_lock(&efx->netif_stop_count,
  47. &efx->netif_stop_lock)) {
  48. EFX_TRACE(efx, "waking TX queue\n");
  49. netif_wake_queue(efx->net_dev);
  50. spin_unlock(&efx->netif_stop_lock);
  51. }
  52. local_bh_enable();
  53. }
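/* Release the resources held by a single TX buffer: unmap its DMA region
 * (single or page mapping, as recorded in unmap_single) and free any
 * attached skb.
 */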
  54. static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
  55. struct efx_tx_buffer *buffer)
  56. {
  57. if (buffer->unmap_len) {
  58. struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
  59. dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
  60. buffer->unmap_len);
  61. if (buffer->unmap_single)
  62. pci_unmap_single(pci_dev, unmap_addr, buffer->unmap_len,
  63. PCI_DMA_TODEVICE);
  64. else
  65. pci_unmap_page(pci_dev, unmap_addr, buffer->unmap_len,
  66. PCI_DMA_TODEVICE);
  67. buffer->unmap_len = 0;
  68. buffer->unmap_single = false;
  69. }
  70. if (buffer->skb) {
  71. dev_kfree_skb_any((struct sk_buff *) buffer->skb);
  72. buffer->skb = NULL;
  73. EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
  74. "complete\n", tx_queue->queue, read_ptr);
  75. }
  76. }
  77. /**
  78. * struct efx_tso_header - a DMA mapped buffer for packet headers
  79. * @next: Linked list of free ones.
  80. * The list is protected by the TX queue lock.
  81. * @unmap_len: Length to unmap for an oversize buffer, or 0.
  82. * @dma_addr: The DMA address of the header below.
  83. *
  84. * This controls the memory used for a TSO header. Use TSOH_DATA()
  85. * to find the packet header data. Use TSOH_SIZE() to calculate the
  86. * total size required for a given packet header length. TSO headers
  87. * in the free list are exactly %TSOH_STD_SIZE bytes in size.
  88. */
  89. struct efx_tso_header {
  90. union {
  91. struct efx_tso_header *next;
  92. size_t unmap_len;
  93. };
  94. dma_addr_t dma_addr;
  95. };
  96. static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
  97. struct sk_buff *skb);
  98. static void efx_fini_tso(struct efx_tx_queue *tx_queue);
  99. static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
  100. struct efx_tso_header *tsoh);
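/* Release a buffer's TSO header: return it to the per-queue free list if it
 * came from the standard pool, or unmap and free it if it was allocated
 * from the heap.
 */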
  101. static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
  102. struct efx_tx_buffer *buffer)
  103. {
  104. if (buffer->tsoh) {
  105. if (likely(!buffer->tsoh->unmap_len)) {
  106. buffer->tsoh->next = tx_queue->tso_headers_free;
  107. tx_queue->tso_headers_free = buffer->tsoh;
  108. } else {
  109. efx_tsoh_heap_free(tx_queue, buffer->tsoh);
  110. }
  111. buffer->tsoh = NULL;
  112. }
  113. }
  114. /*
  115. * Add a socket buffer to a TX queue
  116. *
  117. * This maps all fragments of a socket buffer for DMA and adds them to
  118. * the TX queue. The queue's insert pointer will be incremented by
  119. * the number of fragments in the socket buffer.
  120. *
  121. * If any DMA mapping fails, any fragments already mapped will be unmapped
  122. * and the queue's insert pointer will be restored to its original value.
  123. *
  124. * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
  125. * You must hold netif_tx_lock() to call this function.
  126. */
  127. static netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue,
  128. struct sk_buff *skb)
  129. {
  130. struct efx_nic *efx = tx_queue->efx;
  131. struct pci_dev *pci_dev = efx->pci_dev;
  132. struct efx_tx_buffer *buffer;
  133. skb_frag_t *fragment;
  134. struct page *page;
  135. int page_offset;
  136. unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
  137. dma_addr_t dma_addr, unmap_addr = 0;
  138. unsigned int dma_len;
  139. bool unmap_single;
  140. int q_space, i = 0;
  141. netdev_tx_t rc = NETDEV_TX_OK;
  142. EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
  143. if (skb_shinfo((struct sk_buff *)skb)->gso_size)
  144. return efx_enqueue_skb_tso(tx_queue, skb);
  145. /* Get size of the initial fragment */
  146. len = skb_headlen(skb);
  147. /* Pad if necessary */
  148. if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) {
  149. EFX_BUG_ON_PARANOID(skb->data_len);
  150. len = 32 + 1;
  151. if (skb_pad(skb, len - skb->len))
  152. return NETDEV_TX_OK;
  153. }
  154. fill_level = tx_queue->insert_count - tx_queue->old_read_count;
  155. q_space = EFX_TXQ_MASK - 1 - fill_level;
  156. /* Map for DMA. Use pci_map_single rather than pci_map_page
  157. * since this is more efficient on machines with sparse
  158. * memory.
  159. */
  160. unmap_single = true;
  161. dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);
  162. /* Process all fragments */
  163. while (1) {
  164. if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr)))
  165. goto pci_err;
  166. /* Store fields for marking in the per-fragment final
  167. * descriptor */
  168. unmap_len = len;
  169. unmap_addr = dma_addr;
  170. /* Add to TX queue, splitting across DMA boundaries */
  171. do {
  172. if (unlikely(q_space-- <= 0)) {
  173. /* It might be that completions have
  174. * happened since the xmit path last
  175. * checked. Update the xmit path's
  176. * copy of read_count.
  177. */
  178. ++tx_queue->stopped;
  179. /* This memory barrier protects the
  180. * change of stopped from the access
  181. * of read_count. */
  182. smp_mb();
  183. tx_queue->old_read_count =
  184. *(volatile unsigned *)
  185. &tx_queue->read_count;
  186. fill_level = (tx_queue->insert_count
  187. - tx_queue->old_read_count);
  188. q_space = EFX_TXQ_MASK - 1 - fill_level;
  189. if (unlikely(q_space-- <= 0))
  190. goto stop;
  191. smp_mb();
  192. --tx_queue->stopped;
  193. }
  194. insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
  195. buffer = &tx_queue->buffer[insert_ptr];
  196. efx_tsoh_free(tx_queue, buffer);
  197. EFX_BUG_ON_PARANOID(buffer->tsoh);
  198. EFX_BUG_ON_PARANOID(buffer->skb);
  199. EFX_BUG_ON_PARANOID(buffer->len);
  200. EFX_BUG_ON_PARANOID(!buffer->continuation);
  201. EFX_BUG_ON_PARANOID(buffer->unmap_len);
  202. dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
  203. if (likely(dma_len > len))
  204. dma_len = len;
  205. misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
  206. if (misalign && dma_len + misalign > 512)
  207. dma_len = 512 - misalign;
  208. /* Fill out per descriptor fields */
  209. buffer->len = dma_len;
  210. buffer->dma_addr = dma_addr;
  211. len -= dma_len;
  212. dma_addr += dma_len;
  213. ++tx_queue->insert_count;
  214. } while (len);
  215. /* Transfer ownership of the unmapping to the final buffer */
  216. buffer->unmap_single = unmap_single;
  217. buffer->unmap_len = unmap_len;
  218. unmap_len = 0;
  219. /* Get address and size of next fragment */
  220. if (i >= skb_shinfo(skb)->nr_frags)
  221. break;
  222. fragment = &skb_shinfo(skb)->frags[i];
  223. len = fragment->size;
  224. page = fragment->page;
  225. page_offset = fragment->page_offset;
  226. i++;
  227. /* Map for DMA */
  228. unmap_single = false;
  229. dma_addr = pci_map_page(pci_dev, page, page_offset, len,
  230. PCI_DMA_TODEVICE);
  231. }
  232. /* Transfer ownership of the skb to the final buffer */
  233. buffer->skb = skb;
  234. buffer->continuation = false;
  235. /* Pass off to hardware */
  236. falcon_push_buffers(tx_queue);
  237. return NETDEV_TX_OK;
  238. pci_err:
  239. EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
  240. "fragments for DMA\n", tx_queue->queue, skb->len,
  241. skb_shinfo(skb)->nr_frags + 1);
  242. /* Mark the packet as transmitted, and free the SKB ourselves */
  243. dev_kfree_skb_any((struct sk_buff *)skb);
  244. goto unwind;
  245. stop:
  246. rc = NETDEV_TX_BUSY;
  247. if (tx_queue->stopped == 1)
  248. efx_stop_queue(efx);
  249. unwind:
  250. /* Work backwards until we hit the original insert pointer value */
  251. while (tx_queue->insert_count != tx_queue->write_count) {
  252. --tx_queue->insert_count;
  253. insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
  254. buffer = &tx_queue->buffer[insert_ptr];
  255. efx_dequeue_buffer(tx_queue, buffer);
  256. buffer->len = 0;
  257. }
  258. /* Free the fragment we were mid-way through pushing */
  259. if (unmap_len) {
  260. if (unmap_single)
  261. pci_unmap_single(pci_dev, unmap_addr, unmap_len,
  262. PCI_DMA_TODEVICE);
  263. else
  264. pci_unmap_page(pci_dev, unmap_addr, unmap_len,
  265. PCI_DMA_TODEVICE);
  266. }
  267. return rc;
  268. }
  269. /* Remove packets from the TX queue
  270. *
  271. * This removes packets from the TX queue, up to and including the
  272. * specified index.
  273. */
  274. static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
  275. unsigned int index)
  276. {
  277. struct efx_nic *efx = tx_queue->efx;
  278. unsigned int stop_index, read_ptr;
  279. stop_index = (index + 1) & EFX_TXQ_MASK;
  280. read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
  281. while (read_ptr != stop_index) {
  282. struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
  283. if (unlikely(buffer->len == 0)) {
  284. EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
  285. "completion id %x\n", tx_queue->queue,
  286. read_ptr);
  287. efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
  288. return;
  289. }
  290. efx_dequeue_buffer(tx_queue, buffer);
  291. buffer->continuation = true;
  292. buffer->len = 0;
  293. ++tx_queue->read_count;
  294. read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
  295. }
  296. }
  297. /* Initiate a packet transmission on the specified TX queue.
  298. * Note that returning anything other than NETDEV_TX_OK will cause the
  299. * OS to free the skb.
  300. *
  301. * This function is split out from efx_hard_start_xmit to allow the
  302. * loopback test to direct packets via specific TX queues. It is
  303. * therefore a non-static inline, so as not to penalise performance
  304. * for non-loopback transmissions.
  305. *
  306. * Context: netif_tx_lock held
  307. */
  308. inline netdev_tx_t efx_xmit(struct efx_nic *efx,
  309. struct efx_tx_queue *tx_queue, struct sk_buff *skb)
  310. {
  311. /* Map fragments for DMA and add to TX queue */
  312. return efx_enqueue_skb(tx_queue, skb);
  313. }
  314. /* Initiate a packet transmission. We use one channel per CPU
  315. * (sharing when we have more CPUs than channels). On Falcon, the TX
  316. * completion events will be directed back to the CPU that transmitted
  317. * the packet, which should be cache-efficient.
  318. *
  319. * Context: non-blocking.
  320. * Note that returning anything other than NETDEV_TX_OK will cause the
  321. * OS to free the skb.
  322. */
  323. netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
  324. struct net_device *net_dev)
  325. {
  326. struct efx_nic *efx = netdev_priv(net_dev);
  327. struct efx_tx_queue *tx_queue;
  328. if (unlikely(efx->port_inhibited))
  329. return NETDEV_TX_BUSY;
  330. if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
  331. tx_queue = &efx->tx_queue[EFX_TX_QUEUE_OFFLOAD_CSUM];
  332. else
  333. tx_queue = &efx->tx_queue[EFX_TX_QUEUE_NO_CSUM];
  334. return efx_xmit(efx, tx_queue, skb);
  335. }
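/* Handle a TX completion: free all buffers up to and including @index, then
 * wake the netif queue if it was stopped and the fill level has dropped
 * below EFX_TXQ_THRESHOLD.
 */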
  336. void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
  337. {
  338. unsigned fill_level;
  339. struct efx_nic *efx = tx_queue->efx;
  340. EFX_BUG_ON_PARANOID(index > EFX_TXQ_MASK);
  341. efx_dequeue_buffers(tx_queue, index);
  342. /* See if we need to restart the netif queue. This barrier
  343. * separates the update of read_count from the test of
  344. * stopped. */
  345. smp_mb();
  346. if (unlikely(tx_queue->stopped) && likely(efx->port_enabled)) {
  347. fill_level = tx_queue->insert_count - tx_queue->read_count;
  348. if (fill_level < EFX_TXQ_THRESHOLD) {
  349. EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));
  350. /* Do this under netif_tx_lock(), to avoid racing
  351. * with efx_xmit(). */
  352. netif_tx_lock(efx->net_dev);
  353. if (tx_queue->stopped) {
  354. tx_queue->stopped = 0;
  355. efx_wake_queue(efx);
  356. }
  357. netif_tx_unlock(efx->net_dev);
  358. }
  359. }
  360. }
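/* Allocate the software buffer ring and the hardware descriptor ring for a
 * TX queue. Returns 0 on success or a negative error code.
 */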
  361. int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
  362. {
  363. struct efx_nic *efx = tx_queue->efx;
  364. unsigned int txq_size;
  365. int i, rc;
  366. EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);
  367. /* Allocate software ring */
  368. txq_size = EFX_TXQ_SIZE * sizeof(*tx_queue->buffer);
  369. tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
  370. if (!tx_queue->buffer)
  371. return -ENOMEM;
  372. for (i = 0; i <= EFX_TXQ_MASK; ++i)
  373. tx_queue->buffer[i].continuation = true;
  374. /* Allocate hardware ring */
  375. rc = falcon_probe_tx(tx_queue);
  376. if (rc)
  377. goto fail;
  378. return 0;
  379. fail:
  380. kfree(tx_queue->buffer);
  381. tx_queue->buffer = NULL;
  382. return rc;
  383. }
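/* Prepare a TX queue for use: reset the software ring counters and set up
 * the hardware descriptor ring.
 */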
  384. void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
  385. {
  386. EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);
  387. tx_queue->insert_count = 0;
  388. tx_queue->write_count = 0;
  389. tx_queue->read_count = 0;
  390. tx_queue->old_read_count = 0;
  391. BUG_ON(tx_queue->stopped);
  392. /* Set up TX descriptor ring */
  393. falcon_init_tx(tx_queue);
  394. }
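/* Discard any buffers still outstanding in the software ring, unmapping
 * their DMA regions and freeing their skbs.
 */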
  395. void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
  396. {
  397. struct efx_tx_buffer *buffer;
  398. if (!tx_queue->buffer)
  399. return;
  400. /* Free any buffers left in the ring */
  401. while (tx_queue->read_count != tx_queue->write_count) {
  402. buffer = &tx_queue->buffer[tx_queue->read_count & EFX_TXQ_MASK];
  403. efx_dequeue_buffer(tx_queue, buffer);
  404. buffer->continuation = true;
  405. buffer->len = 0;
  406. ++tx_queue->read_count;
  407. }
  408. }
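/* Shut down a TX queue: flush and remove the hardware descriptor ring,
 * release outstanding buffers and cached TSO headers, and drop any stop
 * this queue holds on the port.
 */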
  409. void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
  410. {
  411. EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);
  412. /* Flush TX queue, remove descriptor ring */
  413. falcon_fini_tx(tx_queue);
  414. efx_release_tx_buffers(tx_queue);
  415. /* Free up TSO header cache */
  416. efx_fini_tso(tx_queue);
  417. /* Release queue's stop on port, if any */
  418. if (tx_queue->stopped) {
  419. tx_queue->stopped = 0;
  420. efx_wake_queue(tx_queue->efx);
  421. }
  422. }
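/* Free the resources allocated by efx_probe_tx_queue(). */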
  423. void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
  424. {
  425. EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
  426. falcon_remove_tx(tx_queue);
  427. kfree(tx_queue->buffer);
  428. tx_queue->buffer = NULL;
  429. }
  430. /* Efx TCP segmentation acceleration.
  431. *
  432. * Why? Because by doing it here in the driver we can go significantly
  433. * faster than the kernel's generic segmentation offload (GSO).
  434. *
  435. * Requires TX checksum offload support.
  436. */
  437. /* Number of bytes inserted at the start of a TSO header buffer,
  438. * similar to NET_IP_ALIGN.
  439. */
  440. #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
  441. #define TSOH_OFFSET 0
  442. #else
  443. #define TSOH_OFFSET NET_IP_ALIGN
  444. #endif
  445. #define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET)
  446. /* Total size of struct efx_tso_header, buffer and padding */
  447. #define TSOH_SIZE(hdr_len) \
  448. (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
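/* A TSO header buffer is laid out as:
 *   struct efx_tso_header | TSOH_OFFSET padding | header bytes
 * TSOH_BUFFER() returns a pointer to the header bytes and TSOH_SIZE() gives
 * the total allocation size for a given header length.
 */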
  449. /* Size of blocks on free list. Larger blocks must be allocated from
  450. * the heap.
  451. */
  452. #define TSOH_STD_SIZE 128
  453. #define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2))
  454. #define ETH_HDR_LEN(skb) (skb_network_header(skb) - (skb)->data)
  455. #define SKB_TCP_OFF(skb) PTR_DIFF(tcp_hdr(skb), (skb)->data)
  456. #define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
  457. /**
  458. * struct tso_state - TSO state for an SKB
  459. * @out_len: Remaining length in current segment
  460. * @seqnum: Current sequence number
  461. * @ipv4_id: Current IPv4 ID, host endian
  462. * @packet_space: Remaining space in current packet
  463. * @dma_addr: DMA address of current position
  464. * @in_len: Remaining length in current SKB fragment
  465. * @unmap_len: Length of SKB fragment
  466. * @unmap_addr: DMA address of SKB fragment
  467. * @unmap_single: DMA single vs page mapping flag
  468. * @header_len: Number of bytes of header
  469. * @full_packet_size: Number of bytes to put in each outgoing segment
  470. *
  471. * The state used during segmentation. It is put into this data structure
  472. * just to make it easy to pass into inline functions.
  473. */
  474. struct tso_state {
  475. /* Output position */
  476. unsigned out_len;
  477. unsigned seqnum;
  478. unsigned ipv4_id;
  479. unsigned packet_space;
  480. /* Input position */
  481. dma_addr_t dma_addr;
  482. unsigned in_len;
  483. unsigned unmap_len;
  484. dma_addr_t unmap_addr;
  485. bool unmap_single;
  486. unsigned header_len;
  487. int full_packet_size;
  488. };
  489. /*
  490. * Verify that our various assumptions about sk_buffs and the conditions
  491. * under which TSO will be attempted hold true.
  492. */
  493. static void efx_tso_check_safe(struct sk_buff *skb)
  494. {
  495. __be16 protocol = skb->protocol;
  496. EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
  497. protocol);
  498. if (protocol == htons(ETH_P_8021Q)) {
  499. /* Find the encapsulated protocol; reset network header
  500. * and transport header based on that. */
  501. struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
  502. protocol = veh->h_vlan_encapsulated_proto;
  503. skb_set_network_header(skb, sizeof(*veh));
  504. if (protocol == htons(ETH_P_IP))
  505. skb_set_transport_header(skb, sizeof(*veh) +
  506. 4 * ip_hdr(skb)->ihl);
  507. }
  508. EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IP));
  509. EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
  510. EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
  511. + (tcp_hdr(skb)->doff << 2u)) >
  512. skb_headlen(skb));
  513. }
  514. /*
  515. * Allocate a page worth of efx_tso_header structures, and string them
  516. * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
  517. */
  518. static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
  519. {
  520. struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
  521. struct efx_tso_header *tsoh;
  522. dma_addr_t dma_addr;
  523. u8 *base_kva, *kva;
  524. base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
  525. if (base_kva == NULL) {
  526. EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
  527. " headers\n");
  528. return -ENOMEM;
  529. }
  530. /* pci_alloc_consistent() allocates pages. */
  531. EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
  532. for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
  533. tsoh = (struct efx_tso_header *)kva;
  534. tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
  535. tsoh->next = tx_queue->tso_headers_free;
  536. tx_queue->tso_headers_free = tsoh;
  537. }
  538. return 0;
  539. }
  540. /* Free up a TSO header, and all others in the same page. */
  541. static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
  542. struct efx_tso_header *tsoh,
  543. struct pci_dev *pci_dev)
  544. {
  545. struct efx_tso_header **p;
  546. unsigned long base_kva;
  547. dma_addr_t base_dma;
  548. base_kva = (unsigned long)tsoh & PAGE_MASK;
  549. base_dma = tsoh->dma_addr & PAGE_MASK;
  550. p = &tx_queue->tso_headers_free;
  551. while (*p != NULL) {
  552. if (((unsigned long)*p & PAGE_MASK) == base_kva)
  553. *p = (*p)->next;
  554. else
  555. p = &(*p)->next;
  556. }
  557. pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
  558. }
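/* Allocate and DMA-map a TSO header buffer from the heap, used when the
 * header is too large for the standard pool. Returns NULL on allocation or
 * mapping failure.
 */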
  559. static struct efx_tso_header *
  560. efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
  561. {
  562. struct efx_tso_header *tsoh;
  563. tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
  564. if (unlikely(!tsoh))
  565. return NULL;
  566. tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
  567. TSOH_BUFFER(tsoh), header_len,
  568. PCI_DMA_TODEVICE);
  569. if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev,
  570. tsoh->dma_addr))) {
  571. kfree(tsoh);
  572. return NULL;
  573. }
  574. tsoh->unmap_len = header_len;
  575. return tsoh;
  576. }
  577. static void
  578. efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
  579. {
  580. pci_unmap_single(tx_queue->efx->pci_dev,
  581. tsoh->dma_addr, tsoh->unmap_len,
  582. PCI_DMA_TODEVICE);
  583. kfree(tsoh);
  584. }
  585. /**
  586. * efx_tx_queue_insert - push descriptors onto the TX queue
  587. * @tx_queue: Efx TX queue
  588. * @dma_addr: DMA address of fragment
  589. * @len: Length of fragment
  590. * @final_buffer: The final buffer inserted into the queue
  591. *
  592. * Push descriptors onto the TX queue. Return 0 on success or 1 if
  593. * @tx_queue is full.
  594. */
  595. static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
  596. dma_addr_t dma_addr, unsigned len,
  597. struct efx_tx_buffer **final_buffer)
  598. {
  599. struct efx_tx_buffer *buffer;
  600. struct efx_nic *efx = tx_queue->efx;
  601. unsigned dma_len, fill_level, insert_ptr, misalign;
  602. int q_space;
  603. EFX_BUG_ON_PARANOID(len <= 0);
  604. fill_level = tx_queue->insert_count - tx_queue->old_read_count;
  605. /* -1 as there is no way to represent all descriptors used */
  606. q_space = EFX_TXQ_MASK - 1 - fill_level;
  607. while (1) {
  608. if (unlikely(q_space-- <= 0)) {
  609. /* It might be that completions have happened
  610. * since the xmit path last checked. Update
  611. * the xmit path's copy of read_count.
  612. */
  613. ++tx_queue->stopped;
  614. /* This memory barrier protects the change of
  615. * stopped from the access of read_count. */
  616. smp_mb();
  617. tx_queue->old_read_count =
  618. *(volatile unsigned *)&tx_queue->read_count;
  619. fill_level = (tx_queue->insert_count
  620. - tx_queue->old_read_count);
  621. q_space = EFX_TXQ_MASK - 1 - fill_level;
  622. if (unlikely(q_space-- <= 0)) {
  623. *final_buffer = NULL;
  624. return 1;
  625. }
  626. smp_mb();
  627. --tx_queue->stopped;
  628. }
  629. insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
  630. buffer = &tx_queue->buffer[insert_ptr];
  631. ++tx_queue->insert_count;
  632. EFX_BUG_ON_PARANOID(tx_queue->insert_count -
  633. tx_queue->read_count >
  634. EFX_TXQ_MASK);
  635. efx_tsoh_free(tx_queue, buffer);
  636. EFX_BUG_ON_PARANOID(buffer->len);
  637. EFX_BUG_ON_PARANOID(buffer->unmap_len);
  638. EFX_BUG_ON_PARANOID(buffer->skb);
  639. EFX_BUG_ON_PARANOID(!buffer->continuation);
  640. EFX_BUG_ON_PARANOID(buffer->tsoh);
  641. buffer->dma_addr = dma_addr;
  642. /* Ensure we do not cross a boundary unsupported by H/W */
  643. dma_len = (~dma_addr & efx->type->tx_dma_mask) + 1;
  644. misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
  645. if (misalign && dma_len + misalign > 512)
  646. dma_len = 512 - misalign;
  647. /* If there is enough space to send then do so */
  648. if (dma_len >= len)
  649. break;
  650. buffer->len = dma_len; /* Don't set the other members */
  651. dma_addr += dma_len;
  652. len -= dma_len;
  653. }
  654. EFX_BUG_ON_PARANOID(!len);
  655. buffer->len = len;
  656. *final_buffer = buffer;
  657. return 0;
  658. }
  659. /*
  660. * Put a TSO header into the TX queue.
  661. *
  662. * This is special-cased because we know that it is small enough to fit in
  663. * a single fragment, and we know it doesn't cross a page boundary. It
  664. * also allows us to not worry about end-of-packet etc.
  665. */
  666. static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
  667. struct efx_tso_header *tsoh, unsigned len)
  668. {
  669. struct efx_tx_buffer *buffer;
  670. buffer = &tx_queue->buffer[tx_queue->insert_count & EFX_TXQ_MASK];
  671. efx_tsoh_free(tx_queue, buffer);
  672. EFX_BUG_ON_PARANOID(buffer->len);
  673. EFX_BUG_ON_PARANOID(buffer->unmap_len);
  674. EFX_BUG_ON_PARANOID(buffer->skb);
  675. EFX_BUG_ON_PARANOID(!buffer->continuation);
  676. EFX_BUG_ON_PARANOID(buffer->tsoh);
  677. buffer->len = len;
  678. buffer->dma_addr = tsoh->dma_addr;
  679. buffer->tsoh = tsoh;
  680. ++tx_queue->insert_count;
  681. }
  682. /* Remove descriptors put into a tx_queue. */
  683. static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
  684. {
  685. struct efx_tx_buffer *buffer;
  686. dma_addr_t unmap_addr;
  687. /* Work backwards until we hit the original insert pointer value */
  688. while (tx_queue->insert_count != tx_queue->write_count) {
  689. --tx_queue->insert_count;
  690. buffer = &tx_queue->buffer[tx_queue->insert_count &
  691. EFX_TXQ_MASK];
  692. efx_tsoh_free(tx_queue, buffer);
  693. EFX_BUG_ON_PARANOID(buffer->skb);
  694. if (buffer->unmap_len) {
  695. unmap_addr = (buffer->dma_addr + buffer->len -
  696. buffer->unmap_len);
  697. if (buffer->unmap_single)
  698. pci_unmap_single(tx_queue->efx->pci_dev,
  699. unmap_addr, buffer->unmap_len,
  700. PCI_DMA_TODEVICE);
  701. else
  702. pci_unmap_page(tx_queue->efx->pci_dev,
  703. unmap_addr, buffer->unmap_len,
  704. PCI_DMA_TODEVICE);
  705. buffer->unmap_len = 0;
  706. }
  707. buffer->len = 0;
  708. buffer->continuation = true;
  709. }
  710. }
  711. /* Parse the SKB header and initialise state. */
  712. static void tso_start(struct tso_state *st, const struct sk_buff *skb)
  713. {
  714. /* All ethernet/IP/TCP headers combined size is TCP header size
  715. * plus offset of TCP header relative to start of packet.
  716. */
  717. st->header_len = ((tcp_hdr(skb)->doff << 2u)
  718. + PTR_DIFF(tcp_hdr(skb), skb->data));
  719. st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;
  720. st->ipv4_id = ntohs(ip_hdr(skb)->id);
  721. st->seqnum = ntohl(tcp_hdr(skb)->seq);
  722. EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
  723. EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
  724. EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
  725. st->packet_space = st->full_packet_size;
  726. st->out_len = skb->len - st->header_len;
  727. st->unmap_len = 0;
  728. st->unmap_single = false;
  729. }
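/* DMA-map a paged fragment of the skb and make it the current input for
 * segmentation. Returns 0 on success or -ENOMEM on a mapping failure.
 */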
  730. static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
  731. skb_frag_t *frag)
  732. {
  733. st->unmap_addr = pci_map_page(efx->pci_dev, frag->page,
  734. frag->page_offset, frag->size,
  735. PCI_DMA_TODEVICE);
  736. if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
  737. st->unmap_single = false;
  738. st->unmap_len = frag->size;
  739. st->in_len = frag->size;
  740. st->dma_addr = st->unmap_addr;
  741. return 0;
  742. }
  743. return -ENOMEM;
  744. }
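/* DMA-map the payload in the skb's linear area (everything after the
 * headers) and make it the current input for segmentation. Returns 0 on
 * success or -ENOMEM on a mapping failure.
 */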
  745. static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
  746. const struct sk_buff *skb)
  747. {
  748. int hl = st->header_len;
  749. int len = skb_headlen(skb) - hl;
  750. st->unmap_addr = pci_map_single(efx->pci_dev, skb->data + hl,
  751. len, PCI_DMA_TODEVICE);
  752. if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
  753. st->unmap_single = true;
  754. st->unmap_len = len;
  755. st->in_len = len;
  756. st->dma_addr = st->unmap_addr;
  757. return 0;
  758. }
  759. return -ENOMEM;
  760. }
  761. /**
  762. * tso_fill_packet_with_fragment - form descriptors for the current fragment
  763. * @tx_queue: Efx TX queue
  764. * @skb: Socket buffer
  765. * @st: TSO state
  766. *
  767. * Form descriptors for the current fragment, until we reach the end
  768. * of fragment or end-of-packet. Return 0 on success, 1 if not enough
  769. * space in @tx_queue.
  770. */
  771. static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
  772. const struct sk_buff *skb,
  773. struct tso_state *st)
  774. {
  775. struct efx_tx_buffer *buffer;
  776. int n, end_of_packet, rc;
  777. if (st->in_len == 0)
  778. return 0;
  779. if (st->packet_space == 0)
  780. return 0;
  781. EFX_BUG_ON_PARANOID(st->in_len <= 0);
  782. EFX_BUG_ON_PARANOID(st->packet_space <= 0);
  783. n = min(st->in_len, st->packet_space);
  784. st->packet_space -= n;
  785. st->out_len -= n;
  786. st->in_len -= n;
  787. rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
  788. if (likely(rc == 0)) {
  789. if (st->out_len == 0)
  790. /* Transfer ownership of the skb */
  791. buffer->skb = skb;
  792. end_of_packet = st->out_len == 0 || st->packet_space == 0;
  793. buffer->continuation = !end_of_packet;
  794. if (st->in_len == 0) {
  795. /* Transfer ownership of the pci mapping */
  796. buffer->unmap_len = st->unmap_len;
  797. buffer->unmap_single = st->unmap_single;
  798. st->unmap_len = 0;
  799. }
  800. }
  801. st->dma_addr += n;
  802. return rc;
  803. }
  804. /**
  805. * tso_start_new_packet - generate a new header and prepare for the new packet
  806. * @tx_queue: Efx TX queue
  807. * @skb: Socket buffer
  808. * @st: TSO state
  809. *
  810. * Generate a new header and prepare for the new packet. Return 0 on
  811. * success, or -1 if we failed to allocate a header buffer.
  812. */
  813. static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
  814. const struct sk_buff *skb,
  815. struct tso_state *st)
  816. {
  817. struct efx_tso_header *tsoh;
  818. struct iphdr *tsoh_iph;
  819. struct tcphdr *tsoh_th;
  820. unsigned ip_length;
  821. u8 *header;
  822. /* Allocate a DMA-mapped header buffer. */
  823. if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
  824. if (tx_queue->tso_headers_free == NULL) {
  825. if (efx_tsoh_block_alloc(tx_queue))
  826. return -1;
  827. }
  828. EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
  829. tsoh = tx_queue->tso_headers_free;
  830. tx_queue->tso_headers_free = tsoh->next;
  831. tsoh->unmap_len = 0;
  832. } else {
  833. tx_queue->tso_long_headers++;
  834. tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
  835. if (unlikely(!tsoh))
  836. return -1;
  837. }
  838. header = TSOH_BUFFER(tsoh);
  839. tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
  840. tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));
  841. /* Copy and update the headers. */
  842. memcpy(header, skb->data, st->header_len);
  843. tsoh_th->seq = htonl(st->seqnum);
  844. st->seqnum += skb_shinfo(skb)->gso_size;
  845. if (st->out_len > skb_shinfo(skb)->gso_size) {
  846. /* This packet will not finish the TSO burst. */
  847. ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
  848. tsoh_th->fin = 0;
  849. tsoh_th->psh = 0;
  850. } else {
  851. /* This packet will be the last in the TSO burst. */
  852. ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
  853. tsoh_th->fin = tcp_hdr(skb)->fin;
  854. tsoh_th->psh = tcp_hdr(skb)->psh;
  855. }
  856. tsoh_iph->tot_len = htons(ip_length);
  857. /* Linux leaves suitable gaps in the IP ID space for us to fill. */
  858. tsoh_iph->id = htons(st->ipv4_id);
  859. st->ipv4_id++;
  860. st->packet_space = skb_shinfo(skb)->gso_size;
  861. ++tx_queue->tso_packets;
  862. /* Form a descriptor for this header. */
  863. efx_tso_put_header(tx_queue, tsoh, st->header_len);
  864. return 0;
  865. }
  866. /**
  867. * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
  868. * @tx_queue: Efx TX queue
  869. * @skb: Socket buffer
  870. *
  871. * Context: You must hold netif_tx_lock() to call this function.
  872. *
  873. * Add socket buffer @skb to @tx_queue, performing TSO. In all cases
  874. * @skb is consumed. Return %NETDEV_TX_OK, or %NETDEV_TX_BUSY if the
  875. * descriptor ring is full.
  876. */
  877. static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
  878. struct sk_buff *skb)
  879. {
  880. struct efx_nic *efx = tx_queue->efx;
  881. int frag_i, rc, rc2 = NETDEV_TX_OK;
  882. struct tso_state state;
  883. /* Verify TSO is safe - these checks should never fail. */
  884. efx_tso_check_safe(skb);
  885. EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
  886. tso_start(&state, skb);
  887. /* Assume that skb header area contains exactly the headers, and
  888. * all payload is in the frag list.
  889. */
  890. if (skb_headlen(skb) == state.header_len) {
  891. /* Grab the first payload fragment. */
  892. EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
  893. frag_i = 0;
  894. rc = tso_get_fragment(&state, efx,
  895. skb_shinfo(skb)->frags + frag_i);
  896. if (rc)
  897. goto mem_err;
  898. } else {
  899. rc = tso_get_head_fragment(&state, efx, skb);
  900. if (rc)
  901. goto mem_err;
  902. frag_i = -1;
  903. }
  904. if (tso_start_new_packet(tx_queue, skb, &state) < 0)
  905. goto mem_err;
  906. while (1) {
  907. rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
  908. if (unlikely(rc))
  909. goto stop;
  910. /* Move on to the next fragment? */
  911. if (state.in_len == 0) {
  912. if (++frag_i >= skb_shinfo(skb)->nr_frags)
  913. /* End of payload reached. */
  914. break;
  915. rc = tso_get_fragment(&state, efx,
  916. skb_shinfo(skb)->frags + frag_i);
  917. if (rc)
  918. goto mem_err;
  919. }
  920. /* Start a new packet? */
  921. if (state.packet_space == 0 &&
  922. tso_start_new_packet(tx_queue, skb, &state) < 0)
  923. goto mem_err;
  924. }
  925. /* Pass off to hardware */
  926. falcon_push_buffers(tx_queue);
  927. tx_queue->tso_bursts++;
  928. return NETDEV_TX_OK;
  929. mem_err:
  930. EFX_ERR(efx, "Out of memory for TSO headers, or PCI mapping error\n");
  931. dev_kfree_skb_any((struct sk_buff *)skb);
  932. goto unwind;
  933. stop:
  934. rc2 = NETDEV_TX_BUSY;
  935. /* Stop the queue if it wasn't stopped before. */
  936. if (tx_queue->stopped == 1)
  937. efx_stop_queue(efx);
  938. unwind:
  939. /* Free the DMA mapping we were in the process of writing out */
  940. if (state.unmap_len) {
  941. if (state.unmap_single)
  942. pci_unmap_single(efx->pci_dev, state.unmap_addr,
  943. state.unmap_len, PCI_DMA_TODEVICE);
  944. else
  945. pci_unmap_page(efx->pci_dev, state.unmap_addr,
  946. state.unmap_len, PCI_DMA_TODEVICE);
  947. }
  948. efx_enqueue_unwind(tx_queue);
  949. return rc2;
  950. }
  951. /*
  952. * Free up all TSO data structures associated with tx_queue. This
  953. * routine should be called only when the tx_queue is empty and will
  954. * no longer be used.
  955. */
  956. static void efx_fini_tso(struct efx_tx_queue *tx_queue)
  957. {
  958. unsigned i;
  959. if (tx_queue->buffer) {
  960. for (i = 0; i <= EFX_TXQ_MASK; ++i)
  961. efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
  962. }
  963. while (tx_queue->tso_headers_free != NULL)
  964. efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
  965. tx_queue->efx->pci_dev);
  966. }