virtio_net.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. /* A simple network driver using virtio.
  2. *
  3. * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. */
  19. //#define DEBUG
  20. #include <linux/netdevice.h>
  21. #include <linux/etherdevice.h>
  22. #include <linux/module.h>
  23. #include <linux/virtio.h>
  24. #include <linux/virtio_net.h>
  25. #include <linux/scatterlist.h>
  26. static int napi_weight = 128;
  27. module_param(napi_weight, int, 0444);
  28. static int csum = 1, gso = 1;
  29. module_param(csum, bool, 0444);
  30. module_param(gso, bool, 0444);
  31. /* FIXME: MTU in config. */
  32. #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)
  33. struct virtnet_info
  34. {
  35. struct virtio_device *vdev;
  36. struct virtqueue *rvq, *svq;
  37. struct net_device *dev;
  38. struct napi_struct napi;
  39. /* The skb we couldn't send because buffers were full. */
  40. struct sk_buff *last_xmit_skb;
  41. /* If we need to free in a timer, this is it. */
  42. struct timer_list xmit_free_timer;
  43. /* Number of input buffers, and max we've ever had. */
  44. unsigned int num, max;
  45. /* For cleaning up after transmission. */
  46. struct tasklet_struct tasklet;
  47. bool free_in_tasklet;
  48. /* Receive & send queues. */
  49. struct sk_buff_head recv;
  50. struct sk_buff_head send;
  51. };
  52. static inline struct virtio_net_hdr *skb_vnet_hdr(struct sk_buff *skb)
  53. {
  54. return (struct virtio_net_hdr *)skb->cb;
  55. }
  56. static inline void vnet_hdr_to_sg(struct scatterlist *sg, struct sk_buff *skb)
  57. {
  58. sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
  59. }
  60. static void skb_xmit_done(struct virtqueue *svq)
  61. {
  62. struct virtnet_info *vi = svq->vdev->priv;
  63. /* Suppress further interrupts. */
  64. svq->vq_ops->disable_cb(svq);
  65. /* We were probably waiting for more output buffers. */
  66. netif_wake_queue(vi->dev);
  67. /* Make sure we re-xmit last_xmit_skb: if there are no more packets
  68. * queued, start_xmit won't be called. */
  69. tasklet_schedule(&vi->tasklet);
  70. }
  71. static void receive_skb(struct net_device *dev, struct sk_buff *skb,
  72. unsigned len)
  73. {
  74. struct virtio_net_hdr *hdr = skb_vnet_hdr(skb);
  75. if (unlikely(len < sizeof(struct virtio_net_hdr) + ETH_HLEN)) {
  76. pr_debug("%s: short packet %i\n", dev->name, len);
  77. dev->stats.rx_length_errors++;
  78. goto drop;
  79. }
  80. len -= sizeof(struct virtio_net_hdr);
  81. BUG_ON(len > MAX_PACKET_LEN);
  82. skb_trim(skb, len);
  83. dev->stats.rx_bytes += skb->len;
  84. dev->stats.rx_packets++;
  85. if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
  86. pr_debug("Needs csum!\n");
  87. if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset))
  88. goto frame_err;
  89. }
  90. skb->protocol = eth_type_trans(skb, dev);
  91. pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
  92. ntohs(skb->protocol), skb->len, skb->pkt_type);
  93. if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
  94. pr_debug("GSO!\n");
  95. switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
  96. case VIRTIO_NET_HDR_GSO_TCPV4:
  97. skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
  98. break;
  99. case VIRTIO_NET_HDR_GSO_UDP:
  100. skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
  101. break;
  102. case VIRTIO_NET_HDR_GSO_TCPV6:
  103. skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
  104. break;
  105. default:
  106. if (net_ratelimit())
  107. printk(KERN_WARNING "%s: bad gso type %u.\n",
  108. dev->name, hdr->gso_type);
  109. goto frame_err;
  110. }
  111. if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
  112. skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
  113. skb_shinfo(skb)->gso_size = hdr->gso_size;
  114. if (skb_shinfo(skb)->gso_size == 0) {
  115. if (net_ratelimit())
  116. printk(KERN_WARNING "%s: zero gso size.\n",
  117. dev->name);
  118. goto frame_err;
  119. }
  120. /* Header must be checked, and gso_segs computed. */
  121. skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
  122. skb_shinfo(skb)->gso_segs = 0;
  123. }
  124. netif_receive_skb(skb);
  125. return;
  126. frame_err:
  127. dev->stats.rx_frame_errors++;
  128. drop:
  129. dev_kfree_skb(skb);
  130. }
  131. static void try_fill_recv(struct virtnet_info *vi)
  132. {
  133. struct sk_buff *skb;
  134. struct scatterlist sg[2+MAX_SKB_FRAGS];
  135. int num, err;
  136. sg_init_table(sg, 2+MAX_SKB_FRAGS);
  137. for (;;) {
  138. skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
  139. if (unlikely(!skb))
  140. break;
  141. skb_put(skb, MAX_PACKET_LEN);
  142. vnet_hdr_to_sg(sg, skb);
  143. num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
  144. skb_queue_head(&vi->recv, skb);
  145. err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
  146. if (err) {
  147. skb_unlink(skb, &vi->recv);
  148. kfree_skb(skb);
  149. break;
  150. }
  151. vi->num++;
  152. }
  153. if (unlikely(vi->num > vi->max))
  154. vi->max = vi->num;
  155. vi->rvq->vq_ops->kick(vi->rvq);
  156. }
  157. static void skb_recv_done(struct virtqueue *rvq)
  158. {
  159. struct virtnet_info *vi = rvq->vdev->priv;
  160. /* Schedule NAPI, Suppress further interrupts if successful. */
  161. if (netif_rx_schedule_prep(vi->dev, &vi->napi)) {
  162. rvq->vq_ops->disable_cb(rvq);
  163. __netif_rx_schedule(vi->dev, &vi->napi);
  164. }
  165. }
  166. static int virtnet_poll(struct napi_struct *napi, int budget)
  167. {
  168. struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
  169. struct sk_buff *skb = NULL;
  170. unsigned int len, received = 0;
  171. again:
  172. while (received < budget &&
  173. (skb = vi->rvq->vq_ops->get_buf(vi->rvq, &len)) != NULL) {
  174. __skb_unlink(skb, &vi->recv);
  175. receive_skb(vi->dev, skb, len);
  176. vi->num--;
  177. received++;
  178. }
  179. /* FIXME: If we oom and completely run out of inbufs, we need
  180. * to start a timer trying to fill more. */
  181. if (vi->num < vi->max / 2)
  182. try_fill_recv(vi);
  183. /* Out of packets? */
  184. if (received < budget) {
  185. netif_rx_complete(vi->dev, napi);
  186. if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
  187. && napi_schedule_prep(napi)) {
  188. vi->rvq->vq_ops->disable_cb(vi->rvq);
  189. __netif_rx_schedule(vi->dev, napi);
  190. goto again;
  191. }
  192. }
  193. return received;
  194. }
  195. static void free_old_xmit_skbs(struct virtnet_info *vi)
  196. {
  197. struct sk_buff *skb;
  198. unsigned int len;
  199. while ((skb = vi->svq->vq_ops->get_buf(vi->svq, &len)) != NULL) {
  200. pr_debug("Sent skb %p\n", skb);
  201. __skb_unlink(skb, &vi->send);
  202. vi->dev->stats.tx_bytes += skb->len;
  203. vi->dev->stats.tx_packets++;
  204. kfree_skb(skb);
  205. }
  206. }
  207. /* If the virtio transport doesn't always notify us when all in-flight packets
  208. * are consumed, we fall back to using this function on a timer to free them. */
  209. static void xmit_free(unsigned long data)
  210. {
  211. struct virtnet_info *vi = (void *)data;
  212. netif_tx_lock(vi->dev);
  213. free_old_xmit_skbs(vi);
  214. if (!skb_queue_empty(&vi->send))
  215. mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
  216. netif_tx_unlock(vi->dev);
  217. }
  218. static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
  219. {
  220. int num, err;
  221. struct scatterlist sg[2+MAX_SKB_FRAGS];
  222. struct virtio_net_hdr *hdr;
  223. const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
  224. sg_init_table(sg, 2+MAX_SKB_FRAGS);
  225. pr_debug("%s: xmit %p " MAC_FMT "\n", vi->dev->name, skb,
  226. dest[0], dest[1], dest[2],
  227. dest[3], dest[4], dest[5]);
  228. /* Encode metadata header at front. */
  229. hdr = skb_vnet_hdr(skb);
  230. if (skb->ip_summed == CHECKSUM_PARTIAL) {
  231. hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
  232. hdr->csum_start = skb->csum_start - skb_headroom(skb);
  233. hdr->csum_offset = skb->csum_offset;
  234. } else {
  235. hdr->flags = 0;
  236. hdr->csum_offset = hdr->csum_start = 0;
  237. }
  238. if (skb_is_gso(skb)) {
  239. hdr->hdr_len = skb_transport_header(skb) - skb->data;
  240. hdr->gso_size = skb_shinfo(skb)->gso_size;
  241. if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
  242. hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
  243. else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
  244. hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
  245. else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
  246. hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
  247. else
  248. BUG();
  249. if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
  250. hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
  251. } else {
  252. hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
  253. hdr->gso_size = hdr->hdr_len = 0;
  254. }
  255. vnet_hdr_to_sg(sg, skb);
  256. num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
  257. err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
  258. if (!err && !vi->free_in_tasklet)
  259. mod_timer(&vi->xmit_free_timer, jiffies + (HZ/10));
  260. return err;
  261. }
  262. static void xmit_tasklet(unsigned long data)
  263. {
  264. struct virtnet_info *vi = (void *)data;
  265. netif_tx_lock_bh(vi->dev);
  266. if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) {
  267. vi->svq->vq_ops->kick(vi->svq);
  268. vi->last_xmit_skb = NULL;
  269. }
  270. if (vi->free_in_tasklet)
  271. free_old_xmit_skbs(vi);
  272. netif_tx_unlock_bh(vi->dev);
  273. }
  274. static int start_xmit(struct sk_buff *skb, struct net_device *dev)
  275. {
  276. struct virtnet_info *vi = netdev_priv(dev);
  277. again:
  278. /* Free up any pending old buffers before queueing new ones. */
  279. free_old_xmit_skbs(vi);
  280. /* If we has a buffer left over from last time, send it now. */
  281. if (unlikely(vi->last_xmit_skb)) {
  282. if (xmit_skb(vi, vi->last_xmit_skb) != 0) {
  283. /* Drop this skb: we only queue one. */
  284. vi->dev->stats.tx_dropped++;
  285. kfree_skb(skb);
  286. skb = NULL;
  287. goto stop_queue;
  288. }
  289. vi->last_xmit_skb = NULL;
  290. }
  291. /* Put new one in send queue and do transmit */
  292. if (likely(skb)) {
  293. __skb_queue_head(&vi->send, skb);
  294. if (xmit_skb(vi, skb) != 0) {
  295. vi->last_xmit_skb = skb;
  296. skb = NULL;
  297. goto stop_queue;
  298. }
  299. }
  300. done:
  301. vi->svq->vq_ops->kick(vi->svq);
  302. return NETDEV_TX_OK;
  303. stop_queue:
  304. pr_debug("%s: virtio not prepared to send\n", dev->name);
  305. netif_stop_queue(dev);
  306. /* Activate callback for using skbs: if this returns false it
  307. * means some were used in the meantime. */
  308. if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
  309. vi->svq->vq_ops->disable_cb(vi->svq);
  310. netif_start_queue(dev);
  311. goto again;
  312. }
  313. goto done;
  314. }
  315. #ifdef CONFIG_NET_POLL_CONTROLLER
  316. static void virtnet_netpoll(struct net_device *dev)
  317. {
  318. struct virtnet_info *vi = netdev_priv(dev);
  319. napi_schedule(&vi->napi);
  320. }
  321. #endif
  322. static int virtnet_open(struct net_device *dev)
  323. {
  324. struct virtnet_info *vi = netdev_priv(dev);
  325. napi_enable(&vi->napi);
  326. /* If all buffers were filled by other side before we napi_enabled, we
  327. * won't get another interrupt, so process any outstanding packets
  328. * now. virtnet_poll wants re-enable the queue, so we disable here.
  329. * We synchronize against interrupts via NAPI_STATE_SCHED */
  330. if (netif_rx_schedule_prep(dev, &vi->napi)) {
  331. vi->rvq->vq_ops->disable_cb(vi->rvq);
  332. __netif_rx_schedule(dev, &vi->napi);
  333. }
  334. return 0;
  335. }
  336. static int virtnet_close(struct net_device *dev)
  337. {
  338. struct virtnet_info *vi = netdev_priv(dev);
  339. napi_disable(&vi->napi);
  340. return 0;
  341. }
  342. static int virtnet_probe(struct virtio_device *vdev)
  343. {
  344. int err;
  345. struct net_device *dev;
  346. struct virtnet_info *vi;
  347. /* Allocate ourselves a network device with room for our info */
  348. dev = alloc_etherdev(sizeof(struct virtnet_info));
  349. if (!dev)
  350. return -ENOMEM;
  351. /* Set up network device as normal. */
  352. dev->open = virtnet_open;
  353. dev->stop = virtnet_close;
  354. dev->hard_start_xmit = start_xmit;
  355. dev->features = NETIF_F_HIGHDMA;
  356. #ifdef CONFIG_NET_POLL_CONTROLLER
  357. dev->poll_controller = virtnet_netpoll;
  358. #endif
  359. SET_NETDEV_DEV(dev, &vdev->dev);
  360. /* Do we support "hardware" checksums? */
  361. if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
  362. /* This opens up the world of extra features. */
  363. dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
  364. if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
  365. dev->features |= NETIF_F_TSO | NETIF_F_UFO
  366. | NETIF_F_TSO_ECN | NETIF_F_TSO6;
  367. }
  368. /* Individual feature bits: what can host handle? */
  369. if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
  370. dev->features |= NETIF_F_TSO;
  371. if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
  372. dev->features |= NETIF_F_TSO6;
  373. if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
  374. dev->features |= NETIF_F_TSO_ECN;
  375. if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO))
  376. dev->features |= NETIF_F_UFO;
  377. }
  378. /* Configuration may specify what MAC to use. Otherwise random. */
  379. if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
  380. vdev->config->get(vdev,
  381. offsetof(struct virtio_net_config, mac),
  382. dev->dev_addr, dev->addr_len);
  383. } else
  384. random_ether_addr(dev->dev_addr);
  385. /* Set up our device-specific information */
  386. vi = netdev_priv(dev);
  387. netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
  388. vi->dev = dev;
  389. vi->vdev = vdev;
  390. vdev->priv = vi;
  391. /* If they give us a callback when all buffers are done, we don't need
  392. * the timer. */
  393. vi->free_in_tasklet = virtio_has_feature(vdev,VIRTIO_F_NOTIFY_ON_EMPTY);
  394. /* We expect two virtqueues, receive then send. */
  395. vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
  396. if (IS_ERR(vi->rvq)) {
  397. err = PTR_ERR(vi->rvq);
  398. goto free;
  399. }
  400. vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
  401. if (IS_ERR(vi->svq)) {
  402. err = PTR_ERR(vi->svq);
  403. goto free_recv;
  404. }
  405. /* Initialize our empty receive and send queues. */
  406. skb_queue_head_init(&vi->recv);
  407. skb_queue_head_init(&vi->send);
  408. tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi);
  409. if (!vi->free_in_tasklet)
  410. setup_timer(&vi->xmit_free_timer, xmit_free, (unsigned long)vi);
  411. err = register_netdev(dev);
  412. if (err) {
  413. pr_debug("virtio_net: registering device failed\n");
  414. goto free_send;
  415. }
  416. /* Last of all, set up some receive buffers. */
  417. try_fill_recv(vi);
  418. /* If we didn't even get one input buffer, we're useless. */
  419. if (vi->num == 0) {
  420. err = -ENOMEM;
  421. goto unregister;
  422. }
  423. pr_debug("virtnet: registered device %s\n", dev->name);
  424. return 0;
  425. unregister:
  426. unregister_netdev(dev);
  427. free_send:
  428. vdev->config->del_vq(vi->svq);
  429. free_recv:
  430. vdev->config->del_vq(vi->rvq);
  431. free:
  432. free_netdev(dev);
  433. return err;
  434. }
  435. static void virtnet_remove(struct virtio_device *vdev)
  436. {
  437. struct virtnet_info *vi = vdev->priv;
  438. struct sk_buff *skb;
  439. /* Stop all the virtqueues. */
  440. vdev->config->reset(vdev);
  441. if (!vi->free_in_tasklet)
  442. del_timer_sync(&vi->xmit_free_timer);
  443. /* Free our skbs in send and recv queues, if any. */
  444. while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
  445. kfree_skb(skb);
  446. vi->num--;
  447. }
  448. __skb_queue_purge(&vi->send);
  449. BUG_ON(vi->num != 0);
  450. vdev->config->del_vq(vi->svq);
  451. vdev->config->del_vq(vi->rvq);
  452. unregister_netdev(vi->dev);
  453. free_netdev(vi->dev);
  454. }
  455. static struct virtio_device_id id_table[] = {
  456. { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
  457. { 0 },
  458. };
  459. static unsigned int features[] = {
  460. VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC,
  461. VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6,
  462. VIRTIO_NET_F_HOST_ECN, VIRTIO_F_NOTIFY_ON_EMPTY,
  463. };
  464. static struct virtio_driver virtio_net = {
  465. .feature_table = features,
  466. .feature_table_size = ARRAY_SIZE(features),
  467. .driver.name = KBUILD_MODNAME,
  468. .driver.owner = THIS_MODULE,
  469. .id_table = id_table,
  470. .probe = virtnet_probe,
  471. .remove = __devexit_p(virtnet_remove),
  472. };
  473. static int __init init(void)
  474. {
  475. return register_virtio_driver(&virtio_net);
  476. }
  477. static void __exit fini(void)
  478. {
  479. unregister_virtio_driver(&virtio_net);
  480. }
  481. module_init(init);
  482. module_exit(fini);
  483. MODULE_DEVICE_TABLE(virtio, id_table);
  484. MODULE_DESCRIPTION("Virtio network driver");
  485. MODULE_LICENSE("GPL");