trans_virtio.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615
  1. /*
  2. * The Virtio 9p transport driver
  3. *
  4. * This is a block based transport driver based on the lguest block driver
  5. * code.
  6. *
  7. * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation
  8. *
  9. * Based on virtio console driver
  10. * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation
  11. *
  12. * This program is free software; you can redistribute it and/or modify
  13. * it under the terms of the GNU General Public License version 2
  14. * as published by the Free Software Foundation.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License
  22. * along with this program; if not, write to:
  23. * Free Software Foundation
  24. * 51 Franklin Street, Fifth Floor
  25. * Boston, MA 02111-1301 USA
  26. *
  27. */
  28. #include <linux/in.h>
  29. #include <linux/module.h>
  30. #include <linux/net.h>
  31. #include <linux/ipv6.h>
  32. #include <linux/errno.h>
  33. #include <linux/kernel.h>
  34. #include <linux/un.h>
  35. #include <linux/uaccess.h>
  36. #include <linux/inet.h>
  37. #include <linux/idr.h>
  38. #include <linux/file.h>
  39. #include <linux/slab.h>
  40. #include <net/9p/9p.h>
  41. #include <linux/parser.h>
  42. #include <net/9p/client.h>
  43. #include <net/9p/transport.h>
  44. #include <linux/scatterlist.h>
  45. #include <linux/swap.h>
  46. #include <linux/virtio.h>
  47. #include <linux/virtio_9p.h>
  48. #include "trans_common.h"
/* Number of entries in each channel's scatterlist array. */
#define VIRTQUEUE_NUM 128
/* a single mutex to manage channel initialization and attachment */
static DEFINE_MUTEX(virtio_9p_lock);
/* Requests sleep here while the pinned-page count is at or over the limit. */
static DECLARE_WAIT_QUEUE_HEAD(vp_wq);
/* Count of pages currently pinned for requests; shared by all channels. */
static atomic_t vp_pinned = ATOMIC_INIT(0);
  54. /**
  55. * struct virtio_chan - per-instance transport information
  56. * @initialized: whether the channel is initialized
  57. * @inuse: whether the channel is in use
  58. * @lock: protects multiple elements within this structure
  59. * @client: client instance
  60. * @vdev: virtio dev associated with this channel
  61. * @vq: virtio queue associated with this channel
  62. * @sg: scatter gather list which is used to pack a request (protected?)
  63. *
  64. * We keep all per-channel information in a structure.
  65. * This structure is allocated within the devices dev->mem space.
  66. * A pointer to the structure will get put in the transport private.
  67. *
  68. */
  69. struct virtio_chan {
  70. bool inuse;
  71. spinlock_t lock;
  72. struct p9_client *client;
  73. struct virtio_device *vdev;
  74. struct virtqueue *vq;
  75. int ring_bufs_avail;
  76. wait_queue_head_t *vc_wq;
  77. /* This is global limit. Since we don't have a global structure,
  78. * will be placing it in each channel.
  79. */
  80. int p9_max_pages;
  81. /* Scatterlist: can be too big for stack. */
  82. struct scatterlist sg[VIRTQUEUE_NUM];
  83. int tag_len;
  84. /*
  85. * tag name to identify a mount Non-null terminated
  86. */
  87. char *tag;
  88. struct list_head chan_list;
  89. };
/* All probed channels; modified and walked with virtio_9p_lock held. */
static struct list_head virtio_chan_list;
  91. /* How many bytes left in this page. */
  92. static unsigned int rest_of_page(void *data)
  93. {
  94. return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
  95. }
  96. /**
  97. * p9_virtio_close - reclaim resources of a channel
  98. * @client: client instance
  99. *
  100. * This reclaims a channel by freeing its resources and
  101. * reseting its inuse flag.
  102. *
  103. */
  104. static void p9_virtio_close(struct p9_client *client)
  105. {
  106. struct virtio_chan *chan = client->trans;
  107. mutex_lock(&virtio_9p_lock);
  108. if (chan)
  109. chan->inuse = false;
  110. mutex_unlock(&virtio_9p_lock);
  111. }
  112. /**
  113. * req_done - callback which signals activity from the server
  114. * @vq: virtio queue activity was received on
  115. *
  116. * This notifies us that the server has triggered some activity
  117. * on the virtio channel - most likely a response to request we
  118. * sent. Figure out which requests now have responses and wake up
  119. * those threads.
  120. *
  121. * Bugs: could do with some additional sanity checking, but appears to work.
  122. *
  123. */
  124. static void req_done(struct virtqueue *vq)
  125. {
  126. struct virtio_chan *chan = vq->vdev->priv;
  127. struct p9_fcall *rc;
  128. unsigned int len;
  129. struct p9_req_t *req;
  130. unsigned long flags;
  131. P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n");
  132. while (1) {
  133. spin_lock_irqsave(&chan->lock, flags);
  134. rc = virtqueue_get_buf(chan->vq, &len);
  135. if (rc == NULL) {
  136. spin_unlock_irqrestore(&chan->lock, flags);
  137. break;
  138. }
  139. chan->ring_bufs_avail = 1;
  140. spin_unlock_irqrestore(&chan->lock, flags);
  141. /* Wakeup if anyone waiting for VirtIO ring space. */
  142. wake_up(chan->vc_wq);
  143. P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc);
  144. P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
  145. req = p9_tag_lookup(chan->client, rc->tag);
  146. if (req->tc->private) {
  147. struct trans_rpage_info *rp = req->tc->private;
  148. int p = rp->rp_nr_pages;
  149. /*Release pages */
  150. p9_release_req_pages(rp);
  151. atomic_sub(p, &vp_pinned);
  152. wake_up(&vp_wq);
  153. if (rp->rp_alloc)
  154. kfree(rp);
  155. req->tc->private = NULL;
  156. }
  157. req->status = REQ_STATUS_RCVD;
  158. p9_client_cb(chan->client, req);
  159. }
  160. }
  161. /**
  162. * pack_sg_list - pack a scatter gather list from a linear buffer
  163. * @sg: scatter/gather list to pack into
  164. * @start: which segment of the sg_list to start at
  165. * @limit: maximum segment to pack data to
  166. * @data: data to pack into scatter/gather list
  167. * @count: amount of data to pack into the scatter/gather list
  168. *
  169. * sg_lists have multiple segments of various sizes. This will pack
  170. * arbitrary data into an existing scatter gather list, segmenting the
  171. * data as necessary within constraints.
  172. *
  173. */
  174. static int
  175. pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
  176. int count)
  177. {
  178. int s;
  179. int index = start;
  180. while (count) {
  181. s = rest_of_page(data);
  182. if (s > count)
  183. s = count;
  184. sg_set_buf(&sg[index++], data, s);
  185. count -= s;
  186. data += s;
  187. BUG_ON(index > limit);
  188. }
  189. return index-start;
  190. }
  191. /* We don't currently allow canceling of virtio requests */
  192. static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
  193. {
  194. return 1;
  195. }
  196. /**
  197. * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer,
  198. * this takes a list of pages.
  199. * @sg: scatter/gather list to pack into
  200. * @start: which segment of the sg_list to start at
  201. * @pdata_off: Offset into the first page
  202. * @**pdata: a list of pages to add into sg.
  203. * @count: amount of data to pack into the scatter/gather list
  204. */
  205. static int
  206. pack_sg_list_p(struct scatterlist *sg, int start, int limit, size_t pdata_off,
  207. struct page **pdata, int count)
  208. {
  209. int s;
  210. int i = 0;
  211. int index = start;
  212. if (pdata_off) {
  213. s = min((int)(PAGE_SIZE - pdata_off), count);
  214. sg_set_page(&sg[index++], pdata[i++], s, pdata_off);
  215. count -= s;
  216. }
  217. while (count) {
  218. BUG_ON(index > limit);
  219. s = min((int)PAGE_SIZE, count);
  220. sg_set_page(&sg[index++], pdata[i++], s, 0);
  221. count -= s;
  222. }
  223. return index-start;
  224. }
  225. /**
  226. * p9_virtio_request - issue a request
  227. * @client: client instance issuing the request
  228. * @req: request to be issued
  229. *
  230. */
  231. static int
  232. p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
  233. {
  234. int in, out, inp, outp;
  235. struct virtio_chan *chan = client->trans;
  236. char *rdata = (char *)req->rc+sizeof(struct p9_fcall);
  237. unsigned long flags;
  238. size_t pdata_off = 0;
  239. struct trans_rpage_info *rpinfo = NULL;
  240. int err, pdata_len = 0;
  241. P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n");
  242. req->status = REQ_STATUS_SENT;
  243. if (req->tc->pbuf_size && (req->tc->pubuf && P9_IS_USER_CONTEXT)) {
  244. int nr_pages = p9_nr_pages(req);
  245. int rpinfo_size = sizeof(struct trans_rpage_info) +
  246. sizeof(struct page *) * nr_pages;
  247. if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
  248. err = wait_event_interruptible(vp_wq,
  249. atomic_read(&vp_pinned) < chan->p9_max_pages);
  250. if (err == -ERESTARTSYS)
  251. return err;
  252. P9_DPRINTK(P9_DEBUG_TRANS, "9p: May gup pages now.\n");
  253. }
  254. if (rpinfo_size <= (req->tc->capacity - req->tc->size)) {
  255. /* We can use sdata */
  256. req->tc->private = req->tc->sdata + req->tc->size;
  257. rpinfo = (struct trans_rpage_info *)req->tc->private;
  258. rpinfo->rp_alloc = 0;
  259. } else {
  260. req->tc->private = kmalloc(rpinfo_size, GFP_NOFS);
  261. if (!req->tc->private) {
  262. P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: "
  263. "private kmalloc returned NULL");
  264. return -ENOMEM;
  265. }
  266. rpinfo = (struct trans_rpage_info *)req->tc->private;
  267. rpinfo->rp_alloc = 1;
  268. }
  269. err = p9_payload_gup(req, &pdata_off, &pdata_len, nr_pages,
  270. req->tc->id == P9_TREAD ? 1 : 0);
  271. if (err < 0) {
  272. if (rpinfo->rp_alloc)
  273. kfree(rpinfo);
  274. return err;
  275. } else {
  276. atomic_add(rpinfo->rp_nr_pages, &vp_pinned);
  277. }
  278. }
  279. req_retry_pinned:
  280. spin_lock_irqsave(&chan->lock, flags);
  281. /* Handle out VirtIO ring buffers */
  282. out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata,
  283. req->tc->size);
  284. if (req->tc->pbuf_size && (req->tc->id == P9_TWRITE)) {
  285. /* We have additional write payload buffer to take care */
  286. if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
  287. outp = pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
  288. pdata_off, rpinfo->rp_data, pdata_len);
  289. } else {
  290. char *pbuf = req->tc->pubuf ? req->tc->pubuf :
  291. req->tc->pkbuf;
  292. outp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, pbuf,
  293. req->tc->pbuf_size);
  294. }
  295. out += outp;
  296. }
  297. /* Handle in VirtIO ring buffers */
  298. if (req->tc->pbuf_size &&
  299. ((req->tc->id == P9_TREAD) || (req->tc->id == P9_TREADDIR))) {
  300. /*
  301. * Take care of additional Read payload.
  302. * 11 is the read/write header = PDU Header(7) + IO Size (4).
  303. * Arrange in such a way that server places header in the
  304. * alloced memory and payload onto the user buffer.
  305. */
  306. inp = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata, 11);
  307. /*
  308. * Running executables in the filesystem may result in
  309. * a read request with kernel buffer as opposed to user buffer.
  310. */
  311. if (req->tc->pubuf && P9_IS_USER_CONTEXT) {
  312. in = pack_sg_list_p(chan->sg, out+inp, VIRTQUEUE_NUM,
  313. pdata_off, rpinfo->rp_data, pdata_len);
  314. } else {
  315. char *pbuf = req->tc->pubuf ? req->tc->pubuf :
  316. req->tc->pkbuf;
  317. in = pack_sg_list(chan->sg, out+inp, VIRTQUEUE_NUM,
  318. pbuf, req->tc->pbuf_size);
  319. }
  320. in += inp;
  321. } else {
  322. in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, rdata,
  323. client->msize);
  324. }
  325. err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc);
  326. if (err < 0) {
  327. if (err == -ENOSPC) {
  328. chan->ring_bufs_avail = 0;
  329. spin_unlock_irqrestore(&chan->lock, flags);
  330. err = wait_event_interruptible(*chan->vc_wq,
  331. chan->ring_bufs_avail);
  332. if (err == -ERESTARTSYS)
  333. return err;
  334. P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n");
  335. goto req_retry_pinned;
  336. } else {
  337. spin_unlock_irqrestore(&chan->lock, flags);
  338. P9_DPRINTK(P9_DEBUG_TRANS,
  339. "9p debug: "
  340. "virtio rpc add_buf returned failure");
  341. if (rpinfo && rpinfo->rp_alloc)
  342. kfree(rpinfo);
  343. return -EIO;
  344. }
  345. }
  346. virtqueue_kick(chan->vq);
  347. spin_unlock_irqrestore(&chan->lock, flags);
  348. P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n");
  349. return 0;
  350. }
  351. static ssize_t p9_mount_tag_show(struct device *dev,
  352. struct device_attribute *attr, char *buf)
  353. {
  354. struct virtio_chan *chan;
  355. struct virtio_device *vdev;
  356. vdev = dev_to_virtio(dev);
  357. chan = vdev->priv;
  358. return snprintf(buf, chan->tag_len + 1, "%s", chan->tag);
  359. }
  360. static DEVICE_ATTR(mount_tag, 0444, p9_mount_tag_show, NULL);
  361. /**
  362. * p9_virtio_probe - probe for existence of 9P virtio channels
  363. * @vdev: virtio device to probe
  364. *
  365. * This probes for existing virtio channels.
  366. *
  367. */
  368. static int p9_virtio_probe(struct virtio_device *vdev)
  369. {
  370. __u16 tag_len;
  371. char *tag;
  372. int err;
  373. struct virtio_chan *chan;
  374. chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL);
  375. if (!chan) {
  376. printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n");
  377. err = -ENOMEM;
  378. goto fail;
  379. }
  380. chan->vdev = vdev;
  381. /* We expect one virtqueue, for requests. */
  382. chan->vq = virtio_find_single_vq(vdev, req_done, "requests");
  383. if (IS_ERR(chan->vq)) {
  384. err = PTR_ERR(chan->vq);
  385. goto out_free_vq;
  386. }
  387. chan->vq->vdev->priv = chan;
  388. spin_lock_init(&chan->lock);
  389. sg_init_table(chan->sg, VIRTQUEUE_NUM);
  390. chan->inuse = false;
  391. if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) {
  392. vdev->config->get(vdev,
  393. offsetof(struct virtio_9p_config, tag_len),
  394. &tag_len, sizeof(tag_len));
  395. } else {
  396. err = -EINVAL;
  397. goto out_free_vq;
  398. }
  399. tag = kmalloc(tag_len, GFP_KERNEL);
  400. if (!tag) {
  401. err = -ENOMEM;
  402. goto out_free_vq;
  403. }
  404. vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag),
  405. tag, tag_len);
  406. chan->tag = tag;
  407. chan->tag_len = tag_len;
  408. err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
  409. if (err) {
  410. goto out_free_tag;
  411. }
  412. chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
  413. if (!chan->vc_wq) {
  414. err = -ENOMEM;
  415. goto out_free_tag;
  416. }
  417. init_waitqueue_head(chan->vc_wq);
  418. chan->ring_bufs_avail = 1;
  419. /* Ceiling limit to avoid denial of service attacks */
  420. chan->p9_max_pages = nr_free_buffer_pages()/4;
  421. mutex_lock(&virtio_9p_lock);
  422. list_add_tail(&chan->chan_list, &virtio_chan_list);
  423. mutex_unlock(&virtio_9p_lock);
  424. return 0;
  425. out_free_tag:
  426. kfree(tag);
  427. out_free_vq:
  428. vdev->config->del_vqs(vdev);
  429. kfree(chan);
  430. fail:
  431. return err;
  432. }
  433. /**
  434. * p9_virtio_create - allocate a new virtio channel
  435. * @client: client instance invoking this transport
  436. * @devname: string identifying the channel to connect to (unused)
  437. * @args: args passed from sys_mount() for per-transport options (unused)
  438. *
  439. * This sets up a transport channel for 9p communication. Right now
  440. * we only match the first available channel, but eventually we couldlook up
  441. * alternate channels by matching devname versus a virtio_config entry.
  442. * We use a simple reference count mechanism to ensure that only a single
  443. * mount has a channel open at a time.
  444. *
  445. */
  446. static int
  447. p9_virtio_create(struct p9_client *client, const char *devname, char *args)
  448. {
  449. struct virtio_chan *chan;
  450. int ret = -ENOENT;
  451. int found = 0;
  452. mutex_lock(&virtio_9p_lock);
  453. list_for_each_entry(chan, &virtio_chan_list, chan_list) {
  454. if (!strncmp(devname, chan->tag, chan->tag_len) &&
  455. strlen(devname) == chan->tag_len) {
  456. if (!chan->inuse) {
  457. chan->inuse = true;
  458. found = 1;
  459. break;
  460. }
  461. ret = -EBUSY;
  462. }
  463. }
  464. mutex_unlock(&virtio_9p_lock);
  465. if (!found) {
  466. printk(KERN_ERR "9p: no channels available\n");
  467. return ret;
  468. }
  469. client->trans = (void *)chan;
  470. client->status = Connected;
  471. chan->client = client;
  472. return 0;
  473. }
  474. /**
  475. * p9_virtio_remove - clean up resources associated with a virtio device
  476. * @vdev: virtio device to remove
  477. *
  478. */
  479. static void p9_virtio_remove(struct virtio_device *vdev)
  480. {
  481. struct virtio_chan *chan = vdev->priv;
  482. BUG_ON(chan->inuse);
  483. vdev->config->del_vqs(vdev);
  484. mutex_lock(&virtio_9p_lock);
  485. list_del(&chan->chan_list);
  486. mutex_unlock(&virtio_9p_lock);
  487. sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
  488. kfree(chan->tag);
  489. kfree(chan->vc_wq);
  490. kfree(chan);
  491. }
  492. static struct virtio_device_id id_table[] = {
  493. { VIRTIO_ID_9P, VIRTIO_DEV_ANY_ID },
  494. { 0 },
  495. };
  496. static unsigned int features[] = {
  497. VIRTIO_9P_MOUNT_TAG,
  498. };
  499. /* The standard "struct lguest_driver": */
  500. static struct virtio_driver p9_virtio_drv = {
  501. .feature_table = features,
  502. .feature_table_size = ARRAY_SIZE(features),
  503. .driver.name = KBUILD_MODNAME,
  504. .driver.owner = THIS_MODULE,
  505. .id_table = id_table,
  506. .probe = p9_virtio_probe,
  507. .remove = p9_virtio_remove,
  508. };
  509. static struct p9_trans_module p9_virtio_trans = {
  510. .name = "virtio",
  511. .create = p9_virtio_create,
  512. .close = p9_virtio_close,
  513. .request = p9_virtio_request,
  514. .cancel = p9_virtio_cancel,
  515. .maxsize = PAGE_SIZE*16,
  516. .pref = P9_TRANS_PREF_PAYLOAD_SEP,
  517. .def = 0,
  518. .owner = THIS_MODULE,
  519. };
  520. /* The standard init function */
  521. static int __init p9_virtio_init(void)
  522. {
  523. INIT_LIST_HEAD(&virtio_chan_list);
  524. v9fs_register_trans(&p9_virtio_trans);
  525. return register_virtio_driver(&p9_virtio_drv);
  526. }
  527. static void __exit p9_virtio_cleanup(void)
  528. {
  529. unregister_virtio_driver(&p9_virtio_drv);
  530. v9fs_unregister_trans(&p9_virtio_trans);
  531. }
/* Module entry and exit points. */
module_init(p9_virtio_init);
module_exit(p9_virtio_cleanup);
/* Publish id_table as this module's virtio device table. */
MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
MODULE_DESCRIPTION("Virtio 9p Transport");
MODULE_LICENSE("GPL");