xen-blkfront.c

/*
 * blkfront.c
 *
 * XenLinux virtual block device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 * Copyright (c) 2004, Andrew Warfield
 * Copyright (c) 2005, Christopher Clark
 * Copyright (c) 2005, XenSource Ltd
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/interrupt.h>
#include <linux/blkdev.h>
#include <linux/module.h>

#include <xen/xenbus.h>
#include <xen/grant_table.h>
#include <xen/events.h>
#include <xen/page.h>

#include <xen/interface/grant_table.h>
#include <xen/interface/io/blkif.h>

#include <asm/xen/hypervisor.h>

enum blkif_state {
        BLKIF_STATE_DISCONNECTED,
        BLKIF_STATE_CONNECTED,
        BLKIF_STATE_SUSPENDED,
};

struct blk_shadow {
        struct blkif_request req;
        unsigned long request;
        unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};

static struct block_device_operations xlvbd_block_fops;

#define BLK_RING_SIZE __RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)

/*
 * We have one of these per vbd, whether ide, scsi or 'other'.  They
 * hang in private_data off the gendisk structure. We may end up
 * putting all kinds of interesting stuff here :-)
 */
struct blkfront_info
{
        struct xenbus_device *xbdev;
        dev_t dev;
        struct gendisk *gd;
        int vdevice;
        blkif_vdev_t handle;
        enum blkif_state connected;
        int ring_ref;
        struct blkif_front_ring ring;
        unsigned int evtchn, irq;
        struct request_queue *rq;
        struct work_struct work;
        struct gnttab_free_callback callback;
        struct blk_shadow shadow[BLK_RING_SIZE];
        unsigned long shadow_free;
        int feature_barrier;

        /**
         * The number of people holding this device open.  We won't allow a
         * hot-unplug unless this is 0.
         */
        int users;
};

static DEFINE_SPINLOCK(blkif_io_lock);

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
        (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
#define GRANT_INVALID_REF 0

#define PARTS_PER_DISK 16

#define BLKIF_MAJOR(dev) ((dev)>>8)
#define BLKIF_MINOR(dev) ((dev) & 0xff)

#define DEV_NAME "xvd" /* name in /dev */

/* Information about our VBDs. */
#define MAX_VBDS 64
static LIST_HEAD(vbds_list);
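
/*
 * The shadow entries double as a free list: unused slots are chained
 * together through their req.id fields, with shadow_free pointing at the
 * head.  get_id_from_freelist() pops a slot for a new request and
 * add_id_to_freelist() pushes it back once the request has completed.
 */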
static int get_id_from_freelist(struct blkfront_info *info)
{
        unsigned long free = info->shadow_free;
        BUG_ON(free > BLK_RING_SIZE);
        info->shadow_free = info->shadow[free].req.id;
        info->shadow[free].req.id = 0x0fffffee; /* debug */
        return free;
}

static void add_id_to_freelist(struct blkfront_info *info,
                               unsigned long id)
{
        info->shadow[id].req.id = info->shadow_free;
        info->shadow[id].request = 0;
        info->shadow_free = id;
}
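
/*
 * Grant-table callback, registered by blkif_queue_request() when it runs
 * out of grant references; once enough references are free again it simply
 * schedules the queue-restart work item (blkif_restart_queue).
 */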
static void blkif_restart_queue_callback(void *arg)
{
        struct blkfront_info *info = (struct blkfront_info *)arg;
        schedule_work(&info->work);
}

/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int blkif_queue_request(struct request *req)
{
        struct blkfront_info *info = req->rq_disk->private_data;
        unsigned long buffer_mfn;
        struct blkif_request *ring_req;
        struct req_iterator iter;
        struct bio_vec *bvec;
        unsigned long id;
        unsigned int fsect, lsect;
        int ref;
        grant_ref_t gref_head;

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                return 1;

        if (gnttab_alloc_grant_references(
                BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
                gnttab_request_free_callback(
                        &info->callback,
                        blkif_restart_queue_callback,
                        info,
                        BLKIF_MAX_SEGMENTS_PER_REQUEST);
                return 1;
        }

        /* Fill out a communications ring structure. */
        ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
        id = get_id_from_freelist(info);
        info->shadow[id].request = (unsigned long)req;

        ring_req->id = id;
        ring_req->sector_number = (blkif_sector_t)req->sector;
        ring_req->handle = info->handle;

        ring_req->operation = rq_data_dir(req) ?
                BLKIF_OP_WRITE : BLKIF_OP_READ;
        if (blk_barrier_rq(req))
                ring_req->operation = BLKIF_OP_WRITE_BARRIER;

        ring_req->nr_segments = 0;
        rq_for_each_segment(bvec, req, iter) {
                BUG_ON(ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST);
                buffer_mfn = pfn_to_mfn(page_to_pfn(bvec->bv_page));
                fsect = bvec->bv_offset >> 9;
                lsect = fsect + (bvec->bv_len >> 9) - 1;

                /* install a grant reference. */
                ref = gnttab_claim_grant_reference(&gref_head);
                BUG_ON(ref == -ENOSPC);

                gnttab_grant_foreign_access_ref(
                                ref,
                                info->xbdev->otherend_id,
                                buffer_mfn,
                                rq_data_dir(req));

                info->shadow[id].frame[ring_req->nr_segments] =
                                mfn_to_pfn(buffer_mfn);

                ring_req->seg[ring_req->nr_segments] =
                                (struct blkif_request_segment) {
                                        .gref = ref,
                                        .first_sect = fsect,
                                        .last_sect = lsect };

                ring_req->nr_segments++;
        }

        info->ring.req_prod_pvt++;

        /* Keep a private copy so we can reissue requests when recovering. */
        info->shadow[id].req = *ring_req;

        gnttab_free_grant_references(gref_head);

        return 0;
}
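
/*
 * Push any privately produced requests out onto the shared ring and, if the
 * ring macro indicates the backend needs waking, notify it over the event
 * channel.
 */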
static inline void flush_requests(struct blkfront_info *info)
{
        int notify;

        RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);

        if (notify)
                notify_remote_via_irq(info->irq);
}

/*
 * do_blkif_request
 *  read a block; request is in a request queue
 */
static void do_blkif_request(struct request_queue *rq)
{
        struct blkfront_info *info = NULL;
        struct request *req;
        int queued;

        pr_debug("Entered do_blkif_request\n");

        queued = 0;

        while ((req = elv_next_request(rq)) != NULL) {
                info = req->rq_disk->private_data;
                if (!blk_fs_request(req)) {
                        end_request(req, 0);
                        continue;
                }

                if (RING_FULL(&info->ring))
                        goto wait;

                pr_debug("do_blk_req %p: cmd %p, sec %lx, "
                         "(%u/%li) buffer:%p [%s]\n",
                         req, req->cmd, (unsigned long)req->sector,
                         req->current_nr_sectors,
                         req->nr_sectors, req->buffer,
                         rq_data_dir(req) ? "write" : "read");

                blkdev_dequeue_request(req);
                if (blkif_queue_request(req)) {
                        blk_requeue_request(rq, req);
wait:
                        /* Avoid pointless unplugs. */
                        blk_stop_queue(rq);
                        break;
                }

                queued++;
        }

        if (queued != 0)
                flush_requests(info);
}
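
/*
 * Create and tune the block-layer request queue for a vbd: noop elevator,
 * page-sized segments, and a per-request segment count bounded so that any
 * merged request still fits in a single blkif ring slot.
 */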
static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
{
        struct request_queue *rq;

        rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
        if (rq == NULL)
                return -1;

        elevator_init(rq, "noop");

        /* Hard sector size and max sectors impersonate the equiv. hardware. */
        blk_queue_hardsect_size(rq, sector_size);
        blk_queue_max_sectors(rq, 512);

        /* Each segment in a request is up to an aligned page in size. */
        blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
        blk_queue_max_segment_size(rq, PAGE_SIZE);

        /* Ensure a merged request will fit in a single I/O ring slot. */
        blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
        blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);

        /* Make sure buffer addresses are sector-aligned. */
        blk_queue_dma_alignment(rq, 511);

        gd->queue = rq;

        return 0;
}
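
/*
 * Advertise (or withdraw) ordered-write support on the request queue
 * according to the backend's feature-barrier flag.
 */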
static int xlvbd_barrier(struct blkfront_info *info)
{
        int err;

        err = blk_queue_ordered(info->rq,
                                info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
                                NULL);

        if (err)
                return err;

        printk(KERN_INFO "blkfront: %s: barriers %s\n",
               info->gd->disk_name,
               info->feature_barrier ? "enabled" : "disabled");
        return 0;
}
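
/*
 * Allocate and populate the gendisk for a vbd: derive the /dev/xvd* name
 * from the minor number, attach the request queue, and apply the
 * read-only/removable/cdrom attributes reported by the backend.
 */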
static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity,
                               int vdevice, u16 vdisk_info, u16 sector_size,
                               struct blkfront_info *info)
{
        struct gendisk *gd;
        int nr_minors = 1;
        int err = -ENODEV;

        BUG_ON(info->gd != NULL);
        BUG_ON(info->rq != NULL);

        if ((minor % PARTS_PER_DISK) == 0)
                nr_minors = PARTS_PER_DISK;

        gd = alloc_disk(nr_minors);
        if (gd == NULL)
                goto out;

        if (nr_minors > 1)
                sprintf(gd->disk_name, "%s%c", DEV_NAME,
                        'a' + minor / PARTS_PER_DISK);
        else
                sprintf(gd->disk_name, "%s%c%d", DEV_NAME,
                        'a' + minor / PARTS_PER_DISK,
                        minor % PARTS_PER_DISK);

        gd->major = XENVBD_MAJOR;
        gd->first_minor = minor;
        gd->fops = &xlvbd_block_fops;
        gd->private_data = info;
        gd->driverfs_dev = &(info->xbdev->dev);
        set_capacity(gd, capacity);

        if (xlvbd_init_blk_queue(gd, sector_size)) {
                del_gendisk(gd);
                goto out;
        }

        info->rq = gd->queue;
        info->gd = gd;

        if (info->feature_barrier)
                xlvbd_barrier(info);

        if (vdisk_info & VDISK_READONLY)
                set_disk_ro(gd, 1);

        if (vdisk_info & VDISK_REMOVABLE)
                gd->flags |= GENHD_FL_REMOVABLE;

        if (vdisk_info & VDISK_CDROM)
                gd->flags |= GENHD_FL_CD;

        return 0;

 out:
        return err;
}
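
/* Restart the request queue (and poll it once) if the shared ring has room again. */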
static void kick_pending_request_queues(struct blkfront_info *info)
{
        if (!RING_FULL(&info->ring)) {
                /* Re-enable calldowns. */
                blk_start_queue(info->rq);
                /* Kick things off immediately. */
                do_blkif_request(info->rq);
        }
}

static void blkif_restart_queue(struct work_struct *work)
{
        struct blkfront_info *info = container_of(work, struct blkfront_info, work);

        spin_lock_irq(&blkif_io_lock);
        if (info->connected == BLKIF_STATE_CONNECTED)
                kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);
}
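
/*
 * Tear down the device channel: stop the request queue, cancel any pending
 * grant-table callback, and release the shared ring, its grant reference and
 * the event-channel irq.  'suspend' selects whether the connection state is
 * marked suspended (so it can be recovered) or fully disconnected.
 */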
static void blkif_free(struct blkfront_info *info, int suspend)
{
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = suspend ?
                BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
        /* No more blkif_request(). */
        if (info->rq)
                blk_stop_queue(info->rq);
        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
        spin_unlock_irq(&blkif_io_lock);

        /* Flush gnttab callback work. Must be done with no locks held. */
        flush_scheduled_work();

        /* Free resources associated with old device channel. */
        if (info->ring_ref != GRANT_INVALID_REF) {
                gnttab_end_foreign_access(info->ring_ref, 0,
                                          (unsigned long)info->ring.sring);
                info->ring_ref = GRANT_INVALID_REF;
                info->ring.sring = NULL;
        }
        if (info->irq)
                unbind_from_irqhandler(info->irq, info);
        info->evtchn = info->irq = 0;
}
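
/* Revoke the foreign access grants used by a completed request's segments. */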
static void blkif_completion(struct blk_shadow *s)
{
        int i;
        for (i = 0; i < s->req.nr_segments; i++)
                gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
}
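
/*
 * Event-channel interrupt handler: walk the response ring, complete the
 * corresponding block-layer requests, recycle their shadow slots and grants,
 * then restart the request queue now that ring space has been freed.
 */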
static irqreturn_t blkif_interrupt(int irq, void *dev_id)
{
        struct request *req;
        struct blkif_response *bret;
        RING_IDX i, rp;
        unsigned long flags;
        struct blkfront_info *info = (struct blkfront_info *)dev_id;
        int uptodate;

        spin_lock_irqsave(&blkif_io_lock, flags);

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
                spin_unlock_irqrestore(&blkif_io_lock, flags);
                return IRQ_HANDLED;
        }

 again:
        rp = info->ring.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */

        for (i = info->ring.rsp_cons; i != rp; i++) {
                unsigned long id;
                int ret;

                bret = RING_GET_RESPONSE(&info->ring, i);
                id = bret->id;
                req = (struct request *)info->shadow[id].request;

                blkif_completion(&info->shadow[id]);

                add_id_to_freelist(info, id);

                uptodate = (bret->status == BLKIF_RSP_OKAY);
                switch (bret->operation) {
                case BLKIF_OP_WRITE_BARRIER:
                        if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
                                printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
                                       info->gd->disk_name);
                                uptodate = -EOPNOTSUPP;
                                info->feature_barrier = 0;
                                xlvbd_barrier(info);
                        }
                        /* fall through */
                case BLKIF_OP_READ:
                case BLKIF_OP_WRITE:
                        if (unlikely(bret->status != BLKIF_RSP_OKAY))
                                dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
                                        "request: %x\n", bret->status);

                        ret = end_that_request_first(req, uptodate,
                                                     req->hard_nr_sectors);
                        BUG_ON(ret);
                        end_that_request_last(req, uptodate);
                        break;
                default:
                        BUG();
                }
        }

        info->ring.rsp_cons = i;

        if (i != info->ring.req_prod_pvt) {
                int more_to_do;
                RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
                if (more_to_do)
                        goto again;
        } else
                info->ring.sring->rsp_event = i + 1;

        kick_pending_request_queues(info);

        spin_unlock_irqrestore(&blkif_io_lock, flags);

        return IRQ_HANDLED;
}
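
/*
 * Allocate the shared ring page, grant the backend access to it, and bind an
 * event channel plus irq handler for notifications from the backend.
 */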
static int setup_blkring(struct xenbus_device *dev,
                         struct blkfront_info *info)
{
        struct blkif_sring *sring;
        int err;

        info->ring_ref = GRANT_INVALID_REF;

        sring = (struct blkif_sring *)__get_free_page(GFP_KERNEL);
        if (!sring) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

        err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
        if (err < 0) {
                free_page((unsigned long)sring);
                info->ring.sring = NULL;
                goto fail;
        }
        info->ring_ref = err;

        err = xenbus_alloc_evtchn(dev, &info->evtchn);
        if (err)
                goto fail;

        err = bind_evtchn_to_irqhandler(info->evtchn,
                                        blkif_interrupt,
                                        IRQF_SAMPLE_RANDOM, "blkif", info);
        if (err <= 0) {
                xenbus_dev_fatal(dev, err,
                                 "bind_evtchn_to_irqhandler failed");
                goto fail;
        }
        info->irq = err;

        return 0;
fail:
        blkif_free(info, 0);
        return err;
}

/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
                           struct blkfront_info *info)
{
        const char *message = NULL;
        struct xenbus_transaction xbt;
        int err;

        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err)
                goto out;

again:
        err = xenbus_transaction_start(&xbt);
        if (err) {
                xenbus_dev_fatal(dev, err, "starting transaction");
                goto destroy_blkring;
        }

        err = xenbus_printf(xbt, dev->nodename,
                            "ring-ref", "%u", info->ring_ref);
        if (err) {
                message = "writing ring-ref";
                goto abort_transaction;
        }
        err = xenbus_printf(xbt, dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }

        err = xenbus_transaction_end(xbt, 0);
        if (err) {
                if (err == -EAGAIN)
                        goto again;
                xenbus_dev_fatal(dev, err, "completing transaction");
                goto destroy_blkring;
        }

        xenbus_switch_state(dev, XenbusStateInitialised);

        return 0;

 abort_transaction:
        xenbus_transaction_end(xbt, 1);
        if (message)
                xenbus_dev_fatal(dev, err, "%s", message);
 destroy_blkring:
        blkif_free(info, 0);
 out:
        return err;
}

/**
 * Entry point to this code when a new device is created.  Allocate the basic
 * structures and the ring buffer for communication with the backend, and
 * inform the backend of the appropriate details for those.  Switch to
 * Initialised state.
 */
static int blkfront_probe(struct xenbus_device *dev,
                          const struct xenbus_device_id *id)
{
        int err, vdevice, i;
        struct blkfront_info *info;

        /* FIXME: Use dynamic device id if this is not set. */
        err = xenbus_scanf(XBT_NIL, dev->nodename,
                           "virtual-device", "%i", &vdevice);
        if (err != 1) {
                xenbus_dev_fatal(dev, err, "reading virtual-device");
                return err;
        }

        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
                return -ENOMEM;
        }

        info->xbdev = dev;
        info->vdevice = vdevice;
        info->connected = BLKIF_STATE_DISCONNECTED;
        INIT_WORK(&info->work, blkif_restart_queue);

        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.id = i+1;
        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
        dev->dev.driver_data = info;

        err = talk_to_backend(dev, info);
        if (err) {
                kfree(info);
                dev->dev.driver_data = NULL;
                return err;
        }

        return 0;
}
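
/*
 * Replay after suspend/resume: snapshot the old shadow state, rebuild the
 * free list, re-grant each pending request's segments (the old grant
 * references were invalidated by suspend/resume) and requeue those requests
 * on the new ring.
 */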
static int blkif_recover(struct blkfront_info *info)
{
        int i;
        struct blkif_request *req;
        struct blk_shadow *copy;
        int j;

        /* Stage 1: Make a safe copy of the shadow state. */
        copy = kmalloc(sizeof(info->shadow), GFP_KERNEL);
        if (!copy)
                return -ENOMEM;
        memcpy(copy, info->shadow, sizeof(info->shadow));

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
        for (i = 0; i < BLK_RING_SIZE; i++) {
                /* Not in use? */
                if (copy[i].request == 0)
                        continue;

                /* Grab a request slot and copy shadow state into it. */
                req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
                *req = copy[i].req;

                /* We get a new request id, and must reset the shadow state. */
                req->id = get_id_from_freelist(info);
                memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

                /* Rewrite any grant references invalidated by susp/resume. */
                for (j = 0; j < req->nr_segments; j++)
                        gnttab_grant_foreign_access_ref(
                                req->seg[j].gref,
                                info->xbdev->otherend_id,
                                pfn_to_mfn(info->shadow[req->id].frame[j]),
                                rq_data_dir(
                                        (struct request *)
                                        info->shadow[req->id].request));
                info->shadow[req->id].req = *req;

                info->ring.req_prod_pvt++;
        }

        kfree(copy);

        xenbus_switch_state(info->xbdev, XenbusStateConnected);

        spin_lock_irq(&blkif_io_lock);

        /* Now safe for us to use the shared ring */
        info->connected = BLKIF_STATE_CONNECTED;

        /* Send off requeued requests */
        flush_requests(info);

        /* Kick any other new requests queued since we resumed */
        kick_pending_request_queues(info);

        spin_unlock_irq(&blkif_io_lock);

        return 0;
}

/**
 * We are reconnecting to the backend, due to a suspend/resume, or a backend
 * driver restart.  We tear down our blkif structure and recreate it, but
 * leave the device-layer structures intact so that this is transparent to the
 * rest of the kernel.
 */
static int blkfront_resume(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->dev.driver_data;
        int err;

        dev_dbg(&dev->dev, "blkfront_resume: %s\n", dev->nodename);

        blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);

        err = talk_to_backend(dev, info);
        if (info->connected == BLKIF_STATE_SUSPENDED && !err)
                err = blkif_recover(info);

        return err;
}

/*
 * Invoked when the backend is finally 'ready' (and has provided the details
 * about the physical device - #sectors, size, etc).
 */
static void blkfront_connect(struct blkfront_info *info)
{
        unsigned long long sectors;
        unsigned long sector_size;
        unsigned int binfo;
        int err;

        if ((info->connected == BLKIF_STATE_CONNECTED) ||
            (info->connected == BLKIF_STATE_SUSPENDED))
                return;

        dev_dbg(&info->xbdev->dev, "%s:%s.\n",
                __func__, info->xbdev->otherend);

        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                            "sectors", "%llu", &sectors,
                            "info", "%u", &binfo,
                            "sector-size", "%lu", &sector_size,
                            NULL);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err,
                                 "reading backend fields at %s",
                                 info->xbdev->otherend);
                return;
        }

        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                            "feature-barrier", "%lu", &info->feature_barrier,
                            NULL);
        if (err)
                info->feature_barrier = 0;

        err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice),
                                  sectors, info->vdevice,
                                  binfo, sector_size, info);
        if (err) {
                xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
                                 info->xbdev->otherend);
                return;
        }

        xenbus_switch_state(info->xbdev, XenbusStateConnected);

        /* Kick pending requests. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = BLKIF_STATE_CONNECTED;
        kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);

        add_disk(info->gd);
}

/**
 * Handle the change of state of the backend to Closing.  We must delete our
 * device-layer structures now, to ensure that writes are flushed through to
 * the backend.  Once this is done, we can switch to Closed in
 * acknowledgement.
 */
static void blkfront_closing(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->dev.driver_data;
        unsigned long flags;

        dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);

        if (info->rq == NULL)
                goto out;

        spin_lock_irqsave(&blkif_io_lock, flags);

        del_gendisk(info->gd);

        /* No more blkif_request(). */
        blk_stop_queue(info->rq);

        /* No more gnttab callback work. */
        gnttab_cancel_free_callback(&info->callback);
        spin_unlock_irqrestore(&blkif_io_lock, flags);

        /* Flush gnttab callback work. Must be done with no locks held. */
        flush_scheduled_work();

        blk_cleanup_queue(info->rq);
        info->rq = NULL;

 out:
        xenbus_frontend_closed(dev);
}

/**
 * Callback received when the backend's state changes.
 */
static void backend_changed(struct xenbus_device *dev,
                            enum xenbus_state backend_state)
{
        struct blkfront_info *info = dev->dev.driver_data;
        struct block_device *bd;

        dev_dbg(&dev->dev, "blkfront:backend_changed.\n");

        switch (backend_state) {
        case XenbusStateInitialising:
        case XenbusStateInitWait:
        case XenbusStateInitialised:
        case XenbusStateUnknown:
        case XenbusStateClosed:
                break;

        case XenbusStateConnected:
                blkfront_connect(info);
                break;

        case XenbusStateClosing:
                bd = bdget(info->dev);
                if (bd == NULL)
                        xenbus_dev_fatal(dev, -ENODEV, "bdget failed");

                mutex_lock(&bd->bd_mutex);
                if (info->users > 0)
                        xenbus_dev_error(dev, -EBUSY,
                                         "Device in use; refusing to close");
                else
                        blkfront_closing(dev);
                mutex_unlock(&bd->bd_mutex);
                bdput(bd);
                break;
        }
}

static int blkfront_remove(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->dev.driver_data;

        dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);

        blkif_free(info, 0);

        kfree(info);

        return 0;
}
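
/*
 * Open/release just maintain the 'users' count; a close request from the
 * backend that arrived while the device was still in use is acted on only
 * when the last user releases it.
 */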
static int blkif_open(struct inode *inode, struct file *filep)
{
        struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
        info->users++;
        return 0;
}

static int blkif_release(struct inode *inode, struct file *filep)
{
        struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
        info->users--;
        if (info->users == 0) {
                /* Check whether we have been instructed to close.  We will
                   have ignored this request initially, as the device was
                   still mounted. */
                struct xenbus_device *dev = info->xbdev;
                enum xenbus_state state = xenbus_read_driver_state(dev->otherend);

                if (state == XenbusStateClosing)
                        blkfront_closing(dev);
        }
        return 0;
}

static struct block_device_operations xlvbd_block_fops =
{
        .owner = THIS_MODULE,
        .open = blkif_open,
        .release = blkif_release,
};

static struct xenbus_device_id blkfront_ids[] = {
        { "vbd" },
        { "" }
};

static struct xenbus_driver blkfront = {
        .name = "vbd",
        .owner = THIS_MODULE,
        .ids = blkfront_ids,
        .probe = blkfront_probe,
        .remove = blkfront_remove,
        .resume = blkfront_resume,
        .otherend_changed = backend_changed,
};
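
/*
 * Module init: claim the xvd block major and register the frontend with the
 * xenbus core, which then calls blkfront_probe() for each vbd it finds.
 */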
static int __init xlblk_init(void)
{
        if (!is_running_on_xen())
                return -ENODEV;

        if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {
                printk(KERN_WARNING "xen_blk: can't get major %d with name %s\n",
                       XENVBD_MAJOR, DEV_NAME);
                return -ENODEV;
        }

        return xenbus_register_frontend(&blkfront);
}
module_init(xlblk_init);

static void xlblk_exit(void)
{
        return xenbus_unregister_driver(&blkfront);
}
module_exit(xlblk_exit);

MODULE_DESCRIPTION("Xen virtual block device frontend");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(XENVBD_MAJOR);