virtio_balloon.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568
  1. /*
  2. * Virtio balloon implementation, inspired by Dor Laor and Marcelo
  3. * Tosatti's implementations.
  4. *
  5. * Copyright 2008 Rusty Russell IBM Corporation
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include <linux/virtio.h>
  22. #include <linux/virtio_balloon.h>
  23. #include <linux/swap.h>
  24. #include <linux/kthread.h>
  25. #include <linux/freezer.h>
  26. #include <linux/delay.h>
  27. #include <linux/slab.h>
  28. #include <linux/module.h>
  29. #include <linux/balloon_compaction.h>
  /*
   * The balloon device works in 4K page units, so each Linux page is covered
   * by one or more balloon pages. All memory counters in this driver are in
   * balloon page units.
   *
   * The PAGES_PER_PAGE expansion is wrapped in an outer pair of parentheses so
   * that it always behaves as a single operand, e.g. when used as the operand
   * of sizeof or next to a postfix operator.
   */
  #define VIRTIO_BALLOON_PAGES_PER_PAGE ((unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT))
  /* Capacity, in balloon pfns, of the array handed to the host per request. */
  #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
  37. struct virtio_balloon
  38. {
  39. struct virtio_device *vdev;
  40. struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
  41. /* Where the ballooning thread waits for config to change. */
  42. wait_queue_head_t config_change;
  43. /* The thread servicing the balloon. */
  44. struct task_struct *thread;
  45. /* Waiting for host to ack the pages we released. */
  46. wait_queue_head_t acked;
  47. /* Number of balloon pages we've told the Host we're not using. */
  48. unsigned int num_pages;
  49. /*
  50. * The pages we've told the Host we're not using are enqueued
  51. * at vb_dev_info->pages list.
  52. * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
  53. * to num_pages above.
  54. */
  55. struct balloon_dev_info *vb_dev_info;
  56. /* Synchronize access/update to this struct virtio_balloon elements */
  57. struct mutex balloon_lock;
  58. /* The array of pfns we tell the Host about. */
  59. unsigned int num_pfns;
  60. u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
  61. /* Memory statistics */
  62. int need_stats_update;
  63. struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
  64. };
  65. static struct virtio_device_id id_table[] = {
  66. { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID },
  67. { 0 },
  68. };
  69. static u32 page_to_balloon_pfn(struct page *page)
  70. {
  71. unsigned long pfn = page_to_pfn(page);
  72. BUILD_BUG_ON(PAGE_SHIFT < VIRTIO_BALLOON_PFN_SHIFT);
  73. /* Convert pfn from Linux page size to balloon page size. */
  74. return pfn * VIRTIO_BALLOON_PAGES_PER_PAGE;
  75. }
  76. static struct page *balloon_pfn_to_page(u32 pfn)
  77. {
  78. BUG_ON(pfn % VIRTIO_BALLOON_PAGES_PER_PAGE);
  79. return pfn_to_page(pfn / VIRTIO_BALLOON_PAGES_PER_PAGE);
  80. }
  81. static void balloon_ack(struct virtqueue *vq)
  82. {
  83. struct virtio_balloon *vb = vq->vdev->priv;
  84. wake_up(&vb->acked);
  85. }
  86. static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
  87. {
  88. struct scatterlist sg;
  89. unsigned int len;
  90. sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
  91. /* We should always be able to add one buffer to an empty queue. */
  92. if (virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL) < 0)
  93. BUG();
  94. virtqueue_kick(vq);
  95. /* When host has read buffer, this completes via balloon_ack */
  96. wait_event(vb->acked, virtqueue_get_buf(vq, &len));
  97. }
  98. static void set_page_pfns(u32 pfns[], struct page *page)
  99. {
  100. unsigned int i;
  101. /* Set balloon pfns pointing at this page.
  102. * Note that the first pfn points at start of the page. */
  103. for (i = 0; i < VIRTIO_BALLOON_PAGES_PER_PAGE; i++)
  104. pfns[i] = page_to_balloon_pfn(page) + i;
  105. }
  106. static void fill_balloon(struct virtio_balloon *vb, size_t num)
  107. {
  108. struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
  109. /* We can only do one array worth at a time. */
  110. num = min(num, ARRAY_SIZE(vb->pfns));
  111. mutex_lock(&vb->balloon_lock);
  112. for (vb->num_pfns = 0; vb->num_pfns < num;
  113. vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
  114. struct page *page = balloon_page_enqueue(vb_dev_info);
  115. if (!page) {
  116. dev_info_ratelimited(&vb->vdev->dev,
  117. "Out of puff! Can't get %u pages\n",
  118. VIRTIO_BALLOON_PAGES_PER_PAGE);
  119. /* Sleep for at least 1/5 of a second before retry. */
  120. msleep(200);
  121. break;
  122. }
  123. set_page_pfns(vb->pfns + vb->num_pfns, page);
  124. vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
  125. adjust_managed_page_count(page, -1);
  126. }
  127. /* Did we get any? */
  128. if (vb->num_pfns != 0)
  129. tell_host(vb, vb->inflate_vq);
  130. mutex_unlock(&vb->balloon_lock);
  131. }
  132. static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
  133. {
  134. unsigned int i;
  135. /* Find pfns pointing at start of each page, get pages and free them. */
  136. for (i = 0; i < num; i += VIRTIO_BALLOON_PAGES_PER_PAGE) {
  137. struct page *page = balloon_pfn_to_page(pfns[i]);
  138. balloon_page_free(page);
  139. adjust_managed_page_count(page, 1);
  140. }
  141. }
  142. static void leak_balloon(struct virtio_balloon *vb, size_t num)
  143. {
  144. struct page *page;
  145. struct balloon_dev_info *vb_dev_info = vb->vb_dev_info;
  146. /* We can only do one array worth at a time. */
  147. num = min(num, ARRAY_SIZE(vb->pfns));
  148. mutex_lock(&vb->balloon_lock);
  149. for (vb->num_pfns = 0; vb->num_pfns < num;
  150. vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
  151. page = balloon_page_dequeue(vb_dev_info);
  152. if (!page)
  153. break;
  154. set_page_pfns(vb->pfns + vb->num_pfns, page);
  155. vb->num_pages -= VIRTIO_BALLOON_PAGES_PER_PAGE;
  156. }
  157. /*
  158. * Note that if
  159. * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
  160. * is true, we *have* to do it in this order
  161. */
  162. if (vb->num_pfns != 0)
  163. tell_host(vb, vb->deflate_vq);
  164. mutex_unlock(&vb->balloon_lock);
  165. release_pages_by_pfn(vb->pfns, vb->num_pfns);
  166. }
  167. static inline void update_stat(struct virtio_balloon *vb, int idx,
  168. u16 tag, u64 val)
  169. {
  170. BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
  171. vb->stats[idx].tag = tag;
  172. vb->stats[idx].val = val;
  173. }
  174. #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
  175. static void update_balloon_stats(struct virtio_balloon *vb)
  176. {
  177. unsigned long events[NR_VM_EVENT_ITEMS];
  178. struct sysinfo i;
  179. int idx = 0;
  180. all_vm_events(events);
  181. si_meminfo(&i);
  182. update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
  183. pages_to_bytes(events[PSWPIN]));
  184. update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
  185. pages_to_bytes(events[PSWPOUT]));
  186. update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
  187. update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
  188. update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
  189. pages_to_bytes(i.freeram));
  190. update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
  191. pages_to_bytes(i.totalram));
  192. }
  193. /*
  194. * While most virtqueues communicate guest-initiated requests to the hypervisor,
  195. * the stats queue operates in reverse. The driver initializes the virtqueue
  196. * with a single buffer. From that point forward, all conversations consist of
  197. * a hypervisor request (a call to this function) which directs us to refill
  198. * the virtqueue with a fresh stats buffer. Since stats collection can sleep,
  199. * we notify our kthread which does the actual work via stats_handle_request().
  200. */
  201. static void stats_request(struct virtqueue *vq)
  202. {
  203. struct virtio_balloon *vb = vq->vdev->priv;
  204. vb->need_stats_update = 1;
  205. wake_up(&vb->config_change);
  206. }
  207. static void stats_handle_request(struct virtio_balloon *vb)
  208. {
  209. struct virtqueue *vq;
  210. struct scatterlist sg;
  211. unsigned int len;
  212. vb->need_stats_update = 0;
  213. update_balloon_stats(vb);
  214. vq = vb->stats_vq;
  215. if (!virtqueue_get_buf(vq, &len))
  216. return;
  217. sg_init_one(&sg, vb->stats, sizeof(vb->stats));
  218. if (virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL) < 0)
  219. BUG();
  220. virtqueue_kick(vq);
  221. }
  222. static void virtballoon_changed(struct virtio_device *vdev)
  223. {
  224. struct virtio_balloon *vb = vdev->priv;
  225. wake_up(&vb->config_change);
  226. }
  227. static inline s64 towards_target(struct virtio_balloon *vb)
  228. {
  229. __le32 v;
  230. s64 target;
  231. vb->vdev->config->get(vb->vdev,
  232. offsetof(struct virtio_balloon_config, num_pages),
  233. &v, sizeof(v));
  234. target = le32_to_cpu(v);
  235. return target - vb->num_pages;
  236. }
  237. static void update_balloon_size(struct virtio_balloon *vb)
  238. {
  239. __le32 actual = cpu_to_le32(vb->num_pages);
  240. vb->vdev->config->set(vb->vdev,
  241. offsetof(struct virtio_balloon_config, actual),
  242. &actual, sizeof(actual));
  243. }
  244. static int balloon(void *_vballoon)
  245. {
  246. struct virtio_balloon *vb = _vballoon;
  247. set_freezable();
  248. while (!kthread_should_stop()) {
  249. s64 diff;
  250. try_to_freeze();
  251. wait_event_interruptible(vb->config_change,
  252. (diff = towards_target(vb)) != 0
  253. || vb->need_stats_update
  254. || kthread_should_stop()
  255. || freezing(current));
  256. if (vb->need_stats_update)
  257. stats_handle_request(vb);
  258. if (diff > 0)
  259. fill_balloon(vb, diff);
  260. else if (diff < 0)
  261. leak_balloon(vb, -diff);
  262. update_balloon_size(vb);
  263. }
  264. return 0;
  265. }
  266. static int init_vqs(struct virtio_balloon *vb)
  267. {
  268. struct virtqueue *vqs[3];
  269. vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_request };
  270. const char *names[] = { "inflate", "deflate", "stats" };
  271. int err, nvqs;
  272. /*
  273. * We expect two virtqueues: inflate and deflate, and
  274. * optionally stat.
  275. */
  276. nvqs = virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ) ? 3 : 2;
  277. err = vb->vdev->config->find_vqs(vb->vdev, nvqs, vqs, callbacks, names);
  278. if (err)
  279. return err;
  280. vb->inflate_vq = vqs[0];
  281. vb->deflate_vq = vqs[1];
  282. if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_STATS_VQ)) {
  283. struct scatterlist sg;
  284. vb->stats_vq = vqs[2];
  285. /*
  286. * Prime this virtqueue with one buffer so the hypervisor can
  287. * use it to signal us later.
  288. */
  289. sg_init_one(&sg, vb->stats, sizeof vb->stats);
  290. if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
  291. < 0)
  292. BUG();
  293. virtqueue_kick(vb->stats_vq);
  294. }
  295. return 0;
  296. }
  297. static const struct address_space_operations virtio_balloon_aops;
  298. #ifdef CONFIG_BALLOON_COMPACTION
  299. /*
  300. * virtballoon_migratepage - perform the balloon page migration on behalf of
  301. * a compation thread. (called under page lock)
  302. * @mapping: the page->mapping which will be assigned to the new migrated page.
  303. * @newpage: page that will replace the isolated page after migration finishes.
  304. * @page : the isolated (old) page that is about to be migrated to newpage.
  305. * @mode : compaction mode -- not used for balloon page migration.
  306. *
  307. * After a ballooned page gets isolated by compaction procedures, this is the
  308. * function that performs the page migration on behalf of a compaction thread
  309. * The page migration for virtio balloon is done in a simple swap fashion which
  310. * follows these two macro steps:
  311. * 1) insert newpage into vb->pages list and update the host about it;
  312. * 2) update the host about the old page removed from vb->pages list;
  313. *
  314. * This function preforms the balloon page migration task.
  315. * Called through balloon_mapping->a_ops->migratepage
  316. */
  317. int virtballoon_migratepage(struct address_space *mapping,
  318. struct page *newpage, struct page *page, enum migrate_mode mode)
  319. {
  320. struct balloon_dev_info *vb_dev_info = balloon_page_device(page);
  321. struct virtio_balloon *vb;
  322. unsigned long flags;
  323. BUG_ON(!vb_dev_info);
  324. vb = vb_dev_info->balloon_device;
  325. /*
  326. * In order to avoid lock contention while migrating pages concurrently
  327. * to leak_balloon() or fill_balloon() we just give up the balloon_lock
  328. * this turn, as it is easier to retry the page migration later.
  329. * This also prevents fill_balloon() getting stuck into a mutex
  330. * recursion in the case it ends up triggering memory compaction
  331. * while it is attempting to inflate the ballon.
  332. */
  333. if (!mutex_trylock(&vb->balloon_lock))
  334. return -EAGAIN;
  335. /* balloon's page migration 1st step -- inflate "newpage" */
  336. spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
  337. balloon_page_insert(newpage, mapping, &vb_dev_info->pages);
  338. vb_dev_info->isolated_pages--;
  339. spin_unlock_irqrestore(&vb_dev_info->pages_lock, flags);
  340. vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
  341. set_page_pfns(vb->pfns, newpage);
  342. tell_host(vb, vb->inflate_vq);
  343. /*
  344. * balloon's page migration 2nd step -- deflate "page"
  345. *
  346. * It's safe to delete page->lru here because this page is at
  347. * an isolated migration list, and this step is expected to happen here
  348. */
  349. balloon_page_delete(page);
  350. vb->num_pfns = VIRTIO_BALLOON_PAGES_PER_PAGE;
  351. set_page_pfns(vb->pfns, page);
  352. tell_host(vb, vb->deflate_vq);
  353. mutex_unlock(&vb->balloon_lock);
  354. return MIGRATEPAGE_BALLOON_SUCCESS;
  355. }
  356. /* define the balloon_mapping->a_ops callback to allow balloon page migration */
  357. static const struct address_space_operations virtio_balloon_aops = {
  358. .migratepage = virtballoon_migratepage,
  359. };
  360. #endif /* CONFIG_BALLOON_COMPACTION */
  361. static int virtballoon_probe(struct virtio_device *vdev)
  362. {
  363. struct virtio_balloon *vb;
  364. struct address_space *vb_mapping;
  365. struct balloon_dev_info *vb_devinfo;
  366. int err;
  367. vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
  368. if (!vb) {
  369. err = -ENOMEM;
  370. goto out;
  371. }
  372. vb->num_pages = 0;
  373. mutex_init(&vb->balloon_lock);
  374. init_waitqueue_head(&vb->config_change);
  375. init_waitqueue_head(&vb->acked);
  376. vb->vdev = vdev;
  377. vb->need_stats_update = 0;
  378. vb_devinfo = balloon_devinfo_alloc(vb);
  379. if (IS_ERR(vb_devinfo)) {
  380. err = PTR_ERR(vb_devinfo);
  381. goto out_free_vb;
  382. }
  383. vb_mapping = balloon_mapping_alloc(vb_devinfo,
  384. (balloon_compaction_check()) ?
  385. &virtio_balloon_aops : NULL);
  386. if (IS_ERR(vb_mapping)) {
  387. /*
  388. * IS_ERR(vb_mapping) && PTR_ERR(vb_mapping) == -EOPNOTSUPP
  389. * This means !CONFIG_BALLOON_COMPACTION, otherwise we get off.
  390. */
  391. err = PTR_ERR(vb_mapping);
  392. if (err != -EOPNOTSUPP)
  393. goto out_free_vb_devinfo;
  394. }
  395. vb->vb_dev_info = vb_devinfo;
  396. err = init_vqs(vb);
  397. if (err)
  398. goto out_free_vb_mapping;
  399. vb->thread = kthread_run(balloon, vb, "vballoon");
  400. if (IS_ERR(vb->thread)) {
  401. err = PTR_ERR(vb->thread);
  402. goto out_del_vqs;
  403. }
  404. return 0;
  405. out_del_vqs:
  406. vdev->config->del_vqs(vdev);
  407. out_free_vb_mapping:
  408. balloon_mapping_free(vb_mapping);
  409. out_free_vb_devinfo:
  410. balloon_devinfo_free(vb_devinfo);
  411. out_free_vb:
  412. kfree(vb);
  413. out:
  414. return err;
  415. }
  416. static void remove_common(struct virtio_balloon *vb)
  417. {
  418. /* There might be pages left in the balloon: free them. */
  419. while (vb->num_pages)
  420. leak_balloon(vb, vb->num_pages);
  421. update_balloon_size(vb);
  422. /* Now we reset the device so we can clean up the queues. */
  423. vb->vdev->config->reset(vb->vdev);
  424. vb->vdev->config->del_vqs(vb->vdev);
  425. }
  426. static void virtballoon_remove(struct virtio_device *vdev)
  427. {
  428. struct virtio_balloon *vb = vdev->priv;
  429. kthread_stop(vb->thread);
  430. remove_common(vb);
  431. balloon_mapping_free(vb->vb_dev_info->mapping);
  432. balloon_devinfo_free(vb->vb_dev_info);
  433. kfree(vb);
  434. }
  435. #ifdef CONFIG_PM
  436. static int virtballoon_freeze(struct virtio_device *vdev)
  437. {
  438. struct virtio_balloon *vb = vdev->priv;
  439. /*
  440. * The kthread is already frozen by the PM core before this
  441. * function is called.
  442. */
  443. remove_common(vb);
  444. return 0;
  445. }
  446. static int virtballoon_restore(struct virtio_device *vdev)
  447. {
  448. struct virtio_balloon *vb = vdev->priv;
  449. int ret;
  450. ret = init_vqs(vdev->priv);
  451. if (ret)
  452. return ret;
  453. fill_balloon(vb, towards_target(vb));
  454. update_balloon_size(vb);
  455. return 0;
  456. }
  457. #endif
  458. static unsigned int features[] = {
  459. VIRTIO_BALLOON_F_MUST_TELL_HOST,
  460. VIRTIO_BALLOON_F_STATS_VQ,
  461. };
  462. static struct virtio_driver virtio_balloon_driver = {
  463. .feature_table = features,
  464. .feature_table_size = ARRAY_SIZE(features),
  465. .driver.name = KBUILD_MODNAME,
  466. .driver.owner = THIS_MODULE,
  467. .id_table = id_table,
  468. .probe = virtballoon_probe,
  469. .remove = virtballoon_remove,
  470. .config_changed = virtballoon_changed,
  471. #ifdef CONFIG_PM
  472. .freeze = virtballoon_freeze,
  473. .restore = virtballoon_restore,
  474. #endif
  475. };
  476. module_virtio_driver(virtio_balloon_driver);
  477. MODULE_DEVICE_TABLE(virtio, id_table);
  478. MODULE_DESCRIPTION("Virtio balloon driver");
  479. MODULE_LICENSE("GPL");