/*
 * linux/kernel/power/swsusp.c
 *
 * This file provides code to write suspend image to swap and read it back.
 *
 * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
 * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
 *
 * This file is released under the GPLv2.
 *
 * I'd like to thank the following people for their work:
 *
 * Pavel Machek <pavel@ucw.cz>:
 * Modifications, defectiveness pointing, being with me at the very beginning,
 * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
 *
 * Steve Doddi <dirk@loth.demon.co.uk>:
 * Support the possibility of hardware state restoring.
 *
 * Raph <grey.havens@earthling.net>:
 * Support for preserving states of network devices and virtual console
 * (including X and svgatextmode)
 *
 * Kurt Garloff <garloff@suse.de>:
 * Straightened the critical function in order to prevent compilers from
 * playing tricks with local variables.
 *
 * Andreas Mohr <a.mohr@mailto.de>
 *
 * Alex Badea <vampire@go.ro>:
 * Fixed runaway init
 *
 * Rafael J. Wysocki <rjw@sisk.pl>
 * Reworked the freeing of memory and the handling of swap
 *
 * More state savers are welcome. Especially for the scsi layer...
 *
 * For TODOs, FIXMEs also look in Documentation/power/swsusp.txt
 */
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/swap.h>
#include <linux/pm.h>
#include <linux/swapops.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/rbtree.h>

#include "power.h"

/*
 * Preferred image size in bytes (tunable via /sys/power/image_size).
 * When it is set to N, swsusp will do its best to ensure the image
 * size will not exceed N bytes, but if that is impossible, it will
 * try to create the smallest image possible.
 */
unsigned long image_size = 500 * 1024 * 1024;
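
/*
 * Usage sketch (not part of the original file): the preferred size can be
 * tuned from userspace through the sysfs attribute named in the comment
 * above, e.g.
 *
 *      # echo 104857600 > /sys/power/image_size     (prefer a ~100 MB image)
 *      # cat /sys/power/image_size                   (read the current value)
 *
 * The attribute's read/write handlers live elsewhere in kernel/power/;
 * the definition here only supplies the default.
 */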

int in_suspend __nosavedata = 0;

#ifdef CONFIG_HIGHMEM
unsigned int count_highmem_pages(void);
int restore_highmem(void);
#else
static inline int restore_highmem(void) { return 0; }
static inline unsigned int count_highmem_pages(void) { return 0; }
#endif

/**
 * The following functions are used for tracing the allocated
 * swap pages, so that they can be freed in case of an error.
 */

struct swsusp_extent {
        struct rb_node node;
        unsigned long start;
        unsigned long end;
};

static struct rb_root swsusp_extents = RB_ROOT;

static int swsusp_extents_insert(unsigned long swap_offset)
{
        struct rb_node **new = &(swsusp_extents.rb_node);
        struct rb_node *parent = NULL;
        struct swsusp_extent *ext;

        /* Figure out where to put the new node */
        while (*new) {
                ext = container_of(*new, struct swsusp_extent, node);
                parent = *new;
                if (swap_offset < ext->start) {
                        /* Try to merge */
                        if (swap_offset == ext->start - 1) {
                                ext->start--;
                                return 0;
                        }
                        new = &((*new)->rb_left);
                } else if (swap_offset > ext->end) {
                        /* Try to merge */
                        if (swap_offset == ext->end + 1) {
                                ext->end++;
                                return 0;
                        }
                        new = &((*new)->rb_right);
                } else {
                        /* It already is in the tree */
                        return -EINVAL;
                }
        }
        /* Add the new node and rebalance the tree. */
        ext = kzalloc(sizeof(struct swsusp_extent), GFP_KERNEL);
        if (!ext)
                return -ENOMEM;

        ext->start = swap_offset;
        ext->end = swap_offset;
        rb_link_node(&ext->node, parent, new);
        rb_insert_color(&ext->node, &swsusp_extents);
        return 0;
}
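
/*
 * Worked example (illustrative, not from the original source): inserting
 * offsets 10, 11 and 13 in that order first creates the extent [10,10],
 * then merges 11 into it (adjacent to ->end) giving [10,11], and finally
 * links a second extent [13,13] since 13 is adjacent to neither boundary.
 * Inserting 11 again would fall inside [10,11] and return -EINVAL.
 */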

/**
 * alloc_swapdev_block - allocate a swap page and register that it has
 * been allocated, so that it can be freed in case of an error.
 */
sector_t alloc_swapdev_block(int swap)
{
        unsigned long offset;

        offset = swp_offset(get_swap_page_of_type(swap));
        if (offset) {
                if (swsusp_extents_insert(offset))
                        swap_free(swp_entry(swap, offset));
                else
                        return swapdev_block(swap, offset);
        }
        return 0;
}
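
/*
 * Typical caller pattern (sketch, assuming the image-writing code in
 * kernel/power/swap.c): allocate one block per page to be saved, submit
 * the page to the returned sector, and on any failure call
 * free_all_swap_pages() below so every offset recorded in swsusp_extents
 * is returned to the swap allocator.
 */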

/**
 * free_all_swap_pages - free swap pages allocated for saving image data.
 * It also frees the extents used to register which swap entries had been
 * allocated.
 */
void free_all_swap_pages(int swap)
{
        struct rb_node *node;

        while ((node = swsusp_extents.rb_node)) {
                struct swsusp_extent *ext;
                unsigned long offset;

                ext = container_of(node, struct swsusp_extent, node);
                rb_erase(node, &swsusp_extents);
                for (offset = ext->start; offset <= ext->end; offset++)
                        swap_free(swp_entry(swap, offset));

                kfree(ext);
        }
}

int swsusp_swap_in_use(void)
{
        return (swsusp_extents.rb_node != NULL);
}

/**
 * swsusp_show_speed - print the time elapsed between two events
 * represented by @start and @stop
 *
 * @nr_pages - number of pages processed between @start and @stop
 * @msg - introductory message to print
 */
void swsusp_show_speed(struct timeval *start, struct timeval *stop,
                        unsigned nr_pages, char *msg)
{
        s64 elapsed_centisecs64;
        int centisecs;
        int k;
        int kps;

        elapsed_centisecs64 = timeval_to_ns(stop) - timeval_to_ns(start);
        do_div(elapsed_centisecs64, NSEC_PER_SEC / 100);
        centisecs = elapsed_centisecs64;
        if (centisecs == 0)
                centisecs = 1;  /* avoid div-by-zero */
        k = nr_pages * (PAGE_SIZE / 1024);
        kps = (k * 100) / centisecs;
        printk("%s %d kbytes in %d.%02d seconds (%d.%02d MB/s)\n", msg, k,
                        centisecs / 100, centisecs % 100,
                        kps / 1000, (kps % 1000) / 10);
}
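
/*
 * Example output (illustrative numbers only, assuming 4 KB pages): with
 * nr_pages = 25000 and 2.5 s elapsed, k = 100000 and kps = 40000, so the
 * printed line would be:
 *
 *      Freed 100000 kbytes in 2.50 seconds (40.00 MB/s)
 */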

/**
 * swsusp_shrink_memory - Try to free as much memory as needed
 *
 * ... but do not OOM-kill anyone
 *
 * Notice: all userland should be stopped before it is called, or
 * livelock is possible.
 */

#define SHRINK_BITE     10000
static inline unsigned long __shrink_memory(long tmp)
{
        if (tmp > SHRINK_BITE)
                tmp = SHRINK_BITE;
        return shrink_all_memory(tmp);
}

int swsusp_shrink_memory(void)
{
        long tmp;
        struct zone *zone;
        unsigned long pages = 0;
        unsigned int i = 0;
        char *p = "-\\|/";
        struct timeval start, stop;

        printk("Shrinking memory... ");
        do_gettimeofday(&start);
        do {
                long size, highmem_size;

                highmem_size = count_highmem_pages();
                size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES;
                tmp = size;
                size += highmem_size;
                for_each_zone (zone)
                        if (populated_zone(zone)) {
                                tmp += snapshot_additional_pages(zone);
                                if (is_highmem(zone)) {
                                        highmem_size -=
                                        zone_page_state(zone, NR_FREE_PAGES);
                                } else {
                                        tmp -= zone_page_state(zone, NR_FREE_PAGES);
                                        tmp += zone->lowmem_reserve[ZONE_NORMAL];
                                }
                        }

                if (highmem_size < 0)
                        highmem_size = 0;

                tmp += highmem_size;
                if (tmp > 0) {
                        tmp = __shrink_memory(tmp);
                        if (!tmp)
                                return -ENOMEM;
                        pages += tmp;
                } else if (size > image_size / PAGE_SIZE) {
                        tmp = __shrink_memory(size - (image_size / PAGE_SIZE));
                        pages += tmp;
                }
                printk("\b%c", p[i++ % 4]);
        } while (tmp > 0);
        do_gettimeofday(&stop);
        printk("\bdone (%lu pages freed)\n", pages);
        swsusp_show_speed(&start, &stop, pages, "Freed");

        return 0;
}
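
/*
 * Rough sizing example (illustrative, assuming 4 KB pages): the default
 * image_size of 500 MB corresponds to image_size / PAGE_SIZE = 128000
 * pages.  If a pass counts 200000 saveable pages and there is no lowmem
 * deficit (tmp <= 0), the loop asks __shrink_memory() to reclaim
 * 200000 - 128000 = 72000 pages, at most SHRINK_BITE (10000) per call,
 * and keeps iterating until nothing more needs to be freed.
 */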

int swsusp_suspend(void)
{
        int error;

        if ((error = arch_prepare_suspend()))
                return error;

        local_irq_disable();
        /* At this point, device_suspend() has been called, but *not*
         * device_power_down(). We *must* device_power_down() now.
         * Otherwise, drivers for some devices (e.g. interrupt controllers)
         * become desynchronized with the actual state of the hardware
         * at resume time, and evil weirdness ensues.
         */
        if ((error = device_power_down(PMSG_FREEZE))) {
                printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
                goto Enable_irqs;
        }

        save_processor_state();
        if ((error = swsusp_arch_suspend()))
                printk(KERN_ERR "Error %d suspending\n", error);
        /* Restore control flow magically appears here */
        restore_processor_state();
        /* NOTE: device_power_up() is just a resume() for devices
         * that suspended with irqs off ... no overall powerup.
         */
        device_power_up();
 Enable_irqs:
        local_irq_enable();
        return error;
}

int swsusp_resume(void)
{
        int error;

        local_irq_disable();
        /* NOTE: device_power_down() is just a suspend() with irqs off;
         * it has no special "power things down" semantics
         */
        if (device_power_down(PMSG_PRETHAW))
                printk(KERN_ERR "Some devices failed to power down, very bad\n");
        /* We'll ignore saved state, but this gets preempt count (etc) right */
        save_processor_state();
        error = restore_highmem();
        if (!error) {
                error = swsusp_arch_resume();
                /* The code below is only ever reached in case of a failure.
                 * Otherwise execution continues at the place where
                 * swsusp_arch_suspend() was called.
                 */
                BUG_ON(!error);
                /* This call to restore_highmem() undoes the previous one */
                restore_highmem();
        }
        /* The only reason why swsusp_arch_resume() can fail is memory being
         * very tight, so we have to free it as soon as we can to avoid
         * subsequent failures.
         */
        swsusp_free();
        restore_processor_state();
        touch_softlockup_watchdog();
        device_power_up();
        local_irq_enable();
        return error;
}