  1. /*
  2. * linux/kernel/power/swsusp.c
  3. *
  4. * This file provides code to write suspend image to swap and read it back.
  5. *
  6. * Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
  7. * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
  8. *
  9. * This file is released under the GPLv2.
  10. *
  11. * I'd like to thank the following people for their work:
  12. *
  13. * Pavel Machek <pavel@ucw.cz>:
  14. * Modifications, defectiveness pointing, being with me at the very beginning,
  15. * suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
  16. *
  17. * Steve Doddi <dirk@loth.demon.co.uk>:
  18. * Support the possibility of hardware state restoring.
  19. *
  20. * Raph <grey.havens@earthling.net>:
  21. * Support for preserving states of network devices and virtual console
  22. * (including X and svgatextmode)
  23. *
  24. * Kurt Garloff <garloff@suse.de>:
  25. * Straightened the critical function in order to prevent compilers from
  26. * playing tricks with local variables.
  27. *
  28. * Andreas Mohr <a.mohr@mailto.de>
  29. *
  30. * Alex Badea <vampire@go.ro>:
  31. * Fixed runaway init
  32. *
  33. * Rafael J. Wysocki <rjw@sisk.pl>
  34. * Added the swap map data structure and reworked the handling of swap
  35. *
  36. * More state savers are welcome. Especially for the scsi layer...
  37. *
  38. * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
  39. */
  40. #include <linux/module.h>
  41. #include <linux/mm.h>
  42. #include <linux/suspend.h>
  43. #include <linux/smp_lock.h>
  44. #include <linux/file.h>
  45. #include <linux/utsname.h>
  46. #include <linux/version.h>
  47. #include <linux/delay.h>
  48. #include <linux/bitops.h>
  49. #include <linux/spinlock.h>
  50. #include <linux/genhd.h>
  51. #include <linux/kernel.h>
  52. #include <linux/major.h>
  53. #include <linux/swap.h>
  54. #include <linux/pm.h>
  55. #include <linux/device.h>
  56. #include <linux/buffer_head.h>
  57. #include <linux/swapops.h>
  58. #include <linux/bootmem.h>
  59. #include <linux/syscalls.h>
  60. #include <linux/highmem.h>
  61. #include <linux/bio.h>
  62. #include <asm/uaccess.h>
  63. #include <asm/mmu_context.h>
  64. #include <asm/pgtable.h>
  65. #include <asm/tlbflush.h>
  66. #include <asm/io.h>
  67. #include "power.h"
  68. /*
  69. * Preferred image size in bytes (tunable via /sys/power/image_size).
  70. * When it is set to N, swsusp will do its best to ensure the image
  71. * size will not exceed N bytes, but if that is impossible, it will
  72. * try to create the smallest image possible.
  73. */
  74. unsigned long image_size = 500 * 1024 * 1024;
#ifdef CONFIG_HIGHMEM
/* Implemented elsewhere when highmem is configured. */
unsigned int count_highmem_pages(void);
int save_highmem(void);
int restore_highmem(void);
#else
/* Without highmem these degenerate to no-ops reporting success / zero pages. */
static int save_highmem(void) { return 0; }
static int restore_highmem(void) { return 0; }
static unsigned int count_highmem_pages(void) { return 0; }
#endif
extern char resume_file[];

#define SWSUSP_SIG	"S1SUSPEND"

/*
 * In-memory copy of the first page of the resume swap device.
 * reserved[] is sized so that the whole structure is exactly PAGE_SIZE
 * bytes: image + orig_sig (10) + sig (10) sit at the tail of the page,
 * where the regular swap signature lives.
 */
static struct swsusp_header {
	char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
	swp_entry_t image;	/* swap location of the first swap_map_page */
	char orig_sig[10];	/* saved swap signature, restored on resume */
	char sig[10];		/* SWSUSP_SIG while an image is present */
} __attribute__((packed, aligned(PAGE_SIZE))) swsusp_header;

static struct swsusp_info swsusp_info;

/*
 * Saving part...
 */

/* Index into swap_info[] of the swap device holding the image;
 * 0xffff means "not determined yet" (set by swsusp_swap_check()).
 */
static unsigned short root_swap = 0xffff;
  97. static int mark_swapfiles(swp_entry_t start)
  98. {
  99. int error;
  100. rw_swap_page_sync(READ,
  101. swp_entry(root_swap, 0),
  102. virt_to_page((unsigned long)&swsusp_header));
  103. if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
  104. !memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
  105. memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
  106. memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
  107. swsusp_header.image = start;
  108. error = rw_swap_page_sync(WRITE,
  109. swp_entry(root_swap, 0),
  110. virt_to_page((unsigned long)
  111. &swsusp_header));
  112. } else {
  113. pr_debug("swsusp: Partition is not swap space.\n");
  114. error = -ENODEV;
  115. }
  116. return error;
  117. }
  118. /*
  119. * Check whether the swap device is the specified resume
  120. * device, irrespective of whether they are specified by
  121. * identical names.
  122. *
  123. * (Thus, device inode aliasing is allowed. You can say /dev/hda4
  124. * instead of /dev/ide/host0/bus0/target0/lun0/part4 [if using devfs]
  125. * and they'll be considered the same device. This is *necessary* for
  126. * devfs, since the resume code can only recognize the form /dev/hda4,
  127. * but the suspend code would see the long name.)
  128. */
  129. static inline int is_resume_device(const struct swap_info_struct *swap_info)
  130. {
  131. struct file *file = swap_info->swap_file;
  132. struct inode *inode = file->f_dentry->d_inode;
  133. return S_ISBLK(inode->i_mode) &&
  134. swsusp_resume_device == MKDEV(imajor(inode), iminor(inode));
  135. }
  136. static int swsusp_swap_check(void) /* This is called before saving image */
  137. {
  138. int i;
  139. if (!swsusp_resume_device)
  140. return -ENODEV;
  141. spin_lock(&swap_lock);
  142. for (i = 0; i < MAX_SWAPFILES; i++) {
  143. if (!(swap_info[i].flags & SWP_WRITEOK))
  144. continue;
  145. if (is_resume_device(swap_info + i)) {
  146. spin_unlock(&swap_lock);
  147. root_swap = i;
  148. return 0;
  149. }
  150. }
  151. spin_unlock(&swap_lock);
  152. return -ENODEV;
  153. }
  154. /**
  155. * write_page - Write one page to a fresh swap location.
  156. * @addr: Address we're writing.
  157. * @loc: Place to store the entry we used.
  158. *
  159. * Allocate a new swap entry and 'sync' it. Note we discard -EIO
  160. * errors. That is an artifact left over from swsusp. It did not
  161. * check the return of rw_swap_page_sync() at all, since most pages
  162. * written back to swap would return -EIO.
  163. * This is a partial improvement, since we will at least return other
  164. * errors, though we need to eventually fix the damn code.
  165. */
  166. static int write_page(unsigned long addr, swp_entry_t *loc)
  167. {
  168. swp_entry_t entry;
  169. int error = -ENOSPC;
  170. entry = get_swap_page_of_type(root_swap);
  171. if (swp_offset(entry)) {
  172. error = rw_swap_page_sync(WRITE, entry, virt_to_page(addr));
  173. if (!error || error == -EIO)
  174. *loc = entry;
  175. }
  176. return error;
  177. }
  178. /**
  179. * Swap map-handling functions
  180. *
  181. * The swap map is a data structure used for keeping track of each page
  182. * written to the swap. It consists of many swap_map_page structures
  183. * that contain each an array of MAP_PAGE_SIZE swap entries.
  184. * These structures are linked together with the help of either the
  185. * .next (in memory) or the .next_swap (in swap) member.
  186. *
  187. * The swap map is created during suspend. At that time we need to keep
  188. * it in memory, because we have to free all of the allocated swap
  189. * entries if an error occurs. The memory needed is preallocated
  190. * so that we know in advance if there's enough of it.
  191. *
  192. * The first swap_map_page structure is filled with the swap entries that
  193. * correspond to the first MAP_PAGE_SIZE data pages written to swap and
  194. * so on. After the all of the data pages have been written, the order
  195. * of the swap_map_page structures in the map is reversed so that they
  196. * can be read from swap in the original order. This causes the data
  197. * pages to be loaded in exactly the same order in which they have been
  198. * saved.
  199. *
  200. * During resume we only need to use one swap_map_page structure
  201. * at a time, which means that we only need to use two memory pages for
  202. * reading the image - one for reading the swap_map_page structures
  203. * and the second for reading the data pages from swap.
  204. */
/* Number of swap entries that fit in one page together with the two
 * trailing link fields (next_swap on disk, next in memory).
 */
#define MAP_PAGE_SIZE ((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \
			/ sizeof(swp_entry_t))

struct swap_map_page {
	swp_entry_t entries[MAP_PAGE_SIZE];	/* swap entries of data pages */
	swp_entry_t next_swap;	/* swap location of the next map page */
	struct swap_map_page *next;	/* next map page while held in memory */
};
  212. static inline void free_swap_map(struct swap_map_page *swap_map)
  213. {
  214. struct swap_map_page *swp;
  215. while (swap_map) {
  216. swp = swap_map->next;
  217. free_page((unsigned long)swap_map);
  218. swap_map = swp;
  219. }
  220. }
  221. static struct swap_map_page *alloc_swap_map(unsigned int nr_pages)
  222. {
  223. struct swap_map_page *swap_map, *swp;
  224. unsigned n = 0;
  225. if (!nr_pages)
  226. return NULL;
  227. pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages);
  228. swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
  229. swp = swap_map;
  230. for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) {
  231. swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
  232. swp = swp->next;
  233. if (!swp) {
  234. free_swap_map(swap_map);
  235. return NULL;
  236. }
  237. }
  238. return swap_map;
  239. }
  240. /**
  241. * reverse_swap_map - reverse the order of pages in the swap map
  242. * @swap_map
  243. */
  244. static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map)
  245. {
  246. struct swap_map_page *prev, *next;
  247. prev = NULL;
  248. while (swap_map) {
  249. next = swap_map->next;
  250. swap_map->next = prev;
  251. prev = swap_map;
  252. swap_map = next;
  253. }
  254. return prev;
  255. }
  256. /**
  257. * free_swap_map_entries - free the swap entries allocated to store
  258. * the swap map @swap_map (this is only called in case of an error)
  259. */
  260. static inline void free_swap_map_entries(struct swap_map_page *swap_map)
  261. {
  262. while (swap_map) {
  263. if (swap_map->next_swap.val)
  264. swap_free(swap_map->next_swap);
  265. swap_map = swap_map->next;
  266. }
  267. }
  268. /**
  269. * save_swap_map - save the swap map used for tracing the data pages
  270. * stored in the swap
  271. */
  272. static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start)
  273. {
  274. swp_entry_t entry = (swp_entry_t){0};
  275. int error;
  276. while (swap_map) {
  277. swap_map->next_swap = entry;
  278. if ((error = write_page((unsigned long)swap_map, &entry)))
  279. return error;
  280. swap_map = swap_map->next;
  281. }
  282. *start = entry;
  283. return 0;
  284. }
  285. /**
  286. * free_image_entries - free the swap entries allocated to store
  287. * the image data pages (this is only called in case of an error)
  288. */
  289. static inline void free_image_entries(struct swap_map_page *swp)
  290. {
  291. unsigned k;
  292. while (swp) {
  293. for (k = 0; k < MAP_PAGE_SIZE; k++)
  294. if (swp->entries[k].val)
  295. swap_free(swp->entries[k]);
  296. swp = swp->next;
  297. }
  298. }
  299. /**
  300. * The swap_map_handle structure is used for handling the swap map in
  301. * a file-alike way
  302. */
  303. struct swap_map_handle {
  304. struct swap_map_page *cur;
  305. unsigned int k;
  306. };
  307. static inline void init_swap_map_handle(struct swap_map_handle *handle,
  308. struct swap_map_page *map)
  309. {
  310. handle->cur = map;
  311. handle->k = 0;
  312. }
  313. static inline int swap_map_write_page(struct swap_map_handle *handle,
  314. unsigned long addr)
  315. {
  316. int error;
  317. error = write_page(addr, handle->cur->entries + handle->k);
  318. if (error)
  319. return error;
  320. if (++handle->k >= MAP_PAGE_SIZE) {
  321. handle->cur = handle->cur->next;
  322. handle->k = 0;
  323. }
  324. return 0;
  325. }
  326. /**
  327. * save_image_data - save the data pages pointed to by the PBEs
  328. * from the list @pblist using the swap map handle @handle
  329. * (assume there are @nr_pages data pages to save)
  330. */
  331. static int save_image_data(struct pbe *pblist,
  332. struct swap_map_handle *handle,
  333. unsigned int nr_pages)
  334. {
  335. unsigned int m;
  336. struct pbe *p;
  337. int error = 0;
  338. printk("Saving image data pages (%u pages) ... ", nr_pages);
  339. m = nr_pages / 100;
  340. if (!m)
  341. m = 1;
  342. nr_pages = 0;
  343. for_each_pbe (p, pblist) {
  344. error = swap_map_write_page(handle, p->address);
  345. if (error)
  346. break;
  347. if (!(nr_pages % m))
  348. printk("\b\b\b\b%3d%%", nr_pages / m);
  349. nr_pages++;
  350. }
  351. if (!error)
  352. printk("\b\b\b\bdone\n");
  353. return error;
  354. }
/* Log the image header fields at debug level (used both when saving
 * and when checking an image on resume).
 */
static void dump_info(void)
{
	pr_debug(" swsusp: Version: %u\n",swsusp_info.version_code);
	pr_debug(" swsusp: Num Pages: %ld\n",swsusp_info.num_physpages);
	pr_debug(" swsusp: UTS Sys: %s\n",swsusp_info.uts.sysname);
	pr_debug(" swsusp: UTS Node: %s\n",swsusp_info.uts.nodename);
	pr_debug(" swsusp: UTS Release: %s\n",swsusp_info.uts.release);
	pr_debug(" swsusp: UTS Version: %s\n",swsusp_info.uts.version);
	pr_debug(" swsusp: UTS Machine: %s\n",swsusp_info.uts.machine);
	pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
	pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
	pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
	pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages);
}
/* Fill swsusp_info with everything check_header() will verify on
 * resume, plus the total number of pages the image occupies in swap.
 */
static void init_header(unsigned int nr_pages)
{
	memset(&swsusp_info, 0, sizeof(swsusp_info));
	swsusp_info.version_code = LINUX_VERSION_CODE;
	swsusp_info.num_physpages = num_physpages;
	memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
	swsusp_info.cpus = num_online_cpus();
	swsusp_info.image_pages = nr_pages;
	/* Total = data pages + metadata pages (one unsigned long per PBE,
	 * packed one page at a time) + one page for this header.
	 */
	swsusp_info.pages = nr_pages +
		((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
}
  380. /**
  381. * pack_orig_addresses - the .orig_address fields of the PBEs from the
  382. * list starting at @pbe are stored in the array @buf[] (1 page)
  383. */
  384. static inline struct pbe *pack_orig_addresses(unsigned long *buf,
  385. struct pbe *pbe)
  386. {
  387. int j;
  388. for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
  389. buf[j] = pbe->orig_address;
  390. pbe = pbe->next;
  391. }
  392. if (!pbe)
  393. for (; j < PAGE_SIZE / sizeof(long); j++)
  394. buf[j] = 0;
  395. return pbe;
  396. }
  397. /**
  398. * save_image_metadata - save the .orig_address fields of the PBEs
  399. * from the list @pblist using the swap map handle @handle
  400. */
  401. static int save_image_metadata(struct pbe *pblist,
  402. struct swap_map_handle *handle)
  403. {
  404. unsigned long *buf;
  405. unsigned int n = 0;
  406. struct pbe *p;
  407. int error = 0;
  408. printk("Saving image metadata ... ");
  409. buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
  410. if (!buf)
  411. return -ENOMEM;
  412. p = pblist;
  413. while (p) {
  414. p = pack_orig_addresses(buf, p);
  415. error = swap_map_write_page(handle, (unsigned long)buf);
  416. if (error)
  417. break;
  418. n++;
  419. }
  420. free_page((unsigned long)buf);
  421. if (!error)
  422. printk("done (%u pages saved)\n", n);
  423. return error;
  424. }
  425. /**
  426. * enough_swap - Make sure we have enough swap to save the image.
  427. *
  428. * Returns TRUE or FALSE after checking the total amount of swap
  429. * space avaiable from the resume partition.
  430. */
  431. static int enough_swap(unsigned int nr_pages)
  432. {
  433. unsigned int free_swap = swap_info[root_swap].pages -
  434. swap_info[root_swap].inuse_pages;
  435. pr_debug("swsusp: free swap pages: %u\n", free_swap);
  436. return free_swap > (nr_pages + PAGES_FOR_IO +
  437. (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
  438. }
  439. /**
  440. * swsusp_write - Write entire image and metadata.
  441. *
  442. * It is important _NOT_ to umount filesystems at this point. We want
  443. * them synced (in case something goes wrong) but we DO not want to mark
  444. * filesystem clean: it is not. (And it does not matter, if we resume
  445. * correctly, we'll mark system clean, anyway.)
  446. */
  447. int swsusp_write(struct pbe *pblist, unsigned int nr_pages)
  448. {
  449. struct swap_map_page *swap_map;
  450. struct swap_map_handle handle;
  451. swp_entry_t start;
  452. int error;
  453. if ((error = swsusp_swap_check())) {
  454. printk(KERN_ERR "swsusp: Cannot find swap device, try swapon -a.\n");
  455. return error;
  456. }
  457. if (!enough_swap(nr_pages)) {
  458. printk(KERN_ERR "swsusp: Not enough free swap\n");
  459. return -ENOSPC;
  460. }
  461. init_header(nr_pages);
  462. swap_map = alloc_swap_map(swsusp_info.pages);
  463. if (!swap_map)
  464. return -ENOMEM;
  465. init_swap_map_handle(&handle, swap_map);
  466. error = swap_map_write_page(&handle, (unsigned long)&swsusp_info);
  467. if (!error)
  468. error = save_image_metadata(pblist, &handle);
  469. if (!error)
  470. error = save_image_data(pblist, &handle, nr_pages);
  471. if (error)
  472. goto Free_image_entries;
  473. swap_map = reverse_swap_map(swap_map);
  474. error = save_swap_map(swap_map, &start);
  475. if (error)
  476. goto Free_map_entries;
  477. dump_info();
  478. printk( "S" );
  479. error = mark_swapfiles(start);
  480. printk( "|\n" );
  481. if (error)
  482. goto Free_map_entries;
  483. Free_swap_map:
  484. free_swap_map(swap_map);
  485. return error;
  486. Free_map_entries:
  487. free_swap_map_entries(swap_map);
  488. Free_image_entries:
  489. free_image_entries(swap_map);
  490. goto Free_swap_map;
  491. }
  492. /**
  493. * swsusp_shrink_memory - Try to free as much memory as needed
  494. *
  495. * ... but do not OOM-kill anyone
  496. *
  497. * Notice: all userland should be stopped before it is called, or
  498. * livelock is possible.
  499. */
  500. #define SHRINK_BITE 10000
  501. int swsusp_shrink_memory(void)
  502. {
  503. long size, tmp;
  504. struct zone *zone;
  505. unsigned long pages = 0;
  506. unsigned int i = 0;
  507. char *p = "-\\|/";
  508. printk("Shrinking memory... ");
  509. do {
  510. size = 2 * count_highmem_pages();
  511. size += size / 50 + count_data_pages();
  512. size += (size + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
  513. PAGES_FOR_IO;
  514. tmp = size;
  515. for_each_zone (zone)
  516. if (!is_highmem(zone))
  517. tmp -= zone->free_pages;
  518. if (tmp > 0) {
  519. tmp = shrink_all_memory(SHRINK_BITE);
  520. if (!tmp)
  521. return -ENOMEM;
  522. pages += tmp;
  523. } else if (size > image_size / PAGE_SIZE) {
  524. tmp = shrink_all_memory(SHRINK_BITE);
  525. pages += tmp;
  526. }
  527. printk("\b%c", p[i++%4]);
  528. } while (tmp > 0);
  529. printk("\bdone (%lu pages freed)\n", pages);
  530. return 0;
  531. }
/* Final step of suspending: power devices down, save processor state
 * and let the architecture snapshot memory.  Returns through the same
 * unwind path on failure -- or "magically", after a successful resume.
 */
int swsusp_suspend(void)
{
	int error;

	if ((error = arch_prepare_suspend()))
		return error;
	local_irq_disable();
	/* At this point, device_suspend() has been called, but *not*
	 * device_power_down(). We *must* device_power_down() now.
	 * Otherwise, drivers for some devices (e.g. interrupt controllers)
	 * become desynchronized with the actual state of the hardware
	 * at resume time, and evil weirdness ensues.
	 */
	if ((error = device_power_down(PMSG_FREEZE))) {
		printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
		goto Enable_irqs;
	}
	if ((error = save_highmem())) {
		printk(KERN_ERR "swsusp: Not enough free pages for highmem\n");
		goto Restore_highmem;
	}
	save_processor_state();
	if ((error = swsusp_arch_suspend()))
		printk(KERN_ERR "Error %d suspending\n", error);
	/* Restore control flow magically appears here */
	restore_processor_state();
Restore_highmem:
	/* save_highmem() may have saved some pages before failing, so the
	 * restore is done on this path too.
	 */
	restore_highmem();
	device_power_up();
Enable_irqs:
	local_irq_enable();
	return error;
}
/* Hand control to the image kernel.  On success this function does not
 * return here: execution continues where swsusp_arch_suspend() was
 * called in the saved image.
 */
int swsusp_resume(void)
{
	int error;

	local_irq_disable();
	if (device_power_down(PMSG_FREEZE))
		printk(KERN_ERR "Some devices failed to power down, very bad\n");
	/* We'll ignore saved state, but this gets preempt count (etc) right */
	save_processor_state();
	error = swsusp_arch_resume();
	/* Code below is only ever reached in case of failure. Otherwise
	 * execution continues at place where swsusp_arch_suspend was called
	 */
	BUG_ON(!error);
	/* The only reason why swsusp_arch_resume() can fail is memory being
	 * very tight, so we have to free it as soon as we can to avoid
	 * subsequent failures
	 */
	swsusp_free();
	restore_processor_state();
	restore_highmem();
	touch_softlockup_watchdog();
	device_power_up();
	local_irq_enable();
	return error;
}
  589. /**
  590. * mark_unsafe_pages - mark the pages that cannot be used for storing
  591. * the image during resume, because they conflict with the pages that
  592. * had been used before suspend
  593. */
  594. static void mark_unsafe_pages(struct pbe *pblist)
  595. {
  596. struct zone *zone;
  597. unsigned long zone_pfn;
  598. struct pbe *p;
  599. if (!pblist) /* a sanity check */
  600. return;
  601. /* Clear page flags */
  602. for_each_zone (zone) {
  603. for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
  604. if (pfn_valid(zone_pfn + zone->zone_start_pfn))
  605. ClearPageNosaveFree(pfn_to_page(zone_pfn +
  606. zone->zone_start_pfn));
  607. }
  608. /* Mark orig addresses */
  609. for_each_pbe (p, pblist)
  610. SetPageNosaveFree(virt_to_page(p->orig_address));
  611. }
  612. static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
  613. {
  614. /* We assume both lists contain the same number of elements */
  615. while (src) {
  616. dst->orig_address = src->orig_address;
  617. dst = dst->next;
  618. src = src->next;
  619. }
  620. }
  621. /*
  622. * Using bio to read from swap.
  623. * This code requires a bit more work than just using buffer heads
  624. * but, it is the recommended way for 2.5/2.6.
  625. * The following are to signal the beginning and end of I/O. Bios
  626. * finish asynchronously, while we want them to happen synchronously.
  627. * A simple atomic_t, and a wait loop take care of this problem.
  628. */
  629. static atomic_t io_done = ATOMIC_INIT(0);
  630. static int end_io(struct bio *bio, unsigned int num, int err)
  631. {
  632. if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
  633. panic("I/O error reading memory image");
  634. atomic_set(&io_done, 0);
  635. return 0;
  636. }
/* Block device the image is read from; opened by swsusp_check(). */
static struct block_device *resume_bdev;

/**
 *	submit - submit BIO request.
 *	@rw:	READ or WRITE.
 *	@off	physical offset of page.
 *	@page:	page we're reading or writing.
 *
 *	Straight from the textbook - allocate and initialize the bio.
 *	If we're writing, make sure the page is marked as dirty.
 *	Then submit it and wait.
 */
static int submit(int rw, pgoff_t page_off, void *page)
{
	int error = 0;
	struct bio *bio;

	bio = bio_alloc(GFP_ATOMIC, 1);
	if (!bio)
		return -ENOMEM;
	bio->bi_sector = page_off * (PAGE_SIZE >> 9);	/* pages -> 512B sectors */
	bio->bi_bdev = resume_bdev;
	bio->bi_end_io = end_io;

	if (bio_add_page(bio, virt_to_page(page), PAGE_SIZE, 0) < PAGE_SIZE) {
		printk("swsusp: ERROR: adding page to bio at %ld\n",page_off);
		error = -EFAULT;
		goto Done;
	}

	/* Busy-wait for completion: end_io() clears io_done. */
	atomic_set(&io_done, 1);
	submit_bio(rw | (1 << BIO_RW_SYNC), bio);
	while (atomic_read(&io_done))
		yield();
	if (rw == READ)
		bio_set_pages_dirty(bio);
Done:
	bio_put(bio);
	return error;
}
/* Convenience wrappers around submit() for one-page reads and writes. */
static int bio_read_page(pgoff_t page_off, void *page)
{
	return submit(READ, page_off, page);
}

static int bio_write_page(pgoff_t page_off, void *page)
{
	return submit(WRITE, page_off, page);
}
  681. /**
  682. * The following functions allow us to read data using a swap map
  683. * in a file-alike way
  684. */
  685. static inline void release_swap_map_reader(struct swap_map_handle *handle)
  686. {
  687. if (handle->cur)
  688. free_page((unsigned long)handle->cur);
  689. handle->cur = NULL;
  690. }
  691. static inline int get_swap_map_reader(struct swap_map_handle *handle,
  692. swp_entry_t start)
  693. {
  694. int error;
  695. if (!swp_offset(start))
  696. return -EINVAL;
  697. handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
  698. if (!handle->cur)
  699. return -ENOMEM;
  700. error = bio_read_page(swp_offset(start), handle->cur);
  701. if (error) {
  702. release_swap_map_reader(handle);
  703. return error;
  704. }
  705. handle->k = 0;
  706. return 0;
  707. }
/* Read the next data page recorded in the swap map into @buf. */
static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf)
{
	unsigned long offset;
	int error;

	if (!handle->cur)
		return -EINVAL;
	offset = swp_offset(handle->cur->entries[handle->k]);
	if (!offset)
		return -EINVAL;
	error = bio_read_page(offset, buf);
	if (error)
		return error;
	/* When the current map page is exhausted, follow .next_swap and
	 * read the next map page in place; a zero link marks the end of
	 * the map, in which case the reader releases itself.
	 */
	if (++handle->k >= MAP_PAGE_SIZE) {
		handle->k = 0;
		offset = swp_offset(handle->cur->next_swap);
		if (!offset)
			release_swap_map_reader(handle);
		else
			error = bio_read_page(offset, handle->cur);
	}
	return error;
}
/* Verify that the saved image header matches the running kernel.
 * Note: when several checks fail, the reason reported is the LAST
 * failing one, since each check overwrites @reason.
 */
static int check_header(void)
{
	char *reason = NULL;

	dump_info();
	if (swsusp_info.version_code != LINUX_VERSION_CODE)
		reason = "kernel version";
	if (swsusp_info.num_physpages != num_physpages)
		reason = "memory size";
	if (strcmp(swsusp_info.uts.sysname,system_utsname.sysname))
		reason = "system type";
	if (strcmp(swsusp_info.uts.release,system_utsname.release))
		reason = "kernel release";
	if (strcmp(swsusp_info.uts.version,system_utsname.version))
		reason = "version";
	if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
		reason = "machine";
	if (reason) {
		printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
		return -EPERM;
	}
	return 0;
}
  752. /**
  753. * load_image_data - load the image data using the swap map handle
  754. * @handle and store them using the page backup list @pblist
  755. * (assume there are @nr_pages pages to load)
  756. */
  757. static int load_image_data(struct pbe *pblist,
  758. struct swap_map_handle *handle,
  759. unsigned int nr_pages)
  760. {
  761. int error;
  762. unsigned int m;
  763. struct pbe *p;
  764. if (!pblist)
  765. return -EINVAL;
  766. printk("Loading image data pages (%u pages) ... ", nr_pages);
  767. m = nr_pages / 100;
  768. if (!m)
  769. m = 1;
  770. nr_pages = 0;
  771. p = pblist;
  772. while (p) {
  773. error = swap_map_read_page(handle, (void *)p->address);
  774. if (error)
  775. break;
  776. p = p->next;
  777. if (!(nr_pages % m))
  778. printk("\b\b\b\b%3d%%", nr_pages / m);
  779. nr_pages++;
  780. }
  781. if (!error)
  782. printk("\b\b\b\bdone\n");
  783. return error;
  784. }
  785. /**
  786. * unpack_orig_addresses - copy the elements of @buf[] (1 page) to
  787. * the PBEs in the list starting at @pbe
  788. */
  789. static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
  790. struct pbe *pbe)
  791. {
  792. int j;
  793. for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
  794. pbe->orig_address = buf[j];
  795. pbe = pbe->next;
  796. }
  797. return pbe;
  798. }
  799. /**
  800. * load_image_metadata - load the image metadata using the swap map
  801. * handle @handle and put them into the PBEs in the list @pblist
  802. */
  803. static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle)
  804. {
  805. struct pbe *p;
  806. unsigned long *buf;
  807. unsigned int n = 0;
  808. int error = 0;
  809. printk("Loading image metadata ... ");
  810. buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
  811. if (!buf)
  812. return -ENOMEM;
  813. p = pblist;
  814. while (p) {
  815. error = swap_map_read_page(handle, buf);
  816. if (error)
  817. break;
  818. p = unpack_orig_addresses(buf, p);
  819. n++;
  820. }
  821. free_page((unsigned long)buf);
  822. if (!error)
  823. printk("done (%u pages loaded)\n", n);
  824. return error;
  825. }
  826. int swsusp_read(struct pbe **pblist_ptr)
  827. {
  828. int error;
  829. struct pbe *p, *pblist;
  830. struct swap_map_handle handle;
  831. unsigned int nr_pages;
  832. if (IS_ERR(resume_bdev)) {
  833. pr_debug("swsusp: block device not initialised\n");
  834. return PTR_ERR(resume_bdev);
  835. }
  836. error = get_swap_map_reader(&handle, swsusp_header.image);
  837. if (!error)
  838. error = swap_map_read_page(&handle, &swsusp_info);
  839. if (!error)
  840. error = check_header();
  841. if (error)
  842. return error;
  843. nr_pages = swsusp_info.image_pages;
  844. p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0);
  845. if (!p)
  846. return -ENOMEM;
  847. error = load_image_metadata(p, &handle);
  848. if (!error) {
  849. mark_unsafe_pages(p);
  850. pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
  851. if (pblist)
  852. copy_page_backup_list(pblist, p);
  853. free_pagedir(p);
  854. if (!pblist)
  855. error = -ENOMEM;
  856. /* Allocate memory for the image and read the data from swap */
  857. if (!error)
  858. error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
  859. if (!error) {
  860. release_eaten_pages();
  861. error = load_image_data(pblist, &handle, nr_pages);
  862. }
  863. if (!error)
  864. *pblist_ptr = pblist;
  865. }
  866. release_swap_map_reader(&handle);
  867. blkdev_put(resume_bdev);
  868. if (!error)
  869. pr_debug("swsusp: Reading resume file was successful\n");
  870. else
  871. pr_debug("swsusp: Error %d resuming\n", error);
  872. return error;
  873. }
  874. /**
  875. * swsusp_check - Check for swsusp signature in the resume device
  876. */
  877. int swsusp_check(void)
  878. {
  879. int error;
  880. resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_READ);
  881. if (!IS_ERR(resume_bdev)) {
  882. set_blocksize(resume_bdev, PAGE_SIZE);
  883. memset(&swsusp_header, 0, sizeof(swsusp_header));
  884. if ((error = bio_read_page(0, &swsusp_header)))
  885. return error;
  886. if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
  887. memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
  888. /* Reset swap signature now */
  889. error = bio_write_page(0, &swsusp_header);
  890. } else {
  891. return -EINVAL;
  892. }
  893. if (error)
  894. blkdev_put(resume_bdev);
  895. else
  896. pr_debug("swsusp: Signature found, resuming\n");
  897. } else {
  898. error = PTR_ERR(resume_bdev);
  899. }
  900. if (error)
  901. pr_debug("swsusp: Error %d check for resume file\n", error);
  902. return error;
  903. }
  904. /**
  905. * swsusp_close - close swap device.
  906. */
  907. void swsusp_close(void)
  908. {
  909. if (IS_ERR(resume_bdev)) {
  910. pr_debug("swsusp: block device not initialised\n");
  911. return;
  912. }
  913. blkdev_put(resume_bdev);
  914. }