snapshot.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. /*
  2. * linux/kernel/power/snapshot.c
  3. *
  4. * This file provides system snapshot/restore functionality.
  5. *
  6. * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
  7. *
  8. * This file is released under the GPLv2, and is based on swsusp.c.
  9. *
  10. */
  11. #include <linux/module.h>
  12. #include <linux/mm.h>
  13. #include <linux/suspend.h>
  14. #include <linux/smp_lock.h>
  15. #include <linux/delay.h>
  16. #include <linux/bitops.h>
  17. #include <linux/spinlock.h>
  18. #include <linux/kernel.h>
  19. #include <linux/pm.h>
  20. #include <linux/device.h>
  21. #include <linux/bootmem.h>
  22. #include <linux/syscalls.h>
  23. #include <linux/console.h>
  24. #include <linux/highmem.h>
  25. #include <asm/uaccess.h>
  26. #include <asm/mmu_context.h>
  27. #include <asm/pgtable.h>
  28. #include <asm/tlbflush.h>
  29. #include <asm/io.h>
  30. #include "power.h"
  31. struct pbe *pagedir_nosave; /* PBE list of the snapshot; set by swsusp_save() to the list returned by swsusp_alloc() */
  32. unsigned int nr_copy_pages; /* set by swsusp_save() to the page count returned by count_data_pages() */
  33. #ifdef CONFIG_HIGHMEM
  34. unsigned int count_highmem_pages(void)
  35. {
  36. struct zone *zone;
  37. unsigned long zone_pfn;
  38. unsigned int n = 0;
  39. for_each_zone (zone)
  40. if (is_highmem(zone)) {
  41. mark_free_pages(zone);
  42. for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) {
  43. struct page *page;
  44. unsigned long pfn = zone_pfn + zone->zone_start_pfn;
  45. if (!pfn_valid(pfn))
  46. continue;
  47. page = pfn_to_page(pfn);
  48. if (PageReserved(page))
  49. continue;
  50. if (PageNosaveFree(page))
  51. continue;
  52. n++;
  53. }
  54. }
  55. return n;
  56. }
  /*
   * One saved highmem page.  Entries form a singly linked list; new entries
   * are pushed at the head by save_highmem_zone() and popped from the head
   * by restore_highmem().
   */
  struct highmem_page {
      char *data;                 /* lowmem page holding the copied contents */
      struct page *page;          /* highmem page the contents came from */
      struct highmem_page *next;  /* next saved page, or NULL at the tail */
  };

  /* Head of the list of saved highmem pages (NULL when nothing is saved). */
  static struct highmem_page *highmem_copy;
  63. static int save_highmem_zone(struct zone *zone)
  64. {
  65. unsigned long zone_pfn;
  66. mark_free_pages(zone);
  67. for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
  68. struct page *page;
  69. struct highmem_page *save;
  70. void *kaddr;
  71. unsigned long pfn = zone_pfn + zone->zone_start_pfn;
  72. if (!(pfn%1000))
  73. printk(".");
  74. if (!pfn_valid(pfn))
  75. continue;
  76. page = pfn_to_page(pfn);
  77. /*
  78. * This condition results from rvmalloc() sans vmalloc_32()
  79. * and architectural memory reservations. This should be
  80. * corrected eventually when the cases giving rise to this
  81. * are better understood.
  82. */
  83. if (PageReserved(page)) {
  84. printk("highmem reserved page?!\n");
  85. continue;
  86. }
  87. BUG_ON(PageNosave(page));
  88. if (PageNosaveFree(page))
  89. continue;
  90. save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
  91. if (!save)
  92. return -ENOMEM;
  93. save->next = highmem_copy;
  94. save->page = page;
  95. save->data = (void *) get_zeroed_page(GFP_ATOMIC);
  96. if (!save->data) {
  97. kfree(save);
  98. return -ENOMEM;
  99. }
  100. kaddr = kmap_atomic(page, KM_USER0);
  101. memcpy(save->data, kaddr, PAGE_SIZE);
  102. kunmap_atomic(kaddr, KM_USER0);
  103. highmem_copy = save;
  104. }
  105. return 0;
  106. }
  107. int save_highmem(void)
  108. {
  109. struct zone *zone;
  110. int res = 0;
  111. pr_debug("swsusp: Saving Highmem\n");
  112. for_each_zone (zone) {
  113. if (is_highmem(zone))
  114. res = save_highmem_zone(zone);
  115. if (res)
  116. return res;
  117. }
  118. return 0;
  119. }
  120. int restore_highmem(void)
  121. {
  122. printk("swsusp: Restoring Highmem\n");
  123. while (highmem_copy) {
  124. struct highmem_page *save = highmem_copy;
  125. void *kaddr;
  126. highmem_copy = save->next;
  127. kaddr = kmap_atomic(save->page, KM_USER0);
  128. memcpy(kaddr, save->data, PAGE_SIZE);
  129. kunmap_atomic(kaddr, KM_USER0);
  130. free_page((long) save->data);
  131. kfree(save);
  132. }
  133. return 0;
  134. }
  135. #endif
  136. static int pfn_is_nosave(unsigned long pfn)
  137. {
  138. unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
  139. unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
  140. return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
  141. }
  142. /**
  143. * saveable - Determine whether a page should be cloned or not.
  144. * @pfn: The page
  145. *
  146. * We save a page if it's Reserved, and not in the range of pages
  147. * statically defined as 'unsaveable', or if it isn't reserved, and
  148. * isn't part of a free chunk of pages.
  149. */
  150. static int saveable(struct zone *zone, unsigned long *zone_pfn)
  151. {
  152. unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
  153. struct page *page;
  154. if (!pfn_valid(pfn))
  155. return 0;
  156. page = pfn_to_page(pfn);
  157. BUG_ON(PageReserved(page) && PageNosave(page));
  158. if (PageNosave(page))
  159. return 0;
  160. if (PageReserved(page) && pfn_is_nosave(pfn))
  161. return 0;
  162. if (PageNosaveFree(page))
  163. return 0;
  164. return 1;
  165. }
  166. unsigned int count_data_pages(void)
  167. {
  168. struct zone *zone;
  169. unsigned long zone_pfn;
  170. unsigned int n = 0;
  171. for_each_zone (zone) {
  172. if (is_highmem(zone))
  173. continue;
  174. mark_free_pages(zone);
  175. for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
  176. n += saveable(zone, &zone_pfn);
  177. }
  178. return n;
  179. }
  180. static void copy_data_pages(struct pbe *pblist)
  181. {
  182. struct zone *zone;
  183. unsigned long zone_pfn;
  184. struct pbe *pbe, *p;
  185. pbe = pblist;
  186. for_each_zone (zone) {
  187. if (is_highmem(zone))
  188. continue;
  189. mark_free_pages(zone);
  190. /* This is necessary for swsusp_free() */
  191. for_each_pb_page (p, pblist)
  192. SetPageNosaveFree(virt_to_page(p));
  193. for_each_pbe (p, pblist)
  194. SetPageNosaveFree(virt_to_page(p->address));
  195. for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
  196. if (saveable(zone, &zone_pfn)) {
  197. struct page *page;
  198. page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
  199. BUG_ON(!pbe);
  200. pbe->orig_address = (unsigned long)page_address(page);
  201. /* copy_page is not usable for copying task structs. */
  202. memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
  203. pbe = pbe->next;
  204. }
  205. }
  206. }
  207. BUG_ON(pbe);
  208. }
  209. /**
  210. * free_pagedir - free pages allocated with alloc_pagedir()
  211. */
  212. void free_pagedir(struct pbe *pblist)
  213. {
  214. struct pbe *pbe;
  215. while (pblist) {
  216. pbe = (pblist + PB_PAGE_SKIP)->next;
  217. ClearPageNosave(virt_to_page(pblist));
  218. ClearPageNosaveFree(virt_to_page(pblist));
  219. free_page((unsigned long)pblist);
  220. pblist = pbe;
  221. }
  222. }
  223. /**
  224. * fill_pb_page - Create a list of PBEs on a given memory page
  225. */
  226. static inline void fill_pb_page(struct pbe *pbpage)
  227. {
  228. struct pbe *p;
  229. p = pbpage;
  230. pbpage += PB_PAGE_SKIP;
  231. do
  232. p->next = p + 1;
  233. while (++p < pbpage);
  234. }
  235. /**
  236. * create_pbe_list - Create a list of PBEs on top of a given chain
  237. * of memory pages allocated with alloc_pagedir()
  238. */
  239. static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
  240. {
  241. struct pbe *pbpage, *p;
  242. unsigned int num = PBES_PER_PAGE;
  243. for_each_pb_page (pbpage, pblist) {
  244. if (num >= nr_pages)
  245. break;
  246. fill_pb_page(pbpage);
  247. num += PBES_PER_PAGE;
  248. }
  249. if (pbpage) {
  250. for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
  251. p->next = p + 1;
  252. p->next = NULL;
  253. }
  254. }
  255. /**
  256. * On resume it is necessary to trace and eventually free the unsafe
  257. * pages that have been allocated, because they are needed for I/O
  258. * (on x86-64 we likely will "eat" these pages once again while
  259. * creating the temporary page translation tables)
  260. */
  261. struct eaten_page {
  262. struct eaten_page *next;
  263. char padding[PAGE_SIZE - sizeof(void *)];
  264. };
  265. static struct eaten_page *eaten_pages = NULL;
  266. void release_eaten_pages(void)
  267. {
  268. struct eaten_page *p, *q;
  269. p = eaten_pages;
  270. while (p) {
  271. q = p->next;
  272. /* We don't want swsusp_free() to free this page again */
  273. ClearPageNosave(virt_to_page(p));
  274. free_page((unsigned long)p);
  275. p = q;
  276. }
  277. eaten_pages = NULL;
  278. }
  279. /**
  280. * @safe_needed - on resume, for storing the PBE list and the image,
  281. * we can only use memory pages that do not conflict with the pages
  282. * which had been used before suspend.
  283. *
  284. * The unsafe pages are marked with the PG_nosave_free flag
  285. *
  286. * Allocated but unusable (ie eaten) memory pages should be marked
  287. * so that swsusp_free() can release them
  288. */
  289. static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
  290. {
  291. void *res;
  292. if (safe_needed)
  293. do {
  294. res = (void *)get_zeroed_page(gfp_mask);
  295. if (res && PageNosaveFree(virt_to_page(res))) {
  296. /* This is for swsusp_free() */
  297. SetPageNosave(virt_to_page(res));
  298. ((struct eaten_page *)res)->next = eaten_pages;
  299. eaten_pages = res;
  300. }
  301. } while (res && PageNosaveFree(virt_to_page(res)));
  302. else
  303. res = (void *)get_zeroed_page(gfp_mask);
  304. if (res) {
  305. SetPageNosave(virt_to_page(res));
  306. SetPageNosaveFree(virt_to_page(res));
  307. }
  308. return res;
  309. }
  310. unsigned long get_safe_page(gfp_t gfp_mask)
  311. {
  312. return (unsigned long)alloc_image_page(gfp_mask, 1);
  313. }
  314. /**
  315. * alloc_pagedir - Allocate the page directory.
  316. *
  317. * First, determine exactly how many pages we need and
  318. * allocate them.
  319. *
  320. * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
  321. * struct pbe elements (pbes) and the last element in the page points
  322. * to the next page.
  323. *
  324. * On each page we set up a list of struct_pbe elements.
  325. */
  326. struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed)
  327. {
  328. unsigned int num;
  329. struct pbe *pblist, *pbe;
  330. if (!nr_pages)
  331. return NULL;
  332. pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages);
  333. pblist = alloc_image_page(gfp_mask, safe_needed);
  334. /* FIXME: rewrite this ugly loop */
  335. for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
  336. pbe = pbe->next, num += PBES_PER_PAGE) {
  337. pbe += PB_PAGE_SKIP;
  338. pbe->next = alloc_image_page(gfp_mask, safe_needed);
  339. }
  340. if (!pbe) { /* get_zeroed_page() failed */
  341. free_pagedir(pblist);
  342. pblist = NULL;
  343. } else
  344. create_pbe_list(pblist, nr_pages);
  345. return pblist;
  346. }
  347. /**
  348. * Free pages we allocated for suspend. Suspend pages are alocated
  349. * before atomic copy, so we need to free them after resume.
  350. */
  351. void swsusp_free(void)
  352. {
  353. struct zone *zone;
  354. unsigned long zone_pfn;
  355. for_each_zone(zone) {
  356. for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
  357. if (pfn_valid(zone_pfn + zone->zone_start_pfn)) {
  358. struct page *page;
  359. page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
  360. if (PageNosave(page) && PageNosaveFree(page)) {
  361. ClearPageNosave(page);
  362. ClearPageNosaveFree(page);
  363. free_page((long) page_address(page));
  364. }
  365. }
  366. }
  367. }
  368. /**
  369. * enough_free_mem - Make sure we enough free memory to snapshot.
  370. *
  371. * Returns TRUE or FALSE after checking the number of available
  372. * free pages.
  373. */
  374. static int enough_free_mem(unsigned int nr_pages)
  375. {
  376. struct zone *zone;
  377. unsigned int n = 0;
  378. for_each_zone (zone)
  379. if (!is_highmem(zone))
  380. n += zone->free_pages;
  381. pr_debug("swsusp: available memory: %u pages\n", n);
  382. return n > (nr_pages + PAGES_FOR_IO +
  383. (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
  384. }
  385. int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed)
  386. {
  387. struct pbe *p;
  388. for_each_pbe (p, pblist) {
  389. p->address = (unsigned long)alloc_image_page(gfp_mask, safe_needed);
  390. if (!p->address)
  391. return -ENOMEM;
  392. }
  393. return 0;
  394. }
  395. static struct pbe *swsusp_alloc(unsigned int nr_pages)
  396. {
  397. struct pbe *pblist;
  398. if (!(pblist = alloc_pagedir(nr_pages, GFP_ATOMIC | __GFP_COLD, 0))) {
  399. printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
  400. return NULL;
  401. }
  402. if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) {
  403. printk(KERN_ERR "suspend: Allocating image pages failed.\n");
  404. swsusp_free();
  405. return NULL;
  406. }
  407. return pblist;
  408. }
  409. asmlinkage int swsusp_save(void)
  410. {
  411. unsigned int nr_pages;
  412. pr_debug("swsusp: critical section: \n");
  413. drain_local_pages();
  414. nr_pages = count_data_pages();
  415. printk("swsusp: Need to copy %u pages\n", nr_pages);
  416. pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
  417. nr_pages,
  418. (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
  419. PAGES_FOR_IO, nr_free_pages());
  420. if (!enough_free_mem(nr_pages)) {
  421. printk(KERN_ERR "swsusp: Not enough free memory\n");
  422. return -ENOMEM;
  423. }
  424. pagedir_nosave = swsusp_alloc(nr_pages);
  425. if (!pagedir_nosave)
  426. return -ENOMEM;
  427. /* During allocating of suspend pagedir, new cold pages may appear.
  428. * Kill them.
  429. */
  430. drain_local_pages();
  431. copy_data_pages(pagedir_nosave);
  432. /*
  433. * End of critical section. From now on, we can write to memory,
  434. * but we should not touch disk. This specially means we must _not_
  435. * touch swap space! Except we must write out our image of course.
  436. */
  437. nr_copy_pages = nr_pages;
  438. printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages);
  439. return 0;
  440. }