phyp_dump.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. /*
  2. * Hypervisor-assisted dump
  3. *
  4. * Linas Vepstas, Manish Ahuja 2008
  5. * Copyright 2008 IBM Corp.
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version
  10. * 2 of the License, or (at your option) any later version.
  11. *
  12. */
  13. #include <linux/gfp.h>
  14. #include <linux/init.h>
  15. #include <linux/kobject.h>
  16. #include <linux/mm.h>
  17. #include <linux/of.h>
  18. #include <linux/pfn.h>
  19. #include <linux/swap.h>
  20. #include <linux/sysfs.h>
  21. #include <asm/page.h>
  22. #include <asm/phyp_dump.h>
  23. #include <asm/machdep.h>
  24. #include <asm/prom.h>
  25. #include <asm/rtas.h>
  26. /* Variables, used to communicate data between early boot and late boot */
  27. static struct phyp_dump phyp_dump_vars; /* backing storage for phyp_dump_info */
  28. struct phyp_dump *phyp_dump_info = &phyp_dump_vars; /* non-static pointer to the storage above */
  29. static int ibm_configure_kernel_dump; /* RTAS token, looked up in phyp_dump_setup() */
  30. /* ------------------------------------------------- */
  31. /* RTAS interfaces to declare the dump regions */
  32. struct dump_section { /* describes one region of the dump; embedded three times in struct phyp_dump_header */
  33. u32 dump_flags; /* set to DUMP_REQUEST_FLAG by init_dump_header() */
  34. u16 source_type; /* one of the DUMP_SOURCE_* values */
  35. u16 error_flags; /* never written in the visible code; only printed for debugging */
  36. u64 source_address; /* start of the region to be dumped */
  37. u64 source_length; /* size of the region to be dumped */
  38. u64 length_copied; /* only read in the visible code; presumably filled in by the hypervisor -- TODO confirm */
  39. u64 destination_address; /* where the saved copy of the region is placed */
  40. };
  41. struct phyp_dump_header { /* dump declaration passed to the ibm,configure-kernel-dump RTAS call */
  42. u32 version; /* DUMP_HEADER_VERSION */
  43. u16 num_of_sections; /* NUM_DUMP_SECTIONS */
  44. u16 status; /* zeroed on init; tested against DUMP_ERROR_FLAG in phyp_dump_setup() */
  45. u32 first_offset_section; /* offsetof(struct phyp_dump_header, cpu_data) */
  46. u32 dump_disk_section; /* the dump-disk fields (this one through offset_dd) are always set to 0 */
  47. u64 block_num_dd;
  48. u64 num_of_blocks_dd;
  49. u32 offset_dd;
  50. u32 maxtime_to_auto; /* 0 == disabled */
  51. /* No dump disk path string used */
  52. struct dump_section cpu_data; /* DUMP_SOURCE_CPU section */
  53. struct dump_section hpte_data; /* DUMP_SOURCE_HPTE section */
  54. struct dump_section kernel_data; /* DUMP_SOURCE_RMO section (low kernel region) */
  55. };
  56. /* The dump header *must be* in low memory, so .bss it */
  57. static struct phyp_dump_header phdr; /* the header registered with / invalidated through RTAS */
  58. #define NUM_DUMP_SECTIONS 3 /* cpu_data, hpte_data, kernel_data */
  59. #define DUMP_HEADER_VERSION 0x1
  60. #define DUMP_REQUEST_FLAG 0x1 /* value stored in dump_section.dump_flags */
  61. #define DUMP_SOURCE_CPU 0x0001 /* DUMP_SOURCE_*: values for dump_section.source_type */
  62. #define DUMP_SOURCE_HPTE 0x0002
  63. #define DUMP_SOURCE_RMO 0x0011
  64. #define DUMP_ERROR_FLAG 0x2000 /* tested against phyp_dump_header.status */
  65. #define DUMP_TRIGGERED 0x4000 /* not referenced in the visible code; presumably a status bit -- TODO confirm */
  66. #define DUMP_PERFORMED 0x8000 /* not referenced in the visible code; presumably a status bit -- TODO confirm */
  67. /**
  68. * init_dump_header() - initialize the header declaring a dump
  69. * Returns: length of dump save area.
  70. *
  71. * When the hypervisor saves crashed state, it needs to put
  72. * it somewhere. The dump header tells the hypervisor where
  73. * the data can be saved.
  74. */
  75. static unsigned long init_dump_header(struct phyp_dump_header *ph)
  76. {
  77. unsigned long addr_offset = 0;
  78. /* Set up the dump header */
  79. ph->version = DUMP_HEADER_VERSION;
  80. ph->num_of_sections = NUM_DUMP_SECTIONS;
  81. ph->status = 0;
  82. ph->first_offset_section =
  83. (u32)offsetof(struct phyp_dump_header, cpu_data);
  84. ph->dump_disk_section = 0;
  85. ph->block_num_dd = 0;
  86. ph->num_of_blocks_dd = 0;
  87. ph->offset_dd = 0;
  88. ph->maxtime_to_auto = 0; /* disabled */
  89. /* The first two sections are mandatory */
  90. ph->cpu_data.dump_flags = DUMP_REQUEST_FLAG;
  91. ph->cpu_data.source_type = DUMP_SOURCE_CPU;
  92. ph->cpu_data.source_address = 0;
  93. ph->cpu_data.source_length = phyp_dump_info->cpu_state_size;
  94. ph->cpu_data.destination_address = addr_offset;
  95. addr_offset += phyp_dump_info->cpu_state_size;
  96. ph->hpte_data.dump_flags = DUMP_REQUEST_FLAG;
  97. ph->hpte_data.source_type = DUMP_SOURCE_HPTE;
  98. ph->hpte_data.source_address = 0;
  99. ph->hpte_data.source_length = phyp_dump_info->hpte_region_size;
  100. ph->hpte_data.destination_address = addr_offset;
  101. addr_offset += phyp_dump_info->hpte_region_size;
  102. /* This section describes the low kernel region */
  103. ph->kernel_data.dump_flags = DUMP_REQUEST_FLAG;
  104. ph->kernel_data.source_type = DUMP_SOURCE_RMO;
  105. ph->kernel_data.source_address = PHYP_DUMP_RMR_START;
  106. ph->kernel_data.source_length = PHYP_DUMP_RMR_END;
  107. ph->kernel_data.destination_address = addr_offset;
  108. addr_offset += ph->kernel_data.source_length;
  109. return addr_offset;
  110. }
  #ifdef DEBUG
  /* Print every field of one dump section, each line prefixed with @tag. */
  static void print_dump_section(const char *tag, const struct dump_section *sec)
  {
      printk(KERN_INFO "%s dump_flags =%u\n", tag, sec->dump_flags);
      printk(KERN_INFO "%s source_type =%d\n", tag, sec->source_type);
      printk(KERN_INFO "%s error_flags =%d\n", tag, sec->error_flags);
      printk(KERN_INFO "%s source_address =%llx\n", tag,
             (unsigned long long)sec->source_address);
      printk(KERN_INFO "%s source_length =%llx\n", tag,
             (unsigned long long)sec->source_length);
      printk(KERN_INFO "%s length_copied =%llx\n", tag,
             (unsigned long long)sec->length_copied);
  }
  #endif

  /*
   * print_dump_header() - log the contents of a dump header
   * @ph: header to print; NULL is accepted and prints nothing
   *
   * Debugging aid: the body is compiled only when DEBUG is defined,
   * otherwise this is an empty function.
   *
   * The u32 and u64 fields are unsigned, so they are printed with
   * %u / %llu / %llx (with an explicit unsigned long long cast for the
   * 64-bit ones) rather than the signed conversions, which would show
   * large values as negative numbers.
   */
  static void print_dump_header(const struct phyp_dump_header *ph)
  {
  #ifdef DEBUG
      if (ph == NULL)
          return;

      printk(KERN_INFO "dump header:\n");
      printk(KERN_INFO "version = %u\n", ph->version);
      printk(KERN_INFO "Sections = %d\n", ph->num_of_sections);
      printk(KERN_INFO "Status = 0x%x\n", ph->status);

      /* No dump disk is used, so the dump-disk fields should all be 0 */
      printk(KERN_INFO "Offset to first section 0x%x\n",
             ph->first_offset_section);
      printk(KERN_INFO "dump disk sections should be zero\n");
      printk(KERN_INFO "dump disk section = %u\n", ph->dump_disk_section);
      printk(KERN_INFO "block num = %llu\n",
             (unsigned long long)ph->block_num_dd);
      printk(KERN_INFO "number of blocks = %llu\n",
             (unsigned long long)ph->num_of_blocks_dd);
      printk(KERN_INFO "dump disk offset = %u\n", ph->offset_dd);
      printk(KERN_INFO "Max auto time= %u\n", ph->maxtime_to_auto);

      /* The three dump sections: CPU state, HPTE region, kernel region */
      printk(KERN_INFO " CPU AREA\n");
      print_dump_section("cpu", &ph->cpu_data);
      printk(KERN_INFO " HPTE AREA\n");
      print_dump_section("HPTE", &ph->hpte_data);
      printk(KERN_INFO " SRSD AREA\n");
      print_dump_section("SRSD", &ph->kernel_data);
  #endif
  }
  163. static ssize_t show_phyp_dump_active(struct kobject *kobj,
  164. struct kobj_attribute *attr, char *buf)
  165. {
  166. /* create filesystem entry so kdump is phyp-dump aware */
  167. return sprintf(buf, "%lx\n", phyp_dump_info->phyp_dump_at_boot);
  168. }
  169. static struct kobj_attribute pdl = __ATTR(phyp_dump_active, 0600, /* sysfs attribute "phyp_dump_active", mode 0600 */
  170. show_phyp_dump_active,
  171. NULL); /* no store handler is provided */
  172. static void register_dump_area(struct phyp_dump_header *ph, unsigned long addr)
  173. {
  174. int rc;
  175. /* Add addr value if not initialized before */
  176. if (ph->cpu_data.destination_address == 0) {
  177. ph->cpu_data.destination_address += addr;
  178. ph->hpte_data.destination_address += addr;
  179. ph->kernel_data.destination_address += addr;
  180. }
  181. /* ToDo Invalidate kdump and free memory range. */
  182. do {
  183. rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
  184. 1, ph, sizeof(struct phyp_dump_header));
  185. } while (rtas_busy_delay(rc));
  186. if (rc) {
  187. printk(KERN_ERR "phyp-dump: unexpected error (%d) on "
  188. "register\n", rc);
  189. print_dump_header(ph);
  190. return;
  191. }
  192. rc = sysfs_create_file(kernel_kobj, &pdl.attr);
  193. if (rc)
  194. printk(KERN_ERR "phyp-dump: unable to create sysfs"
  195. " file (%d)\n", rc);
  196. }
  197. static
  198. void invalidate_last_dump(struct phyp_dump_header *ph, unsigned long addr)
  199. {
  200. int rc;
  201. /* Add addr value if not initialized before */
  202. if (ph->cpu_data.destination_address == 0) {
  203. ph->cpu_data.destination_address += addr;
  204. ph->hpte_data.destination_address += addr;
  205. ph->kernel_data.destination_address += addr;
  206. }
  207. do {
  208. rc = rtas_call(ibm_configure_kernel_dump, 3, 1, NULL,
  209. 2, ph, sizeof(struct phyp_dump_header));
  210. } while (rtas_busy_delay(rc));
  211. if (rc) {
  212. printk(KERN_ERR "phyp-dump: unexpected error (%d) "
  213. "on invalidate\n", rc);
  214. print_dump_header(ph);
  215. }
  216. }
  217. /* ------------------------------------------------- */
  218. /**
  219. * release_memory_range -- release memory previously memblock_reserved
  220. * @start_pfn: starting physical frame number
  221. * @nr_pages: number of pages to free.
  222. *
  223. * This routine will release memory that had been previously
  224. * memblock_reserved in early boot. The released memory becomes
  225. * available for genreal use.
  226. */
  227. static void release_memory_range(unsigned long start_pfn,
  228. unsigned long nr_pages)
  229. {
  230. struct page *rpage;
  231. unsigned long end_pfn;
  232. long i;
  233. end_pfn = start_pfn + nr_pages;
  234. for (i = start_pfn; i <= end_pfn; i++) {
  235. rpage = pfn_to_page(i);
  236. if (PageReserved(rpage)) {
  237. ClearPageReserved(rpage);
  238. init_page_count(rpage);
  239. __free_page(rpage);
  240. totalram_pages++;
  241. }
  242. }
  243. }
  244. /**
  245. * track_freed_range -- Counts the range being freed.
  246. * Once the counter goes to zero, it re-registers dump for
  247. * future use.
  248. */
  249. static void
  250. track_freed_range(unsigned long addr, unsigned long length)
  251. {
  252. static unsigned long scratch_area_size, reserved_area_size;
  253. if (addr < phyp_dump_info->init_reserve_start)
  254. return;
  255. if ((addr >= phyp_dump_info->init_reserve_start) &&
  256. (addr <= phyp_dump_info->init_reserve_start +
  257. phyp_dump_info->init_reserve_size))
  258. reserved_area_size += length;
  259. if ((addr >= phyp_dump_info->reserved_scratch_addr) &&
  260. (addr <= phyp_dump_info->reserved_scratch_addr +
  261. phyp_dump_info->reserved_scratch_size))
  262. scratch_area_size += length;
  263. if ((reserved_area_size == phyp_dump_info->init_reserve_size) &&
  264. (scratch_area_size == phyp_dump_info->reserved_scratch_size)) {
  265. invalidate_last_dump(&phdr,
  266. phyp_dump_info->reserved_scratch_addr);
  267. register_dump_area(&phdr,
  268. phyp_dump_info->reserved_scratch_addr);
  269. }
  270. }
  271. /* ------------------------------------------------- */
  272. /**
  273. * sysfs_release_region -- sysfs interface to release memory range.
  274. *
  275. * Usage:
  276. * "echo <start addr> <length> > /sys/kernel/release_region"
  277. *
  278. * Example:
  279. * "echo 0x40000000 0x10000000 > /sys/kernel/release_region"
  280. *
  281. * will release 256MB starting at 1GB.
  282. */
  283. static ssize_t store_release_region(struct kobject *kobj,
  284. struct kobj_attribute *attr,
  285. const char *buf, size_t count)
  286. {
  287. unsigned long start_addr, length, end_addr;
  288. unsigned long start_pfn, nr_pages;
  289. ssize_t ret;
  290. ret = sscanf(buf, "%lx %lx", &start_addr, &length);
  291. if (ret != 2)
  292. return -EINVAL;
  293. track_freed_range(start_addr, length);
  294. /* Range-check - don't free any reserved memory that
  295. * wasn't reserved for phyp-dump */
  296. if (start_addr < phyp_dump_info->init_reserve_start)
  297. start_addr = phyp_dump_info->init_reserve_start;
  298. end_addr = phyp_dump_info->init_reserve_start +
  299. phyp_dump_info->init_reserve_size;
  300. if (start_addr+length > end_addr)
  301. length = end_addr - start_addr;
  302. /* Release the region of memory assed in by user */
  303. start_pfn = PFN_DOWN(start_addr);
  304. nr_pages = PFN_DOWN(length);
  305. release_memory_range(start_pfn, nr_pages);
  306. return count;
  307. }
  308. static ssize_t show_release_region(struct kobject *kobj,
  309. struct kobj_attribute *attr, char *buf)
  310. {
  311. u64 second_addr_range;
  312. /* total reserved size - start of scratch area */
  313. second_addr_range = phyp_dump_info->init_reserve_size -
  314. phyp_dump_info->reserved_scratch_size;
  315. return sprintf(buf, "CPU:0x%llx-0x%llx: HPTE:0x%llx-0x%llx:"
  316. " DUMP:0x%llx-0x%llx, 0x%lx-0x%llx:\n",
  317. phdr.cpu_data.destination_address,
  318. phdr.cpu_data.length_copied,
  319. phdr.hpte_data.destination_address,
  320. phdr.hpte_data.length_copied,
  321. phdr.kernel_data.destination_address,
  322. phdr.kernel_data.length_copied,
  323. phyp_dump_info->init_reserve_start,
  324. second_addr_range);
  325. }
  326. static struct kobj_attribute rr = __ATTR(release_region, 0600, /* sysfs attribute "release_region", mode 0600 */
  327. show_release_region, /* read: reports the dump section addresses and lengths */
  328. store_release_region); /* write: "<start addr> <length>" releases that range */
  329. static int __init phyp_dump_setup(void)
  330. {
  331. struct device_node *rtas;
  332. const struct phyp_dump_header *dump_header = NULL;
  333. unsigned long dump_area_start;
  334. unsigned long dump_area_length;
  335. int header_len = 0;
  336. int rc;
  337. /* If no memory was reserved in early boot, there is nothing to do */
  338. if (phyp_dump_info->init_reserve_size == 0)
  339. return 0;
  340. /* Return if phyp dump not supported */
  341. if (!phyp_dump_info->phyp_dump_configured)
  342. return -ENOSYS;
  343. /* Is there dump data waiting for us? If there isn't,
  344. * then register a new dump area, and release all of
  345. * the rest of the reserved ram.
  346. *
  347. * The /rtas/ibm,kernel-dump rtas node is present only
  348. * if there is dump data waiting for us.
  349. */
  350. rtas = of_find_node_by_path("/rtas");
  351. if (rtas) {
  352. dump_header = of_get_property(rtas, "ibm,kernel-dump",
  353. &header_len);
  354. of_node_put(rtas);
  355. }
  356. ibm_configure_kernel_dump = rtas_token("ibm,configure-kernel-dump");
  357. print_dump_header(dump_header);
  358. dump_area_length = init_dump_header(&phdr);
  359. /* align down */
  360. dump_area_start = phyp_dump_info->init_reserve_start & PAGE_MASK;
  361. if (dump_header == NULL) {
  362. register_dump_area(&phdr, dump_area_start);
  363. return 0;
  364. }
  365. /* re-register the dump area, if old dump was invalid */
  366. if ((dump_header) && (dump_header->status & DUMP_ERROR_FLAG)) {
  367. invalidate_last_dump(&phdr, dump_area_start);
  368. register_dump_area(&phdr, dump_area_start);
  369. return 0;
  370. }
  371. if (dump_header) {
  372. phyp_dump_info->reserved_scratch_addr =
  373. dump_header->cpu_data.destination_address;
  374. phyp_dump_info->reserved_scratch_size =
  375. dump_header->cpu_data.source_length +
  376. dump_header->hpte_data.source_length +
  377. dump_header->kernel_data.source_length;
  378. }
  379. /* Should we create a dump_subsys, analogous to s390/ipl.c ? */
  380. rc = sysfs_create_file(kernel_kobj, &rr.attr);
  381. if (rc)
  382. printk(KERN_ERR "phyp-dump: unable to create sysfs file (%d)\n",
  383. rc);
  384. /* ToDo: re-register the dump area, for next time. */
  385. return 0;
  386. }
  387. machine_subsys_initcall(pseries, phyp_dump_setup);
  388. int __init early_init_dt_scan_phyp_dump(unsigned long node,
  389. const char *uname, int depth, void *data)
  390. {
  391. const unsigned int *sizes;
  392. phyp_dump_info->phyp_dump_configured = 0;
  393. phyp_dump_info->phyp_dump_is_active = 0;
  394. if (depth != 1 || strcmp(uname, "rtas") != 0)
  395. return 0;
  396. if (of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL))
  397. phyp_dump_info->phyp_dump_configured++;
  398. if (of_get_flat_dt_prop(node, "ibm,dump-kernel", NULL))
  399. phyp_dump_info->phyp_dump_is_active++;
  400. sizes = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
  401. NULL);
  402. if (!sizes)
  403. return 0;
  404. if (sizes[0] == 1)
  405. phyp_dump_info->cpu_state_size = *((unsigned long *)&sizes[1]);
  406. if (sizes[3] == 2)
  407. phyp_dump_info->hpte_region_size =
  408. *((unsigned long *)&sizes[4]);
  409. return 1;
  410. }
  411. /* Look for phyp_dump= cmdline option */
  412. static int __init early_phyp_dump_enabled(char *p)
  413. {
  414. phyp_dump_info->phyp_dump_at_boot = 1;
  415. if (!p)
  416. return 0;
  417. if (strncmp(p, "1", 1) == 0)
  418. phyp_dump_info->phyp_dump_at_boot = 1;
  419. else if (strncmp(p, "0", 1) == 0)
  420. phyp_dump_info->phyp_dump_at_boot = 0;
  421. return 0;
  422. }
  423. early_param("phyp_dump", early_phyp_dump_enabled);
  424. /* Look for phyp_dump_reserve_size= cmdline option */
  425. static int __init early_phyp_dump_reserve_size(char *p)
  426. {
  427. if (p)
  428. phyp_dump_info->reserve_bootvar = memparse(p, &p);
  429. return 0;
  430. }
  431. early_param("phyp_dump_reserve_size", early_phyp_dump_reserve_size);