fadump.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165
  1. /*
  2. * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
  3. * dump with assistance from firmware. This approach does not use kexec,
  4. * instead firmware assists in booting the kdump kernel while preserving
  5. * memory contents. The most of the code implementation has been adapted
  6. * from phyp assisted dump implementation written by Linas Vepstas and
  7. * Manish Ahuja
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU General Public License as published by
  11. * the Free Software Foundation; either version 2 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * This program is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU General Public License
  20. * along with this program; if not, write to the Free Software
  21. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  22. *
  23. * Copyright 2011 IBM Corporation
  24. * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
  25. */
  26. #undef DEBUG
  27. #define pr_fmt(fmt) "fadump: " fmt
  28. #include <linux/string.h>
  29. #include <linux/memblock.h>
  30. #include <linux/delay.h>
  31. #include <linux/debugfs.h>
  32. #include <linux/seq_file.h>
  33. #include <linux/crash_dump.h>
  34. #include <asm/page.h>
  35. #include <asm/prom.h>
  36. #include <asm/rtas.h>
  37. #include <asm/fadump.h>
/* Firmware-assisted dump state shared by the routines in this file. */
static struct fw_dump fw_dump;
/*
 * Dump memory structure kept by this kernel; its
 * rmr_region.destination_address is used when ELF core headers are built.
 */
static struct fadump_mem_struct fdm;
/*
 * Points at the "ibm,kernel-dump" device-tree property when one is present,
 * i.e. when dump data from a previous boot is waiting; NULL otherwise.
 */
static const struct fadump_mem_struct *fdm_active;
/* NOTE(review): no user of this mutex is visible in this part of the file. */
static DEFINE_MUTEX(fadump_mutex);
/*
 * Memory ranges collected by fadump_add_crash_memory(); crash_mem_ranges is
 * the number of entries currently in use.
 */
struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
int crash_mem_ranges;
  44. /* Scan the Firmware Assisted dump configuration details. */
  45. int __init early_init_dt_scan_fw_dump(unsigned long node,
  46. const char *uname, int depth, void *data)
  47. {
  48. __be32 *sections;
  49. int i, num_sections;
  50. unsigned long size;
  51. const int *token;
  52. if (depth != 1 || strcmp(uname, "rtas") != 0)
  53. return 0;
  54. /*
  55. * Check if Firmware Assisted dump is supported. if yes, check
  56. * if dump has been initiated on last reboot.
  57. */
  58. token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
  59. if (!token)
  60. return 0;
  61. fw_dump.fadump_supported = 1;
  62. fw_dump.ibm_configure_kernel_dump = *token;
  63. /*
  64. * The 'ibm,kernel-dump' rtas node is present only if there is
  65. * dump data waiting for us.
  66. */
  67. fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
  68. if (fdm_active)
  69. fw_dump.dump_active = 1;
  70. /* Get the sizes required to store dump data for the firmware provided
  71. * dump sections.
  72. * For each dump section type supported, a 32bit cell which defines
  73. * the ID of a supported section followed by two 32 bit cells which
  74. * gives teh size of the section in bytes.
  75. */
  76. sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
  77. &size);
  78. if (!sections)
  79. return 0;
  80. num_sections = size / (3 * sizeof(u32));
  81. for (i = 0; i < num_sections; i++, sections += 3) {
  82. u32 type = (u32)of_read_number(sections, 1);
  83. switch (type) {
  84. case FADUMP_CPU_STATE_DATA:
  85. fw_dump.cpu_state_data_size =
  86. of_read_ulong(&sections[1], 2);
  87. break;
  88. case FADUMP_HPTE_REGION:
  89. fw_dump.hpte_region_size =
  90. of_read_ulong(&sections[1], 2);
  91. break;
  92. }
  93. }
  94. return 1;
  95. }
  96. int is_fadump_active(void)
  97. {
  98. return fw_dump.dump_active;
  99. }
  100. /* Print firmware assisted dump configurations for debugging purpose. */
  101. static void fadump_show_config(void)
  102. {
  103. pr_debug("Support for firmware-assisted dump (fadump): %s\n",
  104. (fw_dump.fadump_supported ? "present" : "no support"));
  105. if (!fw_dump.fadump_supported)
  106. return;
  107. pr_debug("Fadump enabled : %s\n",
  108. (fw_dump.fadump_enabled ? "yes" : "no"));
  109. pr_debug("Dump Active : %s\n",
  110. (fw_dump.dump_active ? "yes" : "no"));
  111. pr_debug("Dump section sizes:\n");
  112. pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
  113. pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
  114. pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
  115. }
  116. static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
  117. unsigned long addr)
  118. {
  119. if (!fdm)
  120. return 0;
  121. memset(fdm, 0, sizeof(struct fadump_mem_struct));
  122. addr = addr & PAGE_MASK;
  123. fdm->header.dump_format_version = 0x00000001;
  124. fdm->header.dump_num_sections = 3;
  125. fdm->header.dump_status_flag = 0;
  126. fdm->header.offset_first_dump_section =
  127. (u32)offsetof(struct fadump_mem_struct, cpu_state_data);
  128. /*
  129. * Fields for disk dump option.
  130. * We are not using disk dump option, hence set these fields to 0.
  131. */
  132. fdm->header.dd_block_size = 0;
  133. fdm->header.dd_block_offset = 0;
  134. fdm->header.dd_num_blocks = 0;
  135. fdm->header.dd_offset_disk_path = 0;
  136. /* set 0 to disable an automatic dump-reboot. */
  137. fdm->header.max_time_auto = 0;
  138. /* Kernel dump sections */
  139. /* cpu state data section. */
  140. fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG;
  141. fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA;
  142. fdm->cpu_state_data.source_address = 0;
  143. fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size;
  144. fdm->cpu_state_data.destination_address = addr;
  145. addr += fw_dump.cpu_state_data_size;
  146. /* hpte region section */
  147. fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG;
  148. fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION;
  149. fdm->hpte_region.source_address = 0;
  150. fdm->hpte_region.source_len = fw_dump.hpte_region_size;
  151. fdm->hpte_region.destination_address = addr;
  152. addr += fw_dump.hpte_region_size;
  153. /* RMA region section */
  154. fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG;
  155. fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION;
  156. fdm->rmr_region.source_address = RMA_START;
  157. fdm->rmr_region.source_len = fw_dump.boot_memory_size;
  158. fdm->rmr_region.destination_address = addr;
  159. addr += fw_dump.boot_memory_size;
  160. return addr;
  161. }
  162. /**
  163. * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
  164. *
  165. * Function to find the largest memory size we need to reserve during early
  166. * boot process. This will be the size of the memory that is required for a
  167. * kernel to boot successfully.
  168. *
  169. * This function has been taken from phyp-assisted dump feature implementation.
  170. *
  171. * returns larger of 256MB or 5% rounded down to multiples of 256MB.
  172. *
  173. * TODO: Come up with better approach to find out more accurate memory size
  174. * that is required for a kernel to boot successfully.
  175. *
  176. */
  177. static inline unsigned long fadump_calculate_reserve_size(void)
  178. {
  179. unsigned long size;
  180. /*
  181. * Check if the size is specified through fadump_reserve_mem= cmdline
  182. * option. If yes, then use that.
  183. */
  184. if (fw_dump.reserve_bootvar)
  185. return fw_dump.reserve_bootvar;
  186. /* divide by 20 to get 5% of value */
  187. size = memblock_end_of_DRAM() / 20;
  188. /* round it down in multiples of 256 */
  189. size = size & ~0x0FFFFFFFUL;
  190. /* Truncate to memory_limit. We don't want to over reserve the memory.*/
  191. if (memory_limit && size > memory_limit)
  192. size = memory_limit;
  193. return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
  194. }
  195. /*
  196. * Calculate the total memory size required to be reserved for
  197. * firmware-assisted dump registration.
  198. */
  199. static unsigned long get_fadump_area_size(void)
  200. {
  201. unsigned long size = 0;
  202. size += fw_dump.cpu_state_data_size;
  203. size += fw_dump.hpte_region_size;
  204. size += fw_dump.boot_memory_size;
  205. size += sizeof(struct fadump_crash_info_header);
  206. size += sizeof(struct elfhdr); /* ELF core header.*/
  207. size += sizeof(struct elf_phdr); /* place holder for cpu notes */
  208. /* Program headers for crash memory regions. */
  209. size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
  210. size = PAGE_ALIGN(size);
  211. return size;
  212. }
  213. int __init fadump_reserve_mem(void)
  214. {
  215. unsigned long base, size, memory_boundary;
  216. if (!fw_dump.fadump_enabled)
  217. return 0;
  218. if (!fw_dump.fadump_supported) {
  219. printk(KERN_INFO "Firmware-assisted dump is not supported on"
  220. " this hardware\n");
  221. fw_dump.fadump_enabled = 0;
  222. return 0;
  223. }
  224. /*
  225. * Initialize boot memory size
  226. * If dump is active then we have already calculated the size during
  227. * first kernel.
  228. */
  229. if (fdm_active)
  230. fw_dump.boot_memory_size = fdm_active->rmr_region.source_len;
  231. else
  232. fw_dump.boot_memory_size = fadump_calculate_reserve_size();
  233. /*
  234. * Calculate the memory boundary.
  235. * If memory_limit is less than actual memory boundary then reserve
  236. * the memory for fadump beyond the memory_limit and adjust the
  237. * memory_limit accordingly, so that the running kernel can run with
  238. * specified memory_limit.
  239. */
  240. if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
  241. size = get_fadump_area_size();
  242. if ((memory_limit + size) < memblock_end_of_DRAM())
  243. memory_limit += size;
  244. else
  245. memory_limit = memblock_end_of_DRAM();
  246. printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
  247. " dump, now %#016llx\n",
  248. (unsigned long long)memory_limit);
  249. }
  250. if (memory_limit)
  251. memory_boundary = memory_limit;
  252. else
  253. memory_boundary = memblock_end_of_DRAM();
  254. if (fw_dump.dump_active) {
  255. printk(KERN_INFO "Firmware-assisted dump is active.\n");
  256. /*
  257. * If last boot has crashed then reserve all the memory
  258. * above boot_memory_size so that we don't touch it until
  259. * dump is written to disk by userspace tool. This memory
  260. * will be released for general use once the dump is saved.
  261. */
  262. base = fw_dump.boot_memory_size;
  263. size = memory_boundary - base;
  264. memblock_reserve(base, size);
  265. printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
  266. "for saving crash dump\n",
  267. (unsigned long)(size >> 20),
  268. (unsigned long)(base >> 20));
  269. fw_dump.fadumphdr_addr =
  270. fdm_active->rmr_region.destination_address +
  271. fdm_active->rmr_region.source_len;
  272. pr_debug("fadumphdr_addr = %p\n",
  273. (void *) fw_dump.fadumphdr_addr);
  274. } else {
  275. /* Reserve the memory at the top of memory. */
  276. size = get_fadump_area_size();
  277. base = memory_boundary - size;
  278. memblock_reserve(base, size);
  279. printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
  280. "for firmware-assisted dump\n",
  281. (unsigned long)(size >> 20),
  282. (unsigned long)(base >> 20));
  283. }
  284. fw_dump.reserve_dump_area_start = base;
  285. fw_dump.reserve_dump_area_size = size;
  286. return 1;
  287. }
  288. /* Look for fadump= cmdline option. */
  289. static int __init early_fadump_param(char *p)
  290. {
  291. if (!p)
  292. return 1;
  293. if (strncmp(p, "on", 2) == 0)
  294. fw_dump.fadump_enabled = 1;
  295. else if (strncmp(p, "off", 3) == 0)
  296. fw_dump.fadump_enabled = 0;
  297. return 0;
  298. }
  299. early_param("fadump", early_fadump_param);
  300. /* Look for fadump_reserve_mem= cmdline option */
  301. static int __init early_fadump_reserve_mem(char *p)
  302. {
  303. if (p)
  304. fw_dump.reserve_bootvar = memparse(p, &p);
  305. return 0;
  306. }
  307. early_param("fadump_reserve_mem", early_fadump_reserve_mem);
  308. static void register_fw_dump(struct fadump_mem_struct *fdm)
  309. {
  310. int rc;
  311. unsigned int wait_time;
  312. pr_debug("Registering for firmware-assisted kernel dump...\n");
  313. /* TODO: Add upper time limit for the delay */
  314. do {
  315. rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
  316. FADUMP_REGISTER, fdm,
  317. sizeof(struct fadump_mem_struct));
  318. wait_time = rtas_busy_delay_time(rc);
  319. if (wait_time)
  320. mdelay(wait_time);
  321. } while (wait_time);
  322. switch (rc) {
  323. case -1:
  324. printk(KERN_ERR "Failed to register firmware-assisted kernel"
  325. " dump. Hardware Error(%d).\n", rc);
  326. break;
  327. case -3:
  328. printk(KERN_ERR "Failed to register firmware-assisted kernel"
  329. " dump. Parameter Error(%d).\n", rc);
  330. break;
  331. case -9:
  332. printk(KERN_ERR "firmware-assisted kernel dump is already "
  333. " registered.");
  334. fw_dump.dump_registered = 1;
  335. break;
  336. case 0:
  337. printk(KERN_INFO "firmware-assisted kernel dump registration"
  338. " is successful\n");
  339. fw_dump.dump_registered = 1;
  340. break;
  341. }
  342. }
  343. void crash_fadump(struct pt_regs *regs, const char *str)
  344. {
  345. struct fadump_crash_info_header *fdh = NULL;
  346. if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
  347. return;
  348. fdh = __va(fw_dump.fadumphdr_addr);
  349. crashing_cpu = smp_processor_id();
  350. fdh->crashing_cpu = crashing_cpu;
  351. crash_save_vmcoreinfo();
  352. if (regs)
  353. fdh->regs = *regs;
  354. else
  355. ppc_save_regs(&fdh->regs);
  356. fdh->cpu_online_mask = *cpu_online_mask;
  357. /* Call ibm,os-term rtas call to trigger firmware assisted dump */
  358. rtas_os_term((char *)str);
  359. }
  360. #define GPR_MASK 0xffffff0000000000
  361. static inline int fadump_gpr_index(u64 id)
  362. {
  363. int i = -1;
  364. char str[3];
  365. if ((id & GPR_MASK) == REG_ID("GPR")) {
  366. /* get the digits at the end */
  367. id &= ~GPR_MASK;
  368. id >>= 24;
  369. str[2] = '\0';
  370. str[1] = id & 0xff;
  371. str[0] = (id >> 8) & 0xff;
  372. sscanf(str, "%d", &i);
  373. if (i > 31)
  374. i = -1;
  375. }
  376. return i;
  377. }
  378. static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
  379. u64 reg_val)
  380. {
  381. int i;
  382. i = fadump_gpr_index(reg_id);
  383. if (i >= 0)
  384. regs->gpr[i] = (unsigned long)reg_val;
  385. else if (reg_id == REG_ID("NIA"))
  386. regs->nip = (unsigned long)reg_val;
  387. else if (reg_id == REG_ID("MSR"))
  388. regs->msr = (unsigned long)reg_val;
  389. else if (reg_id == REG_ID("CTR"))
  390. regs->ctr = (unsigned long)reg_val;
  391. else if (reg_id == REG_ID("LR"))
  392. regs->link = (unsigned long)reg_val;
  393. else if (reg_id == REG_ID("XER"))
  394. regs->xer = (unsigned long)reg_val;
  395. else if (reg_id == REG_ID("CR"))
  396. regs->ccr = (unsigned long)reg_val;
  397. else if (reg_id == REG_ID("DAR"))
  398. regs->dar = (unsigned long)reg_val;
  399. else if (reg_id == REG_ID("DSISR"))
  400. regs->dsisr = (unsigned long)reg_val;
  401. }
  402. static struct fadump_reg_entry*
  403. fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
  404. {
  405. memset(regs, 0, sizeof(struct pt_regs));
  406. while (reg_entry->reg_id != REG_ID("CPUEND")) {
  407. fadump_set_regval(regs, reg_entry->reg_id,
  408. reg_entry->reg_value);
  409. reg_entry++;
  410. }
  411. reg_entry++;
  412. return reg_entry;
  413. }
  414. static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
  415. void *data, size_t data_len)
  416. {
  417. struct elf_note note;
  418. note.n_namesz = strlen(name) + 1;
  419. note.n_descsz = data_len;
  420. note.n_type = type;
  421. memcpy(buf, &note, sizeof(note));
  422. buf += (sizeof(note) + 3)/4;
  423. memcpy(buf, name, note.n_namesz);
  424. buf += (note.n_namesz + 3)/4;
  425. memcpy(buf, data, note.n_descsz);
  426. buf += (note.n_descsz + 3)/4;
  427. return buf;
  428. }
  429. static void fadump_final_note(u32 *buf)
  430. {
  431. struct elf_note note;
  432. note.n_namesz = 0;
  433. note.n_descsz = 0;
  434. note.n_type = 0;
  435. memcpy(buf, &note, sizeof(note));
  436. }
  437. static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
  438. {
  439. struct elf_prstatus prstatus;
  440. memset(&prstatus, 0, sizeof(prstatus));
  441. /*
  442. * FIXME: How do i get PID? Do I really need it?
  443. * prstatus.pr_pid = ????
  444. */
  445. elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
  446. buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
  447. &prstatus, sizeof(prstatus));
  448. return buf;
  449. }
  450. static void fadump_update_elfcore_header(char *bufp)
  451. {
  452. struct elfhdr *elf;
  453. struct elf_phdr *phdr;
  454. elf = (struct elfhdr *)bufp;
  455. bufp += sizeof(struct elfhdr);
  456. /* First note is a place holder for cpu notes info. */
  457. phdr = (struct elf_phdr *)bufp;
  458. if (phdr->p_type == PT_NOTE) {
  459. phdr->p_paddr = fw_dump.cpu_notes_buf;
  460. phdr->p_offset = phdr->p_paddr;
  461. phdr->p_filesz = fw_dump.cpu_notes_buf_size;
  462. phdr->p_memsz = fw_dump.cpu_notes_buf_size;
  463. }
  464. return;
  465. }
  466. static void *fadump_cpu_notes_buf_alloc(unsigned long size)
  467. {
  468. void *vaddr;
  469. struct page *page;
  470. unsigned long order, count, i;
  471. order = get_order(size);
  472. vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
  473. if (!vaddr)
  474. return NULL;
  475. count = 1 << order;
  476. page = virt_to_page(vaddr);
  477. for (i = 0; i < count; i++)
  478. SetPageReserved(page + i);
  479. return vaddr;
  480. }
  481. static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
  482. {
  483. struct page *page;
  484. unsigned long order, count, i;
  485. order = get_order(size);
  486. count = 1 << order;
  487. page = virt_to_page(vaddr);
  488. for (i = 0; i < count; i++)
  489. ClearPageReserved(page + i);
  490. __free_pages(page, order);
  491. }
  492. /*
  493. * Read CPU state dump data and convert it into ELF notes.
  494. * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
  495. * used to access the data to allow for additional fields to be added without
  496. * affecting compatibility. Each list of registers for a CPU starts with
  497. * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
  498. * 8 Byte ASCII identifier and 8 Byte register value. The register entry
  499. * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
  500. * of register value. For more details refer to PAPR document.
  501. *
  502. * Only for the crashing cpu we ignore the CPU dump data and get exact
  503. * state from fadump crash info structure populated by first kernel at the
  504. * time of crash.
  505. */
  506. static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
  507. {
  508. struct fadump_reg_save_area_header *reg_header;
  509. struct fadump_reg_entry *reg_entry;
  510. struct fadump_crash_info_header *fdh = NULL;
  511. void *vaddr;
  512. unsigned long addr;
  513. u32 num_cpus, *note_buf;
  514. struct pt_regs regs;
  515. int i, rc = 0, cpu = 0;
  516. if (!fdm->cpu_state_data.bytes_dumped)
  517. return -EINVAL;
  518. addr = fdm->cpu_state_data.destination_address;
  519. vaddr = __va(addr);
  520. reg_header = vaddr;
  521. if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
  522. printk(KERN_ERR "Unable to read register save area.\n");
  523. return -ENOENT;
  524. }
  525. pr_debug("--------CPU State Data------------\n");
  526. pr_debug("Magic Number: %llx\n", reg_header->magic_number);
  527. pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
  528. vaddr += reg_header->num_cpu_offset;
  529. num_cpus = *((u32 *)(vaddr));
  530. pr_debug("NumCpus : %u\n", num_cpus);
  531. vaddr += sizeof(u32);
  532. reg_entry = (struct fadump_reg_entry *)vaddr;
  533. /* Allocate buffer to hold cpu crash notes. */
  534. fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
  535. fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
  536. note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
  537. if (!note_buf) {
  538. printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
  539. "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
  540. return -ENOMEM;
  541. }
  542. fw_dump.cpu_notes_buf = __pa(note_buf);
  543. pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
  544. (num_cpus * sizeof(note_buf_t)), note_buf);
  545. if (fw_dump.fadumphdr_addr)
  546. fdh = __va(fw_dump.fadumphdr_addr);
  547. for (i = 0; i < num_cpus; i++) {
  548. if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
  549. printk(KERN_ERR "Unable to read CPU state data\n");
  550. rc = -ENOENT;
  551. goto error_out;
  552. }
  553. /* Lower 4 bytes of reg_value contains logical cpu id */
  554. cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
  555. if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
  556. SKIP_TO_NEXT_CPU(reg_entry);
  557. continue;
  558. }
  559. pr_debug("Reading register data for cpu %d...\n", cpu);
  560. if (fdh && fdh->crashing_cpu == cpu) {
  561. regs = fdh->regs;
  562. note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
  563. SKIP_TO_NEXT_CPU(reg_entry);
  564. } else {
  565. reg_entry++;
  566. reg_entry = fadump_read_registers(reg_entry, &regs);
  567. note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
  568. }
  569. }
  570. fadump_final_note(note_buf);
  571. pr_debug("Updating elfcore header (%llx) with cpu notes\n",
  572. fdh->elfcorehdr_addr);
  573. fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
  574. return 0;
  575. error_out:
  576. fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
  577. fw_dump.cpu_notes_buf_size);
  578. fw_dump.cpu_notes_buf = 0;
  579. fw_dump.cpu_notes_buf_size = 0;
  580. return rc;
  581. }
  582. /*
  583. * Validate and process the dump data stored by firmware before exporting
  584. * it through '/proc/vmcore'.
  585. */
  586. static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
  587. {
  588. struct fadump_crash_info_header *fdh;
  589. int rc = 0;
  590. if (!fdm_active || !fw_dump.fadumphdr_addr)
  591. return -EINVAL;
  592. /* Check if the dump data is valid. */
  593. if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
  594. (fdm_active->cpu_state_data.error_flags != 0) ||
  595. (fdm_active->rmr_region.error_flags != 0)) {
  596. printk(KERN_ERR "Dump taken by platform is not valid\n");
  597. return -EINVAL;
  598. }
  599. if ((fdm_active->rmr_region.bytes_dumped !=
  600. fdm_active->rmr_region.source_len) ||
  601. !fdm_active->cpu_state_data.bytes_dumped) {
  602. printk(KERN_ERR "Dump taken by platform is incomplete\n");
  603. return -EINVAL;
  604. }
  605. /* Validate the fadump crash info header */
  606. fdh = __va(fw_dump.fadumphdr_addr);
  607. if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
  608. printk(KERN_ERR "Crash info header is not valid.\n");
  609. return -EINVAL;
  610. }
  611. rc = fadump_build_cpu_notes(fdm_active);
  612. if (rc)
  613. return rc;
  614. /*
  615. * We are done validating dump info and elfcore header is now ready
  616. * to be exported. set elfcorehdr_addr so that vmcore module will
  617. * export the elfcore header through '/proc/vmcore'.
  618. */
  619. elfcorehdr_addr = fdh->elfcorehdr_addr;
  620. return 0;
  621. }
  622. static inline void fadump_add_crash_memory(unsigned long long base,
  623. unsigned long long end)
  624. {
  625. if (base == end)
  626. return;
  627. pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
  628. crash_mem_ranges, base, end - 1, (end - base));
  629. crash_memory_ranges[crash_mem_ranges].base = base;
  630. crash_memory_ranges[crash_mem_ranges].size = end - base;
  631. crash_mem_ranges++;
  632. }
  633. static void fadump_exclude_reserved_area(unsigned long long start,
  634. unsigned long long end)
  635. {
  636. unsigned long long ra_start, ra_end;
  637. ra_start = fw_dump.reserve_dump_area_start;
  638. ra_end = ra_start + fw_dump.reserve_dump_area_size;
  639. if ((ra_start < end) && (ra_end > start)) {
  640. if ((start < ra_start) && (end > ra_end)) {
  641. fadump_add_crash_memory(start, ra_start);
  642. fadump_add_crash_memory(ra_end, end);
  643. } else if (start < ra_start) {
  644. fadump_add_crash_memory(start, ra_start);
  645. } else if (ra_end < end) {
  646. fadump_add_crash_memory(ra_end, end);
  647. }
  648. } else
  649. fadump_add_crash_memory(start, end);
  650. }
  651. static int fadump_init_elfcore_header(char *bufp)
  652. {
  653. struct elfhdr *elf;
  654. elf = (struct elfhdr *) bufp;
  655. bufp += sizeof(struct elfhdr);
  656. memcpy(elf->e_ident, ELFMAG, SELFMAG);
  657. elf->e_ident[EI_CLASS] = ELF_CLASS;
  658. elf->e_ident[EI_DATA] = ELF_DATA;
  659. elf->e_ident[EI_VERSION] = EV_CURRENT;
  660. elf->e_ident[EI_OSABI] = ELF_OSABI;
  661. memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
  662. elf->e_type = ET_CORE;
  663. elf->e_machine = ELF_ARCH;
  664. elf->e_version = EV_CURRENT;
  665. elf->e_entry = 0;
  666. elf->e_phoff = sizeof(struct elfhdr);
  667. elf->e_shoff = 0;
  668. elf->e_flags = ELF_CORE_EFLAGS;
  669. elf->e_ehsize = sizeof(struct elfhdr);
  670. elf->e_phentsize = sizeof(struct elf_phdr);
  671. elf->e_phnum = 0;
  672. elf->e_shentsize = 0;
  673. elf->e_shnum = 0;
  674. elf->e_shstrndx = 0;
  675. return 0;
  676. }
  677. /*
  678. * Traverse through memblock structure and setup crash memory ranges. These
  679. * ranges will be used create PT_LOAD program headers in elfcore header.
  680. */
  681. static void fadump_setup_crash_memory_ranges(void)
  682. {
  683. struct memblock_region *reg;
  684. unsigned long long start, end;
  685. pr_debug("Setup crash memory ranges.\n");
  686. crash_mem_ranges = 0;
  687. /*
  688. * add the first memory chunk (RMA_START through boot_memory_size) as
  689. * a separate memory chunk. The reason is, at the time crash firmware
  690. * will move the content of this memory chunk to different location
  691. * specified during fadump registration. We need to create a separate
  692. * program header for this chunk with the correct offset.
  693. */
  694. fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
  695. for_each_memblock(memory, reg) {
  696. start = (unsigned long long)reg->base;
  697. end = start + (unsigned long long)reg->size;
  698. if (start == RMA_START && end >= fw_dump.boot_memory_size)
  699. start = fw_dump.boot_memory_size;
  700. /* add this range excluding the reserved dump area. */
  701. fadump_exclude_reserved_area(start, end);
  702. }
  703. }
  704. /*
  705. * If the given physical address falls within the boot memory region then
  706. * return the relocated address that points to the dump region reserved
  707. * for saving initial boot memory contents.
  708. */
  709. static inline unsigned long fadump_relocate(unsigned long paddr)
  710. {
  711. if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
  712. return fdm.rmr_region.destination_address + paddr;
  713. else
  714. return paddr;
  715. }
  716. static int fadump_create_elfcore_headers(char *bufp)
  717. {
  718. struct elfhdr *elf;
  719. struct elf_phdr *phdr;
  720. int i;
  721. fadump_init_elfcore_header(bufp);
  722. elf = (struct elfhdr *)bufp;
  723. bufp += sizeof(struct elfhdr);
  724. /*
  725. * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
  726. * will be populated during second kernel boot after crash. Hence
  727. * this PT_NOTE will always be the first elf note.
  728. *
  729. * NOTE: Any new ELF note addition should be placed after this note.
  730. */
  731. phdr = (struct elf_phdr *)bufp;
  732. bufp += sizeof(struct elf_phdr);
  733. phdr->p_type = PT_NOTE;
  734. phdr->p_flags = 0;
  735. phdr->p_vaddr = 0;
  736. phdr->p_align = 0;
  737. phdr->p_offset = 0;
  738. phdr->p_paddr = 0;
  739. phdr->p_filesz = 0;
  740. phdr->p_memsz = 0;
  741. (elf->e_phnum)++;
  742. /* setup ELF PT_NOTE for vmcoreinfo */
  743. phdr = (struct elf_phdr *)bufp;
  744. bufp += sizeof(struct elf_phdr);
  745. phdr->p_type = PT_NOTE;
  746. phdr->p_flags = 0;
  747. phdr->p_vaddr = 0;
  748. phdr->p_align = 0;
  749. phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
  750. phdr->p_offset = phdr->p_paddr;
  751. phdr->p_memsz = vmcoreinfo_max_size;
  752. phdr->p_filesz = vmcoreinfo_max_size;
  753. /* Increment number of program headers. */
  754. (elf->e_phnum)++;
  755. /* setup PT_LOAD sections. */
  756. for (i = 0; i < crash_mem_ranges; i++) {
  757. unsigned long long mbase, msize;
  758. mbase = crash_memory_ranges[i].base;
  759. msize = crash_memory_ranges[i].size;
  760. if (!msize)
  761. continue;
  762. phdr = (struct elf_phdr *)bufp;
  763. bufp += sizeof(struct elf_phdr);
  764. phdr->p_type = PT_LOAD;
  765. phdr->p_flags = PF_R|PF_W|PF_X;
  766. phdr->p_offset = mbase;
  767. if (mbase == RMA_START) {
  768. /*
  769. * The entire RMA region will be moved by firmware
  770. * to the specified destination_address. Hence set
  771. * the correct offset.
  772. */
  773. phdr->p_offset = fdm.rmr_region.destination_address;
  774. }
  775. phdr->p_paddr = mbase;
  776. phdr->p_vaddr = (unsigned long)__va(mbase);
  777. phdr->p_filesz = msize;
  778. phdr->p_memsz = msize;
  779. phdr->p_align = 0;
  780. /* Increment number of program headers. */
  781. (elf->e_phnum)++;
  782. }
  783. return 0;
  784. }
  785. static unsigned long init_fadump_header(unsigned long addr)
  786. {
  787. struct fadump_crash_info_header *fdh;
  788. if (!addr)
  789. return 0;
  790. fw_dump.fadumphdr_addr = addr;
  791. fdh = __va(addr);
  792. addr += sizeof(struct fadump_crash_info_header);
  793. memset(fdh, 0, sizeof(struct fadump_crash_info_header));
  794. fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
  795. fdh->elfcorehdr_addr = addr;
  796. /* We will set the crashing cpu id in crash_fadump() during crash. */
  797. fdh->crashing_cpu = CPU_UNKNOWN;
  798. return addr;
  799. }
  800. static void register_fadump(void)
  801. {
  802. unsigned long addr;
  803. void *vaddr;
  804. /*
  805. * If no memory is reserved then we can not register for firmware-
  806. * assisted dump.
  807. */
  808. if (!fw_dump.reserve_dump_area_size)
  809. return;
  810. fadump_setup_crash_memory_ranges();
  811. addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len;
  812. /* Initialize fadump crash info header. */
  813. addr = init_fadump_header(addr);
  814. vaddr = __va(addr);
  815. pr_debug("Creating ELF core headers at %#016lx\n", addr);
  816. fadump_create_elfcore_headers(vaddr);
  817. /* register the future kernel dump with firmware. */
  818. register_fw_dump(&fdm);
  819. }
  820. static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
  821. {
  822. int rc = 0;
  823. unsigned int wait_time;
  824. pr_debug("Un-register firmware-assisted dump\n");
  825. /* TODO: Add upper time limit for the delay */
  826. do {
  827. rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
  828. FADUMP_UNREGISTER, fdm,
  829. sizeof(struct fadump_mem_struct));
  830. wait_time = rtas_busy_delay_time(rc);
  831. if (wait_time)
  832. mdelay(wait_time);
  833. } while (wait_time);
  834. if (rc) {
  835. printk(KERN_ERR "Failed to un-register firmware-assisted dump."
  836. " unexpected error(%d).\n", rc);
  837. return rc;
  838. }
  839. fw_dump.dump_registered = 0;
  840. return 0;
  841. }
  842. static ssize_t fadump_enabled_show(struct kobject *kobj,
  843. struct kobj_attribute *attr,
  844. char *buf)
  845. {
  846. return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
  847. }
  848. static ssize_t fadump_register_show(struct kobject *kobj,
  849. struct kobj_attribute *attr,
  850. char *buf)
  851. {
  852. return sprintf(buf, "%d\n", fw_dump.dump_registered);
  853. }
  854. static ssize_t fadump_register_store(struct kobject *kobj,
  855. struct kobj_attribute *attr,
  856. const char *buf, size_t count)
  857. {
  858. int ret = 0;
  859. if (!fw_dump.fadump_enabled || fdm_active)
  860. return -EPERM;
  861. mutex_lock(&fadump_mutex);
  862. switch (buf[0]) {
  863. case '0':
  864. if (fw_dump.dump_registered == 0) {
  865. ret = -EINVAL;
  866. goto unlock_out;
  867. }
  868. /* Un-register Firmware-assisted dump */
  869. fadump_unregister_dump(&fdm);
  870. break;
  871. case '1':
  872. if (fw_dump.dump_registered == 1) {
  873. ret = -EINVAL;
  874. goto unlock_out;
  875. }
  876. /* Register Firmware-assisted dump */
  877. register_fadump();
  878. break;
  879. default:
  880. ret = -EINVAL;
  881. break;
  882. }
  883. unlock_out:
  884. mutex_unlock(&fadump_mutex);
  885. return ret < 0 ? ret : count;
  886. }
  887. static int fadump_region_show(struct seq_file *m, void *private)
  888. {
  889. const struct fadump_mem_struct *fdm_ptr;
  890. if (!fw_dump.fadump_enabled)
  891. return 0;
  892. if (fdm_active)
  893. fdm_ptr = fdm_active;
  894. else
  895. fdm_ptr = &fdm;
  896. seq_printf(m,
  897. "CPU : [%#016llx-%#016llx] %#llx bytes, "
  898. "Dumped: %#llx\n",
  899. fdm_ptr->cpu_state_data.destination_address,
  900. fdm_ptr->cpu_state_data.destination_address +
  901. fdm_ptr->cpu_state_data.source_len - 1,
  902. fdm_ptr->cpu_state_data.source_len,
  903. fdm_ptr->cpu_state_data.bytes_dumped);
  904. seq_printf(m,
  905. "HPTE: [%#016llx-%#016llx] %#llx bytes, "
  906. "Dumped: %#llx\n",
  907. fdm_ptr->hpte_region.destination_address,
  908. fdm_ptr->hpte_region.destination_address +
  909. fdm_ptr->hpte_region.source_len - 1,
  910. fdm_ptr->hpte_region.source_len,
  911. fdm_ptr->hpte_region.bytes_dumped);
  912. seq_printf(m,
  913. "DUMP: [%#016llx-%#016llx] %#llx bytes, "
  914. "Dumped: %#llx\n",
  915. fdm_ptr->rmr_region.destination_address,
  916. fdm_ptr->rmr_region.destination_address +
  917. fdm_ptr->rmr_region.source_len - 1,
  918. fdm_ptr->rmr_region.source_len,
  919. fdm_ptr->rmr_region.bytes_dumped);
  920. if (!fdm_active ||
  921. (fw_dump.reserve_dump_area_start ==
  922. fdm_ptr->cpu_state_data.destination_address))
  923. return 0;
  924. /* Dump is active. Show reserved memory region. */
  925. seq_printf(m,
  926. " : [%#016llx-%#016llx] %#llx bytes, "
  927. "Dumped: %#llx\n",
  928. (unsigned long long)fw_dump.reserve_dump_area_start,
  929. fdm_ptr->cpu_state_data.destination_address - 1,
  930. fdm_ptr->cpu_state_data.destination_address -
  931. fw_dump.reserve_dump_area_start,
  932. fdm_ptr->cpu_state_data.destination_address -
  933. fw_dump.reserve_dump_area_start);
  934. return 0;
  935. }
  936. static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
  937. 0444, fadump_enabled_show,
  938. NULL);
  939. static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
  940. 0644, fadump_register_show,
  941. fadump_register_store);
  942. static int fadump_region_open(struct inode *inode, struct file *file)
  943. {
  944. return single_open(file, fadump_region_show, inode->i_private);
  945. }
  946. static const struct file_operations fadump_region_fops = {
  947. .open = fadump_region_open,
  948. .read = seq_read,
  949. .llseek = seq_lseek,
  950. .release = single_release,
  951. };
  952. static void fadump_init_files(void)
  953. {
  954. struct dentry *debugfs_file;
  955. int rc = 0;
  956. rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
  957. if (rc)
  958. printk(KERN_ERR "fadump: unable to create sysfs file"
  959. " fadump_enabled (%d)\n", rc);
  960. rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
  961. if (rc)
  962. printk(KERN_ERR "fadump: unable to create sysfs file"
  963. " fadump_registered (%d)\n", rc);
  964. debugfs_file = debugfs_create_file("fadump_region", 0444,
  965. powerpc_debugfs_root, NULL,
  966. &fadump_region_fops);
  967. if (!debugfs_file)
  968. printk(KERN_ERR "fadump: unable to create debugfs file"
  969. " fadump_region\n");
  970. return;
  971. }
  972. /*
  973. * Prepare for firmware-assisted dump.
  974. */
  975. int __init setup_fadump(void)
  976. {
  977. if (!fw_dump.fadump_enabled)
  978. return 0;
  979. if (!fw_dump.fadump_supported) {
  980. printk(KERN_ERR "Firmware-assisted dump is not supported on"
  981. " this hardware\n");
  982. return 0;
  983. }
  984. fadump_show_config();
  985. /*
  986. * If dump data is available then see if it is valid and prepare for
  987. * saving it to the disk.
  988. */
  989. if (fw_dump.dump_active)
  990. process_fadump(fdm_active);
  991. /* Initialize the kernel dump memory structure for FAD registration. */
  992. else if (fw_dump.reserve_dump_area_size)
  993. init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
  994. fadump_init_files();
  995. return 1;
  996. }
  997. subsys_initcall(setup_fadump);