nvram.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. /*
  2. * c 2001 PPC 64 Team, IBM Corp
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. *
  9. * /dev/nvram driver for PPC64
  10. *
  11. * This perhaps should live in drivers/char
  12. */
  13. #include <linux/types.h>
  14. #include <linux/errno.h>
  15. #include <linux/init.h>
  16. #include <linux/spinlock.h>
  17. #include <linux/slab.h>
  18. #include <linux/kmsg_dump.h>
  19. #include <linux/ctype.h>
  20. #include <linux/zlib.h>
  21. #include <asm/uaccess.h>
  22. #include <asm/nvram.h>
  23. #include <asm/rtas.h>
  24. #include <asm/prom.h>
  25. #include <asm/machdep.h>
/* Max bytes to read/write in one go */
#define NVRW_CNT 0x20
/*
 * Set oops header version to distinguish between old and new format header.
 * lnx,oops-log partition max size is 4000, header version > 4000 will
 * help in identifying new header.
 */
#define OOPS_HDR_VERSION 5000
/* Size of NVRAM in bytes; stays 0 until pSeries_nvram_init() reads it. */
static unsigned int nvram_size;
/* RTAS tokens looked up for "nvram-fetch" and "nvram-store". */
static int nvram_fetch, nvram_store;
/* Bounce buffer whose physical address is handed to the RTAS calls. */
static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
/* Held around every RTAS fetch/store loop that uses nvram_buf. */
static DEFINE_SPINLOCK(nvram_lock);
/*
 * Small header written at the start of an OS partition's data area,
 * immediately before the log payload (see nvram_write_os_partition()).
 */
struct err_log_info {
	int error_type;		/* err_type supplied by the writer */
	unsigned int seq_num;	/* sequence count supplied by the writer */
};
/*
 * Describes one OS-signature NVRAM partition used by this file.
 * 'size' and 'index' are filled in by pseries_nvram_init_os_partition().
 */
struct nvram_os_partition {
	const char *name;
	int req_size; /* desired size, in bytes */
	int min_size; /* minimum acceptable size (0 means req_size) */
	long size; /* size of data portion (excluding err_log_info) */
	long index; /* offset of data portion of partition */
};
/* Partition for RTAS error logs; index is -1 until the partition is set up. */
static struct nvram_os_partition rtas_log_partition = {
	.name = "ibm,rtas-log",
	.req_size = 2079,
	.min_size = 1055,
	.index = -1
};
/* Partition for oops/panic reports; index is -1 until the partition is set up. */
static struct nvram_os_partition oops_log_partition = {
	.name = "lnx,oops-log",
	.req_size = 4000,
	.min_size = 2000,
	.index = -1
};
/*
 * NULL-terminated list of the OS partition names used by this file.
 * Passed to nvram_remove_partition() when deleting obsolete OS partitions
 * (presumably as the set of names to keep -- confirm against
 * nvram_remove_partition()).
 */
static const char *pseries_nvram_os_partitions[] = {
	"ibm,rtas-log",
	"lnx,oops-log",
	NULL
};
/*
 * Header placed at the start of oops_buf, ahead of the report text.
 * Packed so the fields occupy 2 + 2 + 8 bytes with no padding.
 */
struct oops_log_info {
	u16 version;		/* set to OOPS_HDR_VERSION by the writers */
	u16 report_length;	/* length of the text that follows the header */
	u64 timestamp;		/* get_seconds() at the time of capture */
} __attribute__((packed));
/* Forward declaration so the dumper below can reference the callback. */
static void oops_to_nvram(struct kmsg_dumper *dumper,
		enum kmsg_dump_reason reason);
/* kmsg dumper registered by nvram_init_oops_partition(). */
static struct kmsg_dumper nvram_kmsg_dumper = {
	.dump = oops_to_nvram
};
/* See clobbering_unread_rtas_event() */
#define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */
/*
 * Set to get_seconds() by nvram_write_error_log() on success and reset to 0
 * by nvram_clear_error_log(); 0 means there is no unread RTAS event.
 */
static unsigned long last_unread_rtas_event; /* timestamp */
/*
 * For capturing and compressing an oops or panic report...
 * big_oops_buf[] holds the uncompressed text we're capturing.
 *
 * oops_buf[] holds the compressed text, preceded by an oops header.
 * oops header has u16 holding the version of oops header (to differentiate
 * between old and new format header) followed by u16 holding the length of
 * the compressed* text (*Or uncompressed, if compression fails.) and u64
 * holding the timestamp. oops_buf[] gets written to NVRAM.
 *
 * oops_log_info points to the header. oops_data points to the compressed text.
 *
 * +- oops_buf
 * |                                   +- oops_data
 * v                                   v
 * +-----------+-----------+-----------+------------------------+
 * | version   | length    | timestamp | text                   |
 * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes)   |
 * +-----------+-----------+-----------+------------------------+
 * ^
 * +- oops_log_info
 *
 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
 */
/* Capacity of big_oops_buf, derived from oops_data_sz at init. */
static size_t big_oops_buf_sz;
/* big_oops_buf is NULL when compression is unavailable. */
static char *big_oops_buf, *oops_buf;
/* Points just past the header inside oops_buf. */
static char *oops_data;
/* Bytes available for text after the header in oops_buf. */
static size_t oops_data_sz;
/* Compression parameters */
#define COMPR_LEVEL 6
#define WINDOW_BITS 12
#define MEM_LEVEL 4
/*
 * zlib stream state used by nvram_compress(); its workspace is allocated
 * once in nvram_init_oops_partition().
 */
static struct z_stream_s stream;
  112. static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
  113. {
  114. unsigned int i;
  115. unsigned long len;
  116. int done;
  117. unsigned long flags;
  118. char *p = buf;
  119. if (nvram_size == 0 || nvram_fetch == RTAS_UNKNOWN_SERVICE)
  120. return -ENODEV;
  121. if (*index >= nvram_size)
  122. return 0;
  123. i = *index;
  124. if (i + count > nvram_size)
  125. count = nvram_size - i;
  126. spin_lock_irqsave(&nvram_lock, flags);
  127. for (; count != 0; count -= len) {
  128. len = count;
  129. if (len > NVRW_CNT)
  130. len = NVRW_CNT;
  131. if ((rtas_call(nvram_fetch, 3, 2, &done, i, __pa(nvram_buf),
  132. len) != 0) || len != done) {
  133. spin_unlock_irqrestore(&nvram_lock, flags);
  134. return -EIO;
  135. }
  136. memcpy(p, nvram_buf, len);
  137. p += len;
  138. i += len;
  139. }
  140. spin_unlock_irqrestore(&nvram_lock, flags);
  141. *index = i;
  142. return p - buf;
  143. }
  144. static ssize_t pSeries_nvram_write(char *buf, size_t count, loff_t *index)
  145. {
  146. unsigned int i;
  147. unsigned long len;
  148. int done;
  149. unsigned long flags;
  150. const char *p = buf;
  151. if (nvram_size == 0 || nvram_store == RTAS_UNKNOWN_SERVICE)
  152. return -ENODEV;
  153. if (*index >= nvram_size)
  154. return 0;
  155. i = *index;
  156. if (i + count > nvram_size)
  157. count = nvram_size - i;
  158. spin_lock_irqsave(&nvram_lock, flags);
  159. for (; count != 0; count -= len) {
  160. len = count;
  161. if (len > NVRW_CNT)
  162. len = NVRW_CNT;
  163. memcpy(nvram_buf, p, len);
  164. if ((rtas_call(nvram_store, 3, 2, &done, i, __pa(nvram_buf),
  165. len) != 0) || len != done) {
  166. spin_unlock_irqrestore(&nvram_lock, flags);
  167. return -EIO;
  168. }
  169. p += len;
  170. i += len;
  171. }
  172. spin_unlock_irqrestore(&nvram_lock, flags);
  173. *index = i;
  174. return p - buf;
  175. }
  176. static ssize_t pSeries_nvram_get_size(void)
  177. {
  178. return nvram_size ? nvram_size : -ENODEV;
  179. }
  180. /* nvram_write_os_partition, nvram_write_error_log
  181. *
  182. * We need to buffer the error logs into nvram to ensure that we have
  183. * the failure information to decode. If we have a severe error there
  184. * is no way to guarantee that the OS or the machine is in a state to
  185. * get back to user land and write the error to disk. For example if
  186. * the SCSI device driver causes a Machine Check by writing to a bad
  187. * IO address, there is no way of guaranteeing that the device driver
 * is in any state that it would also be able to write the error data
  189. * captured to disk, thus we buffer it in NVRAM for analysis on the
  190. * next boot.
  191. *
 * In NVRAM the partition containing the error log buffer will look like:
  193. * Header (in bytes):
  194. * +-----------+----------+--------+------------+------------------+
  195. * | signature | checksum | length | name | data |
  196. * |0 |1 |2 3|4 15|16 length-1|
  197. * +-----------+----------+--------+------------+------------------+
  198. *
  199. * The 'data' section would look like (in bytes):
  200. * +--------------+------------+-----------------------------------+
  201. * | event_logged | sequence # | error log |
  202. * |0 3|4 7|8 error_log_size-1|
  203. * +--------------+------------+-----------------------------------+
  204. *
  205. * event_logged: 0 if event has not been logged to syslog, 1 if it has
  206. * sequence #: The unique sequence # for each event. (until it wraps)
  207. * error log: The error log from event_scan
  208. */
  209. int nvram_write_os_partition(struct nvram_os_partition *part, char * buff,
  210. int length, unsigned int err_type, unsigned int error_log_cnt)
  211. {
  212. int rc;
  213. loff_t tmp_index;
  214. struct err_log_info info;
  215. if (part->index == -1) {
  216. return -ESPIPE;
  217. }
  218. if (length > part->size) {
  219. length = part->size;
  220. }
  221. info.error_type = err_type;
  222. info.seq_num = error_log_cnt;
  223. tmp_index = part->index;
  224. rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index);
  225. if (rc <= 0) {
  226. pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
  227. return rc;
  228. }
  229. rc = ppc_md.nvram_write(buff, length, &tmp_index);
  230. if (rc <= 0) {
  231. pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc);
  232. return rc;
  233. }
  234. return 0;
  235. }
  236. int nvram_write_error_log(char * buff, int length,
  237. unsigned int err_type, unsigned int error_log_cnt)
  238. {
  239. int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
  240. err_type, error_log_cnt);
  241. if (!rc)
  242. last_unread_rtas_event = get_seconds();
  243. return rc;
  244. }
  245. /* nvram_read_error_log
  246. *
  247. * Reads nvram for error log for at most 'length'
  248. */
  249. int nvram_read_error_log(char * buff, int length,
  250. unsigned int * err_type, unsigned int * error_log_cnt)
  251. {
  252. int rc;
  253. loff_t tmp_index;
  254. struct err_log_info info;
  255. if (rtas_log_partition.index == -1)
  256. return -1;
  257. if (length > rtas_log_partition.size)
  258. length = rtas_log_partition.size;
  259. tmp_index = rtas_log_partition.index;
  260. rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
  261. if (rc <= 0) {
  262. printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
  263. return rc;
  264. }
  265. rc = ppc_md.nvram_read(buff, length, &tmp_index);
  266. if (rc <= 0) {
  267. printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
  268. return rc;
  269. }
  270. *error_log_cnt = info.seq_num;
  271. *err_type = info.error_type;
  272. return 0;
  273. }
  274. /* This doesn't actually zero anything, but it sets the event_logged
  275. * word to tell that this event is safely in syslog.
  276. */
  277. int nvram_clear_error_log(void)
  278. {
  279. loff_t tmp_index;
  280. int clear_word = ERR_FLAG_ALREADY_LOGGED;
  281. int rc;
  282. if (rtas_log_partition.index == -1)
  283. return -1;
  284. tmp_index = rtas_log_partition.index;
  285. rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index);
  286. if (rc <= 0) {
  287. printk(KERN_ERR "nvram_clear_error_log: Failed nvram_write (%d)\n", rc);
  288. return rc;
  289. }
  290. last_unread_rtas_event = 0;
  291. return 0;
  292. }
  293. /* pseries_nvram_init_os_partition
  294. *
  295. * This sets up a partition with an "OS" signature.
  296. *
  297. * The general strategy is the following:
  298. * 1.) If a partition with the indicated name already exists...
  299. * - If it's large enough, use it.
  300. * - Otherwise, recycle it and keep going.
  301. * 2.) Search for a free partition that is large enough.
  302. * 3.) If there's not a free partition large enough, recycle any obsolete
  303. * OS partitions and try again.
  304. * 4.) Will first try getting a chunk that will satisfy the requested size.
  305. * 5.) If a chunk of the requested size cannot be allocated, then try finding
  306. * a chunk that will satisfy the minum needed.
  307. *
  308. * Returns 0 on success, else -1.
  309. */
  310. static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
  311. *part)
  312. {
  313. loff_t p;
  314. int size;
  315. /* Scan nvram for partitions */
  316. nvram_scan_partitions();
  317. /* Look for ours */
  318. p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
  319. /* Found one but too small, remove it */
  320. if (p && size < part->min_size) {
  321. pr_info("nvram: Found too small %s partition,"
  322. " removing it...\n", part->name);
  323. nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
  324. p = 0;
  325. }
  326. /* Create one if we didn't find */
  327. if (!p) {
  328. p = nvram_create_partition(part->name, NVRAM_SIG_OS,
  329. part->req_size, part->min_size);
  330. if (p == -ENOSPC) {
  331. pr_info("nvram: No room to create %s partition, "
  332. "deleting any obsolete OS partitions...\n",
  333. part->name);
  334. nvram_remove_partition(NULL, NVRAM_SIG_OS,
  335. pseries_nvram_os_partitions);
  336. p = nvram_create_partition(part->name, NVRAM_SIG_OS,
  337. part->req_size, part->min_size);
  338. }
  339. }
  340. if (p <= 0) {
  341. pr_err("nvram: Failed to find or create %s"
  342. " partition, err %d\n", part->name, (int)p);
  343. return -1;
  344. }
  345. part->index = p;
  346. part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
  347. return 0;
  348. }
  349. static void __init nvram_init_oops_partition(int rtas_partition_exists)
  350. {
  351. int rc;
  352. rc = pseries_nvram_init_os_partition(&oops_log_partition);
  353. if (rc != 0) {
  354. if (!rtas_partition_exists)
  355. return;
  356. pr_notice("nvram: Using %s partition to log both"
  357. " RTAS errors and oops/panic reports\n",
  358. rtas_log_partition.name);
  359. memcpy(&oops_log_partition, &rtas_log_partition,
  360. sizeof(rtas_log_partition));
  361. }
  362. oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
  363. if (!oops_buf) {
  364. pr_err("nvram: No memory for %s partition\n",
  365. oops_log_partition.name);
  366. return;
  367. }
  368. oops_data = oops_buf + sizeof(struct oops_log_info);
  369. oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
  370. /*
  371. * Figure compression (preceded by elimination of each line's <n>
  372. * severity prefix) will reduce the oops/panic report to at most
  373. * 45% of its original size.
  374. */
  375. big_oops_buf_sz = (oops_data_sz * 100) / 45;
  376. big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
  377. if (big_oops_buf) {
  378. stream.workspace = kmalloc(zlib_deflate_workspacesize(
  379. WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
  380. if (!stream.workspace) {
  381. pr_err("nvram: No memory for compression workspace; "
  382. "skipping compression of %s partition data\n",
  383. oops_log_partition.name);
  384. kfree(big_oops_buf);
  385. big_oops_buf = NULL;
  386. }
  387. } else {
  388. pr_err("No memory for uncompressed %s data; "
  389. "skipping compression\n", oops_log_partition.name);
  390. stream.workspace = NULL;
  391. }
  392. rc = kmsg_dump_register(&nvram_kmsg_dumper);
  393. if (rc != 0) {
  394. pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
  395. kfree(oops_buf);
  396. kfree(big_oops_buf);
  397. kfree(stream.workspace);
  398. }
  399. }
  400. static int __init pseries_nvram_init_log_partitions(void)
  401. {
  402. int rc;
  403. rc = pseries_nvram_init_os_partition(&rtas_log_partition);
  404. nvram_init_oops_partition(rc == 0);
  405. return 0;
  406. }
  407. machine_arch_initcall(pseries, pseries_nvram_init_log_partitions);
  408. int __init pSeries_nvram_init(void)
  409. {
  410. struct device_node *nvram;
  411. const unsigned int *nbytes_p;
  412. unsigned int proplen;
  413. nvram = of_find_node_by_type(NULL, "nvram");
  414. if (nvram == NULL)
  415. return -ENODEV;
  416. nbytes_p = of_get_property(nvram, "#bytes", &proplen);
  417. if (nbytes_p == NULL || proplen != sizeof(unsigned int)) {
  418. of_node_put(nvram);
  419. return -EIO;
  420. }
  421. nvram_size = *nbytes_p;
  422. nvram_fetch = rtas_token("nvram-fetch");
  423. nvram_store = rtas_token("nvram-store");
  424. printk(KERN_INFO "PPC64 nvram contains %d bytes\n", nvram_size);
  425. of_node_put(nvram);
  426. ppc_md.nvram_read = pSeries_nvram_read;
  427. ppc_md.nvram_write = pSeries_nvram_write;
  428. ppc_md.nvram_size = pSeries_nvram_get_size;
  429. return 0;
  430. }
  431. /*
  432. * Are we using the ibm,rtas-log for oops/panic reports? And if so,
  433. * would logging this oops/panic overwrite an RTAS event that rtas_errd
  434. * hasn't had a chance to read and process? Return 1 if so, else 0.
  435. *
  436. * We assume that if rtas_errd hasn't read the RTAS event in
  437. * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
  438. */
  439. static int clobbering_unread_rtas_event(void)
  440. {
  441. return (oops_log_partition.index == rtas_log_partition.index
  442. && last_unread_rtas_event
  443. && get_seconds() - last_unread_rtas_event <=
  444. NVRAM_RTAS_READ_TIMEOUT);
  445. }
  446. /* Derived from logfs_compress() */
  447. static int nvram_compress(const void *in, void *out, size_t inlen,
  448. size_t outlen)
  449. {
  450. int err, ret;
  451. ret = -EIO;
  452. err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
  453. MEM_LEVEL, Z_DEFAULT_STRATEGY);
  454. if (err != Z_OK)
  455. goto error;
  456. stream.next_in = in;
  457. stream.avail_in = inlen;
  458. stream.total_in = 0;
  459. stream.next_out = out;
  460. stream.avail_out = outlen;
  461. stream.total_out = 0;
  462. err = zlib_deflate(&stream, Z_FINISH);
  463. if (err != Z_STREAM_END)
  464. goto error;
  465. err = zlib_deflateEnd(&stream);
  466. if (err != Z_OK)
  467. goto error;
  468. if (stream.total_out >= stream.total_in)
  469. goto error;
  470. ret = stream.total_out;
  471. error:
  472. return ret;
  473. }
  474. /* Compress the text from big_oops_buf into oops_buf. */
  475. static int zip_oops(size_t text_len)
  476. {
  477. struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
  478. int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
  479. oops_data_sz);
  480. if (zipped_len < 0) {
  481. pr_err("nvram: compression failed; returned %d\n", zipped_len);
  482. pr_err("nvram: logging uncompressed oops/panic report\n");
  483. return -1;
  484. }
  485. oops_hdr->version = OOPS_HDR_VERSION;
  486. oops_hdr->report_length = (u16) zipped_len;
  487. oops_hdr->timestamp = get_seconds();
  488. return 0;
  489. }
  490. /*
  491. * This is our kmsg_dump callback, called after an oops or panic report
  492. * has been written to the printk buffer. We want to capture as much
  493. * of the printk buffer as possible. First, capture as much as we can
  494. * that we think will compress sufficiently to fit in the lnx,oops-log
  495. * partition. If that's too much, go back and capture uncompressed text.
  496. */
  497. static void oops_to_nvram(struct kmsg_dumper *dumper,
  498. enum kmsg_dump_reason reason)
  499. {
  500. struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
  501. static unsigned int oops_count = 0;
  502. static bool panicking = false;
  503. static DEFINE_SPINLOCK(lock);
  504. unsigned long flags;
  505. size_t text_len;
  506. unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
  507. int rc = -1;
  508. switch (reason) {
  509. case KMSG_DUMP_RESTART:
  510. case KMSG_DUMP_HALT:
  511. case KMSG_DUMP_POWEROFF:
  512. /* These are almost always orderly shutdowns. */
  513. return;
  514. case KMSG_DUMP_OOPS:
  515. break;
  516. case KMSG_DUMP_PANIC:
  517. panicking = true;
  518. break;
  519. case KMSG_DUMP_EMERG:
  520. if (panicking)
  521. /* Panic report already captured. */
  522. return;
  523. break;
  524. default:
  525. pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
  526. __FUNCTION__, (int) reason);
  527. return;
  528. }
  529. if (clobbering_unread_rtas_event())
  530. return;
  531. if (!spin_trylock_irqsave(&lock, flags))
  532. return;
  533. if (big_oops_buf) {
  534. kmsg_dump_get_buffer(dumper, false,
  535. big_oops_buf, big_oops_buf_sz, &text_len);
  536. rc = zip_oops(text_len);
  537. }
  538. if (rc != 0) {
  539. kmsg_dump_rewind(dumper);
  540. kmsg_dump_get_buffer(dumper, false,
  541. oops_data, oops_data_sz, &text_len);
  542. err_type = ERR_TYPE_KERNEL_PANIC;
  543. oops_hdr->version = OOPS_HDR_VERSION;
  544. oops_hdr->report_length = (u16) text_len;
  545. oops_hdr->timestamp = get_seconds();
  546. }
  547. (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
  548. (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
  549. ++oops_count);
  550. spin_unlock_irqrestore(&lock, flags);
  551. }