e820.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127
  1. /*
  2. * Handle the memory map.
  3. * The functions here do the job until bootmem takes over.
  4. *
  5. * Getting sanitize_e820_map() in sync with i386 version by applying change:
  6. * - Provisions for empty E820 memory regions (reported by certain BIOSes).
  7. * Alex Achenbach <xela@slit.de>, December 2002.
  8. * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
  9. *
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/types.h>
  13. #include <linux/init.h>
  14. #include <linux/crash_dump.h>
  15. #include <linux/export.h>
  16. #include <linux/bootmem.h>
  17. #include <linux/pfn.h>
  18. #include <linux/suspend.h>
  19. #include <linux/acpi.h>
  20. #include <linux/firmware-map.h>
  21. #include <linux/memblock.h>
  22. #include <asm/e820.h>
  23. #include <asm/proto.h>
  24. #include <asm/setup.h>
  25. /*
  26. * The e820 map is the map that gets modified e.g. with command line parameters
  27. * and that is also registered with modifications in the kernel resource tree
  28. * with the iomem_resource as parent.
  29. *
  30. * The e820_saved is directly saved after the BIOS-provided memory map is
  31. * copied. It doesn't get modified afterwards. It's registered for the
  32. * /sys/firmware/memmap interface.
  33. *
  34. * That memory map is not modified and is used as base for kexec. The kexec'd
  35. * kernel should get the same memory map as the firmware provides. Then the
  36. * user can e.g. boot the original kernel with mem=1G while still booting the
  37. * next kernel with full memory.
  38. */
/* The working e820 map: modified by command line options and kernel fixups. */
struct e820map e820;
/* Pristine copy taken right after the BIOS map is read; exported via
 * /sys/firmware/memmap and used as the kexec base map (see header comment). */
struct e820map e820_saved;

/* For PCI or other memory-mapped resources */
unsigned long pci_mem_start = 0xaeedbabe;	/* poison value; replaced by e820_setup_gap() */
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
  46. /*
  47. * This function checks if any part of the range <start,end> is mapped
  48. * with type.
  49. */
  50. int
  51. e820_any_mapped(u64 start, u64 end, unsigned type)
  52. {
  53. int i;
  54. for (i = 0; i < e820.nr_map; i++) {
  55. struct e820entry *ei = &e820.map[i];
  56. if (type && ei->type != type)
  57. continue;
  58. if (ei->addr >= end || ei->addr + ei->size <= start)
  59. continue;
  60. return 1;
  61. }
  62. return 0;
  63. }
  64. EXPORT_SYMBOL_GPL(e820_any_mapped);
  65. /*
  66. * This function checks if the entire range <start,end> is mapped with type.
  67. *
  68. * Note: this function only works correct if the e820 table is sorted and
  69. * not-overlapping, which is the case
  70. */
  71. int __init e820_all_mapped(u64 start, u64 end, unsigned type)
  72. {
  73. int i;
  74. for (i = 0; i < e820.nr_map; i++) {
  75. struct e820entry *ei = &e820.map[i];
  76. if (type && ei->type != type)
  77. continue;
  78. /* is the region (part) in overlap with the current region ?*/
  79. if (ei->addr >= end || ei->addr + ei->size <= start)
  80. continue;
  81. /* if the region is at the beginning of <start,end> we move
  82. * start to the end of the region since it's ok until there
  83. */
  84. if (ei->addr <= start)
  85. start = ei->addr + ei->size;
  86. /*
  87. * if start is now at or beyond end, we're done, full
  88. * coverage
  89. */
  90. if (start >= end)
  91. return 1;
  92. }
  93. return 0;
  94. }
  95. /*
  96. * Add a memory region to the kernel e820 map.
  97. */
  98. static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
  99. int type)
  100. {
  101. int x = e820x->nr_map;
  102. if (x >= ARRAY_SIZE(e820x->map)) {
  103. printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
  104. return;
  105. }
  106. e820x->map[x].addr = start;
  107. e820x->map[x].size = size;
  108. e820x->map[x].type = type;
  109. e820x->nr_map++;
  110. }
  111. void __init e820_add_region(u64 start, u64 size, int type)
  112. {
  113. __e820_add_region(&e820, start, size, type);
  114. }
  115. static void __init e820_print_type(u32 type)
  116. {
  117. switch (type) {
  118. case E820_RAM:
  119. case E820_RESERVED_KERN:
  120. printk(KERN_CONT "(usable)");
  121. break;
  122. case E820_RESERVED:
  123. case E820_RESERVED_EFI:
  124. printk(KERN_CONT "(reserved)");
  125. break;
  126. case E820_ACPI:
  127. printk(KERN_CONT "(ACPI data)");
  128. break;
  129. case E820_NVS:
  130. printk(KERN_CONT "(ACPI NVS)");
  131. break;
  132. case E820_UNUSABLE:
  133. printk(KERN_CONT "(unusable)");
  134. break;
  135. default:
  136. printk(KERN_CONT "type %u", type);
  137. break;
  138. }
  139. }
  140. void __init e820_print_map(char *who)
  141. {
  142. int i;
  143. for (i = 0; i < e820.nr_map; i++) {
  144. printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
  145. (unsigned long long) e820.map[i].addr,
  146. (unsigned long long)
  147. (e820.map[i].addr + e820.map[i].size));
  148. e820_print_type(e820.map[i].type);
  149. printk(KERN_CONT "\n");
  150. }
  151. }
  152. /*
  153. * Sanitize the BIOS e820 map.
  154. *
  155. * Some e820 responses include overlapping entries. The following
  156. * replaces the original e820 map with a new one, removing overlaps,
  157. * and resolving conflicting memory types in favor of highest
  158. * numbered type.
  159. *
  160. * The input parameter biosmap points to an array of 'struct
  161. * e820entry' which on entry has elements in the range [0, *pnr_map)
  162. * valid, and which has space for up to max_nr_map entries.
  163. * On return, the resulting sanitized e820 map entries will be in
  164. * overwritten in the same location, starting at biosmap.
  165. *
  166. * The integer pointed to by pnr_map must be valid on entry (the
  167. * current number of valid entries located at biosmap) and will
  168. * be updated on return, with the new number of valid entries
  169. * (something no more than max_nr_map.)
  170. *
  171. * The return value from sanitize_e820_map() is zero if it
  172. * successfully 'sanitized' the map entries passed in, and is -1
  173. * if it did nothing, which can happen if either of (1) it was
  174. * only passed one map entry, or (2) any of the input map entries
  175. * were invalid (start + size < start, meaning that the size was
  176. * so big the described memory range wrapped around through zero.)
  177. *
  178. * Visually we're performing the following
  179. * (1,2,3,4 = memory types)...
  180. *
  181. * Sample memory map (w/overlaps):
  182. * ____22__________________
  183. * ______________________4_
  184. * ____1111________________
  185. * _44_____________________
  186. * 11111111________________
  187. * ____________________33__
  188. * ___________44___________
  189. * __________33333_________
  190. * ______________22________
  191. * ___________________2222_
  192. * _________111111111______
  193. * _____________________11_
  194. * _________________4______
  195. *
  196. * Sanitized equivalent (no overlap):
  197. * 1_______________________
  198. * _44_____________________
  199. * ___1____________________
  200. * ____22__________________
  201. * ______11________________
  202. * _________1______________
  203. * __________3_____________
  204. * ___________44___________
  205. * _____________33_________
  206. * _______________2________
  207. * ________________1_______
  208. * _________________4______
  209. * ___________________2____
  210. * ____________________33__
  211. * ______________________4_
  212. */
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
			     u32 *pnr_map)
{
	/* One change point per region boundary (start or end address). */
	struct change_member {
		struct e820entry *pbios; /* pointer to original bios entry */
		unsigned long long addr; /* address for this change point */
	};
	static struct change_member change_point_list[2*E820_X_MAX] __initdata;
	static struct change_member *change_point[2*E820_X_MAX] __initdata;
	static struct e820entry *overlap_list[E820_X_MAX] __initdata;
	static struct e820entry new_bios[E820_X_MAX] __initdata;
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;
	BUG_ON(old_nr > max_nr_map);

	/* bail out if we find any unreasonable addresses in bios map */
	for (i = 0; i < old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i = 0; i < 2 * old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i = 0; i < old_nr; i++) {
		if (biosmap[i].size != 0) {
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			change_point[chgidx]->addr = biosmap[i].addr +
				biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx;

	/*
	 * sort change-point list by memory addresses (low -> high).
	 * Simple bubble sort; chg_nr is bounded by 2*E820_X_MAX so the
	 * quadratic cost is irrelevant at boot time.
	 */
	still_changing = 1;
	while (still_changing) {
		still_changing = 0;
		for (i = 1; i < chg_nr; i++) {
			unsigned long long curaddr, lastaddr;
			unsigned long long curpbaddr, lastpbaddr;

			curaddr = change_point[i]->addr;
			lastaddr = change_point[i - 1]->addr;
			curpbaddr = change_point[i]->pbios->addr;
			lastpbaddr = change_point[i - 1]->pbios->addr;

			/*
			 * swap entries, when:
			 *
			 * curaddr > lastaddr or
			 * curaddr == lastaddr and curaddr == curpbaddr and
			 * lastaddr != lastpbaddr
			 *
			 * (the second clause orders a region start before a
			 * coinciding region end, so the overlap bookkeeping
			 * below sees the start first)
			 */
			if (curaddr < lastaddr ||
			    (curaddr == lastaddr && curaddr == curpbaddr &&
			     lastaddr != lastpbaddr)) {
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing = 1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries = 0;	/* number of entries in the overlap table */
	new_bios_entry = 0;	/* index for creating new bios map entries */
	last_type = 0;		/* start with undefined memory type */
	last_addr = 0;		/* start with 0 as last starting address */

	/* loop through change-points, determining affect on the new bios map */
	for (chgidx = 0; chgidx < chg_nr; chgidx++) {
		/* keep track of all overlapping bios entries */
		if (change_point[chgidx]->addr ==
		    change_point[chgidx]->pbios->addr) {
			/*
			 * add map entry to overlap list (> 1 entry
			 * implies an overlap)
			 */
			overlap_list[overlap_entries++] =
				change_point[chgidx]->pbios;
		} else {
			/*
			 * remove entry from list (order independent,
			 * so swap with last)
			 */
			for (i = 0; i < overlap_entries; i++) {
				if (overlap_list[i] ==
				    change_point[chgidx]->pbios)
					overlap_list[i] =
						overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/*
		 * if there are overlapping entries, decide which
		 * "type" to use (larger value takes precedence --
		 * 1=usable, 2,3,4,4+=unusable)
		 */
		current_type = 0;
		for (i = 0; i < overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/*
		 * continue building up new bios map based on this
		 * information: emit an output entry each time the
		 * effective type changes at a change point.
		 */
		if (current_type != last_type) {
			if (last_type != 0) {
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/*
				 * move forward only if the new size
				 * was non-zero
				 */
				if (new_bios[new_bios_entry].size != 0)
					/*
					 * no more space left for new
					 * bios entries ?
					 */
					if (++new_bios_entry >= max_nr_map)
						break;
			}
			if (current_type != 0) {
				new_bios[new_bios_entry].addr =
					change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr = change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}

	/* retain count for new bios entries */
	new_nr = new_bios_entry;

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
  359. static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
  360. {
  361. while (nr_map) {
  362. u64 start = biosmap->addr;
  363. u64 size = biosmap->size;
  364. u64 end = start + size;
  365. u32 type = biosmap->type;
  366. /* Overflow in 64 bits? Ignore the memory map. */
  367. if (start > end)
  368. return -1;
  369. e820_add_region(start, size, type);
  370. biosmap++;
  371. nr_map--;
  372. }
  373. return 0;
  374. }
  375. /*
  376. * Copy the BIOS e820 map into a safe place.
  377. *
  378. * Sanity-check it while we're at it..
  379. *
  380. * If we're lucky and live on a modern system, the setup code
  381. * will have given us a memory map that we can use to properly
  382. * set up memory. If we aren't, we'll fake a memory map.
  383. */
  384. static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
  385. {
  386. /* Only one memory region (or negative)? Ignore it */
  387. if (nr_map < 2)
  388. return -1;
  389. return __append_e820_map(biosmap, nr_map);
  390. }
/*
 * Change the type of every part of [start, start + size) in e820x that
 * currently has type old_type to new_type, splitting entries that
 * straddle the boundary by appending new entries to the map. Returns
 * the total amount of address space actually retyped.
 */
static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
				      u64 size, unsigned old_type,
				      unsigned new_type)
{
	u64 end;
	unsigned int i;
	u64 real_updated_size = 0;

	BUG_ON(old_type == new_type);

	/* Clamp size so that start + size cannot wrap past the 64-bit limit. */
	if (size > (ULLONG_MAX - start))
		size = ULLONG_MAX - start;

	end = start + size;
	printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
	       (unsigned long long) start,
	       (unsigned long long) end);
	e820_print_type(old_type);
	printk(KERN_CONT " ==> ");
	e820_print_type(new_type);
	printk(KERN_CONT "\n");

	/*
	 * Entries appended below land past the current nr_map snapshot seen
	 * by this loop's ei_end computations; each original entry is still
	 * visited exactly once.
	 */
	for (i = 0; i < e820x->nr_map; i++) {
		struct e820entry *ei = &e820x->map[i];
		u64 final_start, final_end;
		u64 ei_end;

		if (ei->type != old_type)
			continue;

		ei_end = ei->addr + ei->size;
		/* totally covered by new range? */
		if (ei->addr >= start && ei_end <= end) {
			ei->type = new_type;
			real_updated_size += ei->size;
			continue;
		}

		/* new range is totally covered? */
		if (ei->addr < start && ei_end > end) {
			/*
			 * Three-way split: the middle gets the new type, the
			 * tail is re-added with the old type, and the original
			 * entry is shrunk to keep only the head.
			 */
			__e820_add_region(e820x, start, size, new_type);
			__e820_add_region(e820x, end, ei_end - end, ei->type);
			ei->size = start - ei->addr;
			real_updated_size += size;
			continue;
		}

		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(end, ei_end);
		if (final_start >= final_end)
			continue;

		__e820_add_region(e820x, final_start, final_end - final_start,
				  new_type);

		real_updated_size += final_end - final_start;

		/*
		 * left range could be head or tail, so need to update
		 * size at first.
		 */
		ei->size -= final_end - final_start;
		if (ei->addr < final_start)
			continue;
		ei->addr = final_end;
	}
	return real_updated_size;
}
  449. u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
  450. unsigned new_type)
  451. {
  452. return __e820_update_range(&e820, start, size, old_type, new_type);
  453. }
  454. static u64 __init e820_update_range_saved(u64 start, u64 size,
  455. unsigned old_type, unsigned new_type)
  456. {
  457. return __e820_update_range(&e820_saved, start, size, old_type,
  458. new_type);
  459. }
/* make e820 not cover the range */
/*
 * Remove [start, start + size) from the main e820 map. When checktype
 * is non-zero only entries of old_type are affected. Returns the total
 * amount of address space actually removed.
 */
u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
			     int checktype)
{
	int i;
	u64 end;
	u64 real_removed_size = 0;

	/* Clamp size so that start + size cannot wrap past the 64-bit limit. */
	if (size > (ULLONG_MAX - start))
		size = ULLONG_MAX - start;

	end = start + size;
	printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
	       (unsigned long long) start,
	       (unsigned long long) end);
	if (checktype)
		e820_print_type(old_type);
	printk(KERN_CONT "\n");

	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		u64 final_start, final_end;
		u64 ei_end;

		if (checktype && ei->type != old_type)
			continue;

		ei_end = ei->addr + ei->size;
		/* totally covered? */
		if (ei->addr >= start && ei_end <= end) {
			real_removed_size += ei->size;
			/* Zeroed-out entries get dropped by a later sanitize. */
			memset(ei, 0, sizeof(struct e820entry));
			continue;
		}

		/* new range is totally covered? */
		if (ei->addr < start && ei_end > end) {
			/* Split: keep the head in place, re-add the tail. */
			e820_add_region(end, ei_end - end, ei->type);
			ei->size = start - ei->addr;
			real_removed_size += size;
			continue;
		}

		/* partially covered */
		final_start = max(start, ei->addr);
		final_end = min(end, ei_end);
		if (final_start >= final_end)
			continue;
		real_removed_size += final_end - final_start;

		/*
		 * left range could be head or tail, so need to update
		 * size at first.
		 */
		ei->size -= final_end - final_start;
		if (ei->addr < final_start)
			continue;
		ei->addr = final_end;
	}
	return real_removed_size;
}
  513. void __init update_e820(void)
  514. {
  515. u32 nr_map;
  516. nr_map = e820.nr_map;
  517. if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
  518. return;
  519. e820.nr_map = nr_map;
  520. printk(KERN_INFO "modified physical RAM map:\n");
  521. e820_print_map("modified");
  522. }
  523. static void __init update_e820_saved(void)
  524. {
  525. u32 nr_map;
  526. nr_map = e820_saved.nr_map;
  527. if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
  528. return;
  529. e820_saved.nr_map = nr_map;
  530. }
  531. #define MAX_GAP_END 0x100000000ull
  532. /*
  533. * Search for a gap in the e820 memory space from start_addr to end_addr.
  534. */
  535. __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
  536. unsigned long start_addr, unsigned long long end_addr)
  537. {
  538. unsigned long long last;
  539. int i = e820.nr_map;
  540. int found = 0;
  541. last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
  542. while (--i >= 0) {
  543. unsigned long long start = e820.map[i].addr;
  544. unsigned long long end = start + e820.map[i].size;
  545. if (end < start_addr)
  546. continue;
  547. /*
  548. * Since "last" is at most 4GB, we know we'll
  549. * fit in 32 bits if this condition is true
  550. */
  551. if (last > end) {
  552. unsigned long gap = last - end;
  553. if (gap >= *gapsize) {
  554. *gapsize = gap;
  555. *gapstart = end;
  556. found = 1;
  557. }
  558. }
  559. if (start < last)
  560. last = start;
  561. }
  562. return found;
  563. }
  564. /*
  565. * Search for the biggest gap in the low 32 bits of the e820
  566. * memory space. We pass this space to PCI to assign MMIO resources
  567. * for hotplug or unconfigured devices in.
  568. * Hopefully the BIOS let enough space left.
  569. */
  570. __init void e820_setup_gap(void)
  571. {
  572. unsigned long gapstart, gapsize;
  573. int found;
  574. gapstart = 0x10000000;
  575. gapsize = 0x400000;
  576. found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
  577. #ifdef CONFIG_X86_64
  578. if (!found) {
  579. gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024;
  580. printk(KERN_ERR
  581. "PCI: Warning: Cannot find a gap in the 32bit address range\n"
  582. "PCI: Unassigned devices with 32bit resource registers may break!\n");
  583. }
  584. #endif
  585. /*
  586. * e820_reserve_resources_late protect stolen RAM already
  587. */
  588. pci_mem_start = gapstart;
  589. printk(KERN_INFO
  590. "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
  591. pci_mem_start, gapstart, gapsize);
  592. }
  593. /**
  594. * Because of the size limitation of struct boot_params, only first
  595. * 128 E820 memory entries are passed to kernel via
  596. * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
  597. * linked list of struct setup_data, which is parsed here.
  598. */
  599. void __init parse_e820_ext(struct setup_data *sdata)
  600. {
  601. int entries;
  602. struct e820entry *extmap;
  603. entries = sdata->len / sizeof(struct e820entry);
  604. extmap = (struct e820entry *)(sdata->data);
  605. __append_e820_map(extmap, entries);
  606. sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
  607. printk(KERN_INFO "extended physical RAM map:\n");
  608. e820_print_map("extended");
  609. }
  610. #if defined(CONFIG_X86_64) || \
  611. (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
  612. /**
  613. * Find the ranges of physical addresses that do not correspond to
  614. * e820 RAM areas and mark the corresponding pages as nosave for
  615. * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
  616. *
  617. * This function requires the e820 map to be sorted and without any
  618. * overlapping entries and assumes the first e820 area to be RAM.
  619. */
void __init e820_mark_nosave_regions(unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	/*
	 * pfn tracks the first page frame past the previously processed
	 * entry; the first entry is assumed to be RAM (see the comment
	 * above) and is therefore never marked nosave itself.
	 */
	pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
	for (i = 1; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];

		/* Hole between the previous entry and this one: nosave. */
		if (pfn < PFN_UP(ei->addr))
			register_nosave_region(pfn, PFN_UP(ei->addr));

		pfn = PFN_DOWN(ei->addr + ei->size);
		/* Entries that are not RAM are themselves nosave. */
		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
			register_nosave_region(PFN_UP(ei->addr), pfn);

		if (pfn >= limit_pfn)
			break;
	}
}
  636. #endif
  637. #ifdef CONFIG_HIBERNATION
  638. /**
  639. * Mark ACPI NVS memory region, so that we can save/restore it during
  640. * hibernation and the subsequent resume.
  641. */
  642. static int __init e820_mark_nvs_memory(void)
  643. {
  644. int i;
  645. for (i = 0; i < e820.nr_map; i++) {
  646. struct e820entry *ei = &e820.map[i];
  647. if (ei->type == E820_NVS)
  648. suspend_nvs_register(ei->addr, ei->size);
  649. }
  650. return 0;
  651. }
  652. core_initcall(e820_mark_nvs_memory);
  653. #endif
/*
 * pre allocated 4k and reserved it in memblock and e820_saved
 */
/*
 * Find and reserve 'sizet' bytes of free memory at or above 'startt',
 * aligned to 'align'. Returns the reserved address, or 0 on failure.
 */
u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
{
	u64 size = 0;
	u64 addr;
	u64 start;

	/* Walk free ranges upward until one is big enough for sizet. */
	for (start = startt; ; start += size) {
		start = memblock_x86_find_in_range_size(start, &size, align);
		if (start == MEMBLOCK_ERROR)
			return 0;
		if (size >= sizet)
			break;
	}

#ifdef CONFIG_X86_32
	/* On 32-bit, stay below the directly mappable limit. */
	if (start >= MAXMEM)
		return 0;
	if (start + size > MAXMEM)
		size = MAXMEM - start;
#endif

	/* Carve the allocation out of the aligned top of the found range. */
	addr = round_down(start + size - sizet, align);
	if (addr < start)
		return 0;
	memblock_x86_reserve_range(addr, addr + sizet, "new next");
	/* Mirror the reservation into the saved firmware map. */
	e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
	printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
	update_e820_saved();

	return addr;
}
  684. #ifdef CONFIG_X86_32
  685. # ifdef CONFIG_X86_PAE
  686. # define MAX_ARCH_PFN (1ULL<<(36-PAGE_SHIFT))
  687. # else
  688. # define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT))
  689. # endif
  690. #else /* CONFIG_X86_32 */
  691. # define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
  692. #endif
  693. /*
  694. * Find the highest page frame number we have available
  695. */
  696. unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
  697. {
  698. int i;
  699. unsigned long last_pfn = 0;
  700. unsigned long max_arch_pfn = MAX_ARCH_PFN;
  701. for (i = 0; i < e820.nr_map; i++) {
  702. struct e820entry *ei = &e820.map[i];
  703. unsigned long start_pfn;
  704. unsigned long end_pfn;
  705. if (ei->type != type)
  706. continue;
  707. start_pfn = ei->addr >> PAGE_SHIFT;
  708. end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
  709. if (start_pfn >= limit_pfn)
  710. continue;
  711. if (end_pfn > limit_pfn) {
  712. last_pfn = limit_pfn;
  713. break;
  714. }
  715. if (end_pfn > last_pfn)
  716. last_pfn = end_pfn;
  717. }
  718. if (last_pfn > max_arch_pfn)
  719. last_pfn = max_arch_pfn;
  720. printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
  721. last_pfn, max_arch_pfn);
  722. return last_pfn;
  723. }
  724. unsigned long __init e820_end_of_ram_pfn(void)
  725. {
  726. return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
  727. }
  728. unsigned long __init e820_end_of_low_ram_pfn(void)
  729. {
  730. return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
  731. }
  732. static void early_panic(char *msg)
  733. {
  734. early_printk(msg);
  735. panic(msg);
  736. }
/* Non-zero once the user modified the map via "mem=" or "memmap=". */
static int userdef __initdata;
  738. /* "mem=nopentium" disables the 4MB page tables. */
  739. static int __init parse_memopt(char *p)
  740. {
  741. u64 mem_size;
  742. if (!p)
  743. return -EINVAL;
  744. if (!strcmp(p, "nopentium")) {
  745. #ifdef CONFIG_X86_32
  746. setup_clear_cpu_cap(X86_FEATURE_PSE);
  747. return 0;
  748. #else
  749. printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n");
  750. return -EINVAL;
  751. #endif
  752. }
  753. userdef = 1;
  754. mem_size = memparse(p, &p);
  755. /* don't remove all of memory when handling "mem={invalid}" param */
  756. if (mem_size == 0)
  757. return -EINVAL;
  758. e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
  759. return 0;
  760. }
  761. early_param("mem", parse_memopt);
  762. static int __init parse_memmap_opt(char *p)
  763. {
  764. char *oldp;
  765. u64 start_at, mem_size;
  766. if (!p)
  767. return -EINVAL;
  768. if (!strncmp(p, "exactmap", 8)) {
  769. #ifdef CONFIG_CRASH_DUMP
  770. /*
  771. * If we are doing a crash dump, we still need to know
  772. * the real mem size before original memory map is
  773. * reset.
  774. */
  775. saved_max_pfn = e820_end_of_ram_pfn();
  776. #endif
  777. e820.nr_map = 0;
  778. userdef = 1;
  779. return 0;
  780. }
  781. oldp = p;
  782. mem_size = memparse(p, &p);
  783. if (p == oldp)
  784. return -EINVAL;
  785. userdef = 1;
  786. if (*p == '@') {
  787. start_at = memparse(p+1, &p);
  788. e820_add_region(start_at, mem_size, E820_RAM);
  789. } else if (*p == '#') {
  790. start_at = memparse(p+1, &p);
  791. e820_add_region(start_at, mem_size, E820_ACPI);
  792. } else if (*p == '$') {
  793. start_at = memparse(p+1, &p);
  794. e820_add_region(start_at, mem_size, E820_RESERVED);
  795. } else
  796. e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
  797. return *p == '\0' ? 0 : -EINVAL;
  798. }
  799. early_param("memmap", parse_memmap_opt);
  800. void __init finish_e820_parsing(void)
  801. {
  802. if (userdef) {
  803. u32 nr = e820.nr_map;
  804. if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
  805. early_panic("Invalid user supplied memory map");
  806. e820.nr_map = nr;
  807. printk(KERN_INFO "user-defined physical RAM map:\n");
  808. e820_print_map("user");
  809. }
  810. }
  811. static inline const char *e820_type_to_string(int e820_type)
  812. {
  813. switch (e820_type) {
  814. case E820_RESERVED_KERN:
  815. case E820_RAM: return "System RAM";
  816. case E820_ACPI: return "ACPI Tables";
  817. case E820_NVS: return "ACPI Non-volatile Storage";
  818. case E820_UNUSABLE: return "Unusable memory";
  819. default: return "reserved";
  820. }
  821. }
/*
 * Mark e820 reserved areas as busy for the resource manager.
 */
/* One resource per e820 entry; consumed again by e820_reserve_resources_late(). */
static struct resource __initdata *e820_res;
void __init e820_reserve_resources(void)
{
	int i;
	struct resource *res;
	u64 end;

	res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
	e820_res = res;
	for (i = 0; i < e820.nr_map; i++) {
		end = e820.map[i].addr + e820.map[i].size - 1;
		if (end != (resource_size_t)end) {
			/*
			 * Entry doesn't fit in resource_size_t: skip it but
			 * still advance res, so e820_res stays a parallel
			 * array to e820.map (the skipped slot stays zeroed).
			 */
			res++;
			continue;
		}
		res->name = e820_type_to_string(e820.map[i].type);
		res->start = e820.map[i].addr;
		res->end = end;

		res->flags = IORESOURCE_MEM;

		/*
		 * don't register the region that could be conflicted with
		 * pci device BAR resource and insert them later in
		 * pcibios_resource_survey()
		 */
		if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
			res->flags |= IORESOURCE_BUSY;
			insert_resource(&iomem_resource, res);
		}
		res++;
	}

	/* Expose the unmodified firmware map via /sys/firmware/memmap. */
	for (i = 0; i < e820_saved.nr_map; i++) {
		struct e820entry *entry = &e820_saved.map[i];
		firmware_map_add_early(entry->addr,
				       entry->addr + entry->size - 1,
				       e820_type_to_string(entry->type));
	}
}
  861. /* How much should we pad RAM ending depending on where it is? */
  862. static unsigned long ram_alignment(resource_size_t pos)
  863. {
  864. unsigned long mb = pos >> 20;
  865. /* To 64kB in the first megabyte */
  866. if (!mb)
  867. return 64*1024;
  868. /* To 1MB in the first 16MB */
  869. if (mb < 16)
  870. return 1024*1024;
  871. /* To 64MB for anything above that */
  872. return 64*1024*1024;
  873. }
  874. #define MAX_RESOURCE_SIZE ((resource_size_t)-1)
  875. void __init e820_reserve_resources_late(void)
  876. {
  877. int i;
  878. struct resource *res;
  879. res = e820_res;
  880. for (i = 0; i < e820.nr_map; i++) {
  881. if (!res->parent && res->end)
  882. insert_resource_expand_to_fit(&iomem_resource, res);
  883. res++;
  884. }
  885. /*
  886. * Try to bump up RAM regions to reasonable boundaries to
  887. * avoid stolen RAM:
  888. */
  889. for (i = 0; i < e820.nr_map; i++) {
  890. struct e820entry *entry = &e820.map[i];
  891. u64 start, end;
  892. if (entry->type != E820_RAM)
  893. continue;
  894. start = entry->addr + entry->size;
  895. end = round_up(start, ram_alignment(start)) - 1;
  896. if (end > MAX_RESOURCE_SIZE)
  897. end = MAX_RESOURCE_SIZE;
  898. if (start >= end)
  899. continue;
  900. printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
  901. start, end);
  902. reserve_region_with_split(&iomem_resource, start, end,
  903. "RAM buffer");
  904. }
  905. }
/*
 * Import the firmware-supplied memory map into the kernel's e820 table,
 * falling back to legacy BIOS size reports if the e820 data is unusable.
 * Returns a short string naming the source of the map, for printing.
 */
char *__init default_machine_specific_memory_setup(void)
{
	char *who = "BIOS-e820";
	u32 new_nr;
	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	new_nr = boot_params.e820_entries;
	/* Clean up the raw boot_params map (overlaps etc.) before importing. */
	sanitize_e820_map(boot_params.e820_map,
			ARRAY_SIZE(boot_params.e820_map),
			&new_nr);
	boot_params.e820_entries = new_nr;
	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
	    < 0) {
		u64 mem_size;
		/* compare results from other methods and take the greater */
		if (boot_params.alt_mem_k
		    < boot_params.screen_info.ext_mem_k) {
			mem_size = boot_params.screen_info.ext_mem_k;
			who = "BIOS-88";
		} else {
			mem_size = boot_params.alt_mem_k;
			who = "BIOS-e801";
		}
		/* Fake map: 0->640k RAM plus 1MB->(1MB + mem_size kB) RAM. */
		e820.nr_map = 0;
		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
	}
	/* In case someone cares... */
	return who;
}
  940. void __init setup_memory_map(void)
  941. {
  942. char *who;
  943. who = x86_init.resources.memory_setup();
  944. memcpy(&e820_saved, &e820, sizeof(struct e820map));
  945. printk(KERN_INFO "BIOS-provided physical RAM map:\n");
  946. e820_print_map(who);
  947. }
/* Register all usable e820 ranges (RAM and RESERVED_KERN) with memblock. */
void __init memblock_x86_fill(void)
{
	int i;
	u64 end;
	/*
	 * EFI may have more than 128 entries
	 * We are safe to enable resizing, because memblock_x86_fill()
	 * is rather late for x86
	 */
	memblock_can_resize = 1;
	for (i = 0; i < e820.nr_map; i++) {
		struct e820entry *ei = &e820.map[i];
		end = ei->addr + ei->size;
		/* Skip ranges whose end does not fit in resource_size_t. */
		if (end != (resource_size_t)end)
			continue;
		if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
			continue;
		memblock_add(ei->addr, ei->size);
	}
	memblock_analyze();
	memblock_dump_all();
}
/*
 * Compute how many pages below MAX_DMA_PFN are already in use
 * (registered with memblock but not free) and report that count
 * to the page allocator via set_dma_reserve().  x86-64 only.
 */
void __init memblock_find_dma_reserve(void)
{
#ifdef CONFIG_X86_64
	u64 free_size_pfn;
	u64 mem_size_pfn;
	/*
	 * need to find out used area below MAX_DMA_PFN
	 * need to use memblock to get free size in [0, MAX_DMA_PFN]
	 * at first, and assume boot_mem will not take below MAX_DMA_PFN
	 */
	mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
	free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
	/* reserved = registered-but-not-free pages below the DMA limit */
	set_dma_reserve(mem_size_pfn - free_size_pfn);
#endif
}