/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/memblock.h>

struct memblock memblock;

static int memblock_debug, memblock_can_resize;
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS + 1];
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS + 1];

#define MEMBLOCK_ERROR	(~(phys_addr_t)0)

/* inline so we don't get a warning when pr_debug is compiled out */
static inline const char *memblock_type_name(struct memblock_type *type)
{
	if (type == &memblock.memory)
		return "memory";
	else if (type == &memblock.reserved)
		return "reserved";
	else
		return "unknown";
}

/*
 * Address comparison utilities
 */

static phys_addr_t memblock_align_down(phys_addr_t addr, phys_addr_t size)
{
	return addr & ~(size - 1);
}

static phys_addr_t memblock_align_up(phys_addr_t addr, phys_addr_t size)
{
	return (addr + (size - 1)) & ~(size - 1);
}

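/*
 * Both helpers assume the alignment ("size") is a power of two, since they
 * work by masking with ~(size - 1).  Illustrative example (addresses are
 * hypothetical):
 *
 *	memblock_align_down(0x12345, 0x1000) == 0x12000
 *	memblock_align_up(0x12345, 0x1000)   == 0x13000
 */
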
static unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
					    phys_addr_t base2, phys_addr_t size2)
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

static long memblock_addrs_adjacent(phys_addr_t base1, phys_addr_t size1,
				    phys_addr_t base2, phys_addr_t size2)
{
	if (base2 == base1 + size1)
		return 1;
	else if (base1 == base2 + size2)
		return -1;

	return 0;
}

static long memblock_regions_adjacent(struct memblock_type *type,
				      unsigned long r1, unsigned long r2)
{
	phys_addr_t base1 = type->regions[r1].base;
	phys_addr_t size1 = type->regions[r1].size;
	phys_addr_t base2 = type->regions[r2].base;
	phys_addr_t size2 = type->regions[r2].size;

	return memblock_addrs_adjacent(base1, size1, base2, size2);
}

long memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
	unsigned long i;

	for (i = 0; i < type->cnt; i++) {
		phys_addr_t rgnbase = type->regions[i].base;
		phys_addr_t rgnsize = type->regions[i].size;
		if (memblock_addrs_overlap(base, size, rgnbase, rgnsize))
			break;
	}

	return (i < type->cnt) ? i : -1;
}

/*
 * Find, allocate, deallocate or reserve unreserved regions. All allocations
 * are top-down.
 */

static phys_addr_t __init memblock_find_region(phys_addr_t start, phys_addr_t end,
					       phys_addr_t size, phys_addr_t align)
{
	phys_addr_t base, res_base;
	long j;

	base = memblock_align_down((end - size), align);
	while (start <= base) {
		j = memblock_overlaps_region(&memblock.reserved, base, size);
		if (j < 0)
			return base;
		res_base = memblock.reserved.regions[j].base;
		if (res_base < size)
			break;
		base = memblock_align_down(res_base - size, align);
	}

	return MEMBLOCK_ERROR;
}

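/*
 * Sketch of the top-down walk above (the region layout is hypothetical):
 * with [start, end) = [0x0, 0x10000), a reserved region at [0xe000, 0x10000)
 * and a 0x1000-byte, 0x1000-aligned request, the first candidate base is
 * 0xf000, which overlaps the reservation; the search then retries just
 * below it at 0xd000, which is returned if it is free.
 */
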
static phys_addr_t __init memblock_find_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	long i;
	phys_addr_t base = 0;
	phys_addr_t res_base;

	BUG_ON(0 == size);

	size = memblock_align_up(size, align);

	/* Pump up max_addr */
	if (max_addr == MEMBLOCK_ALLOC_ACCESSIBLE)
		max_addr = memblock.current_limit;

	/* We do a top-down search, this tends to limit memory
	 * fragmentation by keeping early boot allocs near the
	 * top of memory
	 */
	for (i = memblock.memory.cnt - 1; i >= 0; i--) {
		phys_addr_t memblockbase = memblock.memory.regions[i].base;
		phys_addr_t memblocksize = memblock.memory.regions[i].size;

		if (memblocksize < size)
			continue;
		base = min(memblockbase + memblocksize, max_addr);
		res_base = memblock_find_region(memblockbase, base, size, align);
		if (res_base != MEMBLOCK_ERROR)
			return res_base;
	}
	return MEMBLOCK_ERROR;
}

static void memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	unsigned long i;

	for (i = r; i < type->cnt - 1; i++) {
		type->regions[i].base = type->regions[i + 1].base;
		type->regions[i].size = type->regions[i + 1].size;
	}
	type->cnt--;
}

/* Assumption: base addr of region 1 < base addr of region 2 */
static void memblock_coalesce_regions(struct memblock_type *type,
				      unsigned long r1, unsigned long r2)
{
	type->regions[r1].size += type->regions[r2].size;
	memblock_remove_region(type, r2);
}

/* Defined below but needed now */
static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size);

static int memblock_double_array(struct memblock_type *type)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_size, new_size, addr;
	int use_slab = slab_is_available();

	/* We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		return -1;

	pr_debug("memblock: %s array full, doubling...", memblock_type_name(type));

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;

	/* Try to find some space for it.
	 *
	 * WARNING: We assume that either slab_is_available() and we use it or
	 * we use MEMBLOCK for allocations. That means that this is unsafe to use
	 * when bootmem is currently active (unless bootmem itself is implemented
	 * on top of MEMBLOCK which isn't the case yet)
	 *
	 * This should however not be an issue for now, as we currently only
	 * call into MEMBLOCK while it's still active, or much later when slab is
	 * active for memory hotplug operations
	 */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array == NULL ? MEMBLOCK_ERROR : __pa(new_array);
	} else
		addr = memblock_find_base(new_size, sizeof(phys_addr_t), MEMBLOCK_ALLOC_ACCESSIBLE);
	if (addr == MEMBLOCK_ERROR) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       memblock_type_name(type), type->max, type->max * 2);
		return -1;
	}
	new_array = __va(addr);

	/* Found space, we now need to move the array over before
	 * we add the reserved region since it may be our reserved
	 * array itself that is full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* If we use SLAB that's it, we are done */
	if (use_slab)
		return 0;

	/* Add the new reserved region now. Should not fail ! */
	BUG_ON(memblock_add_region(&memblock.reserved, addr, new_size) < 0);

	/* If the array wasn't our static init one, then free it. We only do
	 * that before SLAB is available as later on, we don't know whether
	 * to use kfree or free_bootmem_pages(). Shouldn't be a big deal
	 * anyways
	 */
	if (old_array != memblock_memory_init_regions &&
	    old_array != memblock_reserved_init_regions)
		memblock_free(__pa(old_array), old_size);

	return 0;
}

static long memblock_add_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
	unsigned long coalesced = 0;
	long adjacent, i;

	if ((type->cnt == 1) && (type->regions[0].size == 0)) {
		type->regions[0].base = base;
		type->regions[0].size = size;
		return 0;
	}

	/* First try and coalesce this MEMBLOCK with another. */
	for (i = 0; i < type->cnt; i++) {
		phys_addr_t rgnbase = type->regions[i].base;
		phys_addr_t rgnsize = type->regions[i].size;

		if ((rgnbase == base) && (rgnsize == size))
			/* Already have this region, so we're done */
			return 0;

		adjacent = memblock_addrs_adjacent(base, size, rgnbase, rgnsize);
		if (adjacent > 0) {
			type->regions[i].base -= size;
			type->regions[i].size += size;
			coalesced++;
			break;
		} else if (adjacent < 0) {
			type->regions[i].size += size;
			coalesced++;
			break;
		}
	}

	if ((i < type->cnt - 1) && memblock_regions_adjacent(type, i, i+1)) {
		memblock_coalesce_regions(type, i, i+1);
		coalesced++;
	}

	if (coalesced)
		return coalesced;

	/* If we are out of space, we fail. It's too late to resize the array
	 * but then this shouldn't have happened in the first place.
	 */
	if (WARN_ON(type->cnt >= type->max))
		return -1;

	/* Couldn't coalesce the MEMBLOCK, so add it to the sorted table. */
	for (i = type->cnt - 1; i >= 0; i--) {
		if (base < type->regions[i].base) {
			type->regions[i+1].base = type->regions[i].base;
			type->regions[i+1].size = type->regions[i].size;
		} else {
			type->regions[i+1].base = base;
			type->regions[i+1].size = size;
			break;
		}
	}

	if (base < type->regions[0].base) {
		type->regions[0].base = base;
		type->regions[0].size = size;
	}
	type->cnt++;

	/* The array is full ? Try to resize it. If that fails, we undo
	 * our allocation and return an error
	 */
	if (type->cnt == type->max && memblock_double_array(type)) {
		type->cnt--;
		return -1;
	}

	return 0;
}

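/*
 * Illustration of the coalescing above (addresses are hypothetical): with an
 * existing region [0x1000, 0x2000), adding [0x2000, 0x3000) hits the
 * "adjacent < 0" path and simply grows the existing entry to
 * [0x1000, 0x3000) instead of consuming another array slot; only blocks
 * that touch no existing region are inserted into the sorted table.
 */
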
long memblock_add(phys_addr_t base, phys_addr_t size)
{
	return memblock_add_region(&memblock.memory, base, size);
}

static long __memblock_remove(struct memblock_type *type, phys_addr_t base, phys_addr_t size)
{
	phys_addr_t rgnbegin, rgnend;
	phys_addr_t end = base + size;
	int i;

	rgnbegin = rgnend = 0; /* suppress gcc warnings */

	/* Find the region where (base, size) belongs to */
	for (i = 0; i < type->cnt; i++) {
		rgnbegin = type->regions[i].base;
		rgnend = rgnbegin + type->regions[i].size;

		if ((rgnbegin <= base) && (end <= rgnend))
			break;
	}

	/* Didn't find the region */
	if (i == type->cnt)
		return -1;

	/* Check to see if we are removing entire region */
	if ((rgnbegin == base) && (rgnend == end)) {
		memblock_remove_region(type, i);
		return 0;
	}

	/* Check to see if region is matching at the front */
	if (rgnbegin == base) {
		type->regions[i].base = end;
		type->regions[i].size -= size;
		return 0;
	}

	/* Check to see if the region is matching at the end */
	if (rgnend == end) {
		type->regions[i].size -= size;
		return 0;
	}

	/*
	 * We need to split the entry - adjust the current one to the
	 * beginning of the hole and add the region after the hole.
	 */
	type->regions[i].size = base - type->regions[i].base;
	return memblock_add_region(type, end, rgnend - end);
}

long memblock_remove(phys_addr_t base, phys_addr_t size)
{
	return __memblock_remove(&memblock.memory, base, size);
}

long __init memblock_free(phys_addr_t base, phys_addr_t size)
{
	return __memblock_remove(&memblock.reserved, base, size);
}

long __init memblock_reserve(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *_rgn = &memblock.reserved;

	BUG_ON(0 == size);

	return memblock_add_region(_rgn, base, size);
}

phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t found;

	/* We align the size to limit fragmentation. Without this, a lot of
	 * small allocs quickly eat up the whole reserve array on sparc
	 */
	size = memblock_align_up(size, align);

	found = memblock_find_base(size, align, max_addr);
	if (found != MEMBLOCK_ERROR &&
	    memblock_add_region(&memblock.reserved, found, size) >= 0)
		return found;

	return 0;
}

phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t alloc;

	alloc = __memblock_alloc_base(size, align, max_addr);

	if (alloc == 0)
		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
		      (unsigned long long) size, (unsigned long long) max_addr);

	return alloc;
}

phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
{
	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
}

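/*
 * Typical early-boot use of the allocation API above (illustrative only;
 * the variable names, sizes and limits are hypothetical):
 *
 *	phys_addr_t pgd, low;
 *
 *	pgd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 *		// allocates below memblock.current_limit, panics on failure
 *	low = __memblock_alloc_base(SZ_1M, SZ_1M, 0x40000000);
 *		// constrained below 1GB, returns 0 on failure
 *	if (!low)
 *		pr_warn("no memory below 1GB available\n");
 */
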
/*
 * Additional node-local allocators. Search for node memory is bottom up
 * and walks memblock regions within that node bottom-up as well, but allocation
 * within a memblock region is top-down.
 */

phys_addr_t __weak __init memblock_nid_range(phys_addr_t start, phys_addr_t end, int *nid)
{
	*nid = 0;

	return end;
}

static phys_addr_t __init memblock_alloc_nid_region(struct memblock_region *mp,
						    phys_addr_t size,
						    phys_addr_t align, int nid)
{
	phys_addr_t start, end;

	start = mp->base;
	end = start + mp->size;

	start = memblock_align_up(start, align);
	while (start < end) {
		phys_addr_t this_end;
		int this_nid;

		this_end = memblock_nid_range(start, end, &this_nid);
		if (this_nid == nid) {
			phys_addr_t ret = memblock_find_region(start, this_end, size, align);
			if (ret != MEMBLOCK_ERROR &&
			    memblock_add_region(&memblock.reserved, ret, size) >= 0)
				return ret;
		}
		start = this_end;
	}

	return MEMBLOCK_ERROR;
}

phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	struct memblock_type *mem = &memblock.memory;
	int i;

	BUG_ON(0 == size);

	/* We align the size to limit fragmentation. Without this, a lot of
	 * small allocs quickly eat up the whole reserve array on sparc
	 */
	size = memblock_align_up(size, align);

	/* We do a bottom-up search for a region with the right
	 * nid since that's easier considering how memblock_nid_range()
	 * works
	 */
	for (i = 0; i < mem->cnt; i++) {
		phys_addr_t ret = memblock_alloc_nid_region(&mem->regions[i],
							    size, align, nid);
		if (ret != MEMBLOCK_ERROR)
			return ret;
	}

	return memblock_alloc(size, align);
}

/* You must call memblock_analyze() before this. */
phys_addr_t __init memblock_phys_mem_size(void)
{
	return memblock.memory_size;
}

phys_addr_t memblock_end_of_DRAM(void)
{
	int idx = memblock.memory.cnt - 1;

	return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
}

/* You must call memblock_analyze() after this. */
void __init memblock_enforce_memory_limit(phys_addr_t memory_limit)
{
	unsigned long i;
	phys_addr_t limit;
	struct memblock_region *p;

	if (!memory_limit)
		return;

	/* Truncate the memblock regions to satisfy the memory limit. */
	limit = memory_limit;
	for (i = 0; i < memblock.memory.cnt; i++) {
		if (limit > memblock.memory.regions[i].size) {
			limit -= memblock.memory.regions[i].size;
			continue;
		}

		memblock.memory.regions[i].size = limit;
		memblock.memory.cnt = i + 1;
		break;
	}

	memory_limit = memblock_end_of_DRAM();

	/* And truncate any reserves above the limit also. */
	for (i = 0; i < memblock.reserved.cnt; i++) {
		p = &memblock.reserved.regions[i];

		if (p->base > memory_limit)
			p->size = 0;
		else if ((p->base + p->size) > memory_limit)
			p->size = memory_limit - p->base;

		if (p->size == 0) {
			memblock_remove_region(&memblock.reserved, i);
			i--;
		}
	}
}

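/*
 * Illustration of the truncation above (the layout is hypothetical): with
 * memory regions [0, 512MB) and [1GB, 2GB) and memory_limit = 768MB, the
 * first region is kept whole (512MB consumed from the budget), the second
 * is shrunk to [1GB, 1.25GB) to hold the remaining 256MB, and any reserved
 * regions beyond the new end of DRAM are clipped or dropped.
 */
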
static int memblock_search(struct memblock_type *type, phys_addr_t addr)
{
	unsigned int left = 0, right = type->cnt;

	do {
		unsigned int mid = (right + left) / 2;

		if (addr < type->regions[mid].base)
			right = mid;
		else if (addr >= (type->regions[mid].base +
				  type->regions[mid].size))
			left = mid + 1;
		else
			return mid;
	} while (left < right);
	return -1;
}

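/*
 * The binary search above relies on memblock_add_region() keeping each
 * region array sorted by base address and free of overlaps.  For example
 * (hypothetical layout), looking up 0x5000 in
 * { [0x0, 0x1000), [0x4000, 0x6000), [0x9000, 0xa000) } probes the middle
 * entry first and returns index 1.
 */
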
int __init memblock_is_reserved(phys_addr_t addr)
{
	return memblock_search(&memblock.reserved, addr) != -1;
}

int memblock_is_memory(phys_addr_t addr)
{
	return memblock_search(&memblock.memory, addr) != -1;
}

int memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
{
	int idx = memblock_search(&memblock.memory, base);

	if (idx == -1)
		return 0;
	return memblock.memory.regions[idx].base <= base &&
		(memblock.memory.regions[idx].base +
		 memblock.memory.regions[idx].size) >= (base + size);
}

int memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
{
	return memblock_overlaps_region(&memblock.reserved, base, size) >= 0;
}

void __init memblock_set_current_limit(phys_addr_t limit)
{
	memblock.current_limit = limit;
}

static void memblock_dump(struct memblock_type *region, char *name)
{
	unsigned long long base, size;
	int i;

	pr_info(" %s.cnt = 0x%lx\n", name, region->cnt);

	for (i = 0; i < region->cnt; i++) {
		base = region->regions[i].base;
		size = region->regions[i].size;

		pr_info(" %s[0x%x]\t0x%016llx - 0x%016llx, 0x%llx bytes\n",
			name, i, base, base + size - 1, size);
	}
}

void memblock_dump_all(void)
{
	if (!memblock_debug)
		return;

	pr_info("MEMBLOCK configuration:\n");
	pr_info(" memory size = 0x%llx\n", (unsigned long long)memblock.memory_size);

	memblock_dump(&memblock.memory, "memory");
	memblock_dump(&memblock.reserved, "reserved");
}

void __init memblock_analyze(void)
{
	int i;

	/* Check marker in the unused last array entry */
	WARN_ON(memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS].base
		!= (phys_addr_t)RED_INACTIVE);
	WARN_ON(memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS].base
		!= (phys_addr_t)RED_INACTIVE);

	memblock.memory_size = 0;

	for (i = 0; i < memblock.memory.cnt; i++)
		memblock.memory_size += memblock.memory.regions[i].size;

	/* We allow resizing from there */
	memblock_can_resize = 1;
}

void __init memblock_init(void)
{
	/* Hookup the initial arrays */
	memblock.memory.regions = memblock_memory_init_regions;
	memblock.memory.max = INIT_MEMBLOCK_REGIONS;
	memblock.reserved.regions = memblock_reserved_init_regions;
	memblock.reserved.max = INIT_MEMBLOCK_REGIONS;

	/* Write a marker in the unused last array entry */
	memblock.memory.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE;
	memblock.reserved.regions[INIT_MEMBLOCK_REGIONS].base = (phys_addr_t)RED_INACTIVE;

	/* Create a dummy zero size MEMBLOCK which will get coalesced away later.
	 * This simplifies the memblock_add() code below...
	 */
	memblock.memory.regions[0].base = 0;
	memblock.memory.regions[0].size = 0;
	memblock.memory.cnt = 1;

	/* Ditto. */
	memblock.reserved.regions[0].base = 0;
	memblock.reserved.regions[0].size = 0;
	memblock.reserved.cnt = 1;

	memblock.current_limit = MEMBLOCK_ALLOC_ANYWHERE;
}

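/*
 * A typical architecture setup path uses the API above roughly in this
 * order (illustrative only; the address ranges and symbols are hypothetical):
 *
 *	memblock_init();
 *	memblock_add(0x00000000, 0x40000000);		// report 1GB of RAM
 *	memblock_reserve(__pa(_text), _end - _text);	// protect the kernel image
 *	memblock_analyze();				// compute memory_size, enable resizing
 *	ptr = memblock_alloc(size, PAGE_SIZE);		// early allocations
 */
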
static int __init early_memblock(char *p)
{
	if (p && strstr(p, "debug"))
		memblock_debug = 1;
	return 0;
}
early_param("memblock", early_memblock);

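/*
 * Booting with "memblock=debug" on the kernel command line sets
 * memblock_debug, which makes memblock_dump_all() print the full memory
 * and reserved region tables to the kernel log.
 */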