init.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676
  1. /*
  2. * linux/arch/arm/mm/init.c
  3. *
  4. * Copyright (C) 1995-2002 Russell King
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. */
  10. #include <linux/config.h>
  11. #include <linux/kernel.h>
  12. #include <linux/errno.h>
  13. #include <linux/ptrace.h>
  14. #include <linux/swap.h>
  15. #include <linux/init.h>
  16. #include <linux/bootmem.h>
  17. #include <linux/mman.h>
  18. #include <linux/nodemask.h>
  19. #include <linux/initrd.h>
  20. #include <asm/mach-types.h>
  21. #include <asm/hardware.h>
  22. #include <asm/setup.h>
  23. #include <asm/tlb.h>
  24. #include <asm/mach/arch.h>
  25. #include <asm/mach/map.h>
  26. #define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
  27. DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
  28. extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
  29. extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end;
  30. extern unsigned long phys_initrd_start;
  31. extern unsigned long phys_initrd_size;
  32. /*
  33. * The sole use of this is to pass memory configuration
  34. * data from paging_init to mem_init.
  35. */
  36. static struct meminfo meminfo __initdata = { 0, };
  37. /*
  38. * empty_zero_page is a special page that is used for
  39. * zero-initialized data and COW.
  40. */
  41. struct page *empty_zero_page;
  42. void show_mem(void)
  43. {
  44. int free = 0, total = 0, reserved = 0;
  45. int shared = 0, cached = 0, slab = 0, node;
  46. printk("Mem-info:\n");
  47. show_free_areas();
  48. printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
  49. for_each_online_node(node) {
  50. struct page *page, *end;
  51. page = NODE_MEM_MAP(node);
  52. end = page + NODE_DATA(node)->node_spanned_pages;
  53. do {
  54. total++;
  55. if (PageReserved(page))
  56. reserved++;
  57. else if (PageSwapCache(page))
  58. cached++;
  59. else if (PageSlab(page))
  60. slab++;
  61. else if (!page_count(page))
  62. free++;
  63. else
  64. shared += page_count(page) - 1;
  65. page++;
  66. } while (page < end);
  67. }
  68. printk("%d pages of RAM\n", total);
  69. printk("%d free pages\n", free);
  70. printk("%d reserved pages\n", reserved);
  71. printk("%d slab pages\n", slab);
  72. printk("%d pages shared\n", shared);
  73. printk("%d pages swap cached\n", cached);
  74. }
  75. struct node_info {
  76. unsigned int start;
  77. unsigned int end;
  78. int bootmap_pages;
  79. };
  80. #define O_PFN_DOWN(x) ((x) >> PAGE_SHIFT)
  81. #define O_PFN_UP(x) (PAGE_ALIGN(x) >> PAGE_SHIFT)
  82. /*
  83. * FIXME: We really want to avoid allocating the bootmap bitmap
  84. * over the top of the initrd. Hopefully, this is located towards
  85. * the start of a bank, so if we allocate the bootmap bitmap at
  86. * the end, we won't clash.
  87. */
  88. static unsigned int __init
  89. find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
  90. {
  91. unsigned int start_pfn, bank, bootmap_pfn;
  92. start_pfn = O_PFN_UP(__pa(&_end));
  93. bootmap_pfn = 0;
  94. for (bank = 0; bank < mi->nr_banks; bank ++) {
  95. unsigned int start, end;
  96. if (mi->bank[bank].node != node)
  97. continue;
  98. start = mi->bank[bank].start >> PAGE_SHIFT;
  99. end = (mi->bank[bank].size +
  100. mi->bank[bank].start) >> PAGE_SHIFT;
  101. if (end < start_pfn)
  102. continue;
  103. if (start < start_pfn)
  104. start = start_pfn;
  105. if (end <= start)
  106. continue;
  107. if (end - start >= bootmap_pages) {
  108. bootmap_pfn = start;
  109. break;
  110. }
  111. }
  112. if (bootmap_pfn == 0)
  113. BUG();
  114. return bootmap_pfn;
  115. }
  116. /*
  117. * Scan the memory info structure and pull out:
  118. * - the end of memory
  119. * - the number of nodes
  120. * - the pfn range of each node
  121. * - the number of bootmem bitmap pages
  122. */
  123. static unsigned int __init
  124. find_memend_and_nodes(struct meminfo *mi, struct node_info *np)
  125. {
  126. unsigned int i, bootmem_pages = 0, memend_pfn = 0;
  127. for (i = 0; i < MAX_NUMNODES; i++) {
  128. np[i].start = -1U;
  129. np[i].end = 0;
  130. np[i].bootmap_pages = 0;
  131. }
  132. for (i = 0; i < mi->nr_banks; i++) {
  133. unsigned long start, end;
  134. int node;
  135. if (mi->bank[i].size == 0) {
  136. /*
  137. * Mark this bank with an invalid node number
  138. */
  139. mi->bank[i].node = -1;
  140. continue;
  141. }
  142. node = mi->bank[i].node;
  143. /*
  144. * Make sure we haven't exceeded the maximum number of nodes
  145. * that we have in this configuration. If we have, we're in
  146. * trouble. (maybe we ought to limit, instead of bugging?)
  147. */
  148. if (node >= MAX_NUMNODES)
  149. BUG();
  150. node_set_online(node);
  151. /*
  152. * Get the start and end pfns for this bank
  153. */
  154. start = mi->bank[i].start >> PAGE_SHIFT;
  155. end = (mi->bank[i].start + mi->bank[i].size) >> PAGE_SHIFT;
  156. if (np[node].start > start)
  157. np[node].start = start;
  158. if (np[node].end < end)
  159. np[node].end = end;
  160. if (memend_pfn < end)
  161. memend_pfn = end;
  162. }
  163. /*
  164. * Calculate the number of pages we require to
  165. * store the bootmem bitmaps.
  166. */
  167. for_each_online_node(i) {
  168. if (np[i].end == 0)
  169. continue;
  170. np[i].bootmap_pages = bootmem_bootmap_pages(np[i].end -
  171. np[i].start);
  172. bootmem_pages += np[i].bootmap_pages;
  173. }
  174. high_memory = __va(memend_pfn << PAGE_SHIFT);
  175. /*
  176. * This doesn't seem to be used by the Linux memory
  177. * manager any more. If we can get rid of it, we
  178. * also get rid of some of the stuff above as well.
  179. *
  180. * Note: max_low_pfn and max_pfn reflect the number
  181. * of _pages_ in the system, not the maximum PFN.
  182. */
  183. max_low_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET);
  184. max_pfn = memend_pfn - O_PFN_DOWN(PHYS_OFFSET);
  185. return bootmem_pages;
  186. }
  187. static int __init check_initrd(struct meminfo *mi)
  188. {
  189. int initrd_node = -2;
  190. #ifdef CONFIG_BLK_DEV_INITRD
  191. unsigned long end = phys_initrd_start + phys_initrd_size;
  192. /*
  193. * Make sure that the initrd is within a valid area of
  194. * memory.
  195. */
  196. if (phys_initrd_size) {
  197. unsigned int i;
  198. initrd_node = -1;
  199. for (i = 0; i < mi->nr_banks; i++) {
  200. unsigned long bank_end;
  201. bank_end = mi->bank[i].start + mi->bank[i].size;
  202. if (mi->bank[i].start <= phys_initrd_start &&
  203. end <= bank_end)
  204. initrd_node = mi->bank[i].node;
  205. }
  206. }
  207. if (initrd_node == -1) {
  208. printk(KERN_ERR "initrd (0x%08lx - 0x%08lx) extends beyond "
  209. "physical memory - disabling initrd\n",
  210. phys_initrd_start, end);
  211. phys_initrd_start = phys_initrd_size = 0;
  212. }
  213. #endif
  214. return initrd_node;
  215. }
  216. /*
  217. * Reserve the various regions of node 0
  218. */
  219. static __init void reserve_node_zero(unsigned int bootmap_pfn, unsigned int bootmap_pages)
  220. {
  221. pg_data_t *pgdat = NODE_DATA(0);
  222. unsigned long res_size = 0;
  223. /*
  224. * Register the kernel text and data with bootmem.
  225. * Note that this can only be in node 0.
  226. */
  227. #ifdef CONFIG_XIP_KERNEL
  228. reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
  229. #else
  230. reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
  231. #endif
  232. /*
  233. * Reserve the page tables. These are already in use,
  234. * and can only be in node 0.
  235. */
  236. reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
  237. PTRS_PER_PGD * sizeof(pgd_t));
  238. /*
  239. * And don't forget to reserve the allocator bitmap,
  240. * which will be freed later.
  241. */
  242. reserve_bootmem_node(pgdat, bootmap_pfn << PAGE_SHIFT,
  243. bootmap_pages << PAGE_SHIFT);
  244. /*
  245. * Hmm... This should go elsewhere, but we really really need to
  246. * stop things allocating the low memory; ideally we need a better
  247. * implementation of GFP_DMA which does not assume that DMA-able
  248. * memory starts at zero.
  249. */
  250. if (machine_is_integrator() || machine_is_cintegrator())
  251. res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
  252. /*
  253. * These should likewise go elsewhere. They pre-reserve the
  254. * screen memory region at the start of main system memory.
  255. */
  256. if (machine_is_edb7211())
  257. res_size = 0x00020000;
  258. if (machine_is_p720t())
  259. res_size = 0x00014000;
  260. #ifdef CONFIG_SA1111
  261. /*
  262. * Because of the SA1111 DMA bug, we want to preserve our
  263. * precious DMA-able memory...
  264. */
  265. res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
  266. #endif
  267. if (res_size)
  268. reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size);
  269. }
  270. /*
  271. * Register all available RAM in this node with the bootmem allocator.
  272. */
  273. static inline void free_bootmem_node_bank(int node, struct meminfo *mi)
  274. {
  275. pg_data_t *pgdat = NODE_DATA(node);
  276. int bank;
  277. for (bank = 0; bank < mi->nr_banks; bank++)
  278. if (mi->bank[bank].node == node)
  279. free_bootmem_node(pgdat, mi->bank[bank].start,
  280. mi->bank[bank].size);
  281. }
  282. /*
  283. * Initialise the bootmem allocator for all nodes. This is called
  284. * early during the architecture specific initialisation.
  285. */
  286. static void __init bootmem_init(struct meminfo *mi)
  287. {
  288. struct node_info node_info[MAX_NUMNODES], *np = node_info;
  289. unsigned int bootmap_pages, bootmap_pfn, map_pg;
  290. int node, initrd_node;
  291. bootmap_pages = find_memend_and_nodes(mi, np);
  292. bootmap_pfn = find_bootmap_pfn(0, mi, bootmap_pages);
  293. initrd_node = check_initrd(mi);
  294. map_pg = bootmap_pfn;
  295. /*
  296. * Initialise the bootmem nodes.
  297. *
  298. * What we really want to do is:
  299. *
  300. * unmap_all_regions_except_kernel();
  301. * for_each_node_in_reverse_order(node) {
  302. * map_node(node);
  303. * allocate_bootmem_map(node);
  304. * init_bootmem_node(node);
  305. * free_bootmem_node(node);
  306. * }
  307. *
  308. * but this is a 2.5-type change. For now, we just set
  309. * the nodes up in reverse order.
  310. *
  311. * (we could also do with rolling bootmem_init and paging_init
  312. * into one generic "memory_init" type function).
  313. */
  314. np += num_online_nodes() - 1;
  315. for (node = num_online_nodes() - 1; node >= 0; node--, np--) {
  316. /*
  317. * If there are no pages in this node, ignore it.
  318. * Note that node 0 must always have some pages.
  319. */
  320. if (np->end == 0 || !node_online(node)) {
  321. if (node == 0)
  322. BUG();
  323. continue;
  324. }
  325. /*
  326. * Initialise the bootmem allocator.
  327. */
  328. init_bootmem_node(NODE_DATA(node), map_pg, np->start, np->end);
  329. free_bootmem_node_bank(node, mi);
  330. map_pg += np->bootmap_pages;
  331. /*
  332. * If this is node 0, we need to reserve some areas ASAP -
  333. * we may use bootmem on node 0 to setup the other nodes.
  334. */
  335. if (node == 0)
  336. reserve_node_zero(bootmap_pfn, bootmap_pages);
  337. }
  338. #ifdef CONFIG_BLK_DEV_INITRD
  339. if (phys_initrd_size && initrd_node >= 0) {
  340. reserve_bootmem_node(NODE_DATA(initrd_node), phys_initrd_start,
  341. phys_initrd_size);
  342. initrd_start = __phys_to_virt(phys_initrd_start);
  343. initrd_end = initrd_start + phys_initrd_size;
  344. }
  345. #endif
  346. BUG_ON(map_pg != bootmap_pfn + bootmap_pages);
  347. }
  348. /*
  349. * paging_init() sets up the page tables, initialises the zone memory
  350. * maps, and sets up the zero page, bad page and bad page tables.
  351. */
  352. void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
  353. {
  354. void *zero_page;
  355. int node;
  356. bootmem_init(mi);
  357. memcpy(&meminfo, mi, sizeof(meminfo));
  358. /*
  359. * allocate the zero page. Note that we count on this going ok.
  360. */
  361. zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
  362. /*
  363. * initialise the page tables.
  364. */
  365. memtable_init(mi);
  366. if (mdesc->map_io)
  367. mdesc->map_io();
  368. local_flush_tlb_all();
  369. /*
  370. * initialise the zones within each node
  371. */
  372. for_each_online_node(node) {
  373. unsigned long zone_size[MAX_NR_ZONES];
  374. unsigned long zhole_size[MAX_NR_ZONES];
  375. struct bootmem_data *bdata;
  376. pg_data_t *pgdat;
  377. int i;
  378. /*
  379. * Initialise the zone size information.
  380. */
  381. for (i = 0; i < MAX_NR_ZONES; i++) {
  382. zone_size[i] = 0;
  383. zhole_size[i] = 0;
  384. }
  385. pgdat = NODE_DATA(node);
  386. bdata = pgdat->bdata;
  387. /*
  388. * The size of this node has already been determined.
  389. * If we need to do anything fancy with the allocation
  390. * of this memory to the zones, now is the time to do
  391. * it.
  392. */
  393. zone_size[0] = bdata->node_low_pfn -
  394. (bdata->node_boot_start >> PAGE_SHIFT);
  395. /*
  396. * If this zone has zero size, skip it.
  397. */
  398. if (!zone_size[0])
  399. continue;
  400. /*
  401. * For each bank in this node, calculate the size of the
  402. * holes. holes = node_size - sum(bank_sizes_in_node)
  403. */
  404. zhole_size[0] = zone_size[0];
  405. for (i = 0; i < mi->nr_banks; i++) {
  406. if (mi->bank[i].node != node)
  407. continue;
  408. zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT;
  409. }
  410. /*
  411. * Adjust the sizes according to any special
  412. * requirements for this machine type.
  413. */
  414. arch_adjust_zones(node, zone_size, zhole_size);
  415. free_area_init_node(node, pgdat, zone_size,
  416. bdata->node_boot_start >> PAGE_SHIFT, zhole_size);
  417. }
  418. /*
  419. * finish off the bad pages once
  420. * the mem_map is initialised
  421. */
  422. memzero(zero_page, PAGE_SIZE);
  423. empty_zero_page = virt_to_page(zero_page);
  424. flush_dcache_page(empty_zero_page);
  425. }
  426. static inline void free_area(unsigned long addr, unsigned long end, char *s)
  427. {
  428. unsigned int size = (end - addr) >> 10;
  429. for (; addr < end; addr += PAGE_SIZE) {
  430. struct page *page = virt_to_page(addr);
  431. ClearPageReserved(page);
  432. set_page_count(page, 1);
  433. free_page(addr);
  434. totalram_pages++;
  435. }
  436. if (size && s)
  437. printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
  438. }
  439. static inline void
  440. free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn)
  441. {
  442. struct page *start_pg, *end_pg;
  443. unsigned long pg, pgend;
  444. /*
  445. * Convert start_pfn/end_pfn to a struct page pointer.
  446. */
  447. start_pg = pfn_to_page(start_pfn);
  448. end_pg = pfn_to_page(end_pfn);
  449. /*
  450. * Convert to physical addresses, and
  451. * round start upwards and end downwards.
  452. */
  453. pg = PAGE_ALIGN(__pa(start_pg));
  454. pgend = __pa(end_pg) & PAGE_MASK;
  455. /*
  456. * If there are free pages between these,
  457. * free the section of the memmap array.
  458. */
  459. if (pg < pgend)
  460. free_bootmem_node(NODE_DATA(node), pg, pgend - pg);
  461. }
  462. /*
  463. * The mem_map array can get very big. Free the unused area of the memory map.
  464. */
  465. static void __init free_unused_memmap_node(int node, struct meminfo *mi)
  466. {
  467. unsigned long bank_start, prev_bank_end = 0;
  468. unsigned int i;
  469. /*
  470. * [FIXME] This relies on each bank being in address order. This
  471. * may not be the case, especially if the user has provided the
  472. * information on the command line.
  473. */
  474. for (i = 0; i < mi->nr_banks; i++) {
  475. if (mi->bank[i].size == 0 || mi->bank[i].node != node)
  476. continue;
  477. bank_start = mi->bank[i].start >> PAGE_SHIFT;
  478. if (bank_start < prev_bank_end) {
  479. printk(KERN_ERR "MEM: unordered memory banks. "
  480. "Not freeing memmap.\n");
  481. break;
  482. }
  483. /*
  484. * If we had a previous bank, and there is a space
  485. * between the current bank and the previous, free it.
  486. */
  487. if (prev_bank_end && prev_bank_end != bank_start)
  488. free_memmap(node, prev_bank_end, bank_start);
  489. prev_bank_end = (mi->bank[i].start +
  490. mi->bank[i].size) >> PAGE_SHIFT;
  491. }
  492. }
  493. /*
  494. * mem_init() marks the free areas in the mem_map and tells us how much
  495. * memory is free. This is done after various parts of the system have
  496. * claimed their memory after the kernel image.
  497. */
  498. void __init mem_init(void)
  499. {
  500. unsigned int codepages, datapages, initpages;
  501. int i, node;
  502. codepages = &_etext - &_text;
  503. datapages = &_end - &__data_start;
  504. initpages = &__init_end - &__init_begin;
  505. #ifndef CONFIG_DISCONTIGMEM
  506. max_mapnr = virt_to_page(high_memory) - mem_map;
  507. #endif
  508. /* this will put all unused low memory onto the freelists */
  509. for_each_online_node(node) {
  510. pg_data_t *pgdat = NODE_DATA(node);
  511. free_unused_memmap_node(node, &meminfo);
  512. if (pgdat->node_spanned_pages != 0)
  513. totalram_pages += free_all_bootmem_node(pgdat);
  514. }
  515. #ifdef CONFIG_SA1111
  516. /* now that our DMA memory is actually so designated, we can free it */
  517. free_area(PAGE_OFFSET, (unsigned long)swapper_pg_dir, NULL);
  518. #endif
  519. /*
  520. * Since our memory may not be contiguous, calculate the
  521. * real number of pages we have in this system
  522. */
  523. printk(KERN_INFO "Memory:");
  524. num_physpages = 0;
  525. for (i = 0; i < meminfo.nr_banks; i++) {
  526. num_physpages += meminfo.bank[i].size >> PAGE_SHIFT;
  527. printk(" %ldMB", meminfo.bank[i].size >> 20);
  528. }
  529. printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
  530. printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
  531. "%dK data, %dK init)\n",
  532. (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
  533. codepages >> 10, datapages >> 10, initpages >> 10);
  534. if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
  535. extern int sysctl_overcommit_memory;
  536. /*
  537. * On a machine this small we won't get
  538. * anywhere without overcommit, so turn
  539. * it on by default.
  540. */
  541. sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
  542. }
  543. }
  544. void free_initmem(void)
  545. {
  546. if (!machine_is_integrator() && !machine_is_cintegrator()) {
  547. free_area((unsigned long)(&__init_begin),
  548. (unsigned long)(&__init_end),
  549. "init");
  550. }
  551. }
  552. #ifdef CONFIG_BLK_DEV_INITRD
  553. static int keep_initrd;
  554. void free_initrd_mem(unsigned long start, unsigned long end)
  555. {
  556. if (!keep_initrd)
  557. free_area(start, end, "initrd");
  558. }
  559. static int __init keepinitrd_setup(char *__unused)
  560. {
  561. keep_initrd = 1;
  562. return 1;
  563. }
  564. __setup("keepinitrd", keepinitrd_setup);
  565. #endif