init.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674
  1. /*
  2. * linux/arch/arm/mm/init.c
  3. *
  4. * Copyright (C) 1995-2005 Russell King
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. */
  10. #include <linux/config.h>
  11. #include <linux/kernel.h>
  12. #include <linux/errno.h>
  13. #include <linux/ptrace.h>
  14. #include <linux/swap.h>
  15. #include <linux/init.h>
  16. #include <linux/bootmem.h>
  17. #include <linux/mman.h>
  18. #include <linux/nodemask.h>
  19. #include <linux/initrd.h>
  20. #include <asm/mach-types.h>
  21. #include <asm/hardware.h>
  22. #include <asm/setup.h>
  23. #include <asm/tlb.h>
  24. #include <asm/mach/arch.h>
  25. #include <asm/mach/map.h>
  26. #define TABLE_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
  27. DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
  28. extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
  29. extern void _stext, _text, _etext, __data_start, _end, __init_begin, __init_end;
  30. extern unsigned long phys_initrd_start;
  31. extern unsigned long phys_initrd_size;
  32. /*
  33. * The sole use of this is to pass memory configuration
  34. * data from paging_init to mem_init.
  35. */
  36. static struct meminfo meminfo __initdata = { 0, };
  37. /*
  38. * empty_zero_page is a special page that is used for
  39. * zero-initialized data and COW.
  40. */
  41. struct page *empty_zero_page;
  42. void show_mem(void)
  43. {
  44. int free = 0, total = 0, reserved = 0;
  45. int shared = 0, cached = 0, slab = 0, node;
  46. printk("Mem-info:\n");
  47. show_free_areas();
  48. printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
  49. for_each_online_node(node) {
  50. struct page *page, *end;
  51. page = NODE_MEM_MAP(node);
  52. end = page + NODE_DATA(node)->node_spanned_pages;
  53. do {
  54. total++;
  55. if (PageReserved(page))
  56. reserved++;
  57. else if (PageSwapCache(page))
  58. cached++;
  59. else if (PageSlab(page))
  60. slab++;
  61. else if (!page_count(page))
  62. free++;
  63. else
  64. shared += page_count(page) - 1;
  65. page++;
  66. } while (page < end);
  67. }
  68. printk("%d pages of RAM\n", total);
  69. printk("%d free pages\n", free);
  70. printk("%d reserved pages\n", reserved);
  71. printk("%d slab pages\n", slab);
  72. printk("%d pages shared\n", shared);
  73. printk("%d pages swap cached\n", cached);
  74. }
  75. static inline pmd_t *pmd_off(pgd_t *pgd, unsigned long virt)
  76. {
  77. return pmd_offset(pgd, virt);
  78. }
  79. static inline pmd_t *pmd_off_k(unsigned long virt)
  80. {
  81. return pmd_off(pgd_offset_k(virt), virt);
  82. }
  /*
   * for_each_nodebank - run the following statement for every bank of a node
   * @iter: integer lvalue used as the bank index
   * @mi:   pointer to a structure with nr_banks and a bank[] array
   * @no:   node number to match against bank[iter].node
   *
   * Expands to a for loop with a filtering if, so `break' and `continue'
   * in the body act on the bank loop.  @mi and @no are parenthesised so
   * that expressions such as `&info' or `a ? b : c' expand correctly.
   * @iter and @mi are evaluated more than once.
   */
  #define for_each_nodebank(iter,mi,no)                     \
      for (iter = 0; iter < (mi)->nr_banks; iter++)         \
          if ((mi)->bank[iter].node == (no))
  86. /*
  87. * FIXME: We really want to avoid allocating the bootmap bitmap
  88. * over the top of the initrd. Hopefully, this is located towards
  89. * the start of a bank, so if we allocate the bootmap bitmap at
  90. * the end, we won't clash.
  91. */
  92. static unsigned int __init
  93. find_bootmap_pfn(int node, struct meminfo *mi, unsigned int bootmap_pages)
  94. {
  95. unsigned int start_pfn, bank, bootmap_pfn;
  96. start_pfn = PAGE_ALIGN(__pa(&_end)) >> PAGE_SHIFT;
  97. bootmap_pfn = 0;
  98. for_each_nodebank(bank, mi, node) {
  99. unsigned int start, end;
  100. start = mi->bank[bank].start >> PAGE_SHIFT;
  101. end = (mi->bank[bank].size +
  102. mi->bank[bank].start) >> PAGE_SHIFT;
  103. if (end < start_pfn)
  104. continue;
  105. if (start < start_pfn)
  106. start = start_pfn;
  107. if (end <= start)
  108. continue;
  109. if (end - start >= bootmap_pages) {
  110. bootmap_pfn = start;
  111. break;
  112. }
  113. }
  114. if (bootmap_pfn == 0)
  115. BUG();
  116. return bootmap_pfn;
  117. }
  118. static int __init check_initrd(struct meminfo *mi)
  119. {
  120. int initrd_node = -2;
  121. #ifdef CONFIG_BLK_DEV_INITRD
  122. unsigned long end = phys_initrd_start + phys_initrd_size;
  123. /*
  124. * Make sure that the initrd is within a valid area of
  125. * memory.
  126. */
  127. if (phys_initrd_size) {
  128. unsigned int i;
  129. initrd_node = -1;
  130. for (i = 0; i < mi->nr_banks; i++) {
  131. unsigned long bank_end;
  132. bank_end = mi->bank[i].start + mi->bank[i].size;
  133. if (mi->bank[i].start <= phys_initrd_start &&
  134. end <= bank_end)
  135. initrd_node = mi->bank[i].node;
  136. }
  137. }
  138. if (initrd_node == -1) {
  139. printk(KERN_ERR "initrd (0x%08lx - 0x%08lx) extends beyond "
  140. "physical memory - disabling initrd\n",
  141. phys_initrd_start, end);
  142. phys_initrd_start = phys_initrd_size = 0;
  143. }
  144. #endif
  145. return initrd_node;
  146. }
  147. /*
  148. * Reserve the various regions of node 0
  149. */
  150. static __init void reserve_node_zero(pg_data_t *pgdat)
  151. {
  152. unsigned long res_size = 0;
  153. /*
  154. * Register the kernel text and data with bootmem.
  155. * Note that this can only be in node 0.
  156. */
  157. #ifdef CONFIG_XIP_KERNEL
  158. reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start);
  159. #else
  160. reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext);
  161. #endif
  162. /*
  163. * Reserve the page tables. These are already in use,
  164. * and can only be in node 0.
  165. */
  166. reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
  167. PTRS_PER_PGD * sizeof(pgd_t));
  168. /*
  169. * Hmm... This should go elsewhere, but we really really need to
  170. * stop things allocating the low memory; ideally we need a better
  171. * implementation of GFP_DMA which does not assume that DMA-able
  172. * memory starts at zero.
  173. */
  174. if (machine_is_integrator() || machine_is_cintegrator())
  175. res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
  176. /*
  177. * These should likewise go elsewhere. They pre-reserve the
  178. * screen memory region at the start of main system memory.
  179. */
  180. if (machine_is_edb7211())
  181. res_size = 0x00020000;
  182. if (machine_is_p720t())
  183. res_size = 0x00014000;
  184. #ifdef CONFIG_SA1111
  185. /*
  186. * Because of the SA1111 DMA bug, we want to preserve our
  187. * precious DMA-able memory...
  188. */
  189. res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
  190. #endif
  191. if (res_size)
  192. reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size);
  193. }
  194. void __init build_mem_type_table(void);
  195. void __init create_mapping(struct map_desc *md);
  196. static unsigned long __init
  197. bootmem_init_node(int node, int initrd_node, struct meminfo *mi)
  198. {
  199. unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
  200. unsigned long start_pfn, end_pfn, boot_pfn;
  201. unsigned int boot_pages;
  202. pg_data_t *pgdat;
  203. int i;
  204. start_pfn = -1UL;
  205. end_pfn = 0;
  206. /*
  207. * Calculate the pfn range, and map the memory banks for this node.
  208. */
  209. for_each_nodebank(i, mi, node) {
  210. unsigned long start, end;
  211. struct map_desc map;
  212. start = mi->bank[i].start >> PAGE_SHIFT;
  213. end = (mi->bank[i].start + mi->bank[i].size) >> PAGE_SHIFT;
  214. if (start_pfn > start)
  215. start_pfn = start;
  216. if (end_pfn < end)
  217. end_pfn = end;
  218. map.pfn = __phys_to_pfn(mi->bank[i].start);
  219. map.virtual = __phys_to_virt(mi->bank[i].start);
  220. map.length = mi->bank[i].size;
  221. map.type = MT_MEMORY;
  222. create_mapping(&map);
  223. }
  224. /*
  225. * If there is no memory in this node, ignore it.
  226. */
  227. if (end_pfn == 0)
  228. return end_pfn;
  229. /*
  230. * Allocate the bootmem bitmap page.
  231. */
  232. boot_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
  233. boot_pfn = find_bootmap_pfn(node, mi, boot_pages);
  234. /*
  235. * Initialise the bootmem allocator for this node, handing the
  236. * memory banks over to bootmem.
  237. */
  238. node_set_online(node);
  239. pgdat = NODE_DATA(node);
  240. init_bootmem_node(pgdat, boot_pfn, start_pfn, end_pfn);
  241. for_each_nodebank(i, mi, node)
  242. free_bootmem_node(pgdat, mi->bank[i].start, mi->bank[i].size);
  243. /*
  244. * Reserve the bootmem bitmap for this node.
  245. */
  246. reserve_bootmem_node(pgdat, boot_pfn << PAGE_SHIFT,
  247. boot_pages << PAGE_SHIFT);
  248. #ifdef CONFIG_BLK_DEV_INITRD
  249. /*
  250. * If the initrd is in this node, reserve its memory.
  251. */
  252. if (node == initrd_node) {
  253. reserve_bootmem_node(pgdat, phys_initrd_start,
  254. phys_initrd_size);
  255. initrd_start = __phys_to_virt(phys_initrd_start);
  256. initrd_end = initrd_start + phys_initrd_size;
  257. }
  258. #endif
  259. /*
  260. * Finally, reserve any node zero regions.
  261. */
  262. if (node == 0)
  263. reserve_node_zero(pgdat);
  264. /*
  265. * initialise the zones within this node.
  266. */
  267. memset(zone_size, 0, sizeof(zone_size));
  268. memset(zhole_size, 0, sizeof(zhole_size));
  269. /*
  270. * The size of this node has already been determined. If we need
  271. * to do anything fancy with the allocation of this memory to the
  272. * zones, now is the time to do it.
  273. */
  274. zone_size[0] = end_pfn - start_pfn;
  275. /*
  276. * For each bank in this node, calculate the size of the holes.
  277. * holes = node_size - sum(bank_sizes_in_node)
  278. */
  279. zhole_size[0] = zone_size[0];
  280. for_each_nodebank(i, mi, node)
  281. zhole_size[0] -= mi->bank[i].size >> PAGE_SHIFT;
  282. /*
  283. * Adjust the sizes according to any special requirements for
  284. * this machine type.
  285. */
  286. arch_adjust_zones(node, zone_size, zhole_size);
  287. free_area_init_node(node, pgdat, zone_size, start_pfn, zhole_size);
  288. return end_pfn;
  289. }
  290. static void __init bootmem_init(struct meminfo *mi)
  291. {
  292. unsigned long addr, memend_pfn = 0;
  293. int node, initrd_node, i;
  294. /*
  295. * Invalidate the node number for empty or invalid memory banks
  296. */
  297. for (i = 0; i < mi->nr_banks; i++)
  298. if (mi->bank[i].size == 0 || mi->bank[i].node >= MAX_NUMNODES)
  299. mi->bank[i].node = -1;
  300. memcpy(&meminfo, mi, sizeof(meminfo));
  301. #ifdef CONFIG_XIP_KERNEL
  302. #error needs fixing
  303. p->pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & PMD_MASK);
  304. p->virtual = (unsigned long)&_stext & PMD_MASK;
  305. p->length = ((unsigned long)&_etext - p->virtual + ~PMD_MASK) & PMD_MASK;
  306. p->type = MT_ROM;
  307. p ++;
  308. #endif
  309. /*
  310. * Clear out all the mappings below the kernel image.
  311. * FIXME: what about XIP?
  312. */
  313. for (addr = 0; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
  314. pmd_clear(pmd_off_k(addr));
  315. /*
  316. * Clear out all the kernel space mappings, except for the first
  317. * memory bank, up to the end of the vmalloc region.
  318. */
  319. for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
  320. addr < VMALLOC_END; addr += PGDIR_SIZE)
  321. pmd_clear(pmd_off_k(addr));
  322. /*
  323. * Locate which node contains the ramdisk image, if any.
  324. */
  325. initrd_node = check_initrd(mi);
  326. /*
  327. * Run through each node initialising the bootmem allocator.
  328. */
  329. for_each_node(node) {
  330. unsigned long end_pfn;
  331. end_pfn = bootmem_init_node(node, initrd_node, mi);
  332. /*
  333. * Remember the highest memory PFN.
  334. */
  335. if (end_pfn > memend_pfn)
  336. memend_pfn = end_pfn;
  337. }
  338. high_memory = __va(memend_pfn << PAGE_SHIFT);
  339. /*
  340. * This doesn't seem to be used by the Linux memory manager any
  341. * more, but is used by ll_rw_block. If we can get rid of it, we
  342. * also get rid of some of the stuff above as well.
  343. *
  344. * Note: max_low_pfn and max_pfn reflect the number of _pages_ in
  345. * the system, not the maximum PFN.
  346. */
  347. max_pfn = max_low_pfn = memend_pfn - PHYS_PFN_OFFSET;
  348. }
  349. /*
  350. * Set up device the mappings. Since we clear out the page tables for all
  351. * mappings above VMALLOC_END, we will remove any debug device mappings.
  352. * This means you have to be careful how you debug this function, or any
  353. * called function. (Do it by code inspection!)
  354. */
  355. static void __init devicemaps_init(struct machine_desc *mdesc)
  356. {
  357. struct map_desc map;
  358. unsigned long addr;
  359. void *vectors;
  360. for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
  361. pmd_clear(pmd_off_k(addr));
  362. /*
  363. * Map the cache flushing regions.
  364. */
  365. #ifdef FLUSH_BASE
  366. map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
  367. map.virtual = FLUSH_BASE;
  368. map.length = PGDIR_SIZE;
  369. map.type = MT_CACHECLEAN;
  370. create_mapping(&map);
  371. #endif
  372. #ifdef FLUSH_BASE_MINICACHE
  373. map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + PGDIR_SIZE);
  374. map.virtual = FLUSH_BASE_MINICACHE;
  375. map.length = PGDIR_SIZE;
  376. map.type = MT_MINICLEAN;
  377. create_mapping(&map);
  378. #endif
  379. flush_cache_all();
  380. local_flush_tlb_all();
  381. vectors = alloc_bootmem_low_pages(PAGE_SIZE);
  382. BUG_ON(!vectors);
  383. /*
  384. * Create a mapping for the machine vectors at the high-vectors
  385. * location (0xffff0000). If we aren't using high-vectors, also
  386. * create a mapping at the low-vectors virtual address.
  387. */
  388. map.pfn = __phys_to_pfn(virt_to_phys(vectors));
  389. map.virtual = 0xffff0000;
  390. map.length = PAGE_SIZE;
  391. map.type = MT_HIGH_VECTORS;
  392. create_mapping(&map);
  393. if (!vectors_high()) {
  394. map.virtual = 0;
  395. map.type = MT_LOW_VECTORS;
  396. create_mapping(&map);
  397. }
  398. /*
  399. * Ask the machine support to map in the statically mapped devices.
  400. * After this point, we can start to touch devices again.
  401. */
  402. if (mdesc->map_io)
  403. mdesc->map_io();
  404. }
  405. /*
  406. * paging_init() sets up the page tables, initialises the zone memory
  407. * maps, and sets up the zero page, bad page and bad page tables.
  408. */
  409. void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
  410. {
  411. void *zero_page;
  412. build_mem_type_table();
  413. bootmem_init(mi);
  414. devicemaps_init(mdesc);
  415. top_pmd = pmd_off_k(0xffff0000);
  416. /*
  417. * allocate the zero page. Note that we count on this going ok.
  418. */
  419. zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
  420. memzero(zero_page, PAGE_SIZE);
  421. empty_zero_page = virt_to_page(zero_page);
  422. flush_dcache_page(empty_zero_page);
  423. }
  424. static inline void free_area(unsigned long addr, unsigned long end, char *s)
  425. {
  426. unsigned int size = (end - addr) >> 10;
  427. for (; addr < end; addr += PAGE_SIZE) {
  428. struct page *page = virt_to_page(addr);
  429. ClearPageReserved(page);
  430. set_page_count(page, 1);
  431. free_page(addr);
  432. totalram_pages++;
  433. }
  434. if (size && s)
  435. printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
  436. }
  437. static inline void
  438. free_memmap(int node, unsigned long start_pfn, unsigned long end_pfn)
  439. {
  440. struct page *start_pg, *end_pg;
  441. unsigned long pg, pgend;
  442. /*
  443. * Convert start_pfn/end_pfn to a struct page pointer.
  444. */
  445. start_pg = pfn_to_page(start_pfn);
  446. end_pg = pfn_to_page(end_pfn);
  447. /*
  448. * Convert to physical addresses, and
  449. * round start upwards and end downwards.
  450. */
  451. pg = PAGE_ALIGN(__pa(start_pg));
  452. pgend = __pa(end_pg) & PAGE_MASK;
  453. /*
  454. * If there are free pages between these,
  455. * free the section of the memmap array.
  456. */
  457. if (pg < pgend)
  458. free_bootmem_node(NODE_DATA(node), pg, pgend - pg);
  459. }
  460. /*
  461. * The mem_map array can get very big. Free the unused area of the memory map.
  462. */
  463. static void __init free_unused_memmap_node(int node, struct meminfo *mi)
  464. {
  465. unsigned long bank_start, prev_bank_end = 0;
  466. unsigned int i;
  467. /*
  468. * [FIXME] This relies on each bank being in address order. This
  469. * may not be the case, especially if the user has provided the
  470. * information on the command line.
  471. */
  472. for_each_nodebank(i, mi, node) {
  473. bank_start = mi->bank[i].start >> PAGE_SHIFT;
  474. if (bank_start < prev_bank_end) {
  475. printk(KERN_ERR "MEM: unordered memory banks. "
  476. "Not freeing memmap.\n");
  477. break;
  478. }
  479. /*
  480. * If we had a previous bank, and there is a space
  481. * between the current bank and the previous, free it.
  482. */
  483. if (prev_bank_end && prev_bank_end != bank_start)
  484. free_memmap(node, prev_bank_end, bank_start);
  485. prev_bank_end = (mi->bank[i].start +
  486. mi->bank[i].size) >> PAGE_SHIFT;
  487. }
  488. }
  489. /*
  490. * mem_init() marks the free areas in the mem_map and tells us how much
  491. * memory is free. This is done after various parts of the system have
  492. * claimed their memory after the kernel image.
  493. */
  494. void __init mem_init(void)
  495. {
  496. unsigned int codepages, datapages, initpages;
  497. int i, node;
  498. codepages = &_etext - &_text;
  499. datapages = &_end - &__data_start;
  500. initpages = &__init_end - &__init_begin;
  501. #ifndef CONFIG_DISCONTIGMEM
  502. max_mapnr = virt_to_page(high_memory) - mem_map;
  503. #endif
  504. /* this will put all unused low memory onto the freelists */
  505. for_each_online_node(node) {
  506. pg_data_t *pgdat = NODE_DATA(node);
  507. free_unused_memmap_node(node, &meminfo);
  508. if (pgdat->node_spanned_pages != 0)
  509. totalram_pages += free_all_bootmem_node(pgdat);
  510. }
  511. #ifdef CONFIG_SA1111
  512. /* now that our DMA memory is actually so designated, we can free it */
  513. free_area(PAGE_OFFSET, (unsigned long)swapper_pg_dir, NULL);
  514. #endif
  515. /*
  516. * Since our memory may not be contiguous, calculate the
  517. * real number of pages we have in this system
  518. */
  519. printk(KERN_INFO "Memory:");
  520. num_physpages = 0;
  521. for (i = 0; i < meminfo.nr_banks; i++) {
  522. num_physpages += meminfo.bank[i].size >> PAGE_SHIFT;
  523. printk(" %ldMB", meminfo.bank[i].size >> 20);
  524. }
  525. printk(" = %luMB total\n", num_physpages >> (20 - PAGE_SHIFT));
  526. printk(KERN_NOTICE "Memory: %luKB available (%dK code, "
  527. "%dK data, %dK init)\n",
  528. (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
  529. codepages >> 10, datapages >> 10, initpages >> 10);
  530. if (PAGE_SIZE >= 16384 && num_physpages <= 128) {
  531. extern int sysctl_overcommit_memory;
  532. /*
  533. * On a machine this small we won't get
  534. * anywhere without overcommit, so turn
  535. * it on by default.
  536. */
  537. sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
  538. }
  539. }
  540. void free_initmem(void)
  541. {
  542. if (!machine_is_integrator() && !machine_is_cintegrator()) {
  543. free_area((unsigned long)(&__init_begin),
  544. (unsigned long)(&__init_end),
  545. "init");
  546. }
  547. }
  #ifdef CONFIG_BLK_DEV_INITRD

  /* Non-zero once "keepinitrd" has been seen on the command line. */
  static int keep_initrd;

  /*
   * free_initrd_mem() - release the initrd pages in [start, end),
   * unless the user asked for the initrd to be kept.
   */
  void free_initrd_mem(unsigned long start, unsigned long end)
  {
      if (keep_initrd)
          return;

      free_area(start, end, "initrd");
  }

  /* Handler for the "keepinitrd" boot option; always returns 1. */
  static int __init keepinitrd_setup(char *__unused)
  {
      keep_initrd = 1;

      return 1;
  }

  __setup("keepinitrd", keepinitrd_setup);

  #endif