/*
 *  linux/arch/arm26/boot/compressed/head.S
 *
 *  Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
  10. #include <linux/config.h>
  11. #include <linux/linkage.h>
  12. /*
  13. * Debugging stuff
  14. *
  15. * Note that these macros must not contain any code which is not
  16. * 100% relocatable. Any attempt to do so will result in a crash.
  17. * Please select one of the following when turning on debugging.
  18. */
  19. .macro kputc,val
  20. mov r0, \val
  21. bl putc
  22. .endm
  23. .macro kphex,val,len
  24. mov r0, \val
  25. mov r1, #\len
  26. bl phex
  27. .endm
  28. .macro debug_reloc_start
  29. .endm
  30. .macro debug_reloc_end
  31. .endm
		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0			@ 8 NOPs: tolerate boot loaders
		.endr				@ that enter a few words in

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
1:		mov	r7, r1			@ save architecture ID
		mov	r8, #0			@ save r0
		teqp	pc, #0x0c000003		@ turn off interrupts (26-bit mode:
						@ PSR flags live in the PC)
		.text
		@ Load the link-time addresses (see LC0 below) and work out
		@ how far from the linked address we are actually running.
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

		teq	r0, #0			@ if delta is zero, we're
		beq	not_relocated		@ running at the address we
						@ were linked at.

		add	r2, r2, r0		@ different address, so we
		add	r3, r3, r0		@ need to fix up various
		add	r5, r5, r0		@ pointers.
		add	r6, r6, r0
		add	ip, ip, r0
		add	sp, sp, r0

1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss: four words per
		str	r0, [r2], #4		@ iteration, r2 = __bss_start,
		str	r0, [r2], #4		@ r3 = _end
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b
		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite

		@ The decompressed kernel would land on top of us: decompress
		@ into the free space after the malloc arena instead, then
		@ copy a relocator stub past the image and jump to it.
		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length

/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1			@ LC1 = reloc_end - reloc_start
		add	r3, r2, r3
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}		@ (12 words per iteration)
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code
  118. /*
  119. * We're not in danger of overwriting ourselves. Do this the simple way.
  120. *
  121. * r4 = kernel execution address
  122. * r7 = architecture ID
  123. */
  124. wont_overwrite: mov r0, r4
  125. mov r3, r7
  126. bl decompress_kernel
  127. b call_kernel
		@ Link-time address table, loaded en bloc by the startup code
		@ above.  The comment on each entry names the register it is
		@ loaded into by the ldmia at the top of .text.
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_load_addr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0
  139. /*
  140. * Turn on the cache. We need to setup some page tables so that we
  141. * can have both the I and D caches on.
  142. *
  143. * We place the page tables 16k down from the kernel execution address,
  144. * and we hope that nothing else is using it. If we're using it, we
  145. * will go pop!
  146. *
  147. * On entry,
  148. * r4 = kernel execution address
  149. * r6 = processor ID
  150. * r7 = architecture number
  151. * r8 = run-time address of "start"
  152. * On exit,
  153. * r1, r2, r3, r8, r9, r12 corrupted
  154. * This routine must preserve:
  155. * r4, r5, r6, r7
  156. */
  157. .align 5
  158. cache_on: mov r3, #8 @ cache_on function
  159. b call_cache_fn
  160. __setup_mmu: sub r3, r4, #16384 @ Page directory size
  161. bic r3, r3, #0xff @ Align the pointer
  162. bic r3, r3, #0x3f00
  163. /*
  164. * Initialise the page tables, turning on the cacheable and bufferable
  165. * bits for the RAM area only.
  166. */
  167. mov r0, r3
  168. mov r8, r0, lsr #18
  169. mov r8, r8, lsl #18 @ start of RAM
  170. add r9, r8, #0x10000000 @ a reasonable RAM size
  171. mov r1, #0x12
  172. orr r1, r1, #3 << 10
  173. add r2, r3, #16384
  174. 1: cmp r1, r8 @ if virt > start of RAM
  175. orrhs r1, r1, #0x0c @ set cacheable, bufferable
  176. cmp r1, r9 @ if virt > end of RAM
  177. bichs r1, r1, #0x0c @ clear cacheable, bufferable
  178. str r1, [r0], #4 @ 1:1 mapping
  179. add r1, r1, #1048576
  180. teq r0, r2
  181. bne 1b
  182. /*
  183. * If ever we are running from Flash, then we surely want the cache
  184. * to be enabled also for our execution instance... We map 2MB of it
  185. * so there is no map overlap problem for up to 1 MB compressed kernel.
  186. * If the execution is in RAM then we would only be duplicating the above.
  187. */
  188. mov r1, #0x1e
  189. orr r1, r1, #3 << 10
  190. mov r2, pc, lsr #20
  191. orr r1, r1, r2, lsl #20
  192. add r0, r3, r2, lsl #2
  193. str r1, [r0], #4
  194. add r1, r1, #1048576
  195. str r1, [r0]
  196. mov pc, lr
		@ ARMv4 'cache on': set up page tables, drain/flush, then
		@ enable I-cache (+D-cache/write-buffer via __common_cache_on).
		@ Entered via call_cache_fn; returns through r12 (saved lr).
__armv4_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		orr	r0, r0, #0x0030		@ 32-bit data/program space
		b	__common_cache_on

		@ ARM6 (v3) 'cache on': same idea with v3 invalidate ops.
__arm6_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30		@ 32-bit data/program space
__common_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, r12			@ return to cache_on's caller
  223. /*
  224. * All code following this line is relocatable. It is relocated by
  225. * the above code to the end of the decompressed kernel image and
  226. * executed there. During this time, we have no stacks.
  227. *
  228. * r0 = decompressed kernel length
  229. * r1-r3 = unused
  230. * r4 = kernel execution address
  231. * r5 = decompressed kernel start
  232. * r6 = processor ID
  233. * r7 = architecture ID
  234. * r8-r14 = unused
  235. */
  236. .align 5
  237. reloc_start: add r8, r5, r0
  238. debug_reloc_start
  239. mov r1, r4
  240. 1:
  241. .rept 4
  242. ldmia r5!, {r0, r2, r3, r9 - r13} @ relocate kernel
  243. stmia r1!, {r0, r2, r3, r9 - r13}
  244. .endr
  245. cmp r5, r8
  246. blo 1b
  247. debug_reloc_end
  248. call_kernel: bl cache_clean_flush
  249. bl cache_off
  250. mov r0, #0
  251. mov r1, r7 @ restore architecture number
  252. mov pc, r4 @ call kernel
  253. /*
  254. * Here follow the relocatable cache support functions for the
  255. * various processors. This is a generic hook for locating an
  256. * entry and jumping to an instruction at the specified offset
  257. * from the start of the block. Please note this is all position
  258. * independent code.
  259. *
  260. * r1 = corrupted
  261. * r2 = corrupted
  262. * r3 = block offset
  263. * r6 = corrupted
  264. * r12 = corrupted
  265. */
  266. call_cache_fn: adr r12, proc_types
  267. mrc p15, 0, r6, c0, c0 @ get processor ID
  268. 1: ldr r1, [r12, #0] @ get value
  269. ldr r2, [r12, #4] @ get mask
  270. eor r1, r1, r6 @ (real ^ match)
  271. tst r1, r2 @ & mask
  272. addeq pc, r12, r3 @ call cache function
  273. add r12, r12, #4*5
  274. b 1b
  275. /*
  276. * Table for cache operations. This is basically:
  277. * - CPU ID match
  278. * - CPU ID mask
  279. * - 'cache on' method instruction
  280. * - 'cache off' method instruction
  281. * - 'cache flush' method instruction
  282. *
  283. * We match an entry using: ((real_id ^ match) & mask) == 0
  284. *
  285. * Writethrough caches generally only need 'on' and 'off'
  286. * methods. Writeback caches _must_ have the flush method
  287. * defined.
  288. */
  289. .type proc_types,#object
  290. proc_types:
  291. .word 0x41560600 @ ARM6/610
  292. .word 0xffffffe0
  293. b __arm6_cache_off @ works, but slow
  294. b __arm6_cache_off
  295. mov pc, lr
  296. @ b __arm6_cache_on @ untested
  297. @ b __arm6_cache_off
  298. @ b __armv3_cache_flush
  299. .word 0x41007000 @ ARM7/710
  300. .word 0xfff8fe00
  301. b __arm7_cache_off
  302. b __arm7_cache_off
  303. mov pc, lr
  304. .word 0x41807200 @ ARM720T (writethrough)
  305. .word 0xffffff00
  306. b __armv4_cache_on
  307. b __armv4_cache_off
  308. mov pc, lr
  309. .word 0x41129200 @ ARM920T
  310. .word 0xff00fff0
  311. b __armv4_cache_on
  312. b __armv4_cache_off
  313. b __armv4_cache_flush
  314. .word 0x4401a100 @ sa110 / sa1100
  315. .word 0xffffffe0
  316. b __armv4_cache_on
  317. b __armv4_cache_off
  318. b __armv4_cache_flush
  319. .word 0x6901b110 @ sa1110
  320. .word 0xfffffff0
  321. b __armv4_cache_on
  322. b __armv4_cache_off
  323. b __armv4_cache_flush
  324. .word 0x69050000 @ xscale
  325. .word 0xffff0000
  326. b __armv4_cache_on
  327. b __armv4_cache_off
  328. b __armv4_cache_flush
  329. .word 0 @ unrecognised type
  330. .word 0
  331. mov pc, lr
  332. mov pc, lr
  333. mov pc, lr
  334. .size proc_types, . - proc_types
  335. /*
  336. * Turn off the Cache and MMU. ARMv3 does not support
  337. * reading the control register, but ARMv4 does.
  338. *
  339. * On entry, r6 = processor ID
  340. * On exit, r0, r1, r2, r3, r12 corrupted
  341. * This routine must preserve: r4, r6, r7
  342. */
  343. .align 5
  344. cache_off: mov r3, #12 @ cache_off function
  345. b call_cache_fn
  346. __armv4_cache_off:
  347. mrc p15, 0, r0, c1, c0
  348. bic r0, r0, #0x000d
  349. mcr p15, 0, r0, c1, c0 @ turn MMU and cache off
  350. mov r0, #0
  351. mcr p15, 0, r0, c7, c7 @ invalidate whole cache v4
  352. mcr p15, 0, r0, c8, c7 @ invalidate whole TLB v4
  353. mov pc, lr
  354. __arm6_cache_off:
  355. mov r0, #0x00000030 @ ARM6 control reg.
  356. b __armv3_cache_off
  357. __arm7_cache_off:
  358. mov r0, #0x00000070 @ ARM7 control reg.
  359. b __armv3_cache_off
  360. __armv3_cache_off:
  361. mcr p15, 0, r0, c1, c0, 0 @ turn MMU and cache off
  362. mov r0, #0
  363. mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3
  364. mcr p15, 0, r0, c5, c0, 0 @ invalidate whole TLB v3
  365. mov pc, lr
  366. /*
  367. * Clean and flush the cache to maintain consistency.
  368. *
  369. * On entry,
  370. * r6 = processor ID
  371. * On exit,
  372. * r1, r2, r3, r12 corrupted
  373. * This routine must preserve:
  374. * r0, r4, r5, r6, r7
  375. */
  376. .align 5
  377. cache_clean_flush:
  378. mov r3, #16
  379. b call_cache_fn
  380. __armv4_cache_flush:
  381. bic r1, pc, #31
  382. add r2, r1, #65536 @ 2x the largest dcache size
  383. 1: ldr r12, [r1], #32 @ s/w flush D cache
  384. teq r1, r2
  385. bne 1b
  386. mcr p15, 0, r1, c7, c7, 0 @ flush I cache
  387. mcr p15, 0, r1, c7, c10, 4 @ drain WB
  388. mov pc, lr
  389. __armv3_cache_flush:
  390. mov r1, #0
  391. mcr p15, 0, r0, c7, c0, 0 @ invalidate whole cache v3
  392. mov pc, lr
  393. /*
  394. * Various debugging routines for printing hex characters and
  395. * memory, which again must be relocatable.
  396. */
  397. #ifdef DEBUG
  398. .type phexbuf,#object
  399. phexbuf: .space 12
  400. .size phexbuf, . - phexbuf
  401. phex: adr r3, phexbuf
  402. mov r2, #0
  403. strb r2, [r3, r1]
  404. 1: subs r1, r1, #1
  405. movmi r0, r3
  406. bmi puts
  407. and r2, r0, #15
  408. mov r0, r0, lsr #4
  409. cmp r2, #10
  410. addge r2, r2, #7
  411. add r2, r2, #'0'
  412. strb r2, [r3, r1]
  413. b 1b
  414. puts: loadsp r3
  415. 1: ldrb r2, [r0], #1
  416. teq r2, #0
  417. moveq pc, lr
  418. 2: writeb r2
  419. mov r1, #0x00020000
  420. 3: subs r1, r1, #1
  421. bne 3b
  422. teq r2, #'\n'
  423. moveq r2, #'\r'
  424. beq 2b
  425. teq r0, #0
  426. bne 1b
  427. mov pc, lr
  428. putc:
  429. mov r2, r0
  430. mov r0, #0
  431. loadsp r3
  432. b 2b
  433. memdump: mov r12, r0
  434. mov r10, lr
  435. mov r11, #0
  436. 2: mov r0, r11, lsl #2
  437. add r0, r0, r12
  438. mov r1, #8
  439. bl phex
  440. mov r0, #':'
  441. bl putc
  442. 1: mov r0, #' '
  443. bl putc
  444. ldr r0, [r12, r11, lsl #2]
  445. mov r1, #8
  446. bl phex
  447. and r0, r11, #7
  448. teq r0, #3
  449. moveq r0, #' '
  450. bleq putc
  451. and r0, r11, #7
  452. add r11, r11, #1
  453. teq r0, #7
  454. bne 1b
  455. mov r0, #'\n'
  456. bl putc
  457. cmp r11, #64
  458. blt 2b
  459. mov pc, r10
  460. #endif
  461. reloc_end:
  462. .align
  463. .section ".stack", "aw"
  464. user_stack: .space 4096