vector.S
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>

/*
 * load_up_altivec(unused, unused, tsk)
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 */
_GLOBAL(load_up_altivec)
	mfmsr	r5			/* grab the current MSR */
	oris	r5,r5,MSR_VEC@h
	MTMSRD(r5)			/* enable use of AltiVec now */
	isync

/*
 * For SMP, we don't do lazy VMX switching because it just gets too
 * horrendously complex, especially when a task switches from one CPU
 * to another. Instead we call giveup_altivec in switch_to.
 * VRSAVE isn't dealt with here, that is done in the normal context
 * switch code. Note that we could rely on vrsave value to eventually
 * avoid saving all of the VREGs here...
 */
#ifndef CONFIG_SMP
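	/* See whether some other task last used AltiVec on this CPU;
	 * if nobody did, there is no state to save and we skip ahead. */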
	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
	toreal(r3)
	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
	PPC_LCMPI	0,r4,0
	beq	1f

	/* Save VMX state to last_task_used_altivec's THREAD struct */
	toreal(r4)
	addi	r4,r4,THREAD
	SAVE_32VRS(0,r5,r4)
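	/* ...the VSCR is saved as well, staged through vr0 */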
	mfvscr	vr0
	li	r10,THREAD_VSCR
	stvx	vr0,r10,r4
	/* Disable VMX for last_task_used_altivec */
	PPC_LL	r5,PT_REGS(r4)
	toreal(r5)
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r10,MSR_VEC@h
	andc	r4,r4,r10
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */

	/* Hack: if we get an altivec unavailable trap with VRSAVE
	 * set to all zeros, we assume this is a broken application
	 * that fails to set it properly, and thus we switch it to
	 * all 1's
	 */
	mfspr	r4,SPRN_VRSAVE
	cmpdi	0,r4,0
	bne+	1f
	li	r4,-1
	mtspr	SPRN_VRSAVE,r4
1:
	/* enable use of VMX after return */
#ifdef CONFIG_PPC32
	mfspr	r5,SPRN_SPRG3		/* current task's THREAD (phys) */
	oris	r9,r9,MSR_VEC@h
#else
	ld	r4,PACACURRENT(r13)
	addi	r5,r4,THREAD		/* Get THREAD */
	oris	r12,r12,MSR_VEC@h
	std	r12,_MSR(r1)
#endif
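	/* Mark the thread as having used AltiVec, then restore the
	 * VSCR and all 32 VRs for the current task. */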
	li	r4,1
	li	r10,THREAD_VSCR
	stw	r4,THREAD_USED_VR(r5)
	lvx	vr0,r10,r5
	mtvscr	vr0
	REST_32VRS(0,r4,r5)
#ifndef CONFIG_SMP
	/* Update last_task_used_altivec to 'current' */
	subi	r4,r5,THREAD		/* Back to 'current' */
	fromreal(r4)
	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
#endif /* CONFIG_SMP */
	/* restore registers and return */
	blr

/*
 * giveup_altivec(tsk)
 * Disable VMX for the task given as the argument,
 * and save the vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 */
_GLOBAL(giveup_altivec)
	mfmsr	r5
	oris	r5,r5,MSR_VEC@h
	SYNC
	MTMSRD(r5)			/* enable use of VMX now */
	isync
	PPC_LCMPI	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	PPC_LL	r5,PT_REGS(r3)
	PPC_LCMPI	0,r5,0
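	/* cr0 = (regs == NULL); it is tested by the beq below,
	 * after the VR/VSCR save */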
	SAVE_32VRS(0,r4,r3)
	mfvscr	vr0
	li	r4,THREAD_VSCR
	stvx	vr0,r4,r3
	beq	1f
	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
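	/* Clear MSR_VEC in the task's saved MSR; on VSX-capable CPUs
	 * MSR_VSX is cleared along with it. */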
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
	lis	r3,(MSR_VEC|MSR_VSX)@h
FTR_SECTION_ELSE
	lis	r3,MSR_VEC@h
ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
#else
	lis	r3,MSR_VEC@h
#endif
	andc	r4,r4,r3		/* disable VMX for previous task */
	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
#endif /* CONFIG_SMP */
	blr

#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
	andi.	r5,r12,MSR_FP
	beql+	load_up_fpu		/* skip if already loaded */
	andis.	r5,r12,MSR_VEC@h
	beql+	load_up_altivec		/* skip if already loaded */
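	/* (beql branches-and-links only when cr0 says the MSR bit is
	 * clear, i.e. when that state has not been loaded yet) */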
#ifndef CONFIG_SMP
	ld	r3,last_task_used_vsx@got(r2)
	ld	r4,0(r3)
	cmpdi	0,r4,0
	beq	1f
	/* Disable VSX for last_task_used_vsx */
	addi	r4,r4,THREAD
	ld	r5,PT_REGS(r4)
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r6,MSR_VSX@h
	andc	r6,r4,r6
	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#endif /* CONFIG_SMP */
	ld	r4,PACACURRENT(r13)
	addi	r4,r4,THREAD		/* Get THREAD */
	li	r6,1
	stw	r6,THREAD_USED_VSR(r4)	/* ... also set thread used vsr */
	/* enable use of VSX after return */
	oris	r12,r12,MSR_VSX@h
	std	r12,_MSR(r1)
#ifndef CONFIG_SMP
	/* Update last_task_used_vsx to 'current' */
	ld	r4,PACACURRENT(r13)
	std	r4,0(r3)
#endif /* CONFIG_SMP */
	b	fast_exception_return

/*
 * __giveup_vsx(tsk)
 * Disable VSX for the task given as the argument.
 * Does NOT save vsx registers.
 * Enables the VSX for use in the kernel on return.
 */
_GLOBAL(__giveup_vsx)
	mfmsr	r5
	oris	r5,r5,MSR_VSX@h
	mtmsrd	r5			/* enable use of VSX now */
	isync
	cmpdi	0,r3,0
	beqlr-				/* if no previous owner, done */
	addi	r3,r3,THREAD		/* want THREAD of task */
	ld	r5,PT_REGS(r3)
	cmpdi	0,r5,0
	beq	1f
	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
	lis	r3,MSR_VSX@h
	andc	r4,r4,r3		/* disable VSX for previous task */
	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
1:
#ifndef CONFIG_SMP
	li	r5,0
	ld	r4,last_task_used_vsx@got(r2)
	std	r5,0(r4)
#endif /* CONFIG_SMP */
	blr

#endif /* CONFIG_VSX */

/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
	.data
fpzero:
	.long	0
fpone:
	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:
	.long	0x3f000000	/* 0.5 in single-precision FP */

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)
#else

	.section ".toc","aw"
fpzero:
	.tc	FD_0_0[TC],0
fpone:
	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:
	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)
#endif
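
/*
 * LDCONST(fr, name) loads one of the constants above into FPR 'fr':
 * from .data via r11 on 32-bit, or from the TOC via r2 on 64-bit.
 */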

	.text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync
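	/* save fr0, fr1 and fr31 in the new frame; fr31 is used below
	 * to hold the caller's FPSCR across the call */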
	stfd	fr0,24(r1)
	stfd	fr1,16(r1)
	stfd	fr31,8(r1)
	LDCONST(fr1, fpzero)
	mffs	fr31
	MTFSF_L(fr1)
	blr
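
/*
 * fpdisable undoes fpenable: it restores the FPSCR and the saved FPRs,
 * restores the original MSR, pops the frame and returns through the LR
 * value the caller stashed in r12.
 */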
fpdisable:
	mtlr	r12
	MTFSF_L(fr31)
	lfd	fr31,8(r1)
	lfd	fr1,16(r1)
	lfd	fr0,24(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64
	blr

/*
 * Vector add, floating point.
 */
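/* r3 -> destination, r4, r5 -> sources (four 32-bit elements each) */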
_GLOBAL(vaddfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
	mflr	r12
	bl	fpenable
	li	r0,4
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector multiply and add, floating point.
 */
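/*
 * r3 -> destination, r4, r5, r6 -> sources.
 * Computes dst[i] = r4[i] * r6[i] + r5[i] for each of the four elements;
 * fr2 is saved and restored around the loop because fpenable does not save it.
 */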
_GLOBAL(vmaddfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	li	r0,4
	mtctr	r0
	li	r7,0
1:	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	1b
	lfd	fr2,32(r1)
	b	fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
	mflr	r12
	bl	fpenable
	li	r0,4
	LDCONST(fr1, fpone)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	1b
	b	fpdisable

/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
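/*
 * Newton-Raphson for y ~ 1/sqrt(s): with f(y) = 1/y^2 - s, the update is
 *	y' = y + 0.5 * y * (1 - s * y * y)
 * which is what each fmuls/fnmsubs/fmadds group below computes.
 */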
_GLOBAL(vrsqrtefp)
	mflr	r12
	bl	fpenable
	stfd	fr2,32(r1)
	stfd	fr3,40(r1)
	stfd	fr4,48(r1)
	stfd	fr5,56(r1)
	li	r0,4
	LDCONST(fr4, fpone)
	LDCONST(fr5, fphalf)
	mtctr	r0
	li	r6,0
1:	lfsx	fr0,r4,r6
	frsqrte	fr1,fr0		/* r = frsqrte(s) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	fmuls	fr3,fr1,fr0	/* r * s */
	fmuls	fr2,fr1,fr5	/* r * 0.5 */
	fnmsubs	fr3,fr1,fr3,fr4	/* 1 - s * r * r */
	fmadds	fr1,fr2,fr3,fr1	/* r = r + 0.5 * r * (1 - s * r * r) */
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	1b
	lfd	fr5,56(r1)
	lfd	fr4,48(r1)
	lfd	fr3,40(r1)
	lfd	fr2,32(r1)
	b	fpdisable