/*
 * copy_user_64.S
 *
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

        .macro ALTERNATIVE_JUMP feature,orig,alt
0:
        .byte 0xe9              /* 32bit jump */
        .long \orig-1f          /* by default jump to orig */
1:
        .section .altinstr_replacement,"ax"
2:      .byte 0xe9              /* near jump with 32bit immediate */
        .long \alt-1b           /* offset */ /* or alternatively to alt */
        .previous
        .section .altinstructions,"a"
        .align 8
        .quad 0b
        .quad 2b
        .byte \feature          /* when feature is set */
        .byte 5                 /* length of original instruction */
        .byte 5                 /* length of replacement */
        .previous
        .endm
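
/*
 * How ALTERNATIVE_JUMP gets patched (explanatory note, comment only): each
 * use of the macro emits a 5-byte "jmp \orig" (0xe9 plus a 32-bit
 * displacement) and records an entry in .altinstructions.  At boot
 * apply_alternatives() walks that section and, if the CPU advertises the
 * given feature bit, overwrites the original jump with the one stored in
 * .altinstr_replacement.  The entry above is laid out to line up with
 * struct alt_instr, roughly (field names as in <asm/alternative.h> of this
 * era, trailing padding omitted):
 *
 *	struct alt_instr {
 *		u8 *instr;		// .quad 0b - original jmp
 *		u8 *replacement;	// .quad 2b - replacement jmp
 *		u8  cpuid;		// .byte \feature
 *		u8  instrlen;		// .byte 5  - jmp rel32 is 5 bytes
 *		u8  replacementlen;	// .byte 5
 *	};
 */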

/* Standard copy_to_user with segment limit checking */
ENTRY(copy_to_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx
        jc bad_to_user
        cmpq threadinfo_addr_limit(%rax),%rcx
        jae bad_to_user
        xorl %ecx,%ecx          /* clear zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC
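
/*
 * The limit check in copy_to_user above (copy_from_user below does the same
 * for the source pointer) is roughly the assembly form of the usual
 * access_ok() test; as a C sketch only, with types glossed over:
 *
 *	if (ptr + len < ptr ||					// wrap-around: jc
 *	    ptr + len >= current_thread_info()->addr_limit)	// jae
 *		return len;	// bad_*_user: report everything as uncopied
 */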

ENTRY(copy_user_generic)
        CFI_STARTPROC
        movl $1,%ecx            /* set zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC

ENTRY(__copy_from_user_inatomic)
        CFI_STARTPROC
        xorl %ecx,%ecx          /* clear zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC

/* Standard copy_from_user with segment limit checking */
ENTRY(copy_from_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx
        jc bad_from_user
        cmpq threadinfo_addr_limit(%rax),%rcx
        jae bad_from_user
        movl $1,%ecx            /* set zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC
ENDPROC(copy_from_user)

        .section .fixup,"ax"
        /* must zero dest */
bad_from_user:
        CFI_STARTPROC
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax          /* return the full count as uncopied */
        ret
        CFI_ENDPROC
END(bad_from_user)
        .previous

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag -- if true zero destination on error
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
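
/*
 * Copy strategy used below, in rough C terms (sketch only; the unrolling and
 * fault-recovery details are the reason it is written in assembly):
 *
 *	while (count >= 64) {		// .Lloop: 8 quadwords per iteration
 *		copy 64 bytes; src += 64; dst += 64; count -= 64;
 *	}
 *	while (count >= 8) {		// .Lloop_8
 *		copy 8 bytes; src += 8; dst += 8; count -= 8;
 *	}
 *	while (count) {			// .Lloop_1
 *		copy 1 byte; src++; dst++; count--;
 *	}
 *
 * plus an optional byte-wise prologue (FIX_ALIGNMENT) that first brings the
 * destination up to an 8-byte boundary.
 */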
ENTRY(copy_user_generic_unrolled)
        CFI_STARTPROC
        pushq %rbx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbx, 0
        pushq %rcx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rcx, 0
        xorl %eax,%eax          /* zero for the exception handler */

#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jnz .Lbad_alignment
.Lafter_bad_alignment:
#endif

        movq %rdx,%rcx

        movl $64,%ebx
        shrq $6,%rdx
        decq %rdx
        js .Lhandle_tail

        .p2align 4
.Lloop:
.Ls1:   movq (%rsi),%r11
.Ls2:   movq 1*8(%rsi),%r8
.Ls3:   movq 2*8(%rsi),%r9
.Ls4:   movq 3*8(%rsi),%r10
.Ld1:   movq %r11,(%rdi)
.Ld2:   movq %r8,1*8(%rdi)
.Ld3:   movq %r9,2*8(%rdi)
.Ld4:   movq %r10,3*8(%rdi)

.Ls5:   movq 4*8(%rsi),%r11
.Ls6:   movq 5*8(%rsi),%r8
.Ls7:   movq 6*8(%rsi),%r9
.Ls8:   movq 7*8(%rsi),%r10
.Ld5:   movq %r11,4*8(%rdi)
.Ld6:   movq %r8,5*8(%rdi)
.Ld7:   movq %r9,6*8(%rdi)
.Ld8:   movq %r10,7*8(%rdi)

        decq %rdx
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        jns .Lloop

        .p2align 4
.Lhandle_tail:
        movl %ecx,%edx
        andl $63,%ecx
        shrl $3,%ecx
        jz .Lhandle_7
        movl $8,%ebx
        .p2align 4
.Lloop_8:
.Ls9:   movq (%rsi),%r8
.Ld9:   movq %r8,(%rdi)
        decl %ecx
        leaq 8(%rdi),%rdi
        leaq 8(%rsi),%rsi
        jnz .Lloop_8

.Lhandle_7:
        movl %edx,%ecx
        andl $7,%ecx
        jz .Lende
        .p2align 4
.Lloop_1:
.Ls10:  movb (%rsi),%bl
.Ld10:  movb %bl,(%rdi)
        incq %rdi
        incq %rsi
        decl %ecx
        jnz .Lloop_1

        CFI_REMEMBER_STATE
.Lende:
        popq %rcx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE rcx
        popq %rbx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE rbx
        ret
        CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
        /* align destination */
        .p2align 4
.Lbad_alignment:
        movl $8,%r9d
        subl %ecx,%r9d
        movl %r9d,%ecx
        cmpq %r9,%rdx
        jz .Lhandle_7
        js .Lhandle_7
.Lalign_1:
.Ls11:  movb (%rsi),%bl
.Ld11:  movb %bl,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .Lalign_1
        subq %r9,%rdx
        jmp .Lafter_bad_alignment
#endif

        /* table sorted by exception address */
        .section __ex_table,"a"
        .align 8
        .quad .Ls1,.Ls1e        /* Ls1-Ls4 have copied zero bytes */
        .quad .Ls2,.Ls1e
        .quad .Ls3,.Ls1e
        .quad .Ls4,.Ls1e
        .quad .Ld1,.Ls1e        /* Ld1-Ld4 have copied 0-24 bytes */
        .quad .Ld2,.Ls2e
        .quad .Ld3,.Ls3e
        .quad .Ld4,.Ls4e
        .quad .Ls5,.Ls5e        /* Ls5-Ls8 have copied 32 bytes */
        .quad .Ls6,.Ls5e
        .quad .Ls7,.Ls5e
        .quad .Ls8,.Ls5e
        .quad .Ld5,.Ls5e        /* Ld5-Ld8 have copied 32-56 bytes */
        .quad .Ld6,.Ls6e
        .quad .Ld7,.Ls7e
        .quad .Ld8,.Ls8e
        .quad .Ls9,.Le_quad
        .quad .Ld9,.Le_quad
        .quad .Ls10,.Le_byte
        .quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
        .quad .Ls11,.Lzero_rest
        .quad .Ld11,.Lzero_rest
#endif
        .quad .Le5,.Le_zero
        .previous
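
/*
 * How the table above is used (explanatory note): each pair is the address
 * of an instruction that may fault on a user access and the address of its
 * fixup code.  When one of the tagged movs takes an unresolvable page fault,
 * the fault handler looks up the faulting %rip here and resumes execution at
 * the fixup label instead of oopsing.  On x86-64 of this vintage an entry is
 * simply two absolute quadwords, roughly:
 *
 *	struct exception_table_entry {
 *		unsigned long insn;	// faulting instruction
 *		unsigned long fixup;	// where to continue
 *	};
 *
 * The fixup code below works out how many bytes were left uncopied.
 */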

        /* eax: zero, ebx: 64 */
.Ls1e:  addl $8,%eax            /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
.Ls2e:  addl $8,%eax
.Ls3e:  addl $8,%eax
.Ls4e:  addl $8,%eax
.Ls5e:  addl $8,%eax
.Ls6e:  addl $8,%eax
.Ls7e:  addl $8,%eax
.Ls8e:  addl $8,%eax
        addq %rbx,%rdi          /* +64 */
        subq %rax,%rdi          /* correct destination with computed offset */

        shlq $6,%rdx            /* loop counter * 64 (stride length) */
        addq %rax,%rdx          /* add offset to loopcnt */

        andl $63,%ecx           /* remaining bytes */
        addq %rcx,%rdx          /* add them */
        jmp .Lzero_rest
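
/*
 * Worked example of the recovery above (sketch): say count = 200, i.e.
 * three 64-byte blocks plus an 8-byte tail, and the store at .Ld6 faults in
 * the very first block.  The table sends us to .Ls6e, so the fall-through
 * adds 8 three times:
 *
 *	eax = 24		// bytes of this block not yet stored
 *	rdi += 64 - 24		// first byte that was never written
 *	rdx = 2*64 + 24		// rest of this block plus two untouched blocks
 *	rdx += 200 & 63		// plus the 8-byte tail -> 160
 *
 * so 160 bytes are zeroed (if the zero flag was set) and reported as
 * uncopied, matching 200 total minus the 40 bytes already stored.
 */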

        /* exception on quad word loop in tail handling */
        /* ecx: loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
        shll $3,%ecx
        andl $7,%edx
        addl %ecx,%edx
        /* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
        cmpl $0,(%rsp)          /* zero flag was saved on the stack */
        jz .Le_zero
        movq %rdx,%rcx
.Le_byte:
        xorl %eax,%eax
.Le5:   rep
        stosb
        /* when there is another exception while zeroing the rest just return */
.Le_zero:
        movq %rdx,%rax
        jmp .Lende
        CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
   This is also a lot simpler. Use them when possible.
   Patch in jmps to this code instead of copying it fully
   to avoid unwanted aliasing in the exception tables. */

/* rdi destination
 * rsi source
 * rdx count
 * ecx zero flag
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please keep those errata in mind.
 */
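
/*
 * Worked example of the split done below (sketch): for a count of, say,
 * rdx = 27, the code sets
 *
 *	rcx = 27 >> 3 = 3	// "rep movsq" copies 3 * 8 = 24 bytes
 *	rdx = 27 &  7 = 3	// "rep movsb" copies the remaining 3 bytes
 *
 * and when the count is already a multiple of 8 it jumps straight to the
 * quadword-only path at label 10.
 */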

ENTRY(copy_user_generic_string)
        CFI_STARTPROC
        movl %ecx,%r8d          /* save zero flag */
        movl %edx,%ecx
        shrl $3,%ecx            /* number of quadwords */
        andl $7,%edx            /* leftover bytes */
        jz 10f
1:      rep
        movsq
        movl %edx,%ecx
2:      rep
        movsb
9:      movl %ecx,%eax          /* %ecx is 0 here: nothing left uncopied */
        ret
        /* multiple of 8 byte */
10:     rep
        movsq
        xor %eax,%eax
        ret

        /* exception handling */
3:      lea (%rdx,%rcx,8),%rax  /* exception on quad loop */
        jmp 6f
5:      movl %ecx,%eax          /* exception on byte loop */
        /* eax: left over bytes */
6:      testl %r8d,%r8d         /* zero flag set? */
        jz 7f
        movl %eax,%ecx          /* initialize x86 loop counter */
        push %rax
        xorl %eax,%eax
8:      rep
        stosb                   /* zero the rest */
11:     pop %rax
7:      ret
        CFI_ENDPROC
END(copy_user_generic_string)
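
/*
 * Fault recovery for the string version (note): the rep-prefixed
 * instructions decrement %rcx as they go, so on a fault %rcx already holds
 * the number of iterations that were not completed.  A fault in the
 * quadword copy therefore reports 8*%rcx + %rdx leftover bytes (the lea at
 * label 3), a fault in the byte copy reports %rcx (label 5), and the
 * __ex_table entries below map each rep instruction to its fixup.
 */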

        .section __ex_table,"a"
        .quad 1b,3b             /* fault in "rep movsq" */
        .quad 2b,5b             /* fault in "rep movsb" */
        .quad 8b,11b            /* fault while zeroing the rest */
        .quad 10b,3b            /* fault in the multiple-of-8 "rep movsq" */
        .previous