copy_user_64.S

/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
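
/*
 * ALTERNATIVE_JUMP emits a 5-byte near jump to \orig and records the site
 * in .altinstructions, so that the boot-time alternatives patching replaces
 * it with a jump to \alt on CPUs that advertise \feature.
 */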
        .macro ALTERNATIVE_JUMP feature,orig,alt
0:
        .byte 0xe9                      /* 32bit jump */
        .long \orig-1f                  /* by default jump to orig */
1:
        .section .altinstr_replacement,"ax"
2:      .byte 0xe9                      /* near jump with 32bit immediate */
        .long \alt-1b                   /* offset */ /* or alternatively to alt */
        .previous
        .section .altinstructions,"a"
        .align 8
        .quad 0b
        .quad 2b
        .byte \feature                  /* when feature is set */
        .byte 5                         /* old instruction length */
        .byte 5                         /* replacement length */
        .previous
        .endm
/* Standard copy_to_user with segment limit checking */
ENTRY(copy_to_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx                  /* rcx = dst + len */
        jc  bad_to_user                 /* wrapped around the address space */
        cmpq threadinfo_addr_limit(%rax),%rcx
        jae bad_to_user
        xorl %ecx,%ecx                  /* clear zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC

ENTRY(copy_user_generic)
        CFI_STARTPROC
        movl $1,%ecx                    /* set zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC

ENTRY(__copy_from_user_inatomic)
        CFI_STARTPROC
        xorl %ecx,%ecx                  /* clear zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC

/* Standard copy_from_user with segment limit checking */
ENTRY(copy_from_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx                  /* rcx = src + len */
        jc  bad_from_user               /* wrapped around the address space */
        cmpq threadinfo_addr_limit(%rax),%rcx
        jae bad_from_user
        movl $1,%ecx                    /* set zero flag */
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC
ENDPROC(copy_from_user)
        .section .fixup,"ax"
        /* must zero dest */
bad_from_user:
        CFI_STARTPROC
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax
        ret
        CFI_ENDPROC
END(bad_from_user)
        .previous
/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like the P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 * ecx zero flag -- if true, zero the destination on error
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
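/*
 * Seen through the standard x86-64 C calling convention (rdi, rsi, rdx and
 * ecx as the first four integer arguments, eax as the return value), the
 * entry behaves roughly like the sketch below; the names are illustrative
 * and not taken from the kernel headers:
 *
 *      unsigned copy_user_generic_unrolled(void *dst, const void *src,
 *                                          unsigned len, int zero_on_fault);
 *
 * returning the number of bytes left uncopied, or 0 on success.
 */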
ENTRY(copy_user_generic_unrolled)
        CFI_STARTPROC
        pushq %rbx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbx, 0
        pushq %rcx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rcx, 0
        xorl %eax,%eax                  /* zero for the exception handler */

#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jnz .Lbad_alignment
.Lafter_bad_alignment:
#endif

        movq %rdx,%rcx                  /* keep the full byte count for the tail */
        movl $64,%ebx                   /* stride, used by the fixup code */
        shrq $6,%rdx                    /* number of 64-byte blocks */
        decq %rdx
        js .Lhandle_tail                /* less than 64 bytes */
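
        /*
         * Main loop: copy 64 bytes per iteration as two groups of four
         * quadword loads followed by four quadword stores, so a fault on
         * any access maps back to an 8-byte granular position.
         */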
        .p2align 4
.Lloop:
.Ls1:   movq (%rsi),%r11
.Ls2:   movq 1*8(%rsi),%r8
.Ls3:   movq 2*8(%rsi),%r9
.Ls4:   movq 3*8(%rsi),%r10
.Ld1:   movq %r11,(%rdi)
.Ld2:   movq %r8,1*8(%rdi)
.Ld3:   movq %r9,2*8(%rdi)
.Ld4:   movq %r10,3*8(%rdi)
.Ls5:   movq 4*8(%rsi),%r11
.Ls6:   movq 5*8(%rsi),%r8
.Ls7:   movq 6*8(%rsi),%r9
.Ls8:   movq 7*8(%rsi),%r10
.Ld5:   movq %r11,4*8(%rdi)
.Ld6:   movq %r8,5*8(%rdi)
.Ld7:   movq %r9,6*8(%rdi)
.Ld8:   movq %r10,7*8(%rdi)

        decq %rdx
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        jns .Lloop

        .p2align 4
.Lhandle_tail:
        movl %ecx,%edx
        andl $63,%ecx
        shrl $3,%ecx
        jz .Lhandle_7
        movl $8,%ebx
        .p2align 4
.Lloop_8:
.Ls9:   movq (%rsi),%r8
.Ld9:   movq %r8,(%rdi)
        decl %ecx
        leaq 8(%rdi),%rdi
        leaq 8(%rsi),%rsi
        jnz .Lloop_8

.Lhandle_7:
        movl %edx,%ecx
        andl $7,%ecx
        jz .Lende
        .p2align 4
.Lloop_1:
.Ls10:  movb (%rsi),%bl
.Ld10:  movb %bl,(%rdi)
        incq %rdi
        incq %rsi
        decl %ecx
        jnz .Lloop_1

        CFI_REMEMBER_STATE
.Lende:
        popq %rcx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE rcx
        popq %rbx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE rbx
        ret
        CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
        /* align destination */
        .p2align 4
.Lbad_alignment:
        movl $8,%r9d
        subl %ecx,%r9d
        movl %r9d,%ecx
        cmpq %r9,%rdx
        jz .Lhandle_7
        js .Lhandle_7
.Lalign_1:
.Ls11:  movb (%rsi),%bl
.Ld11:  movb %bl,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .Lalign_1
        subq %r9,%rdx
        jmp .Lafter_bad_alignment
#endif
        /* table sorted by exception address */
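        /*
         * Each .quad pair is (address of the instruction that may fault,
         * address of its fixup code). The load and the store of the same
         * slot share one fixup, so the recovery code sees a single 8-byte
         * granular position.
         */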
        .section __ex_table,"a"
        .align 8
        .quad .Ls1,.Ls1e
        .quad .Ls2,.Ls2e
        .quad .Ls3,.Ls3e
        .quad .Ls4,.Ls4e
        .quad .Ld1,.Ls1e
        .quad .Ld2,.Ls2e
        .quad .Ld3,.Ls3e
        .quad .Ld4,.Ls4e
        .quad .Ls5,.Ls5e
        .quad .Ls6,.Ls6e
        .quad .Ls7,.Ls7e
        .quad .Ls8,.Ls8e
        .quad .Ld5,.Ls5e
        .quad .Ld6,.Ls6e
        .quad .Ld7,.Ls7e
        .quad .Ld8,.Ls8e
        .quad .Ls9,.Le_quad
        .quad .Ld9,.Le_quad
        .quad .Ls10,.Le_byte
        .quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
        .quad .Ls11,.Lzero_rest
        .quad .Ld11,.Lzero_rest
#endif
        .quad .Le5,.Le_zero
        .previous
        /*
         * Recovery from a fault in the main 64-byte loop: compute the number
         * of bytes left with 8-byte accuracy, erring on the pessimistic side.
         * This is gross; it would be better to fix the interface.
         */
        /* eax: zero, ebx: 64 */
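        /*
         * Worked example (illustrative numbers): a 200-byte copy that faults
         * on the third load (.Ls3) of the second 64-byte iteration falls
         * through 6 of the adds below, so eax = 48; rdi is moved to the end
         * of the 16 bytes counted as copied, and rdx becomes
         * 1*64 + 48 + (200 & 63) = 120 uncopied bytes, i.e. 80 bytes are
         * reported as copied.
         */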
.Ls1e:  addl $8,%eax
.Ls2e:  addl $8,%eax
.Ls3e:  addl $8,%eax
.Ls4e:  addl $8,%eax
.Ls5e:  addl $8,%eax
.Ls6e:  addl $8,%eax
.Ls7e:  addl $8,%eax
.Ls8e:  addl $8,%eax
        addq %rbx,%rdi                  /* +64 */
        subq %rax,%rdi                  /* correct destination with computed offset */
        shlq $6,%rdx                    /* loop counter * 64 (stride length) */
        addq %rax,%rdx                  /* add offset to loopcnt */
        andl $63,%ecx                   /* remaining bytes */
        addq %rcx,%rdx                  /* add them */
        jmp .Lzero_rest

        /* exception on quad word loop in tail handling */
        /* ecx: loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
        shll $3,%ecx
        andl $7,%edx
        addl %ecx,%edx
        /* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
        cmpl $0,(%rsp)                  /* saved zero flag */
        jz .Le_zero
        movq %rdx,%rcx
.Le_byte:
        xorl %eax,%eax
.Le5:   rep
        stosb
        /* when there is another exception while zeroing the rest just return */
.Le_zero:
        movq %rdx,%rax
        jmp .Lende
        CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
/* Some CPUs run faster using the string copy instructions.
   This is also a lot simpler. Use them when possible.
   Patch in jmps to this code instead of copying it fully
   to avoid unwanted aliasing in the exception tables. */

/* rdi  destination
 * rsi  source
 * rdx  count
 * ecx  zero flag
 *
 * Output:
 * eax  uncopied bytes or 0 if successful.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. Anyone who feels the need to fix
 * this should keep those errata in mind.
 */
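/*
 * The copy is split into count/8 quadwords moved with rep movsq and the
 * count%8 trailing bytes moved with rep movsb. For example (illustrative
 * numbers only), a 29-byte copy becomes 3 movsq iterations followed by
 * 5 movsb iterations.
 */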
ENTRY(copy_user_generic_string)
        CFI_STARTPROC
        movl %ecx,%r8d                  /* save zero flag */
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
        jz 10f
1:      rep
        movsq
        movl %edx,%ecx
2:      rep
        movsb
9:      movl %ecx,%eax
        ret

        /* multiple of 8 byte */
10:     rep
        movsq
        xor %eax,%eax
        ret

        /* exception handling */
3:      lea (%rdx,%rcx,8),%rax          /* exception on quad loop */
        jmp 6f
5:      movl %ecx,%eax                  /* exception on byte loop */
        /* eax: left over bytes */
6:      testl %r8d,%r8d                 /* zero flag set? */
        jz 7f
        movl %eax,%ecx                  /* initialize x86 loop counter */
        push %rax
        xorl %eax,%eax
8:      rep
        stosb                           /* zero the rest */
11:     pop %rax
7:      ret
        CFI_ENDPROC
END(copy_user_generic_string)

        .section __ex_table,"a"
        .quad 1b,3b
        .quad 2b,5b
        .quad 8b,11b
        .quad 10b,3b
        .previous