copy_user.S

/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
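
/*
 * Both entry points below check the user pointer against the current
 * thread's address limit and then run the generic copy loop.  Each also
 * carries an .altinstructions record (original address, replacement
 * address, feature bit, original length, replacement length): on CPUs
 * with X86_FEATURE_K8_C the boot-time alternatives patching turns the
 * 5-byte jump into a jump to copy_user_generic_c, the string-instruction
 * version at the end of this file.
 */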

/* Standard copy_to_user with segment limit checking */
	.globl copy_to_user
	.p2align 4
copy_to_user:
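	/*
	 * Segment limit check: rcx = rdi + rdx is the end of the user
	 * buffer; jc catches wraparound of the 64-bit sum and the compare
	 * rejects anything beyond the thread's addr_limit.
	 */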
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae bad_to_user
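	/*
	 * Jump to the generic copy loop, hand-encoded as 0xe9 + rel32 so the
	 * instruction is exactly the 5 bytes recorded in the .altinstructions
	 * entry below, which lets it be patched into a jump to
	 * copy_user_generic_c.
	 */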
2:
	.byte 0xe9	/* 32bit jump */
	.long .Lcug-1f
1:

	.section .altinstr_replacement,"ax"
3:	.byte 0xe9			/* replacement jmp, also with 32bit immediate */
	.long copy_user_generic_c-1b	/* offset */
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad 2b
	.quad 3b
	.byte X86_FEATURE_K8_C
	.byte 5
	.byte 5
	.previous

/* Standard copy_from_user with segment limit checking */
	.globl copy_from_user
	.p2align 4
copy_from_user:
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq threadinfo_addr_limit(%rax),%rcx
	jae bad_from_user
	/* FALL THROUGH to copy_user_generic */
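
	/*
	 * Out-of-range user pointers land here.  bad_from_user zeroes the
	 * whole destination first, so a failed copy_from_user never leaves
	 * the kernel buffer with uninitialized contents; both paths then
	 * return the full count in eax as the number of bytes not copied.
	 */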
	.section .fixup,"ax"
	/* must zero dest */
bad_from_user:
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
	.previous

/*
 * copy_user_generic - memory copy with exception handling.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
	.globl copy_user_generic
	.p2align 4
copy_user_generic:
	.byte 0x66,0x66,0x90	/* 5 byte nop for replacement jump */
	.byte 0x66,0x90
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9			/* near jump with 32bit immediate */
	.long copy_user_generic_c-1b	/* offset */
	.previous
	.section .altinstructions,"a"
	.align 8
	.quad copy_user_generic
	.quad 2b
	.byte X86_FEATURE_K8_C
	.byte 5
	.byte 5
	.previous
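
/*
 * Generic unrolled copy, used when the string-copy alternative above has
 * not been patched in.  eax is cleared up front; the fault handlers after
 * the exception table use it to count how much of a partially copied
 * 64-byte block must be reported as uncopied.
 */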
.Lcug:
	pushq %rbx
	xorl %eax,%eax		/* zero for the exception handler */

#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jnz .Lbad_alignment
.Lafter_bad_alignment:
#endif

	movq %rdx,%rcx
	movl $64,%ebx
	shrq $6,%rdx
	decq %rdx
	js .Lhandle_tail
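	/*
	 * rcx preserves the original byte count for the tail code, ebx holds
	 * the 64-byte stride for the fault fixup, and rdx is now the number
	 * of full 64-byte blocks minus one (negative: no full block).  The
	 * loop below moves 64 bytes per iteration as four quadword loads
	 * followed by four stores, twice.
	 */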
	.p2align 4
.Lloop:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movq %r11,(%rdi)
.Ld2:	movq %r8,1*8(%rdi)
.Ld3:	movq %r9,2*8(%rdi)
.Ld4:	movq %r10,3*8(%rdi)
.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movq %r11,4*8(%rdi)
.Ld6:	movq %r8,5*8(%rdi)
.Ld7:	movq %r9,6*8(%rdi)
.Ld8:	movq %r10,7*8(%rdi)
	decq %rdx
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jns .Lloop
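
	/*
	 * Tail: edx gets the original count back from ecx; whatever is left
	 * below 64 bytes is copied 8 bytes at a time first.
	 */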
	.p2align 4
.Lhandle_tail:
	movl %ecx,%edx
	andl $63,%ecx
	shrl $3,%ecx
	jz .Lhandle_7
	movl $8,%ebx
	.p2align 4
.Lloop_8:
.Ls9:	movq (%rsi),%r8
.Ld9:	movq %r8,(%rdi)
	decl %ecx
	leaq 8(%rdi),%rdi
	leaq 8(%rsi),%rsi
	jnz .Lloop_8
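
	/* The final 1-7 bytes go one at a time through %bl. */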
.Lhandle_7:
	movl %edx,%ecx
	andl $7,%ecx
	jz .Lende
	.p2align 4
.Lloop_1:
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
	incq %rdi
	incq %rsi
	decl %ecx
	jnz .Lloop_1

.Lende:
	popq %rbx
	ret
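
/*
 * Destination alignment fixup: copy 8 - (rdi & 7) single bytes so that
 * rdi becomes 8-byte aligned, then re-enter the main path with rdx
 * reduced accordingly.  If the total count does not exceed the bytes
 * needed for alignment, the byte tail above handles everything.
 */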
#ifdef FIX_ALIGNMENT
	/* align destination */
	.p2align 4
.Lbad_alignment:
	movl $8,%r9d
	subl %ecx,%r9d
	movl %r9d,%ecx
	cmpq %r9,%rdx
	jz .Lhandle_7
	js .Lhandle_7
.Lalign_1:
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz .Lalign_1
	subq %r9,%rdx
	jmp .Lafter_bad_alignment
#endif
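
/*
 * Each exception table entry pairs a possibly faulting instruction with
 * the fixup to jump to.  A store shares its fixup with the load of the
 * same slot (.Ld1 -> .Ls1e and so on), so the recovery code only needs
 * to know which slot of the 64-byte block was reached.
 */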
	/* table sorted by exception address */
	.section __ex_table,"a"
	.align 8
	.quad .Ls1,.Ls1e
	.quad .Ls2,.Ls2e
	.quad .Ls3,.Ls3e
	.quad .Ls4,.Ls4e
	.quad .Ld1,.Ls1e
	.quad .Ld2,.Ls2e
	.quad .Ld3,.Ls3e
	.quad .Ld4,.Ls4e
	.quad .Ls5,.Ls5e
	.quad .Ls6,.Ls6e
	.quad .Ls7,.Ls7e
	.quad .Ls8,.Ls8e
	.quad .Ld5,.Ls5e
	.quad .Ld6,.Ls6e
	.quad .Ld7,.Ls7e
	.quad .Ld8,.Ls8e
	.quad .Ls9,.Le_quad
	.quad .Ld9,.Le_quad
	.quad .Ls10,.Le_byte
	.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
	.quad .Ls11,.Lzero_rest
	.quad .Ld11,.Lzero_rest
#endif
	.quad .Le5,.Le_zero
	.previous

	/* Fault in the main loop: work out how many bytes of the current
	   64-byte block must count as uncopied (accurate to 8 bytes, erring
	   on the pessimistic side) and advance rdi past what was copied.
	   This is gross; it would be better to fix the interface. */
	/* eax: zero, ebx: 64 */
.Ls1e:	addl $8,%eax
.Ls2e:	addl $8,%eax
.Ls3e:	addl $8,%eax
.Ls4e:	addl $8,%eax
.Ls5e:	addl $8,%eax
.Ls6e:	addl $8,%eax
.Ls7e:	addl $8,%eax
.Ls8e:	addl $8,%eax
	addq %rbx,%rdi		/* +64 */
	subq %rax,%rdi		/* correct destination with computed offset */

	shlq $6,%rdx		/* loop counter * 64 (stride length) */
	addq %rax,%rdx		/* add offset to loopcnt */
	andl $63,%ecx		/* remaining bytes */
	addq %rcx,%rdx		/* add them */
	jmp .Lzero_rest

	/* exception on quad word loop in tail handling */
	/* ecx: loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
	shll $3,%ecx
	andl $7,%edx
	addl %ecx,%edx
	/* edx: bytes to zero, rdi: dest, eax: zero */
.Lzero_rest:
	movq %rdx,%rcx
.Le_byte:
	xorl %eax,%eax
.Le5:	rep
	stosb
	/* when there is another exception while zeroing the rest just return */
.Le_zero:
	movq %rdx,%rax
	jmp .Lende

	/* The C stepping K8 runs faster using the string copy instructions.
	   This is also a lot simpler.  Use them when possible.
	   Patch in jmps to this code instead of copying it fully
	   to avoid unwanted aliasing in the exception tables. */

	/* rdi	destination
	 * rsi	source
	 * rdx	count
	 *
	 * Output:
	 * eax	uncopied bytes or 0 if successful.
	 */
copy_user_generic_c:
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
	movl %edx,%ecx
2:	rep
	movsb
4:	movl %ecx,%eax
	ret
3:	lea (%rdx,%rcx,8),%rax
	ret
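
	/*
	 * Fixups for the string copy: if rep movsq faults, rcx holds the
	 * remaining quadwords and rdx the trailing bytes, so 3: returns
	 * rcx*8 + rdx.  If rep movsb faults, rcx is the remaining byte
	 * count and 4: (also the normal exit, where rcx is 0) returns it.
	 */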
	.section __ex_table,"a"
	.quad 1b,3b
	.quad 2b,4b
	.previous