copy_user_64.S

/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

        .macro ALTERNATIVE_JUMP feature,orig,alt
0:
        .byte 0xe9                      /* 32bit jump */
        .long \orig-1f                  /* by default jump to orig */
1:
        .section .altinstr_replacement,"ax"
2:      .byte 0xe9                      /* near jump with 32bit immediate */
        .long \alt-1b                   /* offset */ /* or alternatively to alt */
        .previous
        .section .altinstructions,"a"
        .align 8
        .quad 0b
        .quad 2b
        .byte \feature                  /* when feature is set */
        .byte 5
        .byte 5
        .previous
        .endm
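
/*
 * A rough sketch of how the macro above is consumed by the generic
 * alternatives mechanism (an approximation, not a quote of the actual
 * patching code). The default path is the 5-byte "jmp \orig" emitted
 * at label 0. Early at boot the kernel walks .altinstructions and, if
 * the recorded CPU feature bit is set, copies the replacement jump
 * from .altinstr_replacement over the original, so the call site ends
 * up jumping to \alt instead. In C-like pseudocode:
 *
 *      for each entry { orig, repl, feature, origlen, repllen }:
 *              if (boot_cpu_has(feature))
 *                      memcpy(orig, repl, repllen);    (5 bytes here)
 *
 * The two ".byte 5" fields are those lengths: both the original and
 * the replacement instruction are 5-byte near jumps.
 */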

        .macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jz 102f                         /* already aligned */
        subl $8,%ecx
        negl %ecx
        subl %ecx,%edx
100:    movb (%rsi),%al
101:    movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 100b
102:
        .section .fixup,"ax"
103:    addl %ecx,%edx                  /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 100b,103b
        .quad 101b,103b
        .previous
#endif
        .endm
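
/*
 * Roughly equivalent C for ALIGN_DESTINATION (a sketch only; the byte
 * accesses in the loop above may fault, which is what the 100b/101b
 * exception-table entries catch):
 *
 *      unsigned misalign = (unsigned long)dst & 7;
 *      if (misalign) {
 *              unsigned head = 8 - misalign;
 *              len -= head;
 *              while (head--)
 *                      *dst++ = *src++;
 *      }
 *
 * If one of those byte accesses faults, the fixup at 103 adds the
 * bytes still pending in %ecx back onto %edx and jumps to
 * copy_user_handle_tail with the full remaining count.
 */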

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx
        jc bad_to_user
        cmpq TI_addr_limit(%rax),%rcx
        jae bad_to_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC
ENDPROC(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx
        jc bad_from_user
        cmpq TI_addr_limit(%rax),%rcx
        jae bad_from_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC
ENDPROC(_copy_from_user)
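
/*
 * The range check used by both entry points above is essentially an
 * open-coded access_ok(): compute the end of the user buffer, then
 * reject it if the addition wrapped (jc) or if it reaches at or past
 * the task's address limit (jae). A hedged C sketch:
 *
 *      unsigned long end = (unsigned long)ptr + len;
 *      if (end < (unsigned long)ptr ||
 *          end >= current_thread_info()->addr_limit.seg)
 *              goto bad;
 *
 * TI_addr_limit is the asm-offsets.h constant for the addr_limit field
 * of thread_info, which GET_THREAD_INFO(%rax) makes reachable here.
 */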

        .section .fixup,"ax"
        /* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
        CFI_STARTPROC
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax
        ret
        CFI_ENDPROC
ENDPROC(bad_from_user)
        .previous
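
/*
 * Failure-path semantics, sketched in C (an approximation): when the
 * range check in _copy_from_user fails, the destination is kernel
 * memory that the caller expects to be initialized, so it is cleared
 * in full before the whole count is reported back. _copy_to_user has
 * no such obligation and only reports the count.
 *
 *      bad_from_user:  memset(dst, 0, len);  return len;
 *      bad_to_user:    return len;
 *
 * The "rep stosb" relies on %rdi still pointing at the start of the
 * destination, which holds because these labels are only reached
 * before any copying has started. Faults that happen during the copy
 * itself go through copy_user_handle_tail instead.
 */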

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient
 * microcode for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
        CFI_STARTPROC
        cmpl $8,%edx
        jb 20f                          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ret

        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     lea (%rdx,%rcx,8),%rdx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail       /* ecx is zerorest also */
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 1b,30b
        .quad 2b,30b
        .quad 3b,30b
        .quad 4b,30b
        .quad 5b,30b
        .quad 6b,30b
        .quad 7b,30b
        .quad 8b,30b
        .quad 9b,30b
        .quad 10b,30b
        .quad 11b,30b
        .quad 12b,30b
        .quad 13b,30b
        .quad 14b,30b
        .quad 15b,30b
        .quad 16b,30b
        .quad 18b,40b
        .quad 19b,40b
        .quad 21b,50b
        .quad 22b,50b
        .previous
        CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)
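
/*
 * Shape of the routine above, as a hedged C sketch (register use and
 * the exception-table plumbing are omitted):
 *
 *      while (len >= 64) {     (8 quadwords = 64 bytes per iteration)
 *              copy 8*8 bytes through r8-r11;
 *              src += 64; dst += 64; len -= 64;
 *      }
 *      while (len >= 8) { copy one quadword; src += 8; dst += 8; len -= 8; }
 *      while (len)      { copy one byte;     src++;    dst++;    len--;   }
 *      return 0;
 *
 * On a fault, the fixups at 30/40/50 rebuild the number of bytes not
 * yet copied (remaining loop iterations times the block size, plus the
 * remainder kept in %edx) and pass it to copy_user_handle_tail, which
 * retries byte by byte and can zero whatever could not be copied.
 */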

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please keep those errata in mind.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
        CFI_STARTPROC
        andl %edx,%edx
        jz 4f
        cmpl $8,%edx
        jb 2f                           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
4:      xorl %eax,%eax
        ret

        .section .fixup,"ax"
11:     lea (%rdx,%rcx,8),%rcx
12:     movl %ecx,%edx                  /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 1b,11b
        .quad 3b,12b
        .previous
        CFI_ENDPROC
ENDPROC(copy_user_generic_string)
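
/*
 * Hedged C equivalent of the string variant above (the real work is
 * just rep movsq followed by rep movsb):
 *
 *      if (len == 0) return 0;
 *      if (len >= 8) {
 *              align dst to 8 bytes;           (ALIGN_DESTINATION)
 *              quadwords = len / 8; len &= 7;
 *              rep movsq;                      (quadwords * 8 bytes)
 *      }
 *      rep movsb;                              (the last len bytes)
 *      return 0;
 *
 * Because the iteration counts pass through 32-bit registers, a single
 * call moves at most 4GB, which is the limitation described in the
 * comment above. On a fault, the fixups at 11/12 convert the leftover
 * rep counter back into a byte count for copy_user_handle_tail.
 */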