copy_user_64.S

/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
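
/*
 * ALTERNATIVE_JUMP emits a 5-byte relative jump to \orig and records a
 * replacement jump to \alt in .altinstructions.  At boot the alternatives
 * code patches the jump in place on CPUs that advertise \feature, so the
 * faster variant is selected once instead of being tested on every call.
 */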
        .macro ALTERNATIVE_JUMP feature,orig,alt
0:
        .byte 0xe9                      /* 32bit jump */
        .long \orig-1f                  /* by default jump to orig */
1:
        .section .altinstr_replacement,"ax"
2:      .byte 0xe9                      /* near jump with 32bit immediate */
        .long \alt-1b                   /* offset */ /* or alternatively to alt */
        .previous
        .section .altinstructions,"a"
        .align 8
        .quad 0b
        .quad 2b
        .byte \feature                  /* when feature is set */
        .byte 5
        .byte 5
        .previous
        .endm
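
/*
 * ALIGN_DESTINATION byte-copies until %rdi is 8-byte aligned so that the
 * main loops below do aligned stores.  %ecx counts the bytes handled by
 * this step and %edx the rest of the copy; on a fault the fixup adds the
 * two back together and hands off to copy_user_handle_tail.
 */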
        .macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jz 102f                         /* already aligned */
        subl $8,%ecx
        negl %ecx
        subl %ecx,%edx
100:    movb (%rsi),%al
101:    movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 100b
102:
        .section .fixup,"ax"
103:    addl %ecx,%edx                  /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 100b,103b
        .quad 101b,103b
        .previous
#endif
        .endm
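
/*
 * Both user-copy entry points below validate the user pointer the same way:
 * the end address (pointer plus %rdx count) must not wrap (jc) and must stay
 * below the task's addr_limit taken from thread_info, otherwise we branch to
 * the bad_*_user fixup, which reports the whole count as uncopied.
 */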

/* Standard copy_to_user with segment limit checking */
ENTRY(_copy_to_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rdi,%rcx
        addq %rdx,%rcx
        jc bad_to_user
        cmpq TI_addr_limit(%rax),%rcx
        jae bad_to_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC
ENDPROC(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
        CFI_STARTPROC
        GET_THREAD_INFO(%rax)
        movq %rsi,%rcx
        addq %rdx,%rcx
        jc bad_from_user
        cmpq TI_addr_limit(%rax),%rcx
        jae bad_from_user
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC
ENDPROC(_copy_from_user)

ENTRY(copy_user_generic)
        CFI_STARTPROC
        ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
        CFI_ENDPROC
ENDPROC(copy_user_generic)
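
/*
 * For illustration only, not part of the original file: a typical C caller
 * treats the return value in %rax as the number of bytes that could NOT be
 * copied, roughly
 *
 *      if (_copy_from_user(kbuf, ubuf, len))
 *              return -EFAULT;         // some bytes were left uncopied
 *
 * When _copy_from_user cannot copy everything, the uncopied part of the
 * destination is zeroed (bad_from_user below, copy_user_handle_tail for
 * faults) so kernel buffers never hold stale data.
 */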

        .section .fixup,"ax"
        /* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
        CFI_STARTPROC
        movl %edx,%ecx
        xorl %eax,%eax
        rep
        stosb
bad_to_user:
        movl %edx,%eax
        ret
        CFI_ENDPROC
ENDPROC(bad_from_user)
        .previous
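
/*
 * Every load or store below that may touch user memory has an __ex_table
 * entry pairing its address with a fixup label, so a page fault on a bad
 * user address resumes at the fixup code instead of oopsing, and the fixup
 * computes how many bytes are still left to handle.
 */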

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_unrolled)
        CFI_STARTPROC
        cmpl $8,%edx
        jb 20f                          /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
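        /* handle the 0-63 byte tail: whole qwords first, then single bytes */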
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ret
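
        /*
         * Fixups: 30 handles faults in the 64-byte loop (turn the remaining
         * block count in ecx back into bytes and add the tail in edx), 40
         * the 8-byte loop and 50 the byte loop; all end up in
         * copy_user_handle_tail with the number of bytes left to deal with.
         */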
        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     lea (%rdx,%rcx,8),%rdx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail       /* ecx is zerorest also */
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 1b,30b
        .quad 2b,30b
        .quad 3b,30b
        .quad 4b,30b
        .quad 5b,30b
        .quad 6b,30b
        .quad 7b,30b
        .quad 8b,30b
        .quad 9b,30b
        .quad 10b,30b
        .quad 11b,30b
        .quad 12b,30b
        .quad 13b,30b
        .quad 14b,30b
        .quad 15b,30b
        .quad 16b,30b
        .quad 18b,40b
        .quad 19b,40b
        .quad 21b,50b
        .quad 22b,50b
        .previous
        CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
ENTRY(copy_user_generic_string)
        CFI_STARTPROC
        andl %edx,%edx
        jz 4f
        cmpl $8,%edx
        jb 2f                           /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
4:      xorl %eax,%eax
        ret
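
        /*
         * Fixups: a fault in rep movsq leaves the remaining qword count in
         * rcx, so 11 converts it to bytes and adds the tail from rdx; a
         * fault in rep movsb leaves the remaining byte count in rcx, which
         * 12 passes straight to copy_user_handle_tail.
         */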
        .section .fixup,"ax"
11:     lea (%rdx,%rcx,8),%rcx
12:     movl %ecx,%edx                  /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        .section __ex_table,"a"
        .align 8
        .quad 1b,11b
        .quad 3b,12b
        .previous
        CFI_ENDPROC
ENDPROC(copy_user_generic_string)