copy_user_64.S

/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>

/*
 * By placing feature2 after feature1 in altinstructions section, we logically
 * implement:
 * If CPU has feature2, jmp to alt2 is used
 * else if CPU has feature1, jmp to alt1 is used
 * else jmp to orig is used.
 */
	.macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
0:
	.byte 0xe9			/* 32bit jump */
	.long \orig-1f			/* by default jump to orig */
1:
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9			/* near jump with 32bit immediate */
	.long \alt1-1b /* offset */	/* or alternatively to alt1 */
3:	.byte 0xe9			/* near jump with 32bit immediate */
	.long \alt2-1b /* offset */	/* or alternatively to alt2 */
	.previous

	.section .altinstructions,"a"
	altinstruction_entry 0b,2b,\feature1,5,5
	altinstruction_entry 0b,3b,\feature2,5,5
	.previous
	.endm
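
/*
 * Rough C-level sketch of the dispatch the macro above sets up (illustrative
 * only; the real selection is patched in once at boot by the alternatives
 * machinery, not decided by run-time branches):
 *
 *	if (cpu_has(X86_FEATURE_ERMS))			// feature2 -> alt2
 *		return copy_user_enhanced_fast_string(dst, src, len);
 *	else if (cpu_has(X86_FEATURE_REP_GOOD))		// feature1 -> alt1
 *		return copy_user_generic_string(dst, src, len);
 *	else						// neither -> orig
 *		return copy_user_generic_unrolled(dst, src, len);
 */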

	.macro ALIGN_DESTINATION
#ifdef FIX_ALIGNMENT
	/* check for bad alignment of destination */
	movl %edi,%ecx
	andl $7,%ecx
	jz 102f				/* already aligned */
	subl $8,%ecx
	negl %ecx
	subl %ecx,%edx
100:	movb (%rsi),%al
101:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 100b
102:
	.section .fixup,"ax"
103:	addl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(100b,103b)
	_ASM_EXTABLE(101b,103b)
#endif
	.endm
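
/*
 * What ALIGN_DESTINATION does, as an illustrative C sketch (pseudocode, not a
 * kernel helper): byte-copy the head until %rdi reaches an 8-byte boundary and
 * subtract that head from the remaining count in %edx:
 *
 *	misalign = dst & 7;
 *	if (misalign) {
 *		head = 8 - misalign;
 *		len -= head;
 *		while (head--)
 *			*dst++ = *src++;
 *	}
 */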

/* Standard copy_to_user with segment limit checking */
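
/*
 * The limit check below (mirrored in _copy_from_user) is roughly the
 * following, sketched in C for illustration: reject the copy if the user
 * pointer plus the count wraps around or crosses the task's addr_limit.
 *
 *	if (ptr + len < ptr ||					// jc bad_*_user
 *	    ptr + len > current_thread_info()->addr_limit)	// ja bad_*_user
 *		goto bad_to_user;	// or bad_from_user
 */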
ENTRY(_copy_to_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rdi,%rcx
	addq %rdx,%rcx
	jc bad_to_user
	cmpq TI_addr_limit(%rax),%rcx
	ja bad_to_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_to_user)

/* Standard copy_from_user with segment limit checking */
ENTRY(_copy_from_user)
	CFI_STARTPROC
	GET_THREAD_INFO(%rax)
	movq %rsi,%rcx
	addq %rdx,%rcx
	jc bad_from_user
	cmpq TI_addr_limit(%rax),%rcx
	ja bad_from_user
	ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS,	\
		copy_user_generic_unrolled,copy_user_generic_string,	\
		copy_user_enhanced_fast_string
	CFI_ENDPROC
ENDPROC(_copy_from_user)

	.section .fixup,"ax"
	/* must zero dest */
ENTRY(bad_from_user)
bad_from_user:
	CFI_STARTPROC
	movl %edx,%ecx
	xorl %eax,%eax
	rep
	stosb
bad_to_user:
	movl %edx,%eax
	ret
	CFI_ENDPROC
ENDPROC(bad_from_user)
	.previous
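
/*
 * Sketch of the failure paths above (illustrative): a rejected
 * _copy_from_user must not leave stale kernel memory visible, so
 * bad_from_user zero-fills the whole destination with rep stosb before
 * reporting; bad_to_user only reports. Both return the full count in %eax
 * as "bytes not copied":
 *
 *	bad_from_user:	memset(dst, 0, len); return len;
 *	bad_to_user:	return len;
 */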

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient micro
 * code for rep movsq
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
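/*
 * Illustrative C sketch of the copy strategy below (after the destination is
 * aligned to 8 bytes): move 64-byte blocks through r8-r11, then whole
 * quadwords, then the trailing bytes:
 *
 *	while (len >= 64) { copy 8 quadwords; src += 64; dst += 64; len -= 64; }
 *	while (len >= 8)  { copy 1 quadword;  src += 8;  dst += 8;  len -= 8;  }
 *	while (len)       { copy 1 byte;      src++;     dst++;     len--;     }
 */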
ENTRY(copy_user_generic_unrolled)
	CFI_STARTPROC
	cmpl $8,%edx
	jb 20f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	andl $63,%edx
	shrl $6,%ecx
	jz 17f
1:	movq (%rsi),%r8
2:	movq 1*8(%rsi),%r9
3:	movq 2*8(%rsi),%r10
4:	movq 3*8(%rsi),%r11
5:	movq %r8,(%rdi)
6:	movq %r9,1*8(%rdi)
7:	movq %r10,2*8(%rdi)
8:	movq %r11,3*8(%rdi)
9:	movq 4*8(%rsi),%r8
10:	movq 5*8(%rsi),%r9
11:	movq 6*8(%rsi),%r10
12:	movq 7*8(%rsi),%r11
13:	movq %r8,4*8(%rdi)
14:	movq %r9,5*8(%rdi)
15:	movq %r10,6*8(%rdi)
16:	movq %r11,7*8(%rdi)
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	decl %ecx
	jnz 1b
17:	movl %edx,%ecx
	andl $7,%edx
	shrl $3,%ecx
	jz 20f
18:	movq (%rsi),%r8
19:	movq %r8,(%rdi)
	leaq 8(%rsi),%rsi
	leaq 8(%rdi),%rdi
	decl %ecx
	jnz 18b
20:	andl %edx,%edx
	jz 23f
	movl %edx,%ecx
21:	movb (%rsi),%al
22:	movb %al,(%rdi)
	incq %rsi
	incq %rdi
	decl %ecx
	jnz 21b
23:	xor %eax,%eax
	ret
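
/*
 * Recovery scheme for the loops above (descriptive note): the exception table
 * entries below send a faulting access to 30b, 40b or 50b, which reconstruct
 * the number of bytes still to be handled before falling through to
 * copy_user_handle_tail:
 *	30: fault in the 64-byte loop, remainder = %ecx*64 + %edx
 *	40: fault in the quadword loop, remainder = %ecx*8 + %edx
 *	50: fault in the byte loop, remainder = %ecx
 */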
	.section .fixup,"ax"
30:	shll $6,%ecx
	addl %ecx,%edx
	jmp 60f
40:	lea (%rdx,%rcx,8),%rdx
	jmp 60f
50:	movl %ecx,%edx
60:	jmp copy_user_handle_tail	/* ecx is zerorest also */
	.previous

	_ASM_EXTABLE(1b,30b)
	_ASM_EXTABLE(2b,30b)
	_ASM_EXTABLE(3b,30b)
	_ASM_EXTABLE(4b,30b)
	_ASM_EXTABLE(5b,30b)
	_ASM_EXTABLE(6b,30b)
	_ASM_EXTABLE(7b,30b)
	_ASM_EXTABLE(8b,30b)
	_ASM_EXTABLE(9b,30b)
	_ASM_EXTABLE(10b,30b)
	_ASM_EXTABLE(11b,30b)
	_ASM_EXTABLE(12b,30b)
	_ASM_EXTABLE(13b,30b)
	_ASM_EXTABLE(14b,30b)
	_ASM_EXTABLE(15b,30b)
	_ASM_EXTABLE(16b,30b)
	_ASM_EXTABLE(18b,40b)
	_ASM_EXTABLE(19b,40b)
	_ASM_EXTABLE(21b,50b)
	_ASM_EXTABLE(22b,50b)
	CFI_ENDPROC
ENDPROC(copy_user_generic_unrolled)

/* Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * it, please consider this fact.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
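/*
 * Illustrative C sketch of the path below: after destination alignment, copy
 * whole quadwords with rep movsq, then the remaining bytes with rep movsb:
 *
 *	quads = len / 8;
 *	tail  = len % 8;
 *	rep_movsq(dst, src, quads);	// pseudocode for "rep movsq"
 *	rep_movsb(dst, src, tail);	// pseudocode for "rep movsb"
 */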
ENTRY(copy_user_generic_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 4f
	cmpl $8,%edx
	jb 2f				/* less than 8 bytes, go to byte copy loop */
	ALIGN_DESTINATION
	movl %edx,%ecx
	shrl $3,%ecx
	andl $7,%edx
1:	rep
	movsq
2:	movl %edx,%ecx
3:	rep
	movsb
4:	xorl %eax,%eax
	ret

	.section .fixup,"ax"
11:	lea (%rdx,%rcx,8),%rcx
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,11b)
	_ASM_EXTABLE(3b,12b)
	CFI_ENDPROC
ENDPROC(copy_user_generic_string)

/*
 * Some CPUs are adding enhanced REP MOVSB/STOSB instructions.
 * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
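/*
 * Illustrative sketch: with ERMS the whole count is handed to a single
 * rep movsb, so no alignment or unrolling is needed here:
 *
 *	rep_movsb(dst, src, len);	// pseudocode for "rep movsb" with %ecx = len
 */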
ENTRY(copy_user_enhanced_fast_string)
	CFI_STARTPROC
	andl %edx,%edx
	jz 2f
	movl %edx,%ecx
1:	rep
	movsb
2:	xorl %eax,%eax
	ret

	.section .fixup,"ax"
12:	movl %ecx,%edx			/* ecx is zerorest also */
	jmp copy_user_handle_tail
	.previous

	_ASM_EXTABLE(1b,12b)
	CFI_ENDPROC
ENDPROC(copy_user_enhanced_fast_string)