copy_user_nocache.S

/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

#define FIX_ALIGNMENT 1

#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
/*
 * copy_user_nocache - Uncached memory copy with exception handling.
 * The destination is written with non-temporal stores (movnti), so the
 * copied data is kept out of the CPU caches for better performance.
 *
 * Input:
 * rdi  destination
 * rsi  source
 * rdx  count
 * rcx  zero flag: when 1, zero the rest of the destination on an exception
 *
 * Output:
 * eax  uncopied bytes, or 0 if successful.
 */
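
/*
 * For reference only: a C-level declaration matching the register
 * convention above would look roughly like the sketch below.  This is an
 * illustrative assumption; the actual prototype is declared in the uaccess
 * headers, not in this file.
 *
 *      long __copy_user_nocache(void *dst, const void *src,
 *                               unsigned size, int zerorest);
 *
 * The return value is the number of bytes that could not be copied.
 */
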
ENTRY(__copy_user_nocache)
        CFI_STARTPROC
        pushq %rbx
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rbx, 0
        pushq %rcx              /* save zero flag */
        CFI_ADJUST_CFA_OFFSET 8
        CFI_REL_OFFSET rcx, 0

        xorl %eax,%eax          /* zero for the exception handler */

#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
        jnz  .Lbad_alignment
.Lafter_bad_alignment:
#endif

        movq %rdx,%rcx

        movl $64,%ebx
        shrq $6,%rdx
        decq %rdx
        js   .Lhandle_tail
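
        /*
         * Main loop: copy 64 bytes per iteration.  The loads are ordinary
         * movq; only the stores use movnti, which writes around the cache.
         * rdx holds the number of 64-byte blocks still to copy, minus one.
         */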
        .p2align 4
.Lloop:
.Ls1:   movq (%rsi),%r11
.Ls2:   movq 1*8(%rsi),%r8
.Ls3:   movq 2*8(%rsi),%r9
.Ls4:   movq 3*8(%rsi),%r10
.Ld1:   movnti %r11,(%rdi)
.Ld2:   movnti %r8,1*8(%rdi)
.Ld3:   movnti %r9,2*8(%rdi)
.Ld4:   movnti %r10,3*8(%rdi)

.Ls5:   movq 4*8(%rsi),%r11
.Ls6:   movq 5*8(%rsi),%r8
.Ls7:   movq 6*8(%rsi),%r9
.Ls8:   movq 7*8(%rsi),%r10
.Ld5:   movnti %r11,4*8(%rdi)
.Ld6:   movnti %r8,5*8(%rdi)
.Ld7:   movnti %r9,6*8(%rdi)
.Ld8:   movnti %r10,7*8(%rdi)

        dec  %rdx

        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi

        jns  .Lloop
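
        /*
         * Tail handling: rcx still holds the original byte count.  First
         * copy the remaining full 8-byte words, then the final 0..7 bytes.
         */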
        .p2align 4
.Lhandle_tail:
        movl %ecx,%edx
        andl $63,%ecx
        shrl $3,%ecx
        jz   .Lhandle_7
        movl $8,%ebx
        .p2align 4
.Lloop_8:
.Ls9:   movq (%rsi),%r8
.Ld9:   movnti %r8,(%rdi)
        decl %ecx
        leaq 8(%rdi),%rdi
        leaq 8(%rsi),%rsi
        jnz  .Lloop_8
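
        /* Copy the final 0..7 bytes one at a time. */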
.Lhandle_7:
        movl %edx,%ecx
        andl $7,%ecx
        jz   .Lende
        .p2align 4
.Lloop_1:
.Ls10:  movb (%rsi),%bl
.Ld10:  movb %bl,(%rdi)
        incq %rdi
        incq %rsi
        decl %ecx
        jnz  .Lloop_1

        CFI_REMEMBER_STATE
.Lende:
        popq %rcx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE %rcx

        popq %rbx
        CFI_ADJUST_CFA_OFFSET -8
        CFI_RESTORE rbx
        ret
        CFI_RESTORE_STATE

#ifdef FIX_ALIGNMENT
        /* align destination */
        .p2align 4
.Lbad_alignment:
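        /*
         * r9d = 8 - (%rdi & 7): bytes to copy before the destination is
         * 8-byte aligned.  If the whole count fits within that (i.e. fewer
         * than 8 bytes total), handle it as a plain byte copy instead.
         */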
        movl $8,%r9d
        subl %ecx,%r9d
        movl %r9d,%ecx
        cmpq %r9,%rdx
        jz   .Lhandle_7
        js   .Lhandle_7
.Lalign_1:
.Ls11:  movb (%rsi),%bl
.Ld11:  movb %bl,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz  .Lalign_1
        subq %r9,%rdx
        jmp  .Lafter_bad_alignment
#endif

        /* table sorted by exception address */
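        /*
         * Each entry pairs a faulting instruction (the .Ls*/.Ld* labels
         * above) with the fixup code the exception handler jumps to when
         * that instruction faults on a user address.
         */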
        .section __ex_table,"a"
        .align 8
        .quad .Ls1,.Ls1e
        .quad .Ls2,.Ls2e
        .quad .Ls3,.Ls3e
        .quad .Ls4,.Ls4e
        .quad .Ld1,.Ls1e
        .quad .Ld2,.Ls2e
        .quad .Ld3,.Ls3e
        .quad .Ld4,.Ls4e
        .quad .Ls5,.Ls5e
        .quad .Ls6,.Ls6e
        .quad .Ls7,.Ls7e
        .quad .Ls8,.Ls8e
        .quad .Ld5,.Ls5e
        .quad .Ld6,.Ls6e
        .quad .Ld7,.Ls7e
        .quad .Ld8,.Ls8e
        .quad .Ls9,.Le_quad
        .quad .Ld9,.Le_quad
        .quad .Ls10,.Le_byte
        .quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
        .quad .Ls11,.Lzero_rest
        .quad .Ld11,.Lzero_rest
#endif
        .quad .Le5,.Le_zero
        .previous

        /*
         * Compute the uncopied-byte count for a fault in the main 64-byte
         * loop.  8-byte accuracy, with the error on the pessimistic side.
         * This is gross; it would be better to fix the interface.
         */
        /* eax: zero, ebx: 64 */
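        /*
         * Worked example (illustrative): a fault at .Ls3 enters at .Ls3e
         * and falls through .Ls4e...Ls8e, so eax = 6*8 = 48 bytes of the
         * current 64-byte block are charged as uncopied.  rdi is then set
         * to block start + (64 - eax), and rdx becomes
         * remaining_blocks*64 + eax + (count & 63), which is the value
         * eventually returned (and, if the zero flag was set, the number
         * of destination bytes zeroed).
         */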
.Ls1e:  addl $8,%eax
.Ls2e:  addl $8,%eax
.Ls3e:  addl $8,%eax
.Ls4e:  addl $8,%eax
.Ls5e:  addl $8,%eax
.Ls6e:  addl $8,%eax
.Ls7e:  addl $8,%eax
.Ls8e:  addl $8,%eax
        addq %rbx,%rdi          /* +64 */
        subq %rax,%rdi          /* correct destination with computed offset */

        shlq $6,%rdx            /* loop counter * 64 (stride length) */
        addq %rax,%rdx          /* add offset to loopcnt */
        andl $63,%ecx           /* remaining bytes */
        addq %rcx,%rdx          /* add them */
        jmp  .Lzero_rest

        /* exception on quad word loop in tail handling */
        /* ecx: loopcnt/8, %edx: length, rdi: correct */
.Le_quad:
        shll $3,%ecx
        andl $7,%edx
        addl %ecx,%edx

        /* edx: bytes to zero, rdi: dest, eax: zero */
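        /*
         * (%rsp) is the zero flag saved by the "pushq %rcx" in the
         * prologue (it was pushed after %rbx, so it is still on top of
         * the stack).  If the flag is clear, skip the zeroing and just
         * report the uncopied count.
         */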
.Lzero_rest:
        cmpl $0,(%rsp)          /* zero flag set? */
        jz   .Le_zero
        movq %rdx,%rcx
.Le_byte:
        xorl %eax,%eax
.Le5:   rep
        stosb
        /* when there is another exception while zeroing the rest just return */
.Le_zero:
        movq %rdx,%rax
        jmp  .Lende
        CFI_ENDPROC
ENDPROC(__copy_user_nocache)