/*
 * File: arch/blackfin/lib/memcpy.S
 * Based on:
 * Author:
 *
 * Created:
 * Description: internal version of memcpy(), issued by the compiler
 *              to copy blocks of data around.
 *              This is really memmove() - it has to be able to deal with
 *              possible overlaps, because that ambiguity is when the compiler
 *              gives up and calls a function. We have our own, internal version
 *              so that we get something we trust, even if the user has redefined
 *              the normal symbol.
 * Rev: $Id: memcpy.S 2775 2007-02-21 13:58:44Z hennerich $
 *
 * Modified:
 * Copyright 2004-2006 Analog Devices Inc.
 *
 * Bugs: Enter bugs at http://blackfin.uclinux.org/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see the file COPYING, or write
 * to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
  .align 2
  .globl _memcpy_ASM;

/*
 * _memcpy_ASM - copy a block of bytes, coping with overlapping buffers.
 *
 * In:   R0 = dst, R1 = src, R2 = length in bytes
 * Out:  R0 = src on every path that actually copies data.
 *       On the early exit (length <= 0, signed test) R0 is left
 *       untouched, so it still holds dst.
 *       NOTE(review): those two return values disagree with each other,
 *       and C memcpy()/memmove() return dst - confirm what the callers
 *       expect before changing either path.
 * Uses: R3, P0, P1, P2, CC and hardware loop 0 (LSETUP ... LC0);
 *       R1 is overwritten by the byte loops, I1 by the word loop.
 *
 * Strategy:
 *   - src < dst < src + len : byte copy from the last byte down to the
 *     first, so the source is read before it is overwritten;
 *   - otherwise, both addresses 4-byte aligned and len >= 12 : word
 *     copy, followed by a byte copy of the 0-3 trailing bytes;
 *   - otherwise : plain ascending byte copy.
 */
_memcpy_ASM:
        CC = R2 <= 0;                   /* signed test: zero or "negative" length */
        IF CC JUMP .L_P1L2147483647;    /* nothing to do; R0 still holds dst */

        P0 = R0;                        /* P0 = dst cursor */
        P1 = R1;                        /* P1 = src cursor */
        P2 = R2;                        /* P2 = byte count */

        /*
         * Overlap check.  Addresses are unsigned quantities, so they are
         * compared with (IU): a signed compare misorders two addresses
         * that lie on opposite sides of 0x80000000 and could pick the
         * forward copy for buffers that really do overlap.
         */
        CC = R1 < R0 (IU);              /* src below dst? */
        IF !CC JUMP .Lno_overlap;
        R3 = R1 + R2;                   /* R3 = src + len */
        CC = R0 < R3 (IU);              /* ...and dst below src + len? */
        IF CC JUMP .Lhas_overlap;

.Lno_overlap:
        /* Word copies are only possible if both addresses are 4-byte aligned. */
        R3 = R1 | R0;
        R0 = 0x3;                       /* low-two-bits mask; dst already lives in P0 */
        R3 = R3 & R0;
        CC = R3;                        /* low bits set on either address? */
        IF CC JUMP .Lnot_aligned;

        /* Both aligned: take the word loop only for three or more words. */
        P2 = P2 >> 2;                   /* P2 = number of whole words */
        CC = P2 <= 2;
        IF !CC JUMP .Lmore_than_seven;

        /* Fewer than 12 bytes: just copy them one byte at a time. */
        P2 = R2;                        /* back to the byte count */
        LSETUP(.Lthree_start, .Lthree_end) LC0=P2;
        R0 = R1;                        /* return value: src */
.Lthree_start:
        R3 = B[P1++] (X);
.Lthree_end:
        B[P0++] = R3;
        RTS;

.Lmore_than_seven:
        /*
         * At least three words (12 bytes) to copy.  The first load and
         * the last store sit outside the loop, so it runs words - 1 times.
         */
        P2 += -1;
        LSETUP(.Lword_loop, .Lword_loop) LC0=P2;
        R0 = R1;                        /* return value: src */
        I1 = P1;                        /* word loads go through I1 */
        R3 = [I1++];                    /* preload the first word */
.Lword_loop:
        MNOP || [P0++] = R3 || R3 = [I1++];     /* store a word, fetch the next */
        [P0++] = R3;                    /* store the final word */

        /* Any remaining bytes to copy? */
        R3 = 0x3;
        R3 = R2 & R3;                   /* R3 = len & 3 */
        CC = R3 == 0;
        P1 = I1;                        /* the byte loop below reads through P1 */
        IF !CC JUMP .Lbytes_left;
        RTS;

.Lbytes_left:
        P2 = R3;                        /* only the trailing bytes are left */
.Lnot_aligned:
        /* From here on the copy is byte-by-byte, ascending. */
        LSETUP (.Lbyte_start , .Lbyte_end) LC0=P2;
        R0 = R1;                        /* return value: src (R1 is reused below) */
.Lbyte_start:
        R1 = B[P1++] (X);
.Lbyte_end:
        B[P0++] = R1;
.L_P1L2147483647:
        RTS;

.Lhas_overlap:
        /*
         * dst lies inside the source range, so an ascending copy would
         * overwrite source bytes before they are read.  Copy from the
         * last byte down to the first instead.  No attempt is made to
         * use word accesses in this direction.
         */
        R0 = R1;                        /* return value: src */
        P0 = P0 + P2;
        P0 += -1;                       /* P0 = dst + len - 1 */
        P1 = P1 + P2;
        P1 += -1;                       /* P1 = src + len - 1 */
        LSETUP(.Lover_start, .Lover_end) LC0=P2;
.Lover_start:
        R1 = B[P1--] (X);
.Lover_end:
        B[P0--] = R1;
        RTS;