memcpy.S 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. ! Copyright (C) 2013 Imagination Technologies Ltd.
  2. ! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
  3. .text
  4. .global _memcpy
  5. .type _memcpy,function
  ! _memcpy: copy cnt bytes from src to dst and return dst.
  !
  ! Strategy, as implemented below:
  !   - cnt < 16: plain byte loop.
  !   - otherwise: copy single bytes until dst is 8 byte aligned, then
  !       * src also 8 byte aligned: copy 32 byte blocks with GETL/SETL pairs;
  !       * src mis-aligned by 1-7 bytes: read aligned 8 byte blocks and
  !         shift/merge them into aligned 8 byte stores;
  !     any remaining tail is finished by the byte loop.
  !
  ! NOTE(review): the "TXRPT = n - 1 ... BR label" pairs are read here as a
  ! hardware repeat loop that runs the loop body n times and then falls
  ! through - confirm against the Meta ISA reference.
  !
  ! Registers:
  6. ! D1Ar1 dst (argument; overwritten as scratch in the unaligned loops)
  7. ! D0Ar2 src (argument; overwritten as scratch in the unaligned loops)
  8. ! D1Ar3 cnt (argument; counted down / masked to the bytes still to copy)
  9. ! D0Re0 dst (return value, restored from A0.3 at $Lend)
  ! Also written: A1.2 (running source pointer), A0.2 (running destination
  ! pointer), A0.3 (saved dst), TXRPT, D1Re0, D0Ar4, D1Ar5, D0Ar6, D0FrT and
  ! the condition flags.
  10. _memcpy:
  11. CMP D1Ar3, #16 ! result is tested by the BGE four lines down; the MOVs in between must leave the flags alone
  12. MOV A1.2, D0Ar2 ! source pointer
  13. MOV A0.2, D1Ar1 ! destination pointer
  14. MOV A0.3, D1Ar1 ! for return value
  15. ! If there are fewer than 16 bytes to copy use the byte copy loop
  ! NOTE(review): BGE here and BLT in $Lbyte_copy look like signed conditions,
  ! so a count with bit 31 set would take the byte path and then skip the
  ! copy entirely - confirm whether such counts can occur.
  16. BGE $Llong_copy
  17. $Lbyte_copy:
  18. ! Simply copy a byte at a time
  ! Entered with D1Ar3 = bytes left, A1.2 = source, A0.2 = destination.
  19. SUBS TXRPT, D1Ar3, #1 ! TXRPT = bytes left - 1
  20. BLT $Lend ! bytes left - 1 is negative: nothing to copy
  21. $Lloop_byte:
  22. GETB D1Re0, [A1.2++]
  23. SETB [A0.2++], D1Re0
  24. BR $Lloop_byte
  25. $Lend:
  26. ! Finally set return value and return
  27. MOV D0Re0, A0.3 ! original dst saved at entry
  28. MOV PC, D1RtP ! return by loading PC from D1RtP
  29. $Llong_copy:
  30. ANDS D1Ar5, D1Ar1, #7 ! test destination alignment
  31. BZ $Laligned_dst
  32. ! The destination address is not 8 byte aligned. We will copy bytes from
  33. ! the source to the destination until the remaining data has an 8 byte
  34. ! destination address alignment (i.e. we should never copy more than 7
  35. ! bytes here).
  ! D1Ar5 starts as (dst & 7) and counts up to 8.
  36. $Lalign_dst:
  37. GETB D0Re0, [A1.2++]
  38. ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8
  39. SUB D1Ar3, D1Ar3, #1 ! decrement count of remaining bytes
  40. SETB [A0.2++], D0Re0
  41. CMP D1Ar5, #8
  42. BNE $Lalign_dst
  43. ! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
  44. ! blocks, then jump to the unaligned copy loop or fall through to the aligned
  45. ! copy loop as appropriate.
  46. $Laligned_dst:
  47. MOV D0Ar4, A1.2
  48. LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks
  49. ANDS D0Ar4, D0Ar4, #7 ! test source alignment
  50. BNZ $Lunaligned_copy ! if unaligned, use unaligned copy loop
  51. ! Both source and destination are 8 byte aligned - the easy case.
  ! The 8 byte block count in D1Ar5 is not used on this path; it is replaced
  ! by the 32 byte block count and then reused as a data register in the loop.
  52. $Laligned_copy:
  53. LSRS D1Ar5, D1Ar3, #5 ! D1Ar5 = number of 32 byte blocks
  54. BZ $Lbyte_copy ! fewer than 32 bytes left: byte loop copies them all
  55. SUB TXRPT, D1Ar5, #1 ! TXRPT = 32 byte blocks - 1
  ! Each pass moves 32 bytes as four GETL/SETL transfers of one register pair each.
  56. $Laligned_32:
  57. GETL D0Re0, D1Re0, [A1.2++]
  58. GETL D0Ar6, D1Ar5, [A1.2++]
  59. SETL [A0.2++], D0Re0, D1Re0
  60. SETL [A0.2++], D0Ar6, D1Ar5
  61. GETL D0Re0, D1Re0, [A1.2++]
  62. GETL D0Ar6, D1Ar5, [A1.2++]
  63. SETL [A0.2++], D0Re0, D1Re0
  64. SETL [A0.2++], D0Ar6, D1Ar5
  65. BR $Laligned_32
  66. ! If there are any remaining bytes use the byte copy loop, otherwise we are done
  67. ANDS D1Ar3, D1Ar3, #0x1f ! bytes left = cnt mod 32
  68. BNZ $Lbyte_copy
  69. B $Lend
  70. ! The destination is 8 byte aligned but the source is not, and there are 8
  71. ! or more bytes to be copied.
  ! On entry D1Ar5 = number of 8 byte blocks (set in $Laligned_dst).
  ! In the comments below, w0/w1 name the first/second register of a
  ! GETL/SETL register pair, and "off" is the bit offset held in D0Ar6.
  72. $Lunaligned_copy:
  73. ! Adjust the source pointer (A1.2) to the 8 byte boundary before its
  74. ! current value
  75. MOV D0Ar4, A1.2
  76. MOV D0Ar6, A1.2
  77. ANDMB D0Ar4, D0Ar4, #0xfff8 ! clear the low 3 address bits - presumably ANDMB masks only the bottom 16 bits and keeps the top 16; confirm
  78. MOV A1.2, D0Ar4
  79. ! Save the number of bytes of mis-alignment in D0Ar4 for use later
  80. SUBS D0Ar6, D0Ar6, D0Ar4 ! D0Ar6 = source address - rounded-down source address
  81. MOV D0Ar4, D0Ar6 ! copy kept for the pointer fix-up at $Lunaligned_end
  82. ! if there is no mis-alignment after all, use the aligned copy loop
  ! NOTE(review): the only visible branch to $Lunaligned_copy is taken when
  ! (source & 7) is non-zero, so this BZ looks purely defensive - confirm.
  83. BZ $Laligned_copy
  84. ! prefetch 8 bytes
  85. GETL D0Re0, D1Re0, [A1.2] ! no pointer update here; the loops advance A1.2 before each later load
  86. SUB TXRPT, D1Ar5, #1 ! TXRPT = 8 byte blocks - 1 (D1Ar5 is free for reuse afterwards)
  87. ! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
  88. ! 4 bytes, and more than 4 bytes.
  89. CMP D0Ar6, #4 ! both branches below test this one compare
  90. BLT $Lunaligned_1_2_3 ! use 1-3 byte mis-alignment loop
  91. BZ $Lunaligned_4 ! use 4 byte mis-alignment loop
  92. ! The mis-alignment is more than 4 bytes
  93. $Lunaligned_5_6_7:
  94. SUB D0Ar6, D0Ar6, #4 ! mis-alignment relative to the second prefetched register (1-3 bytes)
  95. ! Calculate the bit offsets required for the shift operations necessary
  96. ! to align the data.
  97. ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
  98. MULW D0Ar6, D0Ar6, #8 ! bytes -> bits
  99. MOV D1Ar5, #32
  100. SUB D1Ar5, D1Ar5, D0Ar6
  101. ! Move data 4 bytes before we enter the main loop
  102. MOV D0Re0, D1Re0 ! prefetched w1 becomes the carried register
  ! Loop invariant: D0Re0 holds the register carried over from the previous load.
  103. $Lloop_5_6_7:
  104. GETL D0Ar2, D1Ar1, [++A1.2] ! advance A1.2, then load the next pair (reuses the src/dst argument registers)
  105. ! form 64-bit data in D0Re0, D1Re0
  106. LSR D0Re0, D0Re0, D0Ar6 ! carried >> off
  107. MOV D1Re0, D0Ar2
  108. LSL D1Re0, D1Re0, D1Ar5 ! new w0 << (32 - off)
  109. ADD D0Re0, D0Re0, D1Re0 ! output w0 = (carried >> off) + (new w0 << (32 - off))
  110. LSR D0Ar2, D0Ar2, D0Ar6 ! new w0 >> off
  111. LSL D1Re0, D1Ar1, D1Ar5 ! new w1 << (32 - off)
  112. ADD D1Re0, D1Re0, D0Ar2 ! output w1 = (new w1 << (32 - off)) + (new w0 >> off)
  113. SETL [A0.2++], D0Re0, D1Re0
  114. MOV D0Re0, D1Ar1 ! new w1 is carried into the next pass
  115. BR $Lloop_5_6_7
  116. B $Lunaligned_end
  117. $Lunaligned_1_2_3:
  118. ! Calculate the bit offsets required for the shift operations necessary
  119. ! to align the data.
  120. ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
  121. MULW D0Ar6, D0Ar6, #8 ! bytes -> bits
  122. MOV D1Ar5, #32
  123. SUB D1Ar5, D1Ar5, D0Ar6
  ! Loop invariant: D0Re0/D1Re0 hold the most recently loaded pair ("current").
  124. $Lloop_1_2_3:
  125. ! form 64-bit data in D0Re0,D1Re0
  126. LSR D0Re0, D0Re0, D0Ar6 ! current w0 >> off
  127. LSL D1Ar1, D1Re0, D1Ar5 ! current w1 << (32 - off)
  128. ADD D0Re0, D0Re0, D1Ar1 ! output w0
  129. MOV D0Ar2, D1Re0
  130. LSR D0FrT, D0Ar2, D0Ar6 ! current w1 >> off, parked in D0FrT across the load
  131. GETL D0Ar2, D1Ar1, [++A1.2] ! advance A1.2, then load the next pair
  132. MOV D1Re0, D0Ar2
  133. LSL D1Re0, D1Re0, D1Ar5 ! next w0 << (32 - off)
  134. ADD D1Re0, D1Re0, D0FrT ! output w1
  135. SETL [A0.2++], D0Re0, D1Re0
  136. MOV D0Re0, D0Ar2 ! next pair becomes the current pair
  137. MOV D1Re0, D1Ar1
  138. BR $Lloop_1_2_3
  139. B $Lunaligned_end
  140. ! The 4 byte mis-alignment case - this does not require any shifting, just a
  141. ! shuffling of registers.
  142. $Lunaligned_4:
  143. MOV D0Re0, D1Re0 ! prefetched w1 is the first output register
  144. $Lloop_4:
  145. GETL D0Ar2, D1Ar1, [++A1.2] ! advance A1.2, then load the next pair
  146. MOV D1Re0, D0Ar2 ! new w0 is the second output register
  147. SETL [A0.2++], D0Re0, D1Re0
  148. MOV D0Re0, D1Ar1 ! new w1 is carried into the next pass
  149. BR $Lloop_4
  ! $Lloop_4 has no explicit branch after it; it runs straight on into $Lunaligned_end.
  150. $Lunaligned_end:
  151. ! If there are no remaining bytes to copy, we are done.
  152. ANDS D1Ar3, D1Ar3, #7 ! bytes left = remaining count mod 8
  153. BZ $Lend
  154. ! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
  155. ! address of the remaining bytes, and fall through to the byte copy loop.
  156. MOV D0Ar6, A1.2
  157. ADD D1Ar5, D0Ar4, D0Ar6 ! aligned address of the last block loaded + saved mis-alignment
  158. MOV A1.2, D1Ar5
  159. B $Lbyte_copy
  160. .size _memcpy,.-_memcpy
  161. libc_hidden_def(memcpy) ! macro is not defined in this file