memcpy.S 5.0 KB

/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 *
 * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
 */

#include <features.h>
#include <sysdep.h>

/*
 * Endian-dependent helpers for the unaligned-source paths of memcpy.
 *
 * Those paths load whole words from the (now aligned) source and keep the
 * bytes that did not fit into the previous output word in a "carry"
 * register (r5):
 *
 *   SHIFT_1    moves the leading bytes of a freshly loaded word next to
 *              the carried bytes, so that both can be OR-ed into one
 *              output word.
 *   SHIFT_2    turns the rest of that loaded word into the next carry.
 *   MERGE_1/2  position the initial halfword / byte when the first
 *              3-byte carry is built (source off by 1).
 *   EXTRACT_1/2 pick the 16-bit and then the 8-bit piece of the final
 *              3-byte carry for the trailing sth/stb (source off by 1).
 *
 * Note that the little-endian EXTRACT_1 and the big-endian EXTRACT_2
 * ignore their IMM argument and use a fixed mask / shift instead.
 * The trailing "; <<" / "; >>" are part of the macro text and end up as
 * assembler comments at the point of use.
 */
#ifdef __LITTLE_ENDIAN__
# define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM
# define MERGE_2(RX,RY,IMM)
# define EXTRACT_1(RX,RY,IMM)	and	RX, RY, 0xFFFF
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, IMM
#else
# define SHIFT_1(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
# define SHIFT_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define MERGE_2(RX,RY,IMM)	asl	RX, RY, IMM	; <<
# define EXTRACT_1(RX,RY,IMM)	lsr	RX, RY, IMM
# define EXTRACT_2(RX,RY,IMM)	lsr	RX, RY, 0x08
#endif

/*
 * Building blocks of the aligned copy loop (CASE 0).
 *
 *   LOADX / STOREX  one load / store with post-increment of the pointer:
 *                   8 bytes when 64-bit load/store (LL64) is available,
 *                   4 bytes otherwise.
 *   PREFETCH_*      prefetch ahead of the read / write pointer.
 *   ZOLSHFT         log2 of the bytes moved per loop iteration; the loop
 *                   holds 4 LOADX/STOREX pairs, i.e. 32 or 16 bytes.
 *   ZOLAND          mask giving the bytes left over after that loop.
 */
#if defined(__LL64__) || defined(__ARC_LL64__)
# define PREFETCH_READ(RX)	prefetch [RX, 56]
# define PREFETCH_WRITE(RX)	prefetchw [RX, 64]
# define LOADX(DST,RX)		ldd.ab	DST, [RX, 8]
# define STOREX(SRC,RX)		std.ab	SRC, [RX, 8]
# define ZOLSHFT		5
# define ZOLAND			0x1F
#else
# define PREFETCH_READ(RX)	prefetch [RX, 28]
# define PREFETCH_WRITE(RX)	prefetchw [RX, 32]
# define LOADX(DST,RX)		ld.ab	DST, [RX, 4]
# define STOREX(SRC,RX)		st.ab	SRC, [RX, 4]
# define ZOLSHFT		4
# define ZOLAND			0xF
#endif

/*
 * void *memcpy(void *dest, const void *src, size_t n)
 *
 * In:    r0 = destination, r1 = source, r2 = size in bytes
 * Out:   r0 = destination (r0 is never written; r3 is the write pointer)
 * Uses:  r1 (advanced), r2 (decremented), r3-r11, lp_count, flags.
 *        With LL64 each LOADX/STOREX moves a 64-bit register pair
 *        (r6/r7, r8/r9, r10/r11, r4/r5).
 *
 * Strategy:
 *   1. size == 0  -> return immediately.
 *   2. size <= 8  -> plain byte loop.
 *   3. Otherwise copy 4 - (dest & 3) single bytes so that the destination
 *      becomes 32-bit aligned (skipped when it already is).
 *   4. Dispatch on (src & 3):
 *        0        -> CASE 0: unrolled x4 word (or double-word) copy.
 *        1, 2, 3  -> CASE 1..3: read 3, 2 or 1 bytes to align the source,
 *                    then load aligned words, shift/merge them with the
 *                    carried bytes and store aligned words; finally flush
 *                    the carried bytes.
 *   5. The remaining tail is copied byte by byte.
 *
 * All loops are zero-overhead loops set up with lpnz: the loop body runs
 * lp_count times and is skipped altogether when the Z flag is set.
 * Instructions that follow a ".d" branch/jump sit in its delay slot and
 * are executed whether or not the branch is taken.
 */
ENTRY(memcpy)
	prefetch  [r1]		; Prefetch the read location
	prefetchw [r0]		; Prefetch the write location
	mov.f	0, r2		; Flags only: Z is set when size == 0

;;; if size is zero, return at once
	jz.d	[blink]
	mov	r3, r0		; (delay slot) r3 = write pointer, r0 is kept as ret val

;;; if size <= 8, use the plain byte loop
	cmp	r2, 8
	bls.d	@.Lsmallchunk
	mov.f	lp_count, r2	; (delay slot) lp_count = size, Z clear as size != 0

;;; Copy single bytes until the destination is 32bit aligned
	and.f	r4, r0, 0x03	; r4 = dest & 3, Z set when already aligned
	rsub	lp_count, r4, 4	; lp_count = 4 - r4
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	ldb.ab	r5, [r1,1]
	sub	r2, r2, 1
	stb.ab	r5, [r3,1]
.Laligndestination:

;;; Check the alignment of the source
	and.f	r4, r1, 0x03	; r4 = src & 3, selects the case below
	bnz.d	@.Lsourceunaligned

;;; CASE 0: Both source and destination are 32bit aligned
;;; lp_count = number of full chunks, loop body is unfolded x4
	lsr.f	lp_count, r2, ZOLSHFT	; (delay slot of the bnz.d above)
	lpnz	@.Lcopy32_64bytes
	;; LOOP START
	LOADX (r6, r1)
	PREFETCH_READ (r1)
	PREFETCH_WRITE (r3)
	LOADX (r8, r1)
	LOADX (r10, r1)
	LOADX (r4, r1)
	STOREX (r6, r3)
	STOREX (r8, r3)
	STOREX (r10, r3)
	STOREX (r4, r3)
.Lcopy32_64bytes:

	and.f	lp_count, r2, ZOLAND	; Bytes left over after the chunked loop
.Lsmallchunk:
	lpnz	@.Lcopyremainingbytes
	;; LOOP START
	ldb.ab	r5, [r1,1]
	stb.ab	r5, [r3,1]
.Lcopyremainingbytes:

	j	[blink]
;;; END CASE 0

.Lsourceunaligned:
	cmp	r4, 2
	beq.d	@.LunalignedOffby2
	sub	r2, r2, 1	; (delay slot) executed for every unaligned case

	bhi.d	@.LunalignedOffby3
	ldb.ab	r5, [r1, 1]	; (delay slot) executed for off by 1 and off by 3

;;; CASE 1: The source is unaligned, off by 1
	;; The byte read above gives 16bit alignment,
	;; 2 more bytes are read here to reach 32bit alignment
	ldh.ab	r6, [r1, 2]
	sub	r2, r2, 2
	;; Convert to words, unfold x2 (8 bytes per iteration)
	lsr.f	lp_count, r2, 3
	MERGE_1 (r6, r6, 8)
	MERGE_2 (r5, r5, 24)
	or	r5, r5, r6	; r5 = the 3 bytes read so far (carry)

	;; Both src and dst are aligned
	;; (flags are still the ones set by lsr.f)
	lpnz	@.Lcopy8bytes_1
	;; LOOP START
	ld.ab	r6, [r1, 4]
	prefetch [r1, 28]	;Prefetch the next read location
	ld.ab	r8, [r1,4]
	prefetchw [r3, 32]	;Prefetch the next write location

	SHIFT_1	(r7, r6, 24)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 8)

	SHIFT_1	(r9, r8, 24)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 8)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_1:

	;; Write back the remaining 16bits
	EXTRACT_1 (r6, r5, 16)
	sth.ab	r6, [r3, 2]
	;; Write back the remaining 8bits
	EXTRACT_2 (r5, r5, 16)
	stb.ab	r5, [r3, 1]

	and.f	lp_count, r2, 0x07	;Last 0..7 bytes
	lpnz	@.Lcopybytewise_1
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_1:
	j	[blink]

.LunalignedOffby2:
;;; CASE 2: The source is unaligned, off by 2
	ldh.ab	r5, [r1, 2]	; 2 bytes bring the source to 32bit alignment
	sub	r2, r2, 1	; together with the delay-slot sub: size -= 2

	;; Both src and dst are aligned
	;; Convert to words, unfold x2 (8 bytes per iteration)
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only when the loop is going to run: carry goes to the upper half
	asl.nz	r5, r5, 16
#endif
	lpnz	@.Lcopy8bytes_2
	;; LOOP START
	ld.ab	r6, [r1, 4]
	prefetch [r1, 28]	;Prefetch the next read location
	ld.ab	r8, [r1,4]
	prefetchw [r3, 32]	;Prefetch the next write location

	SHIFT_1	(r7, r6, 16)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 16)

	SHIFT_1	(r9, r8, 16)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 16)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_2:

#ifdef __BIG_ENDIAN__
	;; Nothing in the loop updates the flags, so nz still means
	;; that the loop ran: bring the carry back to the lower half
	lsr.nz	r5, r5, 16
#endif
	sth.ab	r5, [r3, 2]	; Write back the carried 16bits

	and.f	lp_count, r2, 0x07	;Last 0..7 bytes
	lpnz	@.Lcopybytewise_2
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_2:
	j	[blink]

.LunalignedOffby3:
;;; CASE 3: The source is unaligned, off by 3
;;; The single byte read in the delay slot above is enough to reach
;;; 32bit alignment

	;; Both src and dst are aligned
	;; Convert to words, unfold x2 (8 bytes per iteration)
	lsr.f	lp_count, r2, 3
#ifdef __BIG_ENDIAN__
	;; Only when the loop is going to run: carry goes to the top byte
	asl.ne	r5, r5, 24
#endif
	lpnz	@.Lcopy8bytes_3
	;; LOOP START
	ld.ab	r6, [r1, 4]
	prefetch [r1, 28]	;Prefetch the next read location
	ld.ab	r8, [r1,4]
	prefetchw [r3, 32]	;Prefetch the next write location

	SHIFT_1	(r7, r6, 8)
	or	r7, r7, r5
	SHIFT_2	(r5, r6, 24)

	SHIFT_1	(r9, r8, 8)
	or	r9, r9, r5
	SHIFT_2	(r5, r8, 24)

	st.ab	r7, [r3, 4]
	st.ab	r9, [r3, 4]
.Lcopy8bytes_3:

#ifdef __BIG_ENDIAN__
	;; Flags are unchanged since lsr.f: undo the shift if the loop ran
	lsr.nz	r5, r5, 24
#endif
	stb.ab	r5, [r3, 1]	; Write back the carried 8bits

	and.f	lp_count, r2, 0x07	;Last 0..7 bytes
	lpnz	@.Lcopybytewise_3
	;; LOOP START
	ldb.ab	r6, [r1,1]
	stb.ab	r6, [r3,1]
.Lcopybytewise_3:
	j	[blink]

END(memcpy)
libc_hidden_def(memcpy)