memmove.S

! Copyright (C) 2013 Imagination Technologies Ltd.
! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
        .text
        .global _memmove
        .type   _memmove,function
! D1Ar1 dst
! D0Ar2 src
! D1Ar3 cnt
! D0Re0 dst
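! In C terms this is void *memmove(void *dst, const void *src, size_t cnt);
! the original dst is returned in D0Re0.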
_memmove:
        CMP     D1Ar3, #0
        MOV     D0Re0, D1Ar1
        BZ      $LEND2
        MSETL   [A0StP], D0.5, D0.6, D0.7
        MOV     D1Ar5, D0Ar2
        CMP     D1Ar1, D1Ar5
        BLT     $Lforwards_copy
        SUB     D0Ar4, D1Ar1, D1Ar3
        ADD     D0Ar4, D0Ar4, #1
        CMP     D0Ar2, D0Ar4
        BLT     $Lforwards_copy
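! Overlap test: fall through to the backward copy only when the buffers
! overlap with dst inside the source region.  Roughly, in C (illustrative,
! not part of the original source):
!     if (dst < src || src + cnt <= dst)
!         goto forwards_copy;   /* a forward copy is safe */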
! should copy backwards
        MOV     D1Re0, D0Ar2
! advance both pointers to the end of their buffers
        ADD     D0Ar2, D1Re0, D1Ar3
        ADD     D1Ar1, D1Ar1, D1Ar3
        MOV     A1.2, D0Ar2
        MOV     A0.2, D1Ar1
        CMP     D1Ar3, #8
        BLT     $Lbbyte_loop
        MOV     D0Ar4, D0Ar2
        MOV     D1Ar5, D1Ar1
! test 8-byte alignment
        ANDS    D1Ar5, D1Ar5, #7
        BNE     $Lbdest_unaligned
        ANDS    D0Ar4, D0Ar4, #7
        BNE     $Lbsrc_unaligned
        LSR     D1Ar5, D1Ar3, #3
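! Source and destination are both 8-byte aligned: move D1Ar5 = cnt / 8
! doublewords with pre-decrement 64-bit loads and stores.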
$Lbaligned_loop:
        GETL    D0Re0, D1Re0, [--A1.2]
        SETL    [--A0.2], D0Re0, D1Re0
        SUBS    D1Ar5, D1Ar5, #1
        BNE     $Lbaligned_loop
        ANDS    D1Ar3, D1Ar3, #7
        BZ      $Lbbyte_loop_exit
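! Copy whatever bytes remain (D1Ar3), one at a time.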
$Lbbyte_loop:
        GETB    D1Re0, [--A1.2]
        SETB    [--A0.2], D1Re0
        SUBS    D1Ar3, D1Ar3, #1
        BNE     $Lbbyte_loop
$Lbbyte_loop_exit:
        MOV     D0Re0, A0.2
$LEND:
        SUB     A0.2, A0StP, #24
        MGETL   D0.5, D0.6, D0.7, [A0.2]
        SUB     A0StP, A0StP, #24
$LEND2:
        MOV     PC, D1RtP
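! Destination end is not 8-byte aligned: copy single bytes backwards
! until it is (on entry D1Ar5 holds the end pointer's low three bits),
! then continue with the source-alignment handling below.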
$Lbdest_unaligned:
        GETB    D0Re0, [--A1.2]
        SETB    [--A0.2], D0Re0
        SUBS    D1Ar5, D1Ar5, #1
        SUB     D1Ar3, D1Ar3, #1
        BNE     $Lbdest_unaligned
        CMP     D1Ar3, #8
        BLT     $Lbbyte_loop
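! Destination is now aligned but the source may not be.  Round the
! source pointer up to the next 8-byte boundary, remember the gap, and
! rebuild each output word from two neighbouring aligned source words
! with complementary shifts (a funnel shift).  For one 32-bit word,
! roughly (illustrative C, not part of the original source):
!     out = (hi << gap_bits) | (lo >> (32 - gap_bits));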
$Lbsrc_unaligned:
        LSR     D1Ar5, D1Ar3, #3
! adjust A1.2
        MOV     D0Ar4, A1.2
! save original address
        MOV     D0Ar6, A1.2
        ADD     D0Ar4, D0Ar4, #7
        ANDMB   D0Ar4, D0Ar4, #0xfff8
! new address is the 8-byte aligned one above the original
        MOV     A1.2, D0Ar4
! A0.2 (dst) is already 64-bit aligned
! measure the gap size
        SUB     D0Ar6, D0Ar4, D0Ar6
        MOVS    D0Ar4, D0Ar6
! keep the gap for the later pointer adjustment
! both aligned
        BZ      $Lbaligned_loop
! prefetch the first aligned doubleword
        GETL    D0Re0, D1Re0, [--A1.2]
        CMP     D0Ar6, #4
        BLT     $Lbunaligned_1_2_3
! 32-bit aligned
        BZ      $Lbaligned_4
        SUB     D0Ar6, D0Ar6, #4
! D1.6 stores the gap size in bits
        MULW    D1.6, D0Ar6, #8
        MOV     D0.6, #32
! D0.6 stores the complement of the gap size
        SUB     D0.6, D0.6, D1.6
$Lbunaligned_5_6_7:
        GETL    D0.7, D1.7, [--A1.2]
! form 64-bit data in D0Re0, D1Re0
        MOV     D1Re0, D0Re0
! D1Re0 << gap-size
        LSL     D1Re0, D1Re0, D1.6
        MOV     D0Re0, D1.7
! D0Re0 >> complement
        LSR     D0Re0, D0Re0, D0.6
        MOV     D1.5, D0Re0
! combine the two parts
        ADD     D1Re0, D1Re0, D1.5
        MOV     D1.5, D1.7
        LSL     D1.5, D1.5, D1.6
        MOV     D0Re0, D0.7
        LSR     D0Re0, D0Re0, D0.6
        MOV     D0.5, D1.5
        ADD     D0Re0, D0Re0, D0.5
        SETL    [--A0.2], D0Re0, D1Re0
        MOV     D0Re0, D0.7
        MOV     D1Re0, D1.7
        SUBS    D1Ar5, D1Ar5, #1
        BNE     $Lbunaligned_5_6_7
        ANDS    D1Ar3, D1Ar3, #7
        BZ      $Lbbyte_loop_exit
! Adjust A1.2: A1.2 <- A1.2 + 8 - gap
        ADD     A1.2, A1.2, #8
        SUB     A1.2, A1.2, D0Ar4
        B       $Lbbyte_loop
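! Gap of 1-3 bytes: the same funnel-shift loop, with the shift counts
! derived from the whole gap rather than gap - 4.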
$Lbunaligned_1_2_3:
        MULW    D1.6, D0Ar6, #8
        MOV     D0.6, #32
        SUB     D0.6, D0.6, D1.6
$Lbunaligned_1_2_3_loop:
        GETL    D0.7, D1.7, [--A1.2]
! form 64-bit data in D0Re0, D1Re0
        LSL     D1Re0, D1Re0, D1.6
! save D0Re0 for later use
        MOV     D0.5, D0Re0
        LSR     D0Re0, D0Re0, D0.6
        MOV     D1.5, D0Re0
        ADD     D1Re0, D1Re0, D1.5
! original D0Re0 was saved in D0.5
        MOV     D1.5, D0.5
        LSL     D1.5, D1.5, D1.6
        MOV     D0Re0, D1.7
        LSR     D0Re0, D0Re0, D0.6
        MOV     D0.5, D1.5
        ADD     D0Re0, D0Re0, D0.5
        SETL    [--A0.2], D0Re0, D1Re0
        MOV     D0Re0, D0.7
        MOV     D1Re0, D1.7
        SUBS    D1Ar5, D1Ar5, #1
        BNE     $Lbunaligned_1_2_3_loop
        ANDS    D1Ar3, D1Ar3, #7
        BZ      $Lbbyte_loop_exit
! Adjust A1.2: A1.2 <- A1.2 + 8 - gap
        ADD     A1.2, A1.2, #8
        SUB     A1.2, A1.2, D0Ar4
        B       $Lbbyte_loop
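! Gap of exactly 4 bytes: the source is 32-bit aligned, so each output
! doubleword is two neighbouring words re-paired, with no shifting.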
$Lbaligned_4:
        GETL    D0.7, D1.7, [--A1.2]
        MOV     D1Re0, D0Re0
        MOV     D0Re0, D1.7
        SETL    [--A0.2], D0Re0, D1Re0
        MOV     D0Re0, D0.7
        MOV     D1Re0, D1.7
        SUBS    D1Ar5, D1Ar5, #1
        BNE     $Lbaligned_4
        ANDS    D1Ar3, D1Ar3, #7
        BZ      $Lbbyte_loop_exit
! Adjust A1.2
        ADD     A1.2, A1.2, #8
        SUB     A1.2, A1.2, D0Ar4
        B       $Lbbyte_loop
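! Forward copy: the mirror image of the backward path, walking upwards
! from the start of both buffers.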
$Lforwards_copy:
        MOV     A1.2, D0Ar2
        MOV     A0.2, D1Ar1
        CMP     D1Ar3, #8
        BLT     $Lfbyte_loop
        MOV     D0Ar4, D0Ar2
        MOV     D1Ar5, D1Ar1
        ANDS    D1Ar5, D1Ar5, #7
        BNE     $Lfdest_unaligned
        ANDS    D0Ar4, D0Ar4, #7
        BNE     $Lfsrc_unaligned
        LSR     D1Ar5, D1Ar3, #3
$Lfaligned_loop:
        GETL    D0Re0, D1Re0, [A1.2++]
        SUBS    D1Ar5, D1Ar5, #1
        SETL    [A0.2++], D0Re0, D1Re0
        BNE     $Lfaligned_loop
        ANDS    D1Ar3, D1Ar3, #7
        BZ      $Lfbyte_loop_exit
$Lfbyte_loop:
        GETB    D1Re0, [A1.2++]
        SETB    [A0.2++], D1Re0
        SUBS    D1Ar3, D1Ar3, #1
        BNE     $Lfbyte_loop
$Lfbyte_loop_exit:
        MOV     D0Re0, D1Ar1
        B       $LEND
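! Destination start is not 8-byte aligned: copy single bytes forward
! until it is (D1Ar5 counts up from dst & 7 to 8), then continue with
! the source-alignment handling below.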
$Lfdest_unaligned:
        GETB    D0Re0, [A1.2++]
        ADD     D1Ar5, D1Ar5, #1
        SUB     D1Ar3, D1Ar3, #1
        SETB    [A0.2++], D0Re0
        CMP     D1Ar5, #8
        BNE     $Lfdest_unaligned
        CMP     D1Ar3, #8
        BLT     $Lfbyte_loop
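! Forward variant of the unaligned-source case: round the source
! pointer down to an 8-byte boundary, keep the gap in D0Ar4 for the
! final pointer adjustment, and funnel-shift neighbouring aligned
! words as in the backward path.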
$Lfsrc_unaligned:
! adjust A1.2
        LSR     D1Ar5, D1Ar3, #3
        MOV     D0Ar4, A1.2
        MOV     D0Ar6, A1.2
        ANDMB   D0Ar4, D0Ar4, #0xfff8
        MOV     A1.2, D0Ar4
! A0.2 (dst) is already 64-bit aligned
        SUB     D0Ar6, D0Ar6, D0Ar4
! keep the gap for the later pointer adjustment
        MOVS    D0Ar4, D0Ar6
! both aligned
        BZ      $Lfaligned_loop
! prefetch the first aligned doubleword
        GETL    D0Re0, D1Re0, [A1.2]
        CMP     D0Ar6, #4
        BLT     $Lfunaligned_1_2_3
        BZ      $Lfaligned_4
        SUB     D0Ar6, D0Ar6, #4
        MULW    D0.6, D0Ar6, #8
        MOV     D1.6, #32
        SUB     D1.6, D1.6, D0.6
$Lfunaligned_5_6_7:
        GETL    D0.7, D1.7, [++A1.2]
! form 64-bit data in D0Re0, D1Re0
        MOV     D0Re0, D1Re0
        LSR     D0Re0, D0Re0, D0.6
        MOV     D1Re0, D0.7
        LSL     D1Re0, D1Re0, D1.6
        MOV     D0.5, D1Re0
        ADD     D0Re0, D0Re0, D0.5
        MOV     D0.5, D0.7
        LSR     D0.5, D0.5, D0.6
        MOV     D1Re0, D1.7
        LSL     D1Re0, D1Re0, D1.6
        MOV     D1.5, D0.5
        ADD     D1Re0, D1Re0, D1.5
        SETL    [A0.2++], D0Re0, D1Re0
        MOV     D0Re0, D0.7
        MOV     D1Re0, D1.7
        SUBS    D1Ar5, D1Ar5, #1
        BNE     $Lfunaligned_5_6_7
        ANDS    D1Ar3, D1Ar3, #7
        BZ      $Lfbyte_loop_exit
! Adjust A1.2
        ADD     A1.2, A1.2, D0Ar4
        B       $Lfbyte_loop
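! Gap of 1-3 bytes: the same loop with shift counts taken from the
! whole gap.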
$Lfunaligned_1_2_3:
        MULW    D0.6, D0Ar6, #8
        MOV     D1.6, #32
        SUB     D1.6, D1.6, D0.6
$Lfunaligned_1_2_3_loop:
        GETL    D0.7, D1.7, [++A1.2]
! form 64-bit data in D0Re0, D1Re0
        LSR     D0Re0, D0Re0, D0.6
        MOV     D1.5, D1Re0
        LSL     D1Re0, D1Re0, D1.6
        MOV     D0.5, D1Re0
        ADD     D0Re0, D0Re0, D0.5
        MOV     D0.5, D1.5
        LSR     D0.5, D0.5, D0.6
        MOV     D1Re0, D0.7
        LSL     D1Re0, D1Re0, D1.6
        MOV     D1.5, D0.5
        ADD     D1Re0, D1Re0, D1.5
        SETL    [A0.2++], D0Re0, D1Re0
        MOV     D0Re0, D0.7
        MOV     D1Re0, D1.7
        SUBS    D1Ar5, D1Ar5, #1
        BNE     $Lfunaligned_1_2_3_loop
        ANDS    D1Ar3, D1Ar3, #7
        BZ      $Lfbyte_loop_exit
! Adjust A1.2
        ADD     A1.2, A1.2, D0Ar4
        B       $Lfbyte_loop
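! Gap of exactly 4 bytes: re-pair neighbouring 32-bit words, no
! shifting needed.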
$Lfaligned_4:
        GETL    D0.7, D1.7, [++A1.2]
        MOV     D0Re0, D1Re0
        MOV     D1Re0, D0.7
        SETL    [A0.2++], D0Re0, D1Re0
        MOV     D0Re0, D0.7
        MOV     D1Re0, D1.7
        SUBS    D1Ar5, D1Ar5, #1
        BNE     $Lfaligned_4
        ANDS    D1Ar3, D1Ar3, #7
        BZ      $Lfbyte_loop_exit
! Adjust A1.2
        ADD     A1.2, A1.2, D0Ar4
        B       $Lfbyte_loop
        .size _memmove,.-_memmove
libc_hidden_def(memmove)