
/* Copy SIZE bytes from SRC to DEST.
   For SPARC v7.
   Copyright (C) 1996, 1999, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@caip.rutgers.edu>,
		 Eddie C. Dost <ecd@skynet.be> and
		 Jakub Jelinek <jj@ultra.linux.cz>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <features.h>
/* Both these macros have to start with exactly the same insn: the copy
   loops enter the aligned variant via "be <label> + 4" with the first
   load of the unaligned variant already issued in the branch delay
   slot, so the first (skipped) insn of both must be identical.  */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	ldd	[%src + offset + 0x10], %t4; \
	ldd	[%src + offset + 0x18], %t6; \
	st	%t0, [%dst + offset + 0x00]; \
	st	%t1, [%dst + offset + 0x04]; \
	st	%t2, [%dst + offset + 0x08]; \
	st	%t3, [%dst + offset + 0x0c]; \
	st	%t4, [%dst + offset + 0x10]; \
	st	%t5, [%dst + offset + 0x14]; \
	st	%t6, [%dst + offset + 0x18]; \
	st	%t7, [%dst + offset + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	ldd	[%src + offset + 0x10], %t4; \
	ldd	[%src + offset + 0x18], %t6; \
	std	%t0, [%dst + offset + 0x00]; \
	std	%t2, [%dst + offset + 0x08]; \
	std	%t4, [%dst + offset + 0x10]; \
	std	%t6, [%dst + offset + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - offset - 0x10], %t0; \
	ldd	[%src - offset - 0x08], %t2; \
	st	%t0, [%dst - offset - 0x10]; \
	st	%t1, [%dst - offset - 0x0c]; \
	st	%t2, [%dst - offset - 0x08]; \
	st	%t3, [%dst - offset - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - offset - 0x10], %t0; \
	ldd	[%src - offset - 0x08], %t2; \
	std	%t0, [%dst - offset - 0x10]; \
	std	%t2, [%dst - offset - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - offset - 0x02], %t0; \
	ldub	[%src - offset - 0x01], %t1; \
	stb	%t0, [%dst - offset - 0x02]; \
	stb	%t1, [%dst - offset - 0x01];
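/* The MOVE_* macros above are the unrolled inner loops of the forward
   copy: MOVE_BIGCHUNK moves 32 bytes with four 8-byte ldd loads and
   eight word stores, MOVE_BIGALIGNCHUNK does the same with std and so
   also needs a doubleword-aligned destination, MOVE_LASTCHUNK and
   MOVE_LASTALIGNCHUNK mop up 16 bytes at a time, and MOVE_SHORTCHUNK
   moves two bytes at a time.  */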
/* Both these macros have to start with exactly the same insn, for the
   same "be <label> + 4" delay-slot trick described above.  */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - offset - 0x20], %t0; \
	ldd	[%src - offset - 0x18], %t2; \
	ldd	[%src - offset - 0x10], %t4; \
	ldd	[%src - offset - 0x08], %t6; \
	st	%t0, [%dst - offset - 0x20]; \
	st	%t1, [%dst - offset - 0x1c]; \
	st	%t2, [%dst - offset - 0x18]; \
	st	%t3, [%dst - offset - 0x14]; \
	st	%t4, [%dst - offset - 0x10]; \
	st	%t5, [%dst - offset - 0x0c]; \
	st	%t6, [%dst - offset - 0x08]; \
	st	%t7, [%dst - offset - 0x04];

#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - offset - 0x20], %t0; \
	ldd	[%src - offset - 0x18], %t2; \
	ldd	[%src - offset - 0x10], %t4; \
	ldd	[%src - offset - 0x08], %t6; \
	std	%t0, [%dst - offset - 0x20]; \
	std	%t2, [%dst - offset - 0x18]; \
	std	%t4, [%dst - offset - 0x10]; \
	std	%t6, [%dst - offset - 0x08];

#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	st	%t0, [%dst + offset + 0x00]; \
	st	%t1, [%dst + offset + 0x04]; \
	st	%t2, [%dst + offset + 0x08]; \
	st	%t3, [%dst + offset + 0x0c];

#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src + offset + 0x00], %t0; \
	ldub	[%src + offset + 0x01], %t1; \
	stb	%t0, [%dst + offset + 0x00]; \
	stb	%t1, [%dst + offset + 0x01];
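/* The RMOVE_* macros above are the mirror images of the MOVE_* set,
   used by memmove for descending copies when the regions overlap
   with DST above SRC.  */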
#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	srl	%t0, shir, %t5; \
	srl	%t1, shir, %t6; \
	sll	%t0, shil, %t0; \
	or	%t5, %prev, %t5; \
	sll	%t1, shil, %prev; \
	or	%t6, %t0, %t0; \
	srl	%t2, shir, %t1; \
	srl	%t3, shir, %t6; \
	sll	%t2, shil, %t2; \
	or	%t1, %prev, %t1; \
	std	%t4, [%dst + offset + offset2 - 0x04]; \
	std	%t0, [%dst + offset + offset2 + 0x04]; \
	sll	%t3, shil, %prev; \
	or	%t6, %t2, %t4;

#define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	srl	%t0, shir, %t4; \
	srl	%t1, shir, %t5; \
	sll	%t0, shil, %t6; \
	or	%t4, %prev, %t0; \
	sll	%t1, shil, %prev; \
	or	%t5, %t6, %t1; \
	srl	%t2, shir, %t4; \
	srl	%t3, shir, %t5; \
	sll	%t2, shil, %t6; \
	or	%t4, %prev, %t2; \
	sll	%t3, shil, %prev; \
	or	%t5, %t6, %t3; \
	std	%t0, [%dst + offset + offset2 + 0x00]; \
	std	%t2, [%dst + offset + offset2 + 0x08];
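/* The SMOVE_* macros above implement shift-and-merge copying for the
   case where source and destination disagree in their alignment
   within a word: each aligned source word is split with srl/sll
   (shil + shir == 32) and or'ed with the leftover bits carried in
   "prev" from the previous word, so every load and store stays
   aligned.  */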
	.text
	.align	4
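/* 70: byte/halfword-aligns the source pointer from the top end for a
   descending copy, then rejoins memmove's backward word path at the
   3: label inside memmove below.  */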
70:	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	ldub	[%o1 - 1], %g2
	sub	%o1, 1, %o1
	stb	%g2, [%o0 - 1]
	sub	%o2, 1, %o2
	be	3f
	 sub	%o0, 1, %o0
4:	lduh	[%o1 - 2], %g2
	sub	%o1, 2, %o1
	sth	%g2, [%o0 - 2]
	sub	%o2, 2, %o2
	b	3f
	 sub	%o0, 2, %o0
#ifdef __UCLIBC_SUSV3_LEGACY__
ENTRY(bcopy)
	mov	%o0, %o3
	mov	%o1, %o0
	mov	%o3, %o1
END(bcopy)
#endif
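/* bcopy above simply swaps its (src, dst) arguments into memmove's
   (dst, src) order and falls through.  memmove itself copies forward
   via the memcpy paths when DST <= SRC or when the regions do not
   overlap, and only copies backward for an overlapping DST > SRC.  */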
ENTRY(memmove)
	cmp	%o0, %o1
	st	%o0, [%sp + 64]
	bleu	9f
	 sub	%o0, %o1, %o4

	add	%o1, %o2, %o3
	cmp	%o3, %o0
	bleu	0f
	 andcc	%o4, 3, %o5

	add	%o1, %o2, %o1
	add	%o0, %o2, %o0
	bne	77f
	 cmp	%o2, 15

	bleu	91f
	 andcc	%o1, 3, %g0

	bne	70b
3:	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	ld	[%o1 - 4], %o4
	sub	%g1, 4, %g1
	st	%o4, [%o0 - 4]
	sub	%o1, 4, %o1
	sub	%o0, 4, %o0
2:	andcc	%g1, 0xffffff80, %g6
	be	3f
	 andcc	%o0, 4, %g0

	be	74f + 4
5:	RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g6, 128, %g6
	sub	%o1, 128, %o1
	bne	5b
	 sub	%o0, 128, %o0
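/* Computed jump into the RMOVE_LASTCHUNK table: %g6 holds the
   remaining length & 0x70 (0..112 bytes).  Each RMOVE_LASTCHUNK
   expansion is six insns (24 bytes) and copies 16 bytes, so the entry
   point lies 3/2 * %g6 bytes before label 72.  "call 100f" captures
   the PC in %o7; the helper at 100: returns %o7 - %o4 in %o5, and the
   jmpl then lands that far into the unrolled sequence.  */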
3:	andcc	%g1, 0x70, %g6
	be	72f
	 andcc	%g1, 8, %g0

	srl	%g6, 1, %o4
	mov	%o7, %g2
	add	%g6, %o4, %o4
101:	call	100f
	 sub	%o1, %g6, %o1
	mov	%g2, %o7
	jmpl	%o5 + (72f - 101b), %g0
	 sub	%o0, %g6, %o0
71:	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
72:	be	73f
	 andcc	%g1, 4, %g0

	ldd	[%o1 - 0x08], %g2
	sub	%o0, 8, %o0
	sub	%o1, 8, %o1
	st	%g2, [%o0]
	st	%g3, [%o0 + 0x04]
73:	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1 - 4], %g2
	sub	%o1, 4, %o1
	st	%g2, [%o0 - 4]
	sub	%o0, 4, %o0
1:	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1 - 2], %g2
	sub	%o1, 2, %o1
	sth	%g2, [%o0 - 2]
	sub	%o0, 2, %o0
1:	be	1f
	 nop

	ldub	[%o1 - 1], %g2
	stb	%g2, [%o0 - 1]
1:	retl
	 ld	[%sp + 64], %o0
74:	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g6, 128, %g6
	sub	%o1, 128, %o1
	bne	74b
	 sub	%o0, 128, %o0

	andcc	%g1, 0x70, %g6
	be	72b
	 andcc	%g1, 8, %g0

	srl	%g6, 1, %o4
	mov	%o7, %g2
	add	%g6, %o4, %o4
102:	call	100f
	 sub	%o1, %g6, %o1
	mov	%g2, %o7
	jmpl	%o5 + (72b - 102b), %g0
	 sub	%o0, %g6, %o0
75:	and	%o2, 0xe, %o3
	mov	%o7, %g2
	sll	%o3, 3, %o4
	sub	%o0, %o3, %o0
103:	call	100f
	 sub	%o1, %o3, %o1
	mov	%g2, %o7
	jmpl	%o5 + (76f - 103b), %g0
	 andcc	%o2, 1, %g0
	RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
76:	be	1f
	 nop

	ldub	[%o1 - 1], %g2
	stb	%g2, [%o0 - 1]
1:	retl
	 ld	[%sp + 64], %o0
91:	bne	75b
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	ld	[%o1 - 0x08], %g2
	ld	[%o1 - 0x04], %g3
	sub	%o1, 8, %o1
	st	%g2, [%o0 - 0x08]
	st	%g3, [%o0 - 0x04]
	sub	%o0, 8, %o0
1:	b	73b
	 mov	%o2, %g1
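/* 77: is in effect the descending analogue of the misaligned path at
   86: below: byte-align the destination from the top end, then merge
   shifted halves of aligned source words while walking downward.  */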
77:	cmp	%o2, 15
	bleu	75b
	 andcc	%o0, 3, %g0

	be	64f
	 andcc	%o0, 1, %g0

	be	63f
	 andcc	%o0, 2, %g0

	ldub	[%o1 - 1], %g5
	sub	%o1, 1, %o1
	stb	%g5, [%o0 - 1]
	sub	%o0, 1, %o0
	be	64f
	 sub	%o2, 1, %o2
63:	ldub	[%o1 - 1], %g5
	sub	%o1, 2, %o1
	stb	%g5, [%o0 - 1]
	sub	%o0, 2, %o0
	ldub	[%o1], %g5
	sub	%o2, 2, %o2
	stb	%g5, [%o0]
64:	and	%o1, 3, %g2
	and	%o1, -4, %o1
	and	%o2, 0xc, %g3
	add	%o1, 4, %o1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	 sub	%g2, %g4, %g6

	blu	3f
	 cmp	%g3, 8

	be	2f
	 srl	%o2, 2, %g3

	ld	[%o1 - 4], %o3
	add	%o0, -8, %o0
	ld	[%o1 - 8], %o4
	add	%o1, -16, %o1
	b	7f
	 add	%g3, 1, %g3
2:	ld	[%o1 - 4], %o4
	add	%o0, -4, %o0
	ld	[%o1 - 8], %g1
	add	%o1, -12, %o1
	b	8f
	 add	%g3, 2, %g3
3:	ld	[%o1 - 4], %o5
	add	%o0, -12, %o0
	ld	[%o1 - 8], %o3
	add	%o1, -20, %o1
	b	6f
	 srl	%o2, 2, %g3
4:	ld	[%o1 - 4], %g1
	srl	%o2, 2, %g3
	ld	[%o1 - 8], %o5
	add	%o1, -24, %o1
	add	%o0, -16, %o0
	add	%g3, -1, %g3

	ld	[%o1 + 12], %o3
5:	sll	%o5, %g4, %g2
	srl	%g1, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 12]
6:	ld	[%o1 + 8], %o4
	sll	%o3, %g4, %g2
	srl	%o5, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 8]
7:	ld	[%o1 + 4], %g1
	sll	%o4, %g4, %g2
	srl	%o3, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 4]
8:	ld	[%o1], %o5
	sll	%g1, %g4, %g2
	srl	%o4, %g6, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%o1, -16, %o1
	st	%g2, [%o0]
	add	%o0, -16, %o0
	bne,a	5b
	 ld	[%o1 + 12], %o3

	sll	%o5, %g4, %g2
	srl	%g1, %g6, %g5
	srl	%g4, 3, %g3
	or	%g2, %g5, %g2
	add	%o1, %g3, %o1
	andcc	%o2, 2, %g0
	st	%g2, [%o0 + 12]
	be	1f
	 andcc	%o2, 1, %g0

	ldub	[%o1 + 15], %g5
	add	%o1, -2, %o1
	stb	%g5, [%o0 + 11]
	add	%o0, -2, %o0
	ldub	[%o1 + 16], %g5
	stb	%g5, [%o0 + 12]
1:	be	1f
	 nop
	ldub	[%o1 + 15], %g5
	stb	%g5, [%o0 + 11]
1:	retl
	 ld	[%sp + 64], %o0
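/* 78: mirrors 70: for ascending copies: peel off a leading byte
   and/or halfword until the source is word aligned, then rejoin
   memcpy's word-copy path at the 3: label below.  */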
78:	andcc	%o1, 1, %g0
	be	4f
	 andcc	%o1, 2, %g0

	ldub	[%o1], %g2
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0
4:	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	sub	%o2, 2, %o2
	b	3f
	 add	%o0, 2, %o0
END(memmove)
libc_hidden_def(memmove)
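/* memcpy saves the return value on the stack, then dispatches on
   (DST - SRC) & 3: if it is zero, both pointers can be brought to
   word alignment together and the copy runs through the word and
   doubleword loops; otherwise it branches to 86f for the
   shift-and-merge paths.  memmove's forward case re-enters at 9:
   and 0:.  */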
ENTRY(memcpy)	/* %o0=dst %o1=src %o2=len */
	sub	%o0, %o1, %o4
	st	%o0, [%sp + 64]
9:	andcc	%o4, 3, %o5
0:	bne	86f
	 cmp	%o2, 15

	bleu	90f
	 andcc	%o1, 3, %g0

	bne	78b
3:	 andcc	%o1, 4, %g0

	be	2f
	 mov	%o2, %g1

	ld	[%o1], %o4
	sub	%g1, 4, %g1
	st	%o4, [%o0]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:	andcc	%g1, 0xffffff80, %g6
	be	3f
	 andcc	%o0, 4, %g0

	be	82f + 4
5:	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g6, 128, %g6
	add	%o1, 128, %o1
	bne	5b
	 add	%o0, 128, %o0
3:	andcc	%g1, 0x70, %g6
	be	80f
	 andcc	%g1, 8, %g0

	srl	%g6, 1, %o4
	mov	%o7, %g2
	add	%g6, %o4, %o4
	add	%o1, %g6, %o1
104:	call	100f
	 add	%o0, %g6, %o0
	jmpl	%o5 + (80f - 104b), %g0
	 mov	%g2, %o7
79:	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
80:	be	81f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	st	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
	st	%g3, [%o0 - 0x04]
81:	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:	retl
	 ld	[%sp + 64], %o0
82:	/* ldd_std */
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g6, 128, %g6
	add	%o1, 128, %o1
	bne	82b
	 add	%o0, 128, %o0

	andcc	%g1, 0x70, %g6
	be	84f
	 andcc	%g1, 8, %g0

	mov	%o7, %g2
111:	call	110f
	 add	%o1, %g6, %o1
	mov	%g2, %o7
	jmpl	%o5 + (84f - 111b), %g0
	 add	%o0, %g6, %o0
83:	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
84:	be	85f
	 andcc	%g1, 4, %g0

	ldd	[%o1], %g2
	add	%o0, 8, %o0
	std	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
85:	be	1f
	 andcc	%g1, 2, %g0

	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:	be	1f
	 andcc	%g1, 1, %g0

	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:	retl
	 ld	[%sp + 64], %o0
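/* 86: forward copy when SRC and DST are mutually misaligned within a
   word.  Lengths of at most 6 bytes go straight to the byte tail at
   88:, lengths below 256 word-align the destination and use the
   sll/srl merge loop at 5: below, and big copies (>= 256 bytes) take
   the SMOVE_* doubleword path at 87:.  */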
86:	cmp	%o2, 6
	bleu	88f
	 cmp	%o2, 256

	bcc	87f
	 andcc	%o0, 3, %g0

	be	61f
	 andcc	%o0, 1, %g0

	be	60f
	 andcc	%o0, 2, %g0

	ldub	[%o1], %g5
	add	%o1, 1, %o1
	stb	%g5, [%o0]
	sub	%o2, 1, %o2
	bne	61f
	 add	%o0, 1, %o0
60:	ldub	[%o1], %g3
	add	%o1, 2, %o1
	stb	%g3, [%o0]
	sub	%o2, 2, %o2
	ldub	[%o1 - 1], %g3
	add	%o0, 2, %o0
	stb	%g3, [%o0 - 1]
61:	and	%o1, 3, %g2
	and	%o2, 0xc, %g3
	and	%o1, -4, %o1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	 sub	%g2, %g4, %g6

	blu	3f
	 cmp	%g3, 0x8

	be	2f
	 srl	%o2, 2, %g3

	ld	[%o1], %o3
	add	%o0, -8, %o0
	ld	[%o1 + 4], %o4
	b	8f
	 add	%g3, 1, %g3
2:	ld	[%o1], %o4
	add	%o0, -12, %o0
	ld	[%o1 + 4], %o5
	add	%g3, 2, %g3
	b	9f
	 add	%o1, -4, %o1
3:	ld	[%o1], %g1
	add	%o0, -4, %o0
	ld	[%o1 + 4], %o3
	srl	%o2, 2, %g3
	b	7f
	 add	%o1, 4, %o1
4:	ld	[%o1], %o5
	cmp	%o2, 7
	ld	[%o1 + 4], %g1
	srl	%o2, 2, %g3
	bleu	10f
	 add	%o1, 8, %o1

	ld	[%o1], %o3
	add	%g3, -1, %g3
5:	sll	%o5, %g4, %g2
	srl	%g1, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0]
7:	ld	[%o1 + 4], %o4
	sll	%g1, %g4, %g2
	srl	%o3, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 4]
8:	ld	[%o1 + 8], %o5
	sll	%o3, %g4, %g2
	srl	%o4, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 8]
9:	ld	[%o1 + 12], %g1
	sll	%o4, %g4, %g2
	srl	%o5, %g6, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%o1, 16, %o1
	st	%g2, [%o0 + 12]
	add	%o0, 16, %o0
	bne,a	5b
	 ld	[%o1], %o3
10:	sll	%o5, %g4, %g2
	srl	%g1, %g6, %g5
	srl	%g6, 3, %g3
	or	%g2, %g5, %g2
	sub	%o1, %g3, %o1
	andcc	%o2, 2, %g0
	st	%g2, [%o0]
	be	1f
	 andcc	%o2, 1, %g0

	ldub	[%o1], %g2
	add	%o1, 2, %o1
	stb	%g2, [%o0 + 4]
	add	%o0, 2, %o0
	ldub	[%o1 - 1], %g2
	stb	%g2, [%o0 + 3]
1:	be	1f
	 nop
	ldub	[%o1], %g2
	stb	%g2, [%o0 + 4]
1:	retl
	 ld	[%sp + 64], %o0
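/* 87: large (>= 256 byte) mutually misaligned copy.  The source is
   first brought to doubleword alignment, the partial word already
   fetched is parked in %g1, and the code dispatches on (DST - SRC) & 3
   (still in %o5) into one of three SMOVE loops.  Each loop moves 64
   bytes per iteration, apparently switching to SMOVE_ALIGNCHUNK with
   pure std stores when the adjusted destination permits it.  */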
87:	andcc	%o1, 3, %g0
	be	3f
	 andcc	%o1, 1, %g0

	be	4f
	 andcc	%o1, 2, %g0

	ldub	[%o1], %g2
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f
	 add	%o0, 1, %o0
4:	lduh	[%o1], %g2
	add	%o1, 2, %o1
	srl	%g2, 8, %g3
	sub	%o2, 2, %o2
	stb	%g3, [%o0]
	add	%o0, 2, %o0
	stb	%g2, [%o0 - 1]
3:	andcc	%o1, 4, %g0
	bne	2f
	 cmp	%o5, 1

	ld	[%o1], %o4
	srl	%o4, 24, %g2
	stb	%g2, [%o0]
	srl	%o4, 16, %g3
	stb	%g3, [%o0 + 1]
	srl	%o4, 8, %g2
	stb	%g2, [%o0 + 2]
	sub	%o2, 4, %o2
	stb	%o4, [%o0 + 3]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:	be	33f
	 cmp	%o5, 2

	be	32f
	 sub	%o2, 4, %o2
31:	ld	[%o1], %g2
	add	%o1, 4, %o1
	srl	%g2, 24, %g3
	and	%o0, 7, %g5
	stb	%g3, [%o0]
	cmp	%g5, 7
	sll	%g2, 8, %g1
	add	%o0, 4, %o0
	be	41f
	 and	%o2, 0xffffffc0, %o3
	ld	[%o0 - 7], %o4
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	4b
	 add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	 srl	%g1, 16, %g2
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	 add	%o0, 16, %o0

	srl	%g1, 16, %g2
1:	st	%o4, [%o0 - 7]
	sth	%g2, [%o0 - 3]
	srl	%g1, 8, %g4
	b	88f
	 stb	%g4, [%o0 - 1]
32:	ld	[%o1], %g2
	add	%o1, 4, %o1
	srl	%g2, 16, %g3
	and	%o0, 7, %g5
	sth	%g3, [%o0]
	cmp	%g5, 6
	sll	%g2, 16, %g1
	add	%o0, 4, %o0
	be	42f
	 and	%o2, 0xffffffc0, %o3
	ld	[%o0 - 6], %o4
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	4b
	 add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	 srl	%g1, 16, %g2
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	 add	%o0, 16, %o0

	srl	%g1, 16, %g2
1:	st	%o4, [%o0 - 6]
	b	88f
	 sth	%g2, [%o0 - 2]
33:	ld	[%o1], %g2
	sub	%o2, 4, %o2
	srl	%g2, 24, %g3
	and	%o0, 7, %g5
	stb	%g3, [%o0]
	cmp	%g5, 5
	srl	%g2, 8, %g4
	sll	%g2, 24, %g1
	sth	%g4, [%o0 + 1]
	add	%o1, 4, %o1
	be	43f
	 and	%o2, 0xffffffc0, %o3

	ld	[%o0 - 1], %o4
	add	%o0, 4, %o0
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	4b
	 add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	 srl	%g1, 24, %g2
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	 add	%o0, 16, %o0

	srl	%g1, 24, %g2
1:	st	%o4, [%o0 - 5]
	b	88f
	 stb	%g2, [%o0 - 1]
41:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	41b
	 add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	 srl	%g1, 16, %g2
4:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	 add	%o0, 16, %o0

	srl	%g1, 16, %g2
1:	sth	%g2, [%o0 - 3]
	srl	%g1, 8, %g4
	b	88f
	 stb	%g4, [%o0 - 1]
43:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	43b
	 add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	 srl	%g1, 24, %g2
4:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	 add	%o0, 16, %o0

	srl	%g1, 24, %g2
1:	stb	%g2, [%o0 + 3]
	b	88f
	 add	%o0, 4, %o0
42:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	42b
	 add	%o0, 64, %o0

	andcc	%o2, 0x30, %o3
	be,a	1f
	 srl	%g1, 16, %g2
4:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	 add	%o0, 16, %o0

	srl	%g1, 16, %g2
1:	sth	%g2, [%o0 - 2]
	/* Fall through */
88:	and	%o2, 0xe, %o3
	mov	%o7, %g2
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
106:	call	100f
	 add	%o1, %o3, %o1
	mov	%g2, %o7
	jmpl	%o5 + (89f - 106b), %g0
	 andcc	%o2, 1, %g0
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
89:	be	1f
	 nop

	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:	retl
	 ld	[%sp + 64], %o0
90:	bne	88b
	 andcc	%o2, 8, %g0

	be	1f
	 andcc	%o2, 4, %g0

	ld	[%o1 + 0x00], %g2
	ld	[%o1 + 0x04], %g3
	add	%o1, 8, %o1
	st	%g2, [%o0 + 0x00]
	st	%g3, [%o0 + 0x04]
	add	%o0, 8, %o0
1:	b	81b
	 mov	%o2, %g1
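/* Helpers for the computed jumps above: invoked with "call Nf", they
   return in %o5 the caller's PC minus the byte offset held in %o4
   (100:) or %g6 (110:), which the following jmpl uses to enter the
   unrolled chunk sequences at the right depth.  */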
100:	retl
	 sub	%o7, %o4, %o5
110:	retl
	 sub	%o7, %g6, %o5
END(memcpy)
libc_hidden_def(memcpy)