/* Copy SIZE bytes from SRC to DEST.
   For SPARC v7.
   Copyright (C) 1996, 1999, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@caip.rutgers.edu>,
		  Eddie C. Dost <ecd@skynet.be> and
		  Jakub Jelinek <jj@ultra.linux.cz>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <sysdep.h>
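/* Layout of the code below:

   - __bcopy swaps its first two arguments and falls through into
     __memmove.  __memmove picks a forward or a backward copy depending
     on how the regions overlap; __memcpy always copies forward.
   - When SRC and DST are mutually word aligned, the bulk of the data is
     moved by 128-byte unrolled ldd/st (or ldd/std) loops; the remaining
     16-byte chunks are copied by computing a jump into an unrolled run
     of *_LASTCHUNK macros.
   - When the two pointers are not mutually word aligned, aligned words
     are loaded from SRC and spliced together with shift-and-or
     sequences (the SMOVE_* macros and the code around labels 61/64).  */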
/* Both these macros have to start with exactly the same insn */
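/* Why: the dispatchers use "be 74f + 4" / "be 82f + 4" with the first
   ldd of the following *_BIGCHUNK loop sitting in the branch delay
   slot.  That ldd executes whether or not the branch is taken, so the
   *_BIGALIGNCHUNK loop is entered 4 bytes past its label; this is only
   correct because both macros begin with the identical instruction.  */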
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	ldd	[%src + offset + 0x10], %t4; \
	ldd	[%src + offset + 0x18], %t6; \
	st	%t0, [%dst + offset + 0x00]; \
	st	%t1, [%dst + offset + 0x04]; \
	st	%t2, [%dst + offset + 0x08]; \
	st	%t3, [%dst + offset + 0x0c]; \
	st	%t4, [%dst + offset + 0x10]; \
	st	%t5, [%dst + offset + 0x14]; \
	st	%t6, [%dst + offset + 0x18]; \
	st	%t7, [%dst + offset + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	ldd	[%src + offset + 0x10], %t4; \
	ldd	[%src + offset + 0x18], %t6; \
	std	%t0, [%dst + offset + 0x00]; \
	std	%t2, [%dst + offset + 0x08]; \
	std	%t4, [%dst + offset + 0x10]; \
	std	%t6, [%dst + offset + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - offset - 0x10], %t0; \
	ldd	[%src - offset - 0x08], %t2; \
	st	%t0, [%dst - offset - 0x10]; \
	st	%t1, [%dst - offset - 0x0c]; \
	st	%t2, [%dst - offset - 0x08]; \
	st	%t3, [%dst - offset - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - offset - 0x10], %t0; \
	ldd	[%src - offset - 0x08], %t2; \
	std	%t0, [%dst - offset - 0x10]; \
	std	%t2, [%dst - offset - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - offset - 0x02], %t0; \
	ldub	[%src - offset - 0x01], %t1; \
	stb	%t0, [%dst - offset - 0x02]; \
	stb	%t1, [%dst - offset - 0x01];

/* Both these macros have to start with exactly the same insn */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - offset - 0x20], %t0; \
	ldd	[%src - offset - 0x18], %t2; \
	ldd	[%src - offset - 0x10], %t4; \
	ldd	[%src - offset - 0x08], %t6; \
	st	%t0, [%dst - offset - 0x20]; \
	st	%t1, [%dst - offset - 0x1c]; \
	st	%t2, [%dst - offset - 0x18]; \
	st	%t3, [%dst - offset - 0x14]; \
	st	%t4, [%dst - offset - 0x10]; \
	st	%t5, [%dst - offset - 0x0c]; \
	st	%t6, [%dst - offset - 0x08]; \
	st	%t7, [%dst - offset - 0x04];

#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - offset - 0x20], %t0; \
	ldd	[%src - offset - 0x18], %t2; \
	ldd	[%src - offset - 0x10], %t4; \
	ldd	[%src - offset - 0x08], %t6; \
	std	%t0, [%dst - offset - 0x20]; \
	std	%t2, [%dst - offset - 0x18]; \
	std	%t4, [%dst - offset - 0x10]; \
	std	%t6, [%dst - offset - 0x08];

#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	st	%t0, [%dst + offset + 0x00]; \
	st	%t1, [%dst + offset + 0x04]; \
	st	%t2, [%dst + offset + 0x08]; \
	st	%t3, [%dst + offset + 0x0c];

#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src + offset + 0x00], %t0; \
	ldub	[%src + offset + 0x01], %t1; \
	stb	%t0, [%dst + offset + 0x00]; \
	stb	%t1, [%dst + offset + 0x01];

#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	srl	%t0, shir, %t5; \
	srl	%t1, shir, %t6; \
	sll	%t0, shil, %t0; \
	or	%t5, %prev, %t5; \
	sll	%t1, shil, %prev; \
	or	%t6, %t0, %t0; \
	srl	%t2, shir, %t1; \
	srl	%t3, shir, %t6; \
	sll	%t2, shil, %t2; \
	or	%t1, %prev, %t1; \
	std	%t4, [%dst + offset + offset2 - 0x04]; \
	std	%t0, [%dst + offset + offset2 + 0x04]; \
	sll	%t3, shil, %prev; \
	or	%t6, %t2, %t4;

#define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	srl	%t0, shir, %t4; \
	srl	%t1, shir, %t5; \
	sll	%t0, shil, %t6; \
	or	%t4, %prev, %t0; \
	sll	%t1, shil, %prev; \
	or	%t5, %t6, %t1; \
	srl	%t2, shir, %t4; \
	srl	%t3, shir, %t5; \
	sll	%t2, shil, %t6; \
	or	%t4, %prev, %t2; \
	sll	%t3, shil, %prev; \
	or	%t5, %t6, %t3; \
	std	%t0, [%dst + offset + offset2 + 0x00]; \
	std	%t2, [%dst + offset + offset2 + 0x08];
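/* SMOVE_CHUNK and SMOVE_ALIGNCHUNK move 16 source bytes per expansion
   when SRC and DST are not mutually word aligned: whole words are
   loaded from the aligned source, each output word is spliced from the
   tail bytes of one source word and the leading bytes of the next
   (shil + shir == 32), and `prev' carries the leftover bits into the
   next expansion.  A rough C sketch of the same idea (illustrative
   only; assumes a big-endian machine with 32-bit unsigned int and a
   word-aligned DST, and the helper name is made up):

   static void
   copy_words_shifted (unsigned char *dst, const unsigned char *src,
		       unsigned int nwords)
   {
     unsigned int align = (unsigned int) src & 3;	// 1, 2 or 3
     const unsigned int *s = (const unsigned int *) (src - align);
     unsigned int *d = (unsigned int *) dst;
     unsigned int shil = 8 * align;	// bits kept from the current word
     unsigned int shir = 32 - shil;	// bits taken from the next word
     unsigned int prev = *s++ << shil;	// bytes of the first partial word
     while (nwords-- > 0)
       {
	 unsigned int cur = *s++;	// reads whole aligned words
	 *d++ = prev | (cur >> shir);	// splice two neighbouring words
	 prev = cur << shil;
       }
   }
*/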
	.text
	.align	4
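/* Align SRC for the backward (high-to-low) copy: move the trailing one
   to three bytes by hand until SRC is word aligned, adjusting DST and
   the count, then rejoin the backward word-copy code at 3f inside
   __memmove.  */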
70:	andcc	%o1, 1, %g0
	be	4f
	andcc	%o1, 2, %g0
	ldub	[%o1 - 1], %g2
	sub	%o1, 1, %o1
	stb	%g2, [%o0 - 1]
	sub	%o2, 1, %o2
	be	3f
	sub	%o0, 1, %o0
4:	lduh	[%o1 - 2], %g2
	sub	%o1, 2, %o1
	sth	%g2, [%o0 - 2]
	sub	%o2, 2, %o2
	b	3f
	sub	%o0, 2, %o0
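/* bcopy (SRC, DST, LEN): swap the first two arguments into the
   memmove/memcpy convention (%o0 = DST, %o1 = SRC) and fall straight
   through into __memmove, so overlapping moves are handled.  */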
	.globl	bcopy
	.set	bcopy,__bcopy
	.hidden	__bcopy
ENTRY(__bcopy)
	mov	%o0, %o3
	mov	%o1, %o0
	mov	%o3, %o1
END(__bcopy)
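/* memmove (DST, SRC, LEN): save DST for the return value, then decide
   the direction.  If DST <= SRC, or if the regions do not overlap
   (SRC + LEN <= DST), copy forward by joining the memcpy code at 9f/0f;
   otherwise point both registers past the end of their buffers and copy
   backward.  */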
	.globl	memmove
	.set	memmove,__memmove
	.hidden	__memmove
ENTRY(__memmove)
	cmp	%o0, %o1
	st	%o0, [%sp + 64]
	bleu	9f
	sub	%o0, %o1, %o4
	add	%o1, %o2, %o3
	cmp	%o3, %o0
	bleu	0f
	andcc	%o4, 3, %o5
	add	%o1, %o2, %o1
	add	%o0, %o2, %o0
	bne	77f
	cmp	%o2, 15
	bleu	91f
	andcc	%o1, 3, %g0
	bne	70b
3:	andcc	%o1, 4, %g0
	be	2f
	mov	%o2, %g1
	ld	[%o1 - 4], %o4
	sub	%g1, 4, %g1
	st	%o4, [%o0 - 4]
	sub	%o1, 4, %o1
	sub	%o0, 4, %o0
2:	andcc	%g1, 0xffffff80, %g6
	be	3f
	andcc	%o0, 4, %g0
	be	74f + 4
5:	RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g6, 128, %g6
	sub	%o1, 128, %o1
	bne	5b
	sub	%o0, 128, %o0
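/* Copy the remaining (LEN & 0x70) bytes in 16-byte chunks by jumping
   into the middle of the RMOVE_LASTCHUNK run below: "call 100f" leaves
   the address of the call in %o7, the helper at 100: hands back that
   address minus %o4 in %o5, and the jmpl lands exactly `chunks'
   expansions before label 72.  Each RMOVE_LASTCHUNK expands to 6
   instructions (24 bytes), hence %o4 = %g6 + %g6/2 = chunks * 24.  The
   same trick is used at labels 102, 103, 104, 106 and 111, with the
   multiplier matching the code size of the macro being entered.  In C
   the idea is a Duff's-device style fall-through switch; a rough sketch
   (illustrative only, forward direction, hypothetical helper name):

   static void
   copy_last_chunks (unsigned int *dst, const unsigned int *src,
		     int chunks)	// 0 .. 3 sixteen-byte chunks here
   {
     dst += 4 * chunks;			// point past the data, as above
     src += 4 * chunks;
     switch (chunks)			// enter the unrolled run mid-way
       {
       case 3:
	 dst[-12] = src[-12]; dst[-11] = src[-11];
	 dst[-10] = src[-10]; dst[-9]  = src[-9];
	 // fall through
       case 2:
	 dst[-8] = src[-8]; dst[-7] = src[-7];
	 dst[-6] = src[-6]; dst[-5] = src[-5];
	 // fall through
       case 1:
	 dst[-4] = src[-4]; dst[-3] = src[-3];
	 dst[-2] = src[-2]; dst[-1] = src[-1];
	 // fall through
       case 0:
	 break;
       }
   }

   The assembly unrolls seven such chunks and selects among them with a
   computed jump rather than a switch.  */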
3:	andcc	%g1, 0x70, %g6
	be	72f
	andcc	%g1, 8, %g0
	srl	%g6, 1, %o4
	mov	%o7, %g2
	add	%g6, %o4, %o4
101:	call	100f
	sub	%o1, %g6, %o1
	mov	%g2, %o7
	jmpl	%o5 + (72f - 101b), %g0
	sub	%o0, %g6, %o0
71:	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
72:	be	73f
	andcc	%g1, 4, %g0
	ldd	[%o1 - 0x08], %g2
	sub	%o0, 8, %o0
	sub	%o1, 8, %o1
	st	%g2, [%o0]
	st	%g3, [%o0 + 0x04]
73:	be	1f
	andcc	%g1, 2, %g0
	ld	[%o1 - 4], %g2
	sub	%o1, 4, %o1
	st	%g2, [%o0 - 4]
	sub	%o0, 4, %o0
1:	be	1f
	andcc	%g1, 1, %g0
	lduh	[%o1 - 2], %g2
	sub	%o1, 2, %o1
	sth	%g2, [%o0 - 2]
	sub	%o0, 2, %o0
1:	be	1f
	nop
	ldub	[%o1 - 1], %g2
	stb	%g2, [%o0 - 1]
1:	retl
	ld	[%sp + 64], %o0
74:	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g6, 128, %g6
	sub	%o1, 128, %o1
	bne	74b
	sub	%o0, 128, %o0
	andcc	%g1, 0x70, %g6
	be	72b
	andcc	%g1, 8, %g0
	srl	%g6, 1, %o4
	mov	%o7, %g2
	add	%g6, %o4, %o4
102:	call	100f
	sub	%o1, %g6, %o1
	mov	%g2, %o7
	jmpl	%o5 + (72b - 102b), %g0
	sub	%o0, %g6, %o0
75:	and	%o2, 0xe, %o3
	mov	%o7, %g2
	sll	%o3, 3, %o4
	sub	%o0, %o3, %o0
103:	call	100f
	sub	%o1, %o3, %o1
	mov	%g2, %o7
	jmpl	%o5 + (76f - 103b), %g0
	andcc	%o2, 1, %g0
	RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
76:	be	1f
	nop
	ldub	[%o1 - 1], %g2
	stb	%g2, [%o0 - 1]
1:	retl
	ld	[%sp + 64], %o0
91:	bne	75b
	andcc	%o2, 8, %g0
	be	1f
	andcc	%o2, 4, %g0
	ld	[%o1 - 0x08], %g2
	ld	[%o1 - 0x04], %g3
	sub	%o1, 8, %o1
	st	%g2, [%o0 - 0x08]
	st	%g3, [%o0 - 0x04]
	sub	%o0, 8, %o0
1:	b	73b
	mov	%o2, %g1
77:	cmp	%o2, 15
	bleu	75b
	andcc	%o0, 3, %g0
	be	64f
	andcc	%o0, 1, %g0
	be	63f
	andcc	%o0, 2, %g0
	ldub	[%o1 - 1], %g5
	sub	%o1, 1, %o1
	stb	%g5, [%o0 - 1]
	sub	%o0, 1, %o0
	be	64f
	sub	%o2, 1, %o2
63:	ldub	[%o1 - 1], %g5
	sub	%o1, 2, %o1
	stb	%g5, [%o0 - 1]
	sub	%o0, 2, %o0
	ldub	[%o1], %g5
	sub	%o2, 2, %o2
	stb	%g5, [%o0]
64:	and	%o1, 3, %g2
	and	%o1, -4, %o1
	and	%o2, 0xc, %g3
	add	%o1, 4, %o1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	sub	%g2, %g4, %g6
	blu	3f
	cmp	%g3, 8
	be	2f
	srl	%o2, 2, %g3
	ld	[%o1 - 4], %o3
	add	%o0, -8, %o0
	ld	[%o1 - 8], %o4
	add	%o1, -16, %o1
	b	7f
	add	%g3, 1, %g3
2:	ld	[%o1 - 4], %o4
	add	%o0, -4, %o0
	ld	[%o1 - 8], %g1
	add	%o1, -12, %o1
	b	8f
	add	%g3, 2, %g3
3:	ld	[%o1 - 4], %o5
	add	%o0, -12, %o0
	ld	[%o1 - 8], %o3
	add	%o1, -20, %o1
	b	6f
	srl	%o2, 2, %g3
4:	ld	[%o1 - 4], %g1
	srl	%o2, 2, %g3
	ld	[%o1 - 8], %o5
	add	%o1, -24, %o1
	add	%o0, -16, %o0
	add	%g3, -1, %g3
	ld	[%o1 + 12], %o3
5:	sll	%o5, %g4, %g2
	srl	%g1, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 12]
6:	ld	[%o1 + 8], %o4
	sll	%o3, %g4, %g2
	srl	%o5, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 8]
7:	ld	[%o1 + 4], %g1
	sll	%o4, %g4, %g2
	srl	%o3, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 4]
8:	ld	[%o1], %o5
	sll	%g1, %g4, %g2
	srl	%o4, %g6, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%o1, -16, %o1
	st	%g2, [%o0]
	add	%o0, -16, %o0
	bne,a	5b
	ld	[%o1 + 12], %o3
	sll	%o5, %g4, %g2
	srl	%g1, %g6, %g5
	srl	%g4, 3, %g3
	or	%g2, %g5, %g2
	add	%o1, %g3, %o1
	andcc	%o2, 2, %g0
	st	%g2, [%o0 + 12]
	be	1f
	andcc	%o2, 1, %g0
	ldub	[%o1 + 15], %g5
	add	%o1, -2, %o1
	stb	%g5, [%o0 + 11]
	add	%o0, -2, %o0
	ldub	[%o1 + 16], %g5
	stb	%g5, [%o0 + 12]
1:	be	1f
	nop
	ldub	[%o1 + 15], %g5
	stb	%g5, [%o0 + 11]
1:	retl
	ld	[%sp + 64], %o0
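/* Align SRC for the forward copy (mirror image of 70: above): copy the
   leading one to three bytes by hand until SRC is word aligned, then
   rejoin the forward word-copy code at 3f inside __memcpy.  */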
78:	andcc	%o1, 1, %g0
	be	4f
	andcc	%o1, 2, %g0
	ldub	[%o1], %g2
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f
	add	%o0, 1, %o0
4:	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	sub	%o2, 2, %o2
	b	3f
	add	%o0, 2, %o0
END(__memmove)
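/* memcpy (DST, SRC, LEN): always copies forward.  %o4 = DST - SRC is
   used to tell whether the two pointers are mutually word aligned
   ((DST - SRC) & 3 == 0); memmove enters at 9f (forward copy, %o4
   already computed) or at 0f (alignment bits already tested).  */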
	.globl	memcpy
	.set	memcpy,__memcpy
	.hidden	__memcpy
ENTRY(__memcpy)	/* %o0=dst %o1=src %o2=len */
	sub	%o0, %o1, %o4
	st	%o0, [%sp + 64]
9:	andcc	%o4, 3, %o5
0:	bne	86f
	cmp	%o2, 15
	bleu	90f
	andcc	%o1, 3, %g0
	bne	78b
3:	andcc	%o1, 4, %g0
	be	2f
	mov	%o2, %g1
	ld	[%o1], %o4
	sub	%g1, 4, %g1
	st	%o4, [%o0]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:	andcc	%g1, 0xffffff80, %g6
	be	3f
	andcc	%o0, 4, %g0
	be	82f + 4
5:	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g6, 128, %g6
	add	%o1, 128, %o1
	bne	5b
	add	%o0, 128, %o0
3:	andcc	%g1, 0x70, %g6
	be	80f
	andcc	%g1, 8, %g0
	srl	%g6, 1, %o4
	mov	%o7, %g2
	add	%g6, %o4, %o4
	add	%o1, %g6, %o1
104:	call	100f
	add	%o0, %g6, %o0
	jmpl	%o5 + (80f - 104b), %g0
	mov	%g2, %o7
79:	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
80:	be	81f
	andcc	%g1, 4, %g0
	ldd	[%o1], %g2
	add	%o0, 8, %o0
	st	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
	st	%g3, [%o0 - 0x04]
81:	be	1f
	andcc	%g1, 2, %g0
	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:	be	1f
	andcc	%g1, 1, %g0
	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:	be	1f
	nop
	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:	retl
	ld	[%sp + 64], %o0
82:	/* ldd_std */
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc	%g6, 128, %g6
	add	%o1, 128, %o1
	bne	82b
	add	%o0, 128, %o0
	andcc	%g1, 0x70, %g6
	be	84f
	andcc	%g1, 8, %g0
	mov	%o7, %g2
111:	call	110f
	add	%o1, %g6, %o1
	mov	%g2, %o7
	jmpl	%o5 + (84f - 111b), %g0
	add	%o0, %g6, %o0
83:	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
84:	be	85f
	andcc	%g1, 4, %g0
	ldd	[%o1], %g2
	add	%o0, 8, %o0
	std	%g2, [%o0 - 0x08]
	add	%o1, 8, %o1
85:	be	1f
	andcc	%g1, 2, %g0
	ld	[%o1], %g2
	add	%o1, 4, %o1
	st	%g2, [%o0]
	add	%o0, 4, %o0
1:	be	1f
	andcc	%g1, 1, %g0
	lduh	[%o1], %g2
	add	%o1, 2, %o1
	sth	%g2, [%o0]
	add	%o0, 2, %o0
1:	be	1f
	nop
	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:	retl
	ld	[%sp + 64], %o0
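/* SRC and DST are not mutually word aligned.  Very short copies
   (LEN <= 6) go straight to the bytewise tail at 88f; copies of 256
   bytes or more use the SMOVE_* code at 87f; everything in between
   word-aligns DST and then splices source words together with the
   shift-and-or loop starting at 61f.  */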
86:	cmp	%o2, 6
	bleu	88f
	cmp	%o2, 256
	bcc	87f
	andcc	%o0, 3, %g0
	be	61f
	andcc	%o0, 1, %g0
	be	60f
	andcc	%o0, 2, %g0
	ldub	[%o1], %g5
	add	%o1, 1, %o1
	stb	%g5, [%o0]
	sub	%o2, 1, %o2
	bne	61f
	add	%o0, 1, %o0
60:	ldub	[%o1], %g3
	add	%o1, 2, %o1
	stb	%g3, [%o0]
	sub	%o2, 2, %o2
	ldub	[%o1 - 1], %g3
	add	%o0, 2, %o0
	stb	%g3, [%o0 - 1]
61:	and	%o1, 3, %g2
	and	%o2, 0xc, %g3
	and	%o1, -4, %o1
	cmp	%g3, 4
	sll	%g2, 3, %g4
	mov	32, %g2
	be	4f
	sub	%g2, %g4, %g6
	blu	3f
	cmp	%g3, 0x8
	be	2f
	srl	%o2, 2, %g3
	ld	[%o1], %o3
	add	%o0, -8, %o0
	ld	[%o1 + 4], %o4
	b	8f
	add	%g3, 1, %g3
2:	ld	[%o1], %o4
	add	%o0, -12, %o0
	ld	[%o1 + 4], %o5
	add	%g3, 2, %g3
	b	9f
	add	%o1, -4, %o1
3:	ld	[%o1], %g1
	add	%o0, -4, %o0
	ld	[%o1 + 4], %o3
	srl	%o2, 2, %g3
	b	7f
	add	%o1, 4, %o1
4:	ld	[%o1], %o5
	cmp	%o2, 7
	ld	[%o1 + 4], %g1
	srl	%o2, 2, %g3
	bleu	10f
	add	%o1, 8, %o1
	ld	[%o1], %o3
	add	%g3, -1, %g3
5:	sll	%o5, %g4, %g2
	srl	%g1, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0]
7:	ld	[%o1 + 4], %o4
	sll	%g1, %g4, %g2
	srl	%o3, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 4]
8:	ld	[%o1 + 8], %o5
	sll	%o3, %g4, %g2
	srl	%o4, %g6, %g5
	or	%g2, %g5, %g2
	st	%g2, [%o0 + 8]
9:	ld	[%o1 + 12], %g1
	sll	%o4, %g4, %g2
	srl	%o5, %g6, %g5
	addcc	%g3, -4, %g3
	or	%g2, %g5, %g2
	add	%o1, 16, %o1
	st	%g2, [%o0 + 12]
	add	%o0, 16, %o0
	bne,a	5b
	ld	[%o1], %o3
10:	sll	%o5, %g4, %g2
	srl	%g1, %g6, %g5
	srl	%g6, 3, %g3
	or	%g2, %g5, %g2
	sub	%o1, %g3, %o1
	andcc	%o2, 2, %g0
	st	%g2, [%o0]
	be	1f
	andcc	%o2, 1, %g0
	ldub	[%o1], %g2
	add	%o1, 2, %o1
	stb	%g2, [%o0 + 4]
	add	%o0, 2, %o0
	ldub	[%o1 - 1], %g2
	stb	%g2, [%o0 + 3]
1:	be	1f
	nop
	ldub	[%o1], %g2
	stb	%g2, [%o0 + 4]
1:	retl
	ld	[%sp + 64], %o0
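/* LEN >= 256 and the pointers are mutually misaligned: align SRC to a
   doubleword so ldd can be used, then dispatch on (DST - SRC) & 3
   (labels 31, 32 and 33 handle the three possible shifts) and copy 64
   bytes per iteration with SMOVE_CHUNK, or with SMOVE_ALIGNCHUNK
   (labels 41, 42 and 43) when the destination alignment allows the
   spliced words to be stored as aligned doublewords directly.  */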
87:	andcc	%o1, 3, %g0
	be	3f
	andcc	%o1, 1, %g0
	be	4f
	andcc	%o1, 2, %g0
	ldub	[%o1], %g2
	add	%o1, 1, %o1
	stb	%g2, [%o0]
	sub	%o2, 1, %o2
	bne	3f
	add	%o0, 1, %o0
4:	lduh	[%o1], %g2
	add	%o1, 2, %o1
	srl	%g2, 8, %g3
	sub	%o2, 2, %o2
	stb	%g3, [%o0]
	add	%o0, 2, %o0
	stb	%g2, [%o0 - 1]
3:	andcc	%o1, 4, %g0
	bne	2f
	cmp	%o5, 1
	ld	[%o1], %o4
	srl	%o4, 24, %g2
	stb	%g2, [%o0]
	srl	%o4, 16, %g3
	stb	%g3, [%o0 + 1]
	srl	%o4, 8, %g2
	stb	%g2, [%o0 + 2]
	sub	%o2, 4, %o2
	stb	%o4, [%o0 + 3]
	add	%o1, 4, %o1
	add	%o0, 4, %o0
2:	be	33f
	cmp	%o5, 2
	be	32f
	sub	%o2, 4, %o2
31:	ld	[%o1], %g2
	add	%o1, 4, %o1
	srl	%g2, 24, %g3
	and	%o0, 7, %g5
	stb	%g3, [%o0]
	cmp	%g5, 7
	sll	%g2, 8, %g1
	add	%o0, 4, %o0
	be	41f
	and	%o2, 0xffffffc0, %o3
	ld	[%o0 - 7], %o4
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	4b
	add	%o0, 64, %o0
	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 16, %g2
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0
	srl	%g1, 16, %g2
1:	st	%o4, [%o0 - 7]
	sth	%g2, [%o0 - 3]
	srl	%g1, 8, %g4
	b	88f
	stb	%g4, [%o0 - 1]
32:	ld	[%o1], %g2
	add	%o1, 4, %o1
	srl	%g2, 16, %g3
	and	%o0, 7, %g5
	sth	%g3, [%o0]
	cmp	%g5, 6
	sll	%g2, 16, %g1
	add	%o0, 4, %o0
	be	42f
	and	%o2, 0xffffffc0, %o3
	ld	[%o0 - 6], %o4
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	4b
	add	%o0, 64, %o0
	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 16, %g2
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0
	srl	%g1, 16, %g2
1:	st	%o4, [%o0 - 6]
	b	88f
	sth	%g2, [%o0 - 2]
33:	ld	[%o1], %g2
	sub	%o2, 4, %o2
	srl	%g2, 24, %g3
	and	%o0, 7, %g5
	stb	%g3, [%o0]
	cmp	%g5, 5
	srl	%g2, 8, %g4
	sll	%g2, 24, %g1
	sth	%g4, [%o0 + 1]
	add	%o1, 4, %o1
	be	43f
	and	%o2, 0xffffffc0, %o3
	ld	[%o0 - 1], %o4
	add	%o0, 4, %o0
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	4b
	add	%o0, 64, %o0
	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 24, %g2
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0
	srl	%g1, 24, %g2
1:	st	%o4, [%o0 - 5]
	b	88f
	stb	%g2, [%o0 - 1]
41:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	41b
	add	%o0, 64, %o0
	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 16, %g2
4:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0
	srl	%g1, 16, %g2
1:	sth	%g2, [%o0 - 3]
	srl	%g1, 8, %g4
	b	88f
	stb	%g4, [%o0 - 1]
43:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	43b
	add	%o0, 64, %o0
	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 24, %g2
4:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0
	srl	%g1, 24, %g2
1:	stb	%g2, [%o0 + 3]
	b	88f
	add	%o0, 4, %o0
42:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc	%o3, 64, %o3
	add	%o1, 64, %o1
	bne	42b
	add	%o0, 64, %o0
	andcc	%o2, 0x30, %o3
	be,a	1f
	srl	%g1, 16, %g2
4:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc	%o3, 16, %o3
	add	%o1, 16, %o1
	bne	4b
	add	%o0, 16, %o0
	srl	%g1, 16, %g2
1:	sth	%g2, [%o0 - 2]
	/* Fall through */
88:	and	%o2, 0xe, %o3
	mov	%o7, %g2
	sll	%o3, 3, %o4
	add	%o0, %o3, %o0
106:	call	100f
	add	%o1, %o3, %o1
	mov	%g2, %o7
	jmpl	%o5 + (89f - 106b), %g0
	andcc	%o2, 1, %g0
	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
89:	be	1f
	nop
	ldub	[%o1], %g2
	stb	%g2, [%o0]
1:	retl
	ld	[%sp + 64], %o0
90:	bne	88b
	andcc	%o2, 8, %g0
	be	1f
	andcc	%o2, 4, %g0
	ld	[%o1 + 0x00], %g2
	ld	[%o1 + 0x04], %g3
	add	%o1, 8, %o1
	st	%g2, [%o0 + 0x00]
	st	%g3, [%o0 + 0x04]
	add	%o0, 8, %o0
1:	b	81b
	mov	%o2, %g1
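/* Helpers for the computed jumps above: "call" leaves the address of
   the call instruction in %o7, and these return that address minus the
   byte offset in %o4 (or %g6) so the caller's jmpl can enter an
   unrolled macro run at the right expansion.  */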
100:	retl
	sub	%o7, %o4, %o5
110:	retl
	sub	%o7, %g6, %o5
END(__memcpy)