/* Copy SIZE bytes from SRC to DEST.
   For SPARC v7.
   Copyright (C) 1996, 1999, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by David S. Miller <davem@caip.rutgers.edu>,
		  Eddie C. Dost <ecd@skynet.be> and
		  Jakub Jelinek <jj@ultra.linux.cz>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
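
#include <sysdep.h>	/* For the ENTRY and END macros.  */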

/* Both these macros have to start with exactly the same insn */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	ldd	[%src + offset + 0x10], %t4; \
	ldd	[%src + offset + 0x18], %t6; \
	st	%t0, [%dst + offset + 0x00]; \
	st	%t1, [%dst + offset + 0x04]; \
	st	%t2, [%dst + offset + 0x08]; \
	st	%t3, [%dst + offset + 0x0c]; \
	st	%t4, [%dst + offset + 0x10]; \
	st	%t5, [%dst + offset + 0x14]; \
	st	%t6, [%dst + offset + 0x18]; \
	st	%t7, [%dst + offset + 0x1c];

#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	ldd	[%src + offset + 0x10], %t4; \
	ldd	[%src + offset + 0x18], %t6; \
	std	%t0, [%dst + offset + 0x00]; \
	std	%t2, [%dst + offset + 0x08]; \
	std	%t4, [%dst + offset + 0x10]; \
	std	%t6, [%dst + offset + 0x18];

#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - offset - 0x10], %t0; \
	ldd	[%src - offset - 0x08], %t2; \
	st	%t0, [%dst - offset - 0x10]; \
	st	%t1, [%dst - offset - 0x0c]; \
	st	%t2, [%dst - offset - 0x08]; \
	st	%t3, [%dst - offset - 0x04];

#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src - offset - 0x10], %t0; \
	ldd	[%src - offset - 0x08], %t2; \
	std	%t0, [%dst - offset - 0x10]; \
	std	%t2, [%dst - offset - 0x08];

#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src - offset - 0x02], %t0; \
	ldub	[%src - offset - 0x01], %t1; \
	stb	%t0, [%dst - offset - 0x02]; \
	stb	%t1, [%dst - offset - 0x01];

/* Both these macros have to start with exactly the same insn */
#define RMOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - offset - 0x20], %t0; \
	ldd	[%src - offset - 0x18], %t2; \
	ldd	[%src - offset - 0x10], %t4; \
	ldd	[%src - offset - 0x08], %t6; \
	st	%t0, [%dst - offset - 0x20]; \
	st	%t1, [%dst - offset - 0x1c]; \
	st	%t2, [%dst - offset - 0x18]; \
	st	%t3, [%dst - offset - 0x14]; \
	st	%t4, [%dst - offset - 0x10]; \
	st	%t5, [%dst - offset - 0x0c]; \
	st	%t6, [%dst - offset - 0x08]; \
	st	%t7, [%dst - offset - 0x04];

#define RMOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
	ldd	[%src - offset - 0x20], %t0; \
	ldd	[%src - offset - 0x18], %t2; \
	ldd	[%src - offset - 0x10], %t4; \
	ldd	[%src - offset - 0x08], %t6; \
	std	%t0, [%dst - offset - 0x20]; \
	std	%t2, [%dst - offset - 0x18]; \
	std	%t4, [%dst - offset - 0x10]; \
	std	%t6, [%dst - offset - 0x08];

#define RMOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	st	%t0, [%dst + offset + 0x00]; \
	st	%t1, [%dst + offset + 0x04]; \
	st	%t2, [%dst + offset + 0x08]; \
	st	%t3, [%dst + offset + 0x0c];

#define RMOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
	ldub	[%src + offset + 0x00], %t0; \
	ldub	[%src + offset + 0x01], %t1; \
	stb	%t0, [%dst + offset + 0x00]; \
	stb	%t1, [%dst + offset + 0x01];

#define SMOVE_CHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	srl	%t0, shir, %t5; \
	srl	%t1, shir, %t6; \
	sll	%t0, shil, %t0; \
	or	%t5, %prev, %t5; \
	sll	%t1, shil, %prev; \
	or	%t6, %t0, %t0; \
	srl	%t2, shir, %t1; \
	srl	%t3, shir, %t6; \
	sll	%t2, shil, %t2; \
	or	%t1, %prev, %t1; \
	std	%t4, [%dst + offset + offset2 - 0x04]; \
	std	%t0, [%dst + offset + offset2 + 0x04]; \
	sll	%t3, shil, %prev; \
	or	%t6, %t2, %t4;

#define SMOVE_ALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, prev, shil, shir, offset2) \
	ldd	[%src + offset + 0x00], %t0; \
	ldd	[%src + offset + 0x08], %t2; \
	srl	%t0, shir, %t4; \
	srl	%t1, shir, %t5; \
	sll	%t0, shil, %t6; \
	or	%t4, %prev, %t0; \
	sll	%t1, shil, %prev; \
	or	%t5, %t6, %t1; \
	srl	%t2, shir, %t4; \
	srl	%t3, shir, %t5; \
	sll	%t2, shil, %t6; \
	or	%t4, %prev, %t2; \
	sll	%t3, shil, %prev; \
	or	%t5, %t6, %t3; \
	std	%t0, [%dst + offset + offset2 + 0x00]; \
	std	%t2, [%dst + offset + offset2 + 0x08];
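
/* Throughout, %o0 is the destination, %o1 the source and %o2 the
   byte count.  The incoming %o0 is spilled to [%sp + 64] and
   reloaded from there as the return value of memcpy/memmove.  */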

	.text
	.align	4
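
/* Alignment fix-up for the backward (descending) copy: move a byte
   and/or a halfword so that %o1 ends up 4-byte aligned, then rejoin
   the word loop at 3f.  */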
70:	andcc		%o1, 1, %g0
	be		4f
	 andcc		%o1, 2, %g0

	ldub		[%o1 - 1], %g2
	sub		%o1, 1, %o1
	stb		%g2, [%o0 - 1]
	sub		%o2, 1, %o2
	be		3f
	 sub		%o0, 1, %o0
4:	lduh		[%o1 - 2], %g2
	sub		%o1, 2, %o1
	sth		%g2, [%o0 - 2]
	sub		%o2, 2, %o2
	b		3f
	 sub		%o0, 2, %o0

ENTRY(bcopy)
	mov		%o0, %o3
	mov		%o1, %o0
	mov		%o3, %o1
END(bcopy)

ENTRY(memmove)
	cmp		%o0, %o1
	st		%o0, [%sp + 64]
	bleu		9f
	 sub		%o0, %o1, %o4

	add		%o1, %o2, %o3
	cmp		%o3, %o0
	bleu		0f
	 andcc		%o4, 3, %o5

	add		%o1, %o2, %o1
	add		%o0, %o2, %o0
	bne		77f
	 cmp		%o2, 15
	bleu		91f
	 andcc		%o1, 3, %g0
	bne		70b
3:	 andcc		%o1, 4, %g0

	be		2f
	 mov		%o2, %g1

	ld		[%o1 - 4], %o4
	sub		%g1, 4, %g1
	st		%o4, [%o0 - 4]
	sub		%o1, 4, %o1
	sub		%o0, 4, %o0
2:	andcc		%g1, 0xffffff80, %g6
	be		3f
	 andcc		%o0, 4, %g0

	be		74f + 4
5:	RMOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc		%g6, 128, %g6
	sub		%o1, 128, %o1
	bne		5b
	 sub		%o0, 128, %o0
3:	andcc		%g1, 0x70, %g6
	be		72f
	 andcc		%g1, 8, %g0
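
	/* Computed jump: each RMOVE_LASTCHUNK expands to 6 insns
	   (24 bytes) and copies 16 bytes, so for a remainder of %g6
	   bytes (a multiple of 16, at most 0x70) the right entry point
	   lies %g6 * 3/2 code bytes before 72f.  100f hands back the
	   address of its call site minus %o4 in %o5; the same trick is
	   used at 102:, 103:, 104:, 106: and 111: below.  */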
	srl		%g6, 1, %o4
	mov		%o7, %g2
	add		%g6, %o4, %o4
101:	call		100f
	 sub		%o1, %g6, %o1
	mov		%g2, %o7
	jmpl		%o5 + (72f - 101b), %g0
	 sub		%o0, %g6, %o0

71:	RMOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	RMOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
72:	be		73f
	 andcc		%g1, 4, %g0

	ldd		[%o1 - 0x08], %g2
	sub		%o0, 8, %o0
	sub		%o1, 8, %o1
	st		%g2, [%o0]
	st		%g3, [%o0 + 0x04]
73:	be		1f
	 andcc		%g1, 2, %g0

	ld		[%o1 - 4], %g2
	sub		%o1, 4, %o1
	st		%g2, [%o0 - 4]
	sub		%o0, 4, %o0
1:	be		1f
	 andcc		%g1, 1, %g0

	lduh		[%o1 - 2], %g2
	sub		%o1, 2, %o1
	sth		%g2, [%o0 - 2]
	sub		%o0, 2, %o0
1:	be		1f
	 nop

	ldub		[%o1 - 1], %g2
	stb		%g2, [%o0 - 1]
1:	retl
	 ld		[%sp + 64], %o0

74:	RMOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	RMOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc		%g6, 128, %g6
	sub		%o1, 128, %o1
	bne		74b
	 sub		%o0, 128, %o0

	andcc		%g1, 0x70, %g6
	be		72b
	 andcc		%g1, 8, %g0

	srl		%g6, 1, %o4
	mov		%o7, %g2
	add		%g6, %o4, %o4
102:	call		100f
	 sub		%o1, %g6, %o1
	mov		%g2, %o7
	jmpl		%o5 + (72b - 102b), %g0
	 sub		%o0, %g6, %o0
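
	/* Short backward copy.  Each RMOVE_SHORTCHUNK is 4 insns
	   (16 bytes) and moves 2 bytes, so the computed jump enters
	   (%o2 & 0xe) * 8 code bytes before 76f.  */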
75:	and		%o2, 0xe, %o3
	mov		%o7, %g2
	sll		%o3, 3, %o4
	sub		%o0, %o3, %o0
103:	call		100f
	 sub		%o1, %o3, %o1
	mov		%g2, %o7
	jmpl		%o5 + (76f - 103b), %g0
	 andcc		%o2, 1, %g0

	RMOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	RMOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
76:	be		1f
	 nop
	ldub		[%o1 - 1], %g2
	stb		%g2, [%o0 - 1]
1:	retl
	 ld		[%sp + 64], %o0

91:	bne		75b
	 andcc		%o2, 8, %g0

	be		1f
	 andcc		%o2, 4, %g0

	ld		[%o1 - 0x08], %g2
	ld		[%o1 - 0x04], %g3
	sub		%o1, 8, %o1
	st		%g2, [%o0 - 0x08]
	st		%g3, [%o0 - 0x04]
	sub		%o0, 8, %o0
1:	b		73b
	 mov		%o2, %g1
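
/* Backward copy where src and dst are mutually misaligned: align %o0,
   then assemble each destination word out of two source words with
   sll/srl/or, shifting by %g4 and %g6.  */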
77:	cmp		%o2, 15
	bleu		75b
	 andcc		%o0, 3, %g0

	be		64f
	 andcc		%o0, 1, %g0

	be		63f
	 andcc		%o0, 2, %g0

	ldub		[%o1 - 1], %g5
	sub		%o1, 1, %o1
	stb		%g5, [%o0 - 1]
	sub		%o0, 1, %o0
	be		64f
	 sub		%o2, 1, %o2
63:	ldub		[%o1 - 1], %g5
	sub		%o1, 2, %o1
	stb		%g5, [%o0 - 1]
	sub		%o0, 2, %o0
	ldub		[%o1], %g5
	sub		%o2, 2, %o2
	stb		%g5, [%o0]
64:	and		%o1, 3, %g2
	and		%o1, -4, %o1
	and		%o2, 0xc, %g3
	add		%o1, 4, %o1
	cmp		%g3, 4
	sll		%g2, 3, %g4
	mov		32, %g2
	be		4f
	 sub		%g2, %g4, %g6

	blu		3f
	 cmp		%g3, 8

	be		2f
	 srl		%o2, 2, %g3

	ld		[%o1 - 4], %o3
	add		%o0, -8, %o0
	ld		[%o1 - 8], %o4
	add		%o1, -16, %o1
	b		7f
	 add		%g3, 1, %g3
2:	ld		[%o1 - 4], %o4
	add		%o0, -4, %o0
	ld		[%o1 - 8], %g1
	add		%o1, -12, %o1
	b		8f
	 add		%g3, 2, %g3
3:	ld		[%o1 - 4], %o5
	add		%o0, -12, %o0
	ld		[%o1 - 8], %o3
	add		%o1, -20, %o1
	b		6f
	 srl		%o2, 2, %g3
4:	ld		[%o1 - 4], %g1
	srl		%o2, 2, %g3
	ld		[%o1 - 8], %o5
	add		%o1, -24, %o1
	add		%o0, -16, %o0
	add		%g3, -1, %g3

	ld		[%o1 + 12], %o3
5:	sll		%o5, %g4, %g2
	srl		%g1, %g6, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 12]
6:	ld		[%o1 + 8], %o4
	sll		%o3, %g4, %g2
	srl		%o5, %g6, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 8]
7:	ld		[%o1 + 4], %g1
	sll		%o4, %g4, %g2
	srl		%o3, %g6, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 4]
8:	ld		[%o1], %o5
	sll		%g1, %g4, %g2
	srl		%o4, %g6, %g5
	addcc		%g3, -4, %g3
	or		%g2, %g5, %g2
	add		%o1, -16, %o1
	st		%g2, [%o0]
	add		%o0, -16, %o0
	bne,a		5b
	 ld		[%o1 + 12], %o3

	sll		%o5, %g4, %g2
	srl		%g1, %g6, %g5
	srl		%g4, 3, %g3
	or		%g2, %g5, %g2
	add		%o1, %g3, %o1
	andcc		%o2, 2, %g0
	st		%g2, [%o0 + 12]
	be		1f
	 andcc		%o2, 1, %g0

	ldub		[%o1 + 15], %g5
	add		%o1, -2, %o1
	stb		%g5, [%o0 + 11]
	add		%o0, -2, %o0
	ldub		[%o1 + 16], %g5
	stb		%g5, [%o0 + 12]
1:	be		1f
	 nop
	ldub		[%o1 + 15], %g5
	stb		%g5, [%o0 + 11]
1:	retl
	 ld		[%sp + 64], %o0
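
/* Alignment fix-up for the forward copy: get %o1 4-byte aligned and
   rejoin the word loop at 3f.  The conditions test %o1 before it is
   advanced, so the branch senses are correct even though they look
   inverted compared to the backward path at 70b.  */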
78:	andcc		%o1, 1, %g0
	be		4f
	 andcc		%o1, 2, %g0

	ldub		[%o1], %g2
	add		%o1, 1, %o1
	stb		%g2, [%o0]
	sub		%o2, 1, %o2
	bne		3f
	 add		%o0, 1, %o0
4:	lduh		[%o1], %g2
	add		%o1, 2, %o1
	sth		%g2, [%o0]
	sub		%o2, 2, %o2
	b		3f
	 add		%o0, 2, %o0
END(memmove)

ENTRY(memcpy)		/* %o0 = dst, %o1 = src, %o2 = len */
	sub		%o0, %o1, %o4
	st		%o0, [%sp + 64]
9:	andcc		%o4, 3, %o5
0:	bne		86f
	 cmp		%o2, 15

	bleu		90f
	 andcc		%o1, 3, %g0

	bne		78b
3:	 andcc		%o1, 4, %g0

	be		2f
	 mov		%o2, %g1

	ld		[%o1], %o4
	sub		%g1, 4, %g1
	st		%o4, [%o0]
	add		%o1, 4, %o1
	add		%o0, 4, %o0
2:	andcc		%g1, 0xffffff80, %g6
	be		3f
	 andcc		%o0, 4, %g0

	be		82f + 4
5:	MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc		%g6, 128, %g6
	add		%o1, 128, %o1
	bne		5b
	 add		%o0, 128, %o0
3:	andcc		%g1, 0x70, %g6
	be		80f
	 andcc		%g1, 8, %g0

	srl		%g6, 1, %o4
	mov		%o7, %g2
	add		%g6, %o4, %o4
	add		%o1, %g6, %o1
104:	call		100f
	 add		%o0, %g6, %o0
	jmpl		%o5 + (80f - 104b), %g0
	 mov		%g2, %o7

79:	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
80:	be		81f
	 andcc		%g1, 4, %g0

	ldd		[%o1], %g2
	add		%o0, 8, %o0
	st		%g2, [%o0 - 0x08]
	add		%o1, 8, %o1
	st		%g3, [%o0 - 0x04]
81:	be		1f
	 andcc		%g1, 2, %g0

	ld		[%o1], %g2
	add		%o1, 4, %o1
	st		%g2, [%o0]
	add		%o0, 4, %o0
1:	be		1f
	 andcc		%g1, 1, %g0

	lduh		[%o1], %g2
	add		%o1, 2, %o1
	sth		%g2, [%o0]
	add		%o0, 2, %o0
1:	be		1f
	 nop

	ldub		[%o1], %g2
	stb		%g2, [%o0]
1:	retl
	 ld		[%sp + 64], %o0

82:	/* ldd_std */
	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
	subcc		%g6, 128, %g6
	add		%o1, 128, %o1
	bne		82b
	 add		%o0, 128, %o0

	andcc		%g1, 0x70, %g6
	be		84f
	 andcc		%g1, 8, %g0

	mov		%o7, %g2
111:	call		110f
	 add		%o1, %g6, %o1
	mov		%g2, %o7
	jmpl		%o5 + (84f - 111b), %g0
	 add		%o0, %g6, %o0

83:	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
84:	be		85f
	 andcc		%g1, 4, %g0

	ldd		[%o1], %g2
	add		%o0, 8, %o0
	std		%g2, [%o0 - 0x08]
	add		%o1, 8, %o1
85:	be		1f
	 andcc		%g1, 2, %g0

	ld		[%o1], %g2
	add		%o1, 4, %o1
	st		%g2, [%o0]
	add		%o0, 4, %o0
1:	be		1f
	 andcc		%g1, 1, %g0

	lduh		[%o1], %g2
	add		%o1, 2, %o1
	sth		%g2, [%o0]
	add		%o0, 2, %o0
1:	be		1f
	 nop

	ldub		[%o1], %g2
	stb		%g2, [%o0]
1:	retl
	 ld		[%sp + 64], %o0
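
/* Forward copy with src and dst mutually misaligned
   (%o5 = (dst - src) & 3 is nonzero).  Up to 6 bytes go through the
   short-chunk code at 88f, 256 bytes or more through the unrolled
   shifting loops at 87f, and everything in between through the
   word-building loop at 61f.  */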
86:	cmp		%o2, 6
	bleu		88f
	 cmp		%o2, 256

	bcc		87f
	 andcc		%o0, 3, %g0

	be		61f
	 andcc		%o0, 1, %g0

	be		60f
	 andcc		%o0, 2, %g0

	ldub		[%o1], %g5
	add		%o1, 1, %o1
	stb		%g5, [%o0]
	sub		%o2, 1, %o2
	bne		61f
	 add		%o0, 1, %o0
60:	ldub		[%o1], %g3
	add		%o1, 2, %o1
	stb		%g3, [%o0]
	sub		%o2, 2, %o2
	ldub		[%o1 - 1], %g3
	add		%o0, 2, %o0
	stb		%g3, [%o0 - 1]
61:	and		%o1, 3, %g2
	and		%o2, 0xc, %g3
	and		%o1, -4, %o1
	cmp		%g3, 4
	sll		%g2, 3, %g4
	mov		32, %g2
	be		4f
	 sub		%g2, %g4, %g6

	blu		3f
	 cmp		%g3, 0x8

	be		2f
	 srl		%o2, 2, %g3

	ld		[%o1], %o3
	add		%o0, -8, %o0
	ld		[%o1 + 4], %o4
	b		8f
	 add		%g3, 1, %g3
2:	ld		[%o1], %o4
	add		%o0, -12, %o0
	ld		[%o1 + 4], %o5
	add		%g3, 2, %g3
	b		9f
	 add		%o1, -4, %o1
3:	ld		[%o1], %g1
	add		%o0, -4, %o0
	ld		[%o1 + 4], %o3
	srl		%o2, 2, %g3
	b		7f
	 add		%o1, 4, %o1
4:	ld		[%o1], %o5
	cmp		%o2, 7
	ld		[%o1 + 4], %g1
	srl		%o2, 2, %g3
	bleu		10f
	 add		%o1, 8, %o1

	ld		[%o1], %o3
	add		%g3, -1, %g3
5:	sll		%o5, %g4, %g2
	srl		%g1, %g6, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0]
7:	ld		[%o1 + 4], %o4
	sll		%g1, %g4, %g2
	srl		%o3, %g6, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 4]
8:	ld		[%o1 + 8], %o5
	sll		%o3, %g4, %g2
	srl		%o4, %g6, %g5
	or		%g2, %g5, %g2
	st		%g2, [%o0 + 8]
9:	ld		[%o1 + 12], %g1
	sll		%o4, %g4, %g2
	srl		%o5, %g6, %g5
	addcc		%g3, -4, %g3
	or		%g2, %g5, %g2
	add		%o1, 16, %o1
	st		%g2, [%o0 + 12]
	add		%o0, 16, %o0
	bne,a		5b
	 ld		[%o1], %o3
10:	sll		%o5, %g4, %g2
	srl		%g1, %g6, %g5
	srl		%g6, 3, %g3
	or		%g2, %g5, %g2
	sub		%o1, %g3, %o1
	andcc		%o2, 2, %g0
	st		%g2, [%o0]
	be		1f
	 andcc		%o2, 1, %g0

	ldub		[%o1], %g2
	add		%o1, 2, %o1
	stb		%g2, [%o0 + 4]
	add		%o0, 2, %o0
	ldub		[%o1 - 1], %g2
	stb		%g2, [%o0 + 3]
1:	be		1f
	 nop
	ldub		[%o1], %g2
	stb		%g2, [%o0 + 4]
1:	retl
	 ld		[%sp + 64], %o0
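
/* Large misaligned copy: align %o1 to 4 bytes (consuming one extra
   word when needed for the 8-byte stores), then dispatch on %o5 to
   31:, 32: or 33:, which build each output doubleword from source
   words with the SMOVE_CHUNK/SMOVE_ALIGNCHUNK shift-and-or
   sequences.  */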
87:	andcc		%o1, 3, %g0
	be		3f
	 andcc		%o1, 1, %g0

	be		4f
	 andcc		%o1, 2, %g0

	ldub		[%o1], %g2
	add		%o1, 1, %o1
	stb		%g2, [%o0]
	sub		%o2, 1, %o2
	bne		3f
	 add		%o0, 1, %o0
4:	lduh		[%o1], %g2
	add		%o1, 2, %o1
	srl		%g2, 8, %g3
	sub		%o2, 2, %o2
	stb		%g3, [%o0]
	add		%o0, 2, %o0
	stb		%g2, [%o0 - 1]
3:	andcc		%o1, 4, %g0

	bne		2f
	 cmp		%o5, 1

	ld		[%o1], %o4
	srl		%o4, 24, %g2
	stb		%g2, [%o0]
	srl		%o4, 16, %g3
	stb		%g3, [%o0 + 1]
	srl		%o4, 8, %g2
	stb		%g2, [%o0 + 2]
	sub		%o2, 4, %o2
	stb		%o4, [%o0 + 3]
	add		%o1, 4, %o1
	add		%o0, 4, %o0
2:	be		33f
	 cmp		%o5, 2

	be		32f
	 sub		%o2, 4, %o2
31:	ld		[%o1], %g2
	add		%o1, 4, %o1
	srl		%g2, 24, %g3
	and		%o0, 7, %g5
	stb		%g3, [%o0]
	cmp		%g5, 7
	sll		%g2, 8, %g1
	add		%o0, 4, %o0
	be		41f
	 and		%o2, 0xffffffc0, %o3

	ld		[%o0 - 7], %o4
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		4b
	 add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	 srl		%g1, 16, %g2
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	 add		%o0, 16, %o0

	srl		%g1, 16, %g2
1:	st		%o4, [%o0 - 7]
	sth		%g2, [%o0 - 3]
	srl		%g1, 8, %g4
	b		88f
	 stb		%g4, [%o0 - 1]
32:	ld		[%o1], %g2
	add		%o1, 4, %o1
	srl		%g2, 16, %g3
	and		%o0, 7, %g5
	sth		%g3, [%o0]
	cmp		%g5, 6
	sll		%g2, 16, %g1
	add		%o0, 4, %o0
	be		42f
	 and		%o2, 0xffffffc0, %o3

	ld		[%o0 - 6], %o4
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		4b
	 add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	 srl		%g1, 16, %g2
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	 add		%o0, 16, %o0

	srl		%g1, 16, %g2
1:	st		%o4, [%o0 - 6]
	b		88f
	 sth		%g2, [%o0 - 2]
33:	ld		[%o1], %g2
	sub		%o2, 4, %o2
	srl		%g2, 24, %g3
	and		%o0, 7, %g5
	stb		%g3, [%o0]
	cmp		%g5, 5
	srl		%g2, 8, %g4
	sll		%g2, 24, %g1
	sth		%g4, [%o0 + 1]
	add		%o1, 4, %o1
	be		43f
	 and		%o2, 0xffffffc0, %o3

	ld		[%o0 - 1], %o4
	add		%o0, 4, %o0
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	SMOVE_CHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		4b
	 add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	 srl		%g1, 24, %g2
4:	SMOVE_CHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, -1)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	 add		%o0, 16, %o0

	srl		%g1, 24, %g2
1:	st		%o4, [%o0 - 5]
	b		88f
	 stb		%g2, [%o0 - 1]
41:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		41b
	 add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	 srl		%g1, 16, %g2
4:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 8, 24, -3)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	 add		%o0, 16, %o0

	srl		%g1, 16, %g2
1:	sth		%g2, [%o0 - 3]
	srl		%g1, 8, %g4
	b		88f
	 stb		%g4, [%o0 - 1]
43:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		43b
	 add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	 srl		%g1, 24, %g2
4:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 24, 8, 3)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	 add		%o0, 16, %o0

	srl		%g1, 24, %g2
1:	stb		%g2, [%o0 + 3]
	b		88f
	 add		%o0, 4, %o0
42:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	SMOVE_ALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc		%o3, 64, %o3
	add		%o1, 64, %o1
	bne		42b
	 add		%o0, 64, %o0

	andcc		%o2, 0x30, %o3
	be,a		1f
	 srl		%g1, 16, %g2
4:	SMOVE_ALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5, o4, o5, g6, g1, 16, 16, -2)
	subcc		%o3, 16, %o3
	add		%o1, 16, %o1
	bne		4b
	 add		%o0, 16, %o0

	srl		%g1, 16, %g2
1:	sth		%g2, [%o0 - 2]
	/* Fall through */
88:	and		%o2, 0xe, %o3
	mov		%o7, %g2
	sll		%o3, 3, %o4
	add		%o0, %o3, %o0
106:	call		100f
	 add		%o1, %o3, %o1
	mov		%g2, %o7
	jmpl		%o5 + (89f - 106b), %g0
	 andcc		%o2, 1, %g0

	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
89:	be		1f
	 nop

	ldub		[%o1], %g2
	stb		%g2, [%o0]
1:	retl
	 ld		[%sp + 64], %o0

90:	bne		88b
	 andcc		%o2, 8, %g0

	be		1f
	 andcc		%o2, 4, %g0

	ld		[%o1 + 0x00], %g2
	ld		[%o1 + 0x04], %g3
	add		%o1, 8, %o1
	st		%g2, [%o0 + 0x00]
	st		%g3, [%o0 + 0x04]
	add		%o0, 8, %o0
1:	b		81b
	 mov		%o2, %g1
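
/* Return-address helpers for the computed jumps above: %o7 holds the
   address of the call insn, so these hand back that address minus
   %o4 (resp. %g6) in %o5.  */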
100:	retl
	 sub		%o7, %o4, %o5
110:	retl
	 sub		%o7, %g6, %o5
END(memcpy)