_memcpy.S

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Adapted for uClibc from NetBSD _memcpy.S,v 1.6 2003/10/09
 * by Erik Andersen <andersen@codepoet.org>
 */

#include <features.h>
#include <endian.h>
/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code, e.g. Iron Maiden
 *
 * For anyone attempting to understand it:
 *
 * The core code is implemented here with simple stubs for memcpy(),
 * memmove() and bcopy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix, a label starting with 'f' is used in the forward
 * copy code while a label starting with 'b' is used in the backwards copy
 * code.  The source and destination addresses determine whether a forward
 * or backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy:
 *     unaligned source address
 *     unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with r0-r3,
 * although r0-r2 have defined uses throughout: r0 = dest, r1 = src,
 * r2 = len.  Additional registers (r4, r5 and lr) are preserved before use.
 *
 * Apologies for the state of the comments ;-)
 */
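/*
 * All the unaligned-source paths below build one aligned output word
 * from two adjacent aligned source words with a shift-and-OR.  Worked
 * example (little-endian, source 1 byte past a word boundary): if w0
 * holds source bytes b0..b3 (b0 at the lowest address) and w1 holds
 * b4..b7, then the first word to store is
 *
 *     (w0 lsr #8) | (w1 lsl #24)  =  b1 | b2<<8 | b3<<16 | b4<<24
 *
 * i.e. bytes b1..b4.  The big-endian variants swap the shift directions,
 * and 2- or 3-byte offsets use 16/16 or 24/8 shift pairs instead.
 */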
.text
.global _memcpy
.hidden _memcpy
.type _memcpy,%function
.align 4

_memcpy:
	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemcpy_backwards

	moveq	r0, #0			/* Quick abort for src == dest (EQ from the cmp above) */
#if defined(__USE_BX__)
	bxeq	lr
#else
	moveq	pc, lr
#endif
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */

.Lmemcpy_ft8:
	/* We have aligned source and destination */
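	/*
	 * r2 is kept biased low here: it already runs 4 short from the subs
	 * above, and the subs below bias it further.  The "(4 from above)"
	 * and "(12 from above)" remarks track this running offset.
	 */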
	subs	r2, r2, #8
	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_floop32

	cmn	r2, #0x10
	ldmgeia	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgeia	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_floop12:
	ldmgeia	r1!, {r3, r12, lr}
	stmgeia	r0!, {r3, r12, lr}
	subges	r2, r2, #0x0c
	bge	.Lmemcpy_floop12

.Lmemcpy_fl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmgeia	r1!, {r3, r12}
	stmgeia	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmeqia	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
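	/*
	 * r2 is now 1, 2 or 3.  The cmp below sets GE for r2 >= 2 and GT
	 * for r2 == 3, so the conditional loads/stores copy exactly r2
	 * bytes with no further branches.
	 */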
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	ldmia	sp!, {r0, pc}
	/* erg - unaligned destination */
.Lmemcpy_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
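	/*
	 * r12 = 4 - (dest & 3), i.e. 1..3 bytes are needed to reach a word
	 * boundary; the cmp above makes the GE/GT ops copy exactly that many.
	 */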
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_fsrcul:
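	/*
	 * Round the source pointer down to a word boundary and preload the
	 * first partial word into lr.  r12 (the source byte offset, 1..3)
	 * selects which shift-and-OR variant to use.
	 */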
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_fsrcul3
	beq	.Lmemcpy_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul1loop16:
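	/*
	 * 16 bytes per pass: four output words are assembled from the word
	 * carried over in lr plus the four words just loaded; the last
	 * loaded word is carried in lr into the next pass.
	 */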
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul1l4

.Lmemcpy_fsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #24
#else
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul1loop4

.Lmemcpy_fsrcul1l4:
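	/* rewind r1 (rounded down earlier) to the next unread source byte */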
	sub	r1, r1, #3
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul2l4

.Lmemcpy_fsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #16
#else
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul2loop4

.Lmemcpy_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul3l4

.Lmemcpy_fsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #8
#else
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul3loop4

.Lmemcpy_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_fl4

.Lmemcpy_backwards:
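	/*
	 * Backward copy: advance both pointers to one past the end and copy
	 * downwards with pre-decrement.  After len bytes r0 is back at dest,
	 * so the return value needs no stack save on this path.
	 */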
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */

.Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
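	/* r4 and lr serve as scratch in the ldm/stm bursts below */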
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemcpy_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_bloop32

.Lmemcpy_bl32:
	cmn	r2, #0x10
	ldmgedb	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmgedb	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmgedb	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmgedb	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemcpy_bl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmgedb	r1!, {r3, r12}
	stmgedb	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#if defined(__USE_BX__)
	bxeq	lr
#else
	moveq	pc, lr			/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
#if defined(__USE_BX__)
	bx	lr
#else
	mov	pc, lr
#endif

	/* erg - unaligned destination */
.Lmemcpy_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_bsrcul:
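	/*
	 * Mirror of the forward unaligned-source setup: round r1 down and
	 * preload the straddling word into r3; r12 (the source byte offset,
	 * 1..3) picks the shift variant.
	 */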
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemcpy_bsrcul1
	beq	.Lmemcpy_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul3l4

.Lmemcpy_bsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #24
#else
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul3loop4

.Lmemcpy_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul2l4

.Lmemcpy_bsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #16
#else
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul2loop4

.Lmemcpy_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}
.Lmemcpy_bsrcul1loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul1l4

.Lmemcpy_bsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #8
#else
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul1loop4

.Lmemcpy_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemcpy_bl4