/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Adapted for uClibc from NetBSD _memcpy.S,v 1.6 2003/10/09
 * by Erik Andersen <andersen@codepoet.org>
 */

#include <features.h>
#include <endian.h>
#include <bits/arm_asm.h>
#include <bits/arm_bx.h>
#if !defined(THUMB1_ONLY)
/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy(),
 * memmove() and bcopy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix, a label starting with f is used in the forward
 * copy code while a label starting with b is used in the backwards copy
 * code.  The source and destination addresses determine whether a
 * forward or backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy:
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3, although r0-r2 have defined uses, i.e. dest, src and len,
 * throughout.  Additional registers are preserved prior to use, i.e.
 * r4, r5 and lr.  The return value in r0 must be the destination address.
 *
 * Apologies for the state of the comments ;-)
 */
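/*
 * A rough C-level sketch of the overall flow (illustrative only; the
 * helper names are hypothetical and everything actually lives in
 * registers):
 *
 *	void *_memcpy(void *dst, const void *src, size_t len)
 *	{
 *		if (src < dst)		// forward copy could clobber overlap
 *			copy_descending(dst + len, src + len, len);
 *		else if (src > dst)
 *			copy_ascending(dst, src, len);
 *		return dst;		// src == dst: nothing to do
 *	}
 *
 * Each direction then splits on destination alignment, then on source
 * alignment, before settling into the bulk LDM/STM loops.
 */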
.text
.global _memcpy
.hidden _memcpy
.type _memcpy,%function
.align 4

/* XXX: The Thumb-2 conditionals can be removed if/when we require an
   assembler that supports unified syntax. */
.macro copy regs
#if defined(__thumb2__)
	ittt	ge
	ldmiage	r1!, \regs
	stmiage	r0!, \regs
#else
	ldmgeia	r1!, \regs
	stmgeia	r0!, \regs
#endif
.endm

.macro copydb regs
#if defined(__thumb2__)
	ittt	ge
	ldmdbge	r1!, \regs
	stmdbge	r0!, \regs
#else
	ldmgedb	r1!, \regs
	stmgedb	r0!, \regs
#endif
.endm
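/* Note: the ittt above predicates three instruction slots while the
   macros contain only two conditional instructions.  Every call site
   below follows the macro with one more GE-conditional instruction
   (a subge/subsge), which fills the third slot; keep that invariant
   if editing this code. */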
_memcpy:
	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemcpy_backwards

	IT(t, eq)			/* Quick abort for src=dst */
	BXC(eq, lr)
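	/* Here src > dst, so an ascending copy is always safe; src < dst
	   took the backwards path above so that an overlapping forward
	   copy cannot clobber unread bytes, and src == dst returned. */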
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */
.Lmemcpy_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	str	r4, [sp, #-4]!		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_floop32

	cmn	r2, #0x10
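	/* CMN sets the flags for r2 + 0x10.  r2 runs 0x20 below the true
	   remaining count at this point, so GE means at least 16 real
	   bytes are left for the conditional block copy below. */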
	/* blat a remaining 16 bytes */
	copy	"{r3, r4, r12, lr}"
	subge	r2, r2, #0x10
	ldr	r4, [sp], #4		/* restore r4 */

.Lmemcpy_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_floop12:
	copy	"{r3, r12, lr}"
#if defined(__thumb2__)
	subsge	r2, r2, #0x0c
#else
	subges	r2, r2, #0x0c
#endif
	bge	.Lmemcpy_floop12

.Lmemcpy_fl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_fl4

	subs	r2, r2, #4
	IT(tt, lt)
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	copy	"{r3, r12}"
	subge	r2, r2, #4

.Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#if defined(__thumb2__)
	it	eq
	popeq	{r0, pc}		/* done */
#elif defined(__ARM_ARCH_4T__)
	ldmeqia	sp!, {r0, r3}		/* done */
	bxeq	r3
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif

	/* copy the crud byte at a time */
	cmp	r2, #2
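	/* r2 is now 1, 2 or 3: copy the first byte unconditionally, the
	   second only if r2 >= 2 (GE) and the third only if r2 == 3 (GT). */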
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
#if defined(__thumb2__)
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#else
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
#endif
#if defined(__ARM_ARCH_4T__)
	ldmia	sp!, {r0, r3}
	bx	r3
#else
	ldmia	sp!, {r0, pc}
#endif

	/* erg - unaligned destination */
.Lmemcpy_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2
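	/* r12 = 4 - (dst & 3): the number of bytes (1-3) needed to bring
	   the destination up to a word boundary; the cmp drives the same
	   GE/GT conditional byte-copy pattern used in the tail code. */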
	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
#if defined(__thumb2__)
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#else
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
#endif
	subs	r2, r2, r12
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
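	/* The source is misaligned by r12 = 1, 2 or 3 bytes.  Round r1
	   down to a word boundary and pre-load the first aligned word
	   into lr; the loops below then shift pairs of aligned words by
	   8 * r12 bits to reassemble the unaligned byte stream. */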
	bgt	.Lmemcpy_fsrcul3
	beq	.Lmemcpy_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}
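	/* 16 bytes per pass: carry the live word in lr, load four more
	   aligned words, and merge adjacent words with lsr #8 / lsl #24
	   (shift directions mirrored for big-endian). */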
.Lmemcpy_fsrcul1loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul1l4

.Lmemcpy_fsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #24
#else
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul1loop4

.Lmemcpy_fsrcul1l4:
	sub	r1, r1, #3
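	/* r1 points one aligned word past the data consumed; backing up
	   by 4 - misalignment (3 here, 2 and 1 in the variants below)
	   restores the true unaligned byte position for the tail copy. */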
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul2l4

.Lmemcpy_fsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #16
#else
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul2loop4

.Lmemcpy_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul3l4

.Lmemcpy_fsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #8
#else
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul3loop4

.Lmemcpy_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_fl4

.Lmemcpy_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
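	/* Both pointers now sit one past the end of their buffers; the
	   backwards path below mirrors the forward one with descending
	   (DB / pre-decrement) accesses. */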
	subs	r2, r2, #4
	blt	.Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */

.Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemcpy_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_bloop32

.Lmemcpy_bl32:
	cmn	r2, #0x10
	/* blat a remaining 16 bytes */
	copydb	"{r3, r4, r12, lr}"
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	/* blat a remaining 12 bytes */
	copydb	"{r3, r12, lr}"
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemcpy_bl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_bl4
	subs	r2, r2, #4
	IT(tt, lt)
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	copydb	"{r3, r12}"
	subge	r2, r2, #4

.Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	IT(t, eq)
	BXC(eq, lr)			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
#ifdef __thumb2__
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
#else
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
#endif
	BX(lr)

	/* erg - unaligned destination */
.Lmemcpy_bdestul:
	cmp	r12, #2
	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
#ifdef __thumb2__
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
#else
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
#endif
	subs	r2, r2, r12
	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
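	/* Mirror of .Lmemcpy_fsrcul: r1 is rounded down and the aligned
	   word holding the last unconsumed bytes is pre-loaded into r3;
	   r12 (1, 2 or 3) again selects the shift amount used to splice
	   descending aligned words back together. */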
	blt	.Lmemcpy_bsrcul1
	beq	.Lmemcpy_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul3l4

.Lmemcpy_bsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #24
#else
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul3loop4

.Lmemcpy_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul2l4

.Lmemcpy_bsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #16
#else
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul2loop4

.Lmemcpy_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul1loop32:
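	/* (Despite the inherited label, this loop moves 16 bytes per
	   iteration, like its siblings above.) */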
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul1l4

.Lmemcpy_bsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #8
#else
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul1loop4

.Lmemcpy_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemcpy_bl4

#else /* THUMB1_ONLY */

/* This is a fairly dumb implementation for when we can't use the 32-bit code
   above. */
.text
.global _memcpy
.hidden _memcpy
.type _memcpy,%function
.align 4
.thumb
_memcpy:
	push	{r0, r4}
	cmp	r2, #0
	beq	.Lmemcpy_exit
	@ See if we have overlapping regions, and need to reverse the
	@ direction of the copy
	cmp	r0, r1
	bls	.Lmemcpy_forwards
	add	r4, r1, r2
	cmp	r0, r4
	bcc	.Lmemcpy_backwards
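	@ dst > src but dst >= src + len: the regions are disjoint, so a
	@ forward copy is still safe; only dst inside [src, src + len)
	@ forces the byte-reversed path.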
.Lmemcpy_forwards:
	/* Forwards. */
	mov	r3, r0
	eor	r3, r1
	mov	r4, #3
	tst	r3, r4
	bne	.Lmemcpy_funaligned
	cmp	r2, #8
	bcc	.Lmemcpy_funaligned
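	@ src and dst share the same alignment mod 4 and the copy is at
	@ least 8 bytes, so it is worth aligning to a word boundary and
	@ copying a word at a time; everything else falls back to bytes.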
1:	@ copy up to the first word boundary.
	tst	r0, r4
	beq	1f
	ldrb	r3, [r1]
	add	r1, r1, #1
	strb	r3, [r0]
	add	r0, r0, #1
	sub	r2, r2, #1
	b	1b
1:	@ Copy aligned words
	ldr	r3, [r1]
	add	r1, r1, #4
	str	r3, [r0]
	add	r0, r0, #4
	sub	r2, r2, #4
	cmp	r2, #4
	bcs	1b
	cmp	r2, #0
	beq	.Lmemcpy_exit

.Lmemcpy_funaligned:
1:
	ldrb	r3, [r1]
	add	r1, r1, #1
	strb	r3, [r0]
	add	r0, r0, #1
	sub	r2, r2, #1
	bne	1b

.Lmemcpy_exit:
	pop	{r0, r4}
	bx	lr

.Lmemcpy_backwards:
	add	r0, r0, r2
	add	r1, r1, r2
1:
	sub	r0, r0, #1
	sub	r1, r1, #1
	ldrb	r3, [r1]
	strb	r3, [r0]
	sub	r2, r2, #1
	bne	1b
	b	.Lmemcpy_exit
#endif