/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Adapted for uClibc from NetBSD _memcpy.S,v 1.6 2003/10/09
 * by Erik Andersen <andersen@codepoet.org>
 */
#include <features.h>
#include <endian.h>
#include <bits/arm_asm.h>
#include <bits/arm_bx.h>

#if !defined(THUMB1_ONLY)
/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it:
 *
 * The core code is implemented here with simple stubs for memcpy(),
 * memmove() and bcopy().
 *
 * All local labels are prefixed with Lmemcpy_.
 * Following the prefix, labels starting with 'f' are used in the forward
 * copy code and labels starting with 'b' in the backwards copy code.
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy:
 *  - unaligned source address
 *  - unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3, although r0-r2 have defined uses (dest, src, len) throughout.
 * Additional registers (r4, r5 and lr) are preserved prior to use.
 * The return value in r0 must be the destination address.
 *
 * Apologies for the state of the comments ;-)
 */
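
/*
 * For orientation, the overall strategy is roughly the following (a
 * sketch only; the code below is the authoritative version):
 *
 *	if (src < dst)        copy backwards, starting from the ends
 *	else if (src == dst)  return
 *	else:
 *	    byte-copy until dst is word aligned
 *	    if src is now word aligned:
 *	        copy 32, then 12, then 4 bytes at a time with LDM/STM
 *	    else:
 *	        splice each output word from two adjacent aligned source
 *	        words using shifts and ORs
 *	    byte-copy the trailing 0-3 bytes
 */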
.text
.global _memcpy
.hidden _memcpy
.type _memcpy,%function
.align 4

/* XXX: The Thumb-2 conditionals can be removed if/when we require an
   assembler that supports unified syntax. */
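
/*
 * copy/copydb conditionally (on GE) move a block of registers between
 * [r1] and [r0]: copy post-increments for the forward paths, copydb
 * pre-decrements for the backward paths.  The third slot of the ittt
 * covers the conditional sub that follows each use of the macro.
 */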
.macro copy regs
#if defined(__thumb2__)
	ittt	ge
	ldmiage	r1!, \regs
	stmiage	r0!, \regs
#else
	ldmgeia	r1!, \regs
	stmgeia	r0!, \regs
#endif
.endm

.macro copydb regs
#if defined(__thumb2__)
	ittt	ge
	ldmdbge	r1!, \regs
	stmdbge	r0!, \regs
#else
	ldmgedb	r1!, \regs
	stmgedb	r0!, \regs
#endif
.endm
_memcpy:
	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemcpy_backwards

	IT(t, eq)			/* Quick abort for src=dst */
#if defined(__USE_BX__)
	bxeq	lr
#else
	moveq	pc, lr
#endif
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */
.Lmemcpy_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	str	r4, [sp, #-4]!		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_floop32

	cmn	r2, #0x10
	/* blat a remaining 16 bytes */
	copy	"{r3, r4, r12, lr}"
	subge	r2, r2, #0x10
	ldr	r4, [sp], #4		/* restore r4 */

.Lmemcpy_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_floop12:
	copy	"{r3, r12, lr}"
#if defined(__thumb2__)
	subsge	r2, r2, #0x0c
#else
	subges	r2, r2, #0x0c
#endif
	bge	.Lmemcpy_floop12

.Lmemcpy_fl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_fl4
	subs	r2, r2, #4
	IT(tt, lt)
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	copy	"{r3, r12}"
	subge	r2, r2, #4

.Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#if defined(__thumb2__)
	it	eq
	popeq	{r0, pc}		/* done */
#elif defined(__ARM_ARCH_4T__)
	ldmeqia	sp!, {r0, r3}		/* done */
	bxeq	r3
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif

	/* copy the crud byte at a time */
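	/* r2 is 1-3 here; the cmp leaves GE true for >= 2 and GT for 3 */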
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
#if defined(__thumb2__)
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#else
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
#endif
#if defined(__ARM_ARCH_4T__)
	ldmia	sp!, {r0, r3}
	bx	r3
#else
	ldmia	sp!, {r0, pc}
#endif

/* erg - unaligned destination */
.Lmemcpy_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
#if defined(__thumb2__)
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#else
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
#endif
	subs	r2, r2, r12
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_fsrcul:
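	/*
	 * r12 = src & 3 (1, 2 or 3).  Round the source down to a word
	 * boundary and prime lr with the first aligned word; each output
	 * word is then spliced from two adjacent source words, e.g. for
	 * offset 1 on little-endian: out = (lr >> 8) | (next << 24).
	 */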
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_fsrcul3
	beq	.Lmemcpy_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul1loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul1l4

.Lmemcpy_fsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #24
#else
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul1loop4

.Lmemcpy_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul2l4

.Lmemcpy_fsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #16
#else
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul2loop4

.Lmemcpy_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul3l4

.Lmemcpy_fsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #8
#else
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul3loop4

.Lmemcpy_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_fl4

.Lmemcpy_backwards:
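	/* copy from the top down: point src and dst one past their ends */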
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */

.Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14
	blt	.Lmemcpy_bl32		/* less than 32 bytes (12 from above) */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_bloop32

.Lmemcpy_bl32:
	cmn	r2, #0x10
	/* blat a remaining 16 bytes */
	copydb	"{r3, r4, r12, lr}"
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	/* blat a remaining 12 bytes */
	copydb	"{r3, r12, lr}"
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemcpy_bl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_bl4
	subs	r2, r2, #4
	IT(tt, lt)
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	copydb	"{r3, r12}"
	subge	r2, r2, #4

.Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	IT(t, eq)
#if defined(__USE_BX__)
	bxeq	lr
#else
	moveq	pc, lr			/* done */
#endif

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
#ifdef __thumb2__
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
#else
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
#endif
#if defined(__USE_BX__)
	bx	lr
#else
	mov	pc, lr
#endif

/* erg - unaligned destination */
.Lmemcpy_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
#ifdef __thumb2__
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
#else
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
#endif
	subs	r2, r2, r12
	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_bsrcul:
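	/*
	 * Backwards variant of the word splice: r12 = src & 3, r3 is
	 * primed with the aligned word at the rounded-down end of the
	 * source, and each output word is merged from r3 and the next
	 * lower source word.
	 */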
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemcpy_bsrcul1
	beq	.Lmemcpy_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul3l4

.Lmemcpy_bsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #24
#else
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul3loop4

.Lmemcpy_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul2l4

.Lmemcpy_bsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #16
#else
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul2loop4

.Lmemcpy_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul1loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul1loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul1l4

.Lmemcpy_bsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #8
#else
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul1loop4

.Lmemcpy_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemcpy_bl4
#else /* THUMB1_ONLY */

/* This is a fairly dumb implementation for when we can't use the 32-bit code
   above. */
.text
.global _memcpy
.hidden _memcpy
.type _memcpy,%function
.align 4
.thumb
_memcpy:
	push	{r0, r4}
	cmp	r2, #0
	beq	.Lmemcpy_exit
	@ See if we have overlapping regions, and need to reverse the
	@ direction of the copy
	cmp	r0, r1
	bls	.Lmemcpy_forwards
	add	r4, r1, r2
	cmp	r0, r4
	bcc	.Lmemcpy_backwards
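	@ dst > src but at or past the end of src: forwards is still safe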
.Lmemcpy_forwards:
	/* Forwards. */
	mov	r3, r0
	eor	r3, r1
	mov	r4, #3
	tst	r3, r4
	bne	.Lmemcpy_funaligned
	cmp	r2, #8
	bcc	.Lmemcpy_funaligned
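	@ reached only when src and dst agree modulo 4 and len >= 8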
1:	@ copy up to the first word boundary.
	tst	r0, r4
	beq	1f
	ldrb	r3, [r1]
	add	r1, r1, #1
	strb	r3, [r0]
	add	r0, r0, #1
	sub	r2, r2, #1
	b	1b

1:	@ Copy aligned words
	ldr	r3, [r1]
	add	r1, r1, #4
	str	r3, [r0]
	add	r0, r0, #4
	sub	r2, r2, #4
	cmp	r2, #4
	bcs	1b
	cmp	r2, #0
	beq	.Lmemcpy_exit

.Lmemcpy_funaligned:
1:
	ldrb	r3, [r1]
	add	r1, r1, #1
	strb	r3, [r0]
	add	r0, r0, #1
	sub	r2, r2, #1
	bne	1b

.Lmemcpy_exit:
	pop	{r0, r4}
	bx	lr

.Lmemcpy_backwards:
	add	r0, r0, r2
	add	r1, r1, r2
1:
	sub	r0, r0, #1
	sub	r1, r1, #1
	ldrb	r3, [r1]
	strb	r3, [r0]
	sub	r2, r2, #1
	bne	1b
	b	.Lmemcpy_exit
#endif