/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Adapted for uClibc from NetBSD _memcpy.S,v 1.6 2003/10/09
 * by Erik Andersen <andersen@codepoet.org>
 */
#include <features.h>
#include <endian.h>
#include <bits/arm_asm.h>

#if !defined(THUMB1_ONLY)
/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it:
 *
 * The core code is implemented here with simple stubs for memcpy(),
 * memmove() and bcopy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix, a label starting with f is used in the forward
 * copy code, while a label starting with b is used in the backwards
 * copy code.
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy:
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function, along with
 * r0-r3, although r0-r2 have defined uses, i.e. dst, src and len,
 * throughout.  Additional registers are preserved prior to use, i.e.
 * r4, r5 and lr.
 *
 * Apologies for the state of the comments ;-)
 */
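
/*
 * A worked example of the unaligned-source technique used below (a
 * sketch; the little-endian case is shown, big-endian swaps the shift
 * directions).  With the source one byte past a word boundary, each
 * output word is built from two adjacent aligned words:
 *
 *	out = (prev >> 8) | (next << 24)
 *
 * i.e. the three remaining bytes of the previous aligned word joined
 * with the first byte of the next one.  Source offsets of 2 and 3
 * bytes use #16/#16 and #24/#8 shift pairs in the same way.
 */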
	.text
	.global _memcpy
	.hidden _memcpy
	.type _memcpy,%function
	.align 4

/* XXX: The Thumb-2 conditionals can be removed if/when we require an
   assembler that supports unified syntax. */
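
/*
 * Conditional block-copy helpers: when the GE condition holds, move a
 * set of registers from [r1] to [r0], post-incrementing (copy) or
 * pre-decrementing (copydb) both pointers.  The ittt is deliberate: it
 * covers the two instructions here plus the ge-conditional sub that
 * follows every use of these macros.
 */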
.macro copy regs
#if defined(__thumb2__)
	ittt	ge
	ldmiage	r1!, \regs
	stmiage	r0!, \regs
#else
	ldmgeia	r1!, \regs
	stmgeia	r0!, \regs
#endif
.endm

.macro copydb regs
#if defined(__thumb2__)
	ittt	ge
	ldmdbge	r1!, \regs
	stmdbge	r0!, \regs
#else
	ldmgedb	r1!, \regs
	stmgedb	r0!, \regs
#endif
.endm
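
/*
 * Entry: r0 = dst, r1 = src, r2 = len.  The copy direction is chosen
 * from the pointer order, so overlapping regions are handled (memmove
 * semantics), and the original dst is saved for the return value.
 */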
_memcpy:
	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemcpy_backwards

	IT(t, eq)			/* Quick abort for src=dst */
#if defined(__USE_BX__)
	bxeq	lr
#else
	moveq	pc, lr
#endif
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */

.Lmemcpy_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	str	r4, [sp, #-4]!		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_floop32

	cmn	r2, #0x10
	/* blat a remaining 16 bytes */
	copy	"{r3, r4, r12, lr}"
	subge	r2, r2, #0x10
	ldr	r4, [sp], #4		/* restore r4 */

.Lmemcpy_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_floop12:
	copy	"{r3, r12, lr}"
#if defined(__thumb2__)
	subsge	r2, r2, #0x0c
#else
	subges	r2, r2, #0x0c
#endif
	bge	.Lmemcpy_floop12

.Lmemcpy_fl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_fl4

	subs	r2, r2, #4
	IT(tt, lt)
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	copy	"{r3, r12}"
	subge	r2, r2, #4

.Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#if defined(__thumb2__)
	it	eq
	popeq	{r0, pc}		/* done */
#elif defined(__ARM_ARCH_4T__)
	ldmeqia	sp!, {r0, r3}		/* done */
	bxeq	r3
#else
	ldmeqia	sp!, {r0, pc}		/* done */
#endif

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
#if defined(__thumb2__)
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#else
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
#endif
#if defined(__ARM_ARCH_4T__)
	ldmia	sp!, {r0, r3}
	bx	r3
#else
	ldmia	sp!, {r0, pc}
#endif

	/* erg - unaligned destination */
.Lmemcpy_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
#if defined(__thumb2__)
	itt	ge
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	itt	gt
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
#else
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0], #1
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0], #1
#endif
	subs	r2, r2, r12
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r1, #3
	beq	.Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
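/*
 * Unaligned source, aligned destination: round r1 down to a word
 * boundary, prime lr with the first aligned word, then build each
 * output word from a pair of aligned words using the shift/orr
 * sequences below, selected by the source byte offset in r12 (1, 2
 * or 3).
 */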
.Lmemcpy_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_fsrcul3
	beq	.Lmemcpy_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul1loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul1l4

.Lmemcpy_fsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #24
#else
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul1loop4

.Lmemcpy_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul2l4

.Lmemcpy_fsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #16
#else
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul2loop4

.Lmemcpy_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_fl4

.Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r3, lr, lsl #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul3l4

.Lmemcpy_fsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, lr, lsl #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsr #8
#else
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul3loop4

.Lmemcpy_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemcpy_fl4
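
/*
 * Backward copy: advance both pointers to one past the end of the
 * buffers and copy with descending addresses, so overlapping regions
 * with dst above src are handled correctly.
 */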
.Lmemcpy_backwards:
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */

.Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemcpy_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_bloop32

.Lmemcpy_bl32:
	cmn	r2, #0x10
	/* blat a remaining 16 bytes */
	copydb	"{r3, r4, r12, lr}"
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	/* blat a remaining 12 bytes */
	copydb	"{r3, r12, lr}"
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemcpy_bl12:
	adds	r2, r2, #8
	blt	.Lmemcpy_bl4
	subs	r2, r2, #4
	IT(tt, lt)
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	copydb	"{r3, r12}"
	subge	r2, r2, #4

.Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	IT(t, eq)
#if defined(__USE_BX__)
	bxeq	lr
#else
	moveq	pc, lr			/* done */
#endif

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
#ifdef __thumb2__
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
#else
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
#endif
#if defined(__USE_BX__)
	bx	lr
#else
	mov	pc, lr
#endif

	/* erg - unaligned destination */
.Lmemcpy_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
#ifdef __thumb2__
	itt	ge
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	itt	gt
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
#else
	ldrgeb	r3, [r1, #-1]!
	strgeb	r3, [r0, #-1]!
	ldrgtb	r3, [r1, #-1]!
	strgtb	r3, [r0, #-1]!
#endif
	subs	r2, r2, r12
	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
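/*
 * Unaligned source, backward direction: the mirror image of the
 * forward case; r3 caches the aligned word just loaded and adjacent
 * words are merged while walking down through memory.
 */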
.Lmemcpy_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemcpy_bsrcul1
	beq	.Lmemcpy_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul3loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul3l4

.Lmemcpy_bsrcul3loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #24
#else
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul3loop4

.Lmemcpy_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul2loop16:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul2l4

.Lmemcpy_bsrcul2loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #16
#else
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul2loop4

.Lmemcpy_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemcpy_bl4

.Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul1loop32:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	lr, r3, lsr #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul1l4

.Lmemcpy_bsrcul1loop4:
#if __BYTE_ORDER == __BIG_ENDIAN
	mov	r12, r3, lsr #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsl #8
#else
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul1loop4

.Lmemcpy_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemcpy_bl4
#else /* THUMB1_ONLY */

/* This is a fairly dumb implementation for when we can't use the 32-bit code
   above. */
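
/*
 * Sketch of the strategy: if dst and src can be brought to the same
 * word alignment (their addresses differ by a multiple of 4) and there
 * are at least 8 bytes to copy, copy bytes up to a word boundary and
 * then copy whole words; otherwise fall back to a byte-at-a-time loop.
 */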
	.text
	.global _memcpy
	.hidden _memcpy
	.type _memcpy,%function
	.align 4
	.thumb
_memcpy:
	push	{r0, r4}
	cmp	r2, #0
	beq	.Lmemcpy_exit
	@ See if we have overlapping regions, and need to reverse the
	@ direction of the copy
	cmp	r0, r1
	bls	.Lmemcpy_forwards
	add	r4, r1, r2
	cmp	r0, r4
	bcc	.Lmemcpy_backwards

.Lmemcpy_forwards:
	/* Forwards. */
	mov	r3, r0
	eor	r3, r1
	mov	r4, #3
	tst	r3, r4
	bne	.Lmemcpy_funaligned
	cmp	r2, #8
	bcc	.Lmemcpy_funaligned
1:	@ copy up to the first word boundary.
	tst	r0, r4
	beq	1f
	ldrb	r3, [r1]
	add	r1, r1, #1
	strb	r3, [r0]
	add	r0, r0, #1
	sub	r2, r2, #1
	b	1b
1:	@ Copy aligned words
	ldr	r3, [r1]
	add	r1, r1, #4
	str	r3, [r0]
	add	r0, r0, #4
	sub	r2, r2, #4
	cmp	r2, #4
	bcs	1b
	cmp	r2, #0
	beq	.Lmemcpy_exit

.Lmemcpy_funaligned:
1:
	ldrb	r3, [r1]
	add	r1, r1, #1
	strb	r3, [r0]
	add	r0, r0, #1
	sub	r2, r2, #1
	bne	1b

.Lmemcpy_exit:
	pop	{r0, r4}
	bx	lr

.Lmemcpy_backwards:
	add	r0, r0, r2
	add	r1, r1, r2
1:
	sub	r0, r0, #1
	sub	r1, r1, #1
	ldrb	r3, [r1]
	strb	r3, [r0]
	sub	r2, r2, #1
	bne	1b
	b	.Lmemcpy_exit
#endif