123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111 |
- /*
- * Copyright (C) 2004-2007 Atmel Corporation
- *
- * This file is subject to the terms and conditions of the GNU Lesser General
- * Public License. See the file "COPYING.LIB" in the main directory of this
- * archive for more details.
- */
- /*
-  * Register aliases for the working state of memcpy below.
-  *
-  * Don't use r12 as dst since we must return it unmodified: memcpy copies
-  * r12 into dst on entry and only ever advances that copy.
-  *
-  *   dst (r9)  - write pointer, post-incremented by the stores
-  *   src (r11) - read pointer, post-incremented by the loads
-  *   len (r10) - byte count; the code biases it (e.g. count - 1,
-  *               remaining - 32) so loops can branch on its sign
-  */
- #define dst r9
- #define src r11
- #define len r10
- .text
- .global memcpy
- .type memcpy, @function
- memcpy: /*
-  * Copy len bytes from src to dst and return the destination pointer.
-  *
-  * In:    r12 = destination, src (r11) = source, len (r10) = byte count
-  * Out:   r12 = destination, exactly as passed in
-  * Uses:  r8 as scratch; dst (r9), src and len are consumed.
-  *        r0-r7 and lr are pushed on the >= 32 byte path and restored
-  *        by the final popm, which also performs the return.
-  *
-  * Strategy:
-  *   - fewer than 32 bytes: plain byte loop
-  *   - otherwise: byte-copy until src is 32-byte aligned, then
-  *       dst word-aligned     -> 32-byte ldm/stm blocks, one optional
-  *                               16-byte block, then 0-15 tail bytes
-  *       dst not word-aligned -> word loads/stores, then 0-3 tail bytes
-  *   - both tails are handled by jumping into an unrolled run of
-  *     byte copies (see the add pc, pc, ... instructions)
-  */
- pref src[0] /* prefetch hint for the start of the source */
- mov dst, r12 /* work on a copy so r12 survives as the return value */
- /* If we have less than 32 bytes, don't do anything fancy */
- cp.w len, 32
- brge .Lmore_than_31 /* NOTE(review): brge/retlt look like signed conditions, so a count with the top bit set would seem to return without copying - confirm */
- sub len, 1 /* len = count - 1 so the loop can test for >= 0 */
- retlt r12 /* count was 0: nothing to copy */
- 1: ld.ub r8, src++ /* byte-at-a-time copy of the short buffer */
- st.b dst++, r8
- sub len, 1
- brge 1b /* loop until len drops below 0 */
- retal r12 /* done; return the untouched destination pointer */
- .Lmore_than_31:
- pushm r0-r7, lr /* r0-r7 are used as copy buffers below; lr is saved for the popm-to-pc return */
- /* Check alignment */
- mov r8, src
- andl r8, 31, COH /* r8 = src & 31 (offset within a 32-byte line) */
- brne .Lunaligned_src
- mov r8, dst
- andl r8, 3, COH /* r8 = dst & 3 */
- brne .Lunaligned_dst
- .Laligned_copy: /* reached with src 32-byte aligned and dst word-aligned */
- sub len, 32 /* len = remaining - 32 */
- brlt .Lless_than_32 /* fewer than 32 bytes left: skip the block loop */
- 1: /* Copy 32 bytes at a time */
- ldm src, r0-r7 /* load eight words */
- sub src, -32 /* src += 32 */
- stm dst, r0-r7 /* store eight words */
- sub dst, -32 /* dst += 32 */
- sub len, 32
- brge 1b /* another full 32-byte block is available */
- .Lless_than_32:
- /* Copy 16 more bytes if possible. Here len = remaining - 32. */
- sub len, -16 /* len = remaining - 16 */
- brlt .Lless_than_16
- ldm src, r0-r3 /* load four words */
- sub src, -16 /* src += 16 */
- sub len, 16 /* re-bias: len = (new remaining) - 16 */
- stm dst, r0-r3 /* store four words */
- sub dst, -16 /* dst += 16 */
- .Lless_than_16:
- /*
-  * Do the remaining as byte copies.
-  *
-  * On both ways in, len = remaining - 16 with 0 <= remaining <= 15.
-  * After the neg, len = 16 - remaining, and the add to pc skips that
-  * many 4-byte slots so that only the last `remaining` of the 15
-  * unrolled copies run. This works out if pc reads as the address of
-  * the add itself and every ld.ub/st.b pair occupies 4 bytes -
-  * TODO confirm against the AVR32 architecture manual.
-  * Do not insert or resize instructions between the add and .endr.
-  */
- neg len
- add pc, pc, len << 2
- .rept 15
- ld.ub r0, src++
- st.b dst++, r0
- .endr
- popm r0-r7, pc /* restore r0-r7 and return (saved lr is popped into pc) */
- .Lunaligned_src:
- /* Make src cacheline-aligned. r8 = (src & 31) */
- rsub r8, r8, 32 /* r8 = 32 - r8: bytes needed to reach the next 32-byte boundary */
- sub len, r8 /* account for them up front; len was >= 32, so it stays positive */
- 1: ld.ub r0, src++ /* copy those leading bytes one at a time */
- st.b dst++, r0
- sub r8, 1
- brne 1b
- /* If dst is word-aligned, we're ready to go */
- pref src[0] /* prefetch hint for the now-aligned source */
- mov r8, 3
- tst dst, r8 /* sets Z when (dst & 3) == 0 */
- breq .Laligned_copy
- .Lunaligned_dst:
- /* src is aligned, but dst is not. Expect bad performance */
- sub len, 4 /* len = remaining - 4 */
- brlt 2f /* fewer than 4 bytes left: straight to the tail */
- 1: ld.w r0, src++ /* aligned word load ... */
- st.w dst++, r0 /* ... stored to a dst that is not word-aligned */
- sub len, 4
- brge 1b
- 2: neg len /* len = 4 - remaining, with 0 <= remaining <= 3 */
- add pc, pc, len << 2 /* same computed-jump scheme as the 15-byte tail: run only the last `remaining` of the 3 copies */
- .rept 3
- ld.ub r0, src++
- st.b dst++, r0
- .endr
- popm r0-r7, pc /* restore r0-r7 and return (saved lr is popped into pc) */
- .size memcpy, . - memcpy
- libc_hidden_def(memcpy) /* macro defined outside this file */
|