/*
 * Copyright (C) 2004-2007 Atmel Corporation
 *
 * This file is subject to the terms and conditions of the GNU Lesser General
 * Public License.  See the file "COPYING.LIB" in the main directory of this
 * archive for more details.
 */

/* Don't use r12 as dst since we must return it unmodified */
#define dst r9
#define src r11
#define len r10

	.text
	.global	memcpy
	.type	memcpy, @function
memcpy:
	pref	src[0]
	mov	dst, r12

	/* If we have less than 32 bytes, don't do anything fancy */
	cp.w	len, 32
	brge	.Lmore_than_31

	sub	len, 1
	retlt	r12
1:	ld.ub	r8, src++
	st.b	dst++, r8
	sub	len, 1
	brge	1b
	retal	r12

.Lmore_than_31:
	pushm	r0-r7, lr

	/* Check alignment */
	mov	r8, src
	andl	r8, 31, COH
	brne	.Lunaligned_src
	mov	r8, dst
	andl	r8, 3, COH
	brne	.Lunaligned_dst

.Laligned_copy:
	sub	len, 32
	brlt	.Lless_than_32

1:	/* Copy 32 bytes at a time */
	ldm	src, r0-r7
	sub	src, -32
	stm	dst, r0-r7
	sub	dst, -32
	sub	len, 32
	brge	1b

.Lless_than_32:
	/* Copy 16 more bytes if possible */
	sub	len, -16
	brlt	.Lless_than_16
	ldm	src, r0-r3
	sub	src, -16
	sub	len, 16
	stm	dst, r0-r3
	sub	dst, -16

.Lless_than_16:
	/* Do the remaining as byte copies */
	neg	len
	add	pc, pc, len << 2
	.rept	15
	ld.ub	r0, src++
	st.b	dst++, r0
	.endr

	popm	r0-r7, pc

.Lunaligned_src:
	/* Make src cacheline-aligned. r8 = (src & 31) */
	rsub	r8, r8, 32
	sub	len, r8
1:	ld.ub	r0, src++
	st.b	dst++, r0
	sub	r8, 1
	brne	1b

	/* If dst is word-aligned, we're ready to go */
	pref	src[0]
	mov	r8, 3
	tst	dst, r8
	breq	.Laligned_copy

.Lunaligned_dst:
	/* src is aligned, but dst is not. Expect bad performance */
	sub	len, 4
	brlt	2f
1:	ld.w	r0, src++
	st.w	dst++, r0
	sub	len, 4
	brge	1b

2:	neg	len
	add	pc, pc, len << 2
	.rept	3
	ld.ub	r0, src++
	st.b	dst++, r0
	.endr

	popm	r0-r7, pc
	.size	memcpy, . - memcpy

libc_hidden_def(memcpy)