|
@@ -10,107 +10,107 @@
|
|
|
#define src r11
|
|
|
#define len r10
|
|
|
|
|
|
- .text
|
|
|
- .global memmove
|
|
|
- .type memmove, @function
|
|
|
+ .text
|
|
|
+ .global memmove /* memmove: copies len bytes from src to dst; branches to memcpy when src >= dst, otherwise copies backwards from the end of both buffers */
|
|
|
+ .type memmove, @function
|
|
|
memmove:
|
|
|
- cp.w src, dst
|
|
|
- brge HIDDEN_JUMPTARGET(memcpy)
|
|
|
-
|
|
|
- add dst, len
|
|
|
- add src, len
|
|
|
- pref src[-1]
|
|
|
-
|
|
|
- /*
|
|
|
- * The rest is basically the same as in memcpy.S except that
|
|
|
- * the direction is reversed.
|
|
|
- */
|
|
|
- cp.w len, 32
|
|
|
- brge .Lmore_than_31
|
|
|
-
|
|
|
- sub len, 1
|
|
|
- retlt r12
|
|
|
-1: ld.ub r8, --src
|
|
|
- st.b --dst, r8
|
|
|
- sub len, 1
|
|
|
- brge 1b
|
|
|
- retal r12
|
|
|
+ cp.w src, dst /* NOTE(review): brge below is a signed condition, so this orders the pointers as signed words - confirm that is acceptable for the address map */
|
|
|
+ brge HIDDEN_JUMPTARGET(memcpy) /* src >= dst: branch (not call) to memcpy, which returns to our caller; presumably memcpy copies forward */
|
|
|
+
|
|
|
+ add dst, len /* dst += len: now one past the last destination byte */
|
|
|
+ add src, len /* src += len: now one past the last source byte */
|
|
|
+ pref src[-1] /* prefetch hint for the last source byte, the first one read below */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * The rest is basically the same as in memcpy.S except that
|
|
|
+ * the direction is reversed.
|
|
|
+ */
|
|
|
+ cp.w len, 32
|
|
|
+ brge .Lmore_than_31 /* len >= 32: use the block-copy path */
|
|
|
+
|
|
|
+ sub len, 1 /* short copy (len < 32): len becomes the index of the last byte still to copy */
|
|
|
+ retlt r12 /* len was 0: nothing to copy, return r12 */
|
|
|
+1: ld.ub r8, --src /* pre-decrement src, then load one byte */
|
|
|
+ st.b --dst, r8 /* pre-decrement dst, then store that byte */
|
|
|
+ sub len, 1
|
|
|
+ brge 1b /* loop until len drops below 0 */
|
|
|
+ retal r12 /* return r12; NOTE(review): dst is presumably r12 (its #define is outside this view), in which case the decrements have walked it back to its entry value */
|
|
|
|
|
|
|
.Lmore_than_31:
|
|
|
- pushm r0-r7, lr
|
|
|
+ pushm r0-r7, lr /* save r0-r7 and lr; every exit on this path is "popm r0-r7, pc" */
|
|
|
|
|
|
|
- /* Check alignment */
|
|
|
- mov r8, src
|
|
|
- andl r8, 31, COH
|
|
|
- brne .Lunaligned_src
|
|
|
- mov r8, r12
|
|
|
- andl r8, 3, COH
|
|
|
- brne .Lunaligned_dst
|
|
|
+/* Check alignment */
|
|
|
+ mov r8, src
|
|
|
+ andl r8, 31, COH /* r8 = src & 31, where src is the end-of-buffer pointer at this point */
|
|
|
+ brne .Lunaligned_src /* src end pointer is not 32-byte aligned */
|
|
|
+ mov r8, r12 /* NOTE(review): r12 is used directly here where the labels and comments speak of dst - presumably the same register */
|
|
|
+ andl r8, 3, COH /* r8 = r12 & 3 */
|
|
|
+ brne .Lunaligned_dst /* low two bits set: not word aligned */
|
|
|
|
|
|
|
.Laligned_copy:
|
|
|
- sub len, 32
|
|
|
- brlt .Lless_than_32
|
|
|
+ sub len, 32 /* from here on len is biased: len = (bytes remaining) - 32 */
|
|
|
+ brlt .Lless_than_32 /* fewer than 32 bytes remain */
|
|
|
|
|
|
|
-1: /* Copy 32 bytes at a time */
|
|
|
- sub src, 32
|
|
|
- ldm src, r0-r7
|
|
|
- sub dst, 32
|
|
|
- sub len, 32
|
|
|
- stm dst, r0-r7
|
|
|
- brge 1b
|
|
|
+1: /* Copy 32 bytes at a time */
|
|
|
+ sub src, 32 /* step src down one 32-byte block */
|
|
|
+ ldm src, r0-r7 /* load eight words from src into r0-r7 */
|
|
|
+ sub dst, 32 /* step dst down one 32-byte block */
|
|
|
+ sub len, 32 /* produces the flags that the brge after the stm tests */
|
|
|
+ stm dst, r0-r7 /* store the eight words at dst */
|
|
|
+ brge 1b /* biased len still >= 0: another full block remains */
|
|
|
|
|
|
|
.Lless_than_32:
|
|
|
- /* Copy 16 more bytes if possible */
|
|
|
- sub len, -16
|
|
|
- brlt .Lless_than_16
|
|
|
- sub src, 16
|
|
|
- ldm src, r0-r3
|
|
|
- sub dst, 16
|
|
|
- sub len, 16
|
|
|
- stm dst, r0-r3
|
|
|
+ /* Copy 16 more bytes if possible */
|
|
|
+ sub len, -16 /* len += 16, so len = (bytes remaining) - 16 */
|
|
|
+ brlt .Lless_than_16 /* fewer than 16 bytes remain */
|
|
|
+ sub src, 16
|
|
|
+ ldm src, r0-r3 /* load four words */
|
|
|
+ sub dst, 16
|
|
|
+ sub len, 16 /* keep the -16 bias after consuming 16 bytes, so both paths reach .Lless_than_16 in the same state */
|
|
|
+ stm dst, r0-r3
|
|
|
|
|
|
|
.Lless_than_16:
|
|
|
- /* Do the remaining as byte copies */
|
|
|
- sub len, -16
|
|
|
- breq 2f
|
|
|
-1: ld.ub r0, --src
|
|
|
- st.b --dst, r0
|
|
|
- sub len, 1
|
|
|
- brne 1b
|
|
|
+ /* Do the remaining as byte copies */
|
|
|
+ sub len, -16 /* len += 16 removes the bias: len = bytes still to copy (0..15) */
|
|
|
+ breq 2f /* nothing left to copy */
|
|
|
+1: ld.ub r0, --src
|
|
|
+ st.b --dst, r0
|
|
|
+ sub len, 1
|
|
|
+ brne 1b /* loop until len reaches 0 */
|
|
|
|
|
|
|
-2: popm r0-r7, pc
|
|
|
+2: popm r0-r7, pc /* restore r0-r7 and return by popping the saved lr into pc */
|
|
|
|
|
|
|
.Lunaligned_src:
|
|
|
- /* Make src cacheline-aligned. r8 = (src & 31) */
|
|
|
- sub len, r8
|
|
|
-1: ld.ub r0, --src
|
|
|
- st.b --dst, r0
|
|
|
- sub r8, 1
|
|
|
- brne 1b
|
|
|
-
|
|
|
- /* If dst is word-aligned, we're ready to go */
|
|
|
- pref src[-4]
|
|
|
- mov r8, 3
|
|
|
- tst dst, r8
|
|
|
- breq .Laligned_copy
|
|
|
+ /* Make src cacheline-aligned. r8 = (src & 31) */
|
|
|
+ sub len, r8 /* account up front for the r8 bytes the loop below copies; len >= 32 and r8 <= 31 here, so len stays positive */
|
|
|
+1: ld.ub r0, --src
|
|
|
+ st.b --dst, r0
|
|
|
+ sub r8, 1
|
|
|
+ brne 1b /* r8 is nonzero on entry (reached via brne), so this runs 1..31 times */
|
|
|
+
|
|
|
+ /* If dst is word-aligned, we're ready to go */
|
|
|
+ pref src[-4]
|
|
|
+ mov r8, 3
|
|
|
+ tst dst, r8 /* sets flags from dst & 3 without modifying dst */
|
|
|
+ breq .Laligned_copy /* otherwise fall through into .Lunaligned_dst */
|
|
|
|
|
|
|
.Lunaligned_dst:
|
|
|
- /* src is aligned, but dst is not. Expect bad performance */
|
|
|
- sub len, 4
|
|
|
- brlt 2f
|
|
|
-1: ld.w r0, --src
|
|
|
- st.w --dst, r0
|
|
|
- sub len, 4
|
|
|
- brge 1b
|
|
|
-
|
|
|
-2: neg len
|
|
|
- add pc, pc, len << 2
|
|
|
- .rept 3
|
|
|
- ld.ub r0, --src
|
|
|
- st.b --dst, r0
|
|
|
- .endr
|
|
|
-
|
|
|
- popm r0-r7, pc
|
|
|
- .size memmove, . - memmove
|
|
|
+ /* src is aligned, but dst is not. Expect bad performance */
|
|
|
+ sub len, 4 /* len is biased by -4 while the word loop runs */
|
|
|
+ brlt 2f /* fewer than 4 bytes remain */
|
|
|
+1: ld.w r0, --src /* pre-decrement src by a word and load it */
|
|
|
+ st.w --dst, r0 /* word store through the non-word-aligned dst */
|
|
|
+ sub len, 4
|
|
|
+ brge 1b /* at least one more whole word remains */
|
|
|
+
|
|
|
+2: neg len /* len was -4..-1 (bytes left minus 4); now 4 - (bytes left), i.e. 1..4 */
|
|
|
+ add pc, pc, len << 2 /* computed jump that skips the byte copies not needed; NOTE(review): assumes each ld.ub/st.b pair below assembles to 4 bytes - confirm encodings */
|
|
|
+ .rept 3 /* unrolled: at most 3 trailing bytes */
|
|
|
+ ld.ub r0, --src
|
|
|
+ st.b --dst, r0
|
|
|
+ .endr
|
|
|
+
|
|
|
+ popm r0-r7, pc /* restore r0-r7 and return via the saved lr */
|
|
|
+ .size memmove, . - memmove /* symbol size = bytes from the memmove label to here */
|
|
|
|
|
|
libc_hidden_def(memmove)
|