|
@@ -35,19 +35,20 @@
|
|
|
|
|
|
void *memcpy(void * to, const void * from, size_t n)
|
|
|
{
|
|
|
- int d0, d1, d2;
|
|
|
- __asm__ __volatile__(
|
|
|
- "rep ; movsl\n\t"
|
|
|
- "testb $2,%b4\n\t"
|
|
|
- "je 1f\n\t"
|
|
|
- "movsw\n"
|
|
|
- "1:\ttestb $1,%b4\n\t"
|
|
|
- "je 2f\n\t"
|
|
|
- "movsb\n"
|
|
|
- "2:"
|
|
|
- : "=&c" (d0), "=&D" (d1), "=&S" (d2)
|
|
|
- :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
|
|
|
- : "memory");
|
|
|
- return (to);
|
|
|
+ int d0, d1, d2;
|
|
|
+ __asm__ __volatile__(
|
|
|
+ " rep; movsl\n"
|
|
|
+ " movl %4,%%ecx\n"
|
|
|
+ " andl $3,%%ecx\n"
|
|
|
+ /* jz is optional. avoids "rep; movsb" with ecx == 0,
|
|
|
+ * but adds a branch, which is currently (2008) faster */
|
|
|
+ " jz 1f\n"
|
|
|
+ " rep; movsb\n"
|
|
|
+ "1:\n"
|
|
|
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
|
|
|
+ : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
|
|
|
+ : "memory"
|
|
|
+ );
|
|
|
+ return to;
|
|
|
}
|
|
|
libc_hidden_def(memcpy)
|