16 years ago · 6acbe8c471
--- a/libc/string/cris/memcpy.c
+++ b/libc/string/cris/memcpy.c
@@ -1,264 +1,243 @@
 
															-/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
														
+
															+/* A memcpy for CRIS.
														
 
															-   Copyright (C) 1994, 1995, 2000 Axis Communications AB.
														
+
															+   Copyright (C) 1994-2008 Axis Communications.
														
 
															-
														
+
															+   All rights reserved.
														
 
															-   This file is part of the GNU C Library.
														
+
															+
														
 
															-
														
+
															+   Redistribution and use in source and binary forms, with or without
														
 
															-   The GNU C Library is free software; you can redistribute it and/or
														
+
															+   modification, are permitted provided that the following conditions
														
 
															-   modify it under the terms of the GNU Library General Public License as
														
+
															+   are met:
														
 
															-   published by the Free Software Foundation; either version 2 of the
														
+
															+
														
 
															-   License, or (at your option) any later version.
														
+
															+   1. Redistributions of source code must retain the above copyright
														
 
															-
														
+
															+      notice, this list of conditions and the following disclaimer.
														
 
															-   The GNU C Library is distributed in the hope that it will be useful,
														
+
															+
														
 
															-   but WITHOUT ANY WARRANTY; without even the implied warranty of
														
+
															+   2. Neither the name of Axis Communications nor the names of its
														
 
															-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
														
+
															+      contributors may be used to endorse or promote products derived
														
 
															-   Library General Public License for more details.
														
+
															+      from this software without specific prior written permission.
														
 
															-
														
+
															+
														
 
															-   You should have received a copy of the GNU Library General Public
														
+
															+   THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
														
 
															-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
														
+
															+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
														
 
															-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
														
+
															+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
														
 
															-   Boston, MA 02111-1307, USA.  */
														
+
															+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
														
 
															-
														
+
															+   COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
														
 
															-/*#************************************************************************#*/
														
+
															+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
														
 
															-/*#-------------------------------------------------------------------------*/
														
+
															+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
														
 
															-/*#                                                                         */
														
+
															+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
														
 
															-/*# FUNCTION NAME: memcpy()                                                 */
														
+
															+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
														
 
															-/*#                                                                         */
														
+
															+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
														
 
															-/*# PARAMETERS:  void* dst;   Destination address.                          */
														
+
															+   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
														
 
															-/*#              void* src;   Source address.                               */
														
+
															+   POSSIBILITY OF SUCH DAMAGE.  */
														
 
															-/*#              int   len;   Number of bytes to copy.                      */
														
+
															+
														
 
															-/*#                                                                         */
														
+
															+/* FIXME: This file should really only be used for reference, as the
														
 
															-/*# RETURNS:     dst.                                                       */
														
+
															+   result depends somewhat on gcc generating what we expect rather
														
 
															-/*#                                                                         */
														
+
															+   than what we describe.  An assembly file should be used instead.  */
														
 
															-/*# DESCRIPTION: Copies len bytes of memory from src to dst.  No guarantees */
														
+
															+
														
 
															-/*#              about copying of overlapping memory areas. This routine is */
														
+
															+#include <string.h>
														
 
															-/*#              very sensitive to compiler changes in register allocation. */
														
+
															+
														
 
															-/*#              Should really be rewritten to avoid this problem.          */
														
+
															+#ifdef __arch_v32
														
 
															-/*#                                                                         */
														
 
															-/*#-------------------------------------------------------------------------*/
														
 
															-/*#                                                                         */
														
 
															-/*# HISTORY                                                                 */
														
 
															-/*#                                                                         */
														
 
															-/*# DATE      NAME            CHANGES                                       */
														
 
															-/*# ----      ----            -------                                       */
														
 
															-/*# 941007    Kenny R         Creation                                      */
														
 
															-/*# 941011    Kenny R         Lots of optimizations and inlining.           */
														
 
															-/*# 941129    Ulf A           Adapted for use in libc.                      */
														
 
															-/*# 950216    HP              N==0 forgotten if non-aligned src/dst.        */
														
 
															-/*#                           Added some optimizations.                     */
														
 
															-/*# 001025    HP              Make src and dst char *.  Align dst to	    */
														
 
															-/*#			      dword, not just word-if-both-src-and-dst-	    */
														
 
															-/*#			      are-misaligned.				    */
														
 
															-/*# 070806    RW              Modified for uClibc                           */
														
 
															-/*#                           (__arch_v32 -> __CONFIG_CRISV32__,            */
														
 
															-/*#                           include features.h to reach it.)              */
														
 
															-/*#                                                                         */
														
 
															-/*#-------------------------------------------------------------------------*/
														
 
															-
														
 
															-#include <features.h>
														
 
															-
														
 
															-#ifdef __CONFIG_CRISV32__
														
 
															 /* For CRISv32, movem is very cheap.  */
														
 
															-#define MEMCPY_BLOCK_THRESHOLD (44)
														
+
															+#define MEMCPY_BY_BLOCK_THRESHOLD (44)
														
 
															 #else
														
 
															-/* Break even between movem and move16 is at 38.7*2, but modulo 44. */
														
+
															+/* Break even between movem and move16 is really at 38.7 * 2, but
														
 
															-#define MEMCPY_BLOCK_THRESHOLD (44*2)
														
+
															+   modulo 44, so up to the next multiple of 44, we use ordinary code.  */
														
 
															+#define MEMCPY_BY_BLOCK_THRESHOLD (44 * 2)
														
 
															 #endif
														
 
															-void *memcpy(void *, const void *, unsigned int);
														
+
															+/* No name ambiguities in this file.  */
														
 
															+__asm__ (".syntax no_register_prefix");
														
 
															 /* Experimentally off - libc_hidden_proto(memcpy) */
														
 
															-void *memcpy(void *pdst,
														
+
															+void *
														
 
															-             const void *psrc,
														
+
															+memcpy(void *pdst, const void *psrc, size_t pn)
														
 
															-             unsigned int pn)
														
 
															 {
														
 
															-  /* Ok.  Now we want the parameters put in special registers.
														
+
															+  /* Now we want the parameters put in special registers.
														
 
															      Make sure the compiler is able to make something useful of this.
														
 
															-      As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
														
+
															+     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
														
 
															      If gcc was all right, it really would need no temporaries, and no
														
 
															-     stack space to save stuff on. */
														
+
															+     stack space to save stuff on.  */
														
 
															-#ifndef MEMPCPY
														
 
															   register void *return_dst __asm__ ("r10") = pdst;
														
 
															-#else
														
+
															+  register unsigned char *dst __asm__ ("r13") = pdst;
														
 
															-  /* FIXME: Use R10 for something.  */
														
+
															+  register unsigned const char *src __asm__ ("r11") = psrc;
														
 
															-# define return_dst dst
														
 
															-#endif
														
 
															-
														
 
															-  register char *dst __asm__ ("r13") = pdst;
														
 
															-  register char *src __asm__ ("r11") = (char *) psrc;
														
 
															   register int n __asm__ ("r12") = pn;
														
 
															-
														
 
															   /* When src is aligned but not dst, this makes a few extra needless
														
 
															      cycles.  I believe it would take as many to check that the
														
 
															      re-alignment was unnecessary.  */
														
 
															   if (((unsigned long) dst & 3) != 0
														
 
															       /* Don't align if we wouldn't copy more than a few bytes; so we
														
 
															-	 don't have to check further for overflows.  */
														
+
															+         don't have to check further for overflows.  */
														
 
															       && n >= 3)
														
 
															   {
														
 
															     if ((unsigned long) dst & 1)
														
 
															-    {
														
+
															+      {
														
 
															-      n--;
														
+
															+        n--;
														
 
															-      *(char*)dst = *(char*)src;
														
+
															+        *dst = *src;
														
 
															-      src++;
														
+
															+        src++;
														
 
															-      dst++;
														
+
															+        dst++;
														
 
															-    }
														
+
															+      }
														
 
															     if ((unsigned long) dst & 2)
														
 
															-    {
														
+
															+      {
														
 
															-      n -= 2;
														
+
															+        n -= 2;
														
 
															-      *(short*)dst = *(short*)src;
														
+
															+        *(short *) dst = *(short *) src;
														
 
															-      src += 2;
														
+
															+        src += 2;
														
 
															-      dst += 2;
														
+
															+        dst += 2;
														
 
															-    }
														
+
															+      }
														
 
															   }
														
 
															-  /* Decide which copying method to use. */
														
+
															+  /* Decide which copying method to use.  */
														
 
															-  if (n >= MEMCPY_BLOCK_THRESHOLD)
														
+
															+  if (n >= MEMCPY_BY_BLOCK_THRESHOLD)
														
 
															-  {
														
+
															+    {
														
 
															-    /* For large copies we use 'movem' */
														
+
															+      /* It is not optimal to tell the compiler about clobbering any
														
 
															-
														
+
															+         registers; that will move the saving/restoring of those registers
														
 
															-  /* It is not optimal to tell the compiler about clobbering any
														
+
															+         to the function prologue/epilogue, and make non-movem sizes
														
 
															-     registers; that will move the saving/restoring of those registers
														
+
															+         suboptimal.  */
														
 
															-     to the function prologue/epilogue, and make non-movem sizes
														
+
															+      __asm__ __volatile__
														
 
															-     suboptimal.
														
+
															+        ("\
														
 
															-
														
+
															+         ;; GCC does promise correct register allocations, but let's    \n\
														
 
															-      This method is not foolproof; it assumes that the "register asm"
														
+
															+         ;; make sure it keeps its promises.                            \n\
														
 
															-     declarations at the beginning of the function really are used
														
+
															+         .ifnc %0-%1-%2,$r13-$r11-$r12                                  \n\
														
 
															-     here (beware: they may be moved to temporary registers).
														
+
															+         .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\"       \n\
														
 
															-      This way, we do not have to save/move the registers around into
														
+
															+         .endif                                                         \n\
														
 
															-     temporaries; we can safely use them straight away.  */
														
+
															+                                                                        \n\
														
 
															-    __asm__ __volatile__ ("\
														
+
															+         ;; Save the registers we'll use in the movem process           \n\
														
 
															-	.syntax no_register_prefix					\n\
														
+
															+         ;; on the stack.                                               \n\
														
 
															-									\n\
														
+
															+         subq   11*4,sp                                                 \n\
														
 
															-        ;; Check that the register asm declaration got right.		\n\
														
+
															+         movem  r10,[sp]                                                \n\
														
 
															-        ;; The GCC manual explicitly says TRT will happen.		\n\
														
+
															+                                                                        \n\
														
 
															-	.ifnc %0-%1-%2,$r13-$r11-$r12					\n\
														
+
															+         ;; Now we've got this:                                         \n\
														
 
															-	.err								\n\
														
+
															+         ;; r11 - src                                                   \n\
														
 
															-	.endif								\n\
														
+
															+         ;; r13 - dst                                                   \n\
														
 
															-									\n\
														
+
															+         ;; r12 - n                                                     \n\
														
 
															-	;; Save the registers we'll use in the movem process		\n\
														
+
															+                                                                        \n\
														
 
															-	;; on the stack.						\n\
														
+
															+         ;; Update n for the first loop.                                \n\
														
 
															-	subq 	11*4,sp							\n\
														
+
															+         subq    44,r12                                                 \n\
														
 
															-	movem	r10,[sp]						\n\
														
+
															+0:                                                                      \n\
														
 
															-									\n\
														
+
															+"
														
 
															-        ;; Now we've got this:						\n\
														
+
															+#ifdef __arch_common_v10_v32
														
 
															-	;; r11 - src							\n\
														
+
															+         /* Cater to branch offset difference between v32 and v10.  We
														
 
															-	;; r13 - dst							\n\
														
+
															+            assume the branch below has an 8-bit offset.  */
														
 
															-	;; r12 - n							\n\
														
+
															+"        setf\n"
														
 
															-									\n\
														
+
															+#endif
														
 
															-        ;; Update n for the first loop					\n\
														
+
															+"        movem  [r11+],r10                                              \n\
														
 
															-        subq    44,r12							\n\
														
+
															+         subq   44,r12                                                  \n\
														
 
															-0:									\n\
														
+
															+         bge     0b                                                     \n\
														
 
															-	movem	[r11+],r10						\n\
														
+
															+         movem  r10,[r13+]                                              \n\
														
 
															-        subq   44,r12							\n\
														
+
															+                                                                        \n\
														
 
															-        bge     0b							\n\
														
+
															+         ;; Compensate for last loop underflowing n.                    \n\
														
 
															-	movem	r10,[r13+]						\n\
														
+
															+         addq   44,r12                                                  \n\
														
 
															-									\n\
														
+
															+                                                                        \n\
														
 
															-        addq   44,r12  ;; compensate for last loop underflowing n	\n\
														
+
															+         ;; Restore registers from stack.                               \n\
														
 
															-									\n\
														
+
															+         movem [sp+],r10"
														
 
															-	;; Restore registers from stack					\n\
														
+
															+
														
 
															-        movem [sp+],r10"
														
+
															+         /* Outputs.  */
														
 
															-
														
+
															+         : "=r" (dst), "=r" (src), "=r" (n)
														
 
															-     /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
														
+
															+
														
 
															-     /* Inputs */ : "0" (dst), "1" (src), "2" (n));
														
+
															+         /* Inputs.  */
														
 
															-  }
														
+
															+         : "0" (dst), "1" (src), "2" (n));
														
 
															+    }
														
 
															-  /* Either we directly starts copying, using dword copying
														
+
															+  while (n >= 16)
														
 
															-     in a loop, or we copy as much as possible with 'movem'
														
+
															+    {
														
 
															-     and then the last block (<44 bytes) is copied here.
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-     This will work since 'movem' will have updated src,dst,n. */
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-  while ( n >= 16 )
														
+
															+      n -= 16;
														
 
															-  {
														
+
															+    }
														
 
															-    *((long*)dst)++ = *((long*)src)++;
														
 
															-    *((long*)dst)++ = *((long*)src)++;
														
 
															-    *((long*)dst)++ = *((long*)src)++;
														
 
															-    *((long*)dst)++ = *((long*)src)++;
														
 
															-    n -= 16;
														
 
															-  }
														
 
															-  /* A switch() is definitely the fastest although it takes a LOT of code.
														
 
															-   * Particularly if you inline code this.
														
 
															-   */
														
 
															   switch (n)
														
 
															-  {
														
+
															+    {
														
 
															     case 0:
														
 
															       break;
														
 
															+
														
 
															     case 1:
														
 
															-      *((char*)dst)++ = *((char*)src)++;
														
+
															+      *dst = *src;
														
 
															       break;
														
 
															+
														
 
															     case 2:
														
 
															-      *((short*)dst)++ = *((short*)src)++;
														
+
															+      *(short *) dst = *(short *) src;
														
 
															       break;
														
 
															+
														
 
															     case 3:
														
 
															-      *((short*)dst)++ = *((short*)src)++;
														
+
															+      *(short *) dst = *(short *) src; dst += 2; src += 2;
														
 
															-      *((char*)dst)++ = *((char*)src)++;
														
+
															+      *dst = *src;
														
 
															       break;
														
 
															+
														
 
															     case 4:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src;
														
 
															       break;
														
 
															+
														
 
															     case 5:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((char*)dst)++ = *((char*)src)++;
														
+
															+      *dst = *src;
														
 
															       break;
														
 
															+
														
 
															     case 6:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((short*)dst)++ = *((short*)src)++;
														
+
															+      *(short *) dst = *(short *) src;
														
 
															       break;
														
 
															+
														
 
															     case 7:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((short*)dst)++ = *((short*)src)++;
														
+
															+      *(short *) dst = *(short *) src; dst += 2; src += 2;
														
 
															-      *((char*)dst)++ = *((char*)src)++;
														
+
															+      *dst = *src;
														
 
															       break;
														
 
															+
														
 
															     case 8:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src;
														
 
															       break;
														
 
															+
														
 
															     case 9:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((char*)dst)++ = *((char*)src)++;
														
+
															+      *dst = *src;
														
 
															       break;
														
 
															+
														
 
															     case 10:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((short*)dst)++ = *((short*)src)++;
														
+
															+      *(short *) dst = *(short *) src;
														
 
															       break;
														
 
															+
														
 
															     case 11:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((short*)dst)++ = *((short*)src)++;
														
+
															+      *(short *) dst = *(short *) src; dst += 2; src += 2;
														
 
															-      *((char*)dst)++ = *((char*)src)++;
														
+
															+      *dst = *src;
														
 
															       break;
														
 
															+
														
 
															     case 12:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src;
														
 
															       break;
														
 
															+
														
 
															     case 13:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((char*)dst)++ = *((char*)src)++;
														
+
															+      *dst = *src;
														
 
															       break;
														
 
															+
														
 
															     case 14:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((short*)dst)++ = *((short*)src)++;
														
+
															+      *(short *) dst = *(short *) src;
														
 
															       break;
														
 
															+
														
 
															     case 15:
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((long*)dst)++ = *((long*)src)++;
														
+
															+      *(long *) dst = *(long *) src; dst += 4; src += 4;
														
 
															-      *((short*)dst)++ = *((short*)src)++;
														
+
															+      *(short *) dst = *(short *) src; dst += 2; src += 2;
														
 
															-      *((char*)dst)++ = *((char*)src)++;
														
+
															+      *dst = *src;
														
 
															       break;
														
 
															-  }
														
+
															+    }
														
 
															-  return return_dst; /* destination pointer. */
														
+
															+  return return_dst;
														
 
															-} /* memcpy() */
														
+
															+}
														
 
															 libc_hidden_def(memcpy)
														
--- a/libc/string/cris/memset.c
+++ b/libc/string/cris/memset.c
@@ -1,271 +1,263 @@
 
															-/* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
														
+
															+/* A memset for CRIS.
														
 
															-   Copyright (C) 1999, 2000 Axis Communications AB.
														
+
															+   Copyright (C) 1999-2008 Axis Communications.
														
 
															-
														
+
															+   All rights reserved.
														
 
															-   This file is part of the GNU C Library.
														
+
															+
														
 
															-
														
+
															+   Redistribution and use in source and binary forms, with or without
														
 
															-   The GNU C Library is free software; you can redistribute it and/or
														
+
															+   modification, are permitted provided that the following conditions
														
 
															-   modify it under the terms of the GNU Library General Public License as
														
+
															+   are met:
														
 
															-   published by the Free Software Foundation; either version 2 of the
														
+
															+
														
 
															-   License, or (at your option) any later version.
														
+
															+   1. Redistributions of source code must retain the above copyright
														
 
															-
														
+
															+      notice, this list of conditions and the following disclaimer.
														
 
															-   The GNU C Library is distributed in the hope that it will be useful,
														
+
															+
														
 
															-   but WITHOUT ANY WARRANTY; without even the implied warranty of
														
+
															+   2. Neither the name of Axis Communications nor the names of its
														
 
															-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
														
+
															+      contributors may be used to endorse or promote products derived
														
 
															-   Library General Public License for more details.
														
+
															+      from this software without specific prior written permission.
														
 
															-
														
+
															+
														
 
															-   You should have received a copy of the GNU Library General Public
														
+
															+   THIS SOFTWARE IS PROVIDED BY AXIS COMMUNICATIONS AND ITS CONTRIBUTORS
														
 
															-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
														
+
															+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
														
 
															-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
														
+
															+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
														
 
															-   Boston, MA 02111-1307, USA.  */
														
+
															+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL AXIS
														
 
															-
														
+
															+   COMMUNICATIONS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
														
 
															-/*#************************************************************************#*/
														
+
															+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
														
 
															-/*#-------------------------------------------------------------------------*/
														
+
															+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
														
 
															-/*#                                                                         */
														
+
															+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
														
 
															-/*# FUNCTION NAME: memset()                                                 */
														
+
															+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
														
 
															-/*#                                                                         */
														
+
															+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
														
 
															-/*# PARAMETERS:  void* dst;   Destination address.                          */
														
+
															+   IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
														
 
															-/*#              int     c;   Value of byte to write.                       */
														
+
															+   POSSIBILITY OF SUCH DAMAGE.  */
														
 
															-/*#              int   len;   Number of bytes to write.                     */
														
+
															+
														
 
															-/*#                                                                         */
														
+
															+/* FIXME: This file should really only be used for reference, as the
														
 
															-/*# RETURNS:     dst.                                                       */
														
+
															+   result depends somewhat on gcc generating what we expect rather
														
 
															-/*#                                                                         */
														
+
															+   than what we describe.  An assembly file should be used instead.  */
														
 
															-/*# DESCRIPTION: Sets the memory dst of length len bytes to c, as standard. */
														
+
															+
														
 
															-/*#              Framework taken from memcpy.  This routine is              */
														
+
															+#include <string.h>
														
 
															-/*#              very sensitive to compiler changes in register allocation. */
														
+
															+
														
 
															-/*#              Should really be rewritten to avoid this problem.          */
														
+
															+/* Note the multiple occurrence of the expression "12*4", including the
														
 
															-/*#                                                                         */
														
+
															+   asm.  It is hard to get it into the asm in a good way.  Thus better to
														
 
															-/*#-------------------------------------------------------------------------*/
														
+
															+   expose the problem everywhere: no macro.  */
														
 
															-/*#                                                                         */
														
+
															+
														
 
															-/*# HISTORY                                                                 */
														
+
															+/* Assuming one cycle per dword written or read (ok, not really true; the
														
 
															-/*#                                                                         */
														
+
															+   world is not ideal), and one cycle per instruction, then 43+3*(n/48-1)
														
 
															-/*# DATE      NAME            CHANGES                                       */
														
+
															+   <= 24+24*(n/48-1) so n >= 45.7; n >= 0.9; we win on the first full
														
 
															-/*# ----      ----            -------                                       */
														
+
															+   48-byte block to set.  */
														
 
															-/*# 990713    HP              Tired of watching this function (or           */
														
+
															+
														
 
															-/*#                           really, the nonoptimized generic              */
														
+
															+#define MEMSET_BY_BLOCK_THRESHOLD (1 * 48)
														
 
															-/*#                           implementation) take up 90% of simulator      */
														
+
															+
														
 
															-/*#                           output.  Measurements needed.                 */
														
+
															+/* No name ambiguities in this file.  */
														
 
															-/*#                                                                         */
														
+
															+__asm__ (".syntax no_register_prefix");
														
 
															-/*#-------------------------------------------------------------------------*/
														
 
															-
														
 
															-/* No, there's no macro saying 12*4, since it is "hard" to get it into
														
 
															-   the asm in a good way.  Thus better to expose the problem everywhere.
														
 
															-   */
														
 
															-
														
 
															-/* Assuming 1 cycle per dword written or read (ok, not really true), and
														
 
															-   one per instruction, then 43+3*(n/48-1) <= 24+24*(n/48-1)
														
 
															-   so n >= 45.7; n >= 0.9; we win on the first full 48-byte block to set. */
														
 
															-
														
 
															-#define ZERO_BLOCK_SIZE (1*12*4)
														
 
															-
														
 
															-void *memset(void *, int, unsigned long);
														
 
															 /* Experimentally off - libc_hidden_proto(memset) */
														
 
															-void *memset(void *pdst,
														
+
															+void *memset(void *pdst, int c, unsigned int plen)
														
 
															-             int c,
														
 
															-             unsigned long plen)
														
 
															 {
														
 
															-  /* Ok.  Now we want the parameters put in special registers.
														
+
															+  /* Now we want the parameters in special registers.  Make sure the
														
 
															-     Make sure the compiler is able to make something useful of this. */
														
+
															+     compiler does something usable with this.  */
														
 
															   register char *return_dst __asm__ ("r10") = pdst;
														
 
															-  register long n __asm__ ("r12") = plen;
														
+
															+  register int n __asm__ ("r12") = plen;
														
 
															   register int lc __asm__ ("r11") = c;
														
 
															-  /* Most apps use memset sanely.  Only those memsetting about 3..4
														
+
															+  /* Most apps use memset sanely.  Memsetting about 3..4 bytes or less gets
														
 
															-     bytes or less get penalized compared to the generic implementation
														
+
															+     penalized here compared to the generic implementation.  */
														
 
															-     - and that's not really sane use. */
														
 
															-  /* Ugh.  This is fragile at best.  Check with newer GCC releases, if
														
+
															+  /* This is fragile performancewise at best.  Check with newer GCC
														
 
															-     they compile cascaded "x |= x << 8" sanely! */
														
+
															+     releases, if they compile cascaded "x |= x << 8" to sane code.  */
														
 
															-  __asm__("movu.b %0,$r13 \n\
														
+
															+  __asm__("movu.b %0,r13                                                \n\
														
 
															-	   lslq 8,$r13    \n\
														
+
															+           lslq 8,r13                                                   \n\
														
 
															-	   move.b %0,$r13 \n\
														
+
															+           move.b %0,r13                                                \n\
														
 
															-	   move.d $r13,%0 \n\
														
+
															+           move.d r13,%0                                                \n\
														
 
															-	   lslq 16,$r13   \n\
														
+
															+           lslq 16,r13                                                  \n\
														
 
															-	   or.d $r13,%0"
														
+
															+           or.d r13,%0"
														
 
															-          : "=r" (lc) : "0" (lc) : "r13");
														
+
															+          : "=r" (lc)           /* Outputs.  */
														
 
															+          : "0" (lc)            /* Inputs.  */
														
 
															+          : "r13");             /* Trash.  */
														
 
															   {
														
 
															     register char *dst __asm__ ("r13") = pdst;
														
 
															-  if (((unsigned long) pdst & 3) != 0
														
+
															+    if (((unsigned long) pdst & 3) != 0
														
 
															-     /* Oops! n=0 must be a legal call, regardless of alignment. */
														
+
															+        /* Oops! n = 0 must be a valid call, regardless of alignment.  */
														
 
															-      && n >= 3)
														
+
															+        && n >= 3)
														
 
															-  {
														
+
															+      {
														
 
															-    if ((unsigned long)dst & 1)
														
+
															+        if ((unsigned long) dst & 1)
														
 
															-    {
														
+
															+          {
														
 
															-      *dst = (char) lc;
														
+
															+            *dst = (char) lc;
														
 
															-      n--;
														
+
															+            n--;
														
 
															-      dst++;
														
+
															+            dst++;
														
 
															-    }
														
+
															+          }
														
 
															-
														
 
															-    if ((unsigned long)dst & 2)
														
 
															-    {
														
 
															-      *(short *)dst = lc;
														
 
															-      n -= 2;
														
 
															-      dst += 2;
														
 
															-    }
														
 
															-  }
														
 
															-  /* Now the fun part.  For the threshold value of this, check the equation
														
+
															+        if ((unsigned long) dst & 2)
														
 
															-     above. */
														
+
															+          {
														
 
															-  /* Decide which copying method to use. */
														
+
															+            *(short *) dst = lc;
														
 
															-  if (n >= ZERO_BLOCK_SIZE)
														
+
															+            n -= 2;
														
 
															-  {
														
+
															+            dst += 2;
														
 
															-    /* For large copies we use 'movem' */
														
+
															+          }
														
 
															-
														
+
															+      }
														
 
															-  /* It is not optimal to tell the compiler about clobbering any
														
 
															-     registers; that will move the saving/restoring of those registers
														
 
															-     to the function prologue/epilogue, and make non-movem sizes
														
 
															-     suboptimal.
														
 
															-
														
 
															-      This method is not foolproof; it assumes that the "asm reg"
														
 
															-     declarations at the beginning of the function really are used
														
 
															-     here (beware: they may be moved to temporary registers).
														
 
															-      This way, we do not have to save/move the registers around into
														
 
															-     temporaries; we can safely use them straight away.  */
														
 
															-    __asm__ __volatile__ ("								\n\
														
 
															-	.syntax no_register_prefix						\n\
														
 
															-										\n\
														
 
															-        ;; Check that the register asm declaration got right.			\n\
														
 
															-        ;; The GCC manual explicitly says there's no warranty for that (too).	\n\
														
 
															-	.ifnc %0-%1-%4,$r13-$r12-$r11						\n\
														
 
															-	.err									\n\
														
 
															-	.endif									\n\
														
 
															-										\n\
														
 
															-	;; Save the registers we'll clobber in the movem process		\n\
														
 
															-	;; on the stack.  Don't mention them to gcc, it will only be		\n\
														
 
															-	;; upset.								\n\
														
 
															-	subq 	11*4,sp								\n\
														
 
															-        movem   r10,[sp]							\n\
														
 
															-										\n\
														
 
															-        move.d  r11,r0								\n\
														
 
															-        move.d  r11,r1								\n\
														
 
															-        move.d  r11,r2								\n\
														
 
															-        move.d  r11,r3								\n\
														
 
															-        move.d  r11,r4								\n\
														
 
															-        move.d  r11,r5								\n\
														
 
															-        move.d  r11,r6								\n\
														
 
															-        move.d  r11,r7								\n\
														
 
															-        move.d  r11,r8								\n\
														
 
															-        move.d  r11,r9								\n\
														
 
															-        move.d  r11,r10								\n\
														
 
															-										\n\
														
 
															-        ;; Now we've got this:							\n\
														
 
															-	;; r13 - dst								\n\
														
 
															-	;; r12 - n								\n\
														
 
															-										\n\
														
 
															-        ;; Update n for the first loop						\n\
														
 
															-        subq    12*4,r12							\n\
														
 
															-0:										\n\
														
 
															-        subq   12*4,r12								\n\
														
 
															-        bge     0b								\n\
														
 
															-	movem	r11,[r13+]							\n\
														
 
															-										\n\
														
 
															-        addq   12*4,r12  ;; compensate for last loop underflowing n		\n\
														
 
															-										\n\
														
 
															-	;; Restore registers from stack						\n\
														
 
															-        movem [sp+],r10"
														
 
															-
														
 
															-     /* Outputs */ : "=r" (dst), "=r" (n)
														
 
															-     /* Inputs */ : "0" (dst), "1" (n), "r" (lc));
														
 
															-  }
														
+
															+    /* Decide which setting method to use.  */
														
 
															+    if (n >= MEMSET_BY_BLOCK_THRESHOLD)
														
 
															+      {
														
 
															+        /* It is not optimal to tell the compiler about clobbering any
														
 
															+           registers; that will move the saving/restoring of those registers
														
 
															+           to the function prologue/epilogue, and make non-block sizes
														
 
															+           suboptimal.  */
														
 
															+        __asm__ __volatile__
														
 
															+          ("\
														
 
															+           ;; GCC does promise correct register allocations, but let's  \n\
														
 
															+           ;; make sure it keeps its promises.                          \n\
														
 
															+           .ifnc %0-%1-%4,$r13-$r12-$r11                                \n\
														
 
															+           .error \"GCC reg alloc bug: %0-%1-%4 != $r13-$r12-$r11\"     \n\
														
 
															+           .endif                                                       \n\
														
 
															+                                                                        \n\
														
 
															+           ;; Save the registers we'll clobber in the movem process     \n\
														
 
															+           ;; on the stack.  Don't mention them to gcc, it will only be \n\
														
 
															+           ;; upset.                                                    \n\
														
 
															+           subq    11*4,sp                                              \n\
														
 
															+           movem   r10,[sp]                                             \n\
														
 
															+                                                                        \n\
														
 
															+           move.d  r11,r0                                               \n\
														
 
															+           move.d  r11,r1                                               \n\
														
 
															+           move.d  r11,r2                                               \n\
														
 
															+           move.d  r11,r3                                               \n\
														
 
															+           move.d  r11,r4                                               \n\
														
 
															+           move.d  r11,r5                                               \n\
														
 
															+           move.d  r11,r6                                               \n\
														
 
															+           move.d  r11,r7                                               \n\
														
 
															+           move.d  r11,r8                                               \n\
														
 
															+           move.d  r11,r9                                               \n\
														
 
															+           move.d  r11,r10                                              \n\
														
 
															+                                                                        \n\
														
 
															+           ;; Now we've got this:                                       \n\
														
 
															+           ;; r13 - dst                                                 \n\
														
 
															+           ;; r12 - n                                                   \n\
														
 
															+                                                                        \n\
														
 
															+           ;; Update n for the first loop                               \n\
														
 
															+           subq    12*4,r12                                             \n\
														
 
															+0:                                                                      \n\
														
 
															+"
														
 
															+#ifdef __arch_common_v10_v32
														
 
															+           /* Cater to branch offset difference between v32 and v10.  We
														
 
															+              assume the branch below has an 8-bit offset.  */
														
 
															+"          setf\n"
														
 
															+#endif
														
 
															+"          subq   12*4,r12                                              \n\
														
 
															+           bge     0b                                                   \n\
														
 
															+           movem        r11,[r13+]                                      \n\
														
 
															+                                                                        \n\
														
 
															+           ;; Compensate for last loop underflowing n.                  \n\
														
 
															+           addq   12*4,r12                                              \n\
														
 
															+                                                                        \n\
														
 
															+           ;; Restore registers from stack.                             \n\
														
 
															+           movem [sp+],r10"
														
 
															+
														
 
															+           /* Outputs.  */
														
 
															+           : "=r" (dst), "=r" (n)
														
 
															+
														
 
															+           /* Inputs.  */
														
 
															+           : "0" (dst), "1" (n), "r" (lc));
														
 
															+      }
														
 
															+
														
 
															+    /* An ad-hoc unroll, used for 4*12-1..16 bytes. */
														
 
															+    while (n >= 16)
														
 
															+      {
														
 
															+        *(long *) dst = lc; dst += 4;
														
 
															+        *(long *) dst = lc; dst += 4;
														
 
															+        *(long *) dst = lc; dst += 4;
														
 
															+        *(long *) dst = lc; dst += 4;
														
 
															+        n -= 16;
														
 
															+      }
														
 
															-    /* Either we directly starts copying, using dword copying
														
 
															-       in a loop, or we copy as much as possible with 'movem'
														
 
															-       and then the last block (<44 bytes) is copied here.
														
 
															-       This will work since 'movem' will have updated src,dst,n. */
														
 
															-
														
 
															-    while ( n >= 16 )
														
 
															-    {
														
 
															-      *((long*)dst)++ = lc;
														
 
															-      *((long*)dst)++ = lc;
														
 
															-      *((long*)dst)++ = lc;
														
 
															-      *((long*)dst)++ = lc;
														
 
															-      n -= 16;
														
 
															-    }
														
 
															-
														
 
															-    /* A switch() is definitely the fastest although it takes a LOT of code.
														
 
															-     * Particularly if you inline code this.
														
 
															-     */
														
 
															     switch (n)
														
 
															-    {
														
+
															+      {
														
 
															       case 0:
														
 
															         break;
														
 
															+
														
 
															       case 1:
														
 
															-        *(char*)dst = (char) lc;
														
+
															+        *dst = (char) lc;
														
 
															         break;
														
 
															+
														
 
															       case 2:
														
 
															-        *(short*)dst = (short) lc;
														
+
															+        *(short *) dst = (short) lc;
														
 
															         break;
														
 
															+
														
 
															       case 3:
														
 
															-        *((short*)dst)++ = (short) lc;
														
+
															+        *(short *) dst = (short) lc; dst += 2;
														
 
															-        *(char*)dst = (char) lc;
														
+
															+        *dst = (char) lc;
														
 
															         break;
														
 
															+
														
 
															       case 4:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc;
														
 
															         break;
														
 
															+
														
 
															       case 5:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *(char*)dst = (char) lc;
														
+
															+        *dst = (char) lc;
														
 
															         break;
														
 
															+
														
 
															       case 6:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *(short*)dst = (short) lc;
														
+
															+        *(short *) dst = (short) lc;
														
 
															         break;
														
 
															+
														
 
															       case 7:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((short*)dst)++ = (short) lc;
														
+
															+        *(short *) dst = (short) lc; dst += 2;
														
 
															-        *(char*)dst = (char) lc;
														
+
															+        *dst = (char) lc;
														
 
															         break;
														
 
															+
														
 
															       case 8:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc;
														
 
															         break;
														
 
															+
														
 
															       case 9:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *(char*)dst = (char) lc;
														
+
															+        *dst = (char) lc;
														
 
															         break;
														
 
															+
														
 
															       case 10:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *(short*)dst = (short) lc;
														
+
															+        *(short *) dst = (short) lc;
														
 
															         break;
														
 
															+
														
 
															       case 11:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((short*)dst)++ = (short) lc;
														
+
															+        *(short *) dst = (short) lc; dst += 2;
														
 
															-        *(char*)dst = (char) lc;
														
+
															+        *dst = (char) lc;
														
 
															         break;
														
 
															+
														
 
															       case 12:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc;
														
 
															         break;
														
 
															+
														
 
															       case 13:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *(char*)dst = (char) lc;
														
+
															+        *dst = (char) lc;
														
 
															         break;
														
 
															+
														
 
															       case 14:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *(short*)dst = (short) lc;
														
+
															+        *(short *) dst = (short) lc;
														
 
															         break;
														
 
															+
														
 
															       case 15:
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((long*)dst)++ = lc;
														
+
															+        *(long *) dst = lc; dst += 4;
														
 
															-        *((short*)dst)++ = (short) lc;
														
+
															+        *(short *) dst = (short) lc; dst += 2;
														
 
															-        *(char*)dst = (char) lc;
														
+
															+        *dst = (char) lc;
														
 
															         break;
														
 
															-    }
														
+
															+      }
														
 
															   }
														
 
															-  return return_dst; /* destination pointer. */
														
+
															+  return return_dst;
														
 
															-} /* memset() */
														
+
															+}
														
 
															 libc_hidden_def(memset)