memcpy.c 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264
  1. /* Copyright (C) 2001, 2003 Free Software Foundation, Inc.
  2. Copyright (C) 1994, 1995, 2000 Axis Communications AB.
  3. This file is part of the GNU C Library.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Library General Public License as
  6. published by the Free Software Foundation; either version 2 of the
  7. License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Library General Public License for more details.
  12. You should have received a copy of the GNU Library General Public
  13. License along with the GNU C Library; see the file COPYING.LIB. If not,
  14. write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  15. Boston, MA 02111-1307, USA. */
  16. /*#************************************************************************#*/
  17. /*#-------------------------------------------------------------------------*/
  18. /*# */
  19. /*# FUNCTION NAME: memcpy() */
  20. /*# */
  21. /*# PARAMETERS: void* dst; Destination address. */
  22. /*# void* src; Source address. */
  23. /*# int len; Number of bytes to copy. */
  24. /*# */
  25. /*# RETURNS: dst. */
  26. /*# */
  27. /*# DESCRIPTION: Copies len bytes of memory from src to dst. No guarantees */
  28. /*# about copying of overlapping memory areas. This routine is */
  29. /*# very sensitive to compiler changes in register allocation. */
  30. /*# Should really be rewritten to avoid this problem. */
  31. /*# */
  32. /*#-------------------------------------------------------------------------*/
  33. /*# */
  34. /*# HISTORY */
  35. /*# */
  36. /*# DATE NAME CHANGES */
  37. /*# ---- ---- ------- */
  38. /*# 941007 Kenny R Creation */
  39. /*# 941011 Kenny R Lots of optimizations and inlining. */
  40. /*# 941129 Ulf A Adapted for use in libc. */
  41. /*# 950216 HP N==0 forgotten if non-aligned src/dst. */
  42. /*# Added some optimizations. */
  43. /*# 001025 HP Make src and dst char *. Align dst to */
  44. /*# dword, not just word-if-both-src-and-dst- */
  45. /*# are-misaligned. */
  46. /*# 070806 RW Modified for uClibc */
  47. /*# (__arch_v32 -> __CONFIG_CRISV32__, */
  48. /*# include features.h to reach it.) */
  49. /*# */
  50. /*#-------------------------------------------------------------------------*/
  51. #include <features.h>
  /* MEMCPY_BLOCK_THRESHOLD: smallest remaining byte count for which
   memcpy() takes its 'movem' block-copy path; anything shorter goes
   straight to the dword loop.  Both values are multiples of 44, the
   amount the block-copy loop moves per iteration.  */
#ifdef __CONFIG_CRISV32__
/* For CRISv32, movem is very cheap.  */
# define MEMCPY_BLOCK_THRESHOLD (44)
#else
/* Break even between movem and move16 is at 38.7*2, but modulo 44.  */
# define MEMCPY_BLOCK_THRESHOLD (44*2)
#endif
  59. void *memcpy(void *, const void *, unsigned int);
  60. libc_hidden_proto(memcpy)
  61. void *memcpy(void *pdst,
  62. const void *psrc,
  63. unsigned int pn)
  64. {
  65. /* Ok. Now we want the parameters put in special registers.
  66. Make sure the compiler is able to make something useful of this.
  67. As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).
  68. If gcc was allright, it really would need no temporaries, and no
  69. stack space to save stuff on. */
  70. #ifndef MEMPCPY
  71. register void *return_dst __asm__ ("r10") = pdst;
  72. #else
  73. /* FIXME: Use R10 for something. */
  74. # define return_dst dst
  75. #endif
  76. register char *dst __asm__ ("r13") = pdst;
  77. register char *src __asm__ ("r11") = (char *) psrc;
  78. register int n __asm__ ("r12") = pn;
  79. /* When src is aligned but not dst, this makes a few extra needless
  80. cycles. I believe it would take as many to check that the
  81. re-alignment was unnecessary. */
  82. if (((unsigned long) dst & 3) != 0
  83. /* Don't align if we wouldn't copy more than a few bytes; so we
  84. don't have to check further for overflows. */
  85. && n >= 3)
  86. {
  87. if ((unsigned long) dst & 1)
  88. {
  89. n--;
  90. *(char*)dst = *(char*)src;
  91. src++;
  92. dst++;
  93. }
  94. if ((unsigned long) dst & 2)
  95. {
  96. n -= 2;
  97. *(short*)dst = *(short*)src;
  98. src += 2;
  99. dst += 2;
  100. }
  101. }
  102. /* Decide which copying method to use. */
  103. if (n >= MEMCPY_BLOCK_THRESHOLD)
  104. {
  105. /* For large copies we use 'movem' */
  106. /* It is not optimal to tell the compiler about clobbering any
  107. registers; that will move the saving/restoring of those registers
  108. to the function prologue/epilogue, and make non-movem sizes
  109. suboptimal.
  110. This method is not foolproof; it assumes that the "register asm"
  111. declarations at the beginning of the function really are used
  112. here (beware: they may be moved to temporary registers).
  113. This way, we do not have to save/move the registers around into
  114. temporaries; we can safely use them straight away. */
  115. __asm__ __volatile__ ("\
  116. .syntax no_register_prefix \n\
  117. \n\
  118. ;; Check that the register asm declaration got right. \n\
  119. ;; The GCC manual explicitly says TRT will happen. \n\
  120. .ifnc %0-%1-%2,$r13-$r11-$r12 \n\
  121. .err \n\
  122. .endif \n\
  123. \n\
  124. ;; Save the registers we'll use in the movem process \n\
  125. ;; on the stack. \n\
  126. subq 11*4,sp \n\
  127. movem r10,[sp] \n\
  128. \n\
  129. ;; Now we've got this: \n\
  130. ;; r11 - src \n\
  131. ;; r13 - dst \n\
  132. ;; r12 - n \n\
  133. \n\
  134. ;; Update n for the first loop \n\
  135. subq 44,r12 \n\
  136. 0: \n\
  137. movem [r11+],r10 \n\
  138. subq 44,r12 \n\
  139. bge 0b \n\
  140. movem r10,[r13+] \n\
  141. \n\
  142. addq 44,r12 ;; compensate for last loop underflowing n \n\
  143. \n\
  144. ;; Restore registers from stack \n\
  145. movem [sp+],r10"
  146. /* Outputs */ : "=r" (dst), "=r" (src), "=r" (n)
  147. /* Inputs */ : "0" (dst), "1" (src), "2" (n));
  148. }
  149. /* Either we directly starts copying, using dword copying
  150. in a loop, or we copy as much as possible with 'movem'
  151. and then the last block (<44 bytes) is copied here.
  152. This will work since 'movem' will have updated src,dst,n. */
  153. while ( n >= 16 )
  154. {
  155. *((long*)dst)++ = *((long*)src)++;
  156. *((long*)dst)++ = *((long*)src)++;
  157. *((long*)dst)++ = *((long*)src)++;
  158. *((long*)dst)++ = *((long*)src)++;
  159. n -= 16;
  160. }
  161. /* A switch() is definitely the fastest although it takes a LOT of code.
  162. * Particularly if you inline code this.
  163. */
  164. switch (n)
  165. {
  166. case 0:
  167. break;
  168. case 1:
  169. *((char*)dst)++ = *((char*)src)++;
  170. break;
  171. case 2:
  172. *((short*)dst)++ = *((short*)src)++;
  173. break;
  174. case 3:
  175. *((short*)dst)++ = *((short*)src)++;
  176. *((char*)dst)++ = *((char*)src)++;
  177. break;
  178. case 4:
  179. *((long*)dst)++ = *((long*)src)++;
  180. break;
  181. case 5:
  182. *((long*)dst)++ = *((long*)src)++;
  183. *((char*)dst)++ = *((char*)src)++;
  184. break;
  185. case 6:
  186. *((long*)dst)++ = *((long*)src)++;
  187. *((short*)dst)++ = *((short*)src)++;
  188. break;
  189. case 7:
  190. *((long*)dst)++ = *((long*)src)++;
  191. *((short*)dst)++ = *((short*)src)++;
  192. *((char*)dst)++ = *((char*)src)++;
  193. break;
  194. case 8:
  195. *((long*)dst)++ = *((long*)src)++;
  196. *((long*)dst)++ = *((long*)src)++;
  197. break;
  198. case 9:
  199. *((long*)dst)++ = *((long*)src)++;
  200. *((long*)dst)++ = *((long*)src)++;
  201. *((char*)dst)++ = *((char*)src)++;
  202. break;
  203. case 10:
  204. *((long*)dst)++ = *((long*)src)++;
  205. *((long*)dst)++ = *((long*)src)++;
  206. *((short*)dst)++ = *((short*)src)++;
  207. break;
  208. case 11:
  209. *((long*)dst)++ = *((long*)src)++;
  210. *((long*)dst)++ = *((long*)src)++;
  211. *((short*)dst)++ = *((short*)src)++;
  212. *((char*)dst)++ = *((char*)src)++;
  213. break;
  214. case 12:
  215. *((long*)dst)++ = *((long*)src)++;
  216. *((long*)dst)++ = *((long*)src)++;
  217. *((long*)dst)++ = *((long*)src)++;
  218. break;
  219. case 13:
  220. *((long*)dst)++ = *((long*)src)++;
  221. *((long*)dst)++ = *((long*)src)++;
  222. *((long*)dst)++ = *((long*)src)++;
  223. *((char*)dst)++ = *((char*)src)++;
  224. break;
  225. case 14:
  226. *((long*)dst)++ = *((long*)src)++;
  227. *((long*)dst)++ = *((long*)src)++;
  228. *((long*)dst)++ = *((long*)src)++;
  229. *((short*)dst)++ = *((short*)src)++;
  230. break;
  231. case 15:
  232. *((long*)dst)++ = *((long*)src)++;
  233. *((long*)dst)++ = *((long*)src)++;
  234. *((long*)dst)++ = *((long*)src)++;
  235. *((short*)dst)++ = *((short*)src)++;
  236. *((char*)dst)++ = *((char*)src)++;
  237. break;
  238. }
  239. return return_dst; /* destination pointer. */
  240. } /* memcpy() */
  241. libc_hidden_def(memcpy)