memset.c 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. /*
  2. * This string-include defines all string functions as inline
  3. * functions. Use gcc. It also assumes ds=es=data space, this should be
  4. * normal. Most of the string-functions are rather heavily hand-optimized,
  5. * see especially strtok,strstr,str[c]spn. They should work, but are not
  6. * very easy to understand. Everything is done entirely within the register
  7. * set, making the functions fast and clean. String instructions have been
  8. * used through-out, making for "slightly" unclear code :-)
  9. *
  10. * NO Copyright (C) 1991, 1992 Linus Torvalds,
  11. * consider these trivial functions to be PD.
  12. */
  13. /*
  14. * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
  15. *
  16. * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
  17. */
  18. /*
  19. * Modified for uClibc by Erik Andersen <andersen@codepoet.org>
  20. * These make no attempt to use nifty things like mmx/3dnow/etc.
  21. * These are not inline, and will therefore not be as fast as
  22. * modifying the headers to use inlines (and cannot therefore
  23. * do tricky things when dealing with const memory). But they
  24. * should (I hope!) be faster than their generic equivalents....
  25. *
  26. * More importantly, these should provide a good example for
  27. * others to follow when adding arch specific optimizations.
  28. * -Erik
  29. *
  30. * 2009-04: modified by Denys Vlasenko <vda.linux@googlemail.com>
  31. * Filling byte-by-byte is a bit too slow. I prefer a 46-byte function
  32. * that fills 4x faster over a 21-byte one.
  33. */
  34. #include <string.h>
  35. #undef memset
  36. void *memset(void *s, int c, size_t count)
  37. {
  38. int reg, edi;
  39. __asm__ __volatile__(
  40. /* Most of the time, count is divisible by 4 and nonzero */
  41. /* It's better to make this case faster */
  42. /* " jecxz 9f\n" - (optional) count == 0: goto ret */
  43. " mov %%ecx, %1\n"
  44. " shr $2, %%ecx\n"
  45. " jz 1f\n" /* zero words: goto fill_bytes */
  46. /* extend 8-bit fill to 32 bits */
  47. " movzx %%al, %%eax\n" /* 3 bytes */
  48. /* or: " and $0xff, %%eax\n" - 5 bytes */
  49. " imul $0x01010101, %%eax\n" /* 6 bytes */
  50. /* fill full words */
  51. " rep; stosl\n"
  52. /* fill 0-3 bytes */
  53. "1: and $3, %1\n"
  54. " jz 9f\n" /* (count & 3) == 0: goto end */
  55. "2: stosb\n"
  56. " dec %1\n"
  57. " jnz 2b\n"
  58. /* end */
  59. "9:\n"
  60. : "=&D" (edi), "=&r" (reg)
  61. : "0" (s), "a" (c), "c" (count)
  62. : "memory"
  63. );
  64. return s;
  65. }
  66. libc_hidden_def(memset)
  67. /*
  68. gcc 4.3.1
  69. =========
  70. 57 push %edi
  71. 8b 7c 24 08 mov 0x8(%esp),%edi
  72. 8b 4c 24 10 mov 0x10(%esp),%ecx
  73. 8b 44 24 0c mov 0xc(%esp),%eax
  74. 89 ca mov %ecx,%edx
  75. c1 e9 02 shr $0x2,%ecx
  76. 74 0b je 1f <__GI_memset+0x1f>
  77. 0f b6 c0 movzbl %al,%eax
  78. 69 c0 01 01 01 01 imul $0x1010101,%eax,%eax
  79. f3 ab rep stos %eax,%es:(%edi)
  80. 83 e2 03 and $0x3,%edx
  81. 74 04 je 28 <__GI_memset+0x28>
  82. aa stos %al,%es:(%edi)
  83. 4a dec %edx
  84. 75 fc jne 24 <__GI_memset+0x24>
  85. 8b 44 24 08 mov 0x8(%esp),%eax
  86. 5f pop %edi
  87. c3 ret
  88. */