/* memset.S */
/* memset/bzero -- set memory area to CH/0
   Optimized version for x86-64.
   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Andreas Jaeger <aj@suse.de>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>. */
  /* Project header; presumably supplies ENTRY, END and libc_hidden_def,
     none of which are defined in this file.  */
#include "_glibc_inc.h"

/* BEWARE: `#ifdef memset' means that memset is redefined as `bzero'.

   BZERO_P is 1 for that bzero build and 0 for the plain memset build.
   It is set with an explicit #ifdef rather than
   `#define BZERO_P (defined memset)': a `defined' operator that only
   appears after macro expansion inside #if is undefined behaviour in
   ISO C and is diagnosed by GCC's -Wexpansion-to-defined.  */
#ifdef memset
# define BZERO_P 1
#else
# define BZERO_P 0
#endif

/* Length in bytes (as an AT&T immediate) at or above which the 64-byte
   block loop switches to non-temporal stores.  This is somewhat
   experimental and could be made dependent on the cache size.  */
#define LARGE $120000

        .text
  /* void *memset (void *s, int c, size_t n)
     void  bzero  (void *s, size_t n)          -- when BZERO_P is non-zero

     Syntax: GNU as, AT&T operand order (source, destination).

     In:    %rdi = s, %sil = fill byte, %rdx = n.
            bzero build: %rsi = n on entry; it is moved to %rdx and the
            fill byte is forced to zero.
     Out:   memset build only: %rax = s.
     Uses:  %rax, %rcx, %rdx, %r8, flags (and %rsi in the bzero build).
            The stack is not touched.

     Strategy:
       n <= 7           byte loop only.
       otherwise        byte stores until the cursor is 8-byte aligned,
                        then 64-byte blocks, then 8-byte words, then the
                        last 0..7 bytes.
       remaining >= LARGE after alignment
                        the 64-byte blocks are written with movnti so
                        the fill does not displace cached data; an
                        sfence follows so these weakly-ordered stores
                        are globally visible before any later store.  */
ENTRY (memset)
#if BZERO_P
        mov     %rsi,%rdx               /* Adjust parameter. */
        xorl    %esi,%esi               /* Fill with 0s (clears all of %rsi). */
#endif
        cmp     $0x7,%rdx               /* Check for small length. */
        mov     %rdi,%rcx               /* %rcx = write cursor; %rdi is kept
                                           for the return value.  mov leaves
                                           the flags from cmp untouched. */
        jbe     .Lsmall

#if BZERO_P
        mov     %rsi,%r8                /* Just copy 0. */
#else
        /* Replicate the fill byte into all eight bytes of %r8:
           0x0101010101010101 * (unsigned char) c.  */
        movabs  $0x0101010101010101,%r8
        movzbl  %sil,%eax
        imul    %rax,%r8
#endif
        test    $0x7,%edi               /* Check for alignment. */
        jz      .Lcheck_large

        /* Next 3 insns are 9 bytes total, make sure we decode them in one go */
        .p2align 4,,9
.Lalign_head:
        /* Align ptr to 8 byte.  n >= 8 on entry and at most 7 bytes are
           written here, so %rdx stays positive.  */
        mov     %sil,(%rcx)
        dec     %rdx
        inc     %rcx
        test    $0x7,%cl
        jnz     .Lalign_head

.Lcheck_large:
        /* %rax = number of whole 64-byte blocks left.  */
        mov     %rdx,%rax
        shr     $0x6,%rax
        je      .Lfill_tail             /* fewer than 64 bytes left */
        cmp     LARGE, %rdx             /* Check for really large regions. */
        jae     .Lfill64_nt

        /* Next 3 insns are 11 bytes total, make sure we decode them in one go */
        .p2align 4,,11
.Lfill64:
        /* Fill 64 bytes. */
        mov     %r8,(%rcx)
        mov     %r8,0x8(%rcx)
        mov     %r8,0x10(%rcx)
        mov     %r8,0x18(%rcx)
        mov     %r8,0x20(%rcx)
        mov     %r8,0x28(%rcx)
        mov     %r8,0x30(%rcx)
        mov     %r8,0x38(%rcx)
        add     $0x40,%rcx
        dec     %rax
        jne     .Lfill64

.Lfill_tail:
        /* Fill final bytes: fewer than 64 remain.  The 32-bit and also
           clears the upper half of %rdx.  */
        and     $0x3f,%edx
        mov     %rdx,%rax
        shr     $0x3,%rax               /* %rax = whole 8-byte words left */
        je      .Ltail_bytes
.Lfill8:
        /* First in chunks of 8 bytes. */
        mov     %r8,(%rcx)
        add     $0x8,%rcx
        dec     %rax
        jne     .Lfill8
.Ltail_bytes:
        and     $0x7,%edx
.Lsmall:
        /* %rdx = 0..7 bytes still to write; also the entry point for n <= 7. */
        test    %rdx,%rdx
        je      .Ldone
.Lfill1:
        /* And finally as bytes (up to 7). */
        mov     %sil,(%rcx)
        inc     %rcx
        dec     %rdx
        jne     .Lfill1
.Ldone:
#if !BZERO_P
        /* Load result (only if used as memset). */
        mov     %rdi,%rax               /* start address of destination is result */
#endif
        retq

        /* Next 3 insns are 14 bytes total, make sure we decode them in one go */
        .p2align 4,,14
.Lfill64_nt:
        /* Fill 64 bytes without polluting the cache. */
        /* We could use movntdq %xmm0,(%rcx) here to further
           speed up for large cases but let's not use XMM registers. */
        movnti  %r8,(%rcx)
        movnti  %r8,0x8(%rcx)
        movnti  %r8,0x10(%rcx)
        movnti  %r8,0x18(%rcx)
        movnti  %r8,0x20(%rcx)
        movnti  %r8,0x28(%rcx)
        movnti  %r8,0x30(%rcx)
        movnti  %r8,0x38(%rcx)
        add     $0x40,%rcx
        dec     %rax
        jne     .Lfill64_nt
        /* Non-temporal stores are weakly ordered.  Fence them so the
           filled region is globally visible before any store that
           follows (the tail here, or the caller publishing the buffer).
           sfence changes neither registers nor flags.  */
        sfence
        jmp     .Lfill_tail
END (memset)
  /* libc_hidden_def is applied only in the plain memset build.  The macro
     is defined outside this file; presumably it emits the library-internal
     hidden alias for the symbol (confirm against the project header).  */
#if BZERO_P
/* bzero build: nothing to emit. */
#else
libc_hidden_def(memset)
#endif