memset.S 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. /* memset/bzero -- set memory area to CH/0
  2. Optimized version for x86-64.
  3. Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
  4. This file is part of the GNU C Library.
  5. Contributed by Andreas Jaeger <aj@suse.de>.
  6. The GNU C Library is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU Lesser General Public
  8. License as published by the Free Software Foundation; either
  9. version 2.1 of the License, or (at your option) any later version.
  10. The GNU C Library is distributed in the hope that it will be useful,
  11. but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. Lesser General Public License for more details.
  14. You should have received a copy of the GNU Lesser General Public
  15. License along with the GNU C Library; if not, write to the Free
  16. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  17. 02111-1307 USA. */
  18. #include "_glibc_inc.h"
  19. /* BEWARE: `#ifdef memset' means that memset is redefined as `bzero' */
  20. #define BZERO_P (defined memset)
  21. /* This is somehow experimental and could made dependend on the cache
  22. size. */
  23. #define LARGE $120000
  24. .text
  25. #if !BZERO_P && defined PIC && !defined NOT_IN_libc
  26. ENTRY (__memset_chk)
  27. cmpq %rdx, %rcx
  28. jb HIDDEN_JUMPTARGET (__chk_fail)
  29. END (__memset_chk)
  30. #endif
  31. .global memset
  32. .set memset,__memset
  33. .hidden __memset
  34. ENTRY (__memset)
  35. #if BZERO_P
  36. mov %rsi,%rdx /* Adjust parameter. */
  37. xorl %esi,%esi /* Fill with 0s. */
  38. #endif
  39. cmp $0x7,%rdx /* Check for small length. */
  40. mov %rdi,%rcx /* Save ptr as return value. */
  41. jbe 7f
  42. #if BZERO_P
  43. mov %rsi,%r8 /* Just copy 0. */
  44. #else
  45. /* Populate 8 bit data to full 64-bit. */
  46. movabs $0x0101010101010101,%r8
  47. movzbl %sil,%eax
  48. imul %rax,%r8
  49. #endif
  50. test $0x7,%edi /* Check for alignment. */
  51. je 2f
  52. .p2align 4
  53. 1: /* Align ptr to 8 byte. */
  54. mov %sil,(%rcx)
  55. dec %rdx
  56. inc %rcx
  57. test $0x7,%ecx
  58. jne 1b
  59. 2: /* Check for really large regions. */
  60. mov %rdx,%rax
  61. shr $0x6,%rax
  62. je 4f
  63. cmp LARGE, %rdx
  64. jae 11f
  65. .p2align 4
  66. 3: /* Copy 64 bytes. */
  67. mov %r8,(%rcx)
  68. mov %r8,0x8(%rcx)
  69. mov %r8,0x10(%rcx)
  70. mov %r8,0x18(%rcx)
  71. mov %r8,0x20(%rcx)
  72. mov %r8,0x28(%rcx)
  73. mov %r8,0x30(%rcx)
  74. mov %r8,0x38(%rcx)
  75. add $0x40,%rcx
  76. dec %rax
  77. jne 3b
  78. 4: /* Copy final bytes. */
  79. and $0x3f,%edx
  80. mov %rdx,%rax
  81. shr $0x3,%rax
  82. je 6f
  83. 5: /* First in chunks of 8 bytes. */
  84. mov %r8,(%rcx)
  85. add $0x8,%rcx
  86. dec %rax
  87. jne 5b
  88. 6:
  89. and $0x7,%edx
  90. 7:
  91. test %rdx,%rdx
  92. je 9f
  93. 8: /* And finally as bytes (up to 7). */
  94. mov %sil,(%rcx)
  95. inc %rcx
  96. dec %rdx
  97. jne 8b
  98. 9:
  99. #if BZERO_P
  100. nop
  101. #else
  102. /* Load result (only if used as memset). */
  103. mov %rdi,%rax /* start address of destination is result */
  104. #endif
  105. retq
  106. .p2align 4
  107. 11: /* Copy 64 bytes without polluting the cache. */
  108. /* We could use movntdq %xmm0,(%rcx) here to further
  109. speed up for large cases but let's not use XMM registers. */
  110. movnti %r8,(%rcx)
  111. movnti %r8,0x8(%rcx)
  112. movnti %r8,0x10(%rcx)
  113. movnti %r8,0x18(%rcx)
  114. movnti %r8,0x20(%rcx)
  115. movnti %r8,0x28(%rcx)
  116. movnti %r8,0x30(%rcx)
  117. movnti %r8,0x38(%rcx)
  118. add $0x40,%rcx
  119. dec %rax
  120. jne 11b
  121. jmp 4b
  122. END (__memset)
  123. #if !BZERO_P && defined PIC && !defined NOT_IN_libc
  124. strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
  125. #endif