memset.S 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. /* Optimized memset for Xtensa.
  2. Copyright (C) 2001, 2007 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 2.1 of the License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with the GNU C Library; if not, see
  14. <http://www.gnu.org/licenses/>. */
  15. #include <sysdep.h>
  16. #include <bits/xtensa-config.h>
  17. /* Do not use .literal_position in the ENTRY macro. */
  18. #undef LITERAL_POSITION
  19. #define LITERAL_POSITION
  20. /* void *memset (void *dst, int c, size_t length)
  21. The algorithm is as follows:
  22. Create a word with c in all byte positions.
  23. If the destination is aligned, set 16B chunks with a loop, and then
  24. finish up with 8B, 4B, 2B, and 1B stores conditional on the length.
  25. If the destination is unaligned, align it by conditionally
  26. setting 1B and/or 2B and then go to aligned case.
  27. This code tries to use fall-through branches for the common
  28. case of an aligned destination (except for the branches to
  29. the alignment labels). */
  /* Byte-by-byte set, plus the fix-up paths for an unaligned destination.

     Register roles, as set up by memset before it branches here:
       a3 = fill byte replicated into every byte of the word
       a4 = number of bytes still to set
       a5 = current destination pointer
       a6 = scratch end address (only without zero-overhead loops)

     Every path either returns through abi_ret or jumps back to
     .Ldstaligned inside memset.  */

	.text
	.align	4
	.literal_position
__memset_aux:

	/* Skip a byte to get 1 mod 4 alignment for LOOPNEZ
	   (0 mod 4 alignment for LBEG).  */
	.byte	0

.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, 2f
#else
	beqz	a4, 2f
	add	a6, a5, a4	/* a6 = ending address */
#endif
1:	s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	/* a5 advances one byte per iteration and therefore reaches a6
	   exactly, so compare for inequality.  A signed BLT would
	   mis-order addresses on opposite sides of 0x80000000 and leave
	   the loop after the first byte.  */
	bne	a5, a6, 1b
#endif
2:	abi_ret


	/* Destination is unaligned.  */

	.align	4
.Ldst1mod2:	/* dst is only byte aligned */

	/* Do short sizes byte-by-byte.  */
	bltui	a4, 8, .Lbyteset

	/* Set 1 byte; dst becomes 16-bit aligned.  */
	s8i	a3, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1

	/* Now retest if dst is aligned: bit 1 clear means 0 mod 4.  */
	_bbci.l	a5, 1, .Ldstaligned

.Ldst2mod4:	/* dst has 16-bit alignment */

	/* Do short sizes byte-by-byte.  */
	bltui	a4, 8, .Lbyteset

	/* Set 2 bytes.  */
	s16i	a3, a5, 0
	addi	a5, a5, 2
	addi	a4, a4, -2

	/* dst is now aligned; return to main algorithm.  */
	j	.Ldstaligned
  /* void *memset (void *dst, int c, size_t length)

     In:   a2 = dst, a3 = c, a4 = length
     Out:  a2 = dst (never modified; a5 is the working pointer)
     Uses: a3, a5, a7, and a6 when zero-overhead loops are unavailable.  */

ENTRY (memset)
	/* a2 = dst, a3 = c, a4 = length */

	/* Duplicate character into all bytes of word.  */
	extui	a3, a3, 0, 8	/* keep only the low 8 bits of c */
	slli	a7, a3, 8
	or	a3, a3, a7	/* c now fills the low two bytes */
	slli	a7, a3, 16
	or	a3, a3, a7	/* c now fills all four bytes */

	mov	a5, a2		/* copy dst so that a2 is return value */

	/* Check if dst is unaligned.  */
	_bbsi.l	a2, 0, .Ldst1mod2
	_bbsi.l	a2, 1, .Ldst2mod4

.Ldstaligned:
	/* Destination is word-aligned; a4 holds the bytes still to set.  */

	/* Get number of loop iterations with 16B per iteration.  */
	srli	a7, a4, 4

#if XCHAL_HAVE_LOOPS
	loopnez	a7, 2f
#else
	beqz	a7, 2f
	slli	a6, a7, 4
	add	a6, a6, a5	/* a6 = end of last 16B chunk */
#endif

	/* Set 16 bytes per iteration.  */
1:	s32i	a3, a5, 0
	s32i	a3, a5, 4
	s32i	a3, a5, 8
	s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	/* a6 = start + 16 * a7 and a5 steps by 16, so a5 hits a6 exactly.
	   Test for inequality: a signed BLT would mis-order addresses on
	   opposite sides of 0x80000000 and end the loop too early.  */
	bne	a5, a6, 1b
#endif

	/* Set any leftover pieces smaller than 16B.  Each low bit of a4
	   selects one store size.  */
2:	bbci.l	a4, 3, 3f

	/* Set 8 bytes.  */
	s32i	a3, a5, 0
	s32i	a3, a5, 4
	addi	a5, a5, 8

3:	bbci.l	a4, 2, 4f

	/* Set 4 bytes.  */
	s32i	a3, a5, 0
	addi	a5, a5, 4

4:	bbci.l	a4, 1, 5f

	/* Set 2 bytes.  */
	s16i	a3, a5, 0
	addi	a5, a5, 2

5:	bbci.l	a4, 0, 6f

	/* Set 1 byte.  */
	s8i	a3, a5, 0

6:	abi_ret

libc_hidden_def (memset)