strncpy.S 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. /* Optimized strncpy for Xtensa.
  2. Copyright (C) 2001, 2007 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 2.1 of the License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with the GNU C Library; if not, see
  14. <http://www.gnu.org/licenses/>. */
  15. #include <sysdep.h>
  16. #include <bits/xtensa-config.h>
  /* Byte-lane masks for a 32-bit word loaded from memory.  MASKn selects
     the byte located at address offset n inside the word, so the values
     are mirrored between the two byte orders.  */
  #ifndef __XTENSA_EB__
  /* Little-endian: offset 0 is the least significant byte.  */
  #define MASK0 0x000000ff
  #define MASK1 0x0000ff00
  #define MASK2 0x00ff0000
  #define MASK3 0xff000000
  #else
  /* Big-endian: offset 0 is the most significant byte.  */
  #define MASK0 0xff000000
  #define MASK1 0x00ff0000
  #define MASK2 0x0000ff00
  #define MASK3 0x000000ff
  #endif

  /* Make LITERAL_POSITION expand to nothing so that the ENTRY macro does
     not emit .literal_position; the directive is written out by hand in
     front of __strncpy_aux instead.  */
  #undef LITERAL_POSITION
  #define LITERAL_POSITION
  /* Byte-copy prologue, placed ahead of the strncpy entry point.  strncpy
     branches here when src is not word-aligned, with
         a3  = src cursor
         a4  = n (already checked to be non-zero)
         a10 = dst cursor
     Bytes are copied one at a time until src reaches a word boundary.
     After each byte two exits are checked: n reaching zero returns at
     .Lret, and a copied NUL byte switches to zero padding at .Lfill.  */
        .text
        .align  4
        .literal_position
  __strncpy_aux:

  .Lsrc1mod2:                             /* src address is odd */
        l8ui    a8, a3, 0               /* a8 = src[0] */
        addi    a4, a4, -1              /* one byte less to write */
        s8i     a8, a10, 0              /* dst[0] = a8 */
        addi    a3, a3, 1               /* step src */
        beqz    a4, .Lret               /* count exhausted: done */
        addi    a10, a10, 1             /* step dst */
        beqz    a8, .Lfill              /* terminator copied: pad the rest */
        bbci.l  a3, 1, .Lsrcaligned     /* bit 1 clear: src is word-aligned */
        /* Bit 1 set: src is now 2 mod 4, fall through.  */

  .Lsrc2mod4:                             /* src address is 2 mod 4 */
        /* First of the two bytes up to the word boundary.  */
        l8ui    a8, a3, 0               /* a8 = src[0] */
        addi    a4, a4, -1              /* one byte less to write */
        s8i     a8, a10, 0              /* dst[0] = a8 */
        beqz    a4, .Lret               /* count exhausted: done */
        addi    a10, a10, 1             /* step dst */
        beqz    a8, .Lfill              /* terminator copied: pad the rest */

        /* Second byte; src moves past both bytes in one step.  */
        l8ui    a8, a3, 1               /* a8 = src[1] */
        addi    a4, a4, -1              /* one byte less to write */
        s8i     a8, a10, 0              /* store it at the dst cursor */
        addi    a3, a3, 2               /* src is word-aligned from here on */
        beqz    a4, .Lret               /* count exhausted: done */
        addi    a10, a10, 1             /* step dst */
        bnez    a8, .Lsrcaligned        /* non-NUL: continue with aligned src */
        j       .Lfill                  /* terminator copied: pad the rest */

  .Lret:                                  /* a2 was never modified: it is dst */
        abi_ret
  /* char *strncpy (char *dst, const char *src, size_t n)

     In:   a2 = dst, a3 = src, a4 = n
     Out:  a2 = dst (a2 is never written; a10 is the working dst cursor)
     Uses: a5/a6/a7/a11 = byte masks MASK1/MASK2/MASK3/MASK0,
           a8 = scratch, a9 = zero (set in .Lfill).  */
  ENTRY (strncpy)
        mov     a10, a2                 /* work on a copy, keep a2 for return */
        beqz    a4, .Lret               /* n == 0: nothing to copy */

        /* Masks used by the word loop to spot a NUL in each byte lane.  */
        movi    a5, MASK1
        movi    a6, MASK2
        movi    a7, MASK3
        movi    a11, MASK0

        /* Bring src to a word boundary first if it is not on one.  */
        bbsi.l  a3, 0, .Lsrc1mod2       /* src is odd */
        bbsi.l  a3, 1, .Lsrc2mod4       /* src is 2 mod 4 */

  .Lsrcaligned:
        /* src is word-aligned.  Use the word loop only when dst is
           word-aligned too; otherwise copy byte by byte.  */
        movi    a8, 3
        bnone   a10, a8, .Laligned      /* low two bits of dst are clear */
        j       .Ldstunaligned
  /* Pad the remainder of dst with zeros.  On entry a10 = dst cursor and
     a4 = bytes still to write, which is at least 1.  dst is first brought
     to a word boundary with byte stores, then cleared a word at a time,
     then the last 1 to 3 bytes are cleared individually.  */
  .Lfill:
        movi    a9, 0                   /* a9 = zero for all stores below */
        bbsi.l  a10, 0, .Lfill1mod2     /* dst is odd */
        bbsi.l  a10, 1, .Lfill2mod4     /* dst is 2 mod 4 */

  .Lfillaligned:                          /* dst is word-aligned here */
        blti    a4, 4, .Lfillcleanup    /* not even one whole word left */

        /* Clear complete words.  */
  #if XCHAL_HAVE_LOOPS
        srai    a8, a4, 2               /* a8 = number of whole words */
        loop    a8, .Lfillwordsend
        s32i    a9, a10, 0
        addi    a10, a10, 4
  .Lfillwordsend:
        slli    a8, a8, 2               /* a8 = bytes written by the loop */
        sub     a4, a4, a8              /* a4 = leftover bytes (0 to 3) */
  #else /* !XCHAL_HAVE_LOOPS */
  .Lfillwords:
        s32i    a9, a10, 0
        addi    a4, a4, -4
        addi    a10, a10, 4
        bgei    a4, 4, .Lfillwords
  #endif /* !XCHAL_HAVE_LOOPS */
        beqz    a4, .Lfilldone          /* count was a multiple of four */

  .Lfillcleanup:
        /* Clear the remaining 1 to 3 bytes.  */
        s8i     a9, a10, 0
        addi    a10, a10, 1
        addi    a4, a4, -1
        bnez    a4, .Lfillcleanup
  .Lfilldone:
        abi_ret

  .Lfill1mod2:                            /* dst address is odd */
        s8i     a9, a10, 0              /* clear one byte */
        addi    a4, a4, -1
        beqz    a4, .Lfilldone          /* that was the last byte */
        addi    a10, a10, 1
        bbci.l  a10, 1, .Lfillaligned   /* bit 1 clear: dst is word-aligned */
        /* Bit 1 set: dst is now 2 mod 4, fall through.  */

  .Lfill2mod4:                            /* dst address is 2 mod 4 */
        s8i     a9, a10, 0              /* clear the byte at offset 0 */
        addi    a4, a4, -1
        beqz    a4, .Lfilldone          /* that was the last byte */
        s8i     a9, a10, 1              /* clear the byte at offset 1 */
        addi    a4, a4, -1
        beqz    a4, .Lfilldone          /* that was the last byte */
        addi    a10, a10, 2             /* dst is word-aligned from here on */
        j       .Lfillaligned
  /* Word-at-a-time copy: src and dst are both word-aligned and n is at
     least 1.  Each pass loads one word from src and tests its four byte
     lanes against the masks in a11/a5/a6/a7.  A NUL in byte 0, 1 or 2
     leaves through .Lz0/.Lz1/.Lz2, which store only the bytes up to the
     NUL.  Otherwise the whole word is stored; a NUL in byte 3 then goes
     to .Lfill.  The word path is taken only while n is at least 5, so a
     full-word store always leaves n >= 1 for .Lfill; smaller counts are
     handed to the byte loop at .Ldstunaligned.  */
        .align  4
        /* (2 mod 4) alignment for loop instruction: _movi.n sits between
           the 4-byte boundary and the loop instruction.  */
  .Laligned:
  #if XCHAL_HAVE_LOOPS
        _movi.n a8, 0                   /* zero requests the maximum count */
        loop    a8, .Lwordsend          /* loop forever (almost anyway) */
        blti    a4, 5, .Ldstunaligned   /* near the limit: go byte by byte */
        l32i    a8, a3, 0               /* a8 = next word of src */
        addi    a3, a3, 4               /* step src */
        bnone   a8, a11, .Lz0           /* byte 0 is NUL */
        bnone   a8, a5, .Lz1            /* byte 1 is NUL */
        bnone   a8, a6, .Lz2            /* byte 2 is NUL */
        s32i    a8, a10, 0              /* bytes 0 to 2 are non-NUL: store */
        addi    a10, a10, 4             /* step dst */
        addi    a4, a4, -4              /* four bytes less to write */
        bnone   a8, a7, .Lfill          /* byte 3 is NUL: pad the rest */
  .Lwordsend:
  #else /* !XCHAL_HAVE_LOOPS */
  .Lwordloop:
        blti    a4, 5, .Ldstunaligned   /* near the limit: go byte by byte */
        l32i    a8, a3, 0               /* a8 = next word of src */
        addi    a3, a3, 4               /* step src */
        bnone   a8, a11, .Lz0           /* byte 0 is NUL */
        bnone   a8, a5, .Lz1            /* byte 1 is NUL */
        bnone   a8, a6, .Lz2            /* byte 2 is NUL */
        s32i    a8, a10, 0              /* bytes 0 to 2 are non-NUL: store */
        addi    a10, a10, 4             /* step dst */
        addi    a4, a4, -4              /* four bytes less to write */
        bany    a8, a7, .Lwordloop      /* byte 3 non-NUL too: next word */
  #endif /* !XCHAL_HAVE_LOOPS */
        j       .Lfill
  /* Exits from the word loop when the loaded word (in a8) has a NUL in
     one of its first three bytes.  Only the bytes up to and including
     the NUL are stored, dst and n are adjusted by that amount, and the
     rest of dst is padded by .Lfill.  n was at least 5 when the word was
     loaded, so at least 2 bytes remain for .Lfill.  */

  .Lz0:                                   /* byte 0 is NUL */
  #ifdef __XTENSA_EB__
        /* Byte 0 is the top byte on big-endian, but s8i stores the low
           byte of the register, so store an explicit zero.  */
        movi    a8, 0
  #endif
        s8i     a8, a10, 0              /* write the terminator */
        addi    a10, a10, 1             /* step dst */
        addi    a4, a4, -1              /* one byte less to write */
        j       .Lfill

  .Lz1:                                   /* byte 1 is NUL */
  #ifdef __XTENSA_EB__
        extui   a8, a8, 16, 16          /* move bytes 0-1 into the low half */
  #endif
        s16i    a8, a10, 0              /* write byte 0 and the terminator */
        addi    a10, a10, 2             /* step dst */
        addi    a4, a4, -2              /* two bytes less to write */
        j       .Lfill

  .Lz2:                                   /* byte 2 is NUL */
  #ifdef __XTENSA_EB__
        extui   a8, a8, 16, 16          /* move bytes 0-1 into the low half */
  #endif
        s16i    a8, a10, 0              /* write bytes 0 and 1 */
        movi    a8, 0
        s8i     a8, a10, 2              /* write the terminator */
        addi    a10, a10, 3             /* step dst */
        addi    a4, a4, -3              /* three bytes less to write */
        j       .Lfill
  /* Byte-at-a-time copy.  Reached from .Lsrcaligned when dst is not
     word-aligned, and from the word loop when fewer than 5 bytes of the
     count remain.  Copies until n reaches zero (return) or a NUL byte
     has been copied (continue with zero padding at .Lfill).  */
        .align  4
        /* (2 mod 4) alignment for loop instruction */
  .Ldstunaligned:
  #if XCHAL_HAVE_LOOPS
        _movi.n a8, 0                   /* zero requests the maximum count */
        loop    a8, .Lbyteend           /* loop forever (almost anyway) */
  #endif
  .Lbyteloop:
        l8ui    a8, a3, 0               /* a8 = *src */
        addi    a4, a4, -1              /* one byte less to write */
        s8i     a8, a10, 0              /* *dst = a8 */
        addi    a3, a3, 1               /* step src */
        beqz    a4, .Lbytedone          /* count exhausted: done */
        addi    a10, a10, 1             /* step dst */
  #if XCHAL_HAVE_LOOPS
        beqz    a8, .Lbyteend           /* NUL copied: leave the loop */
  #else
        bnez    a8, .Lbyteloop          /* non-NUL: copy the next byte */
  #endif
  .Lbyteend:
        j       .Lfill                  /* pad the remaining n bytes */
  .Lbytedone:
        abi_ret
  libc_hidden_def (strncpy)