strcpy.S 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150
  1. /* Optimized strcpy for Xtensa.
  2. Copyright (C) 2001, 2007 Free Software Foundation, Inc.
  3. This file is part of the GNU C Library.
  4. The GNU C Library is free software; you can redistribute it and/or
  5. modify it under the terms of the GNU Lesser General Public
  6. License as published by the Free Software Foundation; either
  7. version 2.1 of the License, or (at your option) any later version.
  8. The GNU C Library is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. Lesser General Public License for more details.
  12. You should have received a copy of the GNU Lesser General Public
  13. License along with the GNU C Library; if not, write to the Free
  14. Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
  15. Boston, MA 02110-1301, USA. */
  16. #include "../../sysdeps/linux/xtensa/sysdep.h"
  17. #include <bits/xtensa-config.h>
  /* MASKn selects byte n of a 32-bit word, where byte n is the byte at
     offset n from the word's address.  strcpy loads these into a4..a7
     and tests each loaded word against them in turn to find the
     terminating zero byte.  Which bits hold byte n depends on the
     endianness, selected by __XTENSA_EB__.  */
  18. #ifdef __XTENSA_EB__
  /* Big-endian: byte 0 occupies the most significant bits.  */
  19. #define MASK0 0xff000000
  20. #define MASK1 0x00ff0000
  21. #define MASK2 0x0000ff00
  22. #define MASK3 0x000000ff
  23. #else
  /* Little-endian: byte 0 occupies the least significant bits.  */
  24. #define MASK0 0x000000ff
  25. #define MASK1 0x0000ff00
  26. #define MASK2 0x00ff0000
  27. #define MASK3 0xff000000
  28. #endif
  29. .text
  /* strcpy -- copy the zero-terminated string at src to dst, terminator
     included.

     Register use, as established by the code below:
       a2      dst on entry; never written here, so it still holds dst at
               every retw
       a3      running src pointer
       a10     running dst pointer
       a4-a7   MASK0..MASK3, one mask per byte of a loaded word
       a8      scratch: current byte or word, alignment mask, loop count

     Outline: leading bytes are copied one at a time until src is
     word-aligned.  If dst is then word-aligned too, the rest is copied a
     word at a time (.Laligned); otherwise it is copied a byte at a time
     (.Ldstunaligned).  The word loop reads the whole aligned word that
     contains the terminator, so up to three bytes after the terminator
     are read from src, but nothing past the terminator is stored.  */
  30. ENTRY (strcpy)
  31. /* a2 = dst, a3 = src */
  32. mov a10, a2 // a10 = working dst pointer; a2 itself is left untouched
  33. movi a4, MASK0 // a4..a7 = per-byte masks used by the word loop
  34. movi a5, MASK1
  35. movi a6, MASK2
  36. movi a7, MASK3
  37. bbsi.l a3, 0, .Lsrc1mod2 // bit 0 of src set: src address is odd
  38. bbsi.l a3, 1, .Lsrc2mod4 // bit 1 of src set: src is 2 mod 4
  /* Reached with src word-aligned, either on entry or after the leading
     bytes have been copied.  */
  39. .Lsrcaligned:
  40. /* Check if the destination is aligned. */
  41. movi a8, 3
  42. bnone a10, a8, .Laligned // low two bits of dst clear: copy by words
  43. j .Ldstunaligned // otherwise copy byte by byte
  44. .Lsrc1mod2: // src address is odd
  45. l8ui a8, a3, 0 // get byte 0
  46. addi a3, a3, 1 // advance src pointer
  47. s8i a8, a10, 0 // store byte 0
  48. beqz a8, 1f // byte 0 was the terminator: done
  49. addi a10, a10, 1 // advance dst pointer
  50. bbci.l a3, 1, .Lsrcaligned // if src is now word-aligned
  /* Otherwise src is now 2 mod 4: fall through and copy two more bytes.  */
  51. .Lsrc2mod4: // src address is 2 mod 4
  52. l8ui a8, a3, 0 // get byte 0
  53. /* 1-cycle interlock */
  54. s8i a8, a10, 0 // store byte 0
  55. beqz a8, 1f // byte 0 was the terminator: done
  56. l8ui a8, a3, 1 // get byte 1
  57. addi a3, a3, 2 // advance src pointer past both bytes
  58. s8i a8, a10, 1 // store byte 1
  59. addi a10, a10, 2 // advance dst pointer past both bytes
  60. bnez a8, .Lsrcaligned // byte 1 nonzero: src is now word-aligned, go on
  61. 1: retw // terminator has been stored
  62. /* dst is word-aligned; src is word-aligned. */
  63. .align 4
  64. #if XCHAL_HAVE_LOOPS
  65. /* (2 mod 4) alignment for loop instruction */
  /* NOTE(review): the 2-mod-4 placement comes from .align 4 followed by
     the 2-byte _movi.n; presumably the leading underscore keeps the
     assembler from changing that encoding -- confirm against the GNU as
     Xtensa documentation.  */
  66. .Laligned:
  67. _movi.n a8, 0 // set up for the maximum loop count
  68. loop a8, .Lz3 // loop forever (almost anyway)
  /* Loop body: one word per iteration.  Every exit is a taken branch out
     of the body.  NOTE(review): the byte-3 exit branches to .Lz3, which is
     also the loop end label; this relies on a taken branch to the loop
     end not looping back -- confirm against the Xtensa ISA reference.  */
  69. l32i a8, a3, 0 // get word from src
  70. addi a3, a3, 4 // advance src pointer
  71. bnone a8, a4, .Lz0 // if byte 0 is zero
  72. bnone a8, a5, .Lz1 // if byte 1 is zero
  73. bnone a8, a6, .Lz2 // if byte 2 is zero
  74. s32i a8, a10, 0 // bytes 0-2 are nonzero: store the whole word to dst
  75. bnone a8, a7, .Lz3 // if byte 3 is zero (already stored with the word)
  76. addi a10, a10, 4 // advance dst pointer
  77. #else /* !XCHAL_HAVE_LOOPS */
  /* Same word loop built from ordinary branches.  Entry is at .Laligned,
     which skips the dst advance on the first iteration.  */
  78. 1: addi a10, a10, 4 // advance dst pointer
  79. .Laligned:
  80. l32i a8, a3, 0 // get word from src
  81. addi a3, a3, 4 // advance src pointer
  82. bnone a8, a4, .Lz0 // if byte 0 is zero
  83. bnone a8, a5, .Lz1 // if byte 1 is zero
  84. bnone a8, a6, .Lz2 // if byte 2 is zero
  85. s32i a8, a10, 0 // bytes 0-2 are nonzero: store the whole word to dst
  86. bany a8, a7, 1b // loop back if byte 3 is nonzero
  87. #endif /* !XCHAL_HAVE_LOOPS */
  88. .Lz3: /* Byte 3 is zero; it was stored as part of the last word. */
  89. retw
  90. .Lz0: /* Byte 0 is zero: store only the terminator. */
  91. #ifdef __XTENSA_EB__
  92. movi a8, 0 // big-endian: byte 0 is in the high bits, so the low byte of a8 is not it
  93. #endif
  94. s8i a8, a10, 0 // store the zero byte (little-endian: low byte of a8 is byte 0)
  95. retw
  96. .Lz1: /* Byte 1 is zero: store bytes 0-1 as one halfword. */
  97. #ifdef __XTENSA_EB__
  98. extui a8, a8, 16, 16 // big-endian: bring bytes 0-1 down from the upper half
  99. #endif
  100. s16i a8, a10, 0 // store byte 0 and the terminator
  101. retw
  102. .Lz2: /* Byte 2 is zero: store bytes 0-1, then the terminator. */
  103. #ifdef __XTENSA_EB__
  104. extui a8, a8, 16, 16 // big-endian: bring bytes 0-1 down from the upper half
  105. #endif
  106. s16i a8, a10, 0 // store bytes 0 and 1
  107. movi a8, 0
  108. s8i a8, a10, 2 // store the terminator as byte 2
  109. retw
  110. .align 4
  111. /* (2 mod 4) alignment for loop instruction */
  /* src is word-aligned but dst is not: copy one byte at a time until the
     terminator has been stored.  */
  112. .Ldstunaligned:
  113. #if XCHAL_HAVE_LOOPS
  114. _movi.n a8, 0 // set up for the maximum loop count
  115. loop a8, 2f // loop forever (almost anyway)
  116. #endif
  117. 1: l8ui a8, a3, 0 // get next byte from src
  118. addi a3, a3, 1 // advance src pointer
  119. s8i a8, a10, 0 // store the byte, terminator included
  120. addi a10, a10, 1 // advance dst pointer (also done after the terminator)
  121. #if XCHAL_HAVE_LOOPS
  122. beqz a8, 2f // terminator stored: branch out of the loop
  123. #else
  124. bnez a8, 1b // byte was nonzero: copy the next one
  125. #endif
  126. 2: retw
  127. libc_hidden_def (strcpy)