strcmp.S 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. /*
  2. * Copyright (C) 2013, 2014-2015, 2017, 2022 Synopsys, Inc. (www.synopsys.com)
  3. * Copyright (C) 2007 ARC International (UK) LTD
  4. *
  5. * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
  6. */
  7. #include <features.h>
  8. #include <sysdep.h>
  9. #include <asm.h>
  /*
   * int strcmp(const char *s1, const char *s2)
   *
   * In:   r0 = s1, r1 = s2
   * Out:  r0 = 0 when the strings are equal,
   *       r0 > 0 when the first differing byte of s1 is the larger one
   *              (bytes are compared as unsigned values),
   *       r0 < 0 otherwise.  The word paths return 1 or a value with
   *       bit 31 set; the byte loop returns the byte difference.
   * Uses: r0-r5 and r12 (plus r11 in the ARC HS variant), status flags.
   *
   * If both pointers are 4-byte aligned the strings are compared one word
   * at a time, otherwise a byte-by-byte loop is used.  The word loops spot
   * a terminating zero byte in the s1 word w with
   *       (w - 0x01010101) & ~w & 0x80808080
   * which is non-zero whenever w holds a zero byte.  Because of the borrow
   * of the subtraction it can additionally flag a 0x01 byte located in the
   * next more significant position above a real zero byte.  That is
   * harmless on little endian (the extra flag is behind the terminator in
   * string order) but must be compensated for on big endian.
   */
  ENTRY(strcmp)
  #if defined(__ARC700__) || defined(__ARC64_ARCH32__)
  /* This is optimized primarily for the ARC700.
     It would be possible to speed up the loops by one cycle / word
     respective one cycle / byte by forcing double source 1 alignment, unrolling
     by a factor of two, and speculatively loading the second word / byte of
     source 1; however, that would increase the overhead for loop setup / finish,
     and strcmp might often terminate early.  */
          or      r2,r0,r1
          bmsk_s  r2,r2,1                 ; r2 = low two bits of (s1 | s2)
          brne    r2,0,.Lcharloop         ; some pointer is unaligned: byte loop
          mov_s   r12,0x01010101
          ror     r5,r12                  ; r5 = 0x80808080 (r12 rotated right by 1)
  .Lwordloop:
          ld.ab   r2,[r0,4]               ; r2 = next word of s1, s1 += 4
          ld.ab   r3,[r1,4]               ; r3 = next word of s2, s2 += 4
          nop_s
          sub     r4,r2,r12
          bic     r4,r4,r2
          and     r4,r4,r5                ; r4 = zero indicator for the s1 word
          brne    r4,0,.Lfound0
          breq    r2,r3,.Lwordloop
          /* The words differ and the s1 word holds no zero byte.  */
  #ifdef __LITTLE_ENDIAN__
          xor     r0,r2,r3                ; mask for difference
          SUBR_S  r1,r0,1                 ; macro defined outside this file
          bic_s   r0,r0,r1                ; mask for least significant difference bit
          sub     r1,r5,r0
          xor     r0,r5,r1                ; mask for least significant difference byte
          and_s   r2,r2,r0
          and_s   r3,r3,r0
  #endif /* LITTLE ENDIAN */
          cmp_s   r2,r3
          mov_s   r0,1                    ; result if s1 > s2
          j_s.d   [blink]
          bset.lo r0,r0,31                ; delay slot: s1 < s2, make r0 negative
          .balign 4
  #ifdef __LITTLE_ENDIAN__
  .Lfound0:
          xor     r0,r2,r3                ; mask for difference
          or      r0,r0,r4                ; or in zero indicator
          SUBR_S  r1,r0,1                 ; macro defined outside this file
          bic_s   r0,r0,r1                ; mask for least significant difference bit
          sub     r1,r5,r0
          xor     r0,r5,r1                ; mask for least significant difference byte
          and_s   r2,r2,r0
          and_s   r3,r3,r0
          sub.f   r0,r2,r3                ; r0 = 0 if the strings ended together
          mov.hi  r0,1
          j_s.d   [blink]
          bset.lo r0,r0,31                ; delay slot: s1 < s2, make r0 negative
  #else /* BIG ENDIAN */
          /* The zero-detection above can mis-detect 0x01 bytes as zeroes
             because of carry-propagation from a lower significant zero byte.
             We can compensate for this by checking that bit0 is zero.
             This compensation is not necessary in the step where we
             get a low estimate for r2, because in any affected bytes
             we already have 0x00 or 0x01, which will remain unchanged
             when bit 7 is cleared.  */
          .balign 4
  .Lfound0:
          lsr     r0,r4,8
          lsr_s   r1,r2
          bic_s   r2,r2,r0                ; get low estimate for r2 and get ...
          bic_s   r0,r0,r1                ; <this is the adjusted mask for zeros>
          or_s    r3,r3,r0                ; ... high estimate r3 so that r2 > r3 will ...
          cmp_s   r3,r2                   ; ... be independent of trailing garbage
          or_s    r2,r2,r0                ; likewise for r3 > r2
          bic_s   r3,r3,r0
          rlc     r0,0                    ; r0 := r2 > r3 ? 1 : 0
          cmp_s   r2,r3
          j_s.d   [blink]
          bset.lo r0,r0,31                ; delay slot: s1 < s2, make r0 negative
  #endif /* ENDIAN */
          .balign 4
  .Lcharloop:
          ldb.ab  r2,[r0,1]               ; r2 = *s1++
          ldb.ab  r3,[r1,1]               ; r3 = *s2++
          nop_s
          breq    r2,0,.Lcmpend           ; end of s1
          breq    r2,r3,.Lcharloop
  .Lcmpend:
          j_s.d   [blink]
          sub     r0,r2,r3                ; delay slot: r0 = byte difference
  #elif defined(__ARCHS__)
          or      r2, r0, r1
          bmsk_s  r2, r2, 1               ; r2 = low two bits of (s1 | s2)
          brne    r2, 0, @.Lcharloop      ; some pointer is unaligned: byte loop
  ;;; s1 and s2 are word aligned
          ld.ab   r2, [r0, 4]             ; preload the first word of s1
          mov_s   r12, 0x01010101
          ror     r11, r12                ; r11 = 0x80808080 (r12 rotated right by 1)
          .align 4
  .LwordLoop:
          ld.ab   r3, [r1, 4]             ; r3 = current word of s2
          ;; Detect NUL char in str1
          sub     r4, r2, r12
          /* NOTE(review): the next s1 word is fetched before the current one
             is known to be free of a terminator, so this can read up to one
             aligned word past the word holding the end of s1 - confirm that
             this is acceptable at the end of a mapping.  */
          ld.ab   r5, [r0, 4]             ; r5 = following word of s1
          bic     r4, r4, r2
          and     r4, r4, r11             ; r4 = zero indicator for the s1 word
          brne.d.nt r4, 0, .LfoundNULL
          ;; Check if the read locations are the same
          cmp     r2, r3                  ; delay slot of the brne above
          beq.d   .LwordLoop
          mov.eq  r2, r5                  ; delay slot: advance to the preloaded word
          ;; A mismatch is found, spot it out
  #ifdef __LITTLE_ENDIAN__
          swape   r3, r3                  ; byte swap so that the first string
          mov_s   r0, 1
          swape   r2, r2                  ; byte becomes the most significant one
  #else
          mov_s   r0, 1
  #endif
          cmp_s   r2, r3
          j_s.d   [blink]
          bset.lo r0, r0, 31              ; delay slot: s1 < s2, make r0 negative
          .align 4
  .LfoundNULL:
  #ifdef __BIG_ENDIAN__
          /* The zero indicator may also flag a 0x01 byte that directly
             precedes a real zero byte in the string (borrow out of the zero
             byte).  Left alone, the search below would stop at that 0x01
             byte and e.g. report "\1" and "\1\1" as equal.  Only 0x00 and
             0x01 bytes can ever be flagged, so a flag is genuine exactly
             when bit 0 of its byte is clear.  r5 (the preloaded word) is
             no longer needed here and serves as scratch.  */
          asl     r5, r2, 7               ; bit 0 of every byte -> bit 7 of that byte
          bic     r4, r4, r5              ; keep the flags of true zero bytes only
          swape   r4, r4                  ; continue in little endian byte order
          swape   r2, r2
          swape   r3, r3
  #endif
          ;; Find null byte
          ffs     r0, r4                  ; r0 = bit 7 of the first zero byte of s1
          bmsk    r2, r2, r0              ; drop everything behind the terminator
          bmsk    r3, r3, r0
          swape   r2, r2                  ; first string byte most significant
          swape   r3, r3
          ;; make the return value
          sub.f   r0, r2, r3              ; r0 = 0 if the strings ended together
          mov.hi  r0, 1
          j_s.d   [blink]
          bset.lo r0, r0, 31              ; delay slot: s1 < s2, make r0 negative
          .align 4
  .Lcharloop:
          ldb.ab  r2, [r0, 1]             ; r2 = *s1++
          ldb.ab  r3, [r1, 1]             ; r3 = *s2++
          nop
          breq    r2, 0, .Lcmpend         ; end of s1
          breq    r2, r3, .Lcharloop
          .align 4
  .Lcmpend:
          j_s.d   [blink]
          sub     r0, r2, r3              ; delay slot: r0 = byte difference
  #else
  #error "Unsupported ARC CPU type"
  #endif
  END(strcmp)
  libc_hidden_def(strcmp)
  /* When locale support is not configured, strcoll is emitted as an alias
     of strcmp (strong_alias / libc_hidden_def are defined outside this
     file).  */
  #ifndef __UCLIBC_HAS_LOCALE__
  strong_alias(strcmp,strcoll)
  libc_hidden_def(strcoll)
  #endif