strcmp.S 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. /*
  2. * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
  3. * Copyright (C) 2007 ARC International (UK) LTD
  4. *
  5. * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
  6. */
  7. #include <features.h>
  8. #include <sysdep.h>
  9. #if !defined(__ARC700__) && !defined(__ARCHS__)
  10. #error "Neither ARC700 nor ARCHS is defined!"
  11. #endif
  12. ENTRY(strcmp)
  13. #ifdef __ARC700__
  14. /* This is optimized primarily for the ARC700.
  15. It would be possible to speed up the loops by one cycle / word
  16. respective one cycle / byte by forcing double source 1 alignment, unrolling
  17. by a factor of two, and speculatively loading the second word / byte of
  18. source 1; however, that would increase the overhead for loop setup / finish,
  19. and strcmp might often terminate early. */
  20. or r2,r0,r1
  21. bmsk_s r2,r2,1
  22. brne r2,0,.Lcharloop
  23. mov_s r12,0x01010101
  24. ror r5,r12
  25. .Lwordloop:
  26. ld.ab r2,[r0,4]
  27. ld.ab r3,[r1,4]
  28. nop_s
  29. sub r4,r2,r12
  30. bic r4,r4,r2
  31. and r4,r4,r5
  32. brne r4,0,.Lfound0
  33. breq r2,r3,.Lwordloop
  34. #ifdef __LITTLE_ENDIAN__
  35. xor r0,r2,r3 ; mask for difference
  36. sub_s r1,r0,1
  37. bic_s r0,r0,r1 ; mask for least significant difference bit
  38. sub r1,r5,r0
  39. xor r0,r5,r1 ; mask for least significant difference byte
  40. and_s r2,r2,r0
  41. and_s r3,r3,r0
  42. #endif /* LITTLE ENDIAN */
  43. cmp_s r2,r3
  44. mov_s r0,1
  45. j_s.d [blink]
  46. bset.lo r0,r0,31
  47. .balign 4
  48. #ifdef __LITTLE_ENDIAN__
  49. .Lfound0:
  50. xor r0,r2,r3 ; mask for difference
  51. or r0,r0,r4 ; or in zero indicator
  52. sub_s r1,r0,1
  53. bic_s r0,r0,r1 ; mask for least significant difference bit
  54. sub r1,r5,r0
  55. xor r0,r5,r1 ; mask for least significant difference byte
  56. and_s r2,r2,r0
  57. and_s r3,r3,r0
  58. sub.f r0,r2,r3
  59. mov.hi r0,1
  60. j_s.d [blink]
  61. bset.lo r0,r0,31
  62. #else /* BIG ENDIAN */
  63. /* The zero-detection above can mis-detect 0x01 bytes as zeroes
  64. because of carry-propagateion from a lower significant zero byte.
  65. We can compensate for this by checking that bit0 is zero.
  66. This compensation is not necessary in the step where we
  67. get a low estimate for r2, because in any affected bytes
  68. we already have 0x00 or 0x01, which will remain unchanged
  69. when bit 7 is cleared. */
  70. .balign 4
  71. .Lfound0:
  72. lsr r0,r4,8
  73. lsr_s r1,r2
  74. bic_s r2,r2,r0 ; get low estimate for r2 and get ...
  75. bic_s r0,r0,r1 ; <this is the adjusted mask for zeros>
  76. or_s r3,r3,r0 ; ... high estimate r3 so that r2 > r3 will ...
  77. cmp_s r3,r2 ; ... be independent of trailing garbage
  78. or_s r2,r2,r0 ; likewise for r3 > r2
  79. bic_s r3,r3,r0
  80. rlc r0,0 ; r0 := r2 > r3 ? 1 : 0
  81. cmp_s r2,r3
  82. j_s.d [blink]
  83. bset.lo r0,r0,31
  84. #endif /* ENDIAN */
  85. .balign 4
  86. .Lcharloop:
  87. ldb.ab r2,[r0,1]
  88. ldb.ab r3,[r1,1]
  89. nop_s
  90. breq r2,0,.Lcmpend
  91. breq r2,r3,.Lcharloop
  92. .Lcmpend:
  93. j_s.d [blink]
  94. sub r0,r2,r3
  95. #endif /* __ARC700__ */
  96. #ifdef __ARCHS__
  97. or r2, r0, r1
  98. bmsk_s r2, r2, 1
  99. brne r2, 0, @.Lcharloop
  100. ;;; s1 and s2 are word aligned
  101. ld.ab r2, [r0, 4]
  102. mov_s r12, 0x01010101
  103. ror r11, r12
  104. .align 4
  105. .LwordLoop:
  106. ld.ab r3, [r1, 4]
  107. ;; Detect NULL char in str1
  108. sub r4, r2, r12
  109. ld.ab r5, [r0, 4]
  110. bic r4, r4, r2
  111. and r4, r4, r11
  112. brne.d.nt r4, 0, .LfoundNULL
  113. ;; Check if the read locations are the same
  114. cmp r2, r3
  115. beq.d .LwordLoop
  116. mov.eq r2, r5
  117. ;; A match is found, spot it out
  118. #ifdef __LITTLE_ENDIAN__
  119. swape r3, r3
  120. mov_s r0, 1
  121. swape r2, r2
  122. #else
  123. mov_s r0, 1
  124. #endif
  125. cmp_s r2, r3
  126. j_s.d [blink]
  127. bset.lo r0, r0, 31
  128. .align 4
  129. .LfoundNULL:
  130. #ifdef __BIG_ENDIAN__
  131. swape r4, r4
  132. swape r2, r2
  133. swape r3, r3
  134. #endif
  135. ;; Find null byte
  136. ffs r0, r4
  137. bmsk r2, r2, r0
  138. bmsk r3, r3, r0
  139. swape r2, r2
  140. swape r3, r3
  141. ;; make the return value
  142. sub.f r0, r2, r3
  143. mov.hi r0, 1
  144. j_s.d [blink]
  145. bset.lo r0, r0, 31
  146. .align 4
  147. .Lcharloop:
  148. ldb.ab r2, [r0, 1]
  149. ldb.ab r3, [r1, 1]
  150. nop
  151. breq r2, 0, .Lcmpend
  152. breq r2, r3, .Lcharloop
  153. .align 4
  154. .Lcmpend:
  155. j_s.d [blink]
  156. sub r0, r2, r3
  157. #endif /* __ARCHS__ */
  158. END(strcmp)
  159. libc_hidden_def(strcmp)
  160. #ifndef __UCLIBC_HAS_LOCALE__
  161. strong_alias(strcmp,strcoll)
  162. libc_hidden_def(strcoll)
  163. #endif