strcmp.S

/*
 * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
 * Copyright (C) 2007 ARC International (UK) LTD
 *
 * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
 */
#include <features.h>
#include <sysdep.h>
/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle / word
   (respectively one cycle / byte) by forcing double source 1 alignment,
   unrolling by a factor of two, and speculatively loading the second
   word / byte of source 1; however, that would increase the overhead
   for loop setup / finish, and strcmp might often terminate early.  */
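/* For orientation (an editorial sketch, not part of the original source):
   the fast path below behaves roughly like this C loop, assuming 32-bit
   words and word-aligned inputs; s1/s2 are hypothetical names:

	unsigned long w1, w2;
	for (;;) {
		w1 = *(const unsigned long *)s1; s1 += 4;
		w2 = *(const unsigned long *)s2; s2 += 4;
		if ((w1 - 0x01010101UL) & ~w1 & 0x80808080UL)
			break;		// a byte of w1 is (or may be) NUL
		if (w1 != w2)
			break;		// first difference is in this word
	}

   The code after the loop then determines which byte differs or
   terminates first, in memory (string) order.  */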
ENTRY(strcmp)
	or	r2,r0,r1	; check both source pointers: fall back ...
	bmsk_s	r2,r2,1
	brne	r2,0,.Lcharloop	; ... to the byte loop unless word-aligned
	mov_s	r12,0x01010101
	ror	r5,r12		; r5 = 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]	; load a word from each string, ...
	ld.ab	r3,[r1,4]	; ... post-incrementing the pointers
	nop_s
	sub	r4,r2,r12
	bic	r4,r4,r2
	and	r4,r4,r5	; r4 flags candidate zero bytes in r2
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
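/* The sub/bic/and triple above is the classic zero-byte test
   (x - 0x01010101) & ~x & 0x80808080.  A worked example (editorial):
     x              = 0x12003456
     x - 0x01010101 = 0x10FF3355
     ~x             = 0xEDFFCBA9
     AND of the two = 0x00FF0301, & 0x80808080 = 0x00800000
   so bit 7 of the zero byte is flagged in r4.  A borrow out of a zero
   byte can also flag a neighbouring 0x01 byte; the .Lfound0 paths
   below cope with such false positives.  */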
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
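/* Worked example of the masking above (editorial): if the words first
   differ at bit 12, the bic leaves r0 = 0x00001000, and then
     r1 = 0x80808080 - 0x00001000 = 0x80807080
     r0 = 0x80808080 ^ 0x80807080 = 0x0000F000
   keeps the first differing byte's bits from the difference upward.
   All lower bits agree and all higher (later-in-memory) bytes are
   discarded, so the unsigned compare below orders the strings by
   their first differing byte, as strcmp requires.  */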
	cmp_s	r2,r3
	mov_s	r0,1		; return 1 if r2 > r3 (unsigned) ...
	j_s.d	[blink]
	bset.lo	r0,r0,31	; ... else 0x80000001 (negative)
	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
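	/* A worked example of such a false positive (editorial): for
	   r2 = 0x01000203 the test gives
	     (0x01000203 - 0x01010101) & ~0x01000203 & 0x80808080 = 0x80800000
	   flagging the genuine zero in byte 2 but also the 0x01 in byte 3,
	   which the borrow from byte 2 turned into 0xFF.  Bit 0 tells the
	   two apart: 0x01 has it set, 0x00 does not, which is what the
	   lsr/bic pair below exploits.  */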
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */
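/* The byte-wise fallback below is, in effect, this C loop (an editorial
   sketch; c1/c2 are hypothetical names, unsigned since ldb zero-extends):

	do {
		c1 = *s1++;
		c2 = *s2++;
	} while (c1 != 0 && c1 == c2);
	return c1 - c2;

   Unlike this sketch, the word paths above return 1, 0, or a value with
   bit 31 set rather than an exact byte difference; strcmp only
   guarantees the sign of the result.  */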
	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lcmpend
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3
END(strcmp)
libc_hidden_def(strcmp)

#ifndef __UCLIBC_HAS_LOCALE__
strong_alias(strcmp,strcoll)
libc_hidden_def(strcoll)
#endif