memcmp.S 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. /*
  2. * Copyright (C) 2013, 2022 Synopsys, Inc. (www.synopsys.com)
  3. * Copyright (C) 2007 ARC International (UK) LTD
  4. *
  5. * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
  6. */
  7. #include <sysdep.h>
  8. #include <features.h>
  /*
   * Register aliases for the word-wise path of the ARC700/ARCHS code.
   * WORD2 is the register that receives the second word loaded from the
   * first buffer on each loop pass; SHIFT is the register that is scaled
   * by 8 after the loop and then used to isolate the valid bytes of the
   * last word.  The two roles swap between r2 and r3 with endianness.
   */
  9. #ifdef __LITTLE_ENDIAN__
  10. #define WORD2 r2
  11. #define SHIFT r3
  12. #else /* BIG ENDIAN */
  13. #define WORD2 r3
  14. #define SHIFT r2
  15. #endif
  /*
   * memcmp, ARC700 / ARC HS implementation.
   * Registers as used below: r0 and r1 are the two buffer pointers and r2
   * is the byte count (presumably the memcmp(s1, s2, n) arguments in
   * order -- confirm against the ABI).  The result is written to r0 in
   * the delay slot of the return through blink.  r3, r4, r5 and r12 are
   * scratch, and lp_count drives the zero-overhead loops.
   */
  16. ENTRY(memcmp)
  17. #if defined(__ARC700__) || defined(__ARCHS__)
  /* r12 = (r0 | r1) << 30: non-zero only if one of the two low address
   * bits of either pointer is set, i.e. a pointer is not 4-byte aligned.  */
  18. or r12,r0,r1
  19. asl_s r12,r12,30
  /* r3 = n - 1; both loop trip counts below are derived from it.  */
  20. sub r3,r2,1
  /* Taken when n <= r12: that is n == 0 with aligned pointers, or an
   * unaligned pointer (r12 is then at least 0x40000000) with n not above
   * that value.  */
  21. brls r2,r12,.Lbytewise
  /* Word-wise path: preload the first word of each buffer.  */
  22. ld r4,[r0,0]
  23. ld r5,[r1,0]
  /* lp_count = (n - 1) / 8, one pass per two words of each buffer.  The
   * zero flag set here gates lpne.  The bhs_s after the loop appears to
   * rely on the carry left by this shift (bit 2 of n - 1); no instruction
   * in between carries a .f suffix.  */
  24. lsr.f lp_count,r3,3
  25. #ifdef __HS__
  26. /* In ARCv2 a branch can't be the last instruction in a zero overhead
  27. * loop.
  28. * So we move the branch to the start of the loop, duplicate it
  29. * after the end, and set up r12 so that the branch isn't taken
  30. * initially.
  31. */
  32. mov_s r12,WORD2
  33. lpne .Loop_end
  34. brne WORD2,r12,.Lodd
  35. ld WORD2,[r0,4]
  36. #else
  37. lpne .Loop_end
  38. ld_s WORD2,[r0,4]
  39. #endif
  /* Loop body: fetch the second word pair (WORD2 / r12), check the first
   * pair (r4 / r5), then step both pointers by 8 while loading the next
   * first pair.  */
  40. ld_s r12,[r1,4]
  41. brne r4,r5,.Leven
  /* The .a form writes the updated address (base + 8) back to r0 / r1.  */
  42. ld.a r4,[r0,8]
  43. ld.a r5,[r1,8]
  /* The check of the second word pair sits inside the loop for ARC700 and
   * just past its end for HS (see the ARCv2 note above).  */
  44. #ifdef __HS__
  45. .Loop_end:
  46. brne WORD2,r12,.Lodd
  47. #else
  48. brne WORD2,r12,.Lodd
  49. .Loop_end:
  50. #endif
  /* Scale SHIFT from bytes to bits.  */
  51. asl_s SHIFT,SHIFT,3
  /* Carry clear: r4 / r5 already hold the final, possibly partial, words.
   * Otherwise they are full words: compare them and load the final pair.  */
  52. bhs_s .Last_cmp
  53. brne r4,r5,.Leven
  54. ld r4,[r0,4]
  55. ld r5,[r1,4]
  56. #ifdef __LITTLE_ENDIAN__
  57. nop_s
  58. ; one more load latency cycle
  /* Final word pair: bit SHIFT is forced on in the difference so that a
   * set bit always exists at the last valid byte and anything past the
   * end of the buffers is shifted out by .Leven_cmp.  */
  59. .Last_cmp:
  60. xor r0,r4,r5
  61. bset r0,r0,SHIFT
  /* (r0 - 1) & ~r0 is a mask of the bits below the lowest set bit of r0;
   * norm followed by the mask with 24 reduces it to a left-shift amount
   * of 0, 8, 16 or 24.  */
  62. sub_s r1,r0,1
  63. bic_s r1,r1,r0
  64. norm r1,r1
  /* The and below is in the delay slot and executes before the branch.  */
  65. b.d .Leven_cmp
  66. and r1,r1,24
  /* Mismatch in a full first-word pair (r4 / r5): same shift computation,
   * without the end-of-buffer marker.  */
  67. .Leven:
  68. xor r0,r4,r5
  69. sub_s r1,r0,1
  70. bic_s r1,r1,r0
  71. norm r1,r1
  72. ; slow track insn
  73. and r1,r1,24
  /* Shift both words left by r1, then right by one so that both values
   * are below 2^31 and their difference has the correct sign.  */
  74. .Leven_cmp:
  75. asl r2,r4,r1
  76. asl r12,r5,r1
  77. lsr_s r2,r2,1
  78. lsr_s r12,r12,1
  /* Return; r0 is written in the delay slot.  */
  79. j_s.d [blink]
  80. sub r0,r2,r12
  81. .balign 4
  /* Mismatch in a second-word pair (r2 / r12 on little endian): same
   * reduction as .Leven, applied to those registers in place.  */
  82. .Lodd:
  83. xor r0,WORD2,r12
  84. sub_s r1,r0,1
  85. bic_s r1,r1,r0
  86. norm r1,r1
  87. ; slow track insn
  88. and r1,r1,24
  89. asl_s r2,r2,r1
  90. asl_s r12,r12,r1
  91. lsr_s r2,r2,1
  92. lsr_s r12,r12,1
  93. j_s.d [blink]
  94. sub r0,r2,r12
  95. #else /* BIG ENDIAN */
  /* Final word pair: shift both words right by the negated bit count so
   * that only the leading bytes of the last word take part.  */
  96. .Last_cmp:
  97. neg_s SHIFT,SHIFT
  98. lsr r4,r4,SHIFT
  99. lsr r5,r5,SHIFT
  100. ; slow track insn
  /* r0 = 0 when the words are equal, otherwise 1, with bit 31 also set
   * when the subtraction produced a carry.  */
  101. .Leven:
  102. sub.f r0,r4,r5
  103. mov.ne r0,1
  104. j_s.d [blink]
  105. bset.cs r0,r0,31
  /* Second-word pair already known to differ: r0 = 1, with bit 31 set
   * when the compare produced a carry.  */
  106. .Lodd:
  107. cmp_s WORD2,r12
  108. mov_s r0,1
  109. j_s.d [blink]
  110. bset.cs r0,r0,31
  111. #endif /* ENDIAN */
  112. .balign 4
  /* Byte-wise path, reached from the brls at the top of the function.  */
  113. .Lbytewise:
  114. breq r2,0,.Lnil
  /* Preload the first byte of each buffer.  */
  115. ldb r4,[r0,0]
  116. ldb r5,[r1,0]
  /* lp_count = (n - 1) / 2, one pass per two bytes of each buffer; the
   * bit shifted out (bit 0 of n - 1) is what bcc tests after the loop.  */
  117. lsr.f lp_count,r3
  118. #ifdef __HS__
  /* Same arrangement as the word loop: the branch is moved to the loop
   * start and r12 is primed so that it is not taken on the first pass.  */
  119. mov r12,r3
  120. lpne .Lbyte_end
  121. brne r3,r12,.Lbyte_odd
  122. #else
  123. lpne .Lbyte_end
  124. #endif
  /* Loop body: fetch the second byte pair (r3 / r12), check the first
   * pair (r4 / r5), then step both pointers by 2 while loading the next
   * first pair.  */
  125. ldb_s r3,[r0,1]
  126. ldb r12,[r1,1]
  127. brne r4,r5,.Lbyte_even
  128. ldb.a r4,[r0,2]
  129. ldb.a r5,[r1,2]
  130. #ifdef __HS__
  131. .Lbyte_end:
  132. brne r3,r12,.Lbyte_odd
  133. #else
  134. brne r3,r12,.Lbyte_odd
  135. .Lbyte_end:
  136. #endif
  /* Carry clear: r4 / r5 hold the last byte pair.  Otherwise one more
   * pair follows them and is loaded into r3 / r12.  */
  137. bcc .Lbyte_even
  138. brne r4,r5,.Lbyte_even
  139. ldb_s r3,[r0,1]
  140. ldb_s r12,[r1,1]
  /* Each exit returns the difference of its byte pair in r0, written in
   * the delay slot of the return.  */
  141. .Lbyte_odd:
  142. j_s.d [blink]
  143. sub r0,r3,r12
  144. .Lbyte_even:
  145. j_s.d [blink]
  146. sub r0,r4,r5
  /* n == 0: return 0.  */
  147. .Lnil:
  148. j_s.d [blink]
  149. mov r0,0
  150. #elif (__ARC64_ARCH32__)
  151. ;; Based on Synopsys code from newlib's arc64/memcmp.S
  /*
   * memcmp, 32-bit ARC64 implementation.
   * Registers as used below: r0 = first buffer (copied to r3 so that r0
   * can carry the result), r1 = second buffer, r2 = byte count.
   * r4-r11 hold the loaded words, r12 counts 16-byte chunks and r13 is a
   * 4-bit mask recording which word pair differed.
   * Counts of 32 bytes or fewer are compared one byte at a time.
   * NOTE(review): r13 is overwritten without being saved -- confirm it is
   * a scratch register under the ABI this is built for.
   */
  152. cmp r2, 32
  153. bls.d @.L_compare_1_bytes
  154. mov r3, r0 ; "r0" will be used as return value
  155. lsr r12, r2, 4 ; counter for 16-byte chunks
  156. xor r13, r13, r13 ; the mask showing inequal registers
  /* One pass loads four words from each buffer with post-increment, so
   * r3 and r1 each advance by exactly 16 bytes per pass.  */
  157. .L_compare_16_bytes:
  158. ld.ab r4, [r3, +4]
  159. ld.ab r5, [r1, +4]
  160. ld.ab r6, [r3, +4]
  161. ld.ab r7, [r1, +4]
  162. ld.ab r8, [r3, +4]
  163. ld.ab r9, [r1, +4]
  164. ld.ab r10, [r3, +4]
  165. ld.ab r11, [r1, +4]
  /* Record each differing pair as one bit of r13 (bit 0 = r4/r5 up to
   * bit 3 = r10/r11).  r13 is zero on entry to every pass, so the
   * conditional xor simply sets the bit.  */
  166. xor.f 0, r4, r5
  167. xor.ne r13, r13, 0b0001
  168. xor.f 0, r6, r7
  169. xor.ne r13, r13, 0b0010
  170. xor.f 0, r8, r9
  171. xor.ne r13, r13, 0b0100
  172. xor.f 0, r10, r11
  173. xor.ne r13, r13, 0b1000
  174. brne r13, 0, @.L_unequal_find
  175. dbnz r12, @.L_compare_16_bytes
  /*
   * No pointer fix-up is needed here.  The loop performs no look-ahead
   * loads, so r3 and r1 already address the first byte that has not been
   * compared.  Rewinding them by 4 (as this code used to do) made the
   * tail loop re-check bytes already known to be equal and stop before
   * the final bytes of the buffers, so a difference located there was
   * reported as equality.
   */
  179. bmsk_s r2, r2, 3 ; any remaining bytes to compare
  /* Byte loop: handles short inputs and the n % 16 tail of long ones.
   * Returns 0 in r0 when no bytes remain.  */
  180. .L_compare_1_bytes:
  181. cmp r2, 0
  182. jeq.d [blink]
  183. xor_s r0, r0, r0
  /* Return the difference of the first unequal byte pair; if every pair
   * matches, r0 is 0 from the last subtraction.  */
  184. 2:
  185. ldb.ab r4, [r3, +1]
  186. ldb.ab r5, [r1, +1]
  187. sub.f r0, r4, r5
  188. jne [blink]
  189. dbnz r2, @2b
  190. j_s [blink]
  191. ;; At this point, we want to find the _first_ comparison that marked the
  192. ;; inequality of "lhs" and "rhs"
  /* ffs yields the index of the lowest set mask bit, which is scaled by 4
   * and used as the operand of the indexed branch into the table below.
   * NOTE(review): this relies on every table entry having the same size,
   * which is presumably why the first three are padded with a nop --
   * confirm the assembler keeps the 32-bit encodings.  */
  193. .L_unequal_find:
  194. ffs r13, r13
  195. asl r13, r13, 2
  196. bi [r13]
  /* Table entries: copy the differing pair into r1 / r2.  */
  197. .L_unequal_r4r5:
  198. mov r1, r4
  199. b.d @.L_diff_byte_in_regs
  200. mov r2, r5
  201. nop
  202. .L_unequal_r6r7:
  203. mov r1, r6
  204. b.d @.L_diff_byte_in_regs
  205. mov r2, r7
  206. nop
  207. .L_unequal_r8r9:
  208. mov r1, r8
  209. b.d @.L_diff_byte_in_regs
  210. mov r2, r9
  211. nop
  212. .L_unequal_r10r11:
  213. mov r1, r10
  214. mov r2, r11
  215. ;; fall-through
  216. ;; If we're here, that means the two operands are not equal.
  /* Locate the lowest differing bit, round its index down to a byte
   * boundary (and with 0x18), shift that byte of each word into bits 0-7,
   * keep only those 8 bits and return their difference.  Taking the
   * lowest differing byte as the first one in memory assumes a
   * little-endian layout -- TODO confirm for this target.  */
  217. .L_diff_byte_in_regs:
  218. xor r0, r1, r2
  219. ffs r0, r0
  220. and r0, r0, 0x18
  221. lsr r1, r1, r0
  222. lsr r2, r2, r0
  223. bmsk_s r1, r1, 7
  224. bmsk_s r2, r2, 7
  225. j_s.d [blink]
  226. sub r0, r1, r2
  /* Neither ARC700 / ARCHS nor 32-bit ARC64: stop the build.  */
  227. #else
  228. #error "Unsupported ARC CPU type"
  229. #endif
  /* Close the memcmp definition opened by ENTRY.  */
  230. END(memcmp)
  231. libc_hidden_def(memcmp)
  /* With __UCLIBC_SUSV3_LEGACY__ defined, bcmp is tied to memcmp through
   * strong_alias (presumably as a second symbol for the same code).  */
  232. #ifdef __UCLIBC_SUSV3_LEGACY__
  233. strong_alias(memcmp,bcmp)
  234. #endif