strcmp.S 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. /* strcmp.S
  2. * Copyright (C) 2003-2007 Analog Devices Inc., All Rights Reserved.
  3. *
  4. * This file is subject to the terms and conditions of the GNU Library General
  5. * Public License. See the file "COPYING.LIB" in the main directory of this
  6. * archive for more details.
  7. *
  8. * Non-LGPL License also available as part of VisualDSP++
  9. * http://www.analog.com/processors/resources/crosscore/visualDspDevSoftware.html
  10. */
  11. #include <sysdep.h>
  12. /* Fast strcmp() for Blackfin.
  13. * When both string pointers are 4-byte aligned, this processes four
  14. * characters at a time. Uses a hw loop with a "very big" count to loop
  15. * "forever", until a difference or a terminating zero is found.
  16. * Once the end-case word has been identified, breaks out of the
  17. * loop to check more carefully (same as the unaligned case).
  18. */
  19. .text
  20. .align 2
  21. .weak _strcmp
  /*
   * _strcmp: compare two NUL-terminated byte strings.
   *
   * In:   r0 = pointer to the first string  (copied to p1)
   *       r1 = pointer to the second string (copied to p2)
   * Out:  r0 = (byte of first string) - (byte of second string), both
   *       zero-extended, taken at the first position where the bytes
   *       differ or where a terminating zero is found.
   * Uses: r1, r2, r3, p0, p1, p2, lc0 (via LSETUP) and cc are written and
   *       not restored; r6 and r7 are written but R7:4 are saved on entry
   *       and restored before returning.
   *
   * Structure: a word-at-a-time loop is used when both pointers have their
   * low two bits clear; it only locates the word of interest. The byte
   * loop at .Lunaligned then produces the result, both for unaligned
   * inputs and as the tail of the word loop.
   */
  22. ENTRY(_strcmp)
  23. [--sp] = (R7:4);
  24. p1 = r0;
  25. p2 = r1;
  26. p0 = -1; // loop count for both LSETUPs below (effectively "forever")
  27. // check if both pointers are word (4-byte) aligned
  28. r0 = r0 | r1; // check both pointers at same time
  29. r0 <<= 30; // dump all but last 2 bits
  30. cc = az; // are they zero?
  31. if !cc jump .Lunaligned; // no; use unaligned code.
  32. // fall-thru for aligned case..
  33. // note that r0 is zero from the previous shift (it is used below
  // as the zero operand of the +|+ adds)
  34. // p0 set to -1
  35. LSETUP (.Lbeginloop, .Lendloop) lc0=p0;
  36. // pick up first words
  37. r1 = [p1++];
  38. r2 = [p2++];
  39. // make up mask: 0x00FF00FF (low byte of each 16-bit half)
  40. r7 = 0xFF;
  41. r7.h = 0xFF;
  42. // loop : 9 cycles to check 4 characters
  // cc is primed here for the first pass and recomputed at .Lendloop
  // for every later pass
  43. cc = r1 == r2;
  44. .Lbeginloop:
  45. if !cc jump .Lnotequal4; // compare failure, exit loop
  46. // starting with 44332211
  47. // see if char 3 or char 1 is 0
  48. r3 = r1 & r7; // form 00330011
  49. // add to zero, and (r2 is free, reload)
  // NOTE(review): the next word of each string is loaded here and below
  // before the current word has been checked for a terminator, so the
  // word following the one that holds the NUL is also read.
  50. r6 = r3 +|+ r0 || r2 = [p2++] || nop;
  51. cc = az; // true if either is zero
  52. r3 = r1 ^ r3; // form 44002200 (4321^0301 => 4020)
  53. // (trick, saves having another mask)
  54. // add to zero, and (r1 is free, reload)
  55. r6 = r3 +|+ r0 || r1 = [p1++] || nop;
  56. cc |= az; // true if either is zero
  57. if cc jump .Lzero4; // leave if a zero somewhere
  58. .Lendloop:
  59. cc = r1 == r2; // compare the freshly loaded words for the next pass
  60. // loop exits
  61. .Lnotequal4: // compare failure on 4-char compare
  62. // address pointers are one word ahead;
  63. // faster to use zero4 exit code
  // (advance one more word so the shared -8 below lands on the
  // failing word)
  64. p1 += 4;
  65. p2 += 4;
  66. .Lzero4: // one of the bytes in word 1 is zero
  67. // but we've already fetched the next word; so
  68. // backup two to look at failing word again
  69. p1 += -8;
  70. p2 += -8;
  71. // here when pointers are unaligned: checks one
  72. // character at a time. Also use at the end of
  73. // the word-check algorithm to figure out what happened
  74. .Lunaligned:
  75. // r0 is non-zero when we arrive via the alignment-check jump, so
  // it is cleared here; the loop uses it as a zero operand.
  76. // p0 set to -1
  77. r0 = 0 (Z);
  78. r1 = B[p1++] (Z);
  79. r2 = B[p2++] (Z);
  80. LSETUP (.Lbeginloop1, .Lendloop1) lc0=p0;
  // cc accumulates three conditions; the loop continues only while
  // all of them hold
  81. .Lbeginloop1:
  82. cc = r1; // first char must be non-zero
  83. // chars must be the same
  // (the parallel load fetches the next byte of the first string
  // into r1 while the current pair is being compared)
  84. r3 = r2 - r1 (NS) || r1 = B[p1++] (Z) || nop;
  85. cc &= az;
  86. r3 = r0 - r2; // second char must be non-zero
  87. cc &= an;
  88. if !cc jump .Lexitloop1;
  89. .Lendloop1:
  90. r2 = B[p2++] (Z); // next byte of the second string
  91. .Lexitloop1: // here means we found a zero or a difference.
  92. // state: r2 = char N, p2 -> N+1, r1 = char N+1, p1 -> N+2
  // so re-read char N of the first string before forming the result
  93. r1=B[p1+ -2] (Z);
  94. r0 = r1 - r2; // return value: difference of the deciding bytes
  95. (r7:4) = [sp++];
  96. rts;
  97. .size _strcmp,.-_strcmp
  /*
   * Symbol bookkeeping for the routine above. libc_hidden_def and
   * weak_alias are macros defined outside this file (presumably reached
   * through sysdep.h -- NOTE(review): confirm). When
   * __UCLIBC_HAS_LOCALE__ is not defined, strcoll is set up as an alias
   * of strcmp and given its own hidden definition.
   */
  98. libc_hidden_def (strcmp)
  99. #ifndef __UCLIBC_HAS_LOCALE__
  100. weak_alias (strcmp,strcoll)
  101. libc_hidden_def (strcoll)
  102. #endif