memchr.S 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. /* Optimized version of the standard memchr() function.
  2. This file is part of the GNU C Library.
  3. Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
  4. Contributed by Dan Pop <Dan.Pop@cern.ch>.
  5. The GNU C Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, see
  15. <http://www.gnu.org/licenses/>. */
  16. /* Return: the address of the first occurrence of chr in str or NULL
  17. Inputs:
  18. in0: str
  19. in1: chr
  20. in2: byte count
  21. This implementation assumes little endian mode. For big endian mode,
  22. the instruction czx1.r should be replaced by czx1.l.
  23. The algorithm is fairly straightforward: search byte by byte until we
  24. get to a word-aligned address, then search word by word as much as
  25. possible; the remaining few bytes are searched one at a time.
  26. The word by word search is performed by xor-ing the word with a word
  27. containing chr in every byte. If there is a hit, the result will
  28. contain a zero byte in the corresponding position. The presence and
  29. position of that zero byte is detected with a czx instruction.
  30. All the loops in this function could have had the internal branch removed
  31. if br.ctop and br.cloop could be predicated :-(. */
  32. #include <sysdep.h>
  /* Symbolic names for the scratch registers used by __memchr below.
     saved_pr  - copy of the predicate registers taken at entry and
                 restored before returning
     saved_lc  - copy of ar.lc taken at entry and restored before returning
     chr       - the low 8 bits of in1, i.e. the byte being searched for
     len       - number of bytes still to be searched
     pos0      - not referenced by the code visible in this file
     val       - byte fetched by ld1 in the byte-at-a-time loops
     tmp       - str & 7, the misalignment of the start address
     chrx8     - chr replicated into all 8 bytes of a word
     loopcnt   - iteration count minus one, loaded into ar.lc
     str       - first input argument (in0), the start of the buffer
     The #undef drops any earlier macro named ret.  NOTE(review): this is
     presumably aimed at a definition coming from sysdep.h - confirm.  */
  33. #undef ret
  34. #define saved_pr r15
  35. #define saved_lc r16
  36. #define chr r17
  37. #define len r18
  38. #define pos0 r20
  39. #define val r21
  40. #define tmp r24
  41. #define chrx8 r25
  42. #define loopcnt r30
  43. #define str in0
  /* __memchr (in0 = str, in1 = chr, in2 = byte count)
     Returns in ret0 the address of the first byte equal to
     (unsigned char) chr within the first in2 bytes at str, or 0 (NULL)
     when there is no such byte.
     Phases:
       .l1  - byte loop over the unaligned head, up to the next 8-byte
              boundary (skipped when str is already aligned)
       .l2  - software-pipelined loop handling 8 bytes per iteration
       .l3  - byte loop over the tail left by .l2, and over the whole
              buffer when in2 is below 16
     Exit protocol at .foundit: p6 set means a byte loop hit and ret0 is one
     past the match, p7 set means the word loop hit, neither set means
     ret0 already holds NULL.
     ar.lc and the predicate registers are saved at entry and restored
     before returning.  */
  44. ENTRY(__memchr)
  45. .prologue
  /* Register frame: 3 inputs, 0 locals, 29 outputs, with 32 registers
     marked as rotating for the pipelined loop.  */
  46. alloc r2 = ar.pfs, 3, 0, 29, 32
  /* MEMLAT is not defined in this file.  NOTE(review): presumably it is
     provided by softpipe.h - confirm.  */
  47. #include "softpipe.h"
  /* Names for the rotating general registers and rotating predicates that
     carry values between the stages of the .l2 loop.  */
  48. .rotr value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2]
  49. .rotp p[MEMLAT+3]
  50. .save ar.lc, saved_lc
  51. mov saved_lc = ar.lc /* save the loop counter */
  52. .save pr, saved_pr
  53. mov saved_pr = pr /* save the predicates */
  54. .body
  /* ret0 is the running pointer for every loop below.  */
  55. mov ret0 = str
  56. and tmp = 7, str /* tmp = str % 8 */
  57. cmp.ne p7, p0 = r0, r0 /* clear p7 */
  58. extr.u chr = in1, 0, 8 /* chr = (unsigned char) in1 */
  59. mov len = in2
  /* p6 is set when 16 > in2 (unsigned), i.e. for counts of 0 to 15.  */
  60. cmp.gtu p6, p0 = 16, in2 /* use a simple loop for short */
  61. (p6) br.cond.spnt .srchfew ;; /* searches */
  /* Number of head bytes in front of the next 8-byte boundary.  When tmp
     is 0 the pointer is already aligned and the head loop is skipped.  */
  62. sub loopcnt = 8, tmp /* loopcnt = 8 - tmp */
  63. cmp.eq p6, p0 = tmp, r0
  64. (p6) br.cond.sptk .str_aligned;;
  /* Take the head bytes out of len up front.  ar.lc receives the count
     minus one because br.cloop keeps branching while ar.lc is non-zero.  */
  65. sub len = len, loopcnt
  66. adds loopcnt = -1, loopcnt;;
  67. mov ar.lc = loopcnt
  /* Head loop: fetch one byte with post-increment and compare it.  On a
     hit ret0 is already one past the match, .foundit corrects that.  */
  68. .l1:
  69. ld1 val = [ret0], 1
  70. ;;
  71. cmp.eq p6, p0 = val, chr
  72. (p6) br.cond.spnt .foundit
  73. br.cloop.sptk .l1 ;;
  /* ret0 is 8-byte aligned here.  in2 was at least 16 and at most 7 bytes
     were consumed by the head loop, so len / 8 is at least 1.  */
  74. .str_aligned:
  75. cmp.ne p6, p0 = r0, r0 /* clear p6 */
  76. shr.u loopcnt = len, 3 /* loopcnt = len / 8 */
  77. and len = 7, len ;; /* remaining len = len & 7 */
  78. adds loopcnt = -1, loopcnt
  /* Epilogue count: MEMLAT + 3 pipeline stages have to drain after the
     last load has been issued.  */
  79. mov ar.ec = MEMLAT + 3
  80. mux1 chrx8 = chr, @brcst ;; /* get a word full of chr */
  81. mov ar.lc = loopcnt
  /* Only bit 16 of the immediate is set, so of the rotating predicates
     only p16 starts out true and just stage 0 runs on the first pass.  */
  82. mov pr.rot = 1 << 16 ;;
  /* Pipelined word loop.  Stages, selected by the rotating predicates:
       stage 0          remember the word address, load 8 bytes
       stage MEMLAT     xor with chrx8, a matching byte becomes zero
       stage MEMLAT+1   czx1.r yields the index of the zero byte
       stage MEMLAT+2   an index other than 8 means a match, set p7
     NOTE(review): a word is loaded MEMLAT+2 iterations before its match
     test completes, so words following the matching one may already have
     been read (never beyond the len / 8 words counted above).  Confirm
     that this is acceptable for callers that pass a count larger than
     the readable object.  */
  83. .l2:
  84. (p[0]) mov addr[0] = ret0
  85. (p[0]) ld8 value[0] = [ret0], 8
  86. (p[MEMLAT]) xor aux[0] = value[MEMLAT], chrx8
  87. (p[MEMLAT+1]) czx1.r poschr[0] = aux[1]
  88. (p[MEMLAT+2]) cmp.ne p7, p0 = 8, poschr[1]
  89. (p7) br.cond.dpnt .foundit
  90. br.ctop.dptk .l2
  /* Reached either directly from the entry test (len = in2 below 16) or
     by falling out of .l2 (len = 0 to 7 tail bytes, ret0 just past the
     last word).  A zero len goes straight to .notfound.  */
  91. .srchfew:
  92. adds loopcnt = -1, len
  93. cmp.eq p6, p0 = len, r0
  94. (p6) br.cond.spnt .notfound ;;
  95. mov ar.lc = loopcnt
  /* Tail loop, same shape as .l1.  */
  96. .l3:
  97. ld1 val = [ret0], 1
  98. ;;
  99. cmp.eq p6, p0 = val, chr
  100. (p6) br.cond.dpnt .foundit
  101. br.cloop.sptk .l3 ;;
  /* No match.  With p6 and p7 both clear, neither fix-up at .foundit
     executes and the NULL placed in ret0 is what gets returned.  */
  102. .notfound:
  103. cmp.ne p6, p0 = r0, r0 /* clear p6 (p7 was already 0 when we got here) */
  104. mov ret0 = r0 ;; /* return NULL */
  /* Common exit.  Byte-loop hits step ret0 back by the post-increment.
     Word-loop hits rebuild the address from the word address recorded
     at stage 0 plus the byte index computed by czx1.r.  */
  105. .foundit:
  106. .pred.rel "mutex" p6, p7
  107. (p6) adds ret0 = -1, ret0 /* if we got here from l1 or l3 */
  108. (p7) add ret0 = addr[MEMLAT+2], poschr[1] /* if we got here from l2 */
  /* Put back the caller's predicates and loop counter, then return.  */
  109. mov pr = saved_pr, -1
  110. mov ar.lc = saved_lc
  111. br.ret.sptk.many b0
  112. END(__memchr)
  /* Additional names for __memchr.  The macros used here are defined
     outside this file.  NOTE(review): presumably weak_alias publishes
     memchr and __ubp_memchr as weak aliases of __memchr, and
     libc_hidden_def provides the library-internal definition of memchr -
     confirm against the macro definitions.  */
  113. weak_alias(__memchr, memchr)
  114. weak_alias(__memchr, __ubp_memchr)
  115. libc_hidden_def(memchr)