memchr.S 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133
  1. /* Optimized version of the standard memchr() function.
  2. This file is part of the GNU C Library.
  3. Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
  4. Contributed by Dan Pop <Dan.Pop@cern.ch>.
  5. The GNU C Library is free software; you can redistribute it and/or
  6. modify it under the terms of the GNU Lesser General Public
  7. License as published by the Free Software Foundation; either
  8. version 2.1 of the License, or (at your option) any later version.
  9. The GNU C Library is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. Lesser General Public License for more details.
  13. You should have received a copy of the GNU Lesser General Public
  14. License along with the GNU C Library; if not, write to the Free
  15. Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  16. 02111-1307 USA. */
  17. /* Return: the address of the first occurrence of chr in str or NULL
  18. Inputs:
  19. in0: str
  20. in1: chr
  21. in2: byte count
  22. This implementation assumes little endian mode. For big endian mode,
  23. the instruction czx1.r should be replaced by czx1.l.
  24. The algorithm is fairly straightforward: search byte by byte until we
  25. get to a word aligned address, then search word by word as much as
  26. possible; the remaining few bytes are searched one at a time.
  27. The word by word search is performed by xor-ing the word with a word
  28. containing chr in every byte. If there is a hit, the result will
  29. contain a zero byte in the corresponding position. The presence and
  30. position of that zero byte is detected with a czx instruction.
  31. All the loops in this function could have had the internal branch removed
  32. if br.ctop and br.cloop could be predicated :-(. */
  33. #include "sysdep.h"
  34. #undef ret /* NOTE(review): presumably drops a "ret" macro from sysdep.h - confirm */
  35. #define saved_pr r15 /* copy of the predicate registers taken on entry */
  36. #define saved_lc r16 /* copy of ar.lc taken on entry */
  37. #define chr r17 /* byte to look for: low 8 bits of in1 */
  38. #define len r18 /* number of bytes still to be searched */
  39. #define pos0 r20 /* not referenced by the code visible in this file */
  40. #define val r21 /* byte most recently loaded by the byte loops */
  41. #define tmp r24 /* str % 8 */
  42. #define chrx8 r25 /* 8-byte word with chr replicated in every byte */
  43. #define loopcnt r30 /* scratch for the iteration count written to ar.lc */
  44. #define str in0 /* first argument: start of the buffer */
  /* __memchr (in0 = str, in1 = chr, in2 = byte count)
     Searches in2 bytes starting at str for the byte (unsigned char) chr.
     ret0 serves as the running pointer; on exit it holds the address of
     the first matching byte, or 0 (NULL) when nothing matched.
     Phases:
       .l1  byte loop that runs until ret0 is 8-byte aligned;
       .l2  software-pipelined loop that tests 8 bytes per iteration;
       .l3  byte loop over the leftover bytes (and the only loop used
            when in2 < 16).
     Exit protocol at .foundit: p6 set means a byte loop hit (ret0 is one
     past the match), p7 set means a word-loop hit (address rebuilt from
     addr[] and poschr[]), neither set means not found.
     ar.lc and the predicates are saved on entry and restored on exit.  */
  45. ENTRY(__memchr)
  46. .prologue
  47. alloc r2 = ar.pfs, 3, 0, 29, 32 // frame: 3 inputs, 0 locals, 29 outputs, 32 rotating
  48. #include "softpipe.h" /* NOTE(review): presumably where MEMLAT comes from - confirm */
  49. .rotr value[MEMLAT+1], addr[MEMLAT+3], aux[2], poschr[2] // rotating register arrays for the .l2 pipeline
  50. .rotp p[MEMLAT+3] // rotating stage predicates for the .l2 pipeline
  51. .save ar.lc, saved_lc
  52. mov saved_lc = ar.lc // save the loop counter
  53. .save pr, saved_pr
  54. mov saved_pr = pr // save the predicates
  55. .body
  56. mov ret0 = str // ret0 = running pointer into the buffer
  57. and tmp = 7, str // tmp = str % 8
  58. cmp.ne p7, p0 = r0, r0 // clear p7 (only a hit in .l2 sets it)
  59. extr.u chr = in1, 0, 8 // chr = (unsigned char) in1
  60. mov len = in2 // len = bytes left to search
  61. cmp.gtu p6, p0 = 16, in2 // fewer than 16 bytes (unsigned compare):
  62. (p6) br.cond.spnt .srchfew ;; // use only the simple byte loop
  63. sub loopcnt = 8, tmp // loopcnt = 8 - tmp = bytes up to the next 8-byte boundary
  64. cmp.eq p6, p0 = tmp, r0 // is str already 8-byte aligned?
  65. (p6) br.cond.sptk .str_aligned;;
  66. sub len = len, loopcnt // the bytes examined by .l1 no longer count as remaining
  67. adds loopcnt = -1, loopcnt;; // br.cloop executes the body ar.lc + 1 times
  68. mov ar.lc = loopcnt
  69. .l1: // byte loop: advance ret0 to an 8-byte boundary
  70. ld1 val = [ret0], 1 // val = *ret0, then ret0 += 1
  71. ;;
  72. cmp.eq p6, p0 = val, chr
  73. (p6) br.cond.spnt .foundit // hit: p6 set, ret0 is one past the match
  74. br.cloop.sptk .l1 ;;
  75. .str_aligned: // ret0 is 8-byte aligned from here on
  76. cmp.ne p6, p0 = r0, r0 // clear p6
  77. shr.u loopcnt = len, 3 // loopcnt = len / 8
  78. and len = 7, len ;; // remaining len = len & 7
  79. adds loopcnt = -1, loopcnt // number of whole words - 1, for ar.lc
  80. mov ar.ec = MEMLAT + 3 // epilogue count = number of pipeline stages (matches p[MEMLAT+3])
  81. mux1 chrx8 = chr, @brcst ;; // get a word full of chr
  82. mov ar.lc = loopcnt
  83. mov pr.rot = 1 << 16 ;; // p16 (= p[0]) on, all other rotating predicates off
  84. .l2: // pipelined word loop; each line is a stage guarded by its own predicate
  85. (p[0]) mov addr[0] = ret0 // remember where this word starts
  86. (p[0]) ld8 value[0] = [ret0], 8 // load 8 bytes, then ret0 += 8
  87. (p[MEMLAT]) xor aux[0] = value[MEMLAT], chrx8 // bytes equal to chr become zero
  88. (p[MEMLAT+1]) czx1.r poschr[0] = aux[1] // index of the first zero byte (compared against 8 below)
  89. (p[MEMLAT+2]) cmp.ne p7, p0 = 8, poschr[1] // p7 = a zero byte was found, i.e. the word contains chr
  90. (p7) br.cond.dpnt .foundit // hit: p7 set, address rebuilt at .foundit
  91. br.ctop.dptk .l2
  92. .srchfew: // byte loop over the last len bytes (len = in2 when entered from the top)
  93. adds loopcnt = -1, len // iterations - 1, for ar.lc
  94. cmp.eq p6, p0 = len, r0 // nothing left to search?
  95. (p6) br.cond.spnt .notfound ;;
  96. mov ar.lc = loopcnt
  97. .l3:
  98. ld1 val = [ret0], 1 // val = *ret0, then ret0 += 1
  99. ;;
  100. cmp.eq p6, p0 = val, chr
  101. (p6) br.cond.dpnt .foundit // hit: p6 set, ret0 is one past the match
  102. br.cloop.sptk .l3 ;;
  103. .notfound:
  104. cmp.ne p6, p0 = r0, r0 // clear p6 (p7 was already 0 when we got here)
  105. mov ret0 = r0 ;; // return NULL
  106. .foundit: // common exit; also reached by falling through from .notfound with p6 = p7 = 0
  107. .pred.rel "mutex" p6, p7 // assembler hint: p6 and p7 are never both set
  108. (p6) adds ret0 = -1, ret0 // if we got here from l1 or l3
  109. (p7) add ret0 = addr[MEMLAT+2], poschr[1] // if we got here from l2
  110. mov pr = saved_pr, -1 // restore the predicates (mask -1 = all of them)
  111. mov ar.lc = saved_lc // restore the loop counter
  112. br.ret.sptk.many b0
  113. END(__memchr)
  114. weak_alias (__memchr, memchr) /* NOTE(review): presumably exports memchr as a weak alias of __memchr; macro is defined outside this file */
  115. #if !__BOUNDED_POINTERS__
  116. weak_alias (__memchr, __ubp_memchr) /* second alias, emitted only when __BOUNDED_POINTERS__ is zero or undefined */
  117. #endif
  118. libc_hidden_def (memchr) /* NOTE(review): libc-internal visibility macro defined elsewhere - confirm semantics */