memset.S 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. /*
  2. * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
  3. * Copyright (C) 2007 ARC International (UK) LTD
  4. *
  5. * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
  6. */
  7. #include <sysdep.h>
  8. #if !defined(__ARC700__) && !defined(__ARCHS__)
  9. #error "Neither ARC700 nor ARCHS is defined!"
  10. #endif
  11. ENTRY(memset)
      ; memset: fill a memory region with one byte value.
      ;
      ; Register usage as read from the code below:
      ;   r0     = destination pointer; never written here, so it is still
      ;            the original pointer when the routine returns
      ;   r1     = fill value; only its low 8 bits are used
      ;   r2     = number of bytes to fill
      ;   blink  = return address (every exit is a jump through blink)
      ;
      ; Two separate bodies are provided, selected at build time:
      ;   __ARC700__ : scratch registers r1-r4, r12, lp_count, flags
      ;   __ARCHS__  : scratch registers r1-r5, lp_count, flags
      ;
      ; Both bodies use zero-overhead loops: lp_count holds the iteration
      ; count and lpne / lpnz skip the loop body entirely when Z is set.
      ; Instructions with a .d suffix execute the following instruction in
      ; their delay slot, so the order of those pairs must not be changed.
  12. #ifdef __ARC700__
  13. #define SMALL 7 /* Must be at least 6 to deal with alignment/loop issues. */
  14. mov_s r4,r0  ; r4 = working write pointer, r0 is left untouched
  15. or r12,r0,r2  ; combine pointer and length so both can be tested at once
  16. bmsk.f r12,r12,1  ; keep bits 1:0; Z set when pointer and length are both multiples of 4
  17. extb_s r1,r1  ; zero-extend the fill value to a single byte
  18. asl r3,r1,8  ; r3 = fill byte shifted into bits 15:8
  19. beq.d .Laligned  ; fully word-aligned request: go straight to the word loop
  20. or_s r1,r1,r3  ; (delay slot, always executed) r1 = fill byte replicated into 16 bits
  21. brls r2,SMALL,.Ltiny  ; unsigned length <= SMALL: plain byte loop
      ; Unaligned start and/or ragged end with length > SMALL.
      ; First patch the tail: together the next two stores cover the up to
      ; 3 trailing bytes that the word loop will not reach.
  22. add r3,r2,r0  ; r3 = one past the last byte to fill
  23. stb r1,[r3,-1]  ; write the very last byte
  24. bclr_s r3,r3,0  ; round the end address down to an even address
  25. stw r1,[r3,-2]  ; write the 16-bit halfword just below that even address
      ; Then patch the head and move r4 up to a 4-byte boundary.
  26. bmsk.f r12,r0,1  ; r12 = start & 3; Z set when the start is already word aligned
  27. add_s r2,r2,r12  ; no .f suffix here, flags from the bmsk.f above stay valid
  28. sub.ne r2,r2,4  ; misaligned start: r2 = length - (4 - (start & 3)), bytes left from the next word boundary
  29. stb.ab r1,[r4,1]  ; write the first byte, then r4 += 1
  30. and r4,r4,-2  ; round r4 down to an even address
  31. stw.ab r1,[r4,2]  ; write a halfword there, then r4 += 2
  32. and r4,r4,-4  ; round r4 down to a word boundary; earlier bytes are already written
  33. .Laligned: ; This code address should be aligned for speed.
  34. asl r3,r1,16  ; r3 = 16-bit pattern shifted into the upper half
  35. lsr.f lp_count,r2,2  ; lp_count = number of whole words; Z set when that is zero
  36. or_s r1,r1,r3  ; r1 = fill byte replicated into all 4 bytes
  37. lpne .Loop_end  ; run the word loop unless the count is zero
  38. st.ab r1,[r4,4]  ; loop body: store one word, then r4 += 4
  39. .Loop_end:
  40. j_s [blink]  ; return
  41. .balign 4
  42. .Ltiny:  ; short request: one byte per iteration
  43. mov.f lp_count,r2  ; lp_count = length; Z set when length is zero
  44. lpne .Ltiny_end  ; skip the loop for a zero length
  45. stb.ab r1,[r4,1]  ; loop body: store one byte, then r4 += 1
  46. .Ltiny_end:
  47. j_s [blink]  ; return
  48. #endif /* __ARC700__ */
  49. #ifdef __ARCHS__
  50. #ifdef DONT_USE_PREALLOC
  51. #define PREWRITE(A,B) prefetchw [(A),(B)]
  52. #else
  53. #define PREWRITE(A,B) prealloc [(A),(B)]
  54. #endif
  55. prefetchw [r0] ; Prefetch the write location
  56. mov.f 0, r2  ; destination operand is 0: result discarded, only the flags for r2 are set
  57. ;;; if size is zero
  58. jz.d [blink]  ; zero length: return immediately (delay slot below still executes)
  59. mov r3, r0 ; don't clobber ret val (r3 becomes the working write pointer)
  60. ;;; if length <= 8 (brls is an unsigned lower-or-same test)
  61. brls.d.nt r2, 8, .Lsmallchunk  ; short request: byte loop only; .nt is a not-taken prediction hint
  62. mov.f lp_count,r2  ; (delay slot) byte count for .Lsmallchunk; Z is clear because r2 is non-zero here
  63. and.f r4, r0, 0x03  ; r4 = start & 3; Z set when the start is already word aligned
  64. rsub lp_count, r4, 4  ; lp_count = 4 - r4 = bytes needed to reach a word boundary
  65. lpnz @.Laligndestination  ; skipped entirely when the start is aligned
  66. ;; LOOP BEGIN
  67. stb.ab r1, [r3,1]  ; store one byte, then r3 += 1
  68. sub r2, r2, 1  ; one byte fewer remaining
  69. .Laligndestination:
  70. ;;; Destination is aligned
  71. and r1, r1, 0xFF  ; keep only the low byte of the fill value
  72. asl r4, r1, 8
  73. or r4, r4, r1  ; r4 = fill byte replicated into 16 bits
  74. asl r5, r4, 16
  75. or r5, r5, r4  ; r5 = fill byte replicated into all 4 bytes
  76. mov r4, r5  ; r4 and r5 both hold the pattern (std below stores the r4/r5 pair)
  77. sub3 lp_count, r2, 8  ; lp_count = r2 - (8 << 3) = r2 - 64
  78. cmp r2, 64  ; the three conditional instructions below do not change the flags
  79. bmsk.hi r2, r2, 5  ; r2 > 64: keep r2 & 63 ...
  80. mov.ls lp_count, 0  ; r2 <= 64: no 64-byte iterations at all, r2 unchanged
  81. add3.hi r2, r2, 8  ; ... r2 > 64: then add 64, so r2 = (r2 & 63) + 64
  82. ;;; Convert the byte count in lp_count into a count of 64-byte chunks (loop body unrolled)
  83. lsr.f lp_count, lp_count, 6  ; chunks = (len - 64) >> 6, leaving at least 64 bytes for the code below; Z set when zero
  84. lpnz @.Lset64bytes
  85. ;; LOOP START
  86. PREWRITE(r3, 64) ;Prepare the location 64 bytes ahead (prealloc, or prefetchw when DONT_USE_PREALLOC is defined)
  87. #if defined(__LL64__) || defined(__ARC_LL64__)
  88. std.ab r4, [r3, 8]  ; 8 stores of 8 bytes each = 64 bytes per iteration
  89. std.ab r4, [r3, 8]
  90. std.ab r4, [r3, 8]
  91. std.ab r4, [r3, 8]
  92. std.ab r4, [r3, 8]
  93. std.ab r4, [r3, 8]
  94. std.ab r4, [r3, 8]
  95. std.ab r4, [r3, 8]
  96. #else
  97. st.ab r4, [r3, 4]  ; 16 stores of 4 bytes each = 64 bytes per iteration
  98. st.ab r4, [r3, 4]
  99. st.ab r4, [r3, 4]
  100. st.ab r4, [r3, 4]
  101. st.ab r4, [r3, 4]
  102. st.ab r4, [r3, 4]
  103. st.ab r4, [r3, 4]
  104. st.ab r4, [r3, 4]
  105. st.ab r4, [r3, 4]
  106. st.ab r4, [r3, 4]
  107. st.ab r4, [r3, 4]
  108. st.ab r4, [r3, 4]
  109. st.ab r4, [r3, 4]
  110. st.ab r4, [r3, 4]
  111. st.ab r4, [r3, 4]
  112. st.ab r4, [r3, 4]
  113. #endif
  114. .Lset64bytes:
  115. lsr.f lp_count, r2, 5 ;Number of 32-byte chunks left (r2 is at most 127 here); Z set when zero
  116. lpnz .Lset32bytes
  117. ;; LOOP START
  118. prefetchw [r3, 32] ;Prefetch the address 32 bytes ahead. NOTE(review): on the last iteration this is the first address after the 32-byte chunks, which can lie past the end of the region - confirm this is acceptable
  119. #if defined(__LL64__) || defined(__ARC_LL64__)
  120. std.ab r4, [r3, 8]  ; 4 stores of 8 bytes each = 32 bytes per iteration
  121. std.ab r4, [r3, 8]
  122. std.ab r4, [r3, 8]
  123. std.ab r4, [r3, 8]
  124. #else
  125. st.ab r4, [r3, 4]  ; 8 stores of 4 bytes each = 32 bytes per iteration
  126. st.ab r4, [r3, 4]
  127. st.ab r4, [r3, 4]
  128. st.ab r4, [r3, 4]
  129. st.ab r4, [r3, 4]
  130. st.ab r4, [r3, 4]
  131. st.ab r4, [r3, 4]
  132. st.ab r4, [r3, 4]
  133. #endif
  134. .Lset32bytes:
  135. and.f lp_count, r2, 0x1F ;Remaining 0..31 bytes for the byte loop; Z set when none are left
  136. .Lsmallchunk:  ; also entered directly for lengths 1..8 with lp_count = length
  137. lpnz .Lcopy3bytes
  138. ;; LOOP START
  139. stb.ab r1, [r3, 1]  ; store one byte, then r3 += 1
  140. .Lcopy3bytes:  ; despite the label name this is simply the end of the byte loop
  141. j [blink]  ; return
  142. #endif /* __ARCHS__ */
  143. END(memset)
  144. libc_hidden_def(memset)