memset.S 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. /*
  2. * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
  3. *
  4. * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
  5. */
  6. #include <features.h>
  7. #include <sysdep.h>
  /*
   * PREWRITE(A,B): hint issued on the address A+B ahead of the stores.
   * Expands to "prefetchw [A,B]" when DONT_USE_PREALLOC is defined and to
   * "prealloc [A,B]" otherwise.  In this file it is used once, with
   * (r3, 64), at the top of every iteration of the 64-byte store loop.
   *
   * NOTE(review): prealloc presumably claims the cache line without
   * reading it from memory, which would be safe only when the whole line
   * is about to be overwritten - confirm against the ARCv2 ISA manual.
   */
  8. #ifdef DONT_USE_PREALLOC
  9. #define PREWRITE(A,B) prefetchw [(A),(B)]
  10. #else
  11. #define PREWRITE(A,B) prealloc [(A),(B)]
  12. #endif
  /*
   * memset - fill a buffer with one byte value.
   *
   * Register use, as read from the code below:
   *   r0        destination pointer; never written here, so it is intact
   *             when the routine returns
   *   r1        fill value; masked to its low 8 bits on the aligned path
   *   r2        byte count still to be written
   *   r3        running store pointer (starts as a copy of r0)
   *   r4, r5    fill byte replicated into all four bytes of each register
   *   lp_count  zero-overhead loop counter, reloaded before every lpnz
   *
   * Flow:
   *   1. count == 0 : return immediately.
   *   2. count <= 8 : byte-by-byte stores (.Lsmallchunk).
   *   3. otherwise  : byte stores until r3 is 4-byte aligned, then 64-byte
   *                   blocks, then 32-byte blocks, then the last 0..31
   *                   bytes one at a time.
   *
   * NOTE(review): presumably r0/r1/r2 carry the C arguments (s, c, n) and
   * r0 is the return register under the ARC calling convention - confirm.
   */
  13. ENTRY(memset)
  14. prefetchw [r0] ; Prefetch the first write location
  ;;; NOTE(review): the prefetch above is issued before the zero-length
  ;;; check, so it also runs for an empty buffer - confirm this is harmless.
  15. mov.f 0, r2 ; only sets the flags: Z when the byte count is zero
  16. ;;; if size is zero, return (the delay slot below still executes)
  17. jz.d [blink]
  18. mov r3, r0 ; delay slot: r3 = working pointer, r0 is left alone as ret val
  19. ;;; if length <= 8 (brls is unsigned lower-or-same), go to the byte loop
  20. brls.d.nt r2, 8, .Lsmallchunk
  21. mov.f lp_count,r2 ; delay slot: lp_count = count, Z clear because count != 0
  ;;; Align the destination: r4 = r0 & 3, and Z is set when already aligned.
  ;;; rsub does not touch the flags, so lpnz below still tests the and.f
  ;;; result and skips the loop for an aligned pointer.
  22. and.f r4, r0, 0x03
  23. rsub lp_count, r4, 4 ; lp_count = 4 - r4 = bytes up to the next 4-byte boundary
  24. lpnz @.Laligndestination
  25. ;; LOOP BEGIN: one byte per iteration
  26. stb.ab r1, [r3,1] ; store low byte of r1, then r3 += 1
  27. sub r2, r2, 1 ; one byte fewer left to write
  28. .Laligndestination:
  29. ;;; Destination is aligned: build the 32-bit fill pattern in r4 and r5
  30. and r1, r1, 0xFF ; keep only the fill byte
  31. asl r4, r1, 8
  32. or r4, r4, r1 ; r4 = fill byte in the two low bytes
  33. asl r5, r4, 16
  34. or r5, r5, r4 ; r5 = fill byte in all four bytes
  35. mov r4, r5 ; r4 = r5, so the std stores can use the r4/r5 pair
  ;;; Split the count so that 64..127 bytes are always held back from the
  ;;; 64-byte loop whenever more than 64 bytes remain:
  ;;;   r2 >  64 : lp_count = r2 - 64, r2 = (r2 & 63) + 64
  ;;;   r2 <= 64 : lp_count = 0, r2 unchanged
  36. sub3 lp_count, r2, 8 ; lp_count = r2 - (8 << 3) = r2 - 64
  37. cmp r2, 64
  38. bmsk.hi r2, r2, 5 ; r2 > 64: keep bits 0..5 (r2 & 63)
  39. mov.ls lp_count, 0 ; r2 <= 64: no 64-byte blocks at all
  40. add3.hi r2, r2, 8 ; r2 > 64: add back 8 << 3 = 64
  41. ;;; lp_count = number of 64-byte blocks (8 x std or 16 x st per iteration)
  42. lsr.f lp_count, lp_count, 6
  43. lpnz @.Lset64bytes
  44. ;; LOOP START: 64 bytes per iteration
  45. PREWRITE(r3, 64) ;Hint for the block after this one; at least 64 bytes always remain past this block (see the split above)
  46. #if defined(__LL64__) || defined(__ARC_LL64__)
  47. std.ab r4, [r3, 8]
  48. std.ab r4, [r3, 8]
  49. std.ab r4, [r3, 8]
  50. std.ab r4, [r3, 8]
  51. std.ab r4, [r3, 8]
  52. std.ab r4, [r3, 8]
  53. std.ab r4, [r3, 8]
  54. std.ab r4, [r3, 8]
  55. #else
  56. st.ab r4, [r3, 4]
  57. st.ab r4, [r3, 4]
  58. st.ab r4, [r3, 4]
  59. st.ab r4, [r3, 4]
  60. st.ab r4, [r3, 4]
  61. st.ab r4, [r3, 4]
  62. st.ab r4, [r3, 4]
  63. st.ab r4, [r3, 4]
  64. st.ab r4, [r3, 4]
  65. st.ab r4, [r3, 4]
  66. st.ab r4, [r3, 4]
  67. st.ab r4, [r3, 4]
  68. st.ab r4, [r3, 4]
  69. st.ab r4, [r3, 4]
  70. st.ab r4, [r3, 4]
  71. st.ab r4, [r3, 4]
  72. #endif
  73. .Lset64bytes:
  74. lsr.f lp_count, r2, 5 ;At most 127 bytes remain in r2: lp_count = 32-byte blocks (0..3)
  75. lpnz .Lset32bytes
  76. ;; LOOP START: 32 bytes per iteration (4 x std or 8 x st)
  ;;; NOTE(review): on the final iteration r3+32 can be the first byte past
  ;;; the buffer (when r2 & 0x1F is 0), so the prefetch below may touch a
  ;;; line that is not being written - confirm this is acceptable on SMP.
  77. prefetchw [r3, 32] ;Prefetch the next write location
  78. #if defined(__LL64__) || defined(__ARC_LL64__)
  79. std.ab r4, [r3, 8]
  80. std.ab r4, [r3, 8]
  81. std.ab r4, [r3, 8]
  82. std.ab r4, [r3, 8]
  83. #else
  84. st.ab r4, [r3, 4]
  85. st.ab r4, [r3, 4]
  86. st.ab r4, [r3, 4]
  87. st.ab r4, [r3, 4]
  88. st.ab r4, [r3, 4]
  89. st.ab r4, [r3, 4]
  90. st.ab r4, [r3, 4]
  91. st.ab r4, [r3, 4]
  92. #endif
  93. .Lset32bytes:
  94. and.f lp_count, r2, 0x1F ;Last remaining 0..31 bytes; Z set when none are left
  ;;; .Lsmallchunk is also entered straight from the count <= 8 test, with
  ;;; lp_count = count and Z clear from the delay-slot mov.f.
  95. .Lsmallchunk:
  96. lpnz .Lcopy3bytes
  97. ;; LOOP START: one byte per iteration
  98. stb.ab r1, [r3, 1] ; store low byte of r1, then r3 += 1
  99. .Lcopy3bytes:
  100. j [blink] ; return; r0 was never modified
  101. END(memset)
  102. libc_hidden_def(memset)