memset.S 1.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. /*
  2. * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
  3. *
  4. * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
  5. */
  6. #include <features.h>
  7. #include <sysdep.h>
  /*
   * PREWRITE(base, off) emits the instruction that memset issues on the
   * address [base, off] before it stores to that area.  It expands to
   * "prealloc" by default and to "prefetchw" when the build defines
   * DONT_USE_PREALLOC.
   */
#if !defined(DONT_USE_PREALLOC)
#define PREWRITE(base, off)	prealloc [(base),(off)]
#else
#define PREWRITE(base, off)	prefetchw [(base),(off)]
#endif
  /*
   * memset: fill a buffer with one byte value.
   *
   * Register use, as read from the code below:
   *   r0      destination pointer on entry; only ever read, so it is still
   *           the return value when the routine jumps to blink
   *   r1      fill value (only its low byte is used)
   *   r2      number of bytes still to be written
   *   r3      running store pointer, starts as a copy of r0
   *   r4, r5  fill byte replicated into every byte, written out with std
   *
   * Write hints are only issued for addresses inside the buffer:
   *   - the initial prefetchw runs after the zero-length check;
   *   - the 64-byte loop stops 64 bytes short of the end (sub3/add3 pair
   *     below), so the address [r3, 64] it hints is still to be written;
   *   - the 32-byte loop issues no hint, because [r3, 32] would be at or
   *     past the end of the buffer on its last iteration.
   */
ENTRY(memset)
	mov.f	0, r2			; Z = (size == 0)
;;; if size is zero, return immediately
	jz.d	[blink]
	mov	r3, r0			; (delay slot) work on a copy, r0 stays the return value
	prefetchw [r0]			; size != 0 here, so [r0] is inside the buffer

;;; if length <= 8, write it with the byte loop only
	brls.d.nt	r2, 8, .Lsmallchunk
	mov.f	lp_count, r2		; (delay slot) trip count for the byte loop; r2 != 0 so Z is clear

	and.f	r4, r0, 0x03		; Z set when the destination is already 4-byte aligned
	rsub	lp_count, r4, 4		; lp_count = 4 - (dst & 3)
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	stb.ab	r1, [r3, 1]
	sub	r2, r2, 1
.Laligndestination:

;;; Destination is aligned: replicate the fill byte into r4 and r5
	and	r1, r1, 0xFF
	asl	r4, r1, 8
	or	r4, r4, r1
	asl	r5, r4, 16
	or	r5, r5, r4
	mov	r4, r5

;;; Split the length: everything except the last 64 + (len & 63) bytes goes
;;; through the 64-byte loop, the rest is left in r2 for the loops after it
	sub3	lp_count, r2, 8		; lp_count = r2 - 64
	cmp	r2, 64
	bmsk.hi	r2, r2, 5		; r2 > 64: r2 = r2 & 63 ...
	mov.ls	lp_count, 0		; r2 <= 64: skip the 64-byte loop
	add3.hi	r2, r2, 8		; ... then r2 += 64

;;; Convert the byte count to 64-byte iterations (8 std each)
	lsr.f	lp_count, lp_count, 6
	lpnz	@.Lset64bytes
	;; LOOP START
	PREWRITE(r3, 64)	; Hint the next write location (still inside the buffer)
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
.Lset64bytes:

	lsr.f	lp_count, r2, 5		; 32-byte iterations over the at most 127 bytes left
	lpnz	.Lset32bytes
	;; LOOP START
	;; No write hint here: [r3, 32] can lie at or past the end of the buffer
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
.Lset32bytes:

	and.f	lp_count, r2, 0x1F	; at most 31 trailing bytes
.Lsmallchunk:
	lpnz	.Lcopy3bytes
	;; LOOP START
	stb.ab	r1, [r3, 1]
.Lcopy3bytes:

	j	[blink]

END(memset)
libc_hidden_def(memset)