memset.S 2.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. ! Copyright (C) 2013 Imagination Technologies Ltd.
  2. ! Licensed under LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
  3. .text
  4. .global _memset
  5. .type _memset,function
  6. ! D1Ar1 dst - in: start of the area to fill (advanced as bytes are written)
  7. ! D0Ar2 c   - in: fill value, masked to its low byte; later reused as scratch
  8. ! D1Ar3 cnt - in: number of bytes to fill; counts down to the bytes remaining
  9. ! D0Re0 dst - out: the original dst, handed back to the caller
  10. _memset: ! Fill cnt bytes at dst with byte c, return dst; scratch: D0Ar4, D1Ar5, A0.2, A1.2, TXRPT
  11. AND D0Ar2,D0Ar2,#0xFF ! Ensure a byte input value
  12. MULW D0Ar2,D0Ar2,#0x0101 ! Duplicate byte value into bits 0-15
  13. ANDS D0Ar4,D1Ar1,#7 ! D0Ar4 = dst & 7 (misalignment N); flags used by the BZ below (presumably untouched by the non-S ops in between)
  14. LSL D0Re0,D0Ar2,#16 ! Duplicate byte value into bits 16-31
  15. ADD A0.2,D0Ar2,D0Re0 ! A0.2 = fill byte replicated into all 4 bytes of a 32-bit word
  16. MOV D0Re0,D1Ar1 ! Return dst
  17. BZ $LLongStub ! if start address is aligned
  18. ! start address is not aligned on an 8 byte boundary, so we need in D1Ar5
  19. ! the number of bytes up to the next 8 byte address boundary, or cnt itself
  20. ! if cnt is smaller. NOTE(review): MOVMI tests the sign of cnt - D1Ar5, so this looks like it assumes cnt < 2^31 - confirm
  21. MOV D0Ar2,#8 ! Need 8 - N in D1Ar5 ...
  22. SUB D1Ar5,D0Ar2,D0Ar4 ! ... subtract N
  23. CMP D1Ar3,D1Ar5 ! Compare cnt with the bytes needed to reach alignment
  24. MOVMI D1Ar5,D1Ar3 ! cnt - D1Ar5 is negative: clamp the burst size to cnt
  25. B $LByteStub ! dst is mis-aligned, do $LByteStub
  26. !
  27. ! Preamble to LongLoop which generates 4*8 bytes per iteration (5 cycles)
  28. !
  29. $LLongStub:
  30. LSRS D0Ar2,D1Ar3,#5 ! D0Ar2 = number of whole 32-byte chunks; sets flags for the BEQ
  31. AND D1Ar3,D1Ar3,#0x1F ! cnt = bytes left over after the 32-byte chunks
  32. MOV A1.2,A0.2 ! Copy the pattern so A0.2 and A1.2 both hold it for SETL
  33. BEQ $LLongishStub ! No whole 32-byte chunk: try single 8-byte stores
  34. SUB TXRPT,D0Ar2,#1 ! Repeat count = chunks - 1 (presumably the counter that BR consumes - confirm)
  35. CMP D1Ar3,#0 ! Set flags now for the BZ that follows the loop
  36. $LLongLoop:
  37. SETL [D1Ar1++],A0.2,A1.2 ! Store the A0.2/A1.2 pair and step dst (8 bytes each, per the preamble)
  38. SETL [D1Ar1++],A0.2,A1.2
  39. SETL [D1Ar1++],A0.2,A1.2
  40. SETL [D1Ar1++],A0.2,A1.2
  41. BR $LLongLoop ! Branch back for the next chunk (presumably repeats while TXRPT lasts)
  42. BZ $Lexit ! Leftover cnt was zero (flags from the CMP above, assuming SETL/BR keep them): done
  43. ! Fewer than 32 bytes remain from here on (cnt was masked with 0x1F above)
  44. ! Preamble to LongishLoop which generates 1*8 bytes per iteration (2 cycles)
  45. !
  46. $LLongishStub:
  47. LSRS D0Ar2,D1Ar3,#3 ! D0Ar2 = number of whole 8-byte stores; sets flags for the BEQ
  48. AND D1Ar3,D1Ar3,#0x7 ! cnt = trailing bytes (0-7)
  49. MOV D1Ar5,D1Ar3 ! Burst size for $LByteStub = all the trailing bytes
  50. BEQ $LByteStub ! No 8-byte store needed: go straight to the byte burst
  51. SUB TXRPT,D0Ar2,#1 ! Repeat count = stores - 1
  52. CMP D1Ar3,#0 ! Set flags now for the BZ that follows the loop
  53. $LLongishLoop:
  54. SETL [D1Ar1++],A0.2,A1.2 ! One 8-byte store per iteration
  55. BR $LLongishLoop ! Branch back for the next store
  56. BZ $Lexit ! No trailing bytes: done; otherwise fall into $LByteStub
  57. !
  58. ! This does a byte structured burst of up to 7 bytes
  59. !
  60. ! D1Ar1 should point to the location required
  61. ! D1Ar3 should be the remaining total byte count
  62. ! D1Ar5 should be burst size (<= D1Ar3)
  63. !
  64. $LByteStub:
  65. SUBS D1Ar3,D1Ar3,D1Ar5 ! Reduce count; the resulting flags are tested by the BNZ after the table
  66. ADD D1Ar1,D1Ar1,D1Ar5 ! Advance pointer to end of area
  67. MULW D1Ar5,D1Ar5,#4 ! Scale burst size to (1*4), (2*4), (3*4), ...
  68. SUB D1Ar5,D1Ar5,#(8*4) ! Rebase to -(7*4), -(6*4), -(5*4), ...
  69. MOV A1.2,D1Ar5 ! Place the rebased offset in A1.2 for the PC arithmetic
  70. SUB PC,CPC1,A1.2 ! Jump into table below so only the last <burst size> SETBs run (assumes 4-byte instructions - confirm)
  71. SETB [D1Ar1+#(-7)],A0.2 ! Byte stores at negative offsets from the already-advanced pointer
  72. SETB [D1Ar1+#(-6)],A0.2
  73. SETB [D1Ar1+#(-5)],A0.2
  74. SETB [D1Ar1+#(-4)],A0.2
  75. SETB [D1Ar1+#(-3)],A0.2
  76. SETB [D1Ar1+#(-2)],A0.2
  77. SETB [D1Ar1+#(-1)],A0.2
  78. !
  79. ! Return if all data has been output, otherwise do $LLongStub
  80. !
  81. BNZ $LLongStub ! Count still non-zero after the SUBS above: continue with the bulk path
  82. $Lexit:
  83. MOV PC,D1RtP ! Return by jumping to the address held in D1RtP
  84. .size _memset,.-_memset
  85. libc_hidden_def(memset)