/* memset.S */
/* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
/* Modified by SuperH, Inc. September 2003 */
!
! Fast SH memset
!
! by Toshiyasu Morita (tm@netcom.com)
!
! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
! Copyright 2002 SuperH Ltd.
!
  11. #include <endian.h>
  12. #if __BYTE_ORDER == __LITTLE_ENDIAN
  13. #define SHHI shlld
  14. #define SHLO shlrd
  15. #else
  16. #define SHHI shlrd
  17. #define SHLO shlld
  18. #endif
  19. .section .text..SHmedia32,"ax"
  20. .globl memset
  21. .set memset,__memset
  22. .globl __memset
  23. .hidden __memset
  24. .type __memset, @function
  25. .align 5
  26. __memset:
  27. pta/l multiquad, tr0
  28. andi r2, 7, r22
  29. ptabs r18, tr2
  30. mshflo.b r3,r3,r3
  31. add r4, r22, r23
  32. mperm.w r3, r63, r3 // Fill pattern now in every byte of r3
  33. movi 8, r9
  34. bgtu/u r23, r9, tr0 // multiquad
  35. beqi/u r4, 0, tr2 // Return with size 0 - ensures no mem accesses
  36. ldlo.q r2, 0, r7
  37. shlli r4, 2, r4
  38. movi -1, r8
  39. SHHI r8, r4, r8
  40. SHHI r8, r4, r8
  41. mcmv r7, r8, r3
  42. stlo.q r2, 0, r3
  43. blink tr2, r63
  44. multiquad:
  45. pta/l lastquad, tr0
  46. stlo.q r2, 0, r3
  47. shlri r23, 3, r24
  48. add r2, r4, r5
  49. beqi/u r24, 1, tr0 // lastquad
  50. pta/l loop, tr1
  51. sub r2, r22, r25
  52. andi r5, -8, r20 // calculate end address and
  53. addi r20, -7*8, r8 // loop end address; This might overflow, so we need
  54. // to use a different test before we start the loop
  55. bge/u r24, r9, tr1 // loop
  56. st.q r25, 8, r3
  57. st.q r20, -8, r3
  58. shlri r24, 1, r24
  59. beqi/u r24, 1, tr0 // lastquad
  60. st.q r25, 16, r3
  61. st.q r20, -16, r3
  62. beqi/u r24, 2, tr0 // lastquad
  63. st.q r25, 24, r3
  64. st.q r20, -24, r3
  65. lastquad:
  66. sthi.q r5, -1, r3
  67. blink tr2,r63
  68. loop:
  69. !!! alloco r25, 32 // QQQ comment out for short-term fix to SHUK #3895.
  70. // QQQ commenting out is locically correct, but sub-optimal
  71. // QQQ Sean McGoogan - 4th April 2003.
  72. st.q r25, 8, r3
  73. st.q r25, 16, r3
  74. st.q r25, 24, r3
  75. st.q r25, 32, r3
  76. addi r25, 32, r25
  77. bgeu/l r8, r25, tr1 // loop
  78. st.q r20, -40, r3
  79. st.q r20, -32, r3
  80. st.q r20, -24, r3
  81. st.q r20, -16, r3
  82. st.q r20, -8, r3
  83. sthi.q r5, -1, r3
  84. blink tr2,r63
  85. .size __memset,.-__memset