memset.S 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. /* Cloned and hacked for uClibc by Paul Mundt, December 2003 */
  2. /* Modified by SuperH, Inc. September 2003 */
  3. !
  4. ! Fast SH memset
  5. !
  6. ! by Toshiyasu Morita (tm@netcom.com)
  7. !
  8. ! SH5 code by J"orn Rennecke (joern.rennecke@superh.com)
  9. ! Copyright 2002 SuperH Ltd.
  10. !
  11. #include <features.h>
  12. #include <endian.h>
  /*
   * SHHI and SHLO name the two 64-bit register shifts (shlld / shlrd) so
   * that the code can be written independently of byte order:
   *   little-endian: SHHI = shlld, SHLO = shlrd
   *   otherwise:     SHHI = shlrd, SHLO = shlld
   * memset applies SHHI to an all-ones mask so that the cleared bits line
   * up with the first bytes in memory order, i.e. SHHI moves register
   * contents toward the bytes that sit at higher memory addresses.
   * SHLO is presumably the opposite direction; it is not used in the code
   * visible in this file - TODO confirm before relying on it.
   */
  13. #if __BYTE_ORDER == __LITTLE_ENDIAN
  14. #define SHHI shlld
  15. #define SHLO shlrd
  16. #else
  17. #define SHHI shlrd
  18. #define SHLO shlld
  19. #endif
  20. .section .text..SHmedia32,"ax"
  21. .globl memset
  22. .type memset, @function
  23. .align 5
  /*
   * memset: store the low byte of r3 into r4 consecutive bytes starting at
   * the address in r2, then return through the address held in r18.
   * Presumably this is the standard C memset(s, c, n) under the SH-5
   * calling convention - confirm against the ABI document.
   *
   * Register use, as read from the code below:
   *   r2   destination address; only ever read, so it is unchanged on exit
   *   r3   fill value; its low byte is replicated into all eight bytes
   *   r4   byte count (overwritten with count * 4 on the short path)
   *   r18  return address, loaded into tr2 by ptabs
   *   r5, r7, r8, r9, r20, r22, r23, r24, r25, tr0, tr1 are scratch
   *   r63  used as a zero source and as a discarded link destination
   *
   * Two paths:
   *   short      (r2 & 7) + count <= 8, so every byte to fill lies inside
   *              one aligned quadword; it is written with a masked
   *              load / merge / store.
   *   multiquad  everything else: stlo.q writes the possibly unaligned
   *              head, st.q writes whole aligned quadwords, sthi.q writes
   *              the tail.
   *
   * The /u and /l suffixes on branches are prediction hints
   * (unlikely / likely); they do not change which path is taken.
   */
  24. memset:
  /* r22 = offset of the destination inside its aligned quadword;
     r23 = offset + count, i.e. the fill end measured from the aligned base.
     mshflo.b doubles each low byte of r3, then mperm.w with a zero control
     word (r63) copies 16-bit word 0 into all four word positions. */
  25. pta/l multiquad, tr0
  26. andi r2, 7, r22
  27. ptabs r18, tr2
  28. mshflo.b r3,r3,r3
  29. add r4, r22, r23
  30. mperm.w r3, r63, r3 /* Fill pattern now in every byte of r3 */
  31. movi 8, r9
  32. bgtu/u r23, r9, tr0 /* multiquad */
  /* Short path: the whole fill fits in the quadword that contains r2.
     r7 = current memory from r2 to the end of that quadword.
     r8 = all-ones mask shifted by count * 8 bits in total, leaving zero
     bits over the first count bytes in memory order.  The shift is done
     as two steps of count * 4 bits, presumably because a single shift
     cannot express 64 bits when count is 8 - TODO confirm.
     mcmv then takes r7 where the mask is set and keeps the fill pattern
     elsewhere, so bytes beyond the fill are written back unchanged. */
  33. beqi/u r4, 0, tr2 /* Return with size 0 - ensures no mem accesses */
  34. ldlo.q r2, 0, r7
  35. shlli r4, 2, r4
  36. movi -1, r8
  37. SHHI r8, r4, r8
  38. SHHI r8, r4, r8
  39. mcmv r7, r8, r3
  40. stlo.q r2, 0, r3
  41. blink tr2, r63
  42. multiquad:
  /* Head: stlo.q fills from r2 to the end of its aligned quadword.
     r24 = (offset + count) / 8 = whole quadwords between the aligned base
     and the end address; r5 = end address (one past the last byte).
     r24 == 1 means only the tail store is still needed. */
  43. pta/l lastquad, tr0
  44. stlo.q r2, 0, r3
  45. shlri r23, 3, r24
  46. add r2, r4, r5
  47. beqi/u r24, 1, tr0 /* lastquad */
  /* r25 = destination rounded down to 8, r20 = end address rounded down
     to 8, r8 = r20 - 56 is the limit tested at the bottom of the loop.
     The loop is entered only when r24 >= 8 (r9 still holds 8). */
  48. pta/l loop, tr1
  49. sub r2, r22, r25
  50. andi r5, -8, r20 /* calculate end address and */
  51. addi r20, -7*8, r8 /* loop end address; This might overflow, so we need
  52. to use a different test before we start the loop
  53. */
  54. bge/u r24, r9, tr1 /* loop */
  /* 2 to 7 whole quadwords remain: store pairwise from both ends
     (r25 + 8k and r20 - 8k), stopping as soon as the halved count shows
     that the two ends have met.  Stores from the two ends may overlap;
     they write the same pattern. */
  55. st.q r25, 8, r3
  56. st.q r20, -8, r3
  57. shlri r24, 1, r24
  58. beqi/u r24, 1, tr0 /* lastquad */
  59. st.q r25, 16, r3
  60. st.q r20, -16, r3
  61. beqi/u r24, 2, tr0 /* lastquad */
  62. st.q r25, 24, r3
  63. st.q r20, -24, r3
  64. lastquad:
  /* Tail: sthi.q at r5 - 1 fills from the start of the aligned quadword
     containing the last byte up to and including that byte. */
  65. sthi.q r5, -1, r3
  66. blink tr2,r63
  67. loop:
  /* Four aligned quadwords per iteration, addressed from r25, which then
     advances by 32; repeats while r8 >= r25 (unsigned).  The alloco on the
     next line is disabled: the leading '!' turns it into a comment. */
  68. !!! alloco r25, 32 /* QQQ comment out for short-term fix to SHUK #3895.
  69. QQQ commenting out is logically correct, but sub-optimal
  70. QQQ Sean McGoogan - 4th April 2003. */
  71. st.q r25, 8, r3
  72. st.q r25, 16, r3
  73. st.q r25, 24, r3
  74. st.q r25, 32, r3
  75. addi r25, 32, r25
  76. bgeu/l r8, r25, tr1 /* loop */
  /* After the loop at most five whole quadwords below r20 are unwritten;
     store all five unconditionally (some may repeat loop stores), then
     finish with the same tail store as lastquad. */
  77. st.q r20, -40, r3
  78. st.q r20, -32, r3
  79. st.q r20, -24, r3
  80. st.q r20, -16, r3
  81. st.q r20, -8, r3
  82. sthi.q r5, -1, r3
  83. blink tr2,r63
  84. .size memset,.-memset
  85. libc_hidden_def(memset)