memcpy.S 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
/*
 * Copyright (C) 2004-2007 Atmel Corporation
 *
 * This file is subject to the terms and conditions of the GNU Lesser General
 * Public License. See the file "COPYING.LIB" in the main directory of this
 * archive for more details.
 */
  /*
   * memcpy: copy `len` bytes from `src` to the destination passed in r12
   * and return r12 unchanged.
   *
   * Register roles, as used by the code below:
   *   r12        destination on entry; never written, handed back as-is
   *   r11 (src)  source pointer, advanced as bytes are consumed
   *   r10 (len)  byte count, used as a signed down-counter
   *   r9  (dst)  working copy of the destination pointer
   *   r8         scratch
   *   r0-r7, lr  pushed before the bulk paths use r0-r7 as data registers;
   *              the closing popm restores r0-r7 and returns by loading
   *              the saved lr value into pc
   *
   * NOTE(review): r12/r11/r10 presumably are the AVR32 argument registers
   * for memcpy(dst, src, n) -- confirm against the ABI document.
   */

/* Don't use r12 as dst since we must return it unmodified */
#define dst	r9
#define src	r11
#define len	r10

	.text
	.global	memcpy
	.type	memcpy, @function
memcpy:
	pref	src[0]			/* start fetching the first source line */
	mov	dst, r12		/* work on a copy so r12 stays intact */

	/*
	 * Fewer than 32 bytes (signed compare): simple byte loop. Only r8 is
	 * used as a temporary here, so nothing is pushed.
	 */
	cp.w	len, 32
	brge	.Lbulk

	sub	len, 1
	retlt	r12			/* nothing to copy (len < 1, signed) */
.Lsmall_loop:
	ld.ub	r8, src++
	st.b	dst++, r8
	sub	len, 1
	brge	.Lsmall_loop
	retal	r12

.Lbulk:
	pushm	r0-r7, lr

	/* The block path wants src 32-byte aligned and dst word aligned. */
	mov	r8, src
	andl	r8, 31, COH		/* r8 = src & 31 */
	brne	.Lalign_src
	mov	r8, dst
	andl	r8, 3, COH		/* r8 = dst & 3 */
	brne	.Ldst_misaligned

.Lcopy_blocks:
	sub	len, 32
	brlt	.Ltail_lt32

.Lblock_loop:
	/* Move one 32-byte block through r0-r7. */
	ldm	src, r0-r7
	sub	src, -32		/* src += 32 */
	stm	dst, r0-r7
	sub	dst, -32		/* dst += 32 */
	sub	len, 32
	brge	.Lblock_loop

.Ltail_lt32:
	/* Here len == remaining - 32. Copy one 16-byte chunk if it fits. */
	sub	len, -16		/* len = remaining - 16 */
	brlt	.Ltail_lt16
	ldm	src, r0-r3
	sub	src, -16		/* src += 16 */
	sub	len, 16
	stm	dst, r0-r3
	sub	dst, -16		/* dst += 16 */

.Ltail_lt16:
	/*
	 * Here len == remaining - 16 with 0..15 bytes remaining, so after the
	 * negate len is 16 - remaining (1..16). The computed branch jumps
	 * into the unrolled byte copies so that only the last `remaining`
	 * load/store pairs execute.
	 *
	 * NOTE(review): this relies on pc reading as the address of the add
	 * itself, on the add being 4 bytes long, and on every ld.ub/st.b
	 * pair being 4 bytes -- confirm against the AVR32 encodings. Do not
	 * insert, remove or resize instructions between the add and the popm.
	 */
	neg	len
	add	pc, pc, len << 2
	.rept	15
	ld.ub	r0, src++
	st.b	dst++, r0
	.endr

	popm	r0-r7, pc

.Lalign_src:
	/*
	 * Byte-copy until src reaches the next 32-byte boundary.
	 * On entry r8 = (src & 31), which is non-zero on this path.
	 */
	rsub	r8, r8, 32		/* r8 = 32 - (src & 31) bytes to copy */
	sub	len, r8
.Lalign_loop:
	ld.ub	r0, src++
	st.b	dst++, r0
	sub	r8, 1
	brne	.Lalign_loop

	/* src is aligned now; use the block path if dst is word aligned too. */
	pref	src[0]
	mov	r8, 3
	tst	dst, r8
	breq	.Lcopy_blocks

.Ldst_misaligned:
	/* src is aligned, but dst is not. Expect bad performance. */
	sub	len, 4
	brlt	.Lword_tail
.Lword_loop:
	ld.w	r0, src++
	st.w	dst++, r0
	sub	len, 4
	brge	.Lword_loop

.Lword_tail:
	/*
	 * Here len == remaining - 4 with 0..3 bytes remaining. Same computed
	 * branch as in the 16-byte tail, this time into three unrolled
	 * load/store pairs; the same size assumptions apply.
	 */
	neg	len
	add	pc, pc, len << 2
	.rept	3
	ld.ub	r0, src++
	st.b	dst++, r0
	.endr

	popm	r0-r7, pc
	.size	memcpy, . - memcpy

libc_hidden_def(memcpy)