memmove.c

/* memmove implementation for SH4
 *
 * Copyright (C) 2009 STMicroelectronics Ltd.
 *
 * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */

#include <string.h>

#define FPSCR_SR	(1 << 20)

#define STORE_FPSCR(x)	__asm__ volatile("sts fpscr, %0" : "=r"(x))
#define LOAD_FPSCR(x)	__asm__ volatile("lds %0, fpscr" : : "r"(x))
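
/* Bit 20 of FPSCR is the SH-4 transfer-size (SZ) bit: while it is set, a
 * single "fmov" moves a 64-bit register pair instead of 32 bits.
 * STORE_FPSCR/LOAD_FPSCR save and restore the control register around the
 * optimised loop below. */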

static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len)
{
	char *d = (char *)dest;
	char *s = (char *)src;

	if (len >= 64) {
		unsigned long fpscr;
		int *s1;
		int *d1;

		/* Align the dest to an 8-byte boundary. */
		while ((unsigned)d & 0x7) {
			*d++ = *s++;
			len--;
		}

		s1 = (int *)s;
		d1 = (int *)d;

		/* Check whether src is also 8-byte aligned so the FPU can be used. */
		if (!((unsigned)s1 & 0x7)) {
			/* Align the dest to a 32-byte cache-line boundary. */
			while ((unsigned)d1 & 0x1c) {
				*d1++ = *s1++;
				len -= 4;
			}

			/* Use paired single-precision load/store mode for
			 * 64-bit transfers. */
			STORE_FPSCR(fpscr);
			LOAD_FPSCR(FPSCR_SR);
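
			/* Each iteration moves one 32-byte cache line: four
			 * 64-bit loads into dr0..dr6 with post-increment of s1
			 * (the "+r" constraint lets the asm update the pointer),
			 * followed by four 64-bit stores through d1. */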
			while (len >= 32) {
				__asm__ volatile ("fmov @%0+,dr0" : "+r" (s1));
				__asm__ volatile ("fmov @%0+,dr2" : "+r" (s1));
				__asm__ volatile ("fmov @%0+,dr4" : "+r" (s1));
				__asm__ volatile ("fmov @%0+,dr6" : "+r" (s1));
				__asm__ volatile ("fmov dr0,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ volatile ("fmov dr2,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ volatile ("fmov dr4,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ volatile ("fmov dr6,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				len -= 32;
			}

			/* Restore the caller's FPSCR. */
			LOAD_FPSCR(fpscr);
		}
		s = (char *)s1;
		d = (char *)d1;
		/* TODO: other subcases could be covered here?!? */
	}

	/* Go to per-byte copy */
	while (len > 0) {
		*d++ = *s++;
		len--;
	}
	return;
}

void *memmove(void *dest, const void *src, size_t len)
{
	unsigned long int d = (long int)dest;
	unsigned long int s = (long int)src;
	unsigned long int res;

	if (d >= s)
		res = d - s;
	else
		res = s - d;

	/*
	 * 1) dest and src do not overlap   ==> memcpy (BWD/FWD)
	 * 2) dest and src overlap 100%     ==> memcpy (BWD/FWD)
	 * 3) left-to-right overlap         ==> copy from the beginning to the end
	 * 4) right-to-left overlap         ==> copy from the end to the beginning
	 */
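	/* res is the absolute distance |dest - src|: when it is at least len,
	 * the two buffers cannot overlap and a plain memcpy is safe. */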
	if (res == 0)			/* 100% overlap */
		memcpy(dest, src, len);
	else if (res >= len)		/* no overlap */
		memcpy(dest, src, len);
	else {
		if (d > s)		/* right-to-left overlap */
			memcpy(dest, src, len);	/* memcpy is BWD */
		else			/* cannot use SH4 memcpy for this case */
			fpu_optimised_copy_fwd(dest, src, len);
	}

	return dest;
}
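
/* libc_hidden_def() gives memmove a hidden internal alias, so calls made
 * from inside the C library bind directly to it instead of going through
 * the PLT. */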
libc_hidden_def(memmove)