memmove.c

/* memmove implementation for SH4
 *
 * Copyright (C) 2009 STMicroelectronics Ltd.
 *
 * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */
#ifndef __SH_FPU_ANY__
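/* No FPU: fall back to the generic memmove implementation. */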
#include "../../generic/memmove.c"
#else

#include <string.h>

#define FPSCR_SR	(1 << 20)
#define STORE_FPSCR(x)	__asm__ __volatile__("sts fpscr, %0" : "=r"(x))
#define LOAD_FPSCR(x)	__asm__ __volatile__("lds %0, fpscr" : : "r"(x))
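
/* FPSCR_SR is the SZ (transfer size) bit of the SH4 FPSCR: with SZ set,
 * a single fmov moves a register pair (64 bits) per instruction. */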

static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len)
{
	char *d = (char *)dest;
	char *s = (char *)src;

	if (len >= 64) {
		unsigned long fpscr;
		int *s1;
		int *d1;

		/* Align dest to an 8-byte boundary. */
		while ((unsigned)d & 0x7) {
			*d++ = *s++;
			len--;
		}

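		/* d is now 8-byte aligned; continue with 32-bit word copies. */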
		s1 = (int *)s;
		d1 = (int *)d;

		/* Take the FPU path only if src is 8-byte aligned as well. */
		if (!((unsigned)s1 & 0x7)) {

			/* Align dest to the cache-line (32-byte) boundary. */
			while ((unsigned)d1 & 0x1c) {
				*d1++ = *s1++;
				len -= 4;
			}

			/* Use paired single-precision load/store mode for
			 * 64-bit transfers. */
			STORE_FPSCR(fpscr);
			LOAD_FPSCR(FPSCR_SR);
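
			/* Each iteration moves 32 bytes: four 8-byte fmov loads
			 * into dr0/dr2/dr4/dr6, then four 8-byte stores. */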
			while (len >= 32) {
				__asm__ __volatile__ ("fmov @%0+,dr0" : "+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr2" : "+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr4" : "+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr6" : "+r" (s1));
				__asm__ __volatile__ ("fmov dr0,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr2,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr4,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr6,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				len -= 32;
			}
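			/* Restore the caller's FPSCR (clears SZ again). */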
			LOAD_FPSCR(fpscr);
		}
		s = (char *)s1;
		d = (char *)d1;
		/* TODO: other subcases could be covered here?!? */
	}

	/* Go to per-byte copy. */
	while (len > 0) {
		*d++ = *s++;
		len--;
	}
	return;
}

void *memmove(void *dest, const void *src, size_t len)
{
	unsigned long int d = (long int)dest;
	unsigned long int s = (long int)src;
	unsigned long int res;

	if (d >= s)
		res = d - s;
	else
		res = s - d;

	/*
	 * 1) dest and src do not overlap      ==> memcpy (BWD/FWD)
	 * 2) dest and src overlap completely  ==> memcpy (BWD/FWD)
	 * 3) left-to-right overlap  ==> copy from the beginning to the end
	 * 4) right-to-left overlap  ==> copy from the end to the beginning
	 */
	if (res == 0)		/* dest == src: complete overlap, memcpy is safe */
		memcpy(dest, src, len);
	else if (res >= len)	/* no overlap at all */
		memcpy(dest, src, len);
	else {
		if (d > s)	/* right-to-left overlap */
			memcpy(dest, src, len);	/* SH4 memcpy copies backwards (BWD) */
		else		/* left-to-right overlap: SH4 memcpy cannot be used */
			fpu_optimised_copy_fwd(dest, src, len);
	}
	return (dest);
}

libc_hidden_def(memmove)
#endif /* __SH_FPU_ANY__ */
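
A quick way to sanity-check the left-to-right overlap path (the only case routed to fpu_optimised_copy_fwd) is a small host-side test such as the sketch below. It is not part of uClibc; the buffer size and offsets are arbitrary, chosen only so that the copy length exceeds the 64-byte threshold that enables the FPU loop.

#include <assert.h>
#include <string.h>

int main(void)
{
	unsigned char ref[96], buf[96], expect[96];
	size_t i;

	for (i = 0; i < sizeof(ref); i++)
		ref[i] = (unsigned char)i;

	memcpy(buf, ref, sizeof(buf));

	/* Expected result of copying bytes 16..95 down to offset 0,
	 * built from the untouched reference copy. */
	memcpy(expect, ref, sizeof(expect));
	memcpy(expect, ref + 16, 80);

	/* dest < src and the regions overlap: the forward-copy path. */
	memmove(buf, buf + 16, 80);

	assert(memcmp(buf, expect, sizeof(buf)) == 0);
	return 0;
}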