/* memmove implementation for SH4
 *
 * Copyright (C) 2009 STMicroelectronics Ltd.
 *
 * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */
- #ifndef __SH_FPU_ANY__
- #include "../../generic/memmove.c"
- #else
- #include <string.h>
- #define FPSCR_SR (1 << 20)
- #define STORE_FPSCR(x) __asm__ __volatile__("sts fpscr, %0" : "=r"(x))
- #define LOAD_FPSCR(x) __asm__ __volatile__("lds %0, fpscr" : : "r"(x))
- static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len)
- {
- char *d = (char *)dest;
- char *s = (char *)src;
- if (len >= 64) {
- unsigned long fpscr;
- int *s1;
- int *d1;
- /* Align the dest to 4 byte boundary. */
- while ((unsigned)d & 0x7) {
- *d++ = *s++;
- len--;
- }
- s1 = (int *)s;
- d1 = (int *)d;
- /* check if s is well aligned to use FPU */
- if (!((unsigned)s1 & 0x7)) {
- /* Align the dest to cache-line boundary */
- while ((unsigned)d1 & 0x1c) {
- *d1++ = *s1++;
- len -= 4;
- }
- /* Use paired single precision load or store mode for
- * 64-bit tranfering.*/
- STORE_FPSCR(fpscr);
- LOAD_FPSCR(FPSCR_SR);
- while (len >= 32) {
- __asm__ __volatile__ ("fmov @%0+,dr0":"+r" (s1));
- __asm__ __volatile__ ("fmov @%0+,dr2":"+r" (s1));
- __asm__ __volatile__ ("fmov @%0+,dr4":"+r" (s1));
- __asm__ __volatile__ ("fmov @%0+,dr6":"+r" (s1));
- __asm__
- __volatile__ ("fmov dr0,@%0"::"r"
- (d1):"memory");
- d1 += 2;
- __asm__
- __volatile__ ("fmov dr2,@%0"::"r"
- (d1):"memory");
- d1 += 2;
- __asm__
- __volatile__ ("fmov dr4,@%0"::"r"
- (d1):"memory");
- d1 += 2;
- __asm__
- __volatile__ ("fmov dr6,@%0"::"r"
- (d1):"memory");
- d1 += 2;
- len -= 32;
- }
- LOAD_FPSCR(fpscr);
- }
- s = (char *)s1;
- d = (char *)d1;
- /*TODO: other subcases could be covered here?!?*/
- }
- /* Go to per-byte copy */
- while (len > 0) {
- *d++ = *s++;
- len--;
- }
- return;
- }
/*
 * Copy len bytes from src to dest; the two regions may overlap.
 * Returns dest.
 *
 * 1) dest == src (100% overlap): nothing has to move, return at once.
 * 2) dest and src do not overlap: plain memcpy.
 * 3) dest above an overlapping src: the copy has to run from the end to
 *    the beginning; memcpy is used because the SH4 memcpy copies
 *    backwards.
 * 4) dest below an overlapping src: the copy has to run from the
 *    beginning to the end, which the SH4 memcpy cannot be used for, so
 *    fpu_optimised_copy_fwd() does it.
 */
void *memmove(void *dest, const void *src, size_t len)
{
	unsigned long d = (unsigned long)dest;
	unsigned long s = (unsigned long)src;

	if (d == s)
		return dest;

	/* s - d < len means src starts inside [dest, dest + len). */
	if (d < s && s - d < len)
		fpu_optimised_copy_fwd(dest, src, len);
	else
		memcpy(dest, src, len);

	return dest;
}
- libc_hidden_def(memmove)
- #endif /*__SH_FPU_ANY__ */
|