/* memmove implementation for SH4
 *
 * Copyright (C) 2009 STMicroelectronics Ltd.
 *
 * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
 *
 * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
 */

#ifndef __SH_FPU_ANY__
#include "../../generic/memmove.c"
#else

#include <string.h>

#define FPSCR_SR	(1 << 20)
#define STORE_FPSCR(x)	__asm__ __volatile__("sts fpscr, %0" : "=r"(x))
#define LOAD_FPSCR(x)	__asm__ __volatile__("lds %0, fpscr" : : "r"(x))

static void fpu_optimised_copy_fwd(void *dest, const void *src, size_t len)
{
	char *d = (char *)dest;
	char *s = (char *)src;

	if (len >= 64) {
		unsigned long fpscr;
		int *s1;
		int *d1;

		/* Align the dest to an 8-byte boundary. */
		while ((unsigned)d & 0x7) {
			*d++ = *s++;
			len--;
		}

		s1 = (int *)s;
		d1 = (int *)d;

		/* Check whether src is also 8-byte aligned, so the FPU can be used. */
		if (!((unsigned)s1 & 0x7)) {

			/* Align the dest to a cache-line boundary. */
			while ((unsigned)d1 & 0x1c) {
				*d1++ = *s1++;
				len -= 4;
			}

			/* Use paired single-precision load/store mode for
			 * 64-bit transfers. */
			STORE_FPSCR(fpscr);
			LOAD_FPSCR(FPSCR_SR);

			while (len >= 32) {
				__asm__ __volatile__ ("fmov @%0+,dr0" : "+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr2" : "+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr4" : "+r" (s1));
				__asm__ __volatile__ ("fmov @%0+,dr6" : "+r" (s1));
				__asm__ __volatile__ ("fmov dr0,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr2,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr4,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				__asm__ __volatile__ ("fmov dr6,@%0" : : "r" (d1) : "memory");
				d1 += 2;
				len -= 32;
			}
			LOAD_FPSCR(fpscr);
		}
		s = (char *)s1;
		d = (char *)d1;

		/* TODO: other subcases could be covered here. */
	}

	/* Fall back to a per-byte copy for the remainder. */
	while (len > 0) {
		*d++ = *s++;
		len--;
	}
	return;
}

void *memmove(void *dest, const void *src, size_t len)
{
	unsigned long int d = (long int)dest;
	unsigned long int s = (long int)src;
	unsigned long int res;

	if (d >= s)
		res = d - s;
	else
		res = s - d;

	/*
	 * 1) dest and src do not overlap  ==> memcpy (BWD/FWD)
	 * 2) dest and src overlap 100%    ==> memcpy (BWD/FWD)
	 * 3) left-to-right overlap        ==> copy from the beginning to the end
	 * 4) right-to-left overlap        ==> copy from the end to the beginning
	 */
	if (res == 0)		/* 100% overlap */
		memcpy(dest, src, len);
	else if (res >= len)	/* no overlap */
		memcpy(dest, src, len);
	else {
		if (d > s)	/* right-to-left overlap */
			memcpy(dest, src, len);	/* memcpy is BWD */
		else		/* cannot use SH4 memcpy for this case */
			fpu_optimised_copy_fwd(dest, src, len);
	}
	return dest;
}
libc_hidden_def(memmove)

#endif /* __SH_FPU_ANY__ */