Selaa lähdekoodia

Alexandre Oliva writes:

This patch introduces optimized versions of memcpy and memset for
frv.
Eric Andersen 20 vuotta sitten
vanhempi
commit
2baa0cccce
4 muutettua tiedostoa jossa 319 lisäystä ja 1 poistoa
  1. 1 1
      libc/string/Makefile
  2. 39 0
      libc/string/frv/Makefile
  3. 124 0
      libc/string/frv/memcpy.S
  4. 155 0
      libc/string/frv/memset.S

+ 1 - 1
libc/string/Makefile

@@ -23,7 +23,7 @@ DIRS=
 ifeq ($(TARGET_ARCH),$(wildcard $(TARGET_ARCH)))
 DIRS = $(TARGET_ARCH)
 endif
-ALL_SUBDIRS = i386 arm sh64 powerpc
+ALL_SUBDIRS = arm frv i386 sh64 powerpc
 
 MSRC= wstring.c
 MOBJ=  basename.o bcopy.o bzero.o dirname.o ffs.o memccpy.o memchr.o memcmp.o \

+ 39 - 0
libc/string/frv/Makefile

@@ -0,0 +1,39 @@
+# Makefile for uClibc
+#
+# Copyright (C) 2004 Alexandre Oliva <aoliva@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU Library General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Library General Public License
+# along with this program; if not, write to the Free Software Foundation, Inc.,
+# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+TOPDIR=../../../
+include $(TOPDIR)Rules.mak
+
+SSRC=memcpy.S memset.S
+SOBJS=$(patsubst %.S,%.o, $(SSRC))
+OBJS=$(SOBJS)
+
+all: $(OBJS) $(LIBC)
+
+$(LIBC): ar-target
+
+ar-target: $(OBJS)
+	$(AR) $(ARFLAGS) $(LIBC) $(OBJS)
+
+$(SOBJS): %.o : %.S
+	$(CC) $(CFLAGS) -c $< -o $@
+	$(STRIPTOOL) -x -R .note -R .comment $*.o
+
+clean:
+	$(RM) *.[oa] *~ core
+

+ 124 - 0
libc/string/frv/memcpy.S

@@ -0,0 +1,124 @@
+/* memcpy.S: optimised assembly memcpy
+ *
+ * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public
+ *  License along with this library; if not, write to the Free
+ *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+        .text
+        .p2align	4
+
+###############################################################################
+#
+# void *memcpy(void *to, const char *from, size_t count)
+#
+# - NOTE: must not use any stack. exception detection performs function return
+#         to caller's fixup routine, aborting the remainder of the copy
+#
+###############################################################################
+        .globl		memcpy
+        .type		memcpy,@function
+memcpy:
+	or.p		gr8,gr9,gr4
+	orcc		gr10,gr0,gr0,icc3
+	or.p		gr10,gr4,gr4
+	beqlr		icc3,#0
+
+	# optimise based on best common alignment for to, from & count
+	andicc.p	gr4,#0x1f,gr0,icc0
+	setlos		#8,gr11
+	andicc.p	gr4,#0x0f,gr0,icc1
+	beq		icc0,#0,memcpy_32
+	andicc.p	gr4,#0x07,gr0,icc0
+	beq		icc1,#0,memcpy_16
+	andicc.p	gr4,#0x03,gr0,icc1
+	beq		icc0,#0,memcpy_8
+	andicc.p	gr4,#0x01,gr0,icc0
+	beq		icc1,#0,memcpy_4
+	setlos.p	#1,gr11
+	beq		icc0,#0,memcpy_2
+
+	# do byte by byte copy
+	sub.p		gr8,gr11,gr3
+	sub		gr9,gr11,gr9
+0:	ldubu.p		@(gr9,gr11),gr4
+	subicc		gr10,#1,gr10,icc0
+	stbu.p		gr4,@(gr3,gr11)
+	bne		icc0,#2,0b
+	bralr
+
+	# do halfword by halfword copy
+memcpy_2:
+	setlos		#2,gr11
+	sub.p		gr8,gr11,gr3
+	sub		gr9,gr11,gr9
+0:	lduhu.p		@(gr9,gr11),gr4
+	subicc		gr10,#2,gr10,icc0
+	sthu.p		gr4,@(gr3,gr11)
+	bne		icc0,#2,0b
+	bralr
+
+	# do word by word copy
+memcpy_4:
+	setlos		#4,gr11
+	sub.p		gr8,gr11,gr3
+	sub		gr9,gr11,gr9
+0:	ldu.p		@(gr9,gr11),gr4
+	subicc		gr10,#4,gr10,icc0
+	stu.p		gr4,@(gr3,gr11)
+	bne		icc0,#2,0b
+	bralr
+
+	# do double-word by double-word copy
+memcpy_8:
+	sub.p		gr8,gr11,gr3
+	sub		gr9,gr11,gr9
+0:	lddu.p		@(gr9,gr11),gr4
+	subicc		gr10,#8,gr10,icc0
+	stdu.p		gr4,@(gr3,gr11)
+	bne		icc0,#2,0b
+	bralr
+
+	# do quad-word by quad-word copy
+memcpy_16:
+	sub.p		gr8,gr11,gr3
+	sub		gr9,gr11,gr9
+0:	lddu		@(gr9,gr11),gr4
+	lddu.p		@(gr9,gr11),gr6
+	subicc		gr10,#16,gr10,icc0
+	stdu		gr4,@(gr3,gr11)
+	stdu.p		gr6,@(gr3,gr11)
+	bne		icc0,#2,0b
+	bralr
+
+	# do eight-word by eight-word copy
+memcpy_32:
+	sub.p		gr8,gr11,gr3
+	sub		gr9,gr11,gr9
+0:	lddu		@(gr9,gr11),gr4
+	lddu		@(gr9,gr11),gr6
+	lddu		@(gr9,gr11),gr12
+	lddu.p		@(gr9,gr11),gr14
+	subicc		gr10,#32,gr10,icc0
+	stdu		gr4,@(gr3,gr11)
+	stdu		gr6,@(gr3,gr11)
+	stdu		gr12,@(gr3,gr11)
+	stdu.p		gr14,@(gr3,gr11)
+	bne		icc0,#2,0b
+	bralr
+
+	.size		memcpy, .-memcpy

+ 155 - 0
libc/string/frv/memset.S

@@ -0,0 +1,155 @@
+/* memset.S: optimised assembly memset
+ *
+ * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Library General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Library General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Library General Public
+ *  License along with this library; if not, write to the Free
+ *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+
+        .text
+        .p2align	4
+
+###############################################################################
+#
+# void *memset(void *p, char ch, size_t count)
+#
+# - NOTE: must not use any stack. exception detection performs function return
+#         to caller's fixup routine, aborting the remainder of the set
+#         GR4, GR7, GR8, and GR11 must be managed
+#
+###############################################################################
+        .globl		memset
+        .type		memset,@function
+memset:
+	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
+	andi		gr9,#0xff,gr9
+	or.p		gr8,gr0,gr4			; GR4 = address
+	beqlr		icc3,#0
+
+	# conditionally write a byte to 2b-align the address
+	setlos.p	#1,gr6
+	andicc		gr4,#1,gr0,icc0
+	ckne		icc0,cc7
+	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
+	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
+	cadd.p		gr4,gr6,gr4		,cc7,#1
+	beqlr		icc3,#0
+
+	# conditionally write a word to 4b-align the address
+	andicc.p	gr4,#2,gr0,icc0
+	subicc		gr5,#2,gr0,icc1
+	setlos.p	#2,gr6
+	ckne		icc0,cc7
+	slli.p		gr9,#8,gr12			; need to double up the pattern
+	cknc		icc1,cc5
+	or.p		gr9,gr12,gr12
+	andcr		cc7,cc5,cc7
+
+	csth.p		gr12,@(gr4,gr0)		,cc7,#1
+	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
+	cadd.p		gr4,gr6,gr4		,cc7,#1
+	beqlr		icc3,#0
+
+	# conditionally write a dword to 8b-align the address
+	andicc.p	gr4,#4,gr0,icc0
+	subicc		gr5,#4,gr0,icc1
+	setlos.p	#4,gr6
+	ckne		icc0,cc7
+	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
+	cknc		icc1,cc5
+	or.p		gr13,gr12,gr12
+	andcr		cc7,cc5,cc7
+
+	cst.p		gr12,@(gr4,gr0)		,cc7,#1
+	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
+	cadd.p		gr4,gr6,gr4		,cc7,#1
+	beqlr		icc3,#0
+
+	or.p		gr12,gr12,gr13			; need to octuple-up the pattern
+
+	# the address is now 8b-aligned - loop around writing 64b chunks
+	setlos		#8,gr7
+	subi.p		gr4,#8,gr4			; store with update index does weird stuff
+	setlos		#64,gr6
+
+	subicc		gr5,#64,gr0,icc0
+0:	cknc		icc0,cc7
+	cstdu		gr12,@(gr4,gr7)		,cc7,#1
+	cstdu		gr12,@(gr4,gr7)		,cc7,#1
+	cstdu		gr12,@(gr4,gr7)		,cc7,#1
+	cstdu		gr12,@(gr4,gr7)		,cc7,#1
+	cstdu		gr12,@(gr4,gr7)		,cc7,#1
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	subicc		gr5,#64,gr0,icc0
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	beqlr		icc3,#0
+	bnc		icc0,#2,0b
+
+	# now do 32-byte remnant
+	subicc.p	gr5,#32,gr0,icc0
+	setlos		#32,gr6
+	cknc		icc0,cc7
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	setlos		#16,gr6
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	subicc		gr5,#16,gr0,icc0
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	beqlr		icc3,#0
+
+	# now do 16-byte remnant
+	cknc		icc0,cc7
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	beqlr		icc3,#0
+
+	# now do 8-byte remnant
+	subicc		gr5,#8,gr0,icc1
+	cknc		icc1,cc7
+	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
+	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
+	setlos.p	#4,gr7
+	beqlr		icc3,#0
+
+	# now do 4-byte remnant
+	subicc		gr5,#4,gr0,icc0
+	addi.p		gr4,#4,gr4
+	cknc		icc0,cc7
+	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
+	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
+	subicc.p	gr5,#2,gr0,icc1
+	beqlr		icc3,#0
+
+	# now do 2-byte remnant
+	setlos		#2,gr7
+	addi.p		gr4,#2,gr4
+	cknc		icc1,cc7
+	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
+	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
+	subicc.p	gr5,#1,gr0,icc0
+	beqlr		icc3,#0
+
+	# now do 1-byte remnant
+	setlos		#0,gr7
+	addi.p		gr4,#2,gr4
+	cknc		icc0,cc7
+	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
+	bralr
+	.size		memset, .-memset