/* memset.S: optimised assembly memset
 *
 * Copyright (C) 2003, 2004 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public
 *  License along with this library; if not, write to the Free
 *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <features.h>

        .text
        .p2align	4

###############################################################################
#
# void *memset(void *p, char ch, size_t count)
#
# - NOTE: must not use any stack.
#         exception detection performs function return
#         to caller's fixup routine, aborting the remainder of the set
#         GR4, GR7, GR8, and GR11 must be managed
#
# Register usage, as read from the code below:
#   gr8       - p on entry; never written by this routine
#               (presumably also the return value - confirm against the ABI)
#   gr9       - ch on entry, masked down to its low 8 bits
#   gr10      - count on entry
#   gr4       - working store address
#   gr5       - bytes still to be written
#   gr6       - step size used by the conditional subtract/add
#   gr7       - index register for the store-with-update instructions
#   gr12,gr13 - fill pattern built up from gr9
#   icc0,icc1 - scratch comparison flags
#   icc3      - flags for the remaining count; beqlr on icc3 returns at zero
#   cc5,cc7   - predicates for the conditional (c-prefixed) instructions
#
# NOTE(review): gr11 is named above but is not referenced in this routine.
# NOTE(review): the .p suffix is understood to pack an instruction with the
#         one that follows it, so instruction order and pairing are kept
#         exactly as written - confirm against the FR-V documentation.
#
###############################################################################
        .globl		memset
        .type		memset,@function
memset:
	# take working copies of the arguments; return at once when count is zero
	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
	andi		gr9,#0xff,gr9			; keep only the low byte of ch
	or.p		gr8,gr0,gr4			; GR4 = address
	beqlr		icc3,#0

	# conditionally write a byte to 2b-align the address
	setlos.p	#1,gr6
	andicc		gr4,#1,gr0,icc0
	ckne		icc0,cc7			; cc7 = address bit 0 is set
	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a word to 4b-align the address
	# (the 16-bit pattern in gr12 is built unconditionally along the way)
	andicc.p	gr4,#2,gr0,icc0
	subicc		gr5,#2,gr0,icc1
	setlos.p	#2,gr6
	ckne		icc0,cc7			; cc7 = address bit 1 is set
	slli.p		gr9,#8,gr12			; need to double up the pattern
	cknc		icc1,cc5			; cc5 = no borrow from count - 2
	or.p		gr9,gr12,gr12
	andcr		cc7,cc5,cc7			; store only when both predicates hold

	csth.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a dword to 8b-align the address
	# (the 32-bit pattern in gr12 is built unconditionally along the way)
	andicc.p	gr4,#4,gr0,icc0
	subicc		gr5,#4,gr0,icc1
	setlos.p	#4,gr6
	ckne		icc0,cc7			; cc7 = address bit 2 is set
	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
	cknc		icc1,cc5			; cc5 = no borrow from count - 4
	or.p		gr13,gr12,gr12
	andcr		cc7,cc5,cc7			; store only when both predicates hold

	cst.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	or.p		gr12,gr12,gr13			; need to octuple-up the pattern

	# the address is now 8b-aligned - loop around writing 64b chunks
	# gr4 is biased down by 8 so that gr4 + gr7 is the next address to write
	setlos		#8,gr7
	subi.p		gr4,#8,gr4			; store with update index does weird stuff
	setlos		#64,gr6

	subicc		gr5,#64,gr0,icc0
0:	cknc		icc0,cc7			; cc7 = no borrow from count - 64
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#64,gr0,icc0		; re-test the updated count for the next pass
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0
	bnc		icc0,#2,0b			; go round again on no borrow from count - 64

	# now do 32-byte remnant
	subicc.p	gr5,#32,gr0,icc0
	setlos		#32,gr6
	cknc		icc0,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	setlos		#16,gr6				; step size for the 16-byte block
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#16,gr0,icc0		; test for the 16-byte block
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 16-byte remnant
	# (icc0 and gr6 were prepared for this step inside the 32-byte block)
	cknc		icc0,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 8-byte remnant
	subicc		gr5,#8,gr0,icc1
	cknc		icc1,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	setlos.p	#4,gr7				; index for the 4-byte block
	beqlr		icc3,#0

	# now do 4-byte remnant
	subicc		gr5,#4,gr0,icc0
	addi.p		gr4,#4,gr4			; gr7 is now 4: keep gr4 + gr7 at the next byte to write
	cknc		icc0,cc7
	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#2,gr0,icc1			; test for the 2-byte block
	beqlr		icc3,#0

	# now do 2-byte remnant
	setlos		#2,gr7
	addi.p		gr4,#2,gr4			; gr7 is now 2: keep gr4 + gr7 at the next byte to write
	cknc		icc1,cc7
	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#1,gr0,icc0			; test for the 1-byte block
	beqlr		icc3,#0

	# now do 1-byte remnant
	setlos		#0,gr7				; gr7 is not read again in this routine
	addi.p		gr4,#2,gr4			; final store has no index, so gr4 must be the address itself
	cknc		icc0,cc7
	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
	bralr

	.size		memset, .-memset

libc_hidden_proto(memset)