/* memset/bzero -- set memory area to CH/0
   Optimized version for x86-64.
   Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Andreas Jaeger <aj@suse.de>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include "_glibc_inc.h"

/* Syntax: AT&T (GNU as), run through the C preprocessor.

   Register contract, as used by the code below:

     memset build:  %rdi = destination, %sil = fill byte, %rdx = length;
		    returns the original %rdi in %rax.
     bzero build:   %rdi = destination, %rsi = length; %rax is not set.

     Scratch: %rax, %rcx, %rdx, %r8 and the flags (plus %rsi when built
     as bzero).  No stack is used and no callee-saved register is
     touched.  */

/* BEWARE: `#ifdef memset' means that memset is redefined as `bzero'.
   The test is resolved here, once, to a plain 0/1: letting `defined'
   appear through macro expansion inside `#if' is undefined behaviour
   in ISO C (GCC diagnoses it with -Wexpansion-to-defined).  */
#ifdef memset
# define BZERO_P 1
#else
# define BZERO_P 0
#endif

/* Lengths of at least this many bytes (after alignment) take the
   non-temporal store path.  This is somewhat experimental and could be
   made dependent on the cache size.  */
#define LARGE $120000

	.text
/* __memset_chk compares %rcx against the length in %rdx and, when
   __UCLIBC_HAS_SSP__ is defined, jumps to __chk_fail if %rcx is the
   smaller (unsigned) -- presumably %rcx carries the destination object
   size; confirm against the callers.  It has no return of its own and
   relies on falling through into memset.
   It is only emitted for the memset build: in the bzero build the code
   below takes its length from %rsi, not %rdx, so the check would test
   the wrong register, and the alias at the end of the file is already
   restricted to !BZERO_P.  */
#if !BZERO_P && defined __PIC__ && !defined NOT_IN_libc
ENTRY (__memset_chk)
	cmpq	%rdx, %rcx
#if defined __UCLIBC_HAS_SSP__
	jb	HIDDEN_JUMPTARGET (__chk_fail)
#endif
END (__memset_chk)
#endif
ENTRY (memset)
#if BZERO_P
	mov	%rsi,%rdx	/* Adjust parameter: length arrives in %rsi.  */
	xorl	%esi,%esi	/* Fill with 0s.  */
#endif
	cmp	$0x7,%rdx	/* Check for small length.  */
	mov	%rdi,%rcx	/* %rcx = write cursor; %rdi keeps the
				   original pointer for the return value.
				   (mov leaves the flags of the cmp intact.)  */
	jbe	7f		/* <= 7 bytes: byte loop only.  */

#if BZERO_P
	mov	%rsi,%r8	/* Just copy 0.  */
#else
	/* Populate 8 bit data to full 64-bit: multiplying the
	   zero-extended byte by 0x0101010101010101 replicates it into
	   every byte of %r8.  */
	movabs	$0x0101010101010101,%r8
	movzbl	%sil,%eax
	imul	%rax,%r8
#endif
	test	$0x7,%edi	/* Check for alignment.  */
	jz	2f

	/* Next 3 insns are 9 bytes total, make sure we decode them in one go */
	.p2align 4,,9
1:
	/* Align ptr to 8 byte.  At most 7 iterations, and the length is
	   known to be > 7 here, so %rdx cannot underflow.  */
	mov	%sil,(%rcx)
	dec	%rdx
	inc	%rcx
	test	$0x7,%cl
	jnz	1b

2:	/* Check for really large regions.  */
	mov	%rdx,%rax
	shr	$0x6,%rax	/* %rax = number of 64-byte blocks.  */
	je	4f		/* Fewer than 64 bytes left.  */
	cmp	LARGE, %rdx
	jae	11f

	/* Next 3 insns are 11 bytes total, make sure we decode them in one go */
	.p2align 4,,11
3:
	/* Fill 64 bytes.  */
	mov	%r8,(%rcx)
	mov	%r8,0x8(%rcx)
	mov	%r8,0x10(%rcx)
	mov	%r8,0x18(%rcx)
	mov	%r8,0x20(%rcx)
	mov	%r8,0x28(%rcx)
	mov	%r8,0x30(%rcx)
	mov	%r8,0x38(%rcx)
	add	$0x40,%rcx
	dec	%rax
	jne	3b

4:	/* Fill final bytes (length modulo 64).  */
	and	$0x3f,%edx
	mov	%rdx,%rax
	shr	$0x3,%rax	/* %rax = number of remaining 8-byte words.  */
	je	6f
5:	/* First in chunks of 8 bytes.  */
	mov	%r8,(%rcx)
	add	$0x8,%rcx
	dec	%rax
	jne	5b
6:
	and	$0x7,%edx
7:
	test	%rdx,%rdx
	je	9f
8:	/* And finally as bytes (up to 7).  */
	mov	%sil,(%rcx)
	inc	%rcx
	dec	%rdx
	jne	8b
9:
#if BZERO_P
	/* nothing */
#else
	/* Load result (only if used as memset).  */
	mov	%rdi,%rax	/* start address of destination is result */
#endif
	retq

	/* Next 3 insns are 14 bytes total, make sure we decode them in one go */
	.p2align 4,,14
11:
	/* Fill 64 bytes without polluting the cache.  */
	/* We could use	movntdq %xmm0,(%rcx) here to further
	   speed up for large cases but let's not use XMM registers.  */
	movnti	%r8,(%rcx)
	movnti	%r8,0x8(%rcx)
	movnti	%r8,0x10(%rcx)
	movnti	%r8,0x18(%rcx)
	movnti	%r8,0x20(%rcx)
	movnti	%r8,0x28(%rcx)
	movnti	%r8,0x30(%rcx)
	movnti	%r8,0x38(%rcx)
	add	$0x40,%rcx
	dec	%rax
	jne	11b
	/* Non-temporal stores are weakly ordered.  Fence them so that they
	   are globally visible before any store issued after this point
	   (the tail fill below, or whatever the caller writes next).  */
	sfence
	jmp	4b

END (memset)
#if !BZERO_P
libc_hidden_def(memset)
#endif

#if !BZERO_P && defined __PIC__ && !defined NOT_IN_libc
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
#endif