/*
 * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
 * Copyright (C) 2007 ARC International (UK) LTD
 *
 * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
 */
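
/* memset() for ARC700 and ARC HS (ARCv2) cores, selected by the
 * preprocessor guard below.  Both variants leave the original dst
 * pointer in r0, which is memset's required return value.  */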

#include <sysdep.h>

#if !defined(__ARC700__) && !defined(__ARCHS__)
#error "Neither ARC700 nor ARCHS is defined!"
#endif

ENTRY(memset)

#ifdef __ARC700__
#define SMALL   7 /* Must be at least 6 to deal with alignment/loop issues.  */
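
;;; ARC700 strategy: splat the fill byte across a 32-bit word, store the
;;; possibly-unaligned head and tail with byte/halfword stores, then fill
;;; the aligned middle with a zero-overhead (lp_count) word loop.
;;; Overlapping head/tail stores are harmless for memset, and the length
;;; is rebiased below so the word-loop count comes out right.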

        mov_s   r4,r0           ; work on r4 so r0 survives as the return value
        or      r12,r0,r2
        bmsk.f  r12,r12,1       ; Z iff dst and len are both multiples of 4
        extb_s  r1,r1           ; zero-extend the fill byte
        asl     r3,r1,8
        beq.d   .Laligned       ; fully aligned: no head/tail fixup needed
        or_s    r1,r1,r3        ; (delay slot) byte now in both halves of r1[15:0]
        brls    r2,SMALL,.Ltiny ; small fills go byte-by-byte
        add     r3,r2,r0        ; r3 = one past the end of the buffer
        stb     r1,[r3,-1]      ; pre-store the tail: last byte...
        bclr_s  r3,r3,0
        stw     r1,[r3,-2]      ; ...and last aligned halfword
        bmsk.f  r12,r0,1        ; Z iff dst itself is 4-byte aligned
        add_s   r2,r2,r12
        sub.ne  r2,r2,4         ; rebias len for the head stores below
        stb.ab  r1,[r4,1]       ; head: one byte, then align down to 2...
        and     r4,r4,-2
        stw.ab  r1,[r4,2]       ; ...one halfword, then align down to 4
        and     r4,r4,-4
.Laligned:      ; This code address should be aligned for speed.
        asl     r3,r1,16
        lsr.f   lp_count,r2,2   ; number of whole words to store
        or_s    r1,r1,r3        ; splat the 16-bit pattern to all 32 bits
        lpne    .Loop_end       ; zero-overhead loop; skipped when count is 0
        st.ab   r1,[r4,4]
.Loop_end:
        j_s     [blink]

        .balign 4
.Ltiny:
        mov.f   lp_count,r2
        lpne    .Ltiny_end      ; store len bytes one at a time
        stb.ab  r1,[r4,1]
.Ltiny_end:
        j_s     [blink]
#endif /* __ARC700__ */

#ifdef __ARCHS__
#ifdef DONT_USE_PREALLOC
#define PREWRITE(A,B)   prefetchw [(A),(B)]
#else
#define PREWRITE(A,B)   prealloc [(A),(B)]
#endif
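
;;; prealloc allocates the target cache line without reading its old
;;; contents from memory (memset is about to overwrite all of it);
;;; builds whose cache configuration cannot use prealloc safely define
;;; DONT_USE_PREALLOC to fall back to an ordinary prefetch-for-write.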

        prefetchw [r0]          ; Prefetch the write location
        mov.f   0, r2           ; writes to "0" are discarded; only sets flags from len
;;; if size is zero
        jz.d    [blink]
        mov     r3, r0          ; don't clobber ret val

;;; if length <= 8
        brls.d.nt       r2, 8, .Lsmallchunk
        mov.f   lp_count,r2     ; (delay slot) preload lp_count for .Lsmallchunk

        and.f   r4, r0, 0x03    ; r4 = dst misalignment (0..3), sets flags
        rsub    lp_count, r4, 4 ; bytes needed to reach 4-byte alignment
        lpnz    @.Laligndestination ; skipped when already aligned
        ;; LOOP BEGIN
        stb.ab  r1, [r3,1]
        sub     r2, r2, 1
.Laligndestination:

;;; Destination is aligned
        and     r1, r1, 0xFF
        asl     r4, r1, 8
        or      r4, r4, r1
        asl     r5, r4, 16
        or      r5, r5, r4      ; e.g. 0x000000AB -> 0xABABABAB
        mov     r4, r5          ; r4:r5 now pair up as the 64-bit pattern for std
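
;;; In C terms the five instructions above are the usual byte splat
;;; (illustrative only, not part of this file):
;;;     v = c & 0xFF;  v |= v << 8;  v |= v << 16;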

        sub3    lp_count, r2, 8 ; lp_count = len - 64 (sub3: subtract 8 << 3)
        cmp     r2, 64
        bmsk.hi r2, r2, 5       ; len > 64:  r2 = (len % 64)...
        mov.ls  lp_count, 0     ; len <= 64: no 64-byte iterations
        add3.hi r2, r2, 8       ; ...+ 64 (add3: add 8 << 3)
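
;;; For len > 64 the split above leaves (len - 64) / 64 passes for the
;;; 64-byte loop and a 64..127-byte remainder for the 32-byte loop.
;;; Illustrative example: len = 200 gives 2 x 64-byte passes, then
;;; r2 = 72, i.e. 2 x 32-byte passes plus an 8-byte tail: 128 + 64 + 8 = 200.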

;;; Convert len to Dwords, unfold x8
        lsr.f   lp_count, lp_count, 6
        lpnz    @.Lset64bytes
        ;; LOOP START
        PREWRITE(r3, 64)        ; Prefetch the next write location
#if defined(__LL64__) || defined(__ARC_LL64__)
        ;; 8 double-word stores per pass = 64 bytes (std writes the r4:r5 pair)
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
#else
        ;; no 64-bit stores: 16 word stores per pass = 64 bytes
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
#endif
.Lset64bytes:

        lsr.f   lp_count, r2, 5 ; Last remaining max 124 bytes
        lpnz    .Lset32bytes
        ;; LOOP START
        prefetchw [r3, 32]      ; Prefetch the next write location
#if defined(__LL64__) || defined(__ARC_LL64__)
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
        std.ab  r4, [r3, 8]
#else
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
        st.ab   r4, [r3, 4]
#endif
.Lset32bytes:

        and.f   lp_count, r2, 0x1F ; last remaining 0..31 bytes
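
;;; .Lsmallchunk is also the direct entry for len <= 8: the branch at the
;;; top preloaded lp_count with the full length in its delay slot.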
.Lsmallchunk:
        lpnz    .Lcopy3bytes
        ;; LOOP START
        stb.ab  r1, [r3, 1]
.Lcopy3bytes:

        j       [blink]
#endif /* __ARCHS__ */
END(memset)
libc_hidden_def(memset)