123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316 |
- /* Set a block of memory to some byte value.
- For UltraSPARC.
- Copyright (C) 1996, 97, 98, 99, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by David S. Miller (davem@caip.rutgers.edu) and
- Jakub Jelinek (jj@ultra.linux.cz).
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
- #include <asm/asi.h>
- #ifndef XCC
- #define XCC xcc /* default condition-code set named by the %XCC branch in __memset */
- #define USE_BPR /* when defined, the srl zero-extensions of the length under #ifndef USE_BPR are left out */
- #endif
- #define FPRS_FEF 4 /* value written to %fprs once the block stores are finished */
- /* SET_BLOCKS (dst, off, val): emit four stx instructions that store the
-    64-bit register VAL into the four consecutive doublewords at
-    DST - OFF - 0x18 ... DST - OFF, lowest address first.  __memset jumps
-    into the middle of a run of these expansions, so the order of the
-    stores and the one-instruction-per-doubleword layout must be kept. */
- #define SET_BLOCKS(dst, off, val) \
- stx val, [dst - off - 0x18]; \
- stx val, [dst - off - 0x10]; \
- stx val, [dst - off - 0x08]; \
- stx val, [dst - off - 0x00];
- /* Well, memset is a lot easier to get right than bcopy... */
- .text /* memset (dst = %o0, c = %o1, len = %o2): store the low byte of c
-    into len bytes starting at dst and return dst.  The original dst is
-    kept in %o5 and copied back to %o0 in the delay slot of each retl.
-    Writes %o0-%o5, %g1, %g5 and the condition codes; the 64-byte
-    block-store path additionally writes %f0-%f14, %asi and %fprs.
-    A fill byte of zero is handed over to label 50 inside __bzero.
-    Instructions marked "(delay)" sit in the delay slot of the branch
-    directly above them. */
- .align 32
- .globl memset
- .set memset,__memset /* memset is given the same value as __memset */
- .hidden __memset
- ENTRY(__memset)
- andcc %o1, 0xff, %o1 /* keep only the fill byte; Z is set when it is 0 */
- mov %o0, %o5 /* remember dst for the return value */
- be,a,pt %icc, 50f /* fill byte 0: continue at 50 in __bzero; the annulled delay slot below runs only on that path */
- #ifndef USE_BPR
- srl %o2, 0, %o1 /* (delay) %o1 = len zero-extended from 32 bits, as label 50 expects */
- #else
- mov %o2, %o1 /* (delay) %o1 = len, as label 50 expects */
- #endif
- cmp %o2, 7
- #ifndef USE_BPR
- srl %o2, 0, %o2 /* zero-extend the 32-bit length; does not touch the flags of the cmp above */
- #endif
- bleu,pn %XCC, 17f /* len <= 7 (unsigned): plain byte loop at 17 */
- andcc %o0, 3, %g5 /* (delay) %g5 = dst & 3 */
- be,pt %xcc, 4f /* dst is already 4-byte aligned */
- and %o1, 0xff, %o1 /* (delay) */
- cmp %g5, 3
- be,pn %xcc, 2f /* dst & 3 == 3: one byte reaches alignment */
- stb %o1, [%o0 + 0x00] /* (delay) first byte is stored in every misaligned case */
- cmp %g5, 2
- be,pt %xcc, 2f /* dst & 3 == 2: two bytes reach alignment */
- stb %o1, [%o0 + 0x01] /* (delay) */
- stb %o1, [%o0 + 0x02] /* dst & 3 == 1: third byte */
- 2: sub %g5, 4, %g5 /* %g5 = (dst & 3) - 4 = -(number of bytes just stored) */
- sub %o0, %g5, %o0 /* dst += bytes stored; dst is now 4-byte aligned */
- add %o2, %g5, %o2 /* len -= bytes stored */
- 4: sllx %o1, 8, %g1 /* start replicating the fill byte across the register */
- andcc %o0, 4, %g0 /* is dst 8-byte aligned?  Tested by the be three instructions below */
- or %o1, %g1, %o1 /* low 16 bits hold the byte twice */
- sllx %o1, 16, %g1
- or %o1, %g1, %o1 /* low 32 bits hold the byte four times */
- be,pt %xcc, 2f /* dst already 8-byte aligned */
- sllx %o1, 32, %g1 /* (delay) upper half of the 64-bit pattern, merged in after label 2 */
- stw %o1, [%o0] /* one word brings dst to 8-byte alignment */
- sub %o2, 4, %o2
- add %o0, 4, %o0
- 2: cmp %o2, 128
- or %o1, %g1, %o1 /* %o1 = fill byte replicated in all eight bytes */
- blu,pn %xcc, 9f /* fewer than 128 bytes left: skip the block stores */
- andcc %o0, 0x38, %g5 /* (delay) %g5 = offset of dst inside its 64-byte line */
- be,pn %icc, 6f /* dst already 64-byte aligned */
- mov 64, %o4 /* (delay) */
- andcc %o0, 8, %g0
- be,pn %icc, 1f
- sub %o4, %g5, %o4 /* (delay) %o4 = bytes up to the next 64-byte boundary */
- stx %o1, [%o0]
- add %o0, 8, %o0
- 1: andcc %o4, 16, %g0
- be,pn %icc, 1f
- sub %o2, %o4, %o2 /* (delay) len -= all alignment bytes; executed exactly once on either path */
- stx %o1, [%o0]
- stx %o1, [%o0 + 8]
- add %o0, 16, %o0
- 1: andcc %o4, 32, %g0
- be,pn %icc, 7f
- andncc %o2, 0x3f, %o3 /* (delay) %o3 = len rounded down to a multiple of 64; its Z flag is tested at 7 */
- stw %o1, [%o0]
- stw %o1, [%o0 + 4]
- stw %o1, [%o0 + 8]
- stw %o1, [%o0 + 12]
- stw %o1, [%o0 + 16]
- stw %o1, [%o0 + 20]
- stw %o1, [%o0 + 24]
- stw %o1, [%o0 + 28]
- add %o0, 32, %o0
- 7: be,pn %xcc, 9f /* no whole 64-byte block remains */
- nop
- ldd [%o0 - 8], %f0 /* fetch the pattern into %f0 from the doubleword stored just below dst */
- 18: wr %g0, ASI_BLK_P, %asi /* %asi = ASI_BLK_P for the stda instructions below */
- membar #StoreStore | #LoadStore
- andcc %o3, 0xc0, %g5 /* %g5 = 0, 64, 128 or 192: block bytes that are not part of a 256-byte group */
- and %o2, 0x3f, %o2 /* %o2 = tail bytes left after all 64-byte blocks */
- fmovd %f0, %f2 /* copy the pattern into %f2 ... %f14 (interleaved with integer work) */
- fmovd %f0, %f4
- andn %o3, 0xff, %o3 /* %o3 = bytes handled by the 256-byte loop at 10 */
- fmovd %f0, %f6
- cmp %g5, 64 /* flags consumed by the be after the brz below */
- fmovd %f0, %f8
- fmovd %f0, %f10
- fmovd %f0, %f12
- brz,pn %g5, 10f /* only whole 256-byte groups: go straight to the loop */
- fmovd %f0, %f14 /* (delay) */
- be,pn %icc, 2f /* %g5 == 64: one block */
- stda %f0, [%o0 + 0x00] %asi /* (delay) */
- cmp %g5, 128
- be,pn %icc, 2f /* %g5 == 128: two blocks */
- stda %f0, [%o0 + 0x40] %asi /* (delay) */
- stda %f0, [%o0 + 0x80] %asi /* %g5 == 192: third block */
- 2: brz,pn %o3, 12f /* nothing left for the 256-byte loop */
- add %o0, %g5, %o0 /* (delay) step over the blocks just written */
- 10: stda %f0, [%o0 + 0x00] %asi
- stda %f0, [%o0 + 0x40] %asi
- stda %f0, [%o0 + 0x80] %asi
- stda %f0, [%o0 + 0xc0] %asi
- 11: subcc %o3, 256, %o3
- bne,pt %xcc, 10b
- add %o0, 256, %o0 /* (delay) */
- 12: wr %g0, FPRS_FEF, %fprs /* %fprs = FPRS_FEF */
- membar #StoreLoad | #StoreStore
- 9: andcc %o2, 0x78, %g5 /* %g5 = remaining whole doublewords in bytes (multiple of 8, at most 120) */
- be,pn %xcc, 13f
- andcc %o2, 7, %o2 /* (delay) %o2 = final 0..7 bytes; its Z flag is tested at 13 */
- 14: rd %pc, %o4 /* %o4 = address of label 14 */
- srl %g5, 1, %o3 /* each 4-byte stx writes 8 bytes, so code offset = bytes / 2 */
- sub %o4, %o3, %o4
- jmpl %o4 + (13f - 14b), %g0 /* jump %g5/2 bytes before label 13: exactly the last %g5/8 stx run */
- add %o0, %g5, %o0 /* (delay) the table stores are addressed backwards from the new dst */
- 12: SET_BLOCKS (%o0, 0x68, %o1)
- SET_BLOCKS (%o0, 0x48, %o1)
- SET_BLOCKS (%o0, 0x28, %o1)
- SET_BLOCKS (%o0, 0x08, %o1)
- 13: be,pn %xcc, 8f /* no tail bytes */
- andcc %o2, 4, %g0 /* (delay) */
- be,pn %xcc, 1f
- andcc %o2, 2, %g0 /* (delay) */
- stw %o1, [%o0]
- add %o0, 4, %o0
- 1: be,pn %xcc, 1f
- andcc %o2, 1, %g0 /* (delay) */
- sth %o1, [%o0]
- add %o0, 2, %o0
- 1: bne,a,pn %xcc, 8f /* odd length: the annulled delay slot stores the last byte */
- stb %o1, [%o0]
- 8: retl
- mov %o5, %o0 /* (delay) return the original dst */
- 17: brz,pn %o2, 0f /* short path (len <= 7); len == 0 returns at once */
- 8: add %o0, 1, %o0 /* delay slot of the brz above and first instruction of the byte loop */
- subcc %o2, 1, %o2
- bne,pt %xcc, 8b
- stb %o1, [%o0 - 1] /* (delay) store at the pre-increment address */
- 0: retl
- mov %o5, %o0 /* (delay) return the original dst */
- 6: stx %o1, [%o0] /* dst was already 64-byte aligned: store the pattern once so it can be loaded into %f0 */
- andncc %o2, 0x3f, %o3 /* %o3 = len rounded down to a multiple of 64 */
- be,pn %xcc, 9b
- nop
- ba,pt %xcc, 18b
- ldd [%o0], %f0 /* (delay) %f0 = the doubleword just stored */
- END(__memset)
- /* ZERO_BLOCKS (dst, off, val): emit eight stx instructions that store
-    the register VAL into the eight consecutive doublewords at
-    DST - OFF - 0x38 ... DST - OFF, lowest address first.  __bzero jumps
-    into the middle of a run of these expansions, so the order of the
-    stores and the one-instruction-per-doubleword layout must be kept. */
- #define ZERO_BLOCKS(dst, off, val) \
- stx val, [dst - off - 0x38]; \
- stx val, [dst - off - 0x30]; \
- stx val, [dst - off - 0x28]; \
- stx val, [dst - off - 0x20]; \
- stx val, [dst - off - 0x18]; \
- stx val, [dst - off - 0x10]; \
- stx val, [dst - off - 0x08]; \
- stx val, [dst - off - 0x00];
- .text /* bzero (dst = %o0, len = %o1): store zero into len bytes starting
-    at dst.  Label 50 is also the target of the branch __memset takes when
-    its fill byte is zero; on that path %o5 already holds dst and %o1 the
-    length.  %o5 is copied back to %o0 in the delay slot of each retl.
-    Writes %o0-%o5 and the condition codes; %asi is written whenever at
-    least 128 bytes remain after 8-byte alignment, and the 64-byte
-    block-store path additionally writes %f0-%f14 and %fprs.
-    Instructions marked "(delay)" sit in the delay slot of the branch
-    directly above them. */
- .align 32
- ENTRY(__bzero)
- #ifndef USE_BPR
- srl %o1, 0, %o1 /* zero-extend the 32-bit length */
- #endif
- mov %o0, %o5 /* remember dst */
- 50: cmp %o1, 7
- bleu,pn %xcc, 17f /* len <= 7 (unsigned): short path at 17 */
- andcc %o0, 3, %o2 /* (delay) %o2 = dst & 3; these flags are still live at 17 */
- be,a,pt %xcc, 4f /* dst already 4-byte aligned; the annulled delay slot runs only on this path */
- andcc %o0, 4, %g0 /* (delay) 8-byte alignment test consumed at label 4 */
- cmp %o2, 3
- be,pn %xcc, 2f /* dst & 3 == 3: one byte reaches alignment */
- stb %g0, [%o0 + 0x00] /* (delay) first byte is stored in every misaligned case */
- cmp %o2, 2
- be,pt %xcc, 2f /* dst & 3 == 2: two bytes reach alignment */
- stb %g0, [%o0 + 0x01] /* (delay) */
- stb %g0, [%o0 + 0x02] /* dst & 3 == 1: third byte */
- 2: sub %o2, 4, %o2 /* %o2 = (dst & 3) - 4 = -(number of bytes just stored) */
- sub %o0, %o2, %o0 /* dst += bytes stored; dst is now 4-byte aligned */
- add %o1, %o2, %o1 /* len -= bytes stored */
- andcc %o0, 4, %g0 /* is dst 8-byte aligned? */
- 4: be,pt %xcc, 2f /* dst already 8-byte aligned */
- cmp %o1, 128 /* (delay) flags consumed by the blu at label 2 */
- stw %g0, [%o0] /* one word brings dst to 8-byte alignment */
- sub %o1, 4, %o1
- add %o0, 4, %o0
- 2: blu,pn %xcc, 9f /* fewer than 128 bytes (as compared above): skip the block stores */
- andcc %o0, 0x38, %o2 /* (delay) %o2 = offset of dst inside its 64-byte line */
- be,pn %icc, 6f /* dst already 64-byte aligned */
- mov 64, %o4 /* (delay) */
- andcc %o0, 8, %g0
- be,pn %icc, 1f
- sub %o4, %o2, %o4 /* (delay) %o4 = bytes up to the next 64-byte boundary */
- stx %g0, [%o0]
- add %o0, 8, %o0
- 1: andcc %o4, 16, %g0
- be,pn %icc, 1f
- sub %o1, %o4, %o1 /* (delay) len -= all alignment bytes; executed exactly once on either path */
- stx %g0, [%o0]
- stx %g0, [%o0 + 8]
- add %o0, 16, %o0
- 1: andcc %o4, 32, %g0
- be,pn %icc, 7f
- andncc %o1, 0x3f, %o3 /* (delay) %o3 = len rounded down to a multiple of 64; its Z flag is tested at 7 */
- stx %g0, [%o0]
- stx %g0, [%o0 + 8]
- stx %g0, [%o0 + 16]
- stx %g0, [%o0 + 24]
- add %o0, 32, %o0
- 6: andncc %o1, 0x3f, %o3 /* %o3 = len rounded down to a multiple of 64 (recomputed when falling through) */
- 7: be,pn %xcc, 9f /* no whole 64-byte block remains */
- wr %g0, ASI_BLK_P, %asi /* (delay) %asi = ASI_BLK_P; executed whether or not the branch is taken */
- membar #StoreLoad | #StoreStore | #LoadStore
- fzero %f0
- andcc %o3, 0xc0, %o2 /* %o2 = 0, 64, 128 or 192: block bytes that are not part of a 256-byte group */
- and %o1, 0x3f, %o1 /* %o1 = tail bytes left after all 64-byte blocks */
- fzero %f2
- andn %o3, 0xff, %o3 /* %o3 = bytes handled by the 256-byte loop at 10 */
- faddd %f0, %f2, %f4 /* %f4 ... %f14 are derived from the two zeroed registers by add/multiply */
- fmuld %f0, %f2, %f6
- cmp %o2, 64 /* flags consumed by the be after the brz below */
- faddd %f0, %f2, %f8
- fmuld %f0, %f2, %f10
- faddd %f0, %f2, %f12
- brz,pn %o2, 10f /* only whole 256-byte groups: go straight to the loop */
- fmuld %f0, %f2, %f14 /* (delay) */
- be,pn %icc, 2f /* %o2 == 64: one block */
- stda %f0, [%o0 + 0x00] %asi /* (delay) */
- cmp %o2, 128
- be,pn %icc, 2f /* %o2 == 128: two blocks */
- stda %f0, [%o0 + 0x40] %asi /* (delay) */
- stda %f0, [%o0 + 0x80] %asi /* %o2 == 192: third block */
- 2: brz,pn %o3, 12f /* nothing left for the 256-byte loop */
- add %o0, %o2, %o0 /* (delay) step over the blocks just written */
- 10: stda %f0, [%o0 + 0x00] %asi
- stda %f0, [%o0 + 0x40] %asi
- stda %f0, [%o0 + 0x80] %asi
- stda %f0, [%o0 + 0xc0] %asi
- 11: subcc %o3, 256, %o3
- bne,pt %xcc, 10b
- add %o0, 256, %o0 /* (delay) */
- 12: wr %g0, FPRS_FEF, %fprs /* %fprs = FPRS_FEF */
- membar #StoreLoad | #StoreStore
- 9: andcc %o1, 0xf8, %o2 /* %o2 = remaining whole doublewords in bytes (multiple of 8) */
- be,pn %xcc, 13f
- andcc %o1, 7, %o1 /* (delay) %o1 = final 0..7 bytes; its Z flag is tested at 13 */
- 14: rd %pc, %o4 /* %o4 = address of label 14 */
- srl %o2, 1, %o3 /* each 4-byte stx writes 8 bytes, so code offset = bytes / 2 */
- sub %o4, %o3, %o4
- jmpl %o4 + (13f - 14b), %g0 /* jump %o2/2 bytes before label 13: exactly the last %o2/8 stx run */
- add %o0, %o2, %o0 /* (delay) the table stores are addressed backwards from the new dst */
- 12: ZERO_BLOCKS (%o0, 0xc8, %g0)
- ZERO_BLOCKS (%o0, 0x88, %g0)
- ZERO_BLOCKS (%o0, 0x48, %g0)
- ZERO_BLOCKS (%o0, 0x08, %g0)
- 13: be,pn %xcc, 8f /* no tail bytes */
- andcc %o1, 4, %g0 /* (delay) */
- be,pn %xcc, 1f
- andcc %o1, 2, %g0 /* (delay) */
- stw %g0, [%o0]
- add %o0, 4, %o0
- 1: be,pn %xcc, 1f
- andcc %o1, 1, %g0 /* (delay) */
- sth %g0, [%o0]
- add %o0, 2, %o0
- 1: bne,a,pn %xcc, 8f /* odd length: the annulled delay slot stores the last byte */
- stb %g0, [%o0]
- 8: retl
- mov %o5, %o0 /* (delay) hand back the saved dst */
- 17: be,pn %xcc, 13b /* short path (len <= 7): Z still reflects dst & 3, so an aligned dst uses the word/half/byte tail at 13 */
- orcc %o1, 0, %g0 /* (delay) set Z when len == 0, for the be at 13 or the be below */
- be,pn %xcc, 0f /* len == 0: nothing to store */
- 8: add %o0, 1, %o0 /* delay slot of the be above and first instruction of the byte loop */
- subcc %o1, 1, %o1
- bne,pt %xcc, 8b
- stb %g0, [%o0 - 1] /* (delay) store at the pre-increment address */
- 0: retl
- mov %o5, %o0 /* (delay) hand back the saved dst */
- END(__bzero)
- weak_alias (__bzero, bzero)
|