123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348 |
/*
 * void *memmove(void *d, const void *s, size_t c)	-- MicroBlaze
 *
 * In:    r5 = d (destination), r6 = s (source), r7 = c (byte count)
 * Out:   r3 = original d (set on the descending path below; when d <= s
 *        control is handed to memcpy, which returns to our caller)
 * Uses:  r4 = n (per-phase byte counter / compare scratch)
 *        r8 = as (source pointer rounded down to a word boundary)
 *        r9-r12 = data temporaries (r11 = h "held" bytes, r12 = v "new" word
 *        in the unaligned paths)
 *        Returns with rtsd r15, 8.
 *
 * When d > s (unsigned) the copy runs from the END of both buffers towards
 * the start, in four phases:
 *   1. byte copy until d is word aligned,
 *   2. 32-byte blocks (8 words per iteration),
 *   3. remaining whole words,
 *   4. remaining 0-3 bytes.
 * Phases 2 and 3 each have an aligned variant (s word aligned too) and three
 * unaligned variants selected by s & 3, which read aligned words from the
 * source and merge neighbouring words with shifts.
 *
 * NOTE(review): the shift/merge sequences place the lowest-addressed byte in
 * the most significant position of each word, i.e. they are written for
 * big-endian byte order -- confirm before building for a little-endian target.
 *
 * Instructions ending in "d" (brid, bneid, rtsd) execute the following
 * instruction in their delay slot; those slots are marked below.
 */
	.globl	memmove
	.type	memmove, @function
	.ent	memmove

memmove:
	cmpu	r4, r5, r6		/* r4 = s - d; sign bit set iff d > s (unsigned) */
	bgei	r4, HIDDEN_JUMPTARGET(memcpy)	/* d <= s: hand the whole job to memcpy */

fast_memcpy_descending:
	addi	r3, r5, 0		/* return value = d as passed in */
	add	r5, r5, r7		/* d = d + c (one past the last byte) */
	add	r6, r6, r7		/* s = s + c */

	addi	r4, r0, 4		/* n = 4 */
	cmpu	r4, r4, r7		/* n = c - 4; sign bit set iff c < 4 */
	blti	r4, d_xfer_end		/* fewer than 4 bytes: byte loop only */

	/* Phase 1: copy 0-3 bytes so that d becomes word aligned. */
	andi	r4, r5, 3		/* n = d & 3 */
	beqi	r4, d_dalign_done	/* already aligned */
	rsub	r7, r4, r7		/* c = c - n */

d_xfer_first_loop:
	beqi	r4, d_dalign_done	/* n == 0: alignment bytes done */
	addi	r6, r6, -1		/* s-- */
	addi	r5, r5, -1		/* d-- */
	lbui	r11, r6, 0		/* h = *s */
	sbi	r11, r5, 0		/* *d = h */
	brid	d_xfer_first_loop
	addi	r4, r4, -1		/* n--  (delay slot) */

d_dalign_done:
	addi	r4, r0, 32		/* n = 32 */
	cmpu	r4, r4, r7		/* n = c - 32; sign bit set iff c < 32 */
	blti	r4, d_block_done	/* less than one block left */

	/* Phase 2: 32-byte blocks. */
d_block_xfer:
	andi	r4, r7, 0xffffffe0	/* n = c & ~31 (bytes to move as blocks) */
	rsub	r7, r4, r7		/* c = c - n */
	andi	r9, r6, 3		/* t1 = s & 3 */
	bnei	r9, d_block_unaligned	/* source not word aligned */

	/* Both pointers word aligned: 8 loads/stores per iteration. */
d_block_aligned:
	addi	r6, r6, -32		/* s -= 32 */
	addi	r5, r5, -32		/* d -= 32 */
	lwi	r9, r6, 28
	lwi	r10, r6, 24
	lwi	r11, r6, 20
	lwi	r12, r6, 16
	swi	r9, r5, 28
	swi	r10, r5, 24
	swi	r11, r5, 20
	swi	r12, r5, 16
	lwi	r9, r6, 12
	lwi	r10, r6, 8
	lwi	r11, r6, 4
	lwi	r12, r6, 0
	swi	r9, r5, 12
	swi	r10, r5, 8
	swi	r11, r5, 4
	addi	r4, r4, -32		/* n -= 32 */
	bneid	r4, d_block_aligned	/* loop while n != 0 */
	swi	r12, r5, 0		/* (delay slot) */
	bri	d_block_done

	/*
	 * Source unaligned: read aligned words through r8 and merge.
	 * r6 is moved down by n up front; r8 does the walking in the loops.
	 */
d_block_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	rsub	r6, r4, r6		/* s = s - n */
	lwi	r11, r8, 0		/* h = *(as + 0) */
	addi	r9, r9, -1
	beqi	r9, d_block_u1		/* s & 3 was 1 */
	addi	r9, r9, -1
	beqi	r9, d_block_u2		/* s & 3 was 2 */

	/*
	 * s & 3 == 3.  Each step: v = next lower source word,
	 * store (h | v << 24), then h = v >> 8.
	 */
d_block_u3:
	bsrli	r11, r11, 8		/* h = h >> 8 */
d_bu3_loop:
	addi	r8, r8, -32		/* as -= 32 */
	addi	r5, r5, -32		/* d -= 32 */
	lwi	r12, r8, 28
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 28
	bsrli	r11, r12, 8
	lwi	r12, r8, 24
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 24
	bsrli	r11, r12, 8
	lwi	r12, r8, 20
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 20
	bsrli	r11, r12, 8
	lwi	r12, r8, 16
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 16
	bsrli	r11, r12, 8
	lwi	r12, r8, 12
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 12
	bsrli	r11, r12, 8
	lwi	r12, r8, 8
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 8
	bsrli	r11, r12, 8
	lwi	r12, r8, 4
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 4
	bsrli	r11, r12, 8
	lwi	r12, r8, 0
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 0
	addi	r4, r4, -32		/* n -= 32 */
	bneid	r4, d_bu3_loop		/* loop while n != 0 */
	bsrli	r11, r12, 8		/* h = v >> 8  (delay slot) */
	bri	d_block_done

	/*
	 * s & 3 == 1.  Each step: v = next lower source word,
	 * store (h | v << 8), then h = v >> 24.
	 */
d_block_u1:
	bsrli	r11, r11, 24		/* h = h >> 24 */
d_bu1_loop:
	addi	r8, r8, -32		/* as -= 32 */
	addi	r5, r5, -32		/* d -= 32 */
	lwi	r12, r8, 28
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 28
	bsrli	r11, r12, 24
	lwi	r12, r8, 24
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 24
	bsrli	r11, r12, 24
	lwi	r12, r8, 20
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 20
	bsrli	r11, r12, 24
	lwi	r12, r8, 16
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 16
	bsrli	r11, r12, 24
	lwi	r12, r8, 12
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 12
	bsrli	r11, r12, 24
	lwi	r12, r8, 8
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 8
	bsrli	r11, r12, 24
	lwi	r12, r8, 4
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 4
	bsrli	r11, r12, 24
	lwi	r12, r8, 0
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 0
	addi	r4, r4, -32		/* n -= 32 */
	bneid	r4, d_bu1_loop		/* loop while n != 0 */
	bsrli	r11, r12, 24		/* h = v >> 24  (delay slot) */
	bri	d_block_done

	/*
	 * s & 3 == 2.  Each step: v = next lower source word,
	 * store (h | v << 16), then h = v >> 16.
	 * Falls through into d_block_done when finished.
	 */
d_block_u2:
	bsrli	r11, r11, 16		/* h = h >> 16 */
d_bu2_loop:
	addi	r8, r8, -32		/* as -= 32 */
	addi	r5, r5, -32		/* d -= 32 */
	lwi	r12, r8, 28
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 28
	bsrli	r11, r12, 16
	lwi	r12, r8, 24
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 24
	bsrli	r11, r12, 16
	lwi	r12, r8, 20
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 20
	bsrli	r11, r12, 16
	lwi	r12, r8, 16
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 16
	bsrli	r11, r12, 16
	lwi	r12, r8, 12
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 12
	bsrli	r11, r12, 16
	lwi	r12, r8, 8
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 8
	bsrli	r11, r12, 16
	lwi	r12, r8, 4
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 4
	bsrli	r11, r12, 16
	lwi	r12, r8, 0
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 0
	addi	r4, r4, -32		/* n -= 32 */
	bneid	r4, d_bu2_loop		/* loop while n != 0 */
	bsrli	r11, r12, 16		/* h = v >> 16  (delay slot) */

d_block_done:
	addi	r4, r0, 4		/* n = 4 */
	cmpu	r4, r4, r7		/* n = c - 4; sign bit set iff c < 4 */
	blti	r4, d_xfer_end		/* less than one word left */

	/*
	 * Phase 3: remaining whole words.  d and s are moved down by n
	 * first; r4 then serves as a descending offset from those bases.
	 */
d_word_xfer:
	andi	r4, r7, 0xfffffffc	/* n = c & ~3 */
	rsub	r5, r4, r5		/* d = d - n */
	rsub	r6, r4, r6		/* s = s - n */
	rsub	r7, r4, r7		/* c = c - n */
	andi	r9, r6, 3		/* t1 = s & 3 */
	bnei	r9, d_word_unaligned	/* source not word aligned */

d_word_aligned:
	addi	r4, r4, -4		/* n -= 4 */
	lw	r9, r6, r4		/* t1 = *(s + n) */
	bneid	r4, d_word_aligned	/* loop while n != 0 */
	sw	r9, r5, r4		/* *(d + n) = t1  (delay slot) */
	bri	d_word_done

d_word_unaligned:
	andi	r8, r6, 0xfffffffc	/* as = s & ~3 */
	lw	r11, r8, r4		/* h = *(as + n) */
	addi	r9, r9, -1
	beqi	r9, d_word_u1		/* s & 3 was 1 */
	addi	r9, r9, -1
	beqi	r9, d_word_u2		/* s & 3 was 2 */

	/* s & 3 == 3: store (h | v << 24), then h = v >> 8. */
d_word_u3:
	bsrli	r11, r11, 8		/* h = h >> 8 */
d_wu3_loop:
	addi	r4, r4, -4		/* n -= 4 */
	lw	r12, r8, r4		/* v = *(as + n) */
	bslli	r9, r12, 24		/* t1 = v << 24 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r4		/* *(d + n) = t1 */
	bneid	r4, d_wu3_loop		/* loop while n != 0 */
	bsrli	r11, r12, 8		/* h = v >> 8  (delay slot) */
	bri	d_word_done

	/* s & 3 == 1: store (h | v << 8), then h = v >> 24. */
d_word_u1:
	bsrli	r11, r11, 24		/* h = h >> 24 */
d_wu1_loop:
	addi	r4, r4, -4		/* n -= 4 */
	lw	r12, r8, r4		/* v = *(as + n) */
	bslli	r9, r12, 8		/* t1 = v << 8 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r4		/* *(d + n) = t1 */
	bneid	r4, d_wu1_loop		/* loop while n != 0 */
	bsrli	r11, r12, 24		/* h = v >> 24  (delay slot) */
	bri	d_word_done

	/* s & 3 == 2: store (h | v << 16), then h = v >> 16. */
d_word_u2:
	bsrli	r11, r11, 16		/* h = h >> 16 */
d_wu2_loop:
	addi	r4, r4, -4		/* n -= 4 */
	lw	r12, r8, r4		/* v = *(as + n) */
	bslli	r9, r12, 16		/* t1 = v << 16 */
	or	r9, r11, r9		/* t1 = h | t1 */
	sw	r9, r5, r4		/* *(d + n) = t1 */
	bneid	r4, d_wu2_loop		/* loop while n != 0 */
	bsrli	r11, r12, 16		/* h = v >> 16  (delay slot) */

	/* Phase 4: remaining bytes, one at a time, until c == 0. */
d_word_done:
d_xfer_end:
d_xfer_end_loop:
	beqi	r7, a_done		/* c == 0: finished */
	addi	r6, r6, -1		/* s-- */
	lbui	r9, r6, 0		/* t1 = *s */
	addi	r5, r5, -1		/* d-- */
	sbi	r9, r5, 0		/* *d = t1 */
	brid	d_xfer_end_loop
	addi	r7, r7, -1		/* c--  (delay slot) */

a_done:
d_done:
	rtsd	r15, 8			/* return; r3 holds the original d */
	nop				/* (delay slot) */

	.size	memmove, . - memmove
	.end	memmove
libc_hidden_def(memmove)
|