/*
 * Copyright (C) 2017 Hangzhou C-SKY Microsystems co.,ltd.
 *
 * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB
 * in this tarball.
 */

/*
 * GET_FRONT_BITS rx ry
 *   Shift rx by ry bits so that the bytes of an already-loaded source word
 *   that still have to be stored end up at the start of the destination
 *   word: logical shift right when __cskyLE__ is defined, logical shift
 *   left otherwise.  Modifies rx in place.
 */
.macro      GET_FRONT_BITS rx ry
#ifdef      __cskyLE__
    lsr     \rx, \ry
#else
    lsl     \rx, \ry
#endif
.endm

/*
 * GET_AFTER_BITS rx ry
 *   Counterpart of GET_FRONT_BITS: shift rx by ry bits in the opposite
 *   direction (left when __cskyLE__ is defined, right otherwise) so the
 *   leading bytes of the next source word fill the rest of the destination
 *   word.  Modifies rx in place.
 */
.macro      GET_AFTER_BITS rx ry
#ifdef      __cskyLE__
    lsl     \rx, \ry
#else
    lsr     \rx, \ry
#endif
.endm

/* The same body is assembled as wmemcpy when WANT_WIDE is defined. */
#ifdef WANT_WIDE
# define Wmemcpy wmemcpy
#else
# define Wmemcpy memcpy
#endif

/*
 * void *memcpy(void *dest, const void *src, size_t n);
 *
 * In:   r0 = dest, r1 = src, r2 = n
 * Out:  r0 = dest (r0 is never written; r3 is the moving dest pointer)
 *
 * Register roles inside the routine:
 *   r1       moving source pointer
 *   r2       bytes still to copy
 *   r3       moving destination pointer
 *   r12      low two bits of dest, later low two bits of src
 *            (misalignment in bytes)
 *   r13      scratch: |dest - src|, then shift-count computation
 *   r18-r23  data words / bytes in flight
 *   r24      8 * (src misalignment in bytes)  -> GET_FRONT_BITS count
 *   r25      32 - r24                         -> GET_AFTER_BITS count
 *
 * Strategy:
 *   n < 4                      -> byte loop
 *   dest unaligned             -> copy bytes until dest is 4-byte aligned
 *   dest and src aligned       -> 16-byte blocks, then words, then bytes
 *   dest aligned, src is not   -> aligned word loads from src, merged with
 *                                 shifts into aligned word stores to dest
 */
    .text
    .align 2
    .global Wmemcpy
    .type   Wmemcpy, @function
Wmemcpy:
    mov     r3, r0                                  /* keep r0 intact as the return value */
    cmplti  r2, 4                                   /* If len less than 4 bytes */
    jbt     .L_copy_by_byte

    mov     r12, r0
    andi    r12, 3                                  /* r12 = dest & 3 */
    bnez    r12, .L_dest_not_aligned                /* If dest is not 4 bytes aligned */

.L0:
    mov     r12, r1
    andi    r12, 3                                  /* r12 = src & 3 */
    bnez    r12, .L_dest_aligned_but_src_not_aligned /* If dest is aligned, but src is not aligned */

    cmplti  r2, 16                                  /* dest and src are all aligned */
    jbt     .L_aligned_and_len_less_16bytes         /* If len less than 16 bytes */

.L_aligned_and_len_larger_16bytes:                  /* src and dest are all aligned, and len >= 16 bytes */
    ldw     r18, (r1, 0)
    ldw     r19, (r1, 4)
    ldw     r20, (r1, 8)
    ldw     r21, (r1, 12)
    stw     r18, (r3, 0)
    stw     r19, (r3, 4)
    stw     r20, (r3, 8)
    stw     r21, (r3, 12)
    subi    r2, 16
    addi    r1, 16
    addi    r3, 16
    cmplti  r2, 16
    jbf     .L_aligned_and_len_larger_16bytes       /* loop while at least 16 bytes remain */

.L_aligned_and_len_less_16bytes:                    /* one word at a time while len >= 4 */
    cmplti  r2, 4
    jbt     .L_copy_by_byte
    ldw     r18, (r1, 0)
    stw     r18, (r3, 0)
    subi    r2, 4
    addi    r1, 4
    addi    r3, 4
    jbr     .L_aligned_and_len_less_16bytes

.L_copy_by_byte:                                    /* byte loop: tail bytes and fallback path */
    cmpnei  r2, 0
    jbf     .L_return                               /* nothing left to copy */
    ldb     r18, (r1, 0)
    stb     r18, (r3, 0)
    subi    r2, 1
    addi    r1, 1
    addi    r3, 1
    jbr     .L_copy_by_byte

.L_return:
    rts

/*
 * If dest is not aligned, just copying some bytes makes the dest align.
 * After that, we judge whether the src is aligned.
 */
.L_dest_not_aligned:
    rsub    r13, r1, r3                             /* consider overlapped case */
    abs     r13, r13                                /* r13 = |dest - src| */
    cmplt   r13, r2
    jbt     .L_copy_by_byte                         /* distance < len: copy strictly byte by byte */

.L1:
    ldb     r18, (r1, 0)                            /* makes the dest align. */
    stb     r18, (r3, 0)
    addi    r12, 1                                  /* r12 counts up from dest & 3 ... */
    subi    r2, 1
    addi    r1, 1
    addi    r3, 1
    cmpnei  r12, 4                                  /* ... until it reaches 4: dest is aligned */
    jbt     .L1
    cmplti  r2, 4
    jbt     .L_copy_by_byte
    jbf     .L0                                     /* judge whether the src is aligned. */

.L_dest_aligned_but_src_not_aligned:
    rsub    r13, r1, r3                             /* consider overlapped case */
    abs     r13, r13                                /* r13 = |dest - src| */
    cmplt   r13, r2
    jbt     .L_copy_by_byte                         /* distance < len: copy strictly byte by byte */

    bclri   r1, 0
    bclri   r1, 1                                   /* round src down to a 4-byte boundary */
    ldw     r18, (r1, 0)                            /* r18 = aligned word holding the first src bytes */
    addi    r1, 4
    movi    r13, 8
    mult    r13, r12                                /* r13 = 8 * misaligned bytes */
    mov     r24, r13                                /* r24 is used to store the misaligned bits */
    rsubi   r13, 32
    mov     r25, r13                                /* r25 = 32 - r24, the complementary shift */
    cmplti  r2, 16
    jbt     .L_not_aligned_and_len_less_16bytes

.L_not_aligned_and_len_larger_16bytes:
    /*
     * On entry r18 holds the previously loaded source word.  Every
     * destination word is (previous word shifted by r24) OR (next word
     * shifted the other way by r25).  r18 and r19 alternate as the
     * "previous word" carrier; on exit r18 holds the last loaded word.
     */
    ldw     r20, (r1, 0)
    ldw     r21, (r1, 4)
    ldw     r22, (r1, 8)
    ldw     r23, (r1, 12)

    GET_FRONT_BITS r18 r24                          /* little or big endian? */
    mov     r19, r20
    GET_AFTER_BITS r20 r25
    or      r20, r18

    GET_FRONT_BITS r19 r24
    mov     r18, r21
    GET_AFTER_BITS r21 r25                          /* r25, same count as the other three merges */
    or      r21, r19

    GET_FRONT_BITS r18 r24
    mov     r19, r22
    GET_AFTER_BITS r22 r25
    or      r22, r18

    GET_FRONT_BITS r19 r24
    mov     r18, r23
    GET_AFTER_BITS r23 r25
    or      r23, r19

    stw     r20, (r3, 0)
    stw     r21, (r3, 4)
    stw     r22, (r3, 8)
    stw     r23, (r3, 12)
    subi    r2, 16
    addi    r1, 16
    addi    r3, 16
    cmplti  r2, 16
    jbf     .L_not_aligned_and_len_larger_16bytes   /* loop while at least 16 bytes remain */

.L_not_aligned_and_len_less_16bytes:
    cmplti  r2, 4
    jbf     .L2
    rsubi   r12, 4                                  /* r12 = 4 - misaligned bytes */
    subu    r1, r12                                 /* move r1 back to the first uncopied src byte */
    jbr     .L_copy_by_byte

.L2:                                                /* one merged word at a time while len >= 4 */
    ldw     r21, (r1, 0)
    GET_FRONT_BITS r18 r24
    mov     r19, r18
    mov     r18, r21                                /* keep the raw word for the next iteration */
    GET_AFTER_BITS r21 r25
    or      r21, r19
    stw     r21, (r3, 0)
    subi    r2, 4
    addi    r1, 4
    addi    r3, 4
    jbr     .L_not_aligned_and_len_less_16bytes

.size   Wmemcpy, .-Wmemcpy

libc_hidden_def(Wmemcpy)
.weak Wmemcpy