123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221 |
- /*
- * Copyright (C) 2020 Kalray Inc.
- *
- * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB
- * in this tarball.
- */
- #include <sysdep.h>
- .align 16
- ENTRY(memcpy) /*
-  * memcpy: copies $r2 bytes from the buffer addressed by $r1 to the buffer
-  * addressed by $r0 and returns the original $r0 (kept in $r6 throughout).
-  *
-  * Structure, as visible in the code below:
-  *   1. count == 0: branch straight to .Lreturn.
-  *   2. count >= 256: whole 256-byte blocks travel through $r32..$r63 as
-  *      sixteen 16-byte lq.u/sq pairs; inside the loop the loads of the
-  *      next block are interleaved with the stores of the current one.
-  *   3. the rest (< 256 bytes) is copied in 32-byte steps, followed by at
-  *      most one 16-, 8-, 4-, 2- and 1-byte step, each selected by the
-  *      matching bit of the remaining count.
-  *
-  * Registers written: $r0-$r3, $r6-$r11, $r32-$r63.
-  * The placement of the `;;` lines is part of the code, not formatting:
-  * do not regroup instructions across them when editing.
-  */
- cb.deqz $r2? .Lreturn /* count == 0: nothing to copy */
- compd.geu $r3 = $r2, 256 /* $r3 != 0 iff count >= 256 (unsigned compare) */
- copyd $r6 = $r0 /* remember the destination; copied back to $r0 at .Lreturn */
- ;;
- cb.deqz $r3? .Lremaining_256 /* fewer than 256 bytes: skip the block copy */
- ;;
- lq.u $r32r33 = 0[$r1] /* preload the first 256-byte block into $r32..$r63 */
- addd $r2 = $r2, -256 /* the preloaded block is no longer counted in $r2 */
- ;;
- lq.u $r34r35 = 16[$r1]
- ;;
- lq.u $r36r37 = 32[$r1]
- srld $r7 = $r2, 8 /* $r7 = number of further full 256-byte blocks */
- ;;
- lq.u $r38r39 = 48[$r1]
- ;;
- lq.u $r40r41 = 64[$r1]
- ;;
- lq.u $r42r43 = 80[$r1]
- ;;
- lq.u $r44r45 = 96[$r1]
- ;;
- lq.u $r46r47 = 112[$r1]
- ;;
- lq.u $r48r49 = 128[$r1]
- ;;
- lq.u $r50r51 = 144[$r1]
- ;;
- lq.u $r52r53 = 160[$r1]
- ;;
- lq.u $r54r55 = 176[$r1]
- ;;
- lq.u $r56r57 = 192[$r1]
- ;;
- lq.u $r58r59 = 208[$r1]
- compd.geu $r3 = $r2, 256 /* NOTE(review): $r3 is not read again after this point; looks unused - confirm before removing */
- ;;
- lq.u $r60r61 = 224[$r1]
- ;;
- lq.u $r62r63 = 240[$r1]
- addd $r1 = $r1, 256 /* source now points at the next block */
- ;;
- cb.deqz $r7? .Lstreaming_loop_end /* no further full block: only drain the registers */
- ;;
- loopdo $r7? .Lstreaming_loop_end /* repeat the body, down to the label, $r7 times */
- ;;
- sq 0[$r0] = $r32r33 /* each sq writes part of the current block ... */
- addd $r2 = $r2, -256 /* one more block taken off the remaining count */
- ;;
- lq.u $r32r33 = 0[$r1] /* ... and the following lq.u refills the same pair from the next block */
- ;;
- sq 16[$r0] = $r34r35
- ;;
- lq.u $r34r35 = 16[$r1]
- ;;
- sq 32[$r0] = $r36r37
- ;;
- lq.u $r36r37 = 32[$r1]
- ;;
- sq 48[$r0] = $r38r39
- ;;
- lq.u $r38r39 = 48[$r1]
- ;;
- sq 64[$r0] = $r40r41
- ;;
- lq.u $r40r41 = 64[$r1]
- ;;
- sq 80[$r0] = $r42r43
- ;;
- lq.u $r42r43 = 80[$r1]
- ;;
- sq 96[$r0] = $r44r45
- ;;
- lq.u $r44r45 = 96[$r1]
- ;;
- sq 112[$r0] = $r46r47
- ;;
- lq.u $r46r47 = 112[$r1]
- ;;
- sq 128[$r0] = $r48r49
- ;;
- lq.u $r48r49 = 128[$r1]
- ;;
- sq 144[$r0] = $r50r51
- ;;
- lq.u $r50r51 = 144[$r1]
- ;;
- sq 160[$r0] = $r52r53
- ;;
- lq.u $r52r53 = 160[$r1]
- ;;
- sq 176[$r0] = $r54r55
- ;;
- lq.u $r54r55 = 176[$r1]
- ;;
- sq 192[$r0] = $r56r57
- ;;
- lq.u $r56r57 = 192[$r1]
- ;;
- sq 208[$r0] = $r58r59
- ;;
- lq.u $r58r59 = 208[$r1]
- ;;
- sq 224[$r0] = $r60r61
- ;;
- lq.u $r60r61 = 224[$r1]
- ;;
- sq 240[$r0] = $r62r63
- addd $r0 = $r0, 256 /* destination advances once its 256 bytes are stored */
- ;;
- lq.u $r62r63 = 240[$r1]
- addd $r1 = $r1, 256 /* source advances once the next block is fully loaded */
- ;;
- .Lstreaming_loop_end: /* drain: store the last block still held in $r32..$r63 */
- sq 0[$r0] = $r32r33
- ;;
- sq 16[$r0] = $r34r35
- ;;
- sq 32[$r0] = $r36r37
- ;;
- sq 48[$r0] = $r38r39
- ;;
- sq 64[$r0] = $r40r41
- ;;
- sq 80[$r0] = $r42r43
- ;;
- sq 96[$r0] = $r44r45
- ;;
- sq 112[$r0] = $r46r47
- ;;
- sq 128[$r0] = $r48r49
- ;;
- sq 144[$r0] = $r50r51
- ;;
- sq 160[$r0] = $r52r53
- ;;
- sq 176[$r0] = $r54r55
- ;;
- sq 192[$r0] = $r56r57
- ;;
- sq 208[$r0] = $r58r59
- ;;
- sq 224[$r0] = $r60r61
- ;;
- sq 240[$r0] = $r62r63
- addd $r0 = $r0, 256
- ;;
- .Lremaining_256: /* from here on $r2 holds fewer than 256 remaining bytes */
- andd $r11 = $r2, 16 /* bit 4 of the count selects the single 16-byte step; the 32-byte loop below does not change it */
- srld $r7 = $r2, 5 /* $r7 = number of 32-byte steps */
- ;;
- cb.deqz $r7? .Lloop_32_end /* fewer than 32 bytes: skip the loop */
- ;;
- loopdo $r7? .Lloop_32_end /* repeat the body, down to the label, $r7 times */
- ;;
- lo $r32r33r34r35 = 0[$r1] /* 32-byte load through a register quad */
- addd $r1 = $r1, 32
- addd $r2 = $r2, -32
- ;;
- so 0[$r0] = $r32r33r34r35 /* matching 32-byte store */
- addd $r0 = $r0, 32
- ;;
- .Lloop_32_end: /* fewer than 32 bytes remain; the low bits of $r2 pick the remaining steps */
- andd $r10 = $r2, 8 /* bit 3: 8-byte step */
- andd $r9 = $r2, 4 /* bit 2: 4-byte step */
- cb.deqz $r11? .Lloop_remaining_16 /* bit 4 clear: skip the 16-byte store and pointer updates */
- lq.u.dnez $r11? $r32r33 = 0[$r1] /* load is conditional on $r11 != 0, the same condition under which the store below is reached */
- ;;
- sq 0[$r0] = $r32r33
- addd $r1 = $r1, 16
- addd $r0 = $r0, 16
- ;;
- .Lloop_remaining_16: /* 16-byte step done or skipped; the 8-byte step is decided here */
- andd $r8 = $r2, 2 /* bit 1: 2-byte step */
- andd $r7 = $r2, 1 /* bit 0: final single byte */
- cb.deqz $r10? .Lloop_remaining_8
- ld.dnez $r10? $r32 = 0[$r1] /* conditional on $r10 != 0 */
- ;;
- sd 0[$r0] = $r32
- addd $r1 = $r1, 8
- addd $r0 = $r0, 8
- ;;
- .Lloop_remaining_8: /* 8-byte step done or skipped; the 4-byte step is decided here */
- cb.deqz $r9? .Lloop_remaining_4
- lwz.dnez $r9? $r32 = 0[$r1] /* conditional on $r9 != 0 */
- ;;
- sw 0[$r0] = $r32
- addd $r1 = $r1, 4
- addd $r0 = $r0, 4
- ;;
- .Lloop_remaining_4: /* 4-byte step done or skipped; the 2-byte step is decided here */
- cb.deqz $r8? .Lloop_remaining_2
- lhz.dnez $r8? $r32 = 0[$r1] /* conditional on $r8 != 0 */
- ;;
- sh 0[$r0] = $r32
- addd $r1 = $r1, 2
- addd $r0 = $r0, 2
- ;;
- .Lloop_remaining_2: /* last byte: load and store are both conditional on $r7, so no branch is used */
- lbz.dnez $r7? $r32 = 0[$r1]
- ;;
- sb.dnez $r7? 0[$r0] = $r32
- ;;
- .Lreturn:
- copyd $r0 = $r6 /* return value: the destination pointer saved at entry */
- ret
- ;;
- END(memcpy)
- libc_hidden_def(memcpy)
|