
arc: add optimized string functions for ARCv3

Add the ability to use optimized versions of string functions for ARCv3
32-bit CPUs with the UCLIBC_HAS_STRING_ARCH_OPT option. Add optimized
memcpy/memset/memcmp code for ARCv3 CPUs based on the code from newlib,
and adapt the existing optimized strchr/strcmp/strcpy/strlen for ARCv3.

Link to the Synopsys newlib repo with code for ARCv3 on GitHub:
https://github.com/foss-for-synopsys-dwc-arc-processors/newlib

Signed-off-by: Pavel Kozlov <pavel.kozlov@synopsys.com>
Pavel Kozlov, 2 years ago
commit 663b8a0497
7 changed files with 267 additions and 45 deletions
  1. libc/string/arc/memcmp.S (+93, -1)
  2. libc/string/arc/memcpy.S (+56, -9)
  3. libc/string/arc/memset.S (+52, -9)
  4. libc/string/arc/strchr.S (+13, -12)
  5. libc/string/arc/strcmp.S (+10, -11)
  6. libc/string/arc/strlen.S (+4, -3)
  7. libc/sysdeps/linux/arc/asm.h (+39, -0)
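
As a quick review aid, a small host-side C harness along the following lines (hypothetical; not part of this commit) can check the three mem* routines against naive byte-wise references across sizes and alignments:

```c
/* sanity.c: hypothetical smoke test for the optimized mem* routines.
 * Exercises memcpy/memset/memcmp across small sizes and alignments
 * and compares them with byte-wise reference behaviour. */
#include <assert.h>
#include <stdio.h>
#include <string.h>

static int ref_cmp(const unsigned char *a, const unsigned char *b, size_t n)
{
	for (size_t i = 0; i < n; i++)
		if (a[i] != b[i])
			return a[i] < b[i] ? -1 : 1;
	return 0;
}

int main(void)
{
	static unsigned char src[128], dst[128], ref[128];

	for (size_t i = 0; i < sizeof(src); i++)
		src[i] = (unsigned char)(i * 131 + 7);

	for (size_t off = 0; off < 8; off++) {
		for (size_t n = 0; n + off <= 64; n++) {
			/* memcpy vs. a byte loop; memset fills the canary */
			memset(dst, 0xAA, sizeof(dst));
			memcpy(dst + off, src + off, n);
			memset(ref, 0xAA, sizeof(ref));
			for (size_t i = 0; i < n; i++)
				ref[off + i] = src[off + i];
			assert(ref_cmp(dst, ref, sizeof(dst)) == 0);
			assert(memcmp(dst, ref, sizeof(dst)) == 0);

			/* memcmp must agree with the reference on sign */
			if (n) {
				ref[off] ^= 0x80;
				int got = memcmp(dst + off, ref + off, n);
				int want = ref_cmp(dst + off, ref + off, n);
				assert((got < 0) == (want < 0));
				assert((got > 0) == (want > 0));
				ref[off] ^= 0x80;
			}
		}
	}
	puts("mem* sanity: ok");
	return 0;
}
```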

+ 93 - 1
libc/string/arc/memcmp.S

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2022 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2007 ARC International (UK) LTD
  *
  * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -17,6 +17,8 @@
 #endif
 
 ENTRY(memcmp)
+
+#if defined(__ARC700__) || defined(__ARCHS__)
 	or	r12,r0,r1
 	asl_s	r12,r12,30
 	sub	r3,r2,1
@@ -149,6 +151,96 @@ ENTRY(memcmp)
 .Lnil:
 	j_s.d	[blink]
 	mov	r0,0
+
+#elif defined(__ARC64_ARCH32__)
+	;; Based on Synopsys code from newlib's arc64/memcmp.S
+	cmp		r2, 32
+	bls.d	@.L_compare_1_bytes
+	mov		r3, r0	; "r0" will be used as return value
+
+	lsr		r12, r2, 4	; counter for 16-byte chunks
+	xor		r13, r13, r13	; the mask showing inequal registers
+
+.L_compare_16_bytes:
+	ld.ab	r4, [r3, +4]
+	ld.ab	r5, [r1, +4]
+	ld.ab	r6, [r3, +4]
+	ld.ab	r7, [r1, +4]
+	ld.ab	r8, [r3, +4]
+	ld.ab	r9, [r1, +4]
+	ld.ab	r10, [r3, +4]
+	ld.ab	r11, [r1, +4]
+	xor.f	0, r4, r5
+	xor.ne	r13, r13, 0b0001
+	xor.f	0, r6, r7
+	xor.ne	r13, r13, 0b0010
+	xor.f	0, r8, r9
+	xor.ne	r13, r13, 0b0100
+	xor.f	0, r10, r11
+	xor.ne	r13, r13, 0b1000
+	brne	r13, 0, @.L_unequal_find
+	dbnz	r12, @.L_compare_16_bytes
+
+	;; Adjusting the pointers because of the extra loads in the end
+	sub		r1, r1, 4
+	sub		r3, r3, 4
+	bmsk_s	  r2, r2, 3	; any remaining bytes to compare
+
+.L_compare_1_bytes:
+	cmp		r2, 0
+	jeq.d	[blink]
+	xor_s	r0, r0, r0
+
+2:
+	ldb.ab	r4, [r3, +1]
+	ldb.ab	r5, [r1, +1]
+	sub.f	r0, r4, r5
+	jne		[blink]
+	dbnz	r2, @2b
+	j_s		[blink]
+
+	;; At this point, we want to find the _first_ comparison that marked the
+	;; inequality of "lhs" and "rhs"
+.L_unequal_find:
+	ffs		r13, r13
+	asl		r13, r13, 2
+	bi		[r13]
+.L_unequal_r4r5:
+	mov		r1, r4
+	b.d		@.L_diff_byte_in_regs
+	mov		r2, r5
+	nop
+.L_unequal_r6r7:
+	mov		r1, r6
+	b.d		@.L_diff_byte_in_regs
+	mov		r2, r7
+	nop
+.L_unequal_r8r9:
+	mov		r1, r8
+	b.d		@.L_diff_byte_in_regs
+	mov		r2, r9
+	nop
+.L_unequal_r10r11:
+	mov		r1, r10
+	mov		r2, r11
+
+	;; fall-through
+	;; If we're here, that means the two operands are not equal.
+.L_diff_byte_in_regs:
+	xor		r0, r1, r2
+	ffs		r0, r0
+	and		r0, r0, 0x18
+	lsr		r1, r1, r0
+	lsr		r2, r2, r0
+	bmsk_s	r1, r1, 7
+	bmsk_s	r2, r2, 7
+	j_s.d	[blink]
+	sub		r0, r1, r2
+
+#else
+#error "Unsupported ARC CPU type"
+#endif
+
 END(memcmp)
 libc_hidden_def(memcmp)
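
The new __ARC64_ARCH32__ path above works on 16-byte chunks: four word pairs are loaded and XOR-compared, a 4-bit mask records which pairs differed, and ffs plus a bi jump table selects the first unequal pair before ffs/lsr/bmsk isolate the first differing byte. A rough C rendering of that logic (an illustrative sketch, not the committed code; little-endian layout assumed):

```c
#include <stdint.h>
#include <string.h>

/* Illustrative C model of the ARCv3 memcmp path: compare 16-byte
 * chunks as four 32-bit word pairs, record which pairs differ in a
 * mask (the xor.f/xor.ne pairs), then dig the first differing byte
 * out of the first unequal pair.  Little-endian assumed. */
static int memcmp_sketch(const void *lhs, const void *rhs, size_t n)
{
	const unsigned char *a = lhs, *b = rhs;

	while (n >= 16) {
		uint32_t wa[4], wb[4];
		unsigned mask = 0;	/* bit i set => word pair i differs */

		memcpy(wa, a, 16);
		memcpy(wb, b, 16);
		for (int i = 0; i < 4; i++)
			if (wa[i] != wb[i])
				mask |= 1u << i;

		if (mask) {
			int i = __builtin_ctz(mask);	/* ffs + bi dispatch */
			uint32_t diff = wa[i] ^ wb[i];
			int sh = __builtin_ctz(diff) & 0x18; /* first byte lane */
			return (int)((wa[i] >> sh) & 0xff) -
			       (int)((wb[i] >> sh) & 0xff);
		}
		a += 16; b += 16; n -= 16;
	}
	while (n--) {			/* byte-wise tail, as in the asm */
		int d = *a++ - *b++;
		if (d)
			return d;
	}
	return 0;
}
```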

+ 56 - 9
libc/string/arc/memcpy.S

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2014-2015, 2017, 2022 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2007 ARC International (UK) LTD
  *
  * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -7,13 +7,9 @@
 
 #include <sysdep.h>
 
-#if !defined(__ARC700__) && !defined(__ARCHS__)
-#error "Neither ARC700 nor ARCHS is defined!"
-#endif
-
 ENTRY(memcpy)
 
-#ifdef __ARC700__
+#if defined(__ARC700__)
 /* This memcpy implementation does not support objects of 1GB or larger -
    the check for alignment does not work then.  */
 /* We assume that most sources and destinations are aligned, and
@@ -73,9 +69,9 @@ ENTRY(memcpy)
 .Lendbloop:
 	j_s.d	[blink]
 	stb	r12,[r5,0]
-#endif /* __ARC700__ */
 
-#ifdef __ARCHS__
+#elif defined(__ARCHS__)
+
 #ifdef __LITTLE_ENDIAN__
 # define SHIFT_1(RX,RY,IMM)	asl	RX, RY, IMM	; <<
 # define SHIFT_2(RX,RY,IMM)	lsr	RX, RY, IMM	; >>
@@ -299,7 +295,58 @@ ENTRY(memcpy)
 	stb.ab	r6, [r3,1]
 .Lcopybytewise_3:
 	j	[blink]
-#endif /* __ARCHS__ */
+
+#elif defined(__ARC64_ARCH32__)
+	;; Based on Synopsys code from newlib's arc64/memcpy.S
+	lsr.f	r11, r2, 4		; counter for 16-byte chunks
+	beq.d	@.L_write_15_bytes
+	mov	r3, r0			; work on a copy of "r0"
+
+.L_write_16_bytes:
+#if defined(__ARC64_LL64__)
+	ldd.ab	r4, [r1, 8]
+	ldd.ab	r6, [r1, 8]
+	std.ab	r4, [r3, 8]
+	std.ab	r6, [r3, 8]
+	dbnz	r11, @.L_write_16_bytes
+#else
+	ld.ab	r4, [r1, 4]
+	ld.ab	r5, [r1, 4]
+	ld.ab	r6, [r1, 4]
+	ld.ab	r7, [r1, 4]
+	st.ab	r4, [r3, 4]
+	st.ab	r5, [r3, 4]
+	st.ab	r6, [r3, 4]
+	dbnz.d	r11, @.L_write_16_bytes
+	st.ab	r7, [r3, 4]
+#endif
+	bmsk_s	r2, r2, 3
+
+.L_write_15_bytes:
+	bbit0.d	r2, 1, @1f
+	lsr	r11, r2, 2
+	ldh.ab	r4, [r1, 2]
+	sth.ab	r4, [r3, 2]
+1:
+	bbit0.d	r2, 0, @1f
+	xor	r11, r11, 3
+	ldb.ab	r4, [r1, 1]
+	stb.ab	r4, [r3, 1]
+1:
+	asl	r11, r11, 1
+	bi	[r11]
+	ld.ab	r4,[r1, 4]
+	st.ab	r4,[r3, 4]
+	ld.ab	r4,[r1, 4]
+	st.ab	r4,[r3, 4]
+	ld	r4,[r1]
+	st	r4,[r3]
+
+	j_s	[blink]
+
+#else
+#error "Unsupported ARC CPU type"
+#endif
 
 END(memcpy)
 libc_hidden_def(memcpy)
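
In the ARCv3 memcpy above, the main loop moves 16 bytes per iteration (with 64-bit ldd/std when __ARC64_LL64__ is available), and the 0..15 byte remainder is finished with one optional halfword copy, one optional byte copy, and a bi jump table over word copies; xor r11, r11, 3 turns the remaining word count into a table offset, so fewer words jump further in. The tail in C terms (an illustrative sketch, not the committed code):

```c
#include <stddef.h>
#include <string.h>

/* Illustrative C model of the ARCv3 memcpy tail: after the 16-byte
 * main loop, n is in [0, 15].  The asm peels one halfword and one
 * byte first, then branch-indexes into a table of word copies. */
static void copy_tail_sketch(unsigned char *d, const unsigned char *s, size_t n)
{
	size_t words = n >> 2;			/* 0..3 whole words remain */

	if (n & 2) {				/* bbit0 r2, 1 skips this */
		memcpy(d, s, 2);
		d += 2; s += 2;
	}
	if (n & 1)				/* bbit0 r2, 0 skips this */
		*d++ = *s++;

	switch (words) {	/* bi [r11], r11 derived from 3 - words */
	case 3: memcpy(d, s, 4); d += 4; s += 4; /* fall through */
	case 2: memcpy(d, s, 4); d += 4; s += 4; /* fall through */
	case 1: memcpy(d, s, 4);                 /* fall through */
	case 0: break;
	}
}
```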

+ 52 - 9
libc/string/arc/memset.S

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2014-2015, 2017, 2022 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2007 ARC International (UK) LTD
  *
  * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -7,13 +7,9 @@
 
 #include <sysdep.h>
 
-#if !defined(__ARC700__) && !defined(__ARCHS__)
-#error "Neither ARC700 nor ARCHS is defined!"
-#endif
-
 ENTRY(memset)
 
-#ifdef __ARC700__
+#if defined(__ARC700__)
 #define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */
 
 	mov_s	r4,r0
@@ -52,9 +48,8 @@ ENTRY(memset)
 	stb.ab	r1,[r4,1]
 .Ltiny_end:
 	j_s	[blink]
-#endif /* __ARC700__ */
 
-#ifdef __ARCHS__
+#elif defined(__ARCHS__)
 #ifdef DONT_USE_PREALLOC
 #define PREWRITE(A,B)	prefetchw [(A),(B)]
 #else
@@ -156,7 +151,55 @@ ENTRY(memset)
 .Lcopy3bytes:
 
 	j	[blink]
-#endif /* __ARCHS__ */
+
+#elif defined(__ARC64_ARCH32__)
+	;; Based on Synopsys code from newlib's arc64/memset.S
+
+	;; Assemble the bytes to 32bit words
+	bmsk_s	r1, r1, 7		; treat it like unsigned char
+	lsl8	r3, r1
+	or_s	r1, r1, r3
+	lsl16	r3, r1
+	or	r6, r1, r3
+	mov r7,r6
+
+	lsr.f	r5, r2, 4		; counter for 16-byte chunks
+	beq.d	@.L_write_15_bytes
+	mov	r4, r0			; work on a copy of "r0"
+
+.L_write_16_bytes:
+#if defined(__ARC64_LL64__)
+	std.ab	r6, [r4, 8]
+	std.ab	r6, [r4, 8]
+	dbnz	r5, @.L_write_16_bytes
+#else
+	st.ab	r6, [r4, 4]
+	st.ab	r6, [r4, 4]
+	st.ab	r6, [r4, 4]
+	dbnz.d	r5, @.L_write_16_bytes
+	st.ab	r6, [r4, 4]
+#endif
+	bmsk_s	r2, r2, 3
+
+.L_write_15_bytes:
+	bbit0.d	r2, 1, @1f
+	lsr	r3, r2, 2
+	sth.ab	r6, [r4, 2]
+1:
+	bbit0.d	r2, 0, @1f
+	xor	r3, r3, 3
+	stb.ab	r6, [r4, 1]
+1:
+	bi	[r3]
+	st.ab	r6,[r4, 4]
+	st.ab	r6,[r4, 4]
+	st.ab	r6,[r4, 4]
+
+	j_s	[blink]
+
+#else
+#error "Unsupported ARC CPU type"
+#endif
 
 END(memset)
 libc_hidden_def(memset)
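
The ARCv3 memset first replicates the fill byte across a 32-bit word with two shift-and-or steps, then stores 16 bytes per iteration and finishes the tail with the same halfword/byte/jump-table pattern as memcpy. The replication step in C (an illustrative sketch):

```c
#include <stdint.h>

/* Illustrative model of the fill-word construction in the ARCv3
 * memset: bmsk_s truncates the int argument to an unsigned char,
 * then lsl8/or and lsl16/or splat it across all four byte lanes. */
static uint32_t splat_byte(int c)
{
	uint32_t v = (uint32_t)c & 0xff;	/* bmsk_s r1, r1, 7 */
	v |= v << 8;		/* lsl8 + or:  0x000000ab -> 0x0000abab */
	v |= v << 16;		/* lsl16 + or: 0x0000abab -> 0xabababab */
	return v;
}
```

With __ARC64_LL64__ the same word is stored through a 64-bit register pair, which is why the code also copies it into r7 (mov r7,r6) before the std.ab stores.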

+ 13 - 12
libc/string/arc/strchr.S

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2022 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2007 ARC International (UK) LTD
  *
  * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -7,6 +7,7 @@
 
 #include <sysdep.h>
 #include <features.h>
+#include <asm.h>
 
 /* ARC700 has a relatively long pipeline and branch prediction, so we want
    to avoid branches that are hard to predict.  On the other hand, the
@@ -21,7 +22,7 @@ ENTRY(strchr)
 	mov_s	r3,0x01010101
 	breq.d	r2,r0,.Laligned
 	asl	r4,r5,16
-	sub_s	r0,r0,r2
+	SUBR_S	r0,r0,r2
 	asl	r7,r2,3
 	ld_s	r2,[r0]
 #ifdef __LITTLE_ENDIAN__
@@ -77,10 +78,10 @@ ENTRY(strchr)
 	sub	r3,r7,1
 	bic	r3,r3,r7
 	norm	r2,r3
-	sub_s	r0,r0,1
+	SUBR_S	r0,r0,1
-	asr_s	r2,r2,3
+	ASRR_S	r2,r2,3
 	j.d	[blink]
-	sub_s	r0,r0,r2
+	SUBR_S	r0,r0,r2
 
 	.balign	4
 .Lfound0_ua:
@@ -90,13 +91,13 @@ ENTRY(strchr)
 	bic	r3,r3,r6
 	and	r2,r3,r4
 	or_s	r12,r12,r2
-	sub_s	r3,r12,1
+	SUBR_S	r3,r12,1
 	bic_s	r3,r3,r12
 	norm	r3,r3
-	add_s	r0,r0,3
+	ADDR_S	r0,r0,3
-	asr_s	r12,r3,3
+	ASRR_S	r12,r3,3
 	asl.f	0,r2,r3
-	sub_s	r0,r0,r12
+	SUBR_S	r0,r0,r12
 	j_s.d	[blink]
 	mov.pl	r0,0
 #else /* BIG ENDIAN */
@@ -106,10 +107,10 @@ ENTRY(strchr)
 	bic	r2,r7,r6
 .Lfound_char_b:
 	norm	r2,r2
-	sub_s	r0,r0,4
+	SUBR_S	r0,r0,4
 	asr_s	r2,r2,3
 	j.d	[blink]
-	add_s	r0,r0,r2
+	ADDR_S	r0,r0,r2
 
 .Lfound0_ua:
 	mov_s	r3,r7
@@ -126,7 +127,7 @@ ENTRY(strchr)
 	add.pl	r3,r3,1
 	asr_s	r12,r3,3
 	asl.f	0,r2,r3
-	add_s	r0,r0,r12
+	ADDR_S	r0,r0,r12
 	j_s.d	[blink]
 	mov.mi	r0,0
 #endif /* ENDIAN */
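
Functionally strchr is unchanged here: the SUBR_S/ADDR_S/ASRR_S macros from asm.h keep the 16-bit *_s encodings on ARCv2 and fall back to the 32-bit forms on ARCv3. The scan itself is the classic word-at-a-time trick: (x - 0x01010101) & ~x & 0x80808080 is non-zero exactly when a byte of x is zero, and the same test on x XORed with the repeated target byte finds the character. A C sketch of the idea (illustrative; aligned little-endian input assumed, while the real code also handles the unaligned head):

```c
#include <stdint.h>

/* Illustrative word-at-a-time scan behind the ARC strchr: detect a
 * NUL byte or the target byte in each 32-bit word, then walk the
 * bytes of the hit word (the asm instead locates the byte with
 * norm on a bit-twiddled mask).  Aligned, little-endian input. */
static char *strchr_sketch(const char *s, int c)
{
	const uint32_t ones = 0x01010101u, highs = 0x80808080u;
	uint32_t pat = ((uint32_t)c & 0xff) * ones;	/* c in every lane */
	const uint32_t *p = (const uint32_t *)s;

	for (;;) {
		uint32_t x = *p++;
		uint32_t zero = (x - ones) & ~x & highs;
		uint32_t hit = ((x ^ pat) - ones) & ~(x ^ pat) & highs;

		if (zero | hit) {
			const char *q = (const char *)(p - 1);
			for (int i = 0; i < 4; i++, q++) {
				if (*q == (char)c)
					return (char *)q;
				if (*q == '\0')
					return 0;
			}
		}
	}
}
```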

+ 10 - 11
libc/string/arc/strcmp.S

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013, 2014-2015, 2017 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2014-2015, 2017, 2022 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2007 ARC International (UK) LTD
  *
  * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -7,14 +7,11 @@
 
 #include <features.h>
 #include <sysdep.h>
-
+#include <asm.h>
-#if !defined(__ARC700__) && !defined(__ARCHS__)
-#error "Neither ARC700 nor ARCHS is defined!"
-#endif
 
 ENTRY(strcmp)
 
-#ifdef __ARC700__
+#if defined(__ARC700__) || defined(__ARC64_ARCH32__)
 /* This is optimized primarily for the ARC700.
    It would be possible to speed up the loops by one cycle / word
    respective one cycle / byte by forcing double source 1 alignment, unrolling
@@ -38,7 +35,7 @@ ENTRY(strcmp)
 	breq	r2,r3,.Lwordloop
 #ifdef	__LITTLE_ENDIAN__
 	xor	r0,r2,r3	; mask for difference
-	sub_s	r1,r0,1
+	SUBR_S	r1,r0,1
 	bic_s	r0,r0,r1	; mask for least significant difference bit
 	sub	r1,r5,r0
 	xor	r0,r5,r1	; mask for least significant difference byte
@@ -55,7 +52,7 @@ ENTRY(strcmp)
 .Lfound0:
 	xor	r0,r2,r3	; mask for difference
 	or	r0,r0,r4	; or in zero indicator
-	sub_s	r1,r0,1
+	SUBR_S	r1,r0,1
 	bic_s	r0,r0,r1	; mask for least significant difference bit
 	sub	r1,r5,r0
 	xor	r0,r5,r1	; mask for least significant difference byte
@@ -99,9 +96,8 @@ ENTRY(strcmp)
 .Lcmpend:
 	j_s.d	[blink]
 	sub	r0,r2,r3
-#endif /* __ARC700__ */
 
-#ifdef __ARCHS__
+#elif defined(__ARCHS__)
 	or	r2, r0, r1
 	bmsk_s	r2, r2, 1
 	brne	r2, 0, @.Lcharloop
@@ -168,7 +164,10 @@ ENTRY(strcmp)
 .Lcmpend:
 	j_s.d	[blink]
 	sub	r0, r2, r3
-#endif /* __ARCHS__ */
+
+#else
+#error "Unsupported ARC CPU type"
+#endif
 
 END(strcmp)
 libc_hidden_def(strcmp)
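
For ARCv3 the ARC700 strcmp path is simply reused (note the widened #if above); only the 16-bit instruction forms are swapped via the asm.h macros. The algorithm compares a word at a time, using the same zero-byte test as strchr to spot the terminator, and falls back to bytes to produce the ordering. A compact C rendering (illustrative only; aligned input assumed, which the real entry code checks for):

```c
#include <stdint.h>

/* Illustrative word-at-a-time strcmp in the spirit of the ARC code:
 * compare 32-bit words; when they differ or contain a NUL, fall back
 * to bytes for the byte-level result.  Aligned input assumed. */
static int strcmp_sketch(const char *a, const char *b)
{
	const uint32_t ones = 0x01010101u, highs = 0x80808080u;
	const uint32_t *wa = (const uint32_t *)a;
	const uint32_t *wb = (const uint32_t *)b;

	for (;;) {
		uint32_t x = *wa++, y = *wb++;

		if (x == y && !((x - ones) & ~x & highs))
			continue;	/* equal words, no NUL: keep going */

		/* a difference or terminator is in these four bytes */
		const unsigned char *pa = (const unsigned char *)(wa - 1);
		const unsigned char *pb = (const unsigned char *)(wb - 1);
		for (int i = 0; i < 4; i++)
			if (pa[i] != pb[i] || pa[i] == 0)
				return pa[i] - pb[i];
		/* not reached: the checks above guarantee a return */
	}
}
```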

+ 4 - 3
libc/string/arc/strlen.S

@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013, 2022 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2007 ARC International (UK) LTD
  *
  * Licensed under the LGPL v2.1 or later, see the file COPYING.LIB in this tarball.
@@ -7,6 +7,7 @@
 
 
 #include <sysdep.h>
+#include <asm.h>
 
 ENTRY(strlen)
 	or	r3,r0,7
@@ -15,7 +16,7 @@ ENTRY(strlen)
 	mov	r4,0x01010101
 	; uses long immediate
 #ifdef __LITTLE_ENDIAN__
-	asl_s	r1,r0,3
+	ASLR_S	r1,r0,3
 	btst_s	r0,2
 	asl	r7,r4,r1
 	ror	r5,r4
@@ -59,7 +60,7 @@ ENTRY(strlen)
 	sub.ne	r3,r3,4
 	mov.eq	r1,r12
 #ifdef __LITTLE_ENDIAN__
-	sub_s	r2,r1,1
+	SUBR_S	r2,r1,1
 	bic_s	r2,r2,r1
 	norm	r1,r2
 	sub_s	r0,r0,3
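
strlen relies on the same zero-byte detection; once the word holding the terminator is found, the sub/bic/norm sequence converts the mask into the byte index. In C (an illustrative sketch; aligned little-endian input assumed, while the real code handles the unaligned head):

```c
#include <stddef.h>
#include <stdint.h>

/* Illustrative word-at-a-time strlen behind the ARC code: scan 32-bit
 * words for a zero byte, then count the bytes before it (the asm
 * derives the same index with norm on (mask - 1) & ~mask). */
static size_t strlen_sketch(const char *s)
{
	const uint32_t ones = 0x01010101u, highs = 0x80808080u;
	const uint32_t *p = (const uint32_t *)s;
	uint32_t x, zero;

	do {
		x = *p++;
		zero = (x - ones) & ~x & highs;	/* set iff x has a NUL */
	} while (!zero);

	/* lowest set high bit marks the first NUL (little-endian) */
	return (size_t)((const char *)(p - 1) - s)
	       + (size_t)(__builtin_ctz(zero) >> 3);
}
```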

+ 39 - 0
libc/sysdeps/linux/arc/asm.h

@@ -7,6 +7,13 @@
 #ifndef _ARC_ASM_H
 #define _ARC_ASM_H
 
+/*
+ * Some 16-bit instructions were excluded from the ARCv3 ISA.
+ * The following macros are introduced to handle these changes in one
+ * place: existing ARCv2 code stays unchanged, using the 16-bit versions
+ * of instructions on ARCv2 and the 32-bit versions on ARCv3.
+ */
+
 #if defined (__ARC64_ARCH32__)
 
 .macro PUSHR reg
@@ -25,6 +32,22 @@
 	pop	\reg
 .endm
 
+.macro SUBR_S dst,src1,src2
+	sub	\dst, \src1, \src2
+.endm
+
+.macro ADDR_S dst,src1,src2
+	add	\dst, \src1, \src2
+.endm
+
+.macro ASRR_S dst,src1,src2
+	asr	\dst, \src1, \src2
+.endm
+
+.macro ASLR_S dst,src1,src2
+	asl	\dst, \src1, \src2
+.endm
+
 #elif defined (__ARC64_ARCH64__)
 
 # error ARCv3 64-bit is not supported by uClibc-ng
@@ -47,6 +70,22 @@
 	pop_s	\reg
 .endm
 
+.macro SUBR_S dst,src1,src2
+	sub_s	\dst, \src1, \src2
+.endm
+
+.macro ADDR_S dst,src1,src2
+	add_s	\dst, \src1, \src2
+.endm
+
+.macro ASRR_S dst,src1,src2
+	asr_s	\dst, \src1, \src2
+.endm
+
+.macro ASLR_S dst,src1,src2
+	asl_s	\dst, \src1, \src2
+.endm
+
 #endif
 
 #endif /* _ARC_ASM_H  */