Browse Source

libm_sh: add optimised assembly implementation of lroundf and lrintf

* libc/sysdeps/linux/sh/sysdep.h: Add LOCAL macro
* libm/sh/sh4/Makefile.arch: Include asm source in the build
* libm/sh/sh4/s_lrintf.S [NEW]: optimised asm lrintf
* libm/sh/sh4/s_lroundf.S [NEW]: optimised asm lroundf

Signed-off-by: Christian Bruel <christian.bruel@st.com>
Signed-off-by: Carmelo Amoroso <carmelo.amoroso@st.com>
Christian Bruel 13 years ago
parent
commit
6ac247452e
4 changed files with 97 additions and 3 deletions
  1. 1 0
      libc/sysdeps/linux/sh/sysdep.h
  2. 5 3
      libm/sh/sh4/Makefile.arch
  3. 52 0
      libm/sh/sh4/s_lrintf.S
  4. 39 0
      libm/sh/sh4/s_lroundf.S

+ 1 - 0
libc/sysdeps/linux/sh/sysdep.h

@@ -26,6 +26,7 @@
 
 /* Syntactic details of assembler.  */
 
+#define LOCAL(X)	.L_##X	/* Build an assembler label named .L_<X>; the .L prefix is the GAS convention for non-exported labels.  */
 #define ALIGNARG(log2) log2
 /* For ELF we need the `.type' directive to make shared libs work right.  */
 #define ASM_TYPE_DIRECTIVE(name,typearg) .type name,@##typearg;

+ 5 - 3
libm/sh/sh4/Makefile.arch

@@ -7,11 +7,13 @@
 #
 
 ifeq ($(UCLIBC_HAS_FENV),y)
-libm_ARCH_SRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c)
-libm_ARCH_OBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SRC))
+# C and assembly sources are globbed separately; each object list is derived from its own source list.
+libm_ARCH_CSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c)
+libm_ARCH_COBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_CSRC))
+libm_ARCH_SSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.S)
+libm_ARCH_SOBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.S,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SSRC))
 endif
 
-libm_ARCH_OBJS:=$(libm_ARCH_OBJ)
+libm_ARCH_OBJS:=$(libm_ARCH_COBJ) $(libm_ARCH_SOBJ)
 
 ifeq ($(DOPIC),y)
 libm-a-y+=$(libm_ARCH_OBJS:.o=.os)

+ 52 - 0
libm/sh/sh4/s_lrintf.S

@@ -0,0 +1,52 @@
+/* Round argument to nearest integer value, halfway cases to even. SH4 version.
+ * According to ISO/IEC 9899:1999. This version doesn't handle range error.
+ * If arg is not finite or if the result cannot be represented as a long,
+ * return an unspecified value. No exception raised.
+ *
+ * Copyright (C) 2010 STMicroelectronics Ltd.
+ *
+ * Author: Christian Bruel <christian.bruel@st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+ENTRY(lrintf)
+	mov	#0,r0		/* r0 = 0: FPSCR value used while rounding */
+	sts	fpscr,r3	/* r3 = caller's FPSCR, restored in the rts delay slot */
+	lds	r0,fpscr	/* FPSCR = 0; per the SH-4 FPSCR layout: round-to-nearest, single precision, traps disabled */
+	flds	fr5,fpul	/* FPUL = raw bits of fr5; NOTE(review): assumes x arrives in fr5 - confirm for every ABI/endianness built */
+	mov.l	LOCAL(mask),r1	/* r1 = 0x80000000 */
+	sts	fpul,r2		/* r2 = raw bits of x */
+	and	r2,r1		/* r1 = sign bit of x, kept for the tie fix-up below */
+	mov.l	LOCAL(midway),r2	/* r2 = bit pattern of 0.5f */
+	or	r1,r2		/* r2 = 0.5f carrying the sign of x */
+	lds	r2,fpul
+	fsts	fpul,fr2	/* fr2 = +/-0.5 */
+	fadd	fr2,fr5		/* fr5 = x +/- 0.5; NOTE(review): not exact once |x| >= 2^23 - verify large odd inputs */
+	ftrc	fr5,fpul	/* FPUL = fr5 truncated toward zero */
+	sts	fpul,r0		/* r0 = candidate result (halfway cases are rounded away from zero so far) */
+	float	fpul,fr2	/* fr2 = (float) candidate */
+	fcmp/eq	fr5,fr2		/* T = 1 when fr5 was already an integer, which is how a halfway input shows up */
+	bf/s	0f		/* not a halfway case: the candidate is final */
+	mov	#1,r2		/* (delay slot, always executed) r2 = 1 */
+	tst	r1,r1		/* T = 1 when the sign bit of x is clear */
+	and	r0,r2		/* r2 = low bit of the candidate */
+	movt	r1		/* r1 = 1 for a clear sign bit, 0 otherwise */
+	shal	r1		/* r1 = 2 or 0 */
+	tst	r2,r2		/* T = 1 when the candidate is even */
+	add	#-1,r1		/* r1 = +1 or -1, matching the sign of x */
+	bt	0f		/* already even: keep it */
+	sub	r1,r0		/* odd: step one back toward zero to reach the even neighbour */
+0:
+	rts
+	lds	r3,fpscr	/* (delay slot) restore the caller's FPSCR */
+
+	.align 2
+LOCAL(mask):
+	.long	0x80000000	/* IEEE-754 single-precision sign bit */
+LOCAL(midway):
+	.long	1056964608	/* 0x3f000000, the bit pattern of 0.5f */
+
+END(lrintf)

+ 39 - 0
libm/sh/sh4/s_lroundf.S

@@ -0,0 +1,39 @@
+/* Round argument to nearest integer, halfway cases away from zero. SH4 version.
+ * According to ISO/IEC 9899:1999. This version doesn't handle range error.
+ * If arg is not finite or if the result cannot be represented as a long,
+ * return an unspecified value. No exception raised.
+ *
+ * Copyright (C) 2010 STMicroelectronics Ltd.
+ *
+ * Author: Christian Bruel <christian.bruel@st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+ENTRY(lroundf)
+	mov	#1,r0		/* r0 = 1: FPSCR.RM = round-toward-zero, so x +/- 0.5 is never rounded up across an integer */
+	sts	fpscr,r3	/* r3 = caller's FPSCR, restored in the rts delay slot */
+	lds	r0,fpscr	/* FPSCR = 1; per the SH-4 FPSCR layout: single precision, traps disabled, RM = 01 */
+	flds	fr5,fpul	/* FPUL = raw bits of fr5; NOTE(review): assumes x arrives in fr5 - confirm for every ABI/endianness built */
+	mov.l	LOCAL(mask),r1	/* r1 = 0x80000000 */
+	sts	fpul,r2		/* r2 = raw bits of x */
+	and	r2,r1		/* r1 = sign bit of x */
+	mov.l	LOCAL(midway),r2	/* r2 = bit pattern of 0.5f */
+	or	r1,r2		/* r2 = 0.5f carrying the sign of x */
+	lds	r2,fpul
+	fsts	fpul,fr2	/* fr2 = +/-0.5 */
+	fadd	fr2,fr5		/* fr5 = x +/- 0.5, truncated (not rounded) to single precision */
+	ftrc	fr5,fpul	/* FPUL = fr5 truncated toward zero = x rounded half away from zero */
+	sts	fpul,r0		/* r0 = result */
+	rts
+	lds	r3,fpscr	/* (delay slot) restore the caller's FPSCR */
+
+	.align 2
+LOCAL(mask):
+	.long	0x80000000	/* IEEE-754 single-precision sign bit */
+LOCAL(midway):
+	.long	1056964608	/* 0x3f000000, the bit pattern of 0.5f */
+
+END(lroundf)