Prechádzať zdrojové kódy

Adjust sparc port so it now actually works.
-Erik

Eric Andersen 22 rokov pred
rodič
commit
fea1b23bcf

+ 6 - 5
libc/sysdeps/linux/sparc/Makefile

@@ -25,13 +25,14 @@ TOPDIR=../../../../
 include $(TOPDIR)Rules.mak
 ASFLAGS=$(CFLAGS)
 
-CRT0=crt0.S
-CRT0_OBJ=$(patsubst %.S,%.o, $(CRT0))
+CRT0=crt0.c
+CRT0_OBJ=$(patsubst %.c,%.o, $(CRT0))
 
-SSRC=__longjmp.S setjmp.S vfork.S
+SSRC=__longjmp.S fork.S vfork.S clone.S setjmp.S bsd-setjmp.S bsd-_setjmp.S \
+	urem.S udiv.S umul.S sdiv.S rem.S
 SOBJS=$(patsubst %.S,%.o, $(SSRC))
 
-CSRC=fork.c
+CSRC=brk.c
 COBJS=$(patsubst %.c,%.o, $(CSRC))
 
 OBJS=$(SOBJS) $(MOBJ) $(COBJS)
@@ -46,7 +47,7 @@ ar-target: $(OBJS) $(CRT0_OBJ)
 	$(AR) $(ARFLAGS) $(LIBC) $(OBJS)
 	cp $(CRT0_OBJ) $(TOPDIR)libc/$(CRT0_OBJ)
 
-$(CRT0_OBJ): %.o : %.S
+$(CRT0_OBJ): %.o : %.c
 	$(CC) $(CFLAGS) -c $< -o $@
 	$(STRIPTOOL) -x -R .note -R .comment $*.o
 

+ 22 - 28
libc/sysdeps/linux/sparc/__longjmp.S

@@ -2,21 +2,19 @@
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
 
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-/* Code taken from glibc/sysdeps/sparc/sparc32/  (glibc 2.2.2) */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
 
 #include <sysdep.h>
 
@@ -27,7 +25,11 @@
 #define ST_FLUSH_WINDOWS 3
 #define RW_FP [%fp + 0x48]
 
-ENTRY(__longjmp)
+.global   __longjmp;
+.align 4;
+__longjmp: ; 
+.type __longjmp ,@function; 
+
 	/* Store our arguments in global registers so we can still
 	   use them while unwinding frames and their register windows.  */
 
@@ -40,27 +42,21 @@ ENTRY(__longjmp)
 	xor %fp, %g3, %o0
 	add %fp, 512, %o1
 	andncc %o0, 4095, %o0
-	bne thread
-	//bne LOC(thread)
+	bne .Lthread
 	 cmp %o1, %g3
-	bl thread
-	//bl LOC(thread)
+	bl .Lthread
 
 	/* Now we will loop, unwinding the register windows up the stack
 	   until the restored %fp value matches the target value in %g3.  */
 
-//LOC(loop):
-loop:
+.Lloop:
 	cmp %fp, %g3		/* Have we reached the target frame? */
-	bl,a loop		/* Loop while current fp is below target.  */
-	//bl,a LOC(loop)	/* Loop while current fp is below target.  */
+	bl,a .Lloop		/* Loop while current fp is below target.  */
 	 restore		/* Unwind register window in delay slot.  */
-	be,a found		/* Better have hit it exactly.  */
-	//be,a LOC(found)	/* Better have hit it exactly.  */
+	be,a .Lfound		/* Better have hit it exactly.  */
 	 ld ENV(g1,JB_SP), %o0	/* Delay slot: extract target SP.  */
 
-thread:
-//LOC(thread):
+.Lthread:
 	/*
 	 * Do a "flush register windows trap".  The trap handler in the
 	 * kernel writes all the register windows to their stack slots, and
@@ -77,15 +73,13 @@ thread:
 	retl
 	 restore %g2, 0, %o0	/* Restore values from above register frame. */
 
-found:
-//LOC(found):
+.Lfound:
 	/* We have unwound register windows so %fp matches the target.  */
 	mov %o0, %sp		/* OK, install new SP.  */
 
-//LOC(sp_ok):
-sp_ok:
+.Lsp_ok:
 	ld ENV(g1,JB_PC), %o0	/* Extract target return PC.  */
 	jmp %o0 + 8		/* Return there.  */
 	 mov %g2, %o0		/* Delay slot: set return value.  */
 
-END(__longjmp)
+.size  __longjmp , . -  __longjmp

+ 49 - 0
libc/sysdeps/linux/sparc/brk.c

@@ -0,0 +1,49 @@
+/* brk system call for Linux/SPARC.
+   Copyright (C) 1995, 1996, 2000 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+
+/* This must be initialized data because commons can't have aliases.  */
+void *___brk_addr = 0;
+
+
+/* Ask the kernel to set the program break to ADDR and cache the break it
+   reports in ___brk_addr.  Returns 0 on success; if the reported break is
+   below ADDR, sets errno to ENOMEM and returns -1.  */
+int brk (void *addr)
+{
+    void *newbrk;
+
+    {
+	/* Trap 0x10 enters the kernel: the call number travels in %g1, and
+	   %o0 carries the argument in and the result out.  */
+	register void *o0 __asm__("%o0") = addr;
+	register int g1 __asm__("%g1") = 17 ;	/* call number used for brk */
+	/* volatile + "memory": the trap has effects beyond its operands, so
+	   the compiler must not drop it, merge it with an identical trap, or
+	   move memory accesses across it.  */
+	__asm__ __volatile__ ("t 0x10" : "=r"(o0) : "r"(g1), "0"(o0) : "cc", "memory");
+	newbrk = o0;
+    }
+
+    /* Remember whatever break the kernel reported, even on failure.  */
+    ___brk_addr = newbrk;
+
+    /* A reported break below the request means the request was not honoured.  */
+    if (newbrk < addr)
+    {
+	__set_errno (ENOMEM);
+	return -1;
+    }
+
+    return 0;
+}

+ 1 - 0
libc/sysdeps/linux/sparc/bsd-_setjmp.S

@@ -0,0 +1 @@
+/* _setjmp is in setjmp.S  */

+ 1 - 0
libc/sysdeps/linux/sparc/bsd-setjmp.S

@@ -0,0 +1 @@
+/* setjmp is in setjmp.S  */

+ 72 - 0
libc/sysdeps/linux/sparc/clone.S

@@ -0,0 +1,72 @@
+/* Copyright (C) 1996, 1997, 1998, 2000 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Richard Henderson (rth@tamu.edu).
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* clone() is even more special than fork() as it mucks with stacks
+   and invokes a function in the right context after its all over.  */
+
+#include <asm/errno.h>
+#include <asm/unistd.h>
+
+/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg); */
+/* Parent: returns the trap's result, or -1 with errno set (EINVAL for a NULL
+   fn or child_stack, otherwise the error code the kernel handed back).
+   Child: continues in __thread_start.  */
+
+	.text
+	.align	4
+	.globl	__clone
+	.type	__clone,@function
+
+__clone:
+	save	%sp,-96,%sp
+
+	/* sanity check arguments */
+	tst	%i0			/* fn == NULL? */
+	be	.Lerror
+	 orcc	%i1,%g0,%o1		/* delay slot: %o1 = child_stack, NULL? */
+	be	.Lerror
+	 mov	%i2,%o0			/* delay slot: %o0 = flags */
+
+	/* Do the system call */
+	set	__NR_clone,%g1
+	ta	0x10
+	bcs	.Lsyserr		/* carry set: %o0 holds the kernel's error code */
+	 tst	%o1
+	bne	__thread_start		/* %o1 != 0 after the trap: this is the child */
+	 nop
+	ret
+	 restore %o0,%g0,%o0		/* parent: hand the trap result to the caller */
+
+	/* Bad arguments: report EINVAL.  */
+.Lerror:
+	mov	EINVAL,%o0
+	/* Failed trap: keep the kernel's error code rather than replacing it
+	   with EINVAL.  */
+.Lsyserr:
+	call	__errno_location
+	 mov	%o0,%i0			/* delay slot: save the code before %o0 is reused */
+	st	%i0,[%o0]		/* *__errno_location() = error code */
+	ret
+	 restore %g0,-1,%o0		/* return -1 in the caller's %o0 */
+
+	.size	__clone, .-__clone
+
+	.type	__thread_start,@function
+
+/* Reached from __clone when %o1 is nonzero after the trap.  Uses the
+   register window opened by __clone's save: %i0 is fn and %i3 is arg.
+   Calls fn (arg) and then _exit; the second call is not expected to return.  */
+__thread_start:
+	call	%i0			/* fn (...) */
+	 mov	%i3,%o0			/* delay slot: first argument = arg */
+	call	_exit,0			/* %o0 is passed on untouched; per the SPARC calling
+					   convention it holds fn's return value */
+	 nop
+
+	.size	__thread_start, .-__thread_start
+
+.weak    clone    ;        clone    =   __clone

+ 0 - 90
libc/sysdeps/linux/sparc/crt0.S

@@ -1,90 +0,0 @@
-/*
- * xrt0.s for ERC32. 
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 675
- * Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-/* code taken from leonccs 1.0 leon/src/libio/crt0.S 
-	I don't know if this is available anymore, now that LECCS
-	is out.  And I'm not sure if this source is in the LECCS distro  :(
-*/
-
-	.text
-! Original : 
-!	.global __start, _main
-! uC-libc version : 
-	.global _start
-	.global __uClibc_main
-
-! Start the real-time clock with a tick of 14 clocks
-!
-
-_start:
-
-	save	%sp, -64, %sp
-
-        /* clear the bss */
- 
-        sethi %hi(edata),%g2
-        or    %g2,%lo(edata),%g2  ! g2 = start of bss
-        sethi %hi(_end),%g3
-        or    %g3,%lo(_end),%g3         ! g3 = end of bss
-        mov   %g0,%g1                   ! so std has two zeros
-zerobss:
-        std    %g0,[%g2]
-        add    %g2,8,%g2
-        cmp    %g2,%g3
-        bleu,a zerobss
-        nop
-
-        /* move data segment to proper location */
- 
-relocd:
-        set (_endtext),%g2 		! g2 = start of data in aout file
-        set (_environ),%g4		! g4 = start of where data should go
-        set (_edata),%g3 		! g3 = end of where data should go
-	subcc	%g3, %g4, %g5		! g5 = length of data
-
-	subcc	%g4, %g2, %g0		! need to relocate data ?
-	ble	initok
-	ld	[%g4], %g6
-!	subcc	%g6, 1, %g0
-!	be	initok
-mvdata:
-	subcc	%g5, 8, %g5
-	ldd	[%g2 + %g5], %g6
-	bg	mvdata
-        std    	%g6, [%g4 + %g5]
-
-initok:
-
-!	call    _main
-	call	__uClibc_main
-        nop
-! Should not return from uClibc main()
-!	ret
-!	nop
-
-        .seg    "data"
-        .global .bdata
-.bdata:
-        .align  8
-        .global _environ                ! first symbol in sdata
-_environ:
-        .word   1
-
-
-

+ 22 - 12
libc/sysdeps/linux/sparc/fork.c → libc/sysdeps/linux/sparc/crt0.c

@@ -1,9 +1,8 @@
 /* vi: set sw=4 ts=4: */
-/* fork for uClibc
+/* uClibc/sysdeps/linux/sparc/crt0.c
+ * Pull stuff off the stack and get uClibc moving.
  *
- * Copyright (C) 2000 by Lineo, inc. and Erik Andersen
  * Copyright (C) 2000,2001 by Erik Andersen <andersen@uclibc.org>
- * Written by Erik Andersen <andersen@uclibc.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU Library General Public License as published by
@@ -20,15 +19,26 @@
  * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
-#include <errno.h>
-#include <features.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
+extern void __uClibc_main(int argc,void *argv,void *envp);
 
-#ifndef __HAS_NO_MMU__
+/* a little bit of stuff to support C++ */
+__asm__(".section .ctors,\"aw\"\n.align 4\n.global __CTOR_LIST__\n"
+	"__CTOR_LIST__:\n.long -1\n");
 
-//#define __NR_fork             2
-#include <unistd.h>
-_syscall0(pid_t, fork);
+__asm__(".section .dtors,\"aw\"\n.align 4\n.global __DTOR_LIST__\n"
+	"__DTOR_LIST__:\n.long -1\n");
+
+/* Process entry point.  Derives argc, argv and envp from the memory around
+   the address of FIRST_ARG and passes them to __uClibc_main().
+   NOTE(review): this depends on where the compiler places FIRST_ARG relative
+   to the initial process stack -- confirm the layout holds for SPARC.  */
+void _start(unsigned int first_arg)
+{
+	unsigned int argc;
+	char **argv, **envp;
+	unsigned long *stack;
+
+	stack = (unsigned long*) &first_arg;	/* anchor: address of the incoming argument */
+	argc = *(stack - 1);			/* the word just below the anchor is read as argc */
+	argv = (char **) stack;			/* the argv vector is taken to start at the anchor */
+	envp = (char **)stack + argc + 1;	/* skip argc entries plus one more slot
+						   (presumably argv's NULL terminator) */
+
+	__uClibc_main(argc, argv, envp);
+}
 
-#endif

+ 46 - 0
libc/sysdeps/linux/sparc/fork.S

@@ -0,0 +1,46 @@
+/* Copyright (C) 1991, 92, 94, 95, 97, 99 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+
+/* fork: issue system call 2 through trap 0x10.
+   Returns the child's pid in the parent and 0 in the child.  On failure,
+   stores the kernel's error code through __errno_location() and returns -1.  */
+	.text
+	.align	4
+	.global	fork
+	.type	fork,@function
+fork:
+	mov	2, %g1			/* call number goes in %g1 */
+	ta	0x10
+	bcc,a	9000f			/* carry clear: the call succeeded */
+	 nop
+	save	%sp,-96,%sp		/* failure: new window, error code now visible as %i0 */
+	call	__errno_location
+	 nop
+	st	%i0,[%o0]		/* *__errno_location() = error code */
+	jmpl	%i7+8,%g0		/* return to the caller ... */
+	 restore %g0,-1,%o0		/* ... with -1 in its %o0 (delay slot) */
+9000:
+
+	/* %o1 is now 0 for the parent and 1 for the child.  Decrement it to
+	   make it -1 (all bits set) for the parent, and 0 (no bits set)
+	   for the child.  Then AND it with %o0, so the parent gets
+	   %o0&-1==pid, and the child gets %o0&0==0.  */
+	sub	%o1, 1, %o1
+	retl
+	 and	%o0, %o1, %o0		/* delay slot: final return value */
+
+	/* Record the symbol size, as the other assembly routines here do.  */
+	.size	fork, .-fork
+

+ 367 - 0
libc/sysdeps/linux/sparc/rem.S

@@ -0,0 +1,367 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ *  .rem	name of function to generate
+ *  rem		rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ *  true		true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include <sysdep.h>
+
+
+.global   .rem;
+.align 4;
+.type  .rem ,@function; 
+
+.rem: 
+	! compute sign of result; if neither is negative, no problem
+	orcc	%o1, %o0, %g0	! either negative?
+	bge	2f			! no, go do the divide
+	mov	%o0, %g3		! sign of remainder matches %o0
+	tst	%o1
+	bge	1f
+	tst	%o0
+	! %o1 is definitely negative; %o0 might also be negative
+	bge	2f			! if %o0 not negative...
+	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
+1:	! %o0 is negative, %o1 is nonnegative
+	sub	%g0, %o0, %o0	! make %o0 nonnegative
+2:
+
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5
+	bne	1f
+	mov	%o0, %o3
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	0x02
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	.Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2
+	sethi	%hi(1 << (32 - 4 - 1)), %g1
+	cmp	%o3, %g1
+	blu	.Lnot_really_big
+	clr	%o4
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g2
+		sll	%o5, 4, %o5
+		b	1b
+		add	%o4, 1, %o4
+
+	! Now compute %g2.
+	2:	addcc	%o5, %o5, %o5
+		bcc	.Lnot_too_big
+		add	%g2, 1, %g2
+
+		! We get here if the %o1 overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	.Ldo_single_div
+		sub	%g2, 1, %g2
+
+	.Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	.Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g2
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	.Ldo_single_div:
+		subcc	%g2, 1, %g2
+		bl	.Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3
+		mov	1, %o2
+		b	.Lend_single_divloop
+		nop
+	.Lsingle_divloop:
+		sll	%o2, 1, %o2
+		bl	1f
+		srl	%o5, 1, %o5
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	.Lend_single_divloop:
+		subcc	%g2, 1, %g2
+		bge	.Lsingle_divloop
+		tst	%o3
+		b,a	.Lend_regular_divide
+
+.Lnot_really_big:
+1:
+	sll	%o5, 4, %o5
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4
+	be	.Lgot_result
+	sub	%o4, 1, %o4
+
+	tst	%o3	! set up for initial iteration
+.Ldivloop:
+	sll	%o2, 4, %o2
+		! depth 1, accumulated bits 0
+	bl	.L1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	.L2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	.L3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	.L4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+.L4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+.L3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	.L4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+.L4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+.L2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	.L3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	.L4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+.L4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+.L3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	.L4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+.L4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+.L1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	.L2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	.L3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	.L4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+.L4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+.L3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	.L4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+.L4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+.L2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	.L3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	.L4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+.L4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+.L3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	.L4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+.L4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+.Lend_regular_divide:
+	subcc	%o4, 1, %o4
+	bge	.Ldivloop
+	tst	%o3
+	bl,a	.Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	add	%o3, %o1, %o3
+
+
+.Lgot_result:
+	! check to see if answer should be < 0
+	tst	%g3
+	bl,a	1f
+	sub %g0, %o3, %o3
+1:
+	retl
+	mov %o3, %o0
+
+END(.rem)

+ 366 - 0
libc/sysdeps/linux/sparc/sdiv.S

@@ -0,0 +1,366 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ *
+ * m4 parameters:
+ *  .div	name of function to generate
+ *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
+ *  true		true=true => signed; true=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include <sysdep.h>
+
+.global   .div;
+.align 4;
+.type  .div ,@function; 
+
+.div: 
+	! compute sign of result; if neither is negative, no problem
+	orcc	%o1, %o0, %g0	! either negative?
+	bge	2f			! no, go do the divide
+	xor	%o1, %o0, %g3	! compute sign in any case
+	tst	%o1
+	bge	1f
+	tst	%o0
+	! %o1 is definitely negative; %o0 might also be negative
+	bge	2f			! if %o0 not negative...
+	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
+1:	! %o0 is negative, %o1 is nonnegative
+	sub	%g0, %o0, %o0	! make %o0 nonnegative
+2:
+
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5
+	bne	1f
+	mov	%o0, %o3
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	0x02
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	.Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2
+	sethi	%hi(1 << (32 - 4 - 1)), %g1
+	cmp	%o3, %g1
+	blu	.Lnot_really_big
+	clr	%o4
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g2
+		sll	%o5, 4, %o5
+		b	1b
+		add	%o4, 1, %o4
+
+	! Now compute %g2.
+	2:	addcc	%o5, %o5, %o5
+		bcc	.Lnot_too_big
+		add	%g2, 1, %g2
+
+		! We get here if the %o1 overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	.Ldo_single_div
+		sub	%g2, 1, %g2
+
+	.Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	.Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g2
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	.Ldo_single_div:
+		subcc	%g2, 1, %g2
+		bl	.Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3
+		mov	1, %o2
+		b	.Lend_single_divloop
+		nop
+	.Lsingle_divloop:
+		sll	%o2, 1, %o2
+		bl	1f
+		srl	%o5, 1, %o5
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	.Lend_single_divloop:
+		subcc	%g2, 1, %g2
+		bge	.Lsingle_divloop
+		tst	%o3
+		b,a	.Lend_regular_divide
+
+.Lnot_really_big:
+1:
+	sll	%o5, 4, %o5
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4
+	be	.Lgot_result
+	sub	%o4, 1, %o4
+
+	tst	%o3	! set up for initial iteration
+.Ldivloop:
+	sll	%o2, 4, %o2
+		! depth 1, accumulated bits 0
+	bl	.L1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	.L2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	.L3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	.L4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+.L4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+.L3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	.L4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+.L4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+.L2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	.L3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	.L4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+.L4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+.L3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	.L4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+.L4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+.L1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	.L2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	.L3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	.L4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+.L4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+.L3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	.L4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+.L4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+.L2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	.L3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	.L4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+.L4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+.L3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	.L4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+.L4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+.Lend_regular_divide:
+	subcc	%o4, 1, %o4
+	bge	.Ldivloop
+	tst	%o3
+	bl,a	.Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	sub	%o2, 1, %o2
+
+
+.Lgot_result:
+	! check to see if answer should be < 0
+	tst	%g3
+	bl,a	1f
+	sub %g0, %o2, %o2
+1:
+	retl
+	mov %o2, %o0
+
+END(.div)

+ 10 - 13
libc/sysdeps/linux/sparc/setjmp.S

@@ -2,24 +2,21 @@
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
+   Lesser General Public License for more details.
 
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-/* Code taken from glibc2.2.2/sysdeps/sparc/sparc32/setjmp.S */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
 
 #include <sysdep.h>
-#include "sysdep.h"
 
 #define _ASM 1
 #define _SETJMP_H
@@ -49,5 +46,5 @@ ENTRY (__sigsetjmp)
 	 mov	%g1, %o7
 END(__sigsetjmp)
 
-//weak_extern(_setjmp)
-//weak_extern(setjmp)
+.weak   _setjmp    
+.weak   setjmp  

+ 348 - 0
libc/sysdeps/linux/sparc/udiv.S

@@ -0,0 +1,348 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ * Output: the unsigned quotient %o0 / %o1, returned in %o0.
+ *
+ * m4 parameters (the generator substituted its macro names inside this
+ * list as well, so each entry now shows the value used for this file:
+ * the function is .udiv, the operation is div, and signed is false):
+ *  .udiv	name of function to generate
+ *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
+ *  false		false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Register usage in this expansion:
+ *  %o5		V; starts as a copy of the divisor, then is scaled
+ *  %o3		R; starts as a copy of the dividend
+ *  %o2		Q; the value finally moved to %o0
+ *  %o4		ITER; counted up while scaling V, down in the main loop
+ *  %g1		1 << 27 (shifted to 1 << 31 on the overflow path); the
+ *		bound that selects the large-dividend path
+ *  %g2		number of single-bit steps on the large-dividend path
+ *
+ * The routine allocates no register window (no save/restore) and
+ * returns with retl.  The instruction following every branch is its
+ * delay slot.
+ *
+ * Labels of the form .L<depth>.<16 + accumulated bits> are the
+ * "remainder is negative" targets of the unrolled 4-level decision
+ * tree in the main loop; each leaf adds (accumulated bits * 2 +/- 1)
+ * to Q.
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include <sysdep.h>
+
+.global   .udiv;
+.align 4;
+.type  .udiv ,@function; 
+
+.udiv: 
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5	/* V = divisor; Z is set when the divisor is 0 */
+	bne	1f
+	mov	%o0, %o3	/* delay slot: R = dividend */
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	0x02
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	.Lgot_result  		! (and algorithm fails otherwise)
+	clr	%o2	/* delay slot: Q = 0 */
+	sethi	%hi(1 << (32 - 4 - 1)), %g1	/* %g1 = 1 << 27 */
+	cmp	%o3, %g1
+	blu	.Lnot_really_big  
+	clr	%o4	/* delay slot: ITER = 0 */
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g2	/* delay slot: single-bit step count = 1 */
+		sll	%o5, 4, %o5
+		b	1b
+		add	%o4, 1, %o4	/* delay slot: ITER++ */
+
+	! Now compute %g2.
+	2:	addcc	%o5, %o5, %o5
+		bcc	.Lnot_too_big  
+		add	%g2, 1, %g2
+
+		! We get here if the %o1 overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	.Ldo_single_div
+		sub	%g2, 1, %g2
+
+	.Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	.Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g2
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	.Ldo_single_div:
+		subcc	%g2, 1, %g2
+		bl	.Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3
+		mov	1, %o2
+		b	.Lend_single_divloop
+		nop
+	.Lsingle_divloop:
+		sll	%o2, 1, %o2
+		bl	1f
+		srl	%o5, 1, %o5
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	.Lend_single_divloop:
+		subcc	%g2, 1, %g2
+		bge	.Lsingle_divloop
+		tst	%o3
+		b,a	.Lend_regular_divide	/* annulled: the following sll is not executed */
+
+.Lnot_really_big:
+1:
+	sll	%o5, 4, %o5
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4	/* delay slot: ITER++ */
+	be	.Lgot_result
+	sub	%o4, 1, %o4
+
+	tst	%o3	! set up for initial iteration
+.Ldivloop:
+	sll	%o2, 4, %o2	/* make room in Q for the 4 bits of this pass */
+		! depth 1, accumulated bits 0
+	bl	.L1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	.L2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	.L3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	.L4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+.L4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+.L3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	.L4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+.L4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+.L2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	.L3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	.L4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+.L4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+.L3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	.L4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+.L4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+.L1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	.L2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	.L3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	.L4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+.L4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+.L3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	.L4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+.L4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+.L2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	.L3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	.L4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+.L4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+.L3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	.L4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+.L4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+.Lend_regular_divide:
+	subcc	%o4, 1, %o4	/* ITER-- ; loop again while the result is >= 0 */
+	bge	.Ldivloop
+	tst	%o3	/* delay slot: condition codes reflect R */
+	bl,a	.Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	sub	%o2, 1, %o2	/* annulled unless R < 0: Q = Q - 1 */
+
+
+.Lgot_result:
+
+	retl
+	mov %o2, %o0	/* delay slot: return Q in %o0 */
+
+END(.udiv)

+ 160 - 0
libc/sysdeps/linux/sparc/umul.S

@@ -0,0 +1,160 @@
+/*
+ * Unsigned multiply.  Returns %o0 * %o1 in %o1%o0 (i.e., %o1 holds the
+ * upper 32 bits of the 64-bit product).
+ *
+ * This code optimizes short (less than 13-bit) multiplies.  Short
+ * multiplies require 25 instruction cycles, and long ones require
+ * 45 instruction cycles.
+ *
+ * On return, overflow has occurred (%o1 is not zero) if and only if
+ * the Z condition code is clear, allowing, e.g., the following:
+ *
+ *	call	.umul
+ *	nop
+ *	bnz	overflow	(or tnz)
+ *
+ * Register usage, as visible in the code below:
+ *  %y		loaded with %o0 (the multiplier); read back afterwards
+ *		for the low bits of the product
+ *  %o4		partial product built up by the mulscc steps
+ *  %o2		scratch on the long path (sign-mask compensation term)
+ *  %o5		scratch on the short path (copy of %y)
+ *
+ * The routine allocates no register window (no save/restore) and
+ * returns with retl; the addcc in each retl delay slot both writes
+ * %o1 and sets the Z flag described above.
+ */
+
+#include <sysdep.h>
+
+
+.global   .umul;
+.align 4;
+.type  .umul ,@function; 
+
+.umul: 
+	or	%o0, %o1, %o4	/* union of both operands' bits, for the size test */
+	mov	%o0, %y			! multiplier -> Y
+	andncc	%o4, 0xfff, %g0		! test bits 12..31 of *both* args
+	be	.Lmul_shortway	! if zero, can do it the short way
+	 andcc	%g0, %g0, %o4		! zero the partial product; clear N & V
+
+	/*
+	 * Long multiply.  32 steps, followed by a final shift step.
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %o1, %o4	! 13
+	mulscc	%o4, %o1, %o4	! 14
+	mulscc	%o4, %o1, %o4	! 15
+	mulscc	%o4, %o1, %o4	! 16
+	mulscc	%o4, %o1, %o4	! 17
+	mulscc	%o4, %o1, %o4	! 18
+	mulscc	%o4, %o1, %o4	! 19
+	mulscc	%o4, %o1, %o4	! 20
+	mulscc	%o4, %o1, %o4	! 21
+	mulscc	%o4, %o1, %o4	! 22
+	mulscc	%o4, %o1, %o4	! 23
+	mulscc	%o4, %o1, %o4	! 24
+	mulscc	%o4, %o1, %o4	! 25
+	mulscc	%o4, %o1, %o4	! 26
+	mulscc	%o4, %o1, %o4	! 27
+	mulscc	%o4, %o1, %o4	! 28
+	mulscc	%o4, %o1, %o4	! 29
+	mulscc	%o4, %o1, %o4	! 30
+	mulscc	%o4, %o1, %o4	! 31
+	mulscc	%o4, %o1, %o4	! 32
+	mulscc	%o4, %g0, %o4	! final shift
+
+	/*
+	 * Normally, with the shift-and-add approach, if both numbers are
+	 * positive you get the correct result.  With 32-bit two's-complement
+	 * numbers, -x is represented as
+	 *
+	 *		  x		    32
+	 *	( 2  -  ------ ) mod 2  *  2
+	 *		   32
+	 *		  2
+	 *
+	 * (the `mod 2' subtracts 1 from 1.bbbb).  To avoid lots of 2^32s,
+	 * we can treat this as if the radix point were just to the left
+	 * of the sign bit (multiply by 2^32), and get
+	 *
+	 *	-x  =  (2 - x) mod 2
+	 *
+	 * Then, ignoring the `mod 2's for convenience:
+	 *
+	 *   x *  y	= xy
+	 *  -x *  y	= 2y - xy
+	 *   x * -y	= 2x - xy
+	 *  -x * -y	= 4 - 2x - 2y + xy
+	 *
+	 * For signed multiplies, we subtract (x << 32) from the partial
+	 * product to fix this problem for negative multipliers (see mul.s).
+	 * Because of the way the shift into the partial product is calculated
+	 * (N xor V), this term is automatically removed for the multiplicand,
+	 * so we don't have to adjust.
+	 *
+	 * But for unsigned multiplies, the high order bit wasn't a sign bit,
+	 * and the correction is wrong.  So for unsigned multiplies where the
+	 * high order bit is one, we end up with xy - (y << 32).  To fix it
+	 * we add y << 32.
+	 *
+	 * Only the #else branch below is assembled; the #if 0 branch is the
+	 * older branch-based form of the same compensation, kept for
+	 * reference.
+	 */
+#if 0
+	tst	%o1
+	bl,a	1f		! if %o1 < 0 (high order bit = 1),
+	 add	%o4, %o0, %o4	! %o4 += %o0 (add y to upper half)
+1:	rd	%y, %o0		! get lower half of product
+	retl
+	 addcc	%o4, %g0, %o1	! put upper half in place and set Z for %o1==0
+#else
+	/* Faster code from tege@sics.se.  */
+	sra	%o1, 31, %o2	! make mask from sign bit
+	and	%o0, %o2, %o2	! %o2 = 0 or %o0, depending on sign of %o1
+	rd	%y, %o0		! get lower half of product
+	retl
+	 addcc	%o4, %o2, %o1	! add compensation and put upper half in place
+#endif
+
+.Lmul_shortway:
+	/*
+	 * Short multiply.  12 steps, followed by a final shift step.
+	 * The resulting bits are off by 12 and (32-12) = 20 bit positions,
+	 * but there is no problem with %o0 being negative (unlike above),
+	 * and overflow is impossible (the answer is at most 24 bits long).
+	 */
+	mulscc	%o4, %o1, %o4	! 1
+	mulscc	%o4, %o1, %o4	! 2
+	mulscc	%o4, %o1, %o4	! 3
+	mulscc	%o4, %o1, %o4	! 4
+	mulscc	%o4, %o1, %o4	! 5
+	mulscc	%o4, %o1, %o4	! 6
+	mulscc	%o4, %o1, %o4	! 7
+	mulscc	%o4, %o1, %o4	! 8
+	mulscc	%o4, %o1, %o4	! 9
+	mulscc	%o4, %o1, %o4	! 10
+	mulscc	%o4, %o1, %o4	! 11
+	mulscc	%o4, %o1, %o4	! 12
+	mulscc	%o4, %g0, %o4	! final shift
+
+	/*
+	 * %o4 has 20 of the bits that should be in the result; %y has
+	 * the bottom 12 (as %y's top 12).  That is:
+	 *
+	 *	  %o4		    %y
+	 * +----------------+----------------+
+	 * | -12- |   -20-  | -12- |   -20-  |
+	 * +------(---------+------)---------+
+	 *	   -----result-----
+	 *
+	 * The 12 bits of %o4 left of the `result' area are all zero;
+	 * in fact, all top 20 bits of %o4 are zero.
+	 */
+
+	rd	%y, %o5	/* %o5 = %y, whose top 12 bits are the low result bits */
+	sll	%o4, 12, %o0	! shift middle bits left 12
+	srl	%o5, 20, %o5	! shift low bits right 20
+	or	%o5, %o0, %o0	/* low word of the product */
+	retl
+	 addcc	%g0, %g0, %o1	! %o1 = zero, and set Z
+
+.size  .umul , . -.umul

+ 350 - 0
libc/sysdeps/linux/sparc/urem.S

@@ -0,0 +1,350 @@
+   /* This file is generated from divrem.m4; DO NOT EDIT! */
+/*
+ * Division and remainder, from Appendix E of the Sparc Version 8
+ * Architecture Manual, with fixes from Gordon Irlam.
+ */
+
+/*
+ * Input: dividend and divisor in %o0 and %o1 respectively.
+ * Output: the unsigned remainder %o0 % %o1, returned in %o0.
+ *
+ * m4 parameters (the generator substituted its macro names inside this
+ * list as well, so each entry now shows the value used for this file:
+ * the function is .urem, the operation is rem, and signed is false):
+ *  .urem	name of function to generate
+ *  rem		rem=div => %o0 / %o1; rem=rem => %o0 % %o1
+ *  false		false=true => signed; false=false => unsigned
+ *
+ * Algorithm parameters:
+ *  N		how many bits per iteration we try to get (4)
+ *  WORDSIZE	total number of bits (32)
+ *
+ * Derived constants:
+ *  TOPBITS	number of bits in the top decade of a number
+ *
+ * Important variables:
+ *  Q		the partial quotient under development (initially 0)
+ *  R		the remainder so far, initially the dividend
+ *  ITER	number of main division loop iterations required;
+ *		equal to ceil(log2(quotient) / N).  Note that this
+ *		is the log base (2^N) of the quotient.
+ *  V		the current comparand, initially divisor*2^(ITER*N-1)
+ *
+ * Register usage in this expansion:
+ *  %o5		V; starts as a copy of the divisor, then is scaled
+ *  %o3		R; starts as a copy of the dividend, finally moved to %o0
+ *  %o2		Q; still accumulated here, but not returned
+ *  %o4		ITER; counted up while scaling V, down in the main loop
+ *  %o1		the unmodified divisor, added back in the final fixup
+ *  %g1		1 << 27 (shifted to 1 << 31 on the overflow path); the
+ *		bound that selects the large-dividend path
+ *  %g2		number of single-bit steps on the large-dividend path
+ *
+ * The routine allocates no register window (no save/restore) and
+ * returns with retl.  The instruction following every branch is its
+ * delay slot.
+ *
+ * Labels of the form .L<depth>.<16 + accumulated bits> are the
+ * "remainder is negative" targets of the unrolled 4-level decision
+ * tree in the main loop; each leaf adds (accumulated bits * 2 +/- 1)
+ * to Q.
+ *
+ * Cost:
+ *  Current estimate for non-large dividend is
+ *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
+ *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
+ *  different path, as the upper bits of the quotient must be developed
+ *  one bit at a time.
+ */
+
+
+
+#include <sysdep.h>
+
+
+.global   .urem;
+.align 4;
+.type  .urem ,@function; 
+
+.urem: 
+
+	! Ready to divide.  Compute size of quotient; scale comparand.
+	orcc	%o1, %g0, %o5	/* V = divisor; Z is set when the divisor is 0 */
+	bne	1f
+	mov	%o0, %o3	/* delay slot: R = dividend */
+
+		! Divide by zero trap.  If it returns, return 0 (about as
+		! wrong as possible, but that is what SunOS does...).
+		ta	0x02 
+		retl
+		clr	%o0
+
+1:
+	cmp	%o3, %o5			! if %o1 exceeds %o0, done
+	blu	.Lgot_result		! (and algorithm fails otherwise)
+	clr	%o2	/* delay slot: Q = 0 */
+	sethi	%hi(1 << (32 - 4 - 1)), %g1	/* %g1 = 1 << 27 */
+	cmp	%o3, %g1
+	blu	.Lnot_really_big
+	clr	%o4	/* delay slot: ITER = 0 */
+
+	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
+	! as our usual N-at-a-shot divide step will cause overflow and havoc.
+	! The number of bits in the result here is N*ITER+SC, where SC <= N.
+	! Compute ITER in an unorthodox manner: know we need to shift V into
+	! the top decade: so do not even bother to compare to R.
+	1:
+		cmp	%o5, %g1
+		bgeu	3f
+		mov	1, %g2	/* delay slot: single-bit step count = 1 */
+		sll	%o5, 4, %o5
+		b	1b
+		add	%o4, 1, %o4	/* delay slot: ITER++ */
+
+	! Now compute %g2.
+	2:	addcc	%o5, %o5, %o5
+		bcc	.Lnot_too_big
+		add	%g2, 1, %g2
+
+		! We get here if the %o1 overflowed while shifting.
+		! This means that %o3 has the high-order bit set.
+		! Restore %o5 and subtract from %o3.
+		sll	%g1, 4, %g1	! high order bit
+		srl	%o5, 1, %o5		! rest of %o5
+		add	%o5, %g1, %o5
+		b	.Ldo_single_div
+		sub	%g2, 1, %g2
+
+	.Lnot_too_big:
+	3:	cmp	%o5, %o3
+		blu	2b
+		nop
+		be	.Ldo_single_div
+		nop
+	/* NB: these are commented out in the V8-Sparc manual as well */
+	/* (I do not understand this) */
+	! %o5 > %o3: went too far: back up 1 step
+	!	srl	%o5, 1, %o5
+	!	dec	%g2
+	! do single-bit divide steps
+	!
+	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
+	! first divide step without thinking.  BUT, the others are conditional,
+	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
+	! order bit set in the first step, just falling into the regular
+	! division loop will mess up the first time around.
+	! So we unroll slightly...
+	.Ldo_single_div:
+		subcc	%g2, 1, %g2
+		bl	.Lend_regular_divide
+		nop
+		sub	%o3, %o5, %o3
+		mov	1, %o2
+		b	.Lend_single_divloop
+		nop
+	.Lsingle_divloop:
+		sll	%o2, 1, %o2
+		bl	1f
+		srl	%o5, 1, %o5
+		! %o3 >= 0
+		sub	%o3, %o5, %o3
+		b	2f
+		add	%o2, 1, %o2
+	1:	! %o3 < 0
+		add	%o3, %o5, %o3
+		sub	%o2, 1, %o2
+	2:
+	.Lend_single_divloop:
+		subcc	%g2, 1, %g2
+		bge	.Lsingle_divloop
+		tst	%o3
+		b,a	.Lend_regular_divide	/* annulled: the following sll is not executed */
+
+.Lnot_really_big:
+1:
+	sll	%o5, 4, %o5
+	cmp	%o5, %o3
+	bleu	1b
+	addcc	%o4, 1, %o4	/* delay slot: ITER++ */
+	be	.Lgot_result
+	sub	%o4, 1, %o4
+
+	tst	%o3	! set up for initial iteration
+.Ldivloop:
+	sll	%o2, 4, %o2	/* make room in Q for the 4 bits of this pass */
+		! depth 1, accumulated bits 0
+	bl	.L1.16
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 2, accumulated bits 1
+	bl	.L2.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 3
+	bl	.L3.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 7
+	bl	.L4.23
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2+1), %o2
+	
+.L4.23:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (7*2-1), %o2
+	
+	
+.L3.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 5
+	bl	.L4.21
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2+1), %o2
+	
+.L4.21:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (5*2-1), %o2
+	
+	
+	
+.L2.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits 1
+	bl	.L3.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 3
+	bl	.L4.19
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2+1), %o2
+	
+.L4.19:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (3*2-1), %o2
+	
+	
+.L3.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits 1
+	bl	.L4.17
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2+1), %o2
+	
+.L4.17:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (1*2-1), %o2
+	
+	
+	
+	
+.L1.16:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 2, accumulated bits -1
+	bl	.L2.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -1
+	bl	.L3.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -1
+	bl	.L4.15
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2+1), %o2
+	
+.L4.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-1*2-1), %o2
+	
+	
+.L3.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -3
+	bl	.L4.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2+1), %o2
+	
+.L4.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-3*2-1), %o2
+	
+	
+	
+.L2.15:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 3, accumulated bits -3
+	bl	.L3.13
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -5
+	bl	.L4.11
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2+1), %o2
+	
+.L4.11:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-5*2-1), %o2
+	
+	
+.L3.13:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+			! depth 4, accumulated bits -7
+	bl	.L4.9
+	srl	%o5,1,%o5
+	! remainder is positive
+	subcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2+1), %o2
+	
+.L4.9:
+	! remainder is negative
+	addcc	%o3,%o5,%o3
+		b	9f
+		add	%o2, (-7*2-1), %o2
+	
+	
+	
+	
+	9:
+.Lend_regular_divide:
+	subcc	%o4, 1, %o4	/* ITER-- ; loop again while the result is >= 0 */
+	bge	.Ldivloop
+	tst	%o3	/* delay slot: condition codes reflect R */
+	bl,a	.Lgot_result
+	! non-restoring fixup here (one instruction only!)
+	add	%o3, %o1, %o3	/* annulled unless R < 0: add the divisor back */
+
+
+.Lgot_result:
+
+	retl
+	mov %o3, %o0	/* delay slot: return R in %o0 */
+
+END(.urem)