
SH-specific bits needed for NPTL

 * unified atomic.h compare-and-exchange macros
 * clone.S with RESET_PID support
 * SH-specific versions of pread/pwrite with cancellation support
 * check SHARED instead of __PIC__

Signed-off-by: Austin Foxley <austinf@cetoncorp.com>
Austin Foxley, 16 years ago
commit 6ffcc881dc

+ 2 - 2
libc/sysdeps/linux/sh/Makefile.arch

@@ -7,6 +7,6 @@
 #
 
 CSRC := \
-	mmap.c pipe.c __init_brk.c brk.c sbrk.c pread_write.c cacheflush.c
+	mmap.c pipe.c __init_brk.c brk.c sbrk.c pread_write.c longjmp.c cacheflush.c
 
-SSRC := setjmp.S __longjmp.S vfork.S clone.S ___fpscr_values.S
+SSRC := setjmp.S __longjmp.S ___fpscr_values.S

+ 195 - 321
libc/sysdeps/linux/sh/bits/atomic.h

@@ -54,6 +54,10 @@ typedef uintmax_t uatomic_max_t;
     Japan. http://lc.linux.or.jp/lc2002/papers/niibe0919h.pdf (in
     Japanese).
 
+    Niibe Yutaka, "gUSA: User Space Atomicity with Little Kernel
+    Modification", LinuxTag 2003, Rome.
+    http://www.semmel.ch/Linuxtag-DVD/talks/170/paper.html (in English).
+
     B.N. Bershad, D. Redell, and J. Ellis, "Fast Mutual Exclusion for
     Uniprocessors",  Proceedings of the Fifth Architectural Support for
     Programming Languages and Operating Systems (ASPLOS), pp. 223-233,
@@ -65,56 +69,44 @@ typedef uintmax_t uatomic_max_t;
       r1:     saved stack pointer
 */
 
-#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
-  ({ __typeof (*(mem)) __result; \
-     __asm__ __volatile__ ("\
+/* Avoid having lots of different versions of compare and exchange,
+   by having this one complicated version. Parameters:
+      bwl:     b, w or l for 8, 16 and 32 bit versions.
+      version: val or bool, depending on whether the result is the
+               previous value or a bool indicating whether the transfer
+               did happen (note this needs inverting before being
+               returned in atomic_compare_and_exchange_bool).
+*/
+
+#define __arch_compare_and_exchange_n(mem, newval, oldval, bwl, version) \
+  ({ signed long __result; \
+     __asm __volatile ("\
 	.align 2\n\
 	.align 2\n\
 	mova 1f,r0\n\
 	mova 1f,r0\n\
 	nop\n\
 	nop\n\
 	mov r15,r1\n\
 	mov r15,r1\n\
 	mov #-8,r15\n\
 	mov #-8,r15\n\
-     0: mov.b @%1,%0\n\
+     0: mov." #bwl " @%1,%0\n\
 	cmp/eq %0,%3\n\
 	cmp/eq %0,%3\n\
 	bf 1f\n\
 	bf 1f\n\
-	mov.b %2,@%1\n\
-     1: mov r1,r15"\
-	: "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \
-	: "r0", "r1", "t", "memory"); \
+	mov." #bwl " %2,@%1\n\
+     1: mov r1,r15\n\
+     .ifeqs \"bool\",\"" #version "\"\n\
+        movt %0\n\
+     .endif\n"					\
+	: "=&r" (__result)			\
+	: "r" (mem), "r" (newval), "r" (oldval)	\
+	: "r0", "r1", "t", "memory");		\
      __result; })
 
+#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
+  __arch_compare_and_exchange_n(mem, newval, (int8_t)(oldval), b, val)
+
 #define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval) \
-  ({ __typeof (*(mem)) __result; \
-     __asm__ __volatile__ ("\
-	.align 2\n\
-	mova 1f,r0\n\
-	nop\n\
-	mov r15,r1\n\
-	mov #-8,r15\n\
-     0: mov.w @%1,%0\n\
-	cmp/eq %0,%3\n\
-	bf 1f\n\
-	mov.w %2,@%1\n\
-     1: mov r1,r15"\
-	: "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \
-	: "r0", "r1", "t", "memory"); \
-     __result; })
+  __arch_compare_and_exchange_n(mem, newval, (int16_t)(oldval), w, val)
 
 #define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
-  ({ __typeof (*(mem)) __result; \
-     __asm__ __volatile__ ("\
-	.align 2\n\
-	mova 1f,r0\n\
-	nop\n\
-	mov r15,r1\n\
-	mov #-8,r15\n\
-     0: mov.l @%1,%0\n\
-	cmp/eq %0,%3\n\
-	bf 1f\n\
-	mov.l %2,@%1\n\
-     1: mov r1,r15"\
-	: "=&r" (__result) : "r" (mem), "r" (newval), "r" (oldval) \
-	: "r0", "r1", "t", "memory"); \
-     __result; })
+  __arch_compare_and_exchange_n(mem, newval, (int32_t)(oldval), l, val)
 
 /* XXX We do not really need 64-bit compare-and-exchange.  At least
    not in the moment.  Using it would mean causing portability
@@ -122,298 +114,180 @@ typedef uintmax_t uatomic_max_t;
    such an operation.  So don't define any code for now.  */
 
 # define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
-  (abort (), (__typeof (*mem)) 0)
+  (abort (), 0)
+
+/* For "bool" routines, return if the exchange did NOT occur */
+
+#define __arch_compare_and_exchange_bool_8_acq(mem, newval, oldval) \
+  (! __arch_compare_and_exchange_n(mem, newval, (int8_t)(oldval), b, bool))
+
+#define __arch_compare_and_exchange_bool_16_acq(mem, newval, oldval) \
+  (! __arch_compare_and_exchange_n(mem, newval, (int16_t)(oldval), w, bool))
+
+#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \
+  (! __arch_compare_and_exchange_n(mem, newval, (int32_t)(oldval), l, bool))
+
+# define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \
+  (abort (), 0)
+
+/* Similar to the above, have one template which can be used in a
+   number of places. This version returns both the old and the new
+   values of the location. Parameters:
+      bwl:     b, w or l for 8, 16 and 32 bit versions.
+      oper:    The instruction to perform on the old value.
+   Note old is not sign extended, so should be an unsigned long.
+*/
+
+#define __arch_operate_old_new_n(mem, value, old, new, bwl, oper)	\
+  (void) ({ __asm __volatile ("\
+	.align 2\n\
+	mova 1f,r0\n\
+	mov r15,r1\n\
+	nop\n\
+	mov #-8,r15\n\
+     0: mov." #bwl " @%2,%0\n\
+	mov %0,%1\n\
+	" #oper " %3,%1\n\
+	mov." #bwl " %1,@%2\n\
+     1: mov r1,r15"			\
+	: "=&r" (old), "=&r"(new)	\
+	: "r" (mem), "r" (value)	\
+	: "r0", "r1", "memory");	\
+    })
+
+#define __arch_exchange_and_add_8_int(mem, value)			\
+  ({ int32_t __value = (value), __new, __old;				\
+    __arch_operate_old_new_n((mem), __value, __old, __new, b, add);	\
+    __old; })
+
+#define __arch_exchange_and_add_16_int(mem, value)			\
+  ({ int32_t __value = (value), __new, __old;				\
+    __arch_operate_old_new_n((mem), __value, __old, __new, w, add);	\
+    __old; })
+
+#define __arch_exchange_and_add_32_int(mem, value)			\
+  ({ int32_t __value = (value), __new, __old;				\
+    __arch_operate_old_new_n((mem), __value, __old, __new, l, add);	\
+    __old; })
+
+#define __arch_exchange_and_add_64_int(mem, value)			\
+  (abort (), 0)
 
 #define atomic_exchange_and_add(mem, value) \
-  ({ __typeof (*(mem)) __result, __tmp, __value = (value); \
-     if (sizeof (*(mem)) == 1) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  mov r15,r1\n\
-	  mov #-6,r15\n\
-       0: mov.b @%2,%0\n\
-	  add %0,%1\n\
-	  mov.b %1,@%2\n\
-       1: mov r1,r15"\
-	: "=&r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
-	: "r0", "r1", "memory"); \
-     else if (sizeof (*(mem)) == 2) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  mov r15,r1\n\
-	  mov #-6,r15\n\
-       0: mov.w @%2,%0\n\
-	  add %0,%1\n\
-	  mov.w %1,@%2\n\
-       1: mov r1,r15"\
-	: "=&r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
-	: "r0", "r1", "memory"); \
-     else if (sizeof (*(mem)) == 4) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  mov r15,r1\n\
-	  mov #-6,r15\n\
-       0: mov.l @%2,%0\n\
-	  add %0,%1\n\
-	  mov.l %1,@%2\n\
-       1: mov r1,r15"\
-	: "=&r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
-	: "r0", "r1", "memory"); \
-     else \
-       { \
-	 __typeof (mem) memp = (mem); \
-	 do \
-	   __result = *memp; \
-	 while (__arch_compare_and_exchange_val_64_acq \
-		 (memp,	__result + __value, __result) == __result); \
-	 (void) __value; \
-       } \
-     __result; })
+  __atomic_val_bysize (__arch_exchange_and_add, int, mem, value)
+
+
+/* Again, another template. We get a slight optimisation when the old value
+   does not need to be returned. Parameters:
+      bwl:     b, w or l for 8, 16 and 32 bit versions.
+      oper:    The instruction to perform on the old value.
+*/
+
+#define __arch_operate_new_n(mem, value, bwl, oper)	 \
+  ({ int32_t __value = (value), __new; \
+     __asm __volatile ("\
+	.align 2\n\
+	mova 1f,r0\n\
+	mov r15,r1\n\
+	mov #-6,r15\n\
+     0: mov." #bwl " @%1,%0\n\
+	" #oper " %2,%0\n\
+	mov." #bwl " %0,@%1\n\
+     1: mov r1,r15"			\
+	: "=&r" (__new)			\
+	: "r" (mem), "r" (__value)	\
+	: "r0", "r1", "memory");	\
+     __new;				\
+  })
+
+#define __arch_add_8_int(mem, value)		\
+  __arch_operate_new_n(mem, value, b, add)
+
+#define __arch_add_16_int(mem, value)		\
+  __arch_operate_new_n(mem, value, w, add)
+
+#define __arch_add_32_int(mem, value)		\
+  __arch_operate_new_n(mem, value, l, add)
+
+#define __arch_add_64_int(mem, value)		\
+  (abort (), 0)
 
 #define atomic_add(mem, value) \
-  (void) ({ __typeof (*(mem)) __tmp, __value = (value); \
-	    if (sizeof (*(mem)) == 1) \
-	      __asm__ __volatile__ ("\
-		.align 2\n\
-		mova 1f,r0\n\
-		mov r15,r1\n\
-		mov #-6,r15\n\
-	     0: mov.b @%1,r2\n\
-		add r2,%0\n\
-		mov.b %0,@%1\n\
-	     1: mov r1,r15"\
-		: "=&r" (__tmp) : "r" (mem), "0" (__value) \
-		: "r0", "r1", "r2", "memory"); \
-	    else if (sizeof (*(mem)) == 2) \
-	      __asm__ __volatile__ ("\
-		.align 2\n\
-		mova 1f,r0\n\
-		mov r15,r1\n\
-		mov #-6,r15\n\
-	     0: mov.w @%1,r2\n\
-		add r2,%0\n\
-		mov.w %0,@%1\n\
-	     1: mov r1,r15"\
-		: "=&r" (__tmp) : "r" (mem), "0" (__value) \
-		: "r0", "r1", "r2", "memory"); \
-	    else if (sizeof (*(mem)) == 4) \
-	      __asm__ __volatile__ ("\
-		.align 2\n\
-		mova 1f,r0\n\
-		mov r15,r1\n\
-		mov #-6,r15\n\
-	     0: mov.l @%1,r2\n\
-		add r2,%0\n\
-		mov.l %0,@%1\n\
-	     1: mov r1,r15"\
-		: "=&r" (__tmp) : "r" (mem), "0" (__value) \
-		: "r0", "r1", "r2", "memory"); \
-	    else \
-	      { \
-		__typeof (*(mem)) oldval; \
-		__typeof (mem) memp = (mem); \
-		do \
-		  oldval = *memp; \
-		while (__arch_compare_and_exchange_val_64_acq \
-			(memp, oldval + __value, oldval) == oldval); \
-		(void) __value; \
-	      } \
-	    })
+  ((void) __atomic_val_bysize (__arch_add, int, mem, value))
+
+
+#define __arch_add_negative_8_int(mem, value)		\
+  (__arch_operate_new_n(mem, value, b, add) < 0)
+
+#define __arch_add_negative_16_int(mem, value)		\
+  (__arch_operate_new_n(mem, value, w, add) < 0)
+
+#define __arch_add_negative_32_int(mem, value)		\
+  (__arch_operate_new_n(mem, value, l, add) < 0)
+
+#define __arch_add_negative_64_int(mem, value)		\
+  (abort (), 0)
 
 #define atomic_add_negative(mem, value) \
-  ({ unsigned char __result; \
-     __typeof (*(mem)) __tmp, __value = (value); \
-     if (sizeof (*(mem)) == 1) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  mov r15,r1\n\
-	  mov #-6,r15\n\
-       0: mov.b @%2,r2\n\
-	  add r2,%1\n\
-	  mov.b %1,@%2\n\
-       1: mov r1,r15\n\
-	  shal %1\n\
-	  movt %0"\
-	: "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
-	: "r0", "r1", "r2", "t", "memory"); \
-     else if (sizeof (*(mem)) == 2) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  mov r15,r1\n\
-	  mov #-6,r15\n\
-       0: mov.w @%2,r2\n\
-	  add r2,%1\n\
-	  mov.w %1,@%2\n\
-       1: mov r1,r15\n\
-	  shal %1\n\
-	  movt %0"\
-	: "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
-	: "r0", "r1", "r2", "t", "memory"); \
-     else if (sizeof (*(mem)) == 4) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  mov r15,r1\n\
-	  mov #-6,r15\n\
-       0: mov.l @%2,r2\n\
-	  add r2,%1\n\
-	  mov.l %1,@%2\n\
-       1: mov r1,r15\n\
-	  shal %1\n\
-	  movt %0"\
-	: "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
-	: "r0", "r1", "r2", "t", "memory"); \
-     else \
-       abort (); \
-     __result; })
+  __atomic_bool_bysize (__arch_add_negative, int, mem, value)
+
+
+#define __arch_add_zero_8_int(mem, value)		\
+  (__arch_operate_new_n(mem, value, b, add) == 0)
+
+#define __arch_add_zero_16_int(mem, value)		\
+  (__arch_operate_new_n(mem, value, w, add) == 0)
+
+#define __arch_add_zero_32_int(mem, value)		\
+  (__arch_operate_new_n(mem, value, l, add) == 0)
+
+#define __arch_add_zero_64_int(mem, value)		\
+  (abort (), 0)
 
 #define atomic_add_zero(mem, value) \
-  ({ unsigned char __result; \
-     __typeof (*(mem)) __tmp, __value = (value); \
-     if (sizeof (*(mem)) == 1) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  mov r15,r1\n\
-	  mov #-6,r15\n\
-       0: mov.b @%2,r2\n\
-	  add r2,%1\n\
-	  mov.b %1,@%2\n\
-       1: mov r1,r15\n\
-	  tst %1,%1\n\
-	  movt %0"\
-	: "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
-	: "r0", "r1", "r2", "t", "memory"); \
-     else if (sizeof (*(mem)) == 2) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  mov r15,r1\n\
-	  mov #-6,r15\n\
-       0: mov.w @%2,r2\n\
-	  add r2,%1\n\
-	  mov.w %1,@%2\n\
-       1: mov r1,r15\n\
-	  tst %1,%1\n\
-	  movt %0"\
-	: "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
-	: "r0", "r1", "r2", "t", "memory"); \
-     else if (sizeof (*(mem)) == 4) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  mov r15,r1\n\
-	  mov #-6,r15\n\
-       0: mov.l @%2,r2\n\
-	  add r2,%1\n\
-	  mov.l %1,@%2\n\
-       1: mov r1,r15\n\
-	  tst %1,%1\n\
-	  movt %0"\
-	: "=r" (__result), "=&r" (__tmp) : "r" (mem), "1" (__value) \
-	: "r0", "r1", "r2", "t", "memory"); \
-     else \
-       abort (); \
-     __result; })
+  __atomic_bool_bysize (__arch_add_zero, int, mem, value)
+
 
 #define atomic_increment_and_test(mem) atomic_add_zero((mem), 1)
 #define atomic_decrement_and_test(mem) atomic_add_zero((mem), -1)
 
-#define atomic_bit_set(mem, bit) \
-  (void) ({ unsigned int __mask = 1 << (bit); \
-	    if (sizeof (*(mem)) == 1) \
-	      __asm__ __volatile__ ("\
-		.align 2\n\
-		mova 1f,r0\n\
-		mov r15,r1\n\
-		mov #-6,r15\n\
-	     0: mov.b @%0,r2\n\
-		or %1,r2\n\
-		mov.b r2,@%0\n\
-	     1: mov r1,r15"\
-		: : "r" (mem), "r" (__mask) \
-		: "r0", "r1", "r2", "memory"); \
-	    else if (sizeof (*(mem)) == 2) \
-	      __asm__ __volatile__ ("\
-		.align 2\n\
-		mova 1f,r0\n\
-		mov r15,r1\n\
-		mov #-6,r15\n\
-	     0: mov.w @%0,r2\n\
-		or %1,r2\n\
-		mov.w r2,@%0\n\
-	     1: mov r1,r15"\
-		: : "r" (mem), "r" (__mask) \
-		: "r0", "r1", "r2", "memory"); \
-	    else if (sizeof (*(mem)) == 4) \
-	      __asm__ __volatile__ ("\
-		.align 2\n\
-		mova 1f,r0\n\
-		mov r15,r1\n\
-		mov #-6,r15\n\
-	     0: mov.l @%0,r2\n\
-		or %1,r2\n\
-		mov.l r2,@%0\n\
-	     1: mov r1,r15"\
-		: : "r" (mem), "r" (__mask) \
-		: "r0", "r1", "r2", "memory"); \
-	    else \
-	      abort (); \
-	    })
-
-#define atomic_bit_test_set(mem, bit) \
-  ({ unsigned int __mask = 1 << (bit); \
-     unsigned int __result = __mask; \
-     if (sizeof (*(mem)) == 1) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  nop\n\
-	  mov r15,r1\n\
-	  mov #-8,r15\n\
-       0: mov.b @%2,r2\n\
-	  or r2,%1\n\
-	  and r2,%0\n\
-	  mov.b %1,@%2\n\
-       1: mov r1,r15"\
-	: "=&r" (__result), "=&r" (__mask) \
-	: "r" (mem), "0" (__result), "1" (__mask) \
-	: "r0", "r1", "r2", "memory"); \
-     else if (sizeof (*(mem)) == 2) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  nop\n\
-	  mov r15,r1\n\
-	  mov #-8,r15\n\
-       0: mov.w @%2,r2\n\
-	  or r2,%1\n\
-	  and r2,%0\n\
-	  mov.w %1,@%2\n\
-       1: mov r1,r15"\
-	: "=&r" (__result), "=&r" (__mask) \
-	: "r" (mem), "0" (__result), "1" (__mask) \
-	: "r0", "r1", "r2", "memory"); \
-     else if (sizeof (*(mem)) == 4) \
-       __asm__ __volatile__ ("\
-	  .align 2\n\
-	  mova 1f,r0\n\
-	  nop\n\
-	  mov r15,r1\n\
-	  mov #-8,r15\n\
-       0: mov.l @%2,r2\n\
-	  or r2,%1\n\
-	  and r2,%0\n\
-	  mov.l %1,@%2\n\
-       1: mov r1,r15"\
-	: "=&r" (__result), "=&r" (__mask) \
-	: "r" (mem), "0" (__result), "1" (__mask) \
-	: "r0", "r1", "r2", "memory"); \
-     else \
-       abort (); \
-     __result; })
+
+#define __arch_bit_set_8_int(mem, value)		\
+  __arch_operate_new_n(mem, 1<<(value), b, or)
+
+#define __arch_bit_set_16_int(mem, value)		\
+  __arch_operate_new_n(mem, 1<<(value), w, or)
+
+#define __arch_bit_set_32_int(mem, value)		\
+  __arch_operate_new_n(mem, 1<<(value), l, or)
+  
+#define __arch_bit_set_64_int(mem, value)		\
+  (abort (), 0)
+
+#define __arch_add_64_int(mem, value)			\
+  (abort (), 0)
+
+#define atomic_bit_set(mem, value) \
+  ((void) __atomic_val_bysize (__arch_bit_set, int, mem, value))
+
+
+#define __arch_bit_test_set_8_int(mem, value)				\
+  ({ int32_t __value = 1<<(value), __new, __old;			\
+    __arch_operate_old_new_n((mem), __value, __old, __new, b, or);	\
+    __old & __value; })
+
+#define __arch_bit_test_set_16_int(mem, value)				\
+  ({ int32_t __value = 1<<(value), __new, __old;			\
+    __arch_operate_old_new_n((mem), __value, __old, __new, w, or);	\
+    __old & __value; })
+
+#define __arch_bit_test_set_32_int(mem, value)				\
+  ({ int32_t __value = 1<<(value), __new, __old;			\
+    __arch_operate_old_new_n((mem), __value, __old, __new, l, or);	\
+    __old & __value; })
+
+#define __arch_bit_test_set_64_int(mem, value)	\
+  (abort (), 0)
+
+#define atomic_bit_test_set(mem, value) \
+  __atomic_val_bysize (__arch_bit_test_set, int, mem, value)

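Note on how the unified templates above are consumed: uClibc's generic atomic.h dispatches on operand size through __atomic_val_bysize/__atomic_bool_bysize and pastes in the _8_int/_16_int/_32_int variants, so callers only ever see the generic wrappers. A minimal caller sketch, assuming only those generic wrappers (atomic_compare_and_exchange_val_acq returns the value previously in memory, atomic_exchange_and_add returns the old value); the function and variable names below are made up for illustration:

    /* Sketch only: a CAS retry loop and a fetch-and-add built on the
       generic wrappers that the size-dispatch macros above implement.  */
    #include <atomic.h>          /* libc-internal header, not installed */
    #include <stdint.h>

    static int32_t refcount;

    static int32_t take_ref_if_live(void)
    {
        int32_t old;
        do {
            old = refcount;
            if (old == 0)
                return 0;        /* already dead, do not revive it */
            /* retry if another thread raced us between the read and the CAS */
        } while (atomic_compare_and_exchange_val_acq(&refcount, old + 1, old) != old);
        return old + 1;
    }

    static int32_t drop_ref(void)
    {
        /* atomic_exchange_and_add hands back the pre-decrement value */
        return atomic_exchange_and_add(&refcount, -1) - 1;
    }

The bool flavours above deliberately invert the movt result, so atomic_compare_and_exchange_bool_acq keeps the usual convention of returning zero when the exchange happened.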
+ 72 - 63
libc/sysdeps/linux/sh/clone.S

@@ -1,4 +1,4 @@
-/* Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+/* Copyright (C) 1999, 2000, 2003, 2004, 2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -20,93 +20,94 @@
    and invokes a function in the right context after its all over.  */
 
 #include <features.h>
-#include <sys/syscall.h>
-#define _ERRNO_H
+#include <asm/unistd.h>
+#include <sysdep.h>
+#define _ERRNO_H	1
 #include <bits/errno.h>
-#include <bits/sysnum.h>
-
-
-#ifdef __PIC__
-#define PLTJMP(_x)	_x@PLT
-#else
-#define PLTJMP(_x)	_x
+#ifdef RESET_PID
+#include <tcb-offsets.h>
 #endif
+/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg,
+	     pid_t *ptid, void *tls, pid_t *ctid); */
 
-
-/* int clone(int (*fn)(void *arg), void *child_stack, int flags, void *arg); */
-
-        .text
-
-.text
-.align 4
-.type	clone,@function
-.globl	clone;
-clone:
+	.text
+ENTRY(__clone)
 	/* sanity check arguments.  */
 	tst	r4, r4
-	bt	0f
-	tst	r5, r5
-	bf/s	1f
-	 mov	#+__NR_clone, r3
-0:		
-	bra __syscall_error
-	 mov	#-EINVAL, r4
-
+	bt/s	0f
+	 tst	r5, r5
+	bf	1f
+0:
+	bra	.Lsyscall_error
+	 mov	#-EINVAL,r0
 1:
 	/* insert the args onto the new stack */
 	mov.l	r7, @-r5
 	/* save the function pointer as the 0th element */
 	mov.l	r4, @-r5
-	
+
 	/* do the system call */
 	mov	r6, r4
-	trapa	#(__SH_SYSCALL_TRAP_BASE + 2)
+	mov.l	@r15, r6
+	mov.l	@(8,r15), r7
+	mov.l	@(4,r15), r0
+	mov	#+SYS_ify(clone), r3
+	trapa	#0x15
 	mov     r0, r1
-#ifdef __sh2__
-/* 12 arithmetic shifts for the crappy sh2, because shad doesn't exist!	 */
-	shar	r1
-	shar	r1
-	shar	r1
-	shar	r1
-	shar	r1
-	shar	r1
-	shar	r1
-	shar	r1
-	shar	r1
-	shar	r1
-	shar	r1
-	shar	r1
-#else		
 	mov	#-12, r2
 	shad	r2, r1
-#endif
-	not	r1, r1			/* r1=0 means r0 = -1 to -4095 */
-	tst	r1, r1			/* i.e. error in linux */
-	bf/s	2f
-	 tst	r0, r0
-        bra __syscall_error
-	 mov	r0, r4
-
-2:
-	bt	3f
+	not	r1, r1			// r1=0 means r0 = -1 to -4095
+	tst	r1, r1			// i.e. error in linux
+	bf	.Lclone_end
+.Lsyscall_error:	
+	SYSCALL_ERROR_HANDLER
+.Lclone_end:
+	tst	r0, r0
+	bt	2f
+.Lpseudo_end:
 	rts
 	rts
 	 nop
 	 nop
+2:
+	/* terminate the stack frame */
+	mov	#0, r14
+#ifdef RESET_PID
+	mov	r4, r0
+	shlr16	r0
+	tst	#1, r0			// CLONE_THREAD = (1 << 16)
+	bf/s	4f
+	 mov	r4, r0
+	/* new pid */
+	shlr8	r0
+	tst	#1, r0			// CLONE_VM = (1 << 8)
+	bf/s	3f
+	 mov	#-1, r0
+	mov	#+SYS_ify(getpid), r3
+	trapa	#0x15
 3:
+	stc	gbr, r1
+	mov.w	.Lpidoff, r2
+	add	r1, r2
+	mov.l	r0, @r2	
+	mov.w	.Ltidoff, r2
+	add	r1, r2
+	mov.l	r0, @r2	
+4:
+#endif
 	/* thread starts */
 	mov.l	@r15, r1
 	jsr	@r1
 	 mov.l	@(4,r15), r4
 
 	/* we are done, passing the return value through r0  */
-	mov.l	.L1, r1
-#ifdef __PIC__
+	mov.l	.L3, r1
+#ifdef SHARED
 	mov.l	r12, @-r15
 	sts.l	pr, @-r15
 	mov	r0, r4
-	mova	.LG, r0  /* .LG from syscall_error.S */
+	mova	.LG, r0
 	mov.l	.LG, r12
 	add	r0, r12
-	mova	.L1, r0
+	mova	.L3, r0
 	add	r0, r1
 	jsr	@r1
 	 nop
@@ -118,8 +119,16 @@ clone:
 	 mov	r0, r4
 #endif
 	.align	2
-.L1:
-	.long	PLTJMP( HIDDEN_JUMPTARGET(_exit))
-.size clone,.-clone;
+.LG:
+	.long	_GLOBAL_OFFSET_TABLE_
+.L3:
+	.long	PLTJMP(C_SYMBOL_NAME(_exit))
+#ifdef RESET_PID
+.Lpidoff:
+	.word	PID - TLS_PRE_TCB_SIZE
+.Ltidoff:
+	.word	TID - TLS_PRE_TCB_SIZE
+#endif
+PSEUDO_END (__clone)
 
-#include "syscall_error.S"
+weak_alias (__clone, clone)

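The RESET_PID block is easier to follow as C. A rough equivalent of what the assembly does in the child once the clone trap has returned zero, using a stand-in struct for the TCB fields that the real code reaches through gbr plus .Lpidoff/.Ltidoff (the struct and function names here are illustrative, not uClibc symbols):

    #define _GNU_SOURCE
    #include <sched.h>                 /* CLONE_THREAD, CLONE_VM */
    #include <sys/syscall.h>
    #include <unistd.h>

    struct tcb_stub { pid_t pid; pid_t tid; };   /* stand-in for the NPTL TCB slots */

    static void reset_pid_in_child(struct tcb_stub *self, int clone_flags)
    {
        if (clone_flags & CLONE_THREAD)
            return;                    /* new thread: the cached pid/tid stay valid */

        pid_t pid;
        if (clone_flags & CLONE_VM)
            pid = -1;                  /* shared VM but not a thread: mark as unknown */
        else
            pid = syscall(SYS_getpid); /* plain child: ask the kernel directly */

        self->pid = pid;               /* the asm stores r0 at gbr + .Lpidoff */
        self->tid = pid;               /* ... and at gbr + .Ltidoff */
    }

CLONE_THREAD is bit 16 and CLONE_VM is bit 8, which is why the assembly can test them with shlr16/shlr8 followed by tst #1.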
+ 56 - 0
libc/sysdeps/linux/sh/longjmp.c

@@ -0,0 +1,56 @@
+/* Copyright (C) 1991, 92, 94, 95, 97, 98, 2000 Free Software Foundation, Inc.
+   Copyright (C) 2001 Hewlett-Packard Australia
+
+ This program is free software; you can redistribute it and/or modify it under
+ the terms of the GNU Library General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more
+ details.
+
+ You should have received a copy of the GNU Library General Public License
+ along with this program; if not, write to the Free Software Foundation, Inc.,
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ Derived in part from the Linux-8086 C library, the GNU C Library, and several
+ other sundry sources.  Files within this library are copyright by their
+ respective copyright holders.
+*/
+
+#include <stddef.h>
+#include <setjmp.h>
+#include <signal.h>
+
+libc_hidden_proto(sigprocmask)
+
+extern int __longjmp(char *env, int val);
+libc_hidden_proto(__longjmp)
+
+extern void _longjmp_unwind (jmp_buf env, int val);
+
+
+/* Set the signal mask to the one specified in ENV, and jump
+   to the position specified in ENV, causing the setjmp
+   call there to return VAL, or 1 if VAL is 0.  */
+void __libc_siglongjmp (sigjmp_buf env, int val)
+{
+  /* Perform any cleanups needed by the frames being unwound.  */
+
+  _longjmp_unwind (env, val);
+
+  if (env[0].__mask_was_saved)
+    /* Restore the saved signal mask.  */
+    (void) sigprocmask (SIG_SETMASK, &env[0].__saved_mask,
+			  (sigset_t *) NULL);
+
+  /* Call the machine-dependent function to restore machine state.  */
+  __longjmp ((char *) env[0].__jmpbuf, val ?: 1);
+}
+
+__asm__(".weak longjmp; longjmp = __libc_siglongjmp");
+__asm__(".weak _longjmp; _longjmp = __libc_siglongjmp");
+__asm__(".weak siglongjmp; siglongjmp = __libc_siglongjmp");
+strong_alias(__libc_siglongjmp, __libc_longjmp)

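Since longjmp, _longjmp and siglongjmp are all aliased to __libc_siglongjmp above, the usual setjmp-family semantics apply. A small standalone example of the path through it (standard API only, nothing commit-specific):

    #include <setjmp.h>
    #include <stdio.h>

    static sigjmp_buf env;

    int main(void)
    {
        /* second argument 1: save the current signal mask in env */
        int val = sigsetjmp(env, 1);
        if (val == 0) {
            puts("first pass, jumping");
            siglongjmp(env, 42);       /* restores the saved mask, then unwinds */
        }
        printf("came back from siglongjmp with %d\n", val);   /* prints 42 */
        return 0;
    }

Passing 0 as the value would make sigsetjmp return 1 instead, which is exactly what the val ?: 1 in the call to __longjmp enforces.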
+ 50 - 8
libc/sysdeps/linux/sh/pread_write.c

@@ -18,6 +18,13 @@
 #include <stdint.h>
 #include <endian.h>
 
+#ifdef __UCLIBC_HAS_THREADS_NATIVE__
+#include <sysdep-cancel.h>
+#else
+#define SINGLE_THREAD_P 1
+#endif
+
+
 #ifdef __NR_pread64             /* Newer kernels renamed but it's the same.  */
 # ifdef __NR_pread
 #  error "__NR_pread and __NR_pread64 both defined???"
@@ -32,18 +39,35 @@ static __inline__ _syscall6(ssize_t, __syscall_pread, int, fd, void *, buf,
 		size_t, count, int, dummy, off_t, offset_hi, off_t, offset_lo)
 
 ssize_t __libc_pread(int fd, void *buf, size_t count, off_t offset)
-{
-	return(__syscall_pread(fd,buf,count,0,__LONG_LONG_PAIR(offset >> 31,offset)));
+{ 
+	if (SINGLE_THREAD_P)
+		return(__syscall_pread(fd,buf,count,0,__LONG_LONG_PAIR(offset >> 31,offset)));
+
+#ifdef __UCLIBC_HAS_THREADS_NATIVE__
+	int oldtype = LIBC_CANCEL_ASYNC ();
+	ssize_t result = __syscall_pread(fd,buf,count,0,__LONG_LONG_PAIR(offset >> 31,offset));
+	LIBC_CANCEL_RESET (oldtype);
+	return result;
+#endif	
 }
 }
 weak_alias(__libc_pread,pread)
 
 # ifdef __UCLIBC_HAS_LFS__
 extern __typeof(pread64) __libc_pread64;
 ssize_t __libc_pread64(int fd, void *buf, size_t count, off64_t offset)
+{ 
 	uint32_t low = offset & 0xffffffff;
 	uint32_t low = offset & 0xffffffff;
 	uint32_t high = offset >> 32;
+
+	if (SINGLE_THREAD_P)
+		return __syscall_pread(fd, buf, count, 0, __LONG_LONG_PAIR (high, low));
+
+#ifdef __UCLIBC_HAS_THREADS_NATIVE__
+	int oldtype = LIBC_CANCEL_ASYNC ();
+	ssize_t result = __syscall_pread(fd, buf, count, 0, __LONG_LONG_PAIR (high, low));
+	LIBC_CANCEL_RESET (oldtype);
+	return result;
+#endif	
 }
 weak_alias(__libc_pread64,pread64)
 # endif /* __UCLIBC_HAS_LFS__  */
@@ -65,18 +89,36 @@ static __inline__ _syscall6(ssize_t, __syscall_pwrite, int, fd, const void *, bu
 		size_t, count, int, dummy, off_t, offset_hi, off_t, offset_lo)
 
 ssize_t __libc_pwrite(int fd, const void *buf, size_t count, off_t offset)
-{
-	return(__syscall_pwrite(fd,buf,count,0,__LONG_LONG_PAIR(offset >> 31,offset)));
+{ 
+	if (SINGLE_THREAD_P)
+		return __syscall_pwrite(fd,buf,count,0,__LONG_LONG_PAIR(offset >> 31,offset));
+
+#ifdef __UCLIBC_HAS_THREADS_NATIVE__
+	int oldtype = LIBC_CANCEL_ASYNC ();
+	ssize_t result = __syscall_pwrite(fd,buf,count,0,__LONG_LONG_PAIR(offset >> 31,offset));
+	LIBC_CANCEL_RESET (oldtype);
+	return result;
+#endif
+
 }
 weak_alias(__libc_pwrite,pwrite)
 
 # ifdef __UCLIBC_HAS_LFS__
 extern __typeof(pwrite64) __libc_pwrite64;
 ssize_t __libc_pwrite64(int fd, const void *buf, size_t count, off64_t offset)
-{
+{ 
 	uint32_t low = offset & 0xffffffff;
 	uint32_t high = offset >> 32;
-	return(__syscall_pwrite(fd, buf, count, 0, __LONG_LONG_PAIR (high, low)));
+
+	if (SINGLE_THREAD_P)
+		return __syscall_pwrite(fd, buf, count, 0, __LONG_LONG_PAIR (high, low));
+
+#ifdef __UCLIBC_HAS_THREADS_NATIVE__
+	int oldtype = LIBC_CANCEL_ASYNC ();
+	ssize_t result = __syscall_pwrite(fd, buf, count, 0, __LONG_LONG_PAIR (high, low));
+	LIBC_CANCEL_RESET (oldtype);
+	return result;
+#endif
 }
 weak_alias(__libc_pwrite64,pwrite64)
 # endif /* __UCLIBC_HAS_LFS__  */

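All four wrappers follow the same NPTL cancellation pattern: skip the bookkeeping entirely when the process is single-threaded, otherwise switch to asynchronous cancellation for the duration of the blocking syscall and restore the old type afterwards. A user-space analogue using only the public pthreads API (the libc-internal SINGLE_THREAD_P / LIBC_CANCEL_ASYNC / LIBC_CANCEL_RESET macros from sysdep-cancel.h do the same job with less overhead); the function name is made up:

    #include <pthread.h>
    #include <unistd.h>

    /* Briefly enable asynchronous cancellation around a blocking call so a
       pending pthread_cancel() can take effect while it is in progress. */
    static ssize_t read_with_async_cancel(int fd, void *buf, size_t count)
    {
        int oldtype;
        pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &oldtype);
        ssize_t result = read(fd, buf, count);
        pthread_setcanceltype(oldtype, NULL);    /* restore the caller's cancel type */
        return result;
    }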
+ 1 - 1
libc/sysdeps/linux/sh/setjmp.S

@@ -77,7 +77,7 @@ __sigsetjmp_intern:
 	mov.l	r9, @-r4
 	mov.l	r8, @-r4
 
-#ifdef __PIC__
+#ifdef __HAVE_SHARED__ 
 	mov.l	.LG, r2
 	mova	.LG, r0
 	add	r0, r2

+ 2 - 2
libc/sysdeps/linux/sh/syscall_error.S

@@ -3,7 +3,7 @@ __syscall_error:
 	/* Call errno_location, store '-r4' in errno and return -1 */
 	mov.l	r12, @-r15
 	sts.l	pr, @-r15
-#ifdef __PIC__
+#ifdef SHARED 
 	mova	.LG, r0
 	mov.l	.LG, r12
 	add	r0, r12
@@ -27,7 +27,7 @@ __syscall_error:
 
 	.align	4
 
-#ifdef __PIC__
+#ifdef SHARED
 1:	.long   __errno_location@GOT
 .LG:	.long	_GLOBAL_OFFSET_TABLE_
 #else
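For the last two files, the point of switching the guard from __PIC__ to SHARED (assuming the usual glibc/uClibc build convention: the compiler defines __PIC__ for any -fPIC compile, while the build system defines SHARED only while libc.so itself is being built) is that the GOT setup is only wanted in the shared library, not in PIC objects that land in the static libc.a. A small sketch of the distinction, not actual uClibc code:

    #include <stdio.h>

    /* Prints which call path the new guards would select under the
       current compile flags. */
    int main(void)
    {
    #ifdef SHARED
        puts("SHARED: building libc.so, use the GOT-relative call path");
    #elif defined __PIC__
        puts("__PIC__ only: PIC object for the static libc.a; "
             "the old guard wrongly picked the GOT path here");
    #else
        puts("non-PIC static object: direct call path");
    #endif
        return 0;
    }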