Browse Source

wait4: fall back to waitid(2) at runtime when wait4 returns ENOSYS

wait4 is a legacy/time32 syscall: on 32-bit arches its rusage carries
32-bit timevals.
Modern asm-generic/unistd.h (>= ~5.1) only defines __NR_wait4 when
__ARCH_WANT_TIME32_SYSCALLS is set or __BITS_PER_LONG != 32 -- so
time64-only 32-bit arches like RISC-V (rv32) correctly do not expose
wait4 and uClibc-ng's existing #else path (waitid emulation) is taken.

Older asm-generic/unistd.h (e.g. 4.19) however defines __NR_wait4
unconditionally.  When uClibc-ng is built against such old headers for
an arch whose kernel does not actually implement sys_wait4 (rv32-nommu
in particular), every wait4()/wait() call returns ENOSYS.  busybox init
then spins in waitfor() at 100% CPU and the system never makes progress
past SYSINIT.

Factor the existing waitid-based wait4 implementation into a helper
__wait4_via_waitid() and use it both as the dedicated implementation
when __NR_wait4 is not defined and as a runtime fallback when wait4
returns ENOSYS.  This makes the library robust regardless of the
kernel-header version used at build time and recovers boot on
rv32-nommu kernels.

The helper uses INLINE_SYSCALL(waitid, ...) directly rather than calling
uClibc-ng's POSIX waitid() because waitid(3) does not expose the rusage
out-parameter of the waitid(2) syscall, and because wait4 needs its own
pid->idtype mapping and siginfo_t->status encoding (which musl handles
the same way in src/internal/emulate_wait4.c).

Tested on riscv32 nommu (static ELF) under qemu: without this patch
busybox init spins at 100% CPU after the first SYSINIT job; with it,
init proceeds normally and reaches the login prompt.

Signed-off-by: Ramin Moussavi <ramin.moussavi@yacoub.de>
Ramin Moussavi 1 week ago
parent
commit
12275d23f0
1 changed file with 83 additions and 51 deletions
  1. 83 51
      libc/sysdeps/linux/common/wait4.c

+ 83 - 51
libc/sysdeps/linux/common/wait4.c

@@ -9,37 +9,30 @@
 #include <sys/syscall.h>
 #include <sys/wait.h>
 #include <sys/resource.h>
+#include <errno.h>
 
-#if defined(__NR_wait4)
-# define __NR___syscall_wait4 __NR_wait4
-static __always_inline _syscall4(int, __syscall_wait4, __kernel_pid_t, pid,
-				 int *, status, int, opts, struct rusage *, rusage)
-
-pid_t __wait4_nocancel(pid_t pid, int *status, int opts, struct rusage *rusage)
-{
-#if defined(__UCLIBC_USE_TIME64__)
-	char *arg_rusage = rusage ? (char *)&rusage->ru_maxrss - 4 * sizeof(__S32_TYPE) : 0;
-	int __ret = __syscall_wait4(pid, status, opts, (struct rusage *)arg_rusage);
-	if (__ret > 0 && rusage) {
-		__S32_TYPE __rusage[4];
-		memcpy(__rusage, arg_rusage, 4 * sizeof(__S32_TYPE));
-		struct timeval tv_utime = {.tv_sec = __rusage[0], .tv_usec = __rusage[1]};
-		struct timeval tv_stime = {.tv_sec = __rusage[2], .tv_usec = __rusage[2]};
-		rusage->ru_utime = tv_utime;
-		rusage->ru_stime = tv_stime;
-	}
-	return __ret;
-#else
-	return __syscall_wait4(pid, status, opts, rusage);
-#endif
-}
-
-#else
-pid_t __wait4_nocancel(pid_t pid, int *status, int opts, struct rusage *rusage)
+#if defined(__NR_waitid)
+/* wait4() expressed via the waitid(2) syscall.  Used as the implementation
+ * when the kernel/arch does not provide wait4, and as a runtime fallback
+ * when wait4 returns ENOSYS (e.g. rv32 kernels which only wire up waitid
+ * even though the uapi headers still define __NR_wait4).
+ *
+ * We invoke the syscall directly here instead of calling uClibc-ng's
+ * POSIX waitid() from waitid.c, because:
+ *   1. waitid(3) is 4-arg (idtype, id, infop, options) and passes NULL as
+ *      the 5th syscall argument; we need the rusage out-parameter, which
+ *      only the Linux waitid(2) syscall (not POSIX waitid) provides.
+ *   2. wait4 semantics require translating the pid argument into idtype/id
+ *      and re-encoding siginfo_t (si_code/si_status) into the 'int *status'
+ *      bit pattern wait/waitpid callers expect.  That logic does not belong
+ *      in waitid().
+ *   3. waitid() is wrapped as a CANCELLABLE_SYSCALL; this is the nocancel
+ *      path. */
+static pid_t __wait4_via_waitid(pid_t pid, int *status, int opts, struct rusage *rusage)
 {
 	idtype_t type;
-	int __res;
 	siginfo_t info;
+	int res;
 
 	info.si_pid = 0;
 
@@ -54,38 +47,77 @@ pid_t __wait4_nocancel(pid_t pid, int *status, int opts, struct rusage *rusage)
 		type = P_PID;
 	}
 
-	__res = INLINE_SYSCALL(waitid, 5, type, pid, &info, opts|WEXITED, rusage);
-
-	if ( __res < 0 )
-		return __res;
+	res = INLINE_SYSCALL(waitid, 5, type, pid, &info, opts | WEXITED, rusage);
+	if (res < 0)
+		return res;
 
 	if (info.si_pid && status) {
-			int sw = 0;
-			switch (info.si_code) {
-			case CLD_CONTINUED:
-				sw = 0xffff;
-				break;
-			case CLD_DUMPED:
-				sw = (info.si_status & 0x7f) | 0x80;
-				break;
-			case CLD_EXITED:
-				sw = (info.si_status & 0xff) << 8;
-				break;
-			case CLD_KILLED:
-				sw = info.si_status & 0x7f;
-				break;
-			case CLD_STOPPED:
-			case CLD_TRAPPED:
-				sw = (info.si_status << 8) + 0x7f;
-				break;
-			}
-			*status = sw;
+		int sw = 0;
+		switch (info.si_code) {
+		case CLD_CONTINUED:
+			sw = 0xffff;
+			break;
+		case CLD_DUMPED:
+			sw = (info.si_status & 0x7f) | 0x80;
+			break;
+		case CLD_EXITED:
+			sw = (info.si_status & 0xff) << 8;
+			break;
+		case CLD_KILLED:
+			sw = info.si_status & 0x7f;
+			break;
+		case CLD_STOPPED:
+		case CLD_TRAPPED:
+			sw = (info.si_status << 8) + 0x7f;
+			break;
+		}
+		*status = sw;
 	}
 
 	return info.si_pid;
 }
 #endif
 
+#if defined(__NR_wait4)
+# define __NR___syscall_wait4 __NR_wait4
+static __always_inline _syscall4(int, __syscall_wait4, __kernel_pid_t, pid,
+				 int *, status, int, opts, struct rusage *, rusage)
+
+pid_t __wait4_nocancel(pid_t pid, int *status, int opts, struct rusage *rusage)
+{
+	int ret; /* raw wait4 result; returned unless the ENOSYS fallback below takes over */
+#if defined(__UCLIBC_USE_TIME64__)
+	/* Kernel returns 32-bit timevals in the rusage prefix; convert below. */
+	char *kru = rusage ? (char *)&rusage->ru_maxrss - 4 * sizeof(__S32_TYPE) : NULL;
+	ret = __syscall_wait4(pid, status, opts, (struct rusage *)kru);
+#else
+	ret = __syscall_wait4(pid, status, opts, rusage);
+#endif
+
+#if defined(__NR_waitid)
+	/* Some kernels (e.g. rv32) reject wait4 with ENOSYS; emulate via waitid. */
+	if (ret == -1 && errno == ENOSYS)
+		return __wait4_via_waitid(pid, status, opts, rusage);
+#endif
+
+#if defined(__UCLIBC_USE_TIME64__)
+	if (ret > 0 && rusage) {
+		__S32_TYPE r32[4]; /* utime sec, utime usec, stime sec, stime usec */
+		memcpy(r32, kru, 4 * sizeof(__S32_TYPE));
+		rusage->ru_utime = (struct timeval){ .tv_sec = r32[0], .tv_usec = r32[1] };
+		rusage->ru_stime = (struct timeval){ .tv_sec = r32[2], .tv_usec = r32[3] };
+	}
+#endif
+	return ret;
+}
+
+#elif defined(__NR_waitid)
+pid_t __wait4_nocancel(pid_t pid, int *status, int opts, struct rusage *rusage)
+{
+	return __wait4_via_waitid(pid, status, opts, rusage); /* built only when __NR_wait4 is undefined: waitid is the sole implementation */
+}
+#endif
+
 #ifdef __USE_BSD
 strong_alias(__wait4_nocancel,wait4)
 #endif