Просмотр исходного кода

misc: add unicode character functions (c11)

Add functions for unicode character conversion.
Ported over from musl C library.

Tested with GNU awk and its test suite.
GNU libc has 4 test failures, uClibc-ng has 8 test failures.
Tested in QEMU MIPS (little endian).
The four failures seem to be unrelated to this addition.
Waldemar Brodkorb 3 недель назад
Родитель
Сommit
1df9e57f25

+ 1 - 0
Makefile.in

@@ -294,6 +294,7 @@ HEADERS_RM-$(UCLIBC_HAS_THREADS)             += *thread*.h semaphore.h \
 HEADERS_RM-$(UCLIBC_HAS_THREADS_NATIVE)      += atomic.h bits/atomic.h
 HEADERS_RM-$(UCLIBC_HAS_UTMP)               += bits/utmp.h utmp.h
 HEADERS_RM-$(UCLIBC_HAS_UTMPX)               += bits/utmpx.h utmpx.h
+HEADERS_RM-$(UCLIBC_HAS_UCHAR)               += uchar.h
 HEADERS_RM-$(UCLIBC_HAS_WCHAR)               += wchar.h wctype.h
 HEADERS_RM-$(UCLIBC_HAS_WORDEXP)             += wordexp.h
 HEADERS_RM-$(UCLIBC_HAS_XATTR)               += sys/xattr.h

+ 8 - 0
extra/Configs/Config.in

@@ -1577,6 +1577,14 @@ config UCLIBC_HAS_WCHAR
 
 	  Most people will answer N.
 
+config UCLIBC_HAS_UCHAR
+	bool "C11 Character Support"
+	select UCLIBC_HAS_WCHAR
+	help
+	  Answer Y to enable c11 character support.
+	  Provides types and functions for handling Unicode characters
+	  and UTF-16/UTF-32 strings.
+
 config UCLIBC_HAS_LIBICONV
 	bool "Iconv Support"
 	select UCLIBC_HAS_WCHAR

+ 38 - 44
include/uchar.h

@@ -1,56 +1,50 @@
-/* Copyright (C) 2011-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
 /*
- *      ISO C11 Standard: 7.28
- *	Unicode utilities	<uchar.h>
+ * Copyright © 2005-2026 Rich Felker, et al.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Imported from musl C library, adapted to uClibc-ng
  */
 
 #ifndef _UCHAR_H
-#define _UCHAR_H	1
+#define _UCHAR_H
 
-#include <features.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if __cplusplus < 201103L
+typedef unsigned short char16_t;
+typedef unsigned char32_t;
+#endif
 
-#define __need_size_t
-#include <stddef.h>
-#define __need_mbstate_t
 #include <wchar.h>
 
-#ifndef __mbstate_t_defined
-__BEGIN_NAMESPACE_C99
-/* Public type.  */
-typedef __mbstate_t mbstate_t;
-__END_NAMESPACE_C99
-# define __mbstate_t_defined 1
-#endif
+size_t c16rtomb(char *__restrict, char16_t, mbstate_t *__restrict);
+size_t mbrtoc16(char16_t *__restrict, const char *__restrict, size_t, mbstate_t *__restrict);
 
+size_t c32rtomb(char *__restrict, char32_t, mbstate_t *__restrict);
+size_t mbrtoc32(char32_t *__restrict, const char *__restrict, size_t, mbstate_t *__restrict);
 
-#if defined __GNUC__ && !defined __USE_ISOCXX11
-/* Define the 16-bit and 32-bit character types.  Use the information
-   provided by the compiler.  */
-# if !defined __CHAR16_TYPE__ || !defined __CHAR32_TYPE__
-#  if defined __STDC_VERSION__ && __STDC_VERSION__ < 201000L
-#   error "<uchar.h> requires ISO C11 mode"
-#  else
-#   error "definitions of __CHAR16_TYPE__ and/or __CHAR32_TYPE__ missing"
-#  endif
-# endif
-typedef __CHAR16_TYPE__ char16_t;
-typedef __CHAR32_TYPE__ char32_t;
+#ifdef __cplusplus
+}
 #endif
 
-#endif	/* uchar.h */
+#endif

+ 1 - 0
libc/misc/Makefile.in

@@ -30,6 +30,7 @@ include $(top_srcdir)libc/misc/sysvipc/Makefile.in
 include $(top_srcdir)libc/misc/time/Makefile.in
 include $(top_srcdir)libc/misc/ttyent/Makefile.in
 include $(top_srcdir)libc/misc/utmp/Makefile.in
+include $(top_srcdir)libc/misc/uchar/Makefile.in
 include $(top_srcdir)libc/misc/wchar/Makefile.in
 include $(top_srcdir)libc/misc/wctype/Makefile.in
 include $(top_srcdir)libc/misc/wordexp/Makefile.in

+ 11 - 0
libc/misc/uchar/Makefile

@@ -0,0 +1,11 @@
+# Makefile for uClibc-ng
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+
+top_srcdir=../../../
+top_builddir=../../../
+all: objs
+include $(top_builddir)Rules.mak
+include Makefile.in
+include $(top_srcdir)Makerules

+ 21 - 0
libc/misc/uchar/Makefile.in

@@ -0,0 +1,21 @@
+# Makefile for uClibc-ng
+#
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#
+
+subdirs += libc/misc/uchar
+
+CSRC-y := mbrtoc16.c mbrtoc32.c c16rtomb.c c32rtomb.c
+
+MISC_UCHAR_DIR := $(top_srcdir)libc/misc/uchar
+MISC_UCHAR_OUT := $(top_builddir)libc/misc/uchar
+
+MISC_UCHAR_SRC := $(patsubst %.c,$(MISC_UCHAR_DIR)/%.c,$(CSRC-y))
+MISC_UCHAR_OBJ := $(patsubst %.c,$(MISC_UCHAR_OUT)/%.o,$(CSRC-y))
+
+libc-$(UCLIBC_HAS_UCHAR) += $(MISC_UCHAR_OBJ)
+
+objclean-y += CLEAN_libc/misc/uchar
+
+CLEAN_libc/misc/uchar:
+	$(do_rm) $(addprefix $(MISC_UCHAR_OUT)/*., o os)

+ 60 - 0
libc/misc/uchar/c16rtomb.c

@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2005-2026 Rich Felker, et al.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Imported from musl C library
+ */
+
+#include <uchar.h>
+#include <errno.h>
+#include <wchar.h>
+
+/*
+ * Convert one UTF-16 code unit to its multibyte representation.
+ *
+ *   s    destination buffer, or NULL to only check the conversion state
+ *   c16  UTF-16 code unit to convert
+ *   ps   conversion state; NULL selects a private static state
+ *
+ * A high surrogate is not written out immediately: its contribution to
+ * the final code point is remembered in the state and 0 is returned.
+ * The following call must supply the matching low surrogate, at which
+ * point the combined code point is passed to wcrtomb().
+ *
+ * Returns the result of wcrtomb() for a completed character, 0 after
+ * storing a high surrogate, 1 when s is NULL and no surrogate is
+ * pending, or (size_t)-1 with errno set to EILSEQ (and the state reset)
+ * for an invalid surrogate sequence.
+ */
+size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps)
+{
+	static unsigned internal_state;
+	if (!ps) ps = (void *)&internal_state;
+	/* Only the first unsigned word of the state object is used here. */
+	unsigned *x = (unsigned *)ps;
+	wchar_t wc;
+
+	if (!s) {
+		/* A NULL buffer is only acceptable with no surrogate pending. */
+		if (*x) goto ilseq;
+		return 1;
+	}
+
+	if (*x) {
+		/* A stored high surrogate must be completed by a low surrogate. */
+		if (c16 - 0xdc00u >= 0x400) goto ilseq;
+		wc = *x + c16 - 0xdc00;
+		*x = 0;
+	} else if (c16 - 0xd800u < 0x400) {
+		/* High surrogate: subtracting 0xd7c0 instead of 0xd800 folds
+		 * the 0x10000 offset of the supplementary planes into the
+		 * stored value. */
+		*x = (c16 - 0xd7c0) << 10;
+		return 0;
+	} else if (c16 - 0xdc00u < 0x400) {
+		/* Low surrogate without a preceding high surrogate: reject it
+		 * here instead of depending on wcrtomb() to refuse it. */
+		goto ilseq;
+	} else {
+		wc = c16;
+	}
+	return wcrtomb(s, wc, 0);
+
+ilseq:
+	*x = 0;
+	errno = EILSEQ;
+	return -1;
+}

+ 32 - 0
libc/misc/uchar/c32rtomb.c

@@ -0,0 +1,32 @@
+/*
+ * Copyright © 2005-2026 Rich Felker, et al.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Imported from musl C library
+ */
+
+#include <uchar.h>
+#include <wchar.h>
+
+/*
+ * Convert one UTF-32 code unit to its multibyte representation.
+ *
+ * The code unit is converted to wchar_t and handed to wcrtomb() together
+ * with the caller's buffer and conversion state; the result of that call
+ * is returned unchanged.
+ */
+size_t c32rtomb(char *restrict s, char32_t c32, mbstate_t *restrict ps)
+{
+	wchar_t wide = c32;
+	size_t written = wcrtomb(s, wide, ps);
+
+	return written;
+}

+ 55 - 0
libc/misc/uchar/mbrtoc16.c

@@ -0,0 +1,55 @@
+/*
+ * Copyright © 2005-2026 Rich Felker, et al.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Imported from musl C library
+ */
+
+#include <uchar.h>
+#include <wchar.h>
+
+/*
+ * Convert the next multibyte character in s to UTF-16.
+ *
+ *   pc16  receives the produced code unit; may be NULL to discard it
+ *   s     multibyte input; NULL is handled as pc16 = NULL, s = "", n = 1
+ *   n     maximum number of bytes of s to examine
+ *   ps    conversion state; NULL selects a private static state
+ *
+ * A character at or above 0x10000 is delivered as a surrogate pair: the
+ * high surrogate is produced by the call that consumes the input bytes,
+ * the low surrogate is parked in the state and produced by the next
+ * call, which consumes no input and returns (size_t)-3.
+ *
+ * Otherwise the return value is that of mbrtowc() for the same input.
+ */
+size_t mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n, mbstate_t *restrict ps)
+{
+	/* The fallback state is passed on to mbrtowc() as an mbstate_t, so it
+	 * must be a complete mbstate_t object, not a single unsigned. */
+	static mbstate_t internal_state;
+	if (!ps) ps = &internal_state;
+	/* The first unsigned word of the state doubles as the slot for a
+	 * pending low surrogate. */
+	unsigned *pending = (unsigned *)ps;
+
+	if (!s) return mbrtoc16(0, "", 1, ps);
+
+	/* A positive value in the first state word is treated as a pending
+	 * low surrogate. This assumes mbrtowc() marks partial-character
+	 * states with the high bit set.
+	 * NOTE(review): that holds for musl, where this code originates;
+	 * confirm it for this libc's mbrtowc(). */
+	if ((int)*pending > 0) {
+		if (pc16) *pc16 = *pending;
+		*pending = 0;
+		return -3;
+	}
+
+	wchar_t wc;
+	size_t ret = mbrtowc(&wc, s, n, ps);
+	/* The error returns (size_t)-1 and (size_t)-2 are larger than 4, so
+	 * wc is only examined after a successful conversion. */
+	if (ret <= 4) {
+		if (wc >= 0x10000) {
+			/* Split into a surrogate pair; the low half waits in
+			 * the state for the next call. */
+			*pending = (wc & 0x3ff) + 0xdc00;
+			wc = 0xd7c0 + (wc >> 10);
+		}
+		if (pc16) *pc16 = wc;
+	}
+	return ret;
+}

+ 38 - 0
libc/misc/uchar/mbrtoc32.c

@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2005-2026 Rich Felker, et al.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Imported from musl C library
+ */
+
+#include <uchar.h>
+#include <wchar.h>
+
+/*
+ * Convert the next multibyte character in s to a UTF-32 code unit.
+ *
+ *   pc32  receives the converted character; may be NULL to discard it
+ *   s     multibyte input; NULL is handled as pc32 = NULL, s = "", n = 1
+ *   n     maximum number of bytes of s to examine
+ *   ps    conversion state; NULL selects a private static state
+ *
+ * Returns the result of mbrtowc() for the same input. *pc32 is written
+ * only when that result is at most 4, which excludes the error returns
+ * (size_t)-1 and (size_t)-2.
+ */
+size_t mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n, mbstate_t *restrict ps)
+{
+	/* The fallback state is passed on to mbrtowc() as an mbstate_t, so it
+	 * must be a complete mbstate_t object, not a single unsigned. */
+	static mbstate_t internal_state;
+	if (!ps) ps = &internal_state;
+	if (!s) return mbrtoc32(0, "", 1, ps);
+	wchar_t wc;
+	size_t ret = mbrtowc(&wc, s, n, ps);
+	if (ret <= 4 && pc32) *pc32 = wc;
+	return ret;
+}