|
@@ -0,0 +1,723 @@
|
|
|
+
|
|
|
+/*
|
|
|
+ * This library is free software; you can redistribute it and/or
|
|
|
+ * modify it under the terms of the GNU Library General Public
|
|
|
+ * License as published by the Free Software Foundation; either
|
|
|
+ * version 2 of the License, or (at your option) any later version.
|
|
|
+ *
|
|
|
+ * This library is distributed in the hope that it will be useful,
|
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
+ * Library General Public License for more details.
|
|
|
+ *
|
|
|
+ * You should have received a copy of the GNU Library General Public
|
|
|
+ * License along with this library; if not, write to the Free
|
|
|
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+/*
|
|
|
+ * Besides uClibc, I'm using this code in my libc for elks, which is
|
|
|
+ * a 16-bit environment with a fairly limited compiler. It would make
|
|
|
+ * things much easier for me if this file isn't modified unnecessarily.
|
|
|
+ * In particular, please put any new or replacement functions somewhere
|
|
|
+ * else, and modify the makefile to use your version instead.
|
|
|
+ * Thanks. Manuel
|
|
|
+ *
|
|
|
+ * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+/*
|
|
|
+ * I'm still tweaking this stuff, but it passes the tests I've thrown
|
|
|
+ * at it, and Erik needs it for the gcc port. The glibc extension
|
|
|
+ * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
|
|
|
+ * in the glibc source. I also need to fix the behavior of
|
|
|
+ * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
|
|
|
+ *
|
|
|
+ * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
|
|
|
+ * file on my platform (x86) show about 5-10% faster conversion speed than
|
|
|
+ * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
|
|
|
+ * individual mbrtowc()/wcrtomb() calls.
|
|
|
+ *
|
|
|
+ * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
|
|
|
+ * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
|
|
|
+ * needs to deal gracefully with whatever is sent to it. In that mode,
|
|
|
+ * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
|
|
|
+ * an arg to force that behavior, so the interface will be changing.
|
|
|
+ *
|
|
|
+ * I need to fix the error checking for 16-bit wide chars. This isn't
|
|
|
+ * an issue for uClibc, but may be for ELKS. I'm currently not sure
|
|
|
+ * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
|
|
|
+ *
|
|
|
+ * Manuel
|
|
|
+ */
|
|
|
+
|
|
|
+#define _GNU_SOURCE
|
|
|
+#define _ISOC99_SOURCE
|
|
|
+#include <errno.h>
|
|
|
+#include <stddef.h>
|
|
|
+#include <limits.h>
|
|
|
+#include <stdint.h>
|
|
|
+#include <inttypes.h>
|
|
|
+#include <stdlib.h>
|
|
|
+#include <stdio.h>
|
|
|
+#include <assert.h>
|
|
|
+#include <locale.h>
|
|
|
+#include <wchar.h>
|
|
|
+
|
|
|
+#define ENCODING (__global_locale.encoding)
|
|
|
+
|
|
|
+#if WCHAR_MAX > 0xffffU
|
|
|
+#define UTF_8_MAX_LEN 6
|
|
|
+#else
|
|
|
+#define UTF_8_MAX_LEN 3
|
|
|
+#endif
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+#warning implement __CTYPE_HAS_UTF_8_LOCALES!
|
|
|
+#define __CTYPE_HAS_UTF_8_LOCALES
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
|
|
|
+ const char **__restrict src, size_t n,
|
|
|
+ mbstate_t *ps, int allow_continuation);
|
|
|
+
|
|
|
+extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
|
|
|
+ const wchar_t **__restrict src, size_t wn);
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+extern size_t __mbsnrtowcs(wchar_t *__restrict dst,
|
|
|
+ const char **__restrict src,
|
|
|
+ size_t NMC, size_t len, mbstate_t *__restrict ps);
|
|
|
+
|
|
|
+extern size_t __wcsnrtombs(char *__restrict dst,
|
|
|
+ const wchar_t **__restrict src,
|
|
|
+ size_t NWC, size_t len, mbstate_t *__restrict ps);
|
|
|
+
|
|
|
+
|
|
|
+#ifdef L_btowc
|
|
|
+
|
|
|
/*
 * Convert a single byte to the wide character it represents.
 *
 * c: a byte value (interpreted as unsigned char) or EOF.
 *
 * Returns the wide character when the byte on its own forms a complete
 * multibyte character in the initial conversion state -- this includes
 * the nul byte, which maps to L'\0' -- and WEOF for EOF or for a byte
 * that is invalid or only the start of a longer sequence.
 */
wint_t btowc(int c)
{
	wchar_t wc;
	char buf[1];
	mbstate_t mbstate = {0};	/* start from the initial state */
	size_t r;

	if (c == EOF) {
		return WEOF;
	}

	buf[0] = (char) (unsigned char) c;
	r = mbrtowc(&wc, buf, 1, &mbstate);

	/* mbrtowc() reports the nul character with 0, not 1. */
	if (r == 0) {
		return L'\0';
	}
	/* Any other complete one-byte character consumes exactly 1 byte. */
	if (r == 1) {
		return wc;
	}
	/* (size_t)-1 (invalid) or (size_t)-2 (incomplete). */
	return WEOF;
}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L_wctob
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
/*
 * Convert a wide character to its single-byte representation.
 *
 * Returns the byte (as an unsigned char value) when wcrtomb() encodes
 * c in exactly one byte, and EOF otherwise.
 */
int wctob(wint_t c)
{
	unsigned char encoded[MB_LEN_MAX];

	if (wcrtomb((char *) encoded, c, NULL) != 1) {
		return EOF;
	}
	return encoded[0];
}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L_mbsinit
|
|
|
+
|
|
|
+int mbsinit(const mbstate_t *ps)
|
|
|
+{
|
|
|
+ return !ps || !ps->mask;
|
|
|
+}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L_mbrlen
|
|
|
+
|
|
|
/*
 * Determine the length in bytes of the next multibyte character in s.
 *
 * This is mbrtowc() with the wide-character result discarded.  When ps
 * is NULL a conversion state private to mbrlen() is used; being static,
 * it starts out zero-initialized.
 *
 * Returns whatever mbrtowc() returns for the same input.
 */
size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
{
	static mbstate_t internal_state;

	if (ps == NULL) {
		ps = &internal_state;
	}
	return mbrtowc(NULL, s, n, ps);
}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L_mbrtowc
|
|
|
+
|
|
|
+size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
|
|
|
+ size_t n, mbstate_t *__restrict ps)
|
|
|
+{
|
|
|
+ static mbstate_t mbstate;
|
|
|
+ wchar_t wcbuf[1];
|
|
|
+ const char *p;
|
|
|
+ size_t r;
|
|
|
+ char empty_string[1];
|
|
|
+
|
|
|
+ if (!ps) {
|
|
|
+ ps = &mbstate;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!s) {
|
|
|
+ pwc = (wchar_t *) s;
|
|
|
+ empty_string[0] = 0;
|
|
|
+ s = empty_string;
|
|
|
+ n = 1;
|
|
|
+ } else if (!n) {
|
|
|
+ return (ps->mask && (ps->wc == 0xffffU))
|
|
|
+ ? ((size_t) -1) : ((size_t) -2);
|
|
|
+ }
|
|
|
+
|
|
|
+ p = s;
|
|
|
+
|
|
|
+#ifdef __CTYPE_HAS_UTF_8_LOCALES
|
|
|
+
|
|
|
+ if (ENCODING == __ctype_encoding_utf8) {
|
|
|
+ r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
|
|
|
+ return (r == 1) ? (p-s) : r;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
+ r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
|
|
|
+
|
|
|
+ if (((ssize_t) r) >= 0) {
|
|
|
+ if (pwc) {
|
|
|
+ *pwc = *wcbuf;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return (size_t) r;
|
|
|
+}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L_wcrtomb
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
/*
 * Convert one wide character to its multibyte representation.
 *
 * s:  destination buffer; NULL is treated as a request to convert L'\0'
 *     into an internal scratch buffer.
 * wc: the wide character to convert.
 * ps: conversion state, passed through to __wcsnrtombs().
 *
 * Returns the number of bytes stored (1 for the nul character), or
 * whatever error value __wcsnrtombs() returns.
 */
size_t wcrtomb(register char *__restrict s, wchar_t wc,
			   mbstate_t *__restrict ps)
{
	wchar_t wcbuf[1];
	const wchar_t *pwc;
	size_t r;
	char buf[MB_LEN_MAX];

	if (!s) {
		s = buf;
		wc = 0;
	}

	wcbuf[0] = wc;
	pwc = wcbuf;

	/* Convert exactly one wide character (NWC == 1).  Letting the
	 * converter run on to a terminating nul would make it store that nul
	 * after the character, i.e. write one byte more into the caller's
	 * buffer than the character itself occupies. */
	r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);

	/* The string converter does not count a nul character; as a single
	 * character it occupies one byte. */
	return (r != 0) ? r : 1;
}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L_mbsrtowcs
|
|
|
+
|
|
|
/*
 * Convert a nul-terminated multibyte string to wide characters.
 *
 * Forwards to __mbsnrtowcs() with no limit on the number of input bytes
 * (SIZE_MAX).  When ps is NULL a conversion state private to mbsrtowcs()
 * is used; being static, it starts out zero-initialized.
 *
 * Returns the value produced by __mbsnrtowcs().
 */
size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
				 size_t len, mbstate_t *__restrict ps)
{
	static mbstate_t internal_state;

	if (ps == NULL) {
		ps = &internal_state;
	}
	return __mbsnrtowcs(dst, src, SIZE_MAX, len, ps);
}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L_wcsrtombs
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+/* TODO: Check for valid state anyway? */
|
|
|
+
|
|
|
/*
 * Convert a nul-terminated wide string to a multibyte string.
 *
 * Forwards to __wcsnrtombs() with no limit on the number of wide
 * characters read (SIZE_MAX); ps is handed through unchanged.
 *
 * Returns the value produced by __wcsnrtombs().
 */
size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
				 size_t len, mbstate_t *__restrict ps)
{
	size_t converted;

	converted = __wcsnrtombs(dst, src, SIZE_MAX, len, ps);
	return converted;
}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L__wchar_utf8sntowcs
|
|
|
+
|
|
|
+
|
|
|
+/* With DECODER defined, this passes Markus Kuhn's UTF-8-test.txt stress test.
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+#ifdef DECODER
|
|
|
+#ifndef KUHN
|
|
|
+#define KUHN
|
|
|
+#endif
|
|
|
+#endif
|
|
|
+
|
|
|
+size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
|
|
|
+ const char **__restrict src, size_t n,
|
|
|
+ mbstate_t *ps, int allow_continuation)
|
|
|
+{
|
|
|
+ register const char *s;
|
|
|
+ __uwchar_t mask;
|
|
|
+ __uwchar_t wc;
|
|
|
+ wchar_t wcbuf[1];
|
|
|
+ size_t count;
|
|
|
+ int incr;
|
|
|
+
|
|
|
+ s = *src;
|
|
|
+
|
|
|
+ assert(s != NULL);
|
|
|
+ assert(ps != NULL);
|
|
|
+
|
|
|
+ incr = 1;
|
|
|
+ if (!pwc) {
|
|
|
+ pwc = wcbuf;
|
|
|
+ wn = SIZE_MAX;
|
|
|
+ incr = 0;
|
|
|
+ }
|
|
|
+#warning fix _wchar_utf8sntowcs to allow wn == 0!
|
|
|
+ assert(wn > 0);
|
|
|
+
|
|
|
+ count = wn;
|
|
|
+
|
|
|
+ if ((mask = (__uwchar_t) ps->mask) != 0) {
|
|
|
+#ifdef DECODER
|
|
|
+ wc = (__uwchar_t) ps->wc;
|
|
|
+ if (n) {
|
|
|
+ goto CONTINUE;
|
|
|
+ }
|
|
|
+ goto DONE;
|
|
|
+#else
|
|
|
+ if ((wc = (__uwchar_t) ps->wc) != 0xffffU) {
|
|
|
+
|
|
|
+ if (n) {
|
|
|
+ goto CONTINUE;
|
|
|
+ }
|
|
|
+ goto DONE;
|
|
|
+ }
|
|
|
+ return (size_t) -1;
|
|
|
+#endif
|
|
|
+ }
|
|
|
+
|
|
|
+ do {
|
|
|
+ if (!n) {
|
|
|
+ goto DONE;
|
|
|
+ }
|
|
|
+ --n;
|
|
|
+ if ((wc = ((unsigned char) *s++)) >= 0x80) {
|
|
|
+ mask = 0x40;
|
|
|
+#warning fix range for 16 bit wides
|
|
|
+ if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
|
|
|
+ goto START;
|
|
|
+ }
|
|
|
+ BAD:
|
|
|
+#ifdef DECODER
|
|
|
+ wc = 0xfffd;
|
|
|
+ goto COMPLETE;
|
|
|
+#else
|
|
|
+ ps->mask = mask;
|
|
|
+ ps->wc = 0xffffU;
|
|
|
+ return (size_t) -1;
|
|
|
+#endif
|
|
|
+
|
|
|
+ CONTINUE:
|
|
|
+ while (n) {
|
|
|
+ --n;
|
|
|
+ if ((*s & 0xc0) != 0x80) {
|
|
|
+ goto BAD;
|
|
|
+ }
|
|
|
+ mask <<= 5;
|
|
|
+ wc <<= 6;
|
|
|
+ wc += (*s & 0x3f);
|
|
|
+ ++s;
|
|
|
+ START:
|
|
|
+ wc &= ~(mask << 1);
|
|
|
+
|
|
|
+ if ((wc & mask) == 0) {
|
|
|
+ if ((mask >>= 5) == 0x40) {
|
|
|
+ mask += mask;
|
|
|
+ }
|
|
|
+
|
|
|
+ * and invalid chars. */
|
|
|
+ if ( (wc < mask)
|
|
|
+#ifdef KUHN
|
|
|
+#if UTF_8_MAX_LEN == 3
|
|
|
+#error broken since mask can overflow!!
|
|
|
+
|
|
|
+ || (wc > 0xfffdU)
|
|
|
+#else
|
|
|
+
|
|
|
+
|
|
|
+ || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
|
|
|
+#endif
|
|
|
+ || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
|
|
|
+#endif
|
|
|
+ ) {
|
|
|
+ goto BAD;
|
|
|
+ }
|
|
|
+ goto COMPLETE;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!allow_continuation) {
|
|
|
+ if (count != wn) {
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ * continuation!!! */
|
|
|
+#if UTF_8_MAX_LEN == 3
|
|
|
+#error broken since mask can overflow!!
|
|
|
+#endif
|
|
|
+
|
|
|
+ do {
|
|
|
+ --s;
|
|
|
+ } while ((mask >>= 5) >= 0x40);
|
|
|
+ goto DONE;
|
|
|
+ }
|
|
|
+ ps->mask = (wchar_t) mask;
|
|
|
+ ps->wc = (wchar_t) wc;
|
|
|
+ *src = s;
|
|
|
+ return (size_t) -2;
|
|
|
+ }
|
|
|
+ COMPLETE:
|
|
|
+ *pwc = wc;
|
|
|
+ pwc += incr;
|
|
|
+
|
|
|
+ }
|
|
|
+#ifdef DECODER
|
|
|
+ while (--count);
|
|
|
+#else
|
|
|
+ while (wc && --count);
|
|
|
+
|
|
|
+ if (!wc) {
|
|
|
+ s = NULL;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
+ DONE:
|
|
|
+
|
|
|
+ ps->mask = 0;
|
|
|
+ if (pwc != wcbuf) {
|
|
|
+ *src = s;
|
|
|
+ }
|
|
|
+
|
|
|
+ return wn - count;
|
|
|
+}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L__wchar_wcstoutf8s
|
|
|
+
|
|
|
+size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
|
|
|
+ const wchar_t **__restrict src, size_t wn)
|
|
|
+{
|
|
|
+ register char *p;
|
|
|
+ size_t len, t;
|
|
|
+ __uwchar_t wc;
|
|
|
+ const __uwchar_t *swc;
|
|
|
+ int store;
|
|
|
+ char buf[MB_LEN_MAX];
|
|
|
+ char m;
|
|
|
+
|
|
|
+ store = 1;
|
|
|
+ if (!s) {
|
|
|
+ s = buf;
|
|
|
+ n = SIZE_MAX;
|
|
|
+ store = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ t = n;
|
|
|
+ swc = (const __uwchar_t *) *src;
|
|
|
+
|
|
|
+ assert(swc != NULL);
|
|
|
+
|
|
|
+ while (wn && t) {
|
|
|
+ wc = *swc;
|
|
|
+
|
|
|
+ *s = wc;
|
|
|
+ len = 1;
|
|
|
+
|
|
|
+ if (wc >= 0x80) {
|
|
|
+#ifdef KUHN
|
|
|
+ if (
|
|
|
+#if UTF_8_MAX_LEN == 3
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ (wc > 0xfffdU)
|
|
|
+#else
|
|
|
+
|
|
|
+ (wc > 0x7fffffffUL)
|
|
|
+ || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
|
|
|
+#endif
|
|
|
+ || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
|
|
|
+ ) {
|
|
|
+ return (size_t) -1;
|
|
|
+ }
|
|
|
+#else
|
|
|
+#if UTF_8_MAX_LEN != 3
|
|
|
+ if (wc > 0x7fffffffUL) {
|
|
|
+ return (size_t) -1;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+#endif
|
|
|
+
|
|
|
+ wc >>= 1;
|
|
|
+ p = s;
|
|
|
+ do {
|
|
|
+ ++p;
|
|
|
+ } while (wc >>= 5);
|
|
|
+ wc = *swc;
|
|
|
+ if ((len = p - s) > t) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ m = 0x80;
|
|
|
+ while( p>s ) {
|
|
|
+ m = (m >> 1) | 0x80;
|
|
|
+ *--p = (wc & 0x3f) | 0x80;
|
|
|
+ wc >>= 6;
|
|
|
+ }
|
|
|
+ *s |= (m << 1);
|
|
|
+ } else if (wc == 0) {
|
|
|
+ swc = NULL;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ ++swc;
|
|
|
+ --wn;
|
|
|
+ t -= len;
|
|
|
+ if (store) {
|
|
|
+ s += len;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ *src = (const wchar_t *) swc;
|
|
|
+
|
|
|
+ return n - t;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L___mbsnrtowcs
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
|
|
|
+ size_t NMC, size_t len, mbstate_t *__restrict ps)
|
|
|
+ __attribute__ ((__weak__, __alias__("__mbsnrtowcs")));
|
|
|
+
|
|
|
+size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
|
|
|
+ size_t NMC, size_t len, mbstate_t *__restrict ps)
|
|
|
+{
|
|
|
+ static mbstate_t mbstate;
|
|
|
+ wchar_t wcbuf[1];
|
|
|
+ const char *s;
|
|
|
+ size_t count, r;
|
|
|
+ int incr;
|
|
|
+
|
|
|
+ if (!ps) {
|
|
|
+ ps = &mbstate;
|
|
|
+ }
|
|
|
+
|
|
|
+#ifdef __CTYPE_HAS_UTF_8_LOCALES
|
|
|
+ if (ENCODING == __ctype_encoding_utf8) {
|
|
|
+ return ((r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1))
|
|
|
+ != (size_t) -2) ? r : 0;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+ incr = 1;
|
|
|
+ if (!dst) {
|
|
|
+ dst = wcbuf;
|
|
|
+ len = SIZE_MAX;
|
|
|
+ incr = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ if (len > NMC) {
|
|
|
+ len = NMC;
|
|
|
+ }
|
|
|
+
|
|
|
+ count = len;
|
|
|
+ s = *src;
|
|
|
+
|
|
|
+#ifdef __CTYPE_HAS_8_BIT_LOCALES
|
|
|
+ if (ENCODING == __ctype_encoding_8_bit) {
|
|
|
+ wchar_t wc;
|
|
|
+ while (count) {
|
|
|
+ if ((wc = ((unsigned char)(*s))) >= 0x80) {
|
|
|
+ wc -= 0x80;
|
|
|
+ wc = __global_locale.tbl8c2wc[
|
|
|
+ (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
|
|
|
+ << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
|
|
|
+ if (!wc) {
|
|
|
+ goto BAD;
|
|
|
+ }
|
|
|
+ } else if (!wc) {
|
|
|
+ s = NULL;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ ++s;
|
|
|
+ *dst = wc;
|
|
|
+ dst += incr;
|
|
|
+ --count;
|
|
|
+ }
|
|
|
+ if (dst != wcbuf) {
|
|
|
+ *src = s;
|
|
|
+ }
|
|
|
+ return len - count;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
+ assert(ENCODING == __ctype_encoding_7_bit);
|
|
|
+
|
|
|
+ while (count) {
|
|
|
+ if ((*dst = (unsigned char) *s) == 0) {
|
|
|
+ s = NULL;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ if (*dst >= 0x80) {
|
|
|
+#ifdef __CTYPE_HAS_8_BIT_LOCALES
|
|
|
+ BAD:
|
|
|
+#endif
|
|
|
+ __set_errno(EILSEQ);
|
|
|
+ return (size_t) -1;
|
|
|
+ }
|
|
|
+ ++s;
|
|
|
+ dst += incr;
|
|
|
+ --count;
|
|
|
+ }
|
|
|
+ if (dst != wcbuf) {
|
|
|
+ *src = s;
|
|
|
+ }
|
|
|
+ return len - count;
|
|
|
+}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|
|
|
+#ifdef L___wcsnrtombs
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+/* TODO: Check for valid state anyway? */
|
|
|
+
|
|
|
+size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
|
|
|
+ size_t NWC, size_t len, mbstate_t *__restrict ps)
|
|
|
+ __attribute__ ((__weak__, __alias__("__wcsnrtombs")));
|
|
|
+
|
|
|
+size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
|
|
|
+ size_t NWC, size_t len, mbstate_t *__restrict ps)
|
|
|
+{
|
|
|
+ const __uwchar_t *s;
|
|
|
+ size_t count;
|
|
|
+ int incr;
|
|
|
+ char buf[MB_LEN_MAX];
|
|
|
+
|
|
|
+#ifdef __CTYPE_HAS_UTF_8_LOCALES
|
|
|
+ if (ENCODING == __ctype_encoding_utf8) {
|
|
|
+ return _wchar_wcsntoutf8s(dst, len, src, NWC);
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
+ incr = 1;
|
|
|
+ if (!dst) {
|
|
|
+ dst = buf;
|
|
|
+ len = SIZE_MAX;
|
|
|
+ incr = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ if (len > NWC) {
|
|
|
+ len = NWC;
|
|
|
+ }
|
|
|
+
|
|
|
+ count = len;
|
|
|
+ s = (const __uwchar_t *) *src;
|
|
|
+
|
|
|
+#ifdef __CTYPE_HAS_8_BIT_LOCALES
|
|
|
+ if (ENCODING == __ctype_encoding_8_bit) {
|
|
|
+ __uwchar_t wc;
|
|
|
+ __uwchar_t u;
|
|
|
+ while (count) {
|
|
|
+ if ((wc = *s) <= 0x7f) {
|
|
|
+ if (!(*dst = (unsigned char) wc)) {
|
|
|
+ s = NULL;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ u = 0;
|
|
|
+ if (wc <= Cwc2c_DOMAIN_MAX) {
|
|
|
+ u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
|
|
|
+ + Cwc2c_TT_SHIFT)];
|
|
|
+ u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
|
|
|
+ + ((wc >> Cwc2c_TT_SHIFT)
|
|
|
+ & ((1 << Cwc2c_TI_SHIFT)-1))];
|
|
|
+ u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
|
|
|
+ + (u << Cwc2c_TT_SHIFT)
|
|
|
+ + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+#ifdef __WCHAR_REPLACEMENT_CHAR
|
|
|
+ *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
|
|
|
+#else
|
|
|
+ if (!u) {
|
|
|
+ goto BAD;
|
|
|
+ }
|
|
|
+ *dst = (unsigned char) u;
|
|
|
+#endif
|
|
|
+ }
|
|
|
+ ++s;
|
|
|
+ dst += incr;
|
|
|
+ --count;
|
|
|
+ }
|
|
|
+ if (dst != buf) {
|
|
|
+ *src = (const wchar_t *) s;
|
|
|
+ }
|
|
|
+ return len - count;
|
|
|
+ }
|
|
|
+#endif
|
|
|
+
|
|
|
+ assert(ENCODING == __ctype_encoding_7_bit);
|
|
|
+
|
|
|
+ while (count) {
|
|
|
+ if (*s >= 0x80) {
|
|
|
+#if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
|
|
|
+ BAD:
|
|
|
+#endif
|
|
|
+ __set_errno(EILSEQ);
|
|
|
+ return (size_t) -1;
|
|
|
+
|
|
|
+ }
|
|
|
+ if ((*dst = (unsigned char) *s) == 0) {
|
|
|
+ s = NULL;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ ++s;
|
|
|
+ dst += incr;
|
|
|
+ --count;
|
|
|
+ }
|
|
|
+ if (dst != buf) {
|
|
|
+ *src = (const wchar_t *) s;
|
|
|
+ }
|
|
|
+ return len - count;
|
|
|
+}
|
|
|
+
|
|
|
+#endif
|
|
|
+
|