wchar.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775
  1. /* Copyright (C) 2002 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
  18. *
  19. * Besides uClibc, I'm using this code in my libc for elks, which is
  20. * a 16-bit environment with a fairly limited compiler. It would make
  21. * things much easier for me if this file isn't modified unnecessarily.
  22. * In particular, please put any new or replacement functions somewhere
  23. * else, and modify the makefile to use your version instead.
  24. * Thanks. Manuel
  25. *
  26. * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
  27. /* May 23, 2002 Initial Notes:
  28. *
  29. * I'm still tweaking this stuff, but it passes the tests I've thrown
  30. * at it, and Erik needs it for the gcc port. The glibc extension
  31. * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
  32. * in the glibc source. I also need to fix the behavior of
  33. * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
  34. *
  35. * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
  36. * file on my platform (x86) show about 5-10% faster conversion speed than
  37. * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
  38. * individual mbrtowc()/wcrtomb() calls.
  39. *
  40. * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
  41. * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
  42. * needs to deal gracefully with whatever is sent to it. In that mode,
  43. * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
  44. * an arg to force that behavior, so the interface will be changing.
  45. *
  46. * I need to fix the error checking for 16-bit wide chars. This isn't
  47. * an issue for uClibc, but may be for ELKS. I'm currently not sure
  48. * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
  49. *
  50. * July 1, 2002
  51. *
  52. * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
  53. * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
  54. * locales.
  55. * Enabled building of a C/POSIX-locale-only version, so full locale support
  56. * no longer needs to be enabled.
  57. *
  58. * Manuel
  59. */
  60. #define _GNU_SOURCE
  61. #define _ISOC99_SOURCE
  62. #include <errno.h>
  63. #include <stddef.h>
  64. #include <limits.h>
  65. #include <stdint.h>
  66. #include <inttypes.h>
  67. #include <stdlib.h>
  68. #include <stdio.h>
  69. #include <assert.h>
  70. #include <locale.h>
  71. #include <wchar.h>
  /*
   * Build configuration.
   *
   * Without locale support only the C/POSIX conversions are built: the 8-bit
   * and UTF-8 feature macros are cleared and the two UTF-8 work functions are
   * compiled out.  With locale support the active encoding is read from the
   * global locale and UTF-8 support is switched on.
   */
  #ifndef __UCLIBC_HAS_LOCALE__

  #undef __CTYPE_HAS_8_BIT_LOCALES
  #undef __CTYPE_HAS_UTF_8_LOCALES
  #undef L__wchar_utf8sntowcs
  #undef L__wchar_wcsntoutf8s

  #else  /* __UCLIBC_HAS_LOCALE__ */

  #define ENCODING (__global_locale.encoding)

  #ifdef __UCLIBC_MJN3_ONLY__
  #warning implement __CTYPE_HAS_UTF_8_LOCALES!
  #endif
  #define __CTYPE_HAS_UTF_8_LOCALES

  #endif /* __UCLIBC_HAS_LOCALE__ */

  /* Longest UTF-8 sequence that can encode a wchar_t: 3 bytes when wchar_t
   * holds at most 16 bits of value, 6 bytes otherwise. */
  #if WCHAR_MAX <= 0xffffUL
  #define UTF_8_MAX_LEN 3
  #else
  #define UTF_8_MAX_LEN 6
  #endif

  /* #define KUHN */

  /* Implementation-specific work functions. */

  extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc,
                                   size_t wn,
                                   const char **__restrict src,
                                   size_t n,
                                   mbstate_t *ps,
                                   int allow_continuation);

  extern size_t _wchar_wcsntoutf8s(char *__restrict s,
                                   size_t n,
                                   const wchar_t **__restrict src,
                                   size_t wn);

  /* glibc extensions. */

  extern size_t __mbsnrtowcs(wchar_t *__restrict dst,
                             const char **__restrict src,
                             size_t NMC,
                             size_t len,
                             mbstate_t *__restrict ps);

  extern size_t __wcsnrtombs(char *__restrict dst,
                             const wchar_t **__restrict src,
                             size_t NWC,
                             size_t len,
                             mbstate_t *__restrict ps);
  /**********************************************************************/
  103. /**********************************************************************/
  104. #ifdef L_btowc
  /*
   * btowc - widen a single byte.
   *
   * c is either EOF or a byte value.  Returns the corresponding wide
   * character, or WEOF when c is EOF or does not form a valid one-byte
   * character.
   */
  wint_t btowc(int c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES

      mbstate_t state;
      unsigned char byte[1];
      wchar_t result;

      if (c == EOF) {
          return WEOF;
      }

      byte[0] = (unsigned char) c;
      state.mask = 0;             /* Start from the initial state. */

      /* mbrtowc yields 0 (nul) or 1 on success; its error codes
       * (size_t)-1 and (size_t)-2 both compare greater than 1. */
      if (mbrtowc(&result, (const char *) byte, 1, &state) > 1) {
          return WEOF;
      }
      return result;

  #else  /* __CTYPE_HAS_8_BIT_LOCALES */

      /* Without 8-bit locale support only 0-0x7f is legal (C/POSIX and
       * UTF-8 locales).  The unsigned compare also rejects negative
       * values, EOF included. */
      if (((unsigned int) c) >= 0x80) {
          return WEOF;
      }
      return c;

  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  125. #endif
  126. /**********************************************************************/
  127. #ifdef L_wctob
  128. /* Note: We completely ignore ps in all currently supported conversions. */
  /*
   * wctob - narrow a wide character to a single byte.
   *
   * Returns the one-byte representation of c as an int, or EOF when c has
   * no one-byte representation.
   */
  int wctob(wint_t c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES

      unsigned char mb[MB_LEN_MAX];

      /* Only a conversion that produces exactly one byte qualifies. */
      if (wcrtomb((char *) mb, c, NULL) != 1) {
          return EOF;
      }
      return mb[0];

  #else  /* __CTYPE_HAS_8_BIT_LOCALES */

      /* Without 8-bit locale support only 0-0x7f is legal (C/POSIX and
       * UTF-8 locales).  The lower bound is tested explicitly because
       * wint_t may be a signed type.
       * TODO: need unsigned version of wint_t... */
      if ((c < 0) || (c >= 0x80)) {
          return EOF;
      }
      return c;

  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  142. #endif
  143. /**********************************************************************/
  144. #ifdef L_mbsinit
  145. int mbsinit(const mbstate_t *ps)
  146. {
  147. return !ps || !ps->mask;
  148. }
  149. #endif
  150. /**********************************************************************/
  151. #ifdef L_mbrlen
  /* mbrlen is exported as a weak alias of __mbrlen. */
  size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
      __attribute__ ((__weak__, __alias__("__mbrlen")));

  /*
   * __mbrlen - length in bytes of the next multibyte character.
   *
   * Forwards to mbrtowc() with no output wide character.  When ps is NULL a
   * function-private state object is used instead.
   */
  size_t __mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
  {
      static mbstate_t private_state;     /* Zero-initialised (bss). */

      if (ps == NULL) {
          ps = &private_state;
      }
      return mbrtowc(NULL, s, n, ps);
  }
  159. #endif
  160. /**********************************************************************/
  161. #ifdef L_mbrtowc
  162. size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
  163. size_t n, mbstate_t *__restrict ps)
  164. {
  165. static mbstate_t mbstate; /* Rely on bss 0-init. */
  166. wchar_t wcbuf[1];
  167. const char *p;
  168. size_t r;
  169. char empty_string[1]; /* Avoid static to be fPIC friendly. */
  170. if (!ps) {
  171. ps = &mbstate;
  172. }
  173. if (!s) {
  174. pwc = (wchar_t *) s; /* NULL */
  175. empty_string[0] = 0; /* Init the empty string when necessary. */
  176. s = empty_string;
  177. n = 1;
  178. } else if (!n) {
  179. return (ps->mask && (ps->wc == 0xffffU)) /* TODO: change error code? */
  180. ? ((size_t) -1) : ((size_t) -2);
  181. }
  182. p = s;
  183. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  184. /* Need to do this here since mbsrtowcs doesn't allow incompletes. */
  185. if (ENCODING == __ctype_encoding_utf8) {
  186. r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
  187. return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
  188. }
  189. #endif
  190. r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
  191. if (((ssize_t) r) >= 0) {
  192. if (pwc) {
  193. *pwc = *wcbuf;
  194. }
  195. }
  196. return (size_t) r;
  197. }
  198. #endif
  199. /**********************************************************************/
  200. #ifdef L_wcrtomb
  201. /* Note: We completely ignore ps in all currently supported conversions. */
  202. /* TODO: Check for valid state anyway? */
  /*
   * wcrtomb - convert one wide character to its multibyte representation.
   *
   * s   destination buffer; NULL means "convert the nul character into an
   *     internal buffer".
   * wc  wide character to convert.
   * ps  conversion state, passed through to __wcsnrtombs().
   *
   * Returns the number of bytes stored (1 for the nul character), or
   * whatever error value __wcsnrtombs() reports.
   *
   * Exactly one wide character is handed to the converter, so nothing is
   * written beyond the bytes of that character.  Converting a
   * nul-terminated two-element array would store an extra nul after the
   * character, overrunning callers whose buffer is only as large as the
   * character itself.
   */
  size_t wcrtomb(register char *__restrict s, wchar_t wc,
                 mbstate_t *__restrict ps)
  {
      wchar_t wcbuf[1];
      const wchar_t *pwc;
      size_t r;
      char buf[MB_LEN_MAX];

      if (!s) {
          s = buf;
          wc = 0;
      }

      wcbuf[0] = wc;
      pwc = wcbuf;

      /* NWC == 1: convert only wcbuf[0]. */
      r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);

      /* The converter does not count a terminating nul; wcrtomb must. */
      return (r != 0) ? r : 1;
  }
  220. #endif
  221. /**********************************************************************/
  222. #ifdef L_mbsrtowcs
  /*
   * mbsrtowcs - convert a multibyte string to a wide-character string.
   *
   * Forwards to __mbsnrtowcs() with no limit on the number of input bytes.
   * When ps is NULL a function-private state object is used instead.
   */
  size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      static mbstate_t private_state;     /* Zero-initialised (bss). */

      if (ps == NULL) {
          ps = &private_state;
      }
      return __mbsnrtowcs(dst, src, SIZE_MAX, len, ps);
  }
  230. #endif
  231. /**********************************************************************/
  232. #ifdef L_wcsrtombs
  233. /* Note: We completely ignore ps in all currently supported conversions.
  234. * TODO: Check for valid state anyway? */
  /*
   * wcsrtombs - convert a wide-character string to a multibyte string.
   *
   * Forwards to __wcsnrtombs() with no limit on the number of input wide
   * characters; ps is passed through unchanged.
   */
  size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      const size_t unlimited_input = SIZE_MAX;

      return __wcsnrtombs(dst, src, unlimited_input, len, ps);
  }
  240. #endif
  241. /**********************************************************************/
  242. #ifdef L__wchar_utf8sntowcs
  243. /* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
  244. * UTF-8-test.txt stress test.
  245. */
  246. /* #define DECODER */
  247. #ifdef DECODER
  248. #ifndef KUHN
  249. #define KUHN
  250. #endif
  251. #endif
  252. size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
  253. const char **__restrict src, size_t n,
  254. mbstate_t *ps, int allow_continuation)
  255. {
  256. register const char *s;
  257. __uwchar_t mask;
  258. __uwchar_t wc;
  259. wchar_t wcbuf[1];
  260. size_t count;
  261. int incr;
  262. s = *src;
  263. assert(s != NULL);
  264. assert(ps != NULL);
  265. incr = 1;
  266. if (!pwc) {
  267. pwc = wcbuf;
  268. wn = SIZE_MAX;
  269. incr = 0;
  270. }
  271. /* This is really here only to support the glibc extension function
  272. * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
  273. * check on the validity of the mbstate. */
  274. if (!(count = wn)) {
  275. return 0;
  276. }
  277. if ((mask = (__uwchar_t) ps->mask) != 0) { /* A continuation... */
  278. #ifdef DECODER
  279. wc = (__uwchar_t) ps->wc;
  280. if (n) {
  281. goto CONTINUE;
  282. }
  283. goto DONE;
  284. #else
  285. if ((wc = (__uwchar_t) ps->wc) != 0xffffU) {
  286. /* TODO: change error code here and below? */
  287. if (n) {
  288. goto CONTINUE;
  289. }
  290. goto DONE;
  291. }
  292. return (size_t) -1; /* We're in an error state. */
  293. #endif
  294. }
  295. do {
  296. if (!n) {
  297. goto DONE;
  298. }
  299. --n;
  300. if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
  301. mask = 0x40;
  302. #ifdef __UCLIBC_MJN3_ONLY__
  303. #warning fix range for 16 bit wides
  304. #endif
  305. if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
  306. goto START;
  307. }
  308. BAD:
  309. #ifdef DECODER
  310. wc = 0xfffd;
  311. goto COMPLETE;
  312. #else
  313. ps->mask = mask;
  314. ps->wc = 0xffffU;
  315. return (size_t) -1; /* Illegal start byte! */
  316. #endif
  317. CONTINUE:
  318. while (n) {
  319. --n;
  320. if ((*s & 0xc0) != 0x80) {
  321. goto BAD;
  322. }
  323. mask <<= 5;
  324. wc <<= 6;
  325. wc += (*s & 0x3f); /* keep seperate for bcc (smaller code) */
  326. ++s;
  327. START:
  328. wc &= ~(mask << 1);
  329. if ((wc & mask) == 0) { /* Character completed. */
  330. if ((mask >>= 5) == 0x40) {
  331. mask += mask;
  332. }
  333. /* Check for invalid sequences (longer than necessary)
  334. * and invalid chars. */
  335. if ( (wc < mask) /* Sequence not minimal length. */
  336. #ifdef KUHN
  337. #if UTF_8_MAX_LEN == 3
  338. #error broken since mask can overflow!!
  339. /* For plane 0, these are the only defined values.*/
  340. || (wc > 0xfffdU)
  341. #else
  342. /* Note that we don't need to worry about exceeding */
  343. /* 31 bits as that is the most that UTF-8 provides. */
  344. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  345. #endif
  346. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  347. #endif /* KUHN */
  348. ) {
  349. goto BAD;
  350. }
  351. goto COMPLETE;
  352. }
  353. }
  354. /* Character potentially valid but incomplete. */
  355. if (!allow_continuation) {
  356. if (count != wn) {
  357. return 0;
  358. }
  359. /* NOTE: The following can fail if you allow and then disallow
  360. * continuation!!! */
  361. #if UTF_8_MAX_LEN == 3
  362. #error broken since mask can overflow!!
  363. #endif
  364. /* Need to back up... */
  365. do {
  366. --s;
  367. } while ((mask >>= 5) >= 0x40);
  368. goto DONE;
  369. }
  370. ps->mask = (wchar_t) mask;
  371. ps->wc = (wchar_t) wc;
  372. *src = s;
  373. return (size_t) -2;
  374. }
  375. COMPLETE:
  376. *pwc = wc;
  377. pwc += incr;
  378. }
  379. #ifdef DECODER
  380. while (--count);
  381. #else
  382. while (wc && --count);
  383. if (!wc) {
  384. s = NULL;
  385. }
  386. #endif
  387. DONE:
  388. /* ps->wc is irrelavent here. */
  389. ps->mask = 0;
  390. if (pwc != wcbuf) {
  391. *src = s;
  392. }
  393. return wn - count;
  394. }
  395. #endif
  396. /**********************************************************************/
  397. #ifdef L__wchar_wcsntoutf8s
  398. size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
  399. const wchar_t **__restrict src, size_t wn)
  400. {
  401. register char *p;
  402. size_t len, t;
  403. __uwchar_t wc;
  404. const __uwchar_t *swc;
  405. int store;
  406. char buf[MB_LEN_MAX];
  407. char m;
  408. store = 1;
  409. if (!s) {
  410. s = buf;
  411. n = SIZE_MAX;
  412. store = 0;
  413. }
  414. t = n;
  415. swc = (const __uwchar_t *) *src;
  416. assert(swc != NULL);
  417. while (wn && t) {
  418. wc = *swc;
  419. *s = wc;
  420. len = 1;
  421. if (wc >= 0x80) {
  422. #ifdef KUHN
  423. if (
  424. #if UTF_8_MAX_LEN == 3
  425. /* For plane 0, these are the only defined values.*/
  426. /* Note that we don't need to worry about exceeding */
  427. /* 31 bits as that is the most that UTF-8 provides. */
  428. (wc > 0xfffdU)
  429. #else
  430. /* UTF_8_MAX_LEN == 6 */
  431. (wc > 0x7fffffffUL)
  432. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  433. #endif
  434. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  435. ) {
  436. return (size_t) -1;
  437. }
  438. #else /* KUHN */
  439. #if UTF_8_MAX_LEN != 3
  440. if (wc > 0x7fffffffUL) { /* Value too large. */
  441. return (size_t) -1;
  442. }
  443. #endif
  444. #endif /* KUHN */
  445. wc >>= 1;
  446. p = s;
  447. do {
  448. ++p;
  449. } while (wc >>= 5);
  450. wc = *swc;
  451. if ((len = p - s) > t) { /* Not enough space. */
  452. break;
  453. }
  454. m = 0x80;
  455. while( p>s ) {
  456. m = (m >> 1) | 0x80;
  457. *--p = (wc & 0x3f) | 0x80;
  458. wc >>= 6;
  459. }
  460. *s |= (m << 1);
  461. } else if (wc == 0) { /* End of string. */
  462. swc = NULL;
  463. break;
  464. }
  465. ++swc;
  466. --wn;
  467. t -= len;
  468. if (store) {
  469. s += len;
  470. }
  471. }
  472. *src = (const wchar_t *) swc;
  473. return n - t;
  474. }
  475. #endif
  476. /**********************************************************************/
  477. #ifdef L___mbsnrtowcs
  478. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  /* mbsnrtowcs is exported as a weak alias of __mbsnrtowcs. */
  size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                    size_t NMC, size_t len, mbstate_t *__restrict ps)
      __attribute__ ((__weak__, __alias__("__mbsnrtowcs")));

  /*
   * __mbsnrtowcs - convert at most NMC bytes of a multibyte string into at
   * most len wide characters.
   *
   * dst  destination; NULL means count only (len is then treated as
   *      SIZE_MAX and *src is left untouched).
   * src  in/out pointer to the input; set to NULL once the nul character
   *      has been converted.
   * ps   conversion state; NULL selects a function-private state object.
   *
   * Returns the number of wide characters produced, not counting a
   * terminating nul, or (size_t)-1 with errno set to EILSEQ when a byte is
   * not valid in a single-byte encoding.
   */
  size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                      size_t NMC, size_t len, mbstate_t *__restrict ps)
  {
      static mbstate_t private_state;     /* Zero-initialised (bss). */
      wchar_t scratch[1];
      const char *in;
      size_t remaining;
      int step;

      if (ps == NULL) {
          ps = &private_state;
      }

  #ifdef __CTYPE_HAS_UTF_8_LOCALES
      if (ENCODING == __ctype_encoding_utf8) {
          size_t r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1);

          /* An incomplete trailing character is reported as 0 here. */
          if (r == (size_t) -2) {
              return 0;
          }
          return r;
      }
  #endif

      if (dst != NULL) {
          step = 1;
      } else {
          /* Count-only mode: every result lands in the same scratch slot. */
          dst = scratch;
          len = SIZE_MAX;
          step = 0;
      }

      /* Since all the following encodings are single-byte encodings... */
      if (len > NMC) {
          len = NMC;
      }

      remaining = len;
      in = *src;

  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      if (ENCODING == __ctype_encoding_8_bit) {
          wchar_t wc;

          for ( ; remaining; --remaining, ++in, dst += step) {
              wc = (unsigned char) *in;
              if (wc >= 0x80) {   /* Non-ASCII: two-level table lookup. */
                  wc -= 0x80;
                  wc = __global_locale.tbl8c2wc[
                          (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
                           << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
                  if (!wc) {
                      goto bad;
                  }
              }
              *dst = wc;
              if (!wc) {          /* Terminating nul stored. */
                  in = NULL;
                  break;
              }
          }
          if (dst != scratch) {
              *src = in;
          }
          return len - remaining;
      }
  #endif

  #ifdef __UCLIBC_HAS_LOCALE__
      assert(ENCODING == __ctype_encoding_7_bit);
  #endif

      for ( ; remaining; --remaining, ++in, dst += step) {
          *dst = (unsigned char) *in;
          if (*dst == 0) {        /* Terminating nul stored. */
              in = NULL;
              break;
          }
          if (*dst >= 0x80) {
              goto bad;
          }
      }
      if (dst != scratch) {
          *src = in;
      }
      return len - remaining;

  bad:
      __set_errno(EILSEQ);
      return (size_t) -1;
  }
  563. #endif
  564. /**********************************************************************/
  565. #ifdef L___wcsnrtombs
  566. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  567. /* Note: We completely ignore ps in all currently supported conversions.
  568. * TODO: Check for valid state anyway? */
  569. size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  570. size_t NWC, size_t len, mbstate_t *__restrict ps)
  571. __attribute__ ((__weak__, __alias__("__wcsnrtombs")));
  572. size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  573. size_t NWC, size_t len, mbstate_t *__restrict ps)
  574. {
  575. const __uwchar_t *s;
  576. size_t count;
  577. int incr;
  578. char buf[MB_LEN_MAX];
  579. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  580. if (ENCODING == __ctype_encoding_utf8) {
  581. return _wchar_wcsntoutf8s(dst, len, src, NWC);
  582. }
  583. #endif /* __CTYPE_HAS_UTF_8_LOCALES */
  584. incr = 1;
  585. if (!dst) {
  586. dst = buf;
  587. len = SIZE_MAX;
  588. incr = 0;
  589. }
  590. /* Since all the following encodings are single-byte encodings... */
  591. if (len > NWC) {
  592. len = NWC;
  593. }
  594. count = len;
  595. s = (const __uwchar_t *) *src;
  596. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  597. if (ENCODING == __ctype_encoding_8_bit) {
  598. __uwchar_t wc;
  599. __uwchar_t u;
  600. while (count) {
  601. if ((wc = *s) <= 0x7f) {
  602. if (!(*dst = (unsigned char) wc)) {
  603. s = NULL;
  604. break;
  605. }
  606. } else {
  607. u = 0;
  608. if (wc <= Cwc2c_DOMAIN_MAX) {
  609. u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
  610. + Cwc2c_TT_SHIFT)];
  611. u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
  612. + ((wc >> Cwc2c_TT_SHIFT)
  613. & ((1 << Cwc2c_TI_SHIFT)-1))];
  614. u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
  615. + (u << Cwc2c_TT_SHIFT)
  616. + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
  617. }
  618. /* #define __WCHAR_REPLACEMENT_CHAR '?' */
  619. #ifdef __WCHAR_REPLACEMENT_CHAR
  620. *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
  621. #else /* __WCHAR_REPLACEMENT_CHAR */
  622. if (!u) {
  623. goto BAD;
  624. }
  625. *dst = (unsigned char) u;
  626. #endif /* __WCHAR_REPLACEMENT_CHAR */
  627. }
  628. ++s;
  629. dst += incr;
  630. --count;
  631. }
  632. if (dst != buf) {
  633. *src = (const wchar_t *) s;
  634. }
  635. return len - count;
  636. }
  637. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  638. #ifdef __UCLIBC_HAS_LOCALE__
  639. assert(ENCODING == __ctype_encoding_7_bit);
  640. #endif
  641. while (count) {
  642. if (*s >= 0x80) {
  643. #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
  644. BAD:
  645. #endif
  646. __set_errno(EILSEQ);
  647. return (size_t) -1;
  648. }
  649. if ((*dst = (unsigned char) *s) == 0) {
  650. s = NULL;
  651. break;
  652. }
  653. ++s;
  654. dst += incr;
  655. --count;
  656. }
  657. if (dst != buf) {
  658. *src = (const wchar_t *) s;
  659. }
  660. return len - count;
  661. }
  662. #endif
  663. /**********************************************************************/