wchar.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767
  1. /* Copyright (C) 2002 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
  18. *
  19. * Besides uClibc, I'm using this code in my libc for elks, which is
  20. * a 16-bit environment with a fairly limited compiler. It would make
  21. * things much easier for me if this file isn't modified unnecessarily.
  22. * In particular, please put any new or replacement functions somewhere
  23. * else, and modify the makefile to use your version instead.
  24. * Thanks. Manuel
  25. *
  26. * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
  27. /* May 23, 2002 Initial Notes:
  28. *
  29. * I'm still tweaking this stuff, but it passes the tests I've thrown
  30. * at it, and Erik needs it for the gcc port. The glibc extension
  31. * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
  32. * in the glibc source. I also need to fix the behavior of
  33. * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
  34. *
  35. * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
  36. * file on my platform (x86) show about 5-10% faster conversion speed than
  37. * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
  38. * individual mbrtowc()/wcrtomb() calls.
  39. *
  40. * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
  41. * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
  42. * needs to deal gracefully with whatever is sent to it. In that mode,
  43. * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
  44. * an arg to force that behavior, so the interface will be changing.
  45. *
  46. * I need to fix the error checking for 16-bit wide chars. This isn't
  47. * an issue for uClibc, but may be for ELKS. I'm currently not sure
  48. * if I'll use 16-bit, 32-bit, or configureable wchars in ELKS.
  49. *
  50. * July 1, 2002
  51. *
  52. * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
  53. * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
  54. * locales.
  55. * Enabled building of a C/POSIX-locale-only version, so full locale support
  56. * no longer needs to be enabled.
  57. *
  58. * Manuel
  59. */
  60. #define _GNU_SOURCE
  61. #define _ISOC99_SOURCE
  62. #include <errno.h>
  63. #include <stddef.h>
  64. #include <limits.h>
  65. #include <stdint.h>
  66. #include <inttypes.h>
  67. #include <stdlib.h>
  68. #include <stdio.h>
  69. #include <assert.h>
  70. #include <locale.h>
  71. #include <wchar.h>
  /*
   * Build configuration.
   *
   * ENCODING selects the encoding id the conversion routines test against.
   * With locale support it is read from the global locale; without it the
   * library is C/POSIX only, so every 8-bit and UTF-8 feature macro (and the
   * UTF-8 work functions) is switched off.
   */
  #ifdef __UCLIBC_HAS_LOCALE__
  #define ENCODING (__global_locale.encoding)
  #warning implement __CTYPE_HAS_UTF_8_LOCALES!
  #define __CTYPE_HAS_UTF_8_LOCALES
  #else
  #define ENCODING (__ctype_encoding_7_bit)
  #undef __CTYPE_HAS_8_BIT_LOCALES
  #undef __CTYPE_HAS_UTF_8_LOCALES
  #undef L__wchar_utf8sntowcs
  #undef L__wchar_wcsntoutf8s
  #endif

  /* Longest UTF-8 sequence handled: 6 bytes when wchar_t is wider than
   * 16 bits, otherwise 3 bytes. */
  #if WCHAR_MAX > 0xffffU
  #define UTF_8_MAX_LEN 6
  #else
  #define UTF_8_MAX_LEN 3
  #endif

  /* Uncomment to enable the stricter KUHN validity checks in the UTF-8
   * work functions (surrogates and 0xfffe/0xffff are rejected). */
  /* #define KUHN */
  /* Implementation-specific work functions. */

  /* UTF-8 -> wide: converts at most wn wide chars from at most n bytes at
   * *src, using ps for shift state; allow_continuation selects whether an
   * incomplete trailing sequence may be saved in ps. */
  extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
                                   const char **__restrict src, size_t n,
                                   mbstate_t *ps, int allow_continuation);

  /* Wide -> UTF-8: converts at most wn wide chars from *src into at most
   * n bytes at s. */
  extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
                                   const wchar_t **__restrict src, size_t wn);

  /* glibc extensions. */

  extern size_t __mbsnrtowcs(wchar_t *__restrict dst,
                             const char **__restrict src,
                             size_t NMC, size_t len, mbstate_t *__restrict ps);

  extern size_t __wcsnrtombs(char *__restrict dst,
                             const wchar_t **__restrict src,
                             size_t NWC, size_t len, mbstate_t *__restrict ps);
  102. /**********************************************************************/
  103. #ifdef L_btowc
  /*
   * btowc - widen a single byte to a wide character.
   *
   * c is either EOF or a byte value.  Returns the corresponding wide
   * character, or WEOF if c is EOF or does not form a valid character on
   * its own.
   */
  wint_t btowc(int c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      wchar_t wc;
      unsigned char buf[1];
      mbstate_t mbstate;

      if (c != EOF) {
          *buf = (unsigned char) c;
          mbstate.mask = 0;       /* Initialize the mbstate. */
          /* mbrtowc() takes a plain char pointer, hence the cast.  The
           * error returns (size_t) -1 and (size_t) -2 are both > 1, so
           * only a successful 0- or 1-byte conversion gets through. */
          if (mbrtowc(&wc, (const char *) buf, 1, &mbstate) <= 1) {
              return wc;
          }
      }
      return WEOF;
  #else  /* __CTYPE_HAS_8_BIT_LOCALES */
      /* If we don't have 8-bit locale support, then this is trivial since
       * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales.
       * The unsigned compare also rejects EOF and other negative values. */
      return (((unsigned int) c) < 0x80) ? (wint_t) c : WEOF;
  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  124. #endif
  125. /**********************************************************************/
  126. #ifdef L_wctob
  /*
   * wctob - narrow a wide character to a single byte.
   *
   * Returns the byte as an int when c converts to exactly one byte,
   * otherwise EOF.
   */
  int wctob(wint_t c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      unsigned char buf[MB_LEN_MAX];

      /* wcrtomb() takes a plain char pointer, hence the cast; the byte is
       * read back as unsigned so the result is never negative. */
      return (wcrtomb((char *) buf, c, NULL) == 1) ? *buf : EOF;
  #else  /* __CTYPE_HAS_8_BIT_LOCALES */
      /* If we don't have 8-bit locale support, then this is trivial since
       * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
      /* TODO: need unsigned version of wint_t... */
      /* The >= 0 test matters only where wint_t is a signed type. */
      return ((c >= 0) && (c < 0x80)) ? (int) c : EOF;
  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  141. #endif
  142. /**********************************************************************/
  143. #ifdef L_mbsinit
  144. int mbsinit(const mbstate_t *ps)
  145. {
  146. return !ps || !ps->mask;
  147. }
  148. #endif
  149. /**********************************************************************/
  150. #ifdef L_mbrlen
  /*
   * mbrlen - length in bytes of the next multibyte character.
   *
   * Thin wrapper around mbrtowc() that discards the converted wide
   * character.  When ps is NULL a private, function-local state object is
   * used instead.  The public name is a weak alias of __mbrlen.
   */
  size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
      __attribute__ ((__weak__, __alias__("__mbrlen")));

  size_t __mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
  {
      static mbstate_t mbstate;   /* Rely on bss 0-init. */

      if (ps == NULL) {
          ps = &mbstate;
      }
      return mbrtowc(NULL, s, n, ps);
  }
  158. #endif
  159. /**********************************************************************/
  160. #ifdef L_mbrtowc
  161. size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
  162. size_t n, mbstate_t *__restrict ps)
  163. {
  164. static mbstate_t mbstate; /* Rely on bss 0-init. */
  165. wchar_t wcbuf[1];
  166. const char *p;
  167. size_t r;
  168. char empty_string[1]; /* Avoid static to be fPIC friendly. */
  169. if (!ps) {
  170. ps = &mbstate;
  171. }
  172. if (!s) {
  173. pwc = (wchar_t *) s; /* NULL */
  174. empty_string[0] = 0; /* Init the empty string when necessary. */
  175. s = empty_string;
  176. n = 1;
  177. } else if (!n) {
  178. return (ps->mask && (ps->wc == 0xffffU)) /* TODO: change error code? */
  179. ? ((size_t) -1) : ((size_t) -2);
  180. }
  181. p = s;
  182. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  183. /* Need to do this here since mbsrtowcs doesn't allow incompletes. */
  184. if (ENCODING == __ctype_encoding_utf8) {
  185. r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
  186. return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
  187. }
  188. #endif
  189. r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
  190. if (((ssize_t) r) >= 0) {
  191. if (pwc) {
  192. *pwc = *wcbuf;
  193. }
  194. }
  195. return (size_t) r;
  196. }
  197. #endif
  198. /**********************************************************************/
  199. #ifdef L_wcrtomb
  /*
   * wcrtomb - convert one wide character to its multibyte form.
   *
   * s   destination buffer; NULL means "convert L'\0' into a scratch
   *     buffer", as if wcrtomb(buf, L'\0', ps) had been called.
   * wc  wide character to convert.
   * ps  passed through to __wcsnrtombs().
   *
   * Returns the number of bytes written.  A converted nul character
   * counts as one byte, so the result is never 0.  Any other value from
   * __wcsnrtombs(), including (size_t) -1, is returned unchanged.
   *
   * Note: We completely ignore ps in all currently supported conversions.
   * TODO: Check for valid state anyway?
   */
  size_t wcrtomb(register char *__restrict s, wchar_t wc,
                 mbstate_t *__restrict ps)
  {
      wchar_t wcbuf[1];
      const wchar_t *pwc;
      size_t r;
      char buf[MB_LEN_MAX];

      if (!s) {
          s = buf;
          wc = 0;
      }

      pwc = wcbuf;
      wcbuf[0] = wc;

      /* Convert exactly one wide character.  Asking for more would make
       * the converter run on and store a terminating nul after the
       * converted character, one byte past what wcrtomb may write. */
      r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);

      return (r != 0) ? r : 1;
  }
  219. #endif
  220. /**********************************************************************/
  221. #ifdef L_mbsrtowcs
  /*
   * mbsrtowcs - convert a multibyte string to a wide-character string.
   *
   * Forwards to __mbsnrtowcs() with no limit on the number of input bytes
   * (SIZE_MAX).  When ps is NULL a function-local static state is used.
   */
  size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      static mbstate_t mbstate;   /* Rely on bss 0-init. */

      if (ps == NULL) {
          ps = &mbstate;
      }
      return __mbsnrtowcs(dst, src, SIZE_MAX, len, ps);
  }
  229. #endif
  230. /**********************************************************************/
  231. #ifdef L_wcsrtombs
  /*
   * wcsrtombs - convert a wide-character string to a multibyte string.
   *
   * Forwards to __wcsnrtombs() with no limit on the number of input wide
   * characters (SIZE_MAX); ps is passed through unchanged.
   *
   * Note: We completely ignore ps in all currently supported conversions.
   * TODO: Check for valid state anyway?
   */
  size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      return __wcsnrtombs(dst, src, SIZE_MAX, len, ps);
  }
  239. #endif
  240. /**********************************************************************/
  241. #ifdef L__wchar_utf8sntowcs
  /* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
   * UTF-8-test.txt stress test.
   */
  245. /* #define DECODER */
  246. #ifdef DECODER
  247. #ifndef KUHN
  248. #define KUHN
  249. #endif
  250. #endif
  251. size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
  252. const char **__restrict src, size_t n,
  253. mbstate_t *ps, int allow_continuation)
  254. {
  255. register const char *s;
  256. __uwchar_t mask;
  257. __uwchar_t wc;
  258. wchar_t wcbuf[1];
  259. size_t count;
  260. int incr;
  261. s = *src;
  262. assert(s != NULL);
  263. assert(ps != NULL);
  264. incr = 1;
  265. if (!pwc) {
  266. pwc = wcbuf;
  267. wn = SIZE_MAX;
  268. incr = 0;
  269. }
  270. /* This is really here only to support the glibc extension function
  271. * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
  272. * check on the validity of the mbstate. */
  273. if (!(count = wn)) {
  274. return 0;
  275. }
  276. if ((mask = (__uwchar_t) ps->mask) != 0) { /* A continuation... */
  277. #ifdef DECODER
  278. wc = (__uwchar_t) ps->wc;
  279. if (n) {
  280. goto CONTINUE;
  281. }
  282. goto DONE;
  283. #else
  284. if ((wc = (__uwchar_t) ps->wc) != 0xffffU) {
  285. /* TODO: change error code here and below? */
  286. if (n) {
  287. goto CONTINUE;
  288. }
  289. goto DONE;
  290. }
  291. return (size_t) -1; /* We're in an error state. */
  292. #endif
  293. }
  294. do {
  295. if (!n) {
  296. goto DONE;
  297. }
  298. --n;
  299. if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
  300. mask = 0x40;
  301. #warning fix range for 16 bit wides
  302. if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
  303. goto START;
  304. }
  305. BAD:
  306. #ifdef DECODER
  307. wc = 0xfffd;
  308. goto COMPLETE;
  309. #else
  310. ps->mask = mask;
  311. ps->wc = 0xffffU;
  312. return (size_t) -1; /* Illegal start byte! */
  313. #endif
  314. CONTINUE:
  315. while (n) {
  316. --n;
  317. if ((*s & 0xc0) != 0x80) {
  318. goto BAD;
  319. }
  320. mask <<= 5;
  321. wc <<= 6;
  322. wc += (*s & 0x3f); /* keep seperate for bcc (smaller code) */
  323. ++s;
  324. START:
  325. wc &= ~(mask << 1);
  326. if ((wc & mask) == 0) { /* Character completed. */
  327. if ((mask >>= 5) == 0x40) {
  328. mask += mask;
  329. }
  330. /* Check for invalid sequences (longer than necessary)
  331. * and invalid chars. */
  332. if ( (wc < mask) /* Sequence not minimal length. */
  333. #ifdef KUHN
  334. #if UTF_8_MAX_LEN == 3
  335. #error broken since mask can overflow!!
  336. /* For plane 0, these are the only defined values.*/
  337. || (wc > 0xfffdU)
  338. #else
  339. /* Note that we don't need to worry about exceeding */
  340. /* 31 bits as that is the most that UTF-8 provides. */
  341. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  342. #endif
  343. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  344. #endif /* KUHN */
  345. ) {
  346. goto BAD;
  347. }
  348. goto COMPLETE;
  349. }
  350. }
  351. /* Character potentially valid but incomplete. */
  352. if (!allow_continuation) {
  353. if (count != wn) {
  354. return 0;
  355. }
  356. /* NOTE: The following can fail if you allow and then disallow
  357. * continuation!!! */
  358. #if UTF_8_MAX_LEN == 3
  359. #error broken since mask can overflow!!
  360. #endif
  361. /* Need to back up... */
  362. do {
  363. --s;
  364. } while ((mask >>= 5) >= 0x40);
  365. goto DONE;
  366. }
  367. ps->mask = (wchar_t) mask;
  368. ps->wc = (wchar_t) wc;
  369. *src = s;
  370. return (size_t) -2;
  371. }
  372. COMPLETE:
  373. *pwc = wc;
  374. pwc += incr;
  375. }
  376. #ifdef DECODER
  377. while (--count);
  378. #else
  379. while (wc && --count);
  380. if (!wc) {
  381. s = NULL;
  382. }
  383. #endif
  384. DONE:
  385. /* ps->wc is irrelavent here. */
  386. ps->mask = 0;
  387. if (pwc != wcbuf) {
  388. *src = s;
  389. }
  390. return wn - count;
  391. }
  392. #endif
  393. /**********************************************************************/
  394. #ifdef L__wchar_wcsntoutf8s
  395. size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
  396. const wchar_t **__restrict src, size_t wn)
  397. {
  398. register char *p;
  399. size_t len, t;
  400. __uwchar_t wc;
  401. const __uwchar_t *swc;
  402. int store;
  403. char buf[MB_LEN_MAX];
  404. char m;
  405. store = 1;
  406. if (!s) {
  407. s = buf;
  408. n = SIZE_MAX;
  409. store = 0;
  410. }
  411. t = n;
  412. swc = (const __uwchar_t *) *src;
  413. assert(swc != NULL);
  414. while (wn && t) {
  415. wc = *swc;
  416. *s = wc;
  417. len = 1;
  418. if (wc >= 0x80) {
  419. #ifdef KUHN
  420. if (
  421. #if UTF_8_MAX_LEN == 3
  422. /* For plane 0, these are the only defined values.*/
  423. /* Note that we don't need to worry about exceeding */
  424. /* 31 bits as that is the most that UTF-8 provides. */
  425. (wc > 0xfffdU)
  426. #else
  427. /* UTF_8_MAX_LEN == 6 */
  428. (wc > 0x7fffffffUL)
  429. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  430. #endif
  431. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  432. ) {
  433. return (size_t) -1;
  434. }
  435. #else /* KUHN */
  436. #if UTF_8_MAX_LEN != 3
  437. if (wc > 0x7fffffffUL) { /* Value too large. */
  438. return (size_t) -1;
  439. }
  440. #endif
  441. #endif /* KUHN */
  442. wc >>= 1;
  443. p = s;
  444. do {
  445. ++p;
  446. } while (wc >>= 5);
  447. wc = *swc;
  448. if ((len = p - s) > t) { /* Not enough space. */
  449. break;
  450. }
  451. m = 0x80;
  452. while( p>s ) {
  453. m = (m >> 1) | 0x80;
  454. *--p = (wc & 0x3f) | 0x80;
  455. wc >>= 6;
  456. }
  457. *s |= (m << 1);
  458. } else if (wc == 0) { /* End of string. */
  459. swc = NULL;
  460. break;
  461. }
  462. ++swc;
  463. --wn;
  464. t -= len;
  465. if (store) {
  466. s += len;
  467. }
  468. }
  469. *src = (const wchar_t *) swc;
  470. return n - t;
  471. }
  472. #endif
  473. /**********************************************************************/
  474. #ifdef L___mbsnrtowcs
  475. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  476. size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
  477. size_t NMC, size_t len, mbstate_t *__restrict ps)
  478. __attribute__ ((__weak__, __alias__("__mbsnrtowcs")));
  479. size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
  480. size_t NMC, size_t len, mbstate_t *__restrict ps)
  481. {
  482. static mbstate_t mbstate; /* Rely on bss 0-init. */
  483. wchar_t wcbuf[1];
  484. const char *s;
  485. size_t count, r;
  486. int incr;
  487. if (!ps) {
  488. ps = &mbstate;
  489. }
  490. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  491. if (ENCODING == __ctype_encoding_utf8) {
  492. return ((r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1))
  493. != (size_t) -2) ? r : 0;
  494. }
  495. #endif
  496. incr = 1;
  497. if (!dst) {
  498. dst = wcbuf;
  499. len = SIZE_MAX;
  500. incr = 0;
  501. }
  502. /* Since all the following encodings are single-byte encodings... */
  503. if (len > NMC) {
  504. len = NMC;
  505. }
  506. count = len;
  507. s = *src;
  508. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  509. if (ENCODING == __ctype_encoding_8_bit) {
  510. wchar_t wc;
  511. while (count) {
  512. if ((wc = ((unsigned char)(*s))) >= 0x80) { /* Non-ASCII... */
  513. wc -= 0x80;
  514. wc = __global_locale.tbl8c2wc[
  515. (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
  516. << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
  517. if (!wc) {
  518. goto BAD;
  519. }
  520. }
  521. if (!(*dst = wc)) {
  522. s = NULL;
  523. break;
  524. }
  525. dst += incr;
  526. ++s;
  527. --count;
  528. }
  529. if (dst != wcbuf) {
  530. *src = s;
  531. }
  532. return len - count;
  533. }
  534. #endif
  535. assert(ENCODING == __ctype_encoding_7_bit);
  536. while (count) {
  537. if ((*dst = (unsigned char) *s) == 0) {
  538. s = NULL;
  539. break;
  540. }
  541. if (*dst >= 0x80) {
  542. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  543. BAD:
  544. #endif
  545. __set_errno(EILSEQ);
  546. return (size_t) -1;
  547. }
  548. ++s;
  549. dst += incr;
  550. --count;
  551. }
  552. if (dst != wcbuf) {
  553. *src = s;
  554. }
  555. return len - count;
  556. }
  557. #endif
  558. /**********************************************************************/
  559. #ifdef L___wcsnrtombs
  560. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  561. /* Note: We completely ignore ps in all currently supported conversions.
  562. * TODO: Check for valid state anyway? */
  563. size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  564. size_t NWC, size_t len, mbstate_t *__restrict ps)
  565. __attribute__ ((__weak__, __alias__("__wcsnrtombs")));
  566. size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  567. size_t NWC, size_t len, mbstate_t *__restrict ps)
  568. {
  569. const __uwchar_t *s;
  570. size_t count;
  571. int incr;
  572. char buf[MB_LEN_MAX];
  573. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  574. if (ENCODING == __ctype_encoding_utf8) {
  575. return _wchar_wcsntoutf8s(dst, len, src, NWC);
  576. }
  577. #endif /* __CTYPE_HAS_UTF_8_LOCALES */
  578. incr = 1;
  579. if (!dst) {
  580. dst = buf;
  581. len = SIZE_MAX;
  582. incr = 0;
  583. }
  584. /* Since all the following encodings are single-byte encodings... */
  585. if (len > NWC) {
  586. len = NWC;
  587. }
  588. count = len;
  589. s = (const __uwchar_t *) *src;
  590. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  591. if (ENCODING == __ctype_encoding_8_bit) {
  592. __uwchar_t wc;
  593. __uwchar_t u;
  594. while (count) {
  595. if ((wc = *s) <= 0x7f) {
  596. if (!(*dst = (unsigned char) wc)) {
  597. s = NULL;
  598. break;
  599. }
  600. } else {
  601. u = 0;
  602. if (wc <= Cwc2c_DOMAIN_MAX) {
  603. u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
  604. + Cwc2c_TT_SHIFT)];
  605. u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
  606. + ((wc >> Cwc2c_TT_SHIFT)
  607. & ((1 << Cwc2c_TI_SHIFT)-1))];
  608. u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
  609. + (u << Cwc2c_TT_SHIFT)
  610. + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
  611. }
  612. /* #define __WCHAR_REPLACEMENT_CHAR '?' */
  613. #ifdef __WCHAR_REPLACEMENT_CHAR
  614. *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
  615. #else /* __WCHAR_REPLACEMENT_CHAR */
  616. if (!u) {
  617. goto BAD;
  618. }
  619. *dst = (unsigned char) u;
  620. #endif /* __WCHAR_REPLACEMENT_CHAR */
  621. }
  622. ++s;
  623. dst += incr;
  624. --count;
  625. }
  626. if (dst != buf) {
  627. *src = (const wchar_t *) s;
  628. }
  629. return len - count;
  630. }
  631. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  632. assert(ENCODING == __ctype_encoding_7_bit);
  633. while (count) {
  634. if (*s >= 0x80) {
  635. #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
  636. BAD:
  637. #endif
  638. __set_errno(EILSEQ);
  639. return (size_t) -1;
  640. }
  641. if ((*dst = (unsigned char) *s) == 0) {
  642. s = NULL;
  643. break;
  644. }
  645. ++s;
  646. dst += incr;
  647. --count;
  648. }
  649. if (dst != buf) {
  650. *src = (const wchar_t *) s;
  651. }
  652. return len - count;
  653. }
  654. #endif
  655. /**********************************************************************/