wchar.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771
  1. /* Copyright (C) 2002 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
  18. *
  19. * Besides uClibc, I'm using this code in my libc for elks, which is
  20. * a 16-bit environment with a fairly limited compiler. It would make
  21. * things much easier for me if this file isn't modified unnecessarily.
  22. * In particular, please put any new or replacement functions somewhere
  23. * else, and modify the makefile to use your version instead.
  24. * Thanks. Manuel
  25. *
  26. * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
  27. /* May 23, 2002 Initial Notes:
  28. *
  29. * I'm still tweaking this stuff, but it passes the tests I've thrown
  30. * at it, and Erik needs it for the gcc port. The glibc extension
  31. * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
  32. * in the glibc source. I also need to fix the behavior of
  33. * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
  34. *
  35. * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
  36. * file on my platform (x86) show about 5-10% faster conversion speed than
  37. * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
  38. * individual mbrtowc()/wcrtomb() calls.
  39. *
  40. * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
  41. * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
  42. * needs to deal gracefully with whatever is sent to it. In that mode,
  43. * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
  44. * an arg to force that behavior, so the interface will be changing.
  45. *
  46. * I need to fix the error checking for 16-bit wide chars. This isn't
  47. * an issue for uClibc, but may be for ELKS. I'm currently not sure
  48. * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
  49. *
  50. * July 1, 2002
  51. *
  52. * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
  53. * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
  54. * locales.
  55. * Enabled building of a C/POSIX-locale-only version, so full locale support
  56. * no longer needs to be enabled.
  57. *
  58. * Manuel
  59. */
  60. #define _GNU_SOURCE
  61. #define _ISOC99_SOURCE
  62. #include <errno.h>
  63. #include <stddef.h>
  64. #include <limits.h>
  65. #include <stdint.h>
  66. #include <inttypes.h>
  67. #include <stdlib.h>
  68. #include <stdio.h>
  69. #include <assert.h>
  70. #include <locale.h>
  71. #include <wchar.h>
  72. #ifdef __UCLIBC_HAS_LOCALE__
  73. #define ENCODING (__global_locale.encoding)
  74. #warning implement __CTYPE_HAS_UTF_8_LOCALES!
  75. #define __CTYPE_HAS_UTF_8_LOCALES
  76. #else
  77. #undef __CTYPE_HAS_8_BIT_LOCALES
  78. #undef __CTYPE_HAS_UTF_8_LOCALES
  79. #undef L__wchar_utf8sntowcs
  80. #undef L__wchar_wcsntoutf8s
  81. #endif
  82. #if WCHAR_MAX > 0xffffU
  83. #define UTF_8_MAX_LEN 6
  84. #else
  85. #define UTF_8_MAX_LEN 3
  86. #endif
  87. /* #define KUHN */
  /* Implementation-specific work functions. */

  /* UTF-8 to wide-character worker.  Reads at most n bytes starting at
   * *src and produces at most wn wide characters at pwc; ps carries the
   * state of a partially read character, and allow_continuation selects
   * whether an incomplete trailing sequence may be saved in ps. */
  extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
                                   const char **__restrict src, size_t n,
                                   mbstate_t *ps, int allow_continuation);

  /* Wide-character to UTF-8 worker.  Reads at most wn wide characters
   * starting at *src and writes at most n bytes at s. */
  extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
                                   const wchar_t **__restrict src, size_t wn);

  /* glibc extensions. */

  /* Like mbsrtowcs(), with NMC bounding the number of input bytes read. */
  extern size_t __mbsnrtowcs(wchar_t *__restrict dst,
                             const char **__restrict src,
                             size_t NMC, size_t len, mbstate_t *__restrict ps);

  /* Like wcsrtombs(), with NWC bounding the number of wide chars read. */
  extern size_t __wcsnrtombs(char *__restrict dst,
                             const wchar_t **__restrict src,
                             size_t NWC, size_t len, mbstate_t *__restrict ps);
  101. /**********************************************************************/
  102. #ifdef L_btowc
  /*
   * btowc - convert a single byte to the corresponding wide character.
   *
   * c: the byte value to convert, or EOF.
   *
   * Returns the wide character for c, or WEOF when c is EOF or is not a
   * valid one-byte character.
   */
  wint_t btowc(int c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      mbstate_t state;
      unsigned char byte[1];
      wchar_t result;

      if (c == EOF) {
          return WEOF;
      }

      byte[0] = (unsigned char) c;
      state.mask = 0;             /* Begin in the initial shift state. */
      if (mbrtowc(&result, byte, 1, &state) > 1) {
          return WEOF;            /* Error or incomplete character. */
      }
      return result;
  #else  /* __CTYPE_HAS_8_BIT_LOCALES */
      /* Without 8-bit locale support only 0-0x7f can be valid, since
       * anything else is illegal in the C/POSIX and UTF-8 locales.  The
       * unsigned comparison rejects negative values such as EOF too. */
      if (((unsigned int) c) >= 0x80) {
          return WEOF;
      }
      return c;
  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  123. #endif
  124. /**********************************************************************/
  125. #ifdef L_wctob
  126. /* Note: wctob() has no state argument; it passes a NULL state to wcrtomb(), which ignores it in all currently supported conversions. */
  /*
   * wctob - convert a wide character to its single-byte representation.
   *
   * c: the wide character to convert.
   *
   * Returns the byte value when c converts to exactly one byte, and EOF
   * otherwise.
   */
  int wctob(wint_t c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      unsigned char out[MB_LEN_MAX];

      if (wcrtomb(out, c, NULL) != 1) {
          return EOF;
      }
      return out[0];
  #else  /* __CTYPE_HAS_8_BIT_LOCALES */
      /* Without 8-bit locale support only 0-0x7f can be valid, since
       * anything else is illegal in the C/POSIX and UTF-8 locales. */
      /* TODO: need unsigned version of wint_t... */
      if ((c >= 0) && (c < 0x80)) {
          return c;
      }
      return EOF;
  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  140. #endif
  141. /**********************************************************************/
  142. #ifdef L_mbsinit
  143. int mbsinit(const mbstate_t *ps)
  144. {
  145. return !ps || !ps->mask;
  146. }
  147. #endif
  148. /**********************************************************************/
  149. #ifdef L_mbrlen
  /* mbrlen is a weak alias of __mbrlen. */
  size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
      __attribute__ ((__weak__, __alias__("__mbrlen")));

  /*
   * __mbrlen - measure the next multibyte character without storing it.
   *
   * s:  the bytes to examine.
   * n:  the maximum number of bytes to examine.
   * ps: the conversion state; when NULL a state private to this function
   *     is used instead.
   *
   * Returns whatever mbrtowc() returns for a NULL destination.
   */
  size_t __mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
  {
      static mbstate_t internal_state;    /* Zeroed because it lives in bss. */

      if (ps == NULL) {
          ps = &internal_state;
      }
      return mbrtowc(NULL, s, n, ps);
  }
  157. #endif
  158. /**********************************************************************/
  159. #ifdef L_mbrtowc
  160. size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
  161. size_t n, mbstate_t *__restrict ps)
  162. {
  163. static mbstate_t mbstate; /* Rely on bss 0-init. */
  164. wchar_t wcbuf[1];
  165. const char *p;
  166. size_t r;
  167. char empty_string[1]; /* Avoid static to be fPIC friendly. */
  168. if (!ps) {
  169. ps = &mbstate;
  170. }
  171. if (!s) {
  172. pwc = (wchar_t *) s; /* NULL */
  173. empty_string[0] = 0; /* Init the empty string when necessary. */
  174. s = empty_string;
  175. n = 1;
  176. } else if (!n) {
  177. return (ps->mask && (ps->wc == 0xffffU)) /* TODO: change error code? */
  178. ? ((size_t) -1) : ((size_t) -2);
  179. }
  180. p = s;
  181. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  182. /* Need to do this here since mbsrtowcs doesn't allow incompletes. */
  183. if (ENCODING == __ctype_encoding_utf8) {
  184. r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
  185. return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
  186. }
  187. #endif
  188. r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
  189. if (((ssize_t) r) >= 0) {
  190. if (pwc) {
  191. *pwc = *wcbuf;
  192. }
  193. }
  194. return (size_t) r;
  195. }
  196. #endif
  197. /**********************************************************************/
  198. #ifdef L_wcrtomb
  199. /* Note: We completely ignore ps in all currently supported conversions. */
  200. /* TODO: Check for valid state anyway? */
  /*
   * wcrtomb - convert one wide character to its multibyte representation.
   *
   * s:  the destination buffer; when NULL the nul wide character is
   *     converted into an internal buffer instead.
   * wc: the wide character to convert.
   * ps: the conversion state, passed through to __wcsnrtombs().
   *
   * Returns the number of bytes stored (1 for the nul character), or
   * whatever error value __wcsnrtombs() reports.
   */
  size_t wcrtomb(register char *__restrict s, wchar_t wc,
                 mbstate_t *__restrict ps)
  {
      wchar_t wcbuf[1];
      const wchar_t *pwc;
      size_t r;
      char buf[MB_LEN_MAX];

      if (!s) {
          s = buf;
          wc = 0;
      }

      pwc = wcbuf;
      wcbuf[0] = wc;

      /* Convert exactly one wide character.  Letting the worker run on
       * to a terminating nul would make it store an extra nul byte after
       * the converted character, overrunning a destination that only
       * has room for the character itself. */
      r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);

      /* The worker does not count a nul character; it is still 1 byte. */
      return (r != 0) ? r : 1;
  }
  218. #endif
  219. /**********************************************************************/
  220. #ifdef L_mbsrtowcs
  /*
   * mbsrtowcs - convert a multibyte string to a wide-character string.
   *
   * Forwards to __mbsnrtowcs() with SIZE_MAX as the input byte limit.
   * When ps is NULL a state private to this function is used instead.
   */
  size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      static mbstate_t internal_state;    /* Zeroed because it lives in bss. */

      if (ps == NULL) {
          ps = &internal_state;
      }
      return __mbsnrtowcs(dst, src, SIZE_MAX, len, ps);
  }
  228. #endif
  229. /**********************************************************************/
  230. #ifdef L_wcsrtombs
  231. /* Note: We completely ignore ps in all currently supported conversions.
  232. * TODO: Check for valid state anyway? */
  /*
   * wcsrtombs - convert a wide-character string to a multibyte string.
   *
   * Forwards to __wcsnrtombs() with SIZE_MAX as the limit on the number
   * of wide characters read; ps is passed through unchanged.
   */
  size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      const size_t no_input_limit = SIZE_MAX;

      return __wcsnrtombs(dst, src, no_input_limit, len, ps);
  }
  238. #endif
  239. /**********************************************************************/
  240. #ifdef L__wchar_utf8sntowcs
  241. /* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
  242. * UTF-8-test.txt stress test.
  243. */
  244. /* #define DECODER */
  245. #ifdef DECODER
  246. #ifndef KUHN
  247. #define KUHN
  248. #endif
  249. #endif
  250. size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
  251. const char **__restrict src, size_t n,
  252. mbstate_t *ps, int allow_continuation)
  253. {
  254. register const char *s;
  255. __uwchar_t mask;
  256. __uwchar_t wc;
  257. wchar_t wcbuf[1];
  258. size_t count;
  259. int incr;
  260. s = *src;
  261. assert(s != NULL);
  262. assert(ps != NULL);
  263. incr = 1;
  264. if (!pwc) {
  265. pwc = wcbuf;
  266. wn = SIZE_MAX;
  267. incr = 0;
  268. }
  269. /* This is really here only to support the glibc extension function
  270. * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
  271. * check on the validity of the mbstate. */
  272. if (!(count = wn)) {
  273. return 0;
  274. }
  275. if ((mask = (__uwchar_t) ps->mask) != 0) { /* A continuation... */
  276. #ifdef DECODER
  277. wc = (__uwchar_t) ps->wc;
  278. if (n) {
  279. goto CONTINUE;
  280. }
  281. goto DONE;
  282. #else
  283. if ((wc = (__uwchar_t) ps->wc) != 0xffffU) {
  284. /* TODO: change error code here and below? */
  285. if (n) {
  286. goto CONTINUE;
  287. }
  288. goto DONE;
  289. }
  290. return (size_t) -1; /* We're in an error state. */
  291. #endif
  292. }
  293. do {
  294. if (!n) {
  295. goto DONE;
  296. }
  297. --n;
  298. if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
  299. mask = 0x40;
  300. #warning fix range for 16 bit wides
  301. if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
  302. goto START;
  303. }
  304. BAD:
  305. #ifdef DECODER
  306. wc = 0xfffd;
  307. goto COMPLETE;
  308. #else
  309. ps->mask = mask;
  310. ps->wc = 0xffffU;
  311. return (size_t) -1; /* Illegal start byte! */
  312. #endif
  313. CONTINUE:
  314. while (n) {
  315. --n;
  316. if ((*s & 0xc0) != 0x80) {
  317. goto BAD;
  318. }
  319. mask <<= 5;
  320. wc <<= 6;
  321. wc += (*s & 0x3f); /* keep seperate for bcc (smaller code) */
  322. ++s;
  323. START:
  324. wc &= ~(mask << 1);
  325. if ((wc & mask) == 0) { /* Character completed. */
  326. if ((mask >>= 5) == 0x40) {
  327. mask += mask;
  328. }
  329. /* Check for invalid sequences (longer than necessary)
  330. * and invalid chars. */
  331. if ( (wc < mask) /* Sequence not minimal length. */
  332. #ifdef KUHN
  333. #if UTF_8_MAX_LEN == 3
  334. #error broken since mask can overflow!!
  335. /* For plane 0, these are the only defined values.*/
  336. || (wc > 0xfffdU)
  337. #else
  338. /* Note that we don't need to worry about exceeding */
  339. /* 31 bits as that is the most that UTF-8 provides. */
  340. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  341. #endif
  342. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  343. #endif /* KUHN */
  344. ) {
  345. goto BAD;
  346. }
  347. goto COMPLETE;
  348. }
  349. }
  350. /* Character potentially valid but incomplete. */
  351. if (!allow_continuation) {
  352. if (count != wn) {
  353. return 0;
  354. }
  355. /* NOTE: The following can fail if you allow and then disallow
  356. * continuation!!! */
  357. #if UTF_8_MAX_LEN == 3
  358. #error broken since mask can overflow!!
  359. #endif
  360. /* Need to back up... */
  361. do {
  362. --s;
  363. } while ((mask >>= 5) >= 0x40);
  364. goto DONE;
  365. }
  366. ps->mask = (wchar_t) mask;
  367. ps->wc = (wchar_t) wc;
  368. *src = s;
  369. return (size_t) -2;
  370. }
  371. COMPLETE:
  372. *pwc = wc;
  373. pwc += incr;
  374. }
  375. #ifdef DECODER
  376. while (--count);
  377. #else
  378. while (wc && --count);
  379. if (!wc) {
  380. s = NULL;
  381. }
  382. #endif
  383. DONE:
  384. /* ps->wc is irrelavent here. */
  385. ps->mask = 0;
  386. if (pwc != wcbuf) {
  387. *src = s;
  388. }
  389. return wn - count;
  390. }
  391. #endif
  392. /**********************************************************************/
  393. #ifdef L__wchar_wcsntoutf8s
  394. size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
  395. const wchar_t **__restrict src, size_t wn)
  396. {
  397. register char *p;
  398. size_t len, t;
  399. __uwchar_t wc;
  400. const __uwchar_t *swc;
  401. int store;
  402. char buf[MB_LEN_MAX];
  403. char m;
  404. store = 1;
  405. if (!s) {
  406. s = buf;
  407. n = SIZE_MAX;
  408. store = 0;
  409. }
  410. t = n;
  411. swc = (const __uwchar_t *) *src;
  412. assert(swc != NULL);
  413. while (wn && t) {
  414. wc = *swc;
  415. *s = wc;
  416. len = 1;
  417. if (wc >= 0x80) {
  418. #ifdef KUHN
  419. if (
  420. #if UTF_8_MAX_LEN == 3
  421. /* For plane 0, these are the only defined values.*/
  422. /* Note that we don't need to worry about exceeding */
  423. /* 31 bits as that is the most that UTF-8 provides. */
  424. (wc > 0xfffdU)
  425. #else
  426. /* UTF_8_MAX_LEN == 6 */
  427. (wc > 0x7fffffffUL)
  428. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  429. #endif
  430. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  431. ) {
  432. return (size_t) -1;
  433. }
  434. #else /* KUHN */
  435. #if UTF_8_MAX_LEN != 3
  436. if (wc > 0x7fffffffUL) { /* Value too large. */
  437. return (size_t) -1;
  438. }
  439. #endif
  440. #endif /* KUHN */
  441. wc >>= 1;
  442. p = s;
  443. do {
  444. ++p;
  445. } while (wc >>= 5);
  446. wc = *swc;
  447. if ((len = p - s) > t) { /* Not enough space. */
  448. break;
  449. }
  450. m = 0x80;
  451. while( p>s ) {
  452. m = (m >> 1) | 0x80;
  453. *--p = (wc & 0x3f) | 0x80;
  454. wc >>= 6;
  455. }
  456. *s |= (m << 1);
  457. } else if (wc == 0) { /* End of string. */
  458. swc = NULL;
  459. break;
  460. }
  461. ++swc;
  462. --wn;
  463. t -= len;
  464. if (store) {
  465. s += len;
  466. }
  467. }
  468. *src = (const wchar_t *) swc;
  469. return n - t;
  470. }
  471. #endif
  472. /**********************************************************************/
  473. #ifdef L___mbsnrtowcs
  474. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  /* mbsnrtowcs is a weak alias of __mbsnrtowcs. */
  size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                    size_t NMC, size_t len, mbstate_t *__restrict ps)
      __attribute__ ((__weak__, __alias__("__mbsnrtowcs")));

  /*
   * __mbsnrtowcs - convert at most NMC bytes of a multibyte string into
   * at most len wide characters.
   *
   * dst: destination; when NULL nothing is kept, len is treated as
   *      SIZE_MAX, and *src is left untouched.
   * src: in/out pointer to the input; set to NULL once the terminating
   *      nul has been converted.
   * NMC: maximum number of input bytes to read.
   * len: maximum number of wide characters to produce.
   * ps:  conversion state; when NULL a state private to this function is
   *      used instead.
   *
   * Returns the number of wide characters converted, not counting a
   * terminating nul.  In the single-byte encodings an invalid byte
   * returns (size_t)-1 with errno set to EILSEQ.  In the UTF-8 encoding
   * the work is delegated to _wchar_utf8sntowcs() and its (size_t)-2
   * result is reported as 0.
   */
  size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                      size_t NMC, size_t len, mbstate_t *__restrict ps)
  {
      static mbstate_t internal_state;    /* Zeroed because it lives in bss. */
      wchar_t scratch[1];
      const char *in;
      size_t remaining;
      int step;

      if (ps == NULL) {
          ps = &internal_state;
      }

  #ifdef __CTYPE_HAS_UTF_8_LOCALES
      if (ENCODING == __ctype_encoding_utf8) {
          size_t converted;

          converted = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1);
          if (converted == (size_t) -2) {
              return 0;
          }
          return converted;
      }
  #endif

      if (dst != NULL) {
          step = 1;
      } else {
          /* Counting only: keep overwriting the scratch element. */
          dst = scratch;
          len = SIZE_MAX;
          step = 0;
      }

      /* Since all the following encodings are single-byte encodings... */
      if (NMC < len) {
          len = NMC;
      }

      remaining = len;
      in = *src;

  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      if (ENCODING == __ctype_encoding_8_bit) {
          wchar_t wc;

          for ( ; remaining != 0; --remaining, ++in, dst += step) {
              wc = (unsigned char) *in;
              if (wc >= 0x80) {   /* Non-ASCII: map through the locale tables. */
                  wc -= 0x80;
                  wc = __global_locale.tbl8c2wc[
                      (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
                       << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
                  if (wc == 0) {
                      goto ILLEGAL;
                  }
              }
              *dst = wc;
              if (wc == 0) {      /* Terminating nul: stored, not counted. */
                  in = NULL;
                  break;
              }
          }

          if (dst != scratch) {
              *src = in;
          }
          return len - remaining;
      }
  #endif

  #ifdef __UCLIBC_HAS_LOCALE__
      assert(ENCODING == __ctype_encoding_7_bit);
  #endif

      while (remaining != 0) {
          *dst = (unsigned char) *in;
          if (*dst == 0) {        /* Terminating nul: stored, not counted. */
              in = NULL;
              break;
          }
          if (*dst >= 0x80) {
              goto ILLEGAL;
          }
          ++in;
          dst += step;
          --remaining;
      }

      if (dst != scratch) {
          *src = in;
      }
      return len - remaining;

  ILLEGAL:
      __set_errno(EILSEQ);
      return (size_t) -1;
  }
  559. #endif
  560. /**********************************************************************/
  561. #ifdef L___wcsnrtombs
  562. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  563. /* Note: We completely ignore ps in all currently supported conversions.
  564. * TODO: Check for valid state anyway? */
  565. size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  566. size_t NWC, size_t len, mbstate_t *__restrict ps)
  567. __attribute__ ((__weak__, __alias__("__wcsnrtombs")));
  568. size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  569. size_t NWC, size_t len, mbstate_t *__restrict ps)
  570. {
  571. const __uwchar_t *s;
  572. size_t count;
  573. int incr;
  574. char buf[MB_LEN_MAX];
  575. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  576. if (ENCODING == __ctype_encoding_utf8) {
  577. return _wchar_wcsntoutf8s(dst, len, src, NWC);
  578. }
  579. #endif /* __CTYPE_HAS_UTF_8_LOCALES */
  580. incr = 1;
  581. if (!dst) {
  582. dst = buf;
  583. len = SIZE_MAX;
  584. incr = 0;
  585. }
  586. /* Since all the following encodings are single-byte encodings... */
  587. if (len > NWC) {
  588. len = NWC;
  589. }
  590. count = len;
  591. s = (const __uwchar_t *) *src;
  592. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  593. if (ENCODING == __ctype_encoding_8_bit) {
  594. __uwchar_t wc;
  595. __uwchar_t u;
  596. while (count) {
  597. if ((wc = *s) <= 0x7f) {
  598. if (!(*dst = (unsigned char) wc)) {
  599. s = NULL;
  600. break;
  601. }
  602. } else {
  603. u = 0;
  604. if (wc <= Cwc2c_DOMAIN_MAX) {
  605. u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
  606. + Cwc2c_TT_SHIFT)];
  607. u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
  608. + ((wc >> Cwc2c_TT_SHIFT)
  609. & ((1 << Cwc2c_TI_SHIFT)-1))];
  610. u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
  611. + (u << Cwc2c_TT_SHIFT)
  612. + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
  613. }
  614. /* #define __WCHAR_REPLACEMENT_CHAR '?' */
  615. #ifdef __WCHAR_REPLACEMENT_CHAR
  616. *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
  617. #else /* __WCHAR_REPLACEMENT_CHAR */
  618. if (!u) {
  619. goto BAD;
  620. }
  621. *dst = (unsigned char) u;
  622. #endif /* __WCHAR_REPLACEMENT_CHAR */
  623. }
  624. ++s;
  625. dst += incr;
  626. --count;
  627. }
  628. if (dst != buf) {
  629. *src = (const wchar_t *) s;
  630. }
  631. return len - count;
  632. }
  633. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  634. #ifdef __UCLIBC_HAS_LOCALE__
  635. assert(ENCODING == __ctype_encoding_7_bit);
  636. #endif
  637. while (count) {
  638. if (*s >= 0x80) {
  639. #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
  640. BAD:
  641. #endif
  642. __set_errno(EILSEQ);
  643. return (size_t) -1;
  644. }
  645. if ((*dst = (unsigned char) *s) == 0) {
  646. s = NULL;
  647. break;
  648. }
  649. ++s;
  650. dst += incr;
  651. --count;
  652. }
  653. if (dst != buf) {
  654. *src = (const wchar_t *) s;
  655. }
  656. return len - count;
  657. }
  658. #endif
  659. /**********************************************************************/