wchar.c 19 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. /* Copyright (C) 2002 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
  18. *
  19. * Besides uClibc, I'm using this code in my libc for elks, which is
  20. * a 16-bit environment with a fairly limited compiler. It would make
  21. * things much easier for me if this file isn't modified unnecessarily.
  22. * In particular, please put any new or replacement functions somewhere
  23. * else, and modify the makefile to use your version instead.
  24. * Thanks. Manuel
  25. *
  26. * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
  27. /* May 23, 2002 Initial Notes:
  28. *
  29. * I'm still tweaking this stuff, but it passes the tests I've thrown
  30. * at it, and Erik needs it for the gcc port. The glibc extension
  31. * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
  32. * in the glibc source. I also need to fix the behavior of
  33. * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
  34. *
  35. * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
  36. * file on my platform (x86) show about 5-10% faster conversion speed than
  37. * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
  38. * individual mbrtowc()/wcrtomb() calls.
  39. *
  40. * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
  41. * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
  42. * needs to deal gracefully with whatever is sent to it. In that mode,
  43. * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
  44. * an arg to force that behavior, so the interface will be changing.
  45. *
  46. * I need to fix the error checking for 16-bit wide chars. This isn't
  47. * an issue for uClibc, but may be for ELKS. I'm currently not sure
  48. * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
  49. *
  50. * July 1, 2002
  51. *
  52. * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
  53. * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
  54. * locales.
  55. * Enabled building of a C/POSIX-locale-only version, so full locale support
  56. * no longer needs to be enabled.
  57. *
  58. * Nov 4, 2002
  59. *
  60. * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
  61. * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
  62. * order to support %ls in printf. See comments below for details.
  63. * Change behaviour of wc<->mb functions when in the C locale. Now they do
  64. * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
  65. * and consistency with the standard's requirement that a printf format string be
  66. * a valid multibyte string beginning and ending in its initial shift state.
  67. *
  68. * Manuel
  69. */
  70. #define _GNU_SOURCE
  71. #define _ISOC99_SOURCE
  72. #include <errno.h>
  73. #include <stddef.h>
  74. #include <limits.h>
  75. #include <stdint.h>
  76. #include <inttypes.h>
  77. #include <stdlib.h>
  78. #include <stdio.h>
  79. #include <assert.h>
  80. #include <locale.h>
  81. #include <wchar.h>
  82. #ifdef __UCLIBC_HAS_LOCALE__
  83. #define ENCODING (__global_locale.encoding)
  84. #ifdef __UCLIBC_MJN3_ONLY__
  85. #warning implement __CTYPE_HAS_UTF_8_LOCALES!
  86. #endif
  87. #define __CTYPE_HAS_UTF_8_LOCALES
  88. #else
  89. #undef __CTYPE_HAS_8_BIT_LOCALES
  90. #undef __CTYPE_HAS_UTF_8_LOCALES
  91. #undef L__wchar_utf8sntowcs
  92. #undef L__wchar_wcsntoutf8s
  93. #endif
  94. #if WCHAR_MAX > 0xffffUL
  95. #define UTF_8_MAX_LEN 6
  96. #else
  97. #define UTF_8_MAX_LEN 3
  98. #endif
  99. /* #define KUHN */
  100. /* Implementation-specific work functions. */
  101. extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
  102. const char **__restrict src, size_t n,
  103. mbstate_t *ps, int allow_continuation);
  104. extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
  105. const wchar_t **__restrict src, size_t wn);
  106. /* glibc extensions. */
  107. extern size_t __mbsnrtowcs(wchar_t *__restrict dst,
  108. const char **__restrict src,
  109. size_t NMC, size_t len, mbstate_t *__restrict ps);
  110. extern size_t __wcsnrtombs(char *__restrict dst,
  111. const wchar_t **__restrict src,
  112. size_t NWC, size_t len, mbstate_t *__restrict ps);
  113. /**********************************************************************/
  114. #ifdef L_btowc
  /*
   * btowc: convert one byte to its wide-character value.
   *
   * Returns WEOF when c is EOF or the byte cannot be converted.  When 8-bit
   * locale support is compiled in, the byte is run through mbrtowc() with a
   * freshly initialised conversion state; otherwise only 0..0x7f is accepted
   * and returned unchanged.
   */
  wint_t btowc(int c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      mbstate_t state;
      unsigned char byte[1];
      wchar_t wide;

      if (c == EOF) {
          return WEOF;
      }

      byte[0] = (unsigned char) c;
      state.mask = 0;             /* Start from the initial conversion state. */

      /* (size_t)-1 and (size_t)-2 both compare greater than 1. */
      if (mbrtowc(&wide, (const char *) byte, 1, &state) > 1) {
          return WEOF;
      }
      return wide;
  #else /* __CTYPE_HAS_8_BIT_LOCALES */
      /* Without 8-bit locales only the 7-bit range maps to a wide char;
       * the unsigned compare also rejects EOF and other negative values. */
      if (((unsigned int) c) >= 0x80) {
          return WEOF;
      }
      return c;
  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  135. #endif
  136. /**********************************************************************/
  137. #ifdef L_wctob
  138. /* Note: We completely ignore ps in all currently supported conversions. */
  /*
   * wctob: convert a wide character to its single-byte form.
   *
   * Returns the byte value, or EOF when the wide character has no one-byte
   * representation.
   */
  int wctob(wint_t c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      unsigned char out[MB_LEN_MAX];

      /* Only a conversion producing exactly one byte qualifies. */
      if (wcrtomb((char *) out, c, NULL) != 1) {
          return EOF;
      }
      return out[0];
  #else /* __CTYPE_HAS_8_BIT_LOCALES */
      /* Without 8-bit locales only 0..0x7f converts.  The explicit lower
       * bound is kept so a signed wint_t is handled too (presumably the
       * reason for the original TODO about an unsigned wint_t). */
      if ((c < 0) || (c >= 0x80)) {
          return EOF;
      }
      return c;
  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  152. #endif
  153. /**********************************************************************/
  154. #ifdef L_mbsinit
  155. int mbsinit(const mbstate_t *ps)
  156. {
  157. return !ps || !ps->mask;
  158. }
  159. #endif
  160. /**********************************************************************/
  161. #ifdef L_mbrlen
  /* mbrlen is exported as a weak alias of __mbrlen. */
  size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
      __attribute__ ((__weak__, __alias__("__mbrlen")));

  /*
   * __mbrlen: forward to mbrtowc() with no destination, so the result is
   * whatever mbrtowc() returns for the bytes at s.  When ps is NULL a
   * function-private static state object is used instead.
   */
  size_t __mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
  {
      static mbstate_t private_state;     /* Zeroed by static initialisation. */

      if (ps == NULL) {
          ps = &private_state;
      }
      return mbrtowc(NULL, s, n, ps);
  }
  169. #endif
  170. /**********************************************************************/
  171. #ifdef L_mbrtowc
  172. size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
  173. size_t n, mbstate_t *__restrict ps)
  174. {
  175. static mbstate_t mbstate; /* Rely on bss 0-init. */
  176. wchar_t wcbuf[1];
  177. const char *p;
  178. size_t r;
  179. char empty_string[1]; /* Avoid static to be fPIC friendly. */
  180. if (!ps) {
  181. ps = &mbstate;
  182. }
  183. if (!s) {
  184. pwc = (wchar_t *) s; /* NULL */
  185. empty_string[0] = 0; /* Init the empty string when necessary. */
  186. s = empty_string;
  187. n = 1;
  188. } else if (!n) {
  189. return (ps->mask && (ps->wc == 0xffffU)) /* TODO: change error code? */
  190. ? ((size_t) -1) : ((size_t) -2);
  191. }
  192. p = s;
  193. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  194. /* Need to do this here since mbsrtowcs doesn't allow incompletes. */
  195. if (ENCODING == __ctype_encoding_utf8) {
  196. r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
  197. return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
  198. }
  199. #endif
  200. r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
  201. if (((ssize_t) r) >= 0) {
  202. if (pwc) {
  203. *pwc = *wcbuf;
  204. }
  205. }
  206. return (size_t) r;
  207. }
  208. #endif
  209. /**********************************************************************/
  210. #ifdef L_wcrtomb
  211. /* Note: We completely ignore ps in all currently supported conversions. */
  212. /* TODO: Check for valid state anyway? */
  /*
   * wcrtomb: convert one wide character to its multibyte representation.
   *
   *   s    output buffer; when NULL an internal buffer is used and the
   *        call behaves as if wc were the nul wide character.
   *   wc   wide character to convert.
   *   ps   passed through to __wcsnrtombs().
   *
   * Returns the result of __wcsnrtombs() for that single character, except
   * that a result of 0 (produced for the nul character) is reported as 1.
   * Error results such as (size_t)-1 are passed through unchanged.
   */
  size_t wcrtomb(register char *__restrict s, wchar_t wc,
                 mbstate_t *__restrict ps)
  {
      wchar_t wcbuf[1];
      const wchar_t *pwc;
      size_t r;
      char buf[MB_LEN_MAX];

      if (!s) {
          s = buf;
          wc = 0;
      }

      pwc = wcbuf;
      wcbuf[0] = wc;

      /* Convert exactly one wide character.  Allowing the worker to run on
       * to a terminating nul would make it store an extra '\0' after the
       * converted bytes, beyond what this call reports as written. */
      r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);

      return (r != 0) ? r : 1;
  }
  230. #endif
  231. /**********************************************************************/
  232. #ifdef L_mbsrtowcs
  /*
   * mbsrtowcs: convert a multibyte string to wide characters.
   *
   * Delegates to __mbsnrtowcs() with no limit on the number of input bytes
   * (SIZE_MAX).  A NULL ps selects a function-private static state.
   */
  size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      static mbstate_t private_state;     /* Zeroed by static initialisation. */

      if (ps == NULL) {
          ps = &private_state;
      }
      return __mbsnrtowcs(dst, src, SIZE_MAX, len, ps);
  }
  240. #endif
  241. /**********************************************************************/
  242. #ifdef L_wcsrtombs
  243. /* Note: We completely ignore ps in all currently supported conversions.
  244. * TODO: Check for valid state anyway? */
  /*
   * wcsrtombs: convert a wide-character string to multibyte form.
   *
   * Delegates to __wcsnrtombs() with no limit on the number of input wide
   * characters (SIZE_MAX); ps is handed over unchanged.
   */
  size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      size_t produced;

      produced = __wcsnrtombs(dst, src, SIZE_MAX, len, ps);
      return produced;
  }
  250. #endif
  251. /**********************************************************************/
  252. #ifdef L__wchar_utf8sntowcs
  253. /* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
  254. * UTF-8-test.txt stress test.
  255. */
  256. /* #define DECODER */
  257. #ifdef DECODER
  258. #ifndef KUHN
  259. #define KUHN
  260. #endif
  261. #endif
  262. size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
  263. const char **__restrict src, size_t n,
  264. mbstate_t *ps, int allow_continuation)
  265. {
  266. register const char *s;
  267. __uwchar_t mask;
  268. __uwchar_t wc;
  269. wchar_t wcbuf[1];
  270. size_t count;
  271. int incr;
  272. s = *src;
  273. assert(s != NULL);
  274. assert(ps != NULL);
  275. incr = 1;
  276. if (!pwc) {
  277. pwc = wcbuf;
  278. wn = SIZE_MAX;
  279. incr = 0;
  280. }
  281. /* This is really here only to support the glibc extension function
  282. * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
  283. * check on the validity of the mbstate. */
  284. if (!(count = wn)) {
  285. return 0;
  286. }
  287. if ((mask = (__uwchar_t) ps->mask) != 0) { /* A continuation... */
  288. #ifdef DECODER
  289. wc = (__uwchar_t) ps->wc;
  290. if (n) {
  291. goto CONTINUE;
  292. }
  293. goto DONE;
  294. #else
  295. if ((wc = (__uwchar_t) ps->wc) != 0xffffU) {
  296. /* TODO: change error code here and below? */
  297. if (n) {
  298. goto CONTINUE;
  299. }
  300. goto DONE;
  301. }
  302. return (size_t) -1; /* We're in an error state. */
  303. #endif
  304. }
  305. do {
  306. if (!n) {
  307. goto DONE;
  308. }
  309. --n;
  310. if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
  311. mask = 0x40;
  312. #ifdef __UCLIBC_MJN3_ONLY__
  313. #warning fix range for 16 bit wides
  314. #endif
  315. if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
  316. goto START;
  317. }
  318. BAD:
  319. #ifdef DECODER
  320. wc = 0xfffd;
  321. goto COMPLETE;
  322. #else
  323. ps->mask = mask;
  324. ps->wc = 0xffffU;
  325. return (size_t) -1; /* Illegal start byte! */
  326. #endif
  327. CONTINUE:
  328. while (n) {
  329. --n;
  330. if ((*s & 0xc0) != 0x80) {
  331. goto BAD;
  332. }
  333. mask <<= 5;
  334. wc <<= 6;
  335. wc += (*s & 0x3f); /* keep seperate for bcc (smaller code) */
  336. ++s;
  337. START:
  338. wc &= ~(mask << 1);
  339. if ((wc & mask) == 0) { /* Character completed. */
  340. if ((mask >>= 5) == 0x40) {
  341. mask += mask;
  342. }
  343. /* Check for invalid sequences (longer than necessary)
  344. * and invalid chars. */
  345. if ( (wc < mask) /* Sequence not minimal length. */
  346. #ifdef KUHN
  347. #if UTF_8_MAX_LEN == 3
  348. #error broken since mask can overflow!!
  349. /* For plane 0, these are the only defined values.*/
  350. || (wc > 0xfffdU)
  351. #else
  352. /* Note that we don't need to worry about exceeding */
  353. /* 31 bits as that is the most that UTF-8 provides. */
  354. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  355. #endif
  356. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  357. #endif /* KUHN */
  358. ) {
  359. goto BAD;
  360. }
  361. goto COMPLETE;
  362. }
  363. }
  364. /* Character potentially valid but incomplete. */
  365. if (!allow_continuation) {
  366. if (count != wn) {
  367. return 0;
  368. }
  369. /* NOTE: The following can fail if you allow and then disallow
  370. * continuation!!! */
  371. #if UTF_8_MAX_LEN == 3
  372. #error broken since mask can overflow!!
  373. #endif
  374. /* Need to back up... */
  375. do {
  376. --s;
  377. } while ((mask >>= 5) >= 0x40);
  378. goto DONE;
  379. }
  380. ps->mask = (wchar_t) mask;
  381. ps->wc = (wchar_t) wc;
  382. *src = s;
  383. return (size_t) -2;
  384. }
  385. COMPLETE:
  386. *pwc = wc;
  387. pwc += incr;
  388. }
  389. #ifdef DECODER
  390. while (--count);
  391. #else
  392. while (wc && --count);
  393. if (!wc) {
  394. s = NULL;
  395. }
  396. #endif
  397. DONE:
  398. /* ps->wc is irrelavent here. */
  399. ps->mask = 0;
  400. if (pwc != wcbuf) {
  401. *src = s;
  402. }
  403. return wn - count;
  404. }
  405. #endif
  406. /**********************************************************************/
  407. #ifdef L__wchar_wcsntoutf8s
  408. size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
  409. const wchar_t **__restrict src, size_t wn)
  410. {
  411. register char *p;
  412. size_t len, t;
  413. __uwchar_t wc;
  414. const __uwchar_t *swc;
  415. int store;
  416. char buf[MB_LEN_MAX];
  417. char m;
  418. store = 1;
  419. /* NOTE: The following is an AWFUL HACK! In order to support %ls in
  420. * printf, we need to be able to compute the number of bytes needed
  421. * for the mbs conversion, not to exceed the precision specified.
  422. * But if dst is NULL, the return value is the length assuming a
  423. * sufficiently sized buffer. So, we allow passing of (char *) src
  424. * as dst in order to flag that we really want the length, subject
  425. * to the restricted buffer size and no partial conversions.
  426. * See wcsnrtombs() as well. */
  427. if (!s || (s == ((char *) src))) {
  428. if (!s) {
  429. n = SIZE_MAX;
  430. }
  431. s = buf;
  432. store = 0;
  433. }
  434. t = n;
  435. swc = (const __uwchar_t *) *src;
  436. assert(swc != NULL);
  437. while (wn && t) {
  438. wc = *swc;
  439. *s = wc;
  440. len = 1;
  441. if (wc >= 0x80) {
  442. #ifdef KUHN
  443. if (
  444. #if UTF_8_MAX_LEN == 3
  445. /* For plane 0, these are the only defined values.*/
  446. /* Note that we don't need to worry about exceeding */
  447. /* 31 bits as that is the most that UTF-8 provides. */
  448. (wc > 0xfffdU)
  449. #else
  450. /* UTF_8_MAX_LEN == 6 */
  451. (wc > 0x7fffffffUL)
  452. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  453. #endif
  454. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  455. ) {
  456. return (size_t) -1;
  457. }
  458. #else /* KUHN */
  459. #if UTF_8_MAX_LEN != 3
  460. if (wc > 0x7fffffffUL) { /* Value too large. */
  461. return (size_t) -1;
  462. }
  463. #endif
  464. #endif /* KUHN */
  465. wc >>= 1;
  466. p = s;
  467. do {
  468. ++p;
  469. } while (wc >>= 5);
  470. wc = *swc;
  471. if ((len = p - s) > t) { /* Not enough space. */
  472. break;
  473. }
  474. m = 0x80;
  475. while( p>s ) {
  476. m = (m >> 1) | 0x80;
  477. *--p = (wc & 0x3f) | 0x80;
  478. wc >>= 6;
  479. }
  480. *s |= (m << 1);
  481. } else if (wc == 0) { /* End of string. */
  482. swc = NULL;
  483. break;
  484. }
  485. ++swc;
  486. --wn;
  487. t -= len;
  488. if (store) {
  489. s += len;
  490. }
  491. }
  492. if (store) {
  493. *src = (const wchar_t *) swc;
  494. }
  495. return n - t;
  496. }
  497. #endif
  498. /**********************************************************************/
  499. #ifdef L___mbsnrtowcs
  500. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  /* mbsnrtowcs is exported as a weak alias of __mbsnrtowcs. */
  size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                    size_t NMC, size_t len, mbstate_t *__restrict ps)
      __attribute__ ((__weak__, __alias__("__mbsnrtowcs")));

  /*
   * __mbsnrtowcs: convert at most NMC bytes from *src into at most len wide
   * characters at dst.
   *
   * A NULL dst selects counting mode: results go to a one-element scratch
   * slot, len is treated as SIZE_MAX and *src is left unchanged.  A NULL ps
   * selects a function-private static state.
   *
   * For the UTF-8 encoding the work is done by _wchar_utf8sntowcs(), with
   * an incomplete trailing sequence ((size_t)-2) reported as 0.  Otherwise
   * each input byte yields one wide character: via the locale tables for
   * bytes >= 0x80 in an 8-bit encoding (a zero table entry fails with
   * EILSEQ and (size_t)-1), or as the plain unsigned byte value.
   *
   * Returns the number of wide characters converted, not counting a
   * terminating nul.  When the nul is reached it is stored and *src is set
   * to NULL (not in counting mode).
   */
  size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                      size_t NMC, size_t len, mbstate_t *__restrict ps)
  {
      static mbstate_t private_state;     /* Zeroed by static initialisation. */
      wchar_t scratch[1];
      const char *in;
      size_t remaining;
      int step;

      if (ps == NULL) {
          ps = &private_state;
      }

  #ifdef __CTYPE_HAS_UTF_8_LOCALES
      if (ENCODING == __ctype_encoding_utf8) {
          size_t r;

          r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1);
          if (r == (size_t) -2) {
              return 0;
          }
          return r;
      }
  #endif

      step = 1;
      if (dst == NULL) {
          dst = scratch;
          len = SIZE_MAX;
          step = 0;
      }

      /* One byte per wide character, so the input length caps the output. */
      if (NMC < len) {
          len = NMC;
      }

      remaining = len;
      in = *src;

  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      if (ENCODING == __ctype_encoding_8_bit) {
          wchar_t wc;

          for ( ; remaining; --remaining, ++in, dst += step) {
              wc = (unsigned char) *in;
              if (wc >= 0x80) {       /* Non-ASCII: two-level table lookup. */
                  wc -= 0x80;
                  wc = __global_locale.tbl8c2wc[
                      (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
                       << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
                  if (wc == 0) {
                      __set_errno(EILSEQ);
                      return (size_t) -1;
                  }
              }
              *dst = wc;
              if (wc == 0) {          /* Terminating nul reached. */
                  in = NULL;
                  break;
              }
          }

          if (dst != scratch) {
              *src = in;
          }
          return len - remaining;
      }
  #endif

  #ifdef __UCLIBC_HAS_LOCALE__
      assert(ENCODING == __ctype_encoding_7_bit);
  #endif

      for ( ; remaining; --remaining, ++in, dst += step) {
          *dst = (unsigned char) *in;
          if (*dst == 0) {            /* Terminating nul reached. */
              in = NULL;
              break;
          }
      }

      if (dst != scratch) {
          *src = in;
      }
      return len - remaining;
  }
  579. #endif
  580. /**********************************************************************/
  581. #ifdef L___wcsnrtombs
  582. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  583. /* Note: We completely ignore ps in all currently supported conversions.
  584. * TODO: Check for valid state anyway? */
  585. size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  586. size_t NWC, size_t len, mbstate_t *__restrict ps)
  587. __attribute__ ((__weak__, __alias__("__wcsnrtombs")));
  588. size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  589. size_t NWC, size_t len, mbstate_t *__restrict ps)
  590. {
  591. const __uwchar_t *s;
  592. size_t count;
  593. int incr;
  594. char buf[MB_LEN_MAX];
  595. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  596. if (ENCODING == __ctype_encoding_utf8) {
  597. return _wchar_wcsntoutf8s(dst, len, src, NWC);
  598. }
  599. #endif /* __CTYPE_HAS_UTF_8_LOCALES */
  600. incr = 1;
  601. /* NOTE: The following is an AWFUL HACK! In order to support %ls in
  602. * printf, we need to be able to compute the number of bytes needed
  603. * for the mbs conversion, not to exceed the precision specified.
  604. * But if dst is NULL, the return value is the length assuming a
  605. * sufficiently sized buffer. So, we allow passing of (char *) src
  606. * as dst in order to flag that we really want the length, subject
  607. * to the restricted buffer size and no partial conversions.
  608. * See _wchar_wcsntoutf8s() as well. */
  609. if (!dst || (dst == ((char *) src))) {
  610. if (!dst) {
  611. len = SIZE_MAX;
  612. }
  613. dst = buf;
  614. incr = 0;
  615. }
  616. /* Since all the following encodings are single-byte encodings... */
  617. if (len > NWC) {
  618. len = NWC;
  619. }
  620. count = len;
  621. s = (const __uwchar_t *) *src;
  622. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  623. if (ENCODING == __ctype_encoding_8_bit) {
  624. __uwchar_t wc;
  625. __uwchar_t u;
  626. while (count) {
  627. if ((wc = *s) <= 0x7f) {
  628. if (!(*dst = (unsigned char) wc)) {
  629. s = NULL;
  630. break;
  631. }
  632. } else {
  633. u = 0;
  634. if (wc <= Cwc2c_DOMAIN_MAX) {
  635. u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
  636. + Cwc2c_TT_SHIFT)];
  637. u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
  638. + ((wc >> Cwc2c_TT_SHIFT)
  639. & ((1 << Cwc2c_TI_SHIFT)-1))];
  640. u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
  641. + (u << Cwc2c_TT_SHIFT)
  642. + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
  643. }
  644. /* #define __WCHAR_REPLACEMENT_CHAR '?' */
  645. #ifdef __WCHAR_REPLACEMENT_CHAR
  646. *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
  647. #else /* __WCHAR_REPLACEMENT_CHAR */
  648. if (!u) {
  649. goto BAD;
  650. }
  651. *dst = (unsigned char) u;
  652. #endif /* __WCHAR_REPLACEMENT_CHAR */
  653. }
  654. ++s;
  655. dst += incr;
  656. --count;
  657. }
  658. if (dst != buf) {
  659. *src = (const wchar_t *) s;
  660. }
  661. return len - count;
  662. }
  663. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  664. #ifdef __UCLIBC_HAS_LOCALE__
  665. assert(ENCODING == __ctype_encoding_7_bit);
  666. #endif
  667. while (count) {
  668. if (*s > UCHAR_MAX) {
  669. #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
  670. BAD:
  671. #endif
  672. __set_errno(EILSEQ);
  673. return (size_t) -1;
  674. }
  675. if ((*dst = (unsigned char) *s) == 0) {
  676. s = NULL;
  677. break;
  678. }
  679. ++s;
  680. dst += incr;
  681. --count;
  682. }
  683. if (dst != buf) {
  684. *src = (const wchar_t *) s;
  685. }
  686. return len - count;
  687. }
  688. #endif
  689. /**********************************************************************/