wchar.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817
  1. /* Copyright (C) 2002 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
  18. *
  19. * Besides uClibc, I'm using this code in my libc for elks, which is
  20. * a 16-bit environment with a fairly limited compiler. It would make
  21. * things much easier for me if this file isn't modified unnecessarily.
  22. * In particular, please put any new or replacement functions somewhere
  23. * else, and modify the makefile to use your version instead.
  24. * Thanks. Manuel
  25. *
  26. * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
  27. /* May 23, 2002 Initial Notes:
  28. *
  29. * I'm still tweaking this stuff, but it passes the tests I've thrown
  30. * at it, and Erik needs it for the gcc port. The glibc extension
  31. * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
  32. * in the glibc source. I also need to fix the behavior of
  33. * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
  34. *
  35. * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
  36. * file on my platform (x86) show about 5-10% faster conversion speed than
  37. * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
  38. * individual mbrtowc()/wcrtomb() calls.
  39. *
  40. * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
  41. * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
  42. * needs to deal gracefully with whatever is sent to it. In that mode,
  43. * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
  44. * an arg to force that behavior, so the interface will be changing.
  45. *
  46. * I need to fix the error checking for 16-bit wide chars. This isn't
  47. * an issue for uClibc, but may be for ELKS. I'm currently not sure
  48. * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
  49. *
  50. * July 1, 2002
  51. *
  52. * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
  53. * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
  54. * locales.
  55. * Enabled building of a C/POSIX-locale-only version, so full locale support
  56. * no longer needs to be enabled.
  57. *
  58. * Nov 4, 2002
  59. *
  60. * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
  61. * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
  62. * order to support %ls in printf. See comments below for details.
  63. * Change behaviour of wc<->mb functions when in the C locale. Now they do
  64. * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
  65. * and consistency with the standard's requirement that a printf format string be
  66. * a valid multibyte string beginning and ending in its initial shift state.
  67. *
  68. * Nov 5, 2002
  69. *
  70. * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
  71. *
  72. * Manuel
  73. */
  74. #define _GNU_SOURCE
  75. #define _ISOC99_SOURCE
  76. #include <errno.h>
  77. #include <stddef.h>
  78. #include <limits.h>
  79. #include <stdint.h>
  80. #include <inttypes.h>
  81. #include <stdlib.h>
  82. #include <stdio.h>
  83. #include <assert.h>
  84. #include <locale.h>
  85. #include <wchar.h>
  86. #ifdef __UCLIBC_HAS_LOCALE__
  87. #define ENCODING (__global_locale.encoding)
  88. #ifdef __UCLIBC_MJN3_ONLY__
  89. #warning implement __CTYPE_HAS_UTF_8_LOCALES!
  90. #endif
  91. #define __CTYPE_HAS_UTF_8_LOCALES
  92. #else
  93. #undef __CTYPE_HAS_8_BIT_LOCALES
  94. #undef __CTYPE_HAS_UTF_8_LOCALES
  95. #undef L__wchar_utf8sntowcs
  96. #undef L__wchar_wcsntoutf8s
  97. #endif
  98. #if WCHAR_MAX > 0xffffUL
  99. #define UTF_8_MAX_LEN 6
  100. #else
  101. #define UTF_8_MAX_LEN 3
  102. #endif
  103. /* #define KUHN */
  104. /* Implementation-specific work functions. */
  105. extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
  106. const char **__restrict src, size_t n,
  107. mbstate_t *ps, int allow_continuation);
  108. extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
  109. const wchar_t **__restrict src, size_t wn);
  110. /* glibc extensions. */
  111. extern size_t __mbsnrtowcs(wchar_t *__restrict dst,
  112. const char **__restrict src,
  113. size_t NMC, size_t len, mbstate_t *__restrict ps);
  114. extern size_t __wcsnrtombs(char *__restrict dst,
  115. const wchar_t **__restrict src,
  116. size_t NWC, size_t len, mbstate_t *__restrict ps);
  117. /**********************************************************************/
  118. #ifdef L_btowc
  /*
   * btowc -- convert a single byte to its wide-character equivalent.
   *
   * c is either EOF or a value that is treated as an unsigned char.
   *
   * With 8-bit locale support the byte is pushed through mbrtowc() starting
   * from a cleared conversion state.  Otherwise: in a UTF-8 locale only the
   * values 0x00..0x7f map to themselves, and in the remaining (7-bit / C)
   * case every value 0x00..UCHAR_MAX maps to itself.
   *
   * Returns the wide character, or WEOF when c has no single-byte mapping.
   */
  wint_t btowc(int c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES

      mbstate_t state;
      unsigned char byte[1];
      wchar_t result;

      if (c == EOF) {
          return WEOF;
      }

      byte[0] = (unsigned char) c;
      state.mask = 0;             /* Begin in the initial conversion state. */

      /* Anything other than 0 (nul) or 1 (one byte consumed) is a failure. */
      if (mbrtowc(&result, byte, 1, &state) > 1) {
          return WEOF;
      }
      return result;

  #else  /* __CTYPE_HAS_8_BIT_LOCALES */

  #ifdef __CTYPE_HAS_UTF_8_LOCALES
      if (ENCODING == __ctype_encoding_utf8) {
          /* The unsigned compare also rejects EOF and other negatives. */
          if (((unsigned int) c) >= 0x80) {
              return WEOF;
          }
          return c;
      }
  #endif /* __CTYPE_HAS_UTF_8_LOCALES */

  #ifdef __UCLIBC_HAS_LOCALE__
      assert(ENCODING == __ctype_encoding_7_bit);
  #endif

      /* 1-1 mapping for the whole unsigned char range. */
      if (((unsigned int) c) > UCHAR_MAX) {
          return WEOF;
      }
      return c;

  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  145. #endif
  146. /**********************************************************************/
  147. #ifdef L_wctob
  /*
   * wctob -- convert a wide character to its single-byte representation.
   *
   * With 8-bit locale support the character is run through wcrtomb() and is
   * accepted only when exactly one byte is produced.  Otherwise: in a UTF-8
   * locale only 0x00..0x7f are accepted, and in the remaining (7-bit / C)
   * case every value 0x00..UCHAR_MAX is accepted and maps to itself.
   *
   * Returns the byte value, or EOF when no single-byte form exists.
   */
  int wctob(wint_t c)
  {
  #ifdef __CTYPE_HAS_8_BIT_LOCALES

      unsigned char out[MB_LEN_MAX];

      if (wcrtomb(out, c, NULL) != 1) {
          return EOF;
      }
      return out[0];

  #else  /* __CTYPE_HAS_8_BIT_LOCALES */

  #ifdef __CTYPE_HAS_UTF_8_LOCALES
      if (ENCODING == __ctype_encoding_utf8) {
          if ((c >= 0) && (c < 0x80)) {
              return c;
          }
          return EOF;
      }
  #endif /* __CTYPE_HAS_UTF_8_LOCALES */

  #ifdef __UCLIBC_HAS_LOCALE__
      assert(ENCODING == __ctype_encoding_7_bit);
  #endif

      if ((c >= 0) && (c <= UCHAR_MAX)) {
          return c;
      }
      return EOF;

  #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  }
  166. #endif
  167. /**********************************************************************/
  168. #ifdef L_mbsinit
  169. int mbsinit(const mbstate_t *ps)
  170. {
  171. return !ps || !ps->mask;
  172. }
  173. #endif
  174. /**********************************************************************/
  175. #ifdef L_mbrlen
  /*
   * mbrlen -- length in bytes of the next multibyte character.
   *
   * Thin wrapper around mbrtowc() with a NULL destination.  When the caller
   * passes ps == NULL a state object private to this function is used.
   * mbrlen is exported as a weak alias of __mbrlen.
   *
   * Returns whatever mbrtowc() returns for (NULL, s, n, state).
   */
  size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
      __attribute__ ((__weak__, __alias__("__mbrlen")));

  size_t __mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
  {
      static mbstate_t internal_state;    /* Static storage: starts zeroed. */

      if (ps == NULL) {
          ps = &internal_state;
      }
      return mbrtowc(NULL, s, n, ps);
  }
  183. #endif
  184. /**********************************************************************/
  185. #ifdef L_mbrtowc
  186. size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
  187. size_t n, mbstate_t *__restrict ps)
  188. {
  189. static mbstate_t mbstate; /* Rely on bss 0-init. */
  190. wchar_t wcbuf[1];
  191. const char *p;
  192. size_t r;
  193. char empty_string[1]; /* Avoid static to be fPIC friendly. */
  194. if (!ps) {
  195. ps = &mbstate;
  196. }
  197. if (!s) {
  198. pwc = (wchar_t *) s; /* NULL */
  199. empty_string[0] = 0; /* Init the empty string when necessary. */
  200. s = empty_string;
  201. n = 1;
  202. } else if (!n) {
  203. return (ps->mask && (ps->wc == 0xffffU)) /* TODO: change error code? */
  204. ? ((size_t) -1) : ((size_t) -2);
  205. }
  206. p = s;
  207. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  208. /* Need to do this here since mbsrtowcs doesn't allow incompletes. */
  209. if (ENCODING == __ctype_encoding_utf8) {
  210. r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
  211. return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
  212. }
  213. #endif
  214. r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
  215. if (((ssize_t) r) >= 0) {
  216. if (pwc) {
  217. *pwc = *wcbuf;
  218. }
  219. }
  220. return (size_t) r;
  221. }
  222. #endif
  223. /**********************************************************************/
  224. #ifdef L_wcrtomb
  /*
   * wcrtomb -- convert one wide character to its multibyte representation.
   *
   * s    destination buffer; NULL means "act as if wc were 0 and write to
   *      an internal buffer".
   * wc   wide character to convert.
   * ps   conversion state, passed through to __wcsnrtombs().
   *
   * Returns the number of bytes produced by __wcsnrtombs(), or 1 when that
   * count is 0 (the nul character occupies one byte).  An error result of
   * (size_t)-1 from __wcsnrtombs() is returned unchanged.
   *
   * Exactly one wide character is offered to __wcsnrtombs() (NWC == 1).
   * Offering a nul-terminated two-element array with an unbounded NWC would
   * make the worker also store the terminating nul right after the converted
   * character, writing one byte more than the character itself needs.
   */
  /* Note: We completely ignore ps in all currently supported conversions. */
  /* TODO: Check for valid state anyway? */
  size_t wcrtomb(register char *__restrict s, wchar_t wc,
                 mbstate_t *__restrict ps)
  {
      wchar_t wcbuf[1];
      const wchar_t *pwc;
      size_t r;
      char buf[MB_LEN_MAX];

      if (!s) {
          s = buf;
          wc = 0;
      }

      pwc = wcbuf;
      wcbuf[0] = wc;

      r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);
      return (r != 0) ? r : 1;
  }
  244. #endif
  245. /**********************************************************************/
  246. #ifdef L_mbsrtowcs
  /*
   * mbsrtowcs -- convert a multibyte string to a wide-character string.
   *
   * Forwards to __mbsnrtowcs() with no limit (SIZE_MAX) on the number of
   * source bytes.  When ps is NULL a state object private to this function
   * is used.
   *
   * Returns the value returned by __mbsnrtowcs().
   */
  size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      static mbstate_t internal_state;    /* Static storage: starts zeroed. */

      if (ps == NULL) {
          ps = &internal_state;
      }
      return __mbsnrtowcs(dst, src, SIZE_MAX, len, ps);
  }
  254. #endif
  255. /**********************************************************************/
  256. #ifdef L_wcsrtombs
  /*
   * wcsrtombs -- convert a wide-character string to a multibyte string.
   *
   * Forwards to __wcsnrtombs() with no limit (SIZE_MAX) on the number of
   * source wide characters; ps is passed through unchanged.
   *
   * Returns the value returned by __wcsnrtombs().
   *
   * Note: ps is completely ignored in all currently supported conversions.
   * TODO: Check for valid state anyway?
   */
  size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
                   size_t len, mbstate_t *__restrict ps)
  {
      size_t converted;

      converted = __wcsnrtombs(dst, src, SIZE_MAX, len, ps);
      return converted;
  }
  264. #endif
  265. /**********************************************************************/
  266. #ifdef L__wchar_utf8sntowcs
  267. /* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
  268. * UTF-8-test.txt stress test.
  269. */
  270. /* #define DECODER */
  271. #ifdef DECODER
  272. #ifndef KUHN
  273. #define KUHN
  274. #endif
  275. #endif
  276. size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
  277. const char **__restrict src, size_t n,
  278. mbstate_t *ps, int allow_continuation)
  279. {
  280. register const char *s;
  281. __uwchar_t mask;
  282. __uwchar_t wc;
  283. wchar_t wcbuf[1];
  284. size_t count;
  285. int incr;
  286. s = *src;
  287. assert(s != NULL);
  288. assert(ps != NULL);
  289. incr = 1;
  290. if (!pwc) {
  291. pwc = wcbuf;
  292. wn = SIZE_MAX;
  293. incr = 0;
  294. }
  295. /* This is really here only to support the glibc extension function
  296. * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
  297. * check on the validity of the mbstate. */
  298. if (!(count = wn)) {
  299. return 0;
  300. }
  301. if ((mask = (__uwchar_t) ps->mask) != 0) { /* A continuation... */
  302. #ifdef DECODER
  303. wc = (__uwchar_t) ps->wc;
  304. if (n) {
  305. goto CONTINUE;
  306. }
  307. goto DONE;
  308. #else
  309. if ((wc = (__uwchar_t) ps->wc) != 0xffffU) {
  310. /* TODO: change error code here and below? */
  311. if (n) {
  312. goto CONTINUE;
  313. }
  314. goto DONE;
  315. }
  316. return (size_t) -1; /* We're in an error state. */
  317. #endif
  318. }
  319. do {
  320. if (!n) {
  321. goto DONE;
  322. }
  323. --n;
  324. if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
  325. mask = 0x40;
  326. #ifdef __UCLIBC_MJN3_ONLY__
  327. #warning fix range for 16 bit wides
  328. #endif
  329. if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
  330. goto START;
  331. }
  332. BAD:
  333. #ifdef DECODER
  334. wc = 0xfffd;
  335. goto COMPLETE;
  336. #else
  337. ps->mask = mask;
  338. ps->wc = 0xffffU;
  339. return (size_t) -1; /* Illegal start byte! */
  340. #endif
  341. CONTINUE:
  342. while (n) {
  343. --n;
  344. if ((*s & 0xc0) != 0x80) {
  345. goto BAD;
  346. }
  347. mask <<= 5;
  348. wc <<= 6;
  349. wc += (*s & 0x3f); /* keep seperate for bcc (smaller code) */
  350. ++s;
  351. START:
  352. wc &= ~(mask << 1);
  353. if ((wc & mask) == 0) { /* Character completed. */
  354. if ((mask >>= 5) == 0x40) {
  355. mask += mask;
  356. }
  357. /* Check for invalid sequences (longer than necessary)
  358. * and invalid chars. */
  359. if ( (wc < mask) /* Sequence not minimal length. */
  360. #ifdef KUHN
  361. #if UTF_8_MAX_LEN == 3
  362. #error broken since mask can overflow!!
  363. /* For plane 0, these are the only defined values.*/
  364. || (wc > 0xfffdU)
  365. #else
  366. /* Note that we don't need to worry about exceeding */
  367. /* 31 bits as that is the most that UTF-8 provides. */
  368. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  369. #endif
  370. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  371. #endif /* KUHN */
  372. ) {
  373. goto BAD;
  374. }
  375. goto COMPLETE;
  376. }
  377. }
  378. /* Character potentially valid but incomplete. */
  379. if (!allow_continuation) {
  380. if (count != wn) {
  381. return 0;
  382. }
  383. /* NOTE: The following can fail if you allow and then disallow
  384. * continuation!!! */
  385. #if UTF_8_MAX_LEN == 3
  386. #error broken since mask can overflow!!
  387. #endif
  388. /* Need to back up... */
  389. do {
  390. --s;
  391. } while ((mask >>= 5) >= 0x40);
  392. goto DONE;
  393. }
  394. ps->mask = (wchar_t) mask;
  395. ps->wc = (wchar_t) wc;
  396. *src = s;
  397. return (size_t) -2;
  398. }
  399. COMPLETE:
  400. *pwc = wc;
  401. pwc += incr;
  402. }
  403. #ifdef DECODER
  404. while (--count);
  405. #else
  406. while (wc && --count);
  407. if (!wc) {
  408. s = NULL;
  409. }
  410. #endif
  411. DONE:
  412. /* ps->wc is irrelavent here. */
  413. ps->mask = 0;
  414. if (pwc != wcbuf) {
  415. *src = s;
  416. }
  417. return wn - count;
  418. }
  419. #endif
  420. /**********************************************************************/
  421. #ifdef L__wchar_wcsntoutf8s
  422. size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
  423. const wchar_t **__restrict src, size_t wn)
  424. {
  425. register char *p;
  426. size_t len, t;
  427. __uwchar_t wc;
  428. const __uwchar_t *swc;
  429. int store;
  430. char buf[MB_LEN_MAX];
  431. char m;
  432. store = 1;
  433. /* NOTE: The following is an AWFUL HACK! In order to support %ls in
  434. * printf, we need to be able to compute the number of bytes needed
  435. * for the mbs conversion, not to exceed the precision specified.
  436. * But if dst is NULL, the return value is the length assuming a
  437. * sufficiently sized buffer. So, we allow passing of (char *) src
  438. * as dst in order to flag that we really want the length, subject
  439. * to the restricted buffer size and no partial conversions.
  440. * See wcsnrtombs() as well. */
  441. if (!s || (s == ((char *) src))) {
  442. if (!s) {
  443. n = SIZE_MAX;
  444. }
  445. s = buf;
  446. store = 0;
  447. }
  448. t = n;
  449. swc = (const __uwchar_t *) *src;
  450. assert(swc != NULL);
  451. while (wn && t) {
  452. wc = *swc;
  453. *s = wc;
  454. len = 1;
  455. if (wc >= 0x80) {
  456. #ifdef KUHN
  457. if (
  458. #if UTF_8_MAX_LEN == 3
  459. /* For plane 0, these are the only defined values.*/
  460. /* Note that we don't need to worry about exceeding */
  461. /* 31 bits as that is the most that UTF-8 provides. */
  462. (wc > 0xfffdU)
  463. #else
  464. /* UTF_8_MAX_LEN == 6 */
  465. (wc > 0x7fffffffUL)
  466. || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
  467. #endif
  468. || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
  469. ) {
  470. return (size_t) -1;
  471. }
  472. #else /* KUHN */
  473. #if UTF_8_MAX_LEN != 3
  474. if (wc > 0x7fffffffUL) { /* Value too large. */
  475. return (size_t) -1;
  476. }
  477. #endif
  478. #endif /* KUHN */
  479. wc >>= 1;
  480. p = s;
  481. do {
  482. ++p;
  483. } while (wc >>= 5);
  484. wc = *swc;
  485. if ((len = p - s) > t) { /* Not enough space. */
  486. break;
  487. }
  488. m = 0x80;
  489. while( p>s ) {
  490. m = (m >> 1) | 0x80;
  491. *--p = (wc & 0x3f) | 0x80;
  492. wc >>= 6;
  493. }
  494. *s |= (m << 1);
  495. } else if (wc == 0) { /* End of string. */
  496. swc = NULL;
  497. break;
  498. }
  499. ++swc;
  500. --wn;
  501. t -= len;
  502. if (store) {
  503. s += len;
  504. }
  505. }
  506. if (store) {
  507. *src = (const wchar_t *) swc;
  508. }
  509. return n - t;
  510. }
  511. #endif
  512. /**********************************************************************/
  513. #ifdef L___mbsnrtowcs
  /*
   * __mbsnrtowcs -- convert at most NMC bytes of a multibyte string into at
   * most len wide characters.  mbsnrtowcs is exported as a weak alias.
   *
   * dst   destination array, or NULL to count only.
   *       WARNING: len is treated as SIZE_MAX when dst is NULL!
   * src   address of the source pointer.  When dst is non-NULL it is set to
   *       NULL if the terminating nul was converted, else to the first
   *       unconverted byte (single-byte paths; the UTF-8 path delegates this
   *       to _wchar_utf8sntowcs()).
   * NMC   maximum number of source bytes to examine.
   * len   maximum number of wide characters to store.
   * ps    conversion state; NULL selects a state private to this function.
   *
   * Returns the number of wide characters converted, not counting the
   * terminating nul.  In the UTF-8 path an "incomplete" result ((size_t)-2)
   * from _wchar_utf8sntowcs() is reported as 0.  In the 8-bit path a byte
   * with no mapping yields (size_t)-1 with errno set to EILSEQ.
   */
  size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                    size_t NMC, size_t len, mbstate_t *__restrict ps)
      __attribute__ ((__weak__, __alias__("__mbsnrtowcs")));

  size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                      size_t NMC, size_t len, mbstate_t *__restrict ps)
  {
      static mbstate_t internal_state;    /* Static storage: starts zeroed. */
      wchar_t scratch[1];
      const char *in;
      size_t remaining;
      int step;

      if (ps == NULL) {
          ps = &internal_state;
      }

  #ifdef __CTYPE_HAS_UTF_8_LOCALES
      if (ENCODING == __ctype_encoding_utf8) {
          size_t r;

          r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1);
          if (r == (size_t) -2) {
              return 0;
          }
          return r;
      }
  #endif

      if (dst != NULL) {
          step = 1;
      } else {
          /* Count only: keep overwriting one scratch slot. */
          dst = scratch;
          len = SIZE_MAX;
          step = 0;
      }

      /* Since all the following encodings are single-byte encodings,
       * the byte limit also bounds the number of wide characters. */
      if (NMC < len) {
          len = NMC;
      }

      in = *src;
      remaining = len;

  #ifdef __CTYPE_HAS_8_BIT_LOCALES
      if (ENCODING == __ctype_encoding_8_bit) {
          wchar_t wc;

          for ( ; remaining != 0; --remaining) {
              wc = (unsigned char) *in;
              if (wc >= 0x80) {   /* Non-ASCII: translate via locale tables. */
                  wc -= 0x80;
                  wc = __global_locale.tbl8c2wc[
                          (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
                           << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
                  if (!wc) {
                      __set_errno(EILSEQ);
                      return (size_t) -1;
                  }
              }
              *dst = wc;
              if (wc == 0) {      /* Terminating nul reached. */
                  in = NULL;
                  break;
              }
              dst += step;
              ++in;
          }

          if (step) {
              *src = in;
          }
          return len - remaining;
      }
  #endif

  #ifdef __UCLIBC_HAS_LOCALE__
      assert(ENCODING == __ctype_encoding_7_bit);
  #endif

      /* 1-1 mapping of each byte onto the same wide character value. */
      for ( ; remaining != 0; --remaining) {
          *dst = (unsigned char) *in;
          if (*dst == 0) {        /* Terminating nul reached. */
              in = NULL;
              break;
          }
          ++in;
          dst += step;
      }

      if (step) {
          *src = in;
      }
      return len - remaining;
  }
  593. #endif
  594. /**********************************************************************/
  595. #ifdef L___wcsnrtombs
  596. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  597. /* Note: We completely ignore ps in all currently supported conversions.
  598. * TODO: Check for valid state anyway? */
  599. size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  600. size_t NWC, size_t len, mbstate_t *__restrict ps)
  601. __attribute__ ((__weak__, __alias__("__wcsnrtombs")));
  602. size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  603. size_t NWC, size_t len, mbstate_t *__restrict ps)
  604. {
  605. const __uwchar_t *s;
  606. size_t count;
  607. int incr;
  608. char buf[MB_LEN_MAX];
  609. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  610. if (ENCODING == __ctype_encoding_utf8) {
  611. return _wchar_wcsntoutf8s(dst, len, src, NWC);
  612. }
  613. #endif /* __CTYPE_HAS_UTF_8_LOCALES */
  614. incr = 1;
  615. /* NOTE: The following is an AWFUL HACK! In order to support %ls in
  616. * printf, we need to be able to compute the number of bytes needed
  617. * for the mbs conversion, not to exceed the precision specified.
  618. * But if dst is NULL, the return value is the length assuming a
  619. * sufficiently sized buffer. So, we allow passing of (char *) src
  620. * as dst in order to flag that we really want the length, subject
  621. * to the restricted buffer size and no partial conversions.
  622. * See _wchar_wcsntoutf8s() as well. */
  623. if (!dst || (dst == ((char *) src))) {
  624. if (!dst) {
  625. len = SIZE_MAX;
  626. }
  627. dst = buf;
  628. incr = 0;
  629. }
  630. /* Since all the following encodings are single-byte encodings... */
  631. if (len > NWC) {
  632. len = NWC;
  633. }
  634. count = len;
  635. s = (const __uwchar_t *) *src;
  636. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  637. if (ENCODING == __ctype_encoding_8_bit) {
  638. __uwchar_t wc;
  639. __uwchar_t u;
  640. while (count) {
  641. if ((wc = *s) <= 0x7f) {
  642. if (!(*dst = (unsigned char) wc)) {
  643. s = NULL;
  644. break;
  645. }
  646. } else {
  647. u = 0;
  648. if (wc <= Cwc2c_DOMAIN_MAX) {
  649. u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
  650. + Cwc2c_TT_SHIFT)];
  651. u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
  652. + ((wc >> Cwc2c_TT_SHIFT)
  653. & ((1 << Cwc2c_TI_SHIFT)-1))];
  654. u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
  655. + (u << Cwc2c_TT_SHIFT)
  656. + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
  657. }
  658. /* #define __WCHAR_REPLACEMENT_CHAR '?' */
  659. #ifdef __WCHAR_REPLACEMENT_CHAR
  660. *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
  661. #else /* __WCHAR_REPLACEMENT_CHAR */
  662. if (!u) {
  663. goto BAD;
  664. }
  665. *dst = (unsigned char) u;
  666. #endif /* __WCHAR_REPLACEMENT_CHAR */
  667. }
  668. ++s;
  669. dst += incr;
  670. --count;
  671. }
  672. if (dst != buf) {
  673. *src = (const wchar_t *) s;
  674. }
  675. return len - count;
  676. }
  677. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  678. #ifdef __UCLIBC_HAS_LOCALE__
  679. assert(ENCODING == __ctype_encoding_7_bit);
  680. #endif
  681. while (count) {
  682. if (*s > UCHAR_MAX) {
  683. #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
  684. BAD:
  685. #endif
  686. __set_errno(EILSEQ);
  687. return (size_t) -1;
  688. }
  689. if ((*dst = (unsigned char) *s) == 0) {
  690. s = NULL;
  691. break;
  692. }
  693. ++s;
  694. dst += incr;
  695. --count;
  696. }
  697. if (dst != buf) {
  698. *src = (const wchar_t *) s;
  699. }
  700. return len - count;
  701. }
  702. #endif
  703. /**********************************************************************/