wchar.c 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737
  1. /* Copyright (C) 2002, 2003, 2004 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
  18. *
  19. * Besides uClibc, I'm using this code in my libc for elks, which is
  20. * a 16-bit environment with a fairly limited compiler. It would make
  21. * things much easier for me if this file isn't modified unnecessarily.
  22. * In particular, please put any new or replacement functions somewhere
  23. * else, and modify the makefile to use your version instead.
  24. * Thanks. Manuel
  25. *
  26. * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
  27. /* May 23, 2002 Initial Notes:
  28. *
  29. * I'm still tweaking this stuff, but it passes the tests I've thrown
  30. * at it, and Erik needs it for the gcc port. The glibc extension
  31. * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
  32. * in the glibc source. I also need to fix the behavior of
  33. * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
  34. *
  35. * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
  36. * file on my platform (x86) show about 5-10% faster conversion speed than
  37. * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
  38. * individual mbrtowc()/wcrtomb() calls.
  39. *
  40. * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
  41. * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
  42. * needs to deal gracefully with whatever is sent to it. In that mode,
  43. * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
  44. * an arg to force that behavior, so the interface will be changing.
  45. *
  46. * I need to fix the error checking for 16-bit wide chars. This isn't
  47. * an issue for uClibc, but may be for ELKS. I'm currently not sure
 * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
  49. *
  50. * July 1, 2002
  51. *
  52. * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
  53. * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
  54. * locales.
  55. * Enabled building of a C/POSIX-locale-only version, so full locale support
  56. * no longer needs to be enabled.
  57. *
  58. * Nov 4, 2002
  59. *
  60. * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
  61. * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
  62. * order to support %ls in printf. See comments below for details.
  63. * Change behaviour of wc<->mb functions when in the C locale. Now they do
  64. * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
 * and consistency with the standard's requirement that a printf format string be
 * a valid multibyte string beginning and ending in its initial shift state.
  67. *
  68. * Nov 5, 2002
  69. *
  70. * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
  71. *
  72. * Nov 7, 2002
  73. *
  74. * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
  75. * Added some size/speed optimizations and integrated it into my locale
  76. * framework. Minimally tested at the moment, but the stub C-locale
  77. * version (which most people would probably be using) should be fine.
  78. *
  79. * Nov 21, 2002
  80. *
  81. * Revert the wc<->mb changes from earlier this month involving the C-locale.
  82. * Add a couple of ugly hacks to support *wprintf.
  83. * Add a mini iconv() and iconv implementation (requires locale support).
  84. *
  85. * Aug 1, 2003
  86. * Bug fix for mbrtowc.
  87. *
  88. * Aug 18, 2003
  89. * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
  90. *
  91. * Feb 11, 2004
  92. * Bug fix: Fix size check for remaining output space in iconv().
  93. *
  94. * Manuel
  95. */
  96. #define _ISOC99_SOURCE
  97. #include <errno.h>
  98. #include <stddef.h>
  99. #include <limits.h>
  100. #include <stdint.h>
  101. #include <inttypes.h>
  102. #include <stdlib.h>
  103. #include <stdio.h>
  104. #include <assert.h>
  105. #include <locale.h>
  106. #include <wchar.h>
  107. #include <bits/uClibc_uwchar.h>
#ifdef __UCLIBC_HAS_LOCALE__
libc_hidden_proto(__global_locale)
#endif
/**********************************************************************/
/* Locale configuration.
 *
 * ENCODING names the character encoding the conversion routines below
 * dispatch on.  With locale support it is read from the current locale
 * data; without locale support it is fixed to the 7-bit encoding.
 */
#ifdef __UCLIBC_HAS_LOCALE__
#ifdef __UCLIBC_MJN3_ONLY__
#ifdef L_iswspace
/* Guarded by L_iswspace so the reminder is generated only once. */
#warning TODO: Fix Cc2wc* and Cwc2c* defines!
#endif
#endif /* __UCLIBC_MJN3_ONLY__ */
/* Encoding of the current locale. */
#define ENCODING ((__UCLIBC_CURLOCALE_DATA).encoding)
/* Short local aliases for the locale-data constants used to index the
 * 8-bit char -> wchar (Cc2wc) and wchar -> char (Cwc2c) tables. */
#define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT
#define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN
#define Cwc2c_DOMAIN_MAX __LOCALE_DATA_Cwc2c_DOMAIN_MAX
#define Cwc2c_TI_SHIFT __LOCALE_DATA_Cwc2c_TI_SHIFT
#define Cwc2c_TT_SHIFT __LOCALE_DATA_Cwc2c_TT_SHIFT
#define Cwc2c_TI_LEN __LOCALE_DATA_Cwc2c_TI_LEN
#ifndef __CTYPE_HAS_UTF_8_LOCALES
#warning __CTYPE_HAS_UTF_8_LOCALES not set!
#endif
#else /* __UCLIBC_HAS_LOCALE__ */
#ifdef __UCLIBC_MJN3_ONLY__
#ifdef L_btowc
/* Guarded by L_btowc so the reminder is emitted only once. */
#warning fix preprocessor logic testing locale settings
#endif
#endif
/* No locale support: the encoding is always the 7-bit one. */
#define ENCODING (__ctype_encoding_7_bit)
/* Neither 8-bit nor UTF-8 locales may be configured without locale
 * support; treat either as a build error. */
#ifdef __CTYPE_HAS_8_BIT_LOCALES
#error __CTYPE_HAS_8_BIT_LOCALES is defined!
#endif
#ifdef __CTYPE_HAS_UTF_8_LOCALES
#error __CTYPE_HAS_UTF_8_LOCALES is defined!
#endif
/* The UTF-8 work functions are compiled only when these L_ macros are
 * defined; drop them in the locale-less build. */
#undef L__wchar_utf8sntowcs
#undef L__wchar_wcsntoutf8s
#endif /* __UCLIBC_HAS_LOCALE__ */
  146. /**********************************************************************/
/* Longest UTF-8 sequence handled: 6 bytes when wchar_t can hold values
 * above 0xffff, otherwise 3 bytes. */
#if WCHAR_MAX > 0xffffUL
#define UTF_8_MAX_LEN 6
#else
#define UTF_8_MAX_LEN 3
#endif
/* When defined, the UTF-8 converters below additionally reject the
 * surrogate range 0xd800-0xdfff and the values 0xfffe/0xffff. */
#define KUHN 1
/* Implementation-specific work functions. */
/* UTF-8 multibyte -> wide conversion; defined under L__wchar_utf8sntowcs. */
extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
				 const char **__restrict src, size_t n,
				 mbstate_t *ps, int allow_continuation) attribute_hidden;
/* Wide -> UTF-8 multibyte conversion; defined under L__wchar_wcsntoutf8s. */
extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
				 const wchar_t **__restrict src, size_t wn) attribute_hidden;
  159. /**********************************************************************/
  160. #ifdef L_btowc
  161. libc_hidden_proto(mbrtowc)
  162. libc_hidden_proto(btowc)
  163. wint_t btowc(int c)
  164. {
  165. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  166. wchar_t wc;
  167. unsigned char buf[1];
  168. mbstate_t mbstate;
  169. if (c != EOF) {
  170. *buf = (unsigned char) c;
  171. mbstate.__mask = 0; /* Initialize the mbstate. */
  172. if (mbrtowc(&wc, buf, 1, &mbstate) <= 1) {
  173. return wc;
  174. }
  175. }
  176. return WEOF;
  177. #else /* __CTYPE_HAS_8_BIT_LOCALES */
  178. #ifdef __UCLIBC_HAS_LOCALE__
  179. assert((ENCODING == __ctype_encoding_7_bit)
  180. || (ENCODING == __ctype_encoding_utf8));
  181. #endif /* __UCLIBC_HAS_LOCALE__ */
  182. /* If we don't have 8-bit locale support, then this is trivial since
  183. * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
  184. return (((unsigned int)c) < 0x80) ? c : WEOF;
  185. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  186. }
  187. libc_hidden_def(btowc)
  188. #endif
  189. /**********************************************************************/
  190. #ifdef L_wctob
  191. /* Note: We completely ignore ps in all currently supported conversions. */
  192. libc_hidden_proto(wcrtomb)
  193. int wctob(wint_t c)
  194. {
  195. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  196. unsigned char buf[MB_LEN_MAX];
  197. return (wcrtomb(buf, c, NULL) == 1) ? *buf : EOF;
  198. #else /* __CTYPE_HAS_8_BIT_LOCALES */
  199. #ifdef __UCLIBC_HAS_LOCALE__
  200. assert((ENCODING == __ctype_encoding_7_bit)
  201. || (ENCODING == __ctype_encoding_utf8));
  202. #endif /* __UCLIBC_HAS_LOCALE__ */
  203. /* If we don't have 8-bit locale support, then this is trivial since
  204. * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
  205. /* TODO: need unsigned version of wint_t... */
  206. /* return (((unsigned int)c) < 0x80) ? c : WEOF; */
  207. return ((c >= 0) && (c < 0x80)) ? c : EOF;
  208. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  209. }
  210. #endif
  211. /**********************************************************************/
  212. #ifdef L_mbsinit
  213. libc_hidden_proto(mbsinit)
  214. int mbsinit(const mbstate_t *ps)
  215. {
  216. return !ps || !ps->__mask;
  217. }
  218. libc_hidden_def(mbsinit)
  219. #endif
  220. /**********************************************************************/
  221. #ifdef L_mbrlen
  222. libc_hidden_proto(mbrtowc)
  223. libc_hidden_proto(mbrlen)
  224. size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
  225. {
  226. static mbstate_t mbstate; /* Rely on bss 0-init. */
  227. return mbrtowc(NULL, s, n, (ps != NULL) ? ps : &mbstate);
  228. }
  229. libc_hidden_def(mbrlen)
  230. #endif
  231. /**********************************************************************/
  232. #ifdef L_mbrtowc
  233. libc_hidden_proto(mbsnrtowcs)
  234. libc_hidden_proto(mbrtowc)
  235. size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
  236. size_t n, mbstate_t *__restrict ps)
  237. {
  238. static mbstate_t mbstate; /* Rely on bss 0-init. */
  239. wchar_t wcbuf[1];
  240. const char *p;
  241. size_t r;
  242. char empty_string[1]; /* Avoid static to be fPIC friendly. */
  243. if (!ps) {
  244. ps = &mbstate;
  245. }
  246. if (!s) {
  247. pwc = (wchar_t *) s; /* NULL */
  248. empty_string[0] = 0; /* Init the empty string when necessary. */
  249. s = empty_string;
  250. n = 1;
  251. } else if (!n) {
  252. /* TODO: change error code? */
  253. return (ps->__mask && (ps->__wc == 0xffffU))
  254. ? ((size_t) -1) : ((size_t) -2);
  255. }
  256. p = s;
  257. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  258. /* Need to do this here since mbsrtowcs doesn't allow incompletes. */
  259. if (ENCODING == __ctype_encoding_utf8) {
  260. if (!pwc) {
  261. pwc = wcbuf;
  262. }
  263. r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
  264. return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
  265. }
  266. #endif
  267. #ifdef __UCLIBC_MJN3_ONLY__
  268. #warning TODO: This adds a trailing nul!
  269. #endif /* __UCLIBC_MJN3_ONLY__ */
  270. r = mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
  271. if (((ssize_t) r) >= 0) {
  272. if (pwc) {
  273. *pwc = *wcbuf;
  274. }
  275. }
  276. return (size_t) r;
  277. }
  278. libc_hidden_def(mbrtowc)
  279. #endif
  280. /**********************************************************************/
  281. #ifdef L_wcrtomb
  282. libc_hidden_proto(wcsnrtombs)
  283. /* Note: We completely ignore ps in all currently supported conversions. */
  284. /* TODO: Check for valid state anyway? */
  285. libc_hidden_proto(wcrtomb)
  286. size_t wcrtomb(register char *__restrict s, wchar_t wc,
  287. mbstate_t *__restrict ps)
  288. {
  289. #ifdef __UCLIBC_MJN3_ONLY__
  290. #warning TODO: Should wcsnrtombs nul-terminate unconditionally? Check glibc.
  291. #endif /* __UCLIBC_MJN3_ONLY__ */
  292. wchar_t wcbuf[1];
  293. const wchar_t *pwc;
  294. size_t r;
  295. char buf[MB_LEN_MAX];
  296. if (!s) {
  297. s = buf;
  298. wc = 0;
  299. }
  300. pwc = wcbuf;
  301. wcbuf[0] = wc;
  302. r = wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);
  303. return (r != 0) ? r : 1;
  304. }
  305. libc_hidden_def(wcrtomb)
  306. #endif
  307. /**********************************************************************/
  308. #ifdef L_mbsrtowcs
  309. libc_hidden_proto(mbsnrtowcs)
  310. libc_hidden_proto(mbsrtowcs)
  311. size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
  312. size_t len, mbstate_t *__restrict ps)
  313. {
  314. static mbstate_t mbstate; /* Rely on bss 0-init. */
  315. return mbsnrtowcs(dst, src, SIZE_MAX, len,
  316. ((ps != NULL) ? ps : &mbstate));
  317. }
  318. libc_hidden_def(mbsrtowcs)
  319. #endif
  320. /**********************************************************************/
  321. #ifdef L_wcsrtombs
  322. /* Note: We completely ignore ps in all currently supported conversions.
  323. * TODO: Check for valid state anyway? */
  324. libc_hidden_proto(wcsnrtombs)
  325. libc_hidden_proto(wcsrtombs)
  326. size_t wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
  327. size_t len, mbstate_t *__restrict ps)
  328. {
  329. return wcsnrtombs(dst, src, SIZE_MAX, len, ps);
  330. }
  331. libc_hidden_def(wcsrtombs)
  332. #endif
  333. /**********************************************************************/
  334. #ifdef L__wchar_utf8sntowcs
/* Define DECODER to generate a UTF-8 decoder which passes Markus Kuhn's
 * UTF-8-test.txt stress test.
 */
  338. /* #define DECODER */
  339. #ifdef DECODER
  340. #ifndef KUHN
  341. #define KUHN
  342. #endif
  343. #endif
/* Convert UTF-8 bytes from *src into wide characters.
 *
 *   pwc   destination; NULL, or the value (wchar_t *) ps, selects a
 *         count-only mode in which nothing is stored for the caller
 *         (see the NOTE in the body).
 *   wn    maximum number of wide characters to produce (treated as
 *         SIZE_MAX when pwc is NULL).
 *   src   in/out pointer to the input bytes.  Written back only when
 *         converting into a caller buffer or when returning (size_t)-2.
 *         In the non-DECODER build it is set to NULL if a nul character
 *         ended the conversion.
 *   n     maximum number of input bytes to consume.
 *   ps    conversion state: __mask != 0 means a sequence is in progress
 *         and __wc holds the partial value; __wc == 0xffff flags an
 *         error state (non-DECODER build).
 *   allow_continuation
 *         nonzero: an incomplete trailing sequence is saved in *ps and
 *         (size_t)-2 is returned.
 *
 * Returns the number of wide characters converted (wn - count), or
 * (size_t)-1 with errno set to EILSEQ on an invalid sequence
 * (non-DECODER build), or (size_t)-2 as described above.
 *
 * Control flow note: the START and CONTINUE labels are entered by goto
 * from outside the inner while loop, and BAD/COMPLETE are shared exits.
 */
size_t attribute_hidden _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
					   const char **__restrict src, size_t n,
					   mbstate_t *ps, int allow_continuation)
{
	register const char *s;
	__uwchar_t mask;
	__uwchar_t wc;
	wchar_t wcbuf[1];
	size_t count;
	int incr;

	s = *src;
	assert(s != NULL);
	assert(ps != NULL);
	incr = 1;
	/* NOTE: The following is an AWFUL HACK! In order to support %s in
	 * wprintf, we need to be able to compute the number of wchars needed
	 * for the mbs conversion, not to exceed the precision specified.
	 * But if dst is NULL, the return value is the length assuming a
	 * sufficiently sized buffer. So, we allow passing of (wchar_t *) ps
	 * as pwc in order to flag that we really want the length, subject
	 * to the restricted buffer size and no partial conversions.
	 * See mbsnrtowcs() as well. */
	if (!pwc || (pwc == ((wchar_t *)ps))) {
		if (!pwc) {
			wn = SIZE_MAX;
		}
		/* Count-only mode: every result lands in the same scratch slot. */
		pwc = wcbuf;
		incr = 0;
	}
	/* This is really here only to support the glibc extension function
	 * __mbsnrtowcs which apparently returns 0 if wn == 0 without any
	 * check on the validity of the mbstate. */
	if (!(count = wn)) {
		return 0;
	}
	if ((mask = (__uwchar_t) ps->__mask) != 0) { /* A continuation... */
#ifdef DECODER
		wc = (__uwchar_t) ps->__wc;
		if (n) {
			goto CONTINUE;
		}
		goto DONE;
#else
		if ((wc = (__uwchar_t) ps->__wc) != 0xffffU) {
			/* TODO: change error code here and below? */
			if (n) {
				goto CONTINUE;	/* Resume the saved partial sequence. */
			}
			goto DONE;
		}
		__set_errno(EILSEQ);
		return (size_t) -1; /* We're in an error state. */
#endif
	}
	do {
		if (!n) {
			goto DONE;
		}
		--n;
		if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
			mask = 0x40;
#ifdef __UCLIBC_MJN3_ONLY__
#warning TODO: Fix range for 16 bit wchar_t case.
#endif
			/* Accept lead bytes in the range 0xc0..0xfd only. */
			if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
				goto START;
			}
		BAD:
#ifdef DECODER
			/* Substitute U+FFFD and keep going. */
			wc = 0xfffdU;
			goto COMPLETE;
#else
			/* Record the error state in *ps before failing. */
			ps->__mask = mask;
			ps->__wc = 0xffffU;
			__set_errno(EILSEQ);
			return (size_t) -1; /* Illegal start byte! */
#endif
		CONTINUE:
			while (n) {
				--n;
				/* Every trailing byte must look like 10xxxxxx. */
				if ((*s & 0xc0) != 0x80) {
					goto BAD;
				}
				mask <<= 5;
				wc <<= 6;
				wc += (*s & 0x3f); /* keep separate for bcc (smaller code) */
				++s;
			START:
				/* Clear the bit just above mask; the character is complete
				 * once the bit at mask is clear as well. */
				wc &= ~(mask << 1);
				if ((wc & mask) == 0) { /* Character completed. */
					if ((mask >>= 5) == 0x40) {
						mask += mask;
					}
					/* Check for invalid sequences (longer than necessary)
					 * and invalid chars. */
					if ( (wc < mask) /* Sequence not minimal length. */
#ifdef KUHN
#if UTF_8_MAX_LEN == 3
#error broken since mask can overflow!!
						 /* For plane 0, these are the only defined values.*/
						 || (wc > 0xfffdU)
#else
						 /* Note that we don't need to worry about exceeding */
						 /* 31 bits as that is the most that UTF-8 provides. */
						 || ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
#endif
						 || ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
#endif /* KUHN */
						 ) {
						goto BAD;
					}
					goto COMPLETE;
				}
			}
			/* Character potentially valid but incomplete. */
			if (!allow_continuation) {
				/* NOTE(review): when some characters were already converted
				 * this returns 0 rather than the number converted, and *src
				 * is not updated -- confirm this is intended. */
				if (count != wn) {
					return 0;
				}
				/* NOTE: The following can fail if you allow and then disallow
				 * continuation!!! */
#if UTF_8_MAX_LEN == 3
#error broken since mask can overflow!!
#endif
				/* Need to back up... */
				do {
					--s;
				} while ((mask >>= 5) >= 0x40);
				goto DONE;
			}
			/* Save the partial sequence so a later call can resume it. */
			ps->__mask = (wchar_t) mask;
			ps->__wc = (wchar_t) wc;
			*src = s;
			return (size_t) -2;
		}
	COMPLETE:
		*pwc = wc;
		pwc += incr;
	}
#ifdef DECODER
	while (--count);
#else
	/* Stop after a nul character or once wn characters are converted. */
	while (wc && --count);
	if (!wc) {
		s = NULL;
	}
#endif
 DONE:
	/* ps->__wc is irrelevant here. */
	ps->__mask = 0;
	/* Only report the input position when storing into a caller buffer. */
	if (pwc != wcbuf) {
		*src = s;
	}
	return wn - count;
}
  499. #endif
  500. /**********************************************************************/
  501. #ifdef L__wchar_wcsntoutf8s
/* Convert wide characters from *src into UTF-8 bytes.
 *
 *   s     destination; NULL, or the value (char *) src, selects a
 *         count-only mode in which bytes are built in a scratch buffer
 *         and *src is left untouched (see the NOTE in the body).
 *   n     maximum number of bytes to produce (treated as SIZE_MAX when
 *         s is NULL).
 *   src   in/out pointer to the wide input; when storing, it is updated
 *         on return and set to NULL if a nul wide character was reached.
 *   wn    maximum number of wide characters to consume.
 *
 * Returns the number of bytes produced (n - t), not counting a
 * terminating nul, or (size_t)-1 with errno set to EILSEQ when a wide
 * character fails the range checks below.
 */
size_t attribute_hidden _wchar_wcsntoutf8s(char *__restrict s, size_t n,
					   const wchar_t **__restrict src, size_t wn)
{
	register char *p;
	size_t len, t;
	__uwchar_t wc;
	const __uwchar_t *swc;
	int store;
	char buf[MB_LEN_MAX];
	char m;

	store = 1;
	/* NOTE: The following is an AWFUL HACK! In order to support %ls in
	 * printf, we need to be able to compute the number of bytes needed
	 * for the mbs conversion, not to exceed the precision specified.
	 * But if dst is NULL, the return value is the length assuming a
	 * sufficiently sized buffer. So, we allow passing of (char *) src
	 * as dst in order to flag that we really want the length, subject
	 * to the restricted buffer size and no partial conversions.
	 * See wcsnrtombs() as well. */
	if (!s || (s == ((char *) src))) {
		if (!s) {
			n = SIZE_MAX;
		}
		s = buf;
		store = 0;
	}
	t = n;				/* t counts the output bytes still available. */
	swc = (const __uwchar_t *) *src;
	assert(swc != NULL);
	while (wn && t) {
		wc = *swc;
		/* Provisional single-byte store; for wc >= 0x80 this byte is
		 * overwritten by the encoding loop below. */
		*s = wc;
		len = 1;
		if (wc >= 0x80) {
#ifdef KUHN
			if (
#if UTF_8_MAX_LEN == 3
				/* For plane 0, these are the only defined values.*/
				/* Note that we don't need to worry about exceeding */
				/* 31 bits as that is the most that UTF-8 provides. */
				(wc > 0xfffdU)
#else
				/* UTF_8_MAX_LEN == 6 */
				(wc > 0x7fffffffUL)
				|| ( ((__uwchar_t)(wc - 0xfffeU)) < 2)
#endif
				|| ( ((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U) )
				) {
				__set_errno(EILSEQ);
				return (size_t) -1;
			}
#else  /* KUHN */
#if UTF_8_MAX_LEN != 3
			if (wc > 0x7fffffffUL) { /* Value too large. */
				__set_errno(EILSEQ);
				return (size_t) -1;
			}
#endif
#endif /* KUHN */
			/* Determine the encoded length by advancing p one byte per
			 * 5-bit group of (wc >> 1). */
			wc >>= 1;
			p = s;
			do {
				++p;
			} while (wc >>= 5);
			wc = *swc;		/* Reload; the length scan consumed wc. */
			if ((len = p - s) > t) { /* Not enough space. */
				break;
			}
			/* Emit 6 bits per byte from the last byte backwards, growing
			 * the run of leading one-bits in m by one per byte. */
			m = 0x80;
			while( p>s ) {
				m = (m >> 1) | 0x80;
				*--p = (wc & 0x3f) | 0x80;
				wc >>= 6;
			}
			/* OR the length marker bits into the first byte. */
			*s |= (m << 1);
		} else if (wc == 0) {	/* End of string. */
			swc = NULL;
			break;
		}
		++swc;
		--wn;
		t -= len;
		if (store) {
			s += len;
		}
	}
	if (store) {
		*src = (const wchar_t *) swc;
	}
	return n - t;
}
  593. #endif
  594. /**********************************************************************/
  595. #ifdef L_mbsnrtowcs
  596. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  597. libc_hidden_proto(mbsnrtowcs)
  598. size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
  599. size_t NMC, size_t len, mbstate_t *__restrict ps)
  600. {
  601. static mbstate_t mbstate; /* Rely on bss 0-init. */
  602. wchar_t wcbuf[1];
  603. const char *s;
  604. size_t count;
  605. int incr;
  606. if (!ps) {
  607. ps = &mbstate;
  608. }
  609. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  610. if (ENCODING == __ctype_encoding_utf8) {
  611. size_t r;
  612. return ((r = _wchar_utf8sntowcs(dst, len, src, NMC, ps, 1))
  613. != (size_t) -2) ? r : 0;
  614. }
  615. #endif
  616. incr = 1;
  617. /* NOTE: The following is an AWFUL HACK! In order to support %s in
  618. * wprintf, we need to be able to compute the number of wchars needed
  619. * for the mbs conversion, not to exceed the precision specified.
  620. * But if dst is NULL, the return value is the length assuming a
  621. * sufficiently sized buffer. So, we allow passing of ((wchar_t *)ps)
  622. * as dst in order to flag that we really want the length, subject
  623. * to the restricted buffer size and no partial conversions.
  624. * See _wchar_utf8sntowcs() as well. */
  625. if (!dst || (dst == ((wchar_t *)ps))) {
  626. if (!dst) {
  627. len = SIZE_MAX;
  628. }
  629. dst = wcbuf;
  630. incr = 0;
  631. }
  632. /* Since all the following encodings are single-byte encodings... */
  633. if (len > NMC) {
  634. len = NMC;
  635. }
  636. count = len;
  637. s = *src;
  638. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  639. if (ENCODING == __ctype_encoding_8_bit) {
  640. wchar_t wc;
  641. while (count) {
  642. if ((wc = ((unsigned char)(*s))) >= 0x80) { /* Non-ASCII... */
  643. wc -= 0x80;
  644. wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
  645. (__UCLIBC_CURLOCALE_DATA.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
  646. << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
  647. if (!wc) {
  648. goto BAD;
  649. }
  650. }
  651. if (!(*dst = wc)) {
  652. s = NULL;
  653. break;
  654. }
  655. dst += incr;
  656. ++s;
  657. --count;
  658. }
  659. if (dst != wcbuf) {
  660. *src = s;
  661. }
  662. return len - count;
  663. }
  664. #endif
  665. #ifdef __UCLIBC_HAS_LOCALE__
  666. assert(ENCODING == __ctype_encoding_7_bit);
  667. #endif
  668. while (count) {
  669. if ((*dst = (unsigned char) *s) == 0) {
  670. s = NULL;
  671. break;
  672. }
  673. if (*dst >= 0x80) {
  674. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  675. BAD:
  676. #endif
  677. __set_errno(EILSEQ);
  678. return (size_t) -1;
  679. }
  680. ++s;
  681. dst += incr;
  682. --count;
  683. }
  684. if (dst != wcbuf) {
  685. *src = s;
  686. }
  687. return len - count;
  688. }
  689. libc_hidden_def(mbsnrtowcs)
  690. #endif
  691. /**********************************************************************/
  692. #ifdef L_wcsnrtombs
  693. /* WARNING: We treat len as SIZE_MAX when dst is NULL! */
  694. /* Note: We completely ignore ps in all currently supported conversions.
  695. * TODO: Check for valid state anyway? */
  696. libc_hidden_proto(wcsnrtombs)
  697. size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
  698. size_t NWC, size_t len, mbstate_t *__restrict ps)
  699. {
  700. const __uwchar_t *s;
  701. size_t count;
  702. int incr;
  703. char buf[MB_LEN_MAX];
  704. #ifdef __CTYPE_HAS_UTF_8_LOCALES
  705. if (ENCODING == __ctype_encoding_utf8) {
  706. return _wchar_wcsntoutf8s(dst, len, src, NWC);
  707. }
  708. #endif /* __CTYPE_HAS_UTF_8_LOCALES */
  709. incr = 1;
  710. /* NOTE: The following is an AWFUL HACK! In order to support %ls in
  711. * printf, we need to be able to compute the number of bytes needed
  712. * for the mbs conversion, not to exceed the precision specified.
  713. * But if dst is NULL, the return value is the length assuming a
  714. * sufficiently sized buffer. So, we allow passing of (char *) src
  715. * as dst in order to flag that we really want the length, subject
  716. * to the restricted buffer size and no partial conversions.
  717. * See _wchar_wcsntoutf8s() as well. */
  718. if (!dst || (dst == ((char *) src))) {
  719. if (!dst) {
  720. len = SIZE_MAX;
  721. }
  722. dst = buf;
  723. incr = 0;
  724. }
  725. /* Since all the following encodings are single-byte encodings... */
  726. if (len > NWC) {
  727. len = NWC;
  728. }
  729. count = len;
  730. s = (const __uwchar_t *) *src;
  731. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  732. if (ENCODING == __ctype_encoding_8_bit) {
  733. __uwchar_t wc;
  734. __uwchar_t u;
  735. while (count) {
  736. if ((wc = *s) <= 0x7f) {
  737. if (!(*dst = (unsigned char) wc)) {
  738. s = NULL;
  739. break;
  740. }
  741. } else {
  742. u = 0;
  743. if (wc <= Cwc2c_DOMAIN_MAX) {
  744. u = __UCLIBC_CURLOCALE_DATA.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
  745. + Cwc2c_TT_SHIFT)];
  746. u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
  747. + ((wc >> Cwc2c_TT_SHIFT)
  748. & ((1 << Cwc2c_TI_SHIFT)-1))];
  749. u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
  750. + (u << Cwc2c_TT_SHIFT)
  751. + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
  752. }
  753. #define __WCHAR_REPLACEMENT_CHAR '?'
  754. #ifdef __WCHAR_REPLACEMENT_CHAR
  755. *dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
  756. #else /* __WCHAR_REPLACEMENT_CHAR */
  757. if (!u) {
  758. goto BAD;
  759. }
  760. *dst = (unsigned char) u;
  761. #endif /* __WCHAR_REPLACEMENT_CHAR */
  762. }
  763. ++s;
  764. dst += incr;
  765. --count;
  766. }
  767. if (dst != buf) {
  768. *src = (const wchar_t *) s;
  769. }
  770. return len - count;
  771. }
  772. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  773. #ifdef __UCLIBC_HAS_LOCALE__
  774. assert(ENCODING == __ctype_encoding_7_bit);
  775. #endif
  776. while (count) {
  777. if (*s >= 0x80) {
  778. #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
  779. BAD:
  780. #endif
  781. __set_errno(EILSEQ);
  782. return (size_t) -1;
  783. }
  784. if ((*dst = (unsigned char) *s) == 0) {
  785. s = NULL;
  786. break;
  787. }
  788. ++s;
  789. dst += incr;
  790. --count;
  791. }
  792. if (dst != buf) {
  793. *src = (const wchar_t *) s;
  794. }
  795. return len - count;
  796. }
  797. libc_hidden_def(wcsnrtombs)
  798. #endif
  799. /**********************************************************************/
  800. #ifdef L_wcswidth
  801. libc_hidden_proto(wcswidth)
  802. #ifdef __UCLIBC_MJN3_ONLY__
  803. #warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.
  804. #warning TODO: Update wcwidth to match latest by Kuhn.
  805. #endif
  806. #if defined(__UCLIBC_HAS_LOCALE__) && \
  807. ( defined(__CTYPE_HAS_8_BIT_LOCALES) || defined(__CTYPE_HAS_UTF_8_LOCALES) )
  /* First-level index for wcswidth(), keyed by the high byte of a wide
   * character in the range 0x0100..0xffff.  For high byte h, wcswidth()
   * binary-searches new_tbl starting from the bounds new_idx[h] and
   * new_idx[h+1]; the extra trailing entry lets h+1 be read when h is 0xff. */
  808. static const unsigned char new_idx[] = {
  809. 0, 5, 5, 6, 10, 15, 28, 39,
  810. 48, 48, 71, 94, 113, 128, 139, 154,
  811. 175, 186, 188, 188, 188, 188, 188, 188,
  812. 203, 208, 208, 208, 208, 208, 208, 208,
  813. 208, 219, 219, 219, 222, 222, 222, 222,
  814. 222, 222, 222, 222, 222, 222, 222, 224,
  815. 224, 231, 231, 231, 231, 231, 231, 231,
  816. 231, 231, 231, 231, 231, 231, 231, 231,
  817. 231, 231, 231, 231, 231, 231, 231, 231,
  818. 231, 231, 231, 231, 231, 231, 231, 231,
  819. 231, 231, 231, 231, 231, 231, 231, 231,
  820. 231, 231, 231, 231, 231, 231, 231, 231,
  821. 231, 231, 231, 231, 231, 231, 231, 231,
  822. 231, 231, 231, 231, 231, 231, 231, 231,
  823. 231, 231, 231, 231, 231, 231, 231, 231,
  824. 231, 231, 231, 231, 231, 231, 231, 231,
  825. 231, 231, 231, 231, 231, 231, 231, 231,
  826. 231, 231, 231, 231, 231, 231, 231, 231,
  827. 231, 231, 231, 231, 231, 231, 231, 231,
  828. 231, 231, 231, 231, 231, 231, 231, 231,
  829. 231, 231, 231, 231, 231, 233, 233, 233,
  830. 233, 233, 233, 233, 234, 234, 234, 234,
  831. 234, 234, 234, 234, 234, 234, 234, 234,
  832. 234, 234, 234, 234, 234, 234, 234, 234,
  833. 234, 234, 234, 234, 234, 234, 234, 234,
  834. 234, 234, 234, 234, 234, 234, 234, 234,
  835. 234, 234, 234, 234, 234, 234, 234, 234,
  836. 236, 236, 236, 236, 236, 236, 236, 236,
  837. 236, 236, 236, 236, 236, 236, 236, 236,
  838. 236, 236, 236, 236, 236, 236, 236, 236,
  839. 236, 236, 236, 236, 236, 236, 236, 236,
  840. 236, 237, 237, 238, 241, 241, 242, 249,
  841. 255,
  842. };
  /* Second-level table for wcswidth(): low-byte values that the binary
   * search compares against (wc & 0xff).  The search settles on the last
   * slot, within the bounds taken from new_idx, whose value is <= the low
   * byte.  NOTE(review): each entry appears to be the low byte at which a run
   * of equal-width characters starts - confirm against the table generator. */
  843. static const unsigned char new_tbl[] = {
  844. 0x00, 0x01, 0x20, 0x7f, 0xa0, 0x00, 0x00, 0x50,
  845. 0x60, 0x70, 0x00, 0x83, 0x87, 0x88, 0x8a, 0x00,
  846. 0x91, 0xa2, 0xa3, 0xba, 0xbb, 0xbe, 0xbf, 0xc0,
  847. 0xc1, 0xc3, 0xc4, 0xc5, 0x00, 0x4b, 0x56, 0x70,
  848. 0x71, 0xd6, 0xe5, 0xe7, 0xe9, 0xea, 0xee, 0x00,
  849. 0x0f, 0x10, 0x11, 0x12, 0x30, 0x4b, 0xa6, 0xb1,
  850. 0x00, 0x01, 0x03, 0x3c, 0x3d, 0x41, 0x49, 0x4d,
  851. 0x4e, 0x51, 0x55, 0x62, 0x64, 0x81, 0x82, 0xbc,
  852. 0xbd, 0xc1, 0xc5, 0xcd, 0xce, 0xe2, 0xe4, 0x00,
  853. 0x02, 0x03, 0x3c, 0x3d, 0x41, 0x43, 0x47, 0x49,
  854. 0x4b, 0x4e, 0x70, 0x72, 0x81, 0x83, 0xbc, 0xbd,
  855. 0xc1, 0xc6, 0xc7, 0xc9, 0xcd, 0xce, 0x00, 0x01,
  856. 0x02, 0x3c, 0x3d, 0x3f, 0x40, 0x41, 0x44, 0x4d,
  857. 0x4e, 0x56, 0x57, 0x82, 0x83, 0xc0, 0xc1, 0xcd,
  858. 0xce, 0x00, 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4e,
  859. 0x55, 0x57, 0xbf, 0xc0, 0xc6, 0xc7, 0xcc, 0xce,
  860. 0x00, 0x41, 0x44, 0x4d, 0x4e, 0xca, 0xcb, 0xd2,
  861. 0xd5, 0xd6, 0xd7, 0x00, 0x31, 0x32, 0x34, 0x3b,
  862. 0x47, 0x4f, 0xb1, 0xb2, 0xb4, 0xba, 0xbb, 0xbd,
  863. 0xc8, 0xce, 0x00, 0x18, 0x1a, 0x35, 0x36, 0x37,
  864. 0x38, 0x39, 0x3a, 0x71, 0x7f, 0x80, 0x85, 0x86,
  865. 0x88, 0x90, 0x98, 0x99, 0xbd, 0xc6, 0xc7, 0x00,
  866. 0x2d, 0x31, 0x32, 0x33, 0x36, 0x38, 0x39, 0x3a,
  867. 0x58, 0x5a, 0x00, 0x60, 0x00, 0x12, 0x15, 0x32,
  868. 0x35, 0x52, 0x54, 0x72, 0x74, 0xb7, 0xbe, 0xc6,
  869. 0xc7, 0xc9, 0xd4, 0x00, 0x0b, 0x0f, 0xa9, 0xaa,
  870. 0x00, 0x0b, 0x10, 0x2a, 0x2f, 0x60, 0x64, 0x6a,
  871. 0x70, 0xd0, 0xeb, 0x00, 0x29, 0x2b, 0x00, 0x80,
  872. 0x00, 0x2a, 0x30, 0x3f, 0x40, 0x99, 0x9b, 0x00,
  873. 0xd0, 0x00, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x1e,
  874. 0x1f, 0x00, 0x00, 0x10, 0x20, 0x24, 0x30, 0x70,
  875. 0xff, 0x00, 0x61, 0xe0, 0xe7, 0xf9, 0xfc,
  876. };
  /* Width table for wcswidth(), parallel to new_tbl: once the binary search
   * settles on slot l, new_wtbl[l] is added to the running column count.
   * The -1 entries belong to high byte 0x00; wcswidth() handles every
   * character <= 0xff before consulting these tables, so it does not reach
   * them. */
  877. static const signed char new_wtbl[] = {
  878. 0, -1, 1, -1, 1, 1, 0, 1,
  879. 0, 1, 1, 0, 1, 0, 1, 1,
  880. 0, 1, 0, 1, 0, 1, 0, 1,
  881. 0, 1, 0, 1, 1, 0, 1, 0,
  882. 1, 0, 1, 0, 1, 0, 1, 1,
  883. 0, 1, 0, 1, 0, 1, 0, 1,
  884. 1, 0, 1, 0, 1, 0, 1, 0,
  885. 1, 0, 1, 0, 1, 0, 1, 0,
  886. 1, 0, 1, 0, 1, 0, 1, 1,
  887. 0, 1, 0, 1, 0, 1, 0, 1,
  888. 0, 1, 0, 1, 0, 1, 0, 1,
  889. 0, 1, 0, 1, 0, 1, 1, 0,
  890. 1, 0, 1, 0, 1, 0, 1, 0,
  891. 1, 0, 1, 0, 1, 0, 1, 0,
  892. 1, 1, 0, 1, 0, 1, 0, 1,
  893. 0, 1, 0, 1, 0, 1, 0, 1,
  894. 1, 0, 1, 0, 1, 0, 1, 0,
  895. 1, 0, 1, 1, 0, 1, 0, 1,
  896. 0, 1, 0, 1, 0, 1, 0, 1,
  897. 0, 1, 1, 0, 1, 0, 1, 0,
  898. 1, 0, 1, 0, 1, 0, 1, 0,
  899. 1, 0, 1, 0, 1, 0, 1, 1,
  900. 0, 1, 0, 1, 0, 1, 0, 1,
  901. 0, 1, 2, 0, 1, 0, 1, 0,
  902. 1, 0, 1, 0, 1, 0, 1, 0,
  903. 1, 0, 1, 1, 0, 1, 0, 1,
  904. 1, 0, 1, 0, 1, 0, 1, 0,
  905. 1, 0, 1, 1, 2, 1, 1, 2,
  906. 2, 0, 2, 1, 2, 0, 2, 2,
  907. 1, 1, 2, 1, 1, 2, 1, 0,
  908. 1, 1, 0, 1, 0, 1, 2, 1,
  909. 0, 2, 1, 2, 1, 0, 1,
  910. };
  911. libc_hidden_proto(wcsnrtombs)
  912. int wcswidth(const wchar_t *pwcs, size_t n)
  913. {
  914. int h, l, m, count;
  915. wchar_t wc;
  916. unsigned char b;
  917. if (ENCODING == __ctype_encoding_7_bit) {
  918. size_t i;
  919. for (i = 0 ; (i < n) && pwcs[i] ; i++) {
  920. if (pwcs[i] != ((unsigned char)(pwcs[i]))) {
  921. return -1;
  922. }
  923. }
  924. }
  925. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  926. else if (ENCODING == __ctype_encoding_8_bit) {
  927. mbstate_t mbstate;
  928. mbstate.__mask = 0; /* Initialize the mbstate. */
  929. if (wcsnrtombs(NULL, &pwcs, n, SIZE_MAX, &mbstate) == ((size_t) - 1)) {
  930. return -1;
  931. }
  932. }
  933. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  934. #if defined(__CTYPE_HAS_UTF_8_LOCALES) && defined(KUHN)
  935. /* For stricter handling of allowed unicode values... see comments above. */
  936. else if (ENCODING == __ctype_encoding_utf8) {
  937. size_t i;
  938. for (i = 0 ; (i < n) && pwcs[i] ; i++) {
  939. if ( (((__uwchar_t)((pwcs[i]) - 0xfffeU)) < 2)
  940. || (((__uwchar_t)((pwcs[i]) - 0xd800U)) < (0xe000U - 0xd800U))
  941. ) {
  942. return -1;
  943. }
  944. }
  945. }
  946. #endif /* __CTYPE_HAS_UTF_8_LOCALES */
  947. for (count = 0 ; n && (wc = *pwcs++) ; n--) {
  948. if (wc <= 0xff) {
  949. /* If we're here, wc != 0. */
  950. if ((wc < 32) || ((wc >= 0x7f) && (wc < 0xa0))) {
  951. return -1;
  952. }
  953. ++count;
  954. continue;
  955. }
  956. if (((unsigned int) wc) <= 0xffff) {
  957. b = wc & 0xff;
  958. h = (wc >> 8);
  959. l = new_idx[h];
  960. h = new_idx[h+1];
  961. while ((m = (l+h) >> 1) != l) {
  962. if (b >= new_tbl[m]) {
  963. l = m;
  964. } else { /* wc < tbl[m] */
  965. h = m;
  966. }
  967. }
  968. count += new_wtbl[l]; /* none should be -1. */
  969. continue;
  970. }
  971. /* Redo this to minimize average number of compares?*/
  972. if (wc >= 0x1d167) {
  973. if (wc <= 0x1d1ad) {
  974. if ((wc <= 0x1d169
  975. || (wc >= 0x1d173
  976. && (wc <= 0x1d182
  977. || (wc >= 0x1d185
  978. && (wc <= 0x1d18b
  979. || (wc >= 0x1d1aa))))))
  980. ) {
  981. continue;
  982. }
  983. } else if (((wc >= 0xe0020) && (wc <= 0xe007f)) || (wc == 0xe0001)) {
  984. continue;
  985. } else if ((wc >= 0x20000) && (wc <= 0x2ffff)) {
  986. ++count; /* need 2.. add one here */
  987. }
  988. #if (WCHAR_MAX > 0x7fffffffL)
  989. else if (wc > 0x7fffffffL) {
  990. return -1;
  991. }
  992. #endif /* (WCHAR_MAX > 0x7fffffffL) */
  993. }
  994. ++count;
  995. }
  996. return count;
  997. }
  998. #else /* __UCLIBC_HAS_LOCALE__ */
  /*
   * Locale-less wcswidth(): count the display columns of the first n wide
   * characters of pwcs, stopping early at a nul.  Every printable character
   * in 0x20..0x7e and 0xa0..0xff is one column wide; control characters, DEL,
   * 0x80..0x9f and anything above 0xff make the whole call return -1.
   */
  int wcswidth(const wchar_t *pwcs, size_t n)
  {
      int width = 0;
      size_t k;

      for (k = 0 ; k < n ; k++) {
          wchar_t ch = pwcs[k];

          if (!ch) {
              break;
          }
          if (!(ch <= 0xff)) {
              return -1;
          }
          /* If we're here, ch != 0. */
          if ((ch < 32) || ((ch >= 0x7f) && (ch < 0xa0))) {
              return -1;
          }
          ++width;
      }

      return width;
  }
  1017. #endif /* __UCLIBC_HAS_LOCALE__ */
  1018. libc_hidden_def(wcswidth)
  1019. #endif
  1020. /**********************************************************************/
  1021. #ifdef L_wcwidth
  1022. libc_hidden_proto(wcswidth)
  /* Column width of a single wide character: wcswidth() applied to a
   * one-element string holding wc. */
  int wcwidth(wchar_t wc)
  {
      const wchar_t *single = &wc;

      return wcswidth(single, 1);
  }
  1027. #endif
  1028. /**********************************************************************/
  /* Conversion descriptor behind an iconv_t handle. */
  typedef struct {
      /* Shift states for the output and input side; iconv_open() and the
       * reset path of iconv() zero their __mask. */
      mbstate_t tostate, fromstate;
      /* Codeset ids as returned by find_codeset(). */
      int tocodeset, fromcodeset;
      /* Non-zero while a BOM is still pending on input / output. */
      int frombom, tobom;
      /* Values captured at open time and restored when iconv() is asked to
       * reset the conversion state. */
      int fromcodeset0, frombom0, tobom0;
      int skip_invalid_input;		/* To support iconv -c option. */
  } _UC_iconv_t;
  1041. #ifdef L_iconv
  1042. #include <iconv.h>
  1043. #include <string.h>
  1044. #include <endian.h>
  1045. #include <byteswap.h>
  1046. #if (__BYTE_ORDER != __BIG_ENDIAN) && (__BYTE_ORDER != __LITTLE_ENDIAN)
  1047. #error unsupported endianness for iconv
  1048. #endif
  1049. #ifndef __CTYPE_HAS_8_BIT_LOCALES
  1050. #error currently iconv requires 8 bit locales
  1051. #endif
  1052. #ifndef __CTYPE_HAS_UTF_8_LOCALES
  1053. #error currently iconv requires UTF-8 locales
  1054. #endif
  /* Codeset ids used by iconv().  Ids below IC_MULTIBYTE are byte-oriented
   * (ASCII, UTF-8, and the 8-bit locale codesets numbered from 3); ids at or
   * above it are the fixed-width 2- and 4-byte encodings. */
  enum {
      IC_ASCII     = 1,
      IC_UTF_8     = 2,
      IC_WCHAR_T   = 0xe0,
      IC_MULTIBYTE = 0xe0,
  #if __BYTE_ORDER == __BIG_ENDIAN
      IC_UCS_2     = 0xe2,
      IC_UTF_32    = 0xe4,
      IC_UTF_16    = 0xea,
      IC_UCS_4     = 0xec
  #else
      IC_UCS_2     = 0xe3,
      IC_UTF_32    = 0xe5,
      IC_UTF_16    = 0xeb,
      IC_UCS_4     = 0xed
  #endif
  };
  /* For the multibyte codesets (ids >= IC_MULTIBYTE) the low bits of the
   * id mean:
   *   bit 0 means swap endian
   *   bit 1 means 2 byte
   *   bit 2 means 4 byte
   * In addition, iconv_open() reads bit 4 (0x10) as the "BOM" flag.
   */
  1078. extern const unsigned char __iconv_codesets[];
  1079. libc_hidden_proto(__iconv_codesets)
  /*
   * Table of the built-in iconv codeset names.
   *
   * Each record is: one length byte (the size of the whole record, which
   * walkers use as "s += *s"), one codeset-id byte, then the NUL-terminated
   * name.  The last record leaves out its own NUL and relies on the string
   * literal's terminator, which also serves as the end-of-table marker.
   *
   * Each "\x.." pair is kept in its own literal so that a name starting with
   * a hex digit (e.g. "ASCII") cannot be absorbed into the escape.
   *
   * NOTE(review): the big-endian UTF-32/UTF-16 entries do not carry the 0x10
   * bit that the little-endian ones do; iconv_open() derives its BOM flags
   * from that bit - confirm which branch reflects the intended behaviour.
   */
  const unsigned char __iconv_codesets[] =
      "\x0a\xe0""WCHAR_T\x00"		/* superset of UCS-4 but platform-endian */
  #if __BYTE_ORDER == __BIG_ENDIAN
      "\x08\xec""UCS-4\x00"		/* always BE */
      "\x0a\xec""UCS-4BE\x00"
      "\x0a\xed""UCS-4LE\x00"
      /* Was "\x09\fe4": a form feed plus "e4", which made the record longer
       * than its length byte and derailed every walk past this entry. */
      "\x09\xe4""UTF-32\x00"		/* platform endian with BOM */
      "\x0b\xe4""UTF-32BE\x00"
      "\x0b\xe5""UTF-32LE\x00"
      "\x08\xe2""UCS-2\x00"		/* always BE */
      "\x0a\xe2""UCS-2BE\x00"
      "\x0a\xe3""UCS-2LE\x00"
      "\x09\xea""UTF-16\x00"		/* platform endian with BOM */
      "\x0b\xea""UTF-16BE\x00"
      "\x0b\xeb""UTF-16LE\x00"
  #elif __BYTE_ORDER == __LITTLE_ENDIAN
      "\x08\xed""UCS-4\x00"		/* always BE */
      "\x0a\xed""UCS-4BE\x00"
      "\x0a\xec""UCS-4LE\x00"
      "\x09\xf4""UTF-32\x00"		/* platform endian with BOM */
      "\x0b\xe5""UTF-32BE\x00"
      "\x0b\xe4""UTF-32LE\x00"
      "\x08\xe3""UCS-2\x00"		/* always BE */
      "\x0a\xe3""UCS-2BE\x00"
      "\x0a\xe2""UCS-2LE\x00"
      "\x09\xfa""UTF-16\x00"		/* platform endian with BOM */
      "\x0b\xeb""UTF-16BE\x00"
      "\x0b\xea""UTF-16LE\x00"
  #endif
      "\x08\x02""UTF-8\x00"
      "\x0b\x01""US-ASCII\x00"
      "\x07\x01""ASCII";			/* Must be last! (special case to save a nul) */
  1112. libc_hidden_data_def(__iconv_codesets)
  1113. libc_hidden_proto(strcasecmp)
  1114. static int find_codeset(const char *name)
  1115. {
  1116. const unsigned char *s;
  1117. int codeset;
  1118. for (s = __iconv_codesets ; *s ; s += *s) {
  1119. if (!strcasecmp(s+2, name)) {
  1120. return s[1];
  1121. }
  1122. }
  1123. /* The following is ripped from find_locale in locale.c. */
  1124. /* TODO: maybe CODESET_LIST + *s ??? */
  1125. /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
  1126. codeset = 2;
  1127. s = __LOCALE_DATA_CODESET_LIST;
  1128. do {
  1129. ++codeset; /* Increment codeset first. */
  1130. if (!strcasecmp(__LOCALE_DATA_CODESET_LIST+*s, name)) {
  1131. return codeset;
  1132. }
  1133. } while (*++s);
  1134. return 0; /* No matching codeset! */
  1135. }
  1136. iconv_t weak_function iconv_open(const char *tocode, const char *fromcode)
  1137. {
  1138. register _UC_iconv_t *px;
  1139. int tocodeset, fromcodeset;
  1140. if (((tocodeset = find_codeset(tocode)) != 0)
  1141. && ((fromcodeset = find_codeset(fromcode)) != 0)) {
  1142. if ((px = malloc(sizeof(_UC_iconv_t))) != NULL) {
  1143. px->tocodeset = tocodeset;
  1144. px->tobom0 = px->tobom = (tocodeset & 0x10) >> 4;
  1145. px->fromcodeset0 = px->fromcodeset = fromcodeset;
  1146. px->frombom0 = px->frombom = (fromcodeset & 0x10) >> 4;
  1147. px->skip_invalid_input = px->tostate.__mask
  1148. = px->fromstate.__mask = 0;
  1149. return (iconv_t) px;
  1150. }
  1151. } else {
  1152. __set_errno(EINVAL);
  1153. }
  1154. return (iconv_t)(-1);
  1155. }
  1156. int weak_function iconv_close(iconv_t cd)
  1157. {
  1158. free(cd);
  1159. return 0;
  1160. }
  1161. size_t weak_function iconv(iconv_t cd, char **__restrict inbuf,
  1162. size_t *__restrict inbytesleft,
  1163. char **__restrict outbuf,
  1164. size_t *__restrict outbytesleft)
  1165. {
  1166. _UC_iconv_t *px = (_UC_iconv_t *) cd;
  1167. size_t nrcount, r;
  1168. wchar_t wc, wc2;
  1169. int inci, inco;
  1170. assert(px != (_UC_iconv_t *)(-1));
  1171. assert(sizeof(wchar_t) == 4);
  1172. if (!inbuf || !*inbuf) { /* Need to reinitialze conversion state. */
  1173. /* Note: For shift-state encodings we possibly need to output the
  1174. * shift sequence to return to initial state! */
  1175. if ((px->fromcodeset & 0xf0) == 0xe0) {
  1176. }
  1177. px->tostate.__mask = px->fromstate.__mask = 0;
  1178. px->fromcodeset = px->fromcodeset0;
  1179. px->tobom = px->tobom0;
  1180. px->frombom = px->frombom0;
  1181. return 0;
  1182. }
  1183. nrcount = 0;
  1184. while (*inbytesleft) {
  1185. if (!*outbytesleft) {
  1186. TOO_BIG:
  1187. __set_errno(E2BIG);
  1188. return (size_t) -1;
  1189. }
  1190. inci = inco = 1;
  1191. if (px->fromcodeset >= IC_MULTIBYTE) {
  1192. inci = (px->fromcodeset == IC_WCHAR_T) ? 4: (px->fromcodeset & 6);
  1193. if (*inbytesleft < inci) goto INVALID;
  1194. wc = (((unsigned int)((unsigned char)((*inbuf)[0]))) << 8)
  1195. + ((unsigned char)((*inbuf)[1]));
  1196. if (inci == 4) {
  1197. wc = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
  1198. + ((unsigned char)((*inbuf)[3])) + (wc << 16);
  1199. if (!(px->fromcodeset & 1)) wc = bswap_32(wc);
  1200. } else {
  1201. if (!(px->fromcodeset & 1)) wc = bswap_16(wc);
  1202. if (((px->fromcodeset & IC_UTF_16) == IC_UTF_16)
  1203. && (((__uwchar_t)(wc - 0xd800U)) < (0xdc00U - 0xd800U))
  1204. ) { /* surrogate */
  1205. wc =- 0xd800U;
  1206. if (*inbytesleft < 4) goto INVALID;
  1207. wc2 = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
  1208. + ((unsigned char)((*inbuf)[3]));
  1209. if (!(px->fromcodeset & 1)) wc = bswap_16(wc2);
  1210. if (((__uwchar_t)(wc2 -= 0xdc00U)) < (0xe0000U - 0xdc00U)) {
  1211. goto ILLEGAL;
  1212. }
  1213. inci = 4; /* Change inci here in case skipping illegals. */
  1214. wc = 0x10000UL + (wc << 10) + wc2;
  1215. }
  1216. }
  1217. if (px->frombom) {
  1218. px->frombom = 0;
  1219. if ((wc == 0xfeffU)
  1220. || (wc == ((inci == 4)
  1221. ? (((wchar_t) 0xfffe0000UL))
  1222. : ((wchar_t)(0xfffeUL))))
  1223. ) {
  1224. if (wc != 0xfeffU) {
  1225. px->fromcodeset ^= 1; /* toggle endianness */
  1226. wc = 0xfeffU;
  1227. }
  1228. if (!px->frombom) {
  1229. goto BOM_SKIP_OUTPUT;
  1230. }
  1231. goto GOT_BOM;
  1232. }
  1233. }
  1234. if (px->fromcodeset != IC_WCHAR_T) {
  1235. if (((__uwchar_t) wc) > (((px->fromcodeset & IC_UCS_4) == IC_UCS_4)
  1236. ? 0x7fffffffUL : 0x10ffffUL)
  1237. #ifdef KUHN
  1238. || (((__uwchar_t)(wc - 0xfffeU)) < 2)
  1239. || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
  1240. #endif
  1241. ) {
  1242. goto ILLEGAL;
  1243. }
  1244. }
  1245. } else if (px->fromcodeset == IC_UTF_8) {
  1246. const char *p = *inbuf;
  1247. r = _wchar_utf8sntowcs(&wc, 1, &p, *inbytesleft, &px->fromstate, 0);
  1248. if (((ssize_t) r) <= 0) { /* either EILSEQ or incomplete or nul */
  1249. if (((ssize_t) r) < 0) { /* either EILSEQ or incomplete or nul */
  1250. assert((r == (size_t)(-1)) || (r == (size_t)(-2)));
  1251. if (r == (size_t)(-2)) {
  1252. INVALID:
  1253. __set_errno(EINVAL);
  1254. } else {
  1255. px->fromstate.__mask = 0;
  1256. inci = 1;
  1257. ILLEGAL:
  1258. if (px->skip_invalid_input) {
  1259. px->skip_invalid_input = 2; /* flag for iconv utility */
  1260. goto BOM_SKIP_OUTPUT;
  1261. }
  1262. __set_errno(EILSEQ);
  1263. }
  1264. return (size_t)(-1);
  1265. }
  1266. #ifdef __UCLIBC_MJN3_ONLY__
  1267. #warning TODO: optimize this.
  1268. #endif
  1269. if (p != NULL) { /* incomplete char case */
  1270. goto INVALID;
  1271. }
  1272. p = *inbuf + 1; /* nul */
  1273. }
  1274. inci = p - *inbuf;
  1275. } else if ((wc = ((unsigned char)(**inbuf))) >= 0x80) { /* Non-ASCII... */
  1276. if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
  1277. goto ILLEGAL;
  1278. } else { /* some other 8-bit ascii-extension codeset */
  1279. const __codeset_8_bit_t *c8b
  1280. = __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
  1281. wc -= 0x80;
  1282. wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
  1283. (c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
  1284. << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
  1285. if (!wc) {
  1286. goto ILLEGAL;
  1287. }
  1288. }
  1289. }
  1290. if (px->tobom) {
  1291. inci = 0;
  1292. wc = 0xfeffU;
  1293. GOT_BOM:
  1294. px->tobom = 0;
  1295. }
  1296. if (px->tocodeset >= IC_MULTIBYTE) {
  1297. inco = (px->tocodeset == IC_WCHAR_T) ? 4: (px->tocodeset & 6);
  1298. if (*outbytesleft < inco) goto TOO_BIG;
  1299. if (px->tocodeset != IC_WCHAR_T) {
  1300. if (((__uwchar_t) wc) > (((px->tocodeset & IC_UCS_4) == IC_UCS_4)
  1301. ? 0x7fffffffUL : 0x10ffffUL)
  1302. #ifdef KUHN
  1303. || (((__uwchar_t)(wc - 0xfffeU)) < 2)
  1304. || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
  1305. #endif
  1306. ) {
  1307. REPLACE_32:
  1308. wc = 0xfffd;
  1309. ++nrcount;
  1310. }
  1311. }
  1312. if (inco == 4) {
  1313. if (px->tocodeset & 1) wc = bswap_32(wc);
  1314. } else {
  1315. if (((__uwchar_t)wc ) > 0xffffU) {
  1316. if ((px->tocodeset & IC_UTF_16) != IC_UTF_16) {
  1317. goto REPLACE_32;
  1318. }
  1319. if (*outbytesleft < (inco = 4)) goto TOO_BIG;
  1320. wc2 = 0xdc00U + (wc & 0x3ff);
  1321. wc = 0xd800U + ((wc >> 10) & 0x3ff);
  1322. if (px->tocodeset & 1) {
  1323. wc = bswap_16(wc);
  1324. wc2 = bswap_16(wc2);
  1325. }
  1326. wc += (wc2 << 16);
  1327. } else if (px->tocodeset & 1) wc = bswap_16(wc);
  1328. }
  1329. (*outbuf)[0] = (char)((unsigned char)(wc));
  1330. (*outbuf)[1] = (char)((unsigned char)(wc >> 8));
  1331. if (inco == 4) {
  1332. (*outbuf)[2] = (char)((unsigned char)(wc >> 16));
  1333. (*outbuf)[3] = (char)((unsigned char)(wc >> 24));
  1334. }
  1335. } else if (px->tocodeset == IC_UTF_8) {
  1336. const wchar_t *pw = &wc;
  1337. do {
  1338. r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
  1339. if (r != (size_t)(-1)) {
  1340. #ifdef __UCLIBC_MJN3_ONLY__
  1341. #warning TODO: What happens for a nul?
  1342. #endif
  1343. if (r == 0) {
  1344. if (wc != 0) {
  1345. goto TOO_BIG;
  1346. }
  1347. ++r;
  1348. }
  1349. break;
  1350. }
  1351. wc = 0xfffdU;
  1352. ++nrcount;
  1353. } while (1);
  1354. inco = r;
  1355. } else if (((__uwchar_t)(wc)) < 0x80) {
  1356. CHAR_GOOD:
  1357. **outbuf = wc;
  1358. } else {
  1359. if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
  1360. const __codeset_8_bit_t *c8b
  1361. = __locale_mmap->codeset_8_bit + px->tocodeset - 3;
  1362. __uwchar_t u;
  1363. u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
  1364. u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
  1365. + ((wc >> Cwc2c_TT_SHIFT)
  1366. & ((1 << Cwc2c_TI_SHIFT)-1))];
  1367. wc = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
  1368. + (u << Cwc2c_TT_SHIFT)
  1369. + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
  1370. if (wc) {
  1371. goto CHAR_GOOD;
  1372. }
  1373. }
  1374. **outbuf = '?';
  1375. ++nrcount;
  1376. }
  1377. *outbuf += inco;
  1378. *outbytesleft -= inco;
  1379. BOM_SKIP_OUTPUT:
  1380. *inbuf += inci;
  1381. *inbytesleft -= inci;
  1382. }
  1383. return nrcount;
  1384. }
  1385. #endif
  1386. /**********************************************************************/
  1387. #ifdef L_iconv_main
  1388. #include <stdio.h>
  1389. #include <stdlib.h>
  1390. #include <string.h>
  1391. #include <wchar.h>
  1392. #include <iconv.h>
  1393. #include <stdarg.h>
  1394. #include <libgen.h>
  1395. extern const unsigned char __iconv_codesets[];
  1396. #define IBUF BUFSIZ
  1397. #define OBUF BUFSIZ
  /* Program name used as the prefix of every diagnostic. */
  char *progname;
  /* Non-zero suppresses diagnostics (set by the -s option). */
  int hide_errors;

  static void error_msg(const char *fmt, ...)
       __attribute__ ((noreturn, format (printf, 1, 2)));

  /* Print "progname: <formatted message>" to stderr unless hide_errors is
   * set, then terminate the program with EXIT_FAILURE.  Never returns. */
  static void error_msg(const char *fmt, ...)
  {
      va_list ap;

      if (hide_errors) {
          exit(EXIT_FAILURE);
      }

      fprintf(stderr, "%s: ", progname);
      va_start(ap, fmt);
      vfprintf(stderr, fmt, ap);
      va_end(ap);

      exit(EXIT_FAILURE);
  }
  1413. int main(int argc, char **argv)
  1414. {
  1415. FILE *ifile;
  1416. FILE *ofile = stdout;
  1417. const char *p;
  1418. const char *s;
  1419. static const char opt_chars[] = "tfocsl";
  1420. /* 012345 */
  1421. const char *opts[sizeof(opt_chars)]; /* last is infile name */
  1422. iconv_t ic;
  1423. char ibuf[IBUF];
  1424. char obuf[OBUF];
  1425. char *pi;
  1426. char *po;
  1427. size_t ni, no, r, pos;
  1428. hide_errors = 0;
  1429. for (s = opt_chars ; *s ; s++) {
  1430. opts[ s - opt_chars ] = NULL;
  1431. }
  1432. progname = *argv;
  1433. while (--argc) {
  1434. p = *++argv;
  1435. if ((*p != '-') || (*++p == 0)) {
  1436. break;
  1437. }
  1438. do {
  1439. if ((s = strchr(opt_chars,*p)) == NULL) {
  1440. USAGE:
  1441. s = basename(progname);
  1442. fprintf(stderr,
  1443. "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
  1444. " or\n%s -l\n", s, s);
  1445. return EXIT_FAILURE;
  1446. }
  1447. if ((s - opt_chars) < 3) {
  1448. if ((--argc == 0) || opts[s - opt_chars]) {
  1449. goto USAGE;
  1450. }
  1451. opts[s - opt_chars] = *++argv;
  1452. } else {
  1453. opts[s - opt_chars] = p;
  1454. }
  1455. } while (*++p);
  1456. }
  1457. if (opts[5]) { /* -l */
  1458. fprintf(stderr, "Recognized codesets:\n");
  1459. for (s = __iconv_codesets ; *s ; s += *s) {
  1460. fprintf(stderr," %s\n", s+2);
  1461. }
  1462. s = __LOCALE_DATA_CODESET_LIST;
  1463. do {
  1464. fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
  1465. } while (*++s);
  1466. return EXIT_SUCCESS;
  1467. }
  1468. if (opts[4]) {
  1469. hide_errors = 1;
  1470. }
  1471. if (!opts[0] || !opts[1]) {
  1472. goto USAGE;
  1473. }
  1474. if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
  1475. error_msg( "unsupported codeset in %s -> %s conversion\n", opts[0], opts[1]);
  1476. }
  1477. if (opts[3]) { /* -c */
  1478. ((_UC_iconv_t *) ic)->skip_invalid_input = 1;
  1479. }
  1480. if ((s = opts[2]) != NULL) {
  1481. if (!(ofile = fopen(s, "w"))) {
  1482. error_msg( "couldn't open %s for writing\n", s);
  1483. }
  1484. }
  1485. pos = ni = 0;
  1486. do {
  1487. if (!argc || ((**argv == '-') && !((*argv)[1]))) {
  1488. ifile = stdin; /* we don't check for duplicates */
  1489. } else if (!(ifile = fopen(*argv, "r"))) {
  1490. error_msg( "couldn't open %s for reading\n", *argv);
  1491. }
  1492. while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
  1493. pos += r;
  1494. ni += r;
  1495. no = OBUF;
  1496. pi = ibuf;
  1497. po = obuf;
  1498. if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {
  1499. if ((errno != EINVAL) && (errno != E2BIG)) {
  1500. error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
  1501. }
  1502. }
  1503. if ((r = OBUF - no) > 0) {
  1504. if (fwrite(obuf, 1, OBUF - no, ofile) < r) {
  1505. error_msg( "write error\n");
  1506. }
  1507. }
  1508. if (ni) { /* still bytes in buffer! */
  1509. memmove(ibuf, pi, ni);
  1510. }
  1511. }
  1512. if (ferror(ifile)) {
  1513. error_msg( "read error\n");
  1514. }
  1515. ++argv;
  1516. if (ifile != stdin) {
  1517. fclose(ifile);
  1518. }
  1519. } while (--argc > 0);
  1520. iconv_close(ic);
  1521. if (ni) {
  1522. error_msg( "incomplete sequence\n");
  1523. }
  1524. return (((_UC_iconv_t *) ic)->skip_invalid_input < 2)
  1525. ? EXIT_SUCCESS : EXIT_FAILURE;
  1526. }
  1527. #endif
  1528. /**********************************************************************/