iconv.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. /* Copyright (C) 2002, 2003, 2004 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
  18. *
  19. * Besides uClibc, I'm using this code in my libc for elks, which is
  20. * a 16-bit environment with a fairly limited compiler. It would make
  21. * things much easier for me if this file isn't modified unnecessarily.
  22. * In particular, please put any new or replacement functions somewhere
  23. * else, and modify the makefile to use your version instead.
  24. * Thanks. Manuel
  25. *
  26. * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
  27. /* May 23, 2002 Initial Notes:
  28. *
  29. * I'm still tweaking this stuff, but it passes the tests I've thrown
  30. * at it, and Erik needs it for the gcc port. The glibc extension
  31. * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
  32. * in the glibc source. I also need to fix the behavior of
  33. * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
  34. *
  35. * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
  36. * file on my platform (x86) show about 5-10% faster conversion speed than
  37. * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
  38. * individual mbrtowc()/wcrtomb() calls.
  39. *
  40. * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
  41. * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
  42. * needs to deal gracefully with whatever is sent to it. In that mode,
  43. * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
  44. * an arg to force that behavior, so the interface will be changing.
  45. *
  46. * I need to fix the error checking for 16-bit wide chars. This isn't
  47. * an issue for uClibc, but may be for ELKS. I'm currently not sure
  48. * if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
  49. *
  50. * July 1, 2002
  51. *
  52. * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
  53. * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
  54. * locales.
  55. * Enabled building of a C/POSIX-locale-only version, so full locale support
  56. * no longer needs to be enabled.
  57. *
  58. * Nov 4, 2002
  59. *
  60. * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
  61. * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
  62. * order to support %ls in printf. See comments below for details.
  63. * Change behaviour of wc<->mb functions when in the C locale. Now they do
  64. * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
  65. * and consistency with the standard's requirement that a printf format string be
  66. * a valid multibyte string beginning and ending in its initial shift state.
  67. *
  68. * Nov 5, 2002
  69. *
  70. * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
  71. *
  72. * Nov 7, 2002
  73. *
  74. * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
  75. * Added some size/speed optimizations and integrated it into my locale
  76. * framework. Minimally tested at the moment, but the stub C-locale
  77. * version (which most people would probably be using) should be fine.
  78. *
  79. * Nov 21, 2002
  80. *
  81. * Revert the wc<->mb changes from earlier this month involving the C-locale.
  82. * Add a couple of ugly hacks to support *wprintf.
  83. * Add a mini iconv() and iconv implementation (requires locale support).
  84. *
  85. * Aug 1, 2003
  86. * Bug fix for mbrtowc.
  87. *
  88. * Aug 18, 2003
  89. * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
  90. *
  91. * Feb 11, 2004
  92. * Bug fix: Fix size check for remaining output space in iconv().
  93. *
  94. * Manuel
  95. */
  96. /* keep libgen before string.h - and porting.h to use the
  97. * XPG version of basename */
  98. #include <libgen.h>
  99. #include "porting.h"
  100. #include <string.h>
  101. #include <iconv.h>
  102. #include <stdarg.h>
  103. #include <wchar.h>
  104. #include "wchar.c" /* for _UC_iconv_t and __iconv_codesets */
/* Declaration of the codeset name table walked by the -l option in main().
 * When L_iconv_main is defined the table is declared with internal linkage,
 * otherwise it is declared as an external symbol. */
#ifdef L_iconv_main
static
#else
extern
#endif
const unsigned char __iconv_codesets[];
/* Sizes, in bytes, of main()'s input and output conversion buffers. */
#define IBUF BUFSIZ
#define OBUF BUFSIZ
/* Program name as invoked (argv[0]); set by main() and used as the prefix
 * of every message printed by error_msg(). */
static char *progname;
/* When non-zero, error_msg() prints nothing (it still exits).  main() sets
 * this when the -s option is given. */
static int hide_errors;
  115. static void error_msg(const char *fmt, ...)
  116. __attribute__ ((noreturn, format (printf, 1, 2)));
  117. static void error_msg(const char *fmt, ...)
  118. {
  119. va_list arg;
  120. if (!hide_errors) {
  121. fprintf(stderr, "%s: ", progname);
  122. va_start(arg, fmt);
  123. vfprintf(stderr, fmt, arg);
  124. va_end(arg);
  125. }
  126. exit(EXIT_FAILURE);
  127. }
  128. int main(int argc, char **argv)
  129. {
  130. FILE *ifile;
  131. FILE *ofile = stdout;
  132. const char *p;
  133. const char *s;
  134. static const char opt_chars[] = "tfocsl";
  135. /* 012345 */
  136. const char *opts[sizeof(opt_chars)]; /* last is infile name */
  137. iconv_t ic;
  138. char ibuf[IBUF];
  139. char obuf[OBUF];
  140. char *pi;
  141. char *po;
  142. size_t ni, no, r, pos;
  143. hide_errors = 0;
  144. for (s = opt_chars ; *s ; s++) {
  145. opts[ s - opt_chars ] = NULL;
  146. }
  147. progname = *argv;
  148. while (--argc) {
  149. p = *++argv;
  150. if ((*p != '-') || (*++p == 0)) {
  151. break;
  152. }
  153. do {
  154. if ((s = strchr(opt_chars,*p)) == NULL) {
  155. USAGE:
  156. s = basename(progname);
  157. fprintf(stderr,
  158. "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
  159. " or\n%s -l\n", s, s);
  160. return EXIT_FAILURE;
  161. }
  162. if ((s - opt_chars) < 3) {
  163. if ((--argc == 0) || opts[s - opt_chars]) {
  164. goto USAGE;
  165. }
  166. opts[s - opt_chars] = *++argv;
  167. } else {
  168. opts[s - opt_chars] = p;
  169. }
  170. } while (*++p);
  171. }
  172. if (opts[5]) { /* -l */
  173. fprintf(stderr, "Recognized codesets:\n");
  174. for (s = (char *)__iconv_codesets ; *s ; s += *s) {
  175. fprintf(stderr," %s\n", s+2);
  176. }
  177. s = __LOCALE_DATA_CODESET_LIST;
  178. do {
  179. fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
  180. } while (*++s);
  181. return EXIT_SUCCESS;
  182. }
  183. if (opts[4]) {
  184. hide_errors = 1;
  185. }
  186. if (!opts[0] || !opts[1]) {
  187. goto USAGE;
  188. }
  189. if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
  190. error_msg( "unsupported codeset in %s -> %s conversion\n", opts[1], opts[0]);
  191. }
  192. if (opts[3]) { /* -c */
  193. ((_UC_iconv_t *) ic)->skip_invalid_input = 1;
  194. }
  195. if ((s = opts[2]) != NULL) {
  196. if (!(ofile = fopen(s, "w"))) {
  197. error_msg( "couldn't open %s for writing\n", s);
  198. }
  199. }
  200. pos = ni = 0;
  201. do {
  202. if (!argc || ((**argv == '-') && !((*argv)[1]))) {
  203. ifile = stdin; /* we don't check for duplicates */
  204. } else if (!(ifile = fopen(*argv, "r"))) {
  205. error_msg( "couldn't open %s for reading\n", *argv);
  206. }
  207. while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
  208. pos += r;
  209. ni += r;
  210. no = OBUF;
  211. pi = ibuf;
  212. po = obuf;
  213. if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {
  214. if ((errno != EINVAL) && (errno != E2BIG)) {
  215. error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
  216. }
  217. }
  218. if ((r = OBUF - no) > 0) {
  219. if (fwrite(obuf, 1, OBUF - no, ofile) < r) {
  220. error_msg( "write error\n");
  221. }
  222. }
  223. if (ni) { /* still bytes in buffer! */
  224. memmove(ibuf, pi, ni);
  225. }
  226. }
  227. if (ferror(ifile)) {
  228. error_msg( "read error\n");
  229. }
  230. ++argv;
  231. if (ifile != stdin) {
  232. fclose(ifile);
  233. }
  234. } while (--argc > 0);
  235. iconv_close(ic);
  236. if (ni) {
  237. error_msg( "incomplete sequence\n");
  238. }
  239. return (((_UC_iconv_t *) ic)->skip_invalid_input < 2)
  240. ? EXIT_SUCCESS : EXIT_FAILURE;
  241. }