| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266 | 
/*  Copyright (C) 2002, 2003, 2004     Manuel Novoa III * *  This library is free software; you can redistribute it and/or *  modify it under the terms of the GNU Library General Public *  License as published by the Free Software Foundation; either *  version 2 of the License, or (at your option) any later version. * *  This library is distributed in the hope that it will be useful, *  but WITHOUT ANY WARRANTY; without even the implied warranty of *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU *  Library General Public License for more details. * *  You should have received a copy of the GNU Library General Public *  License along with this library; if not, write to the Free *  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//*  ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION! * *  Besides uClibc, I'm using this code in my libc for elks, which is *  a 16-bit environment with a fairly limited compiler.  It would make *  things much easier for me if this file isn't modified unnecessarily. *  In particular, please put any new or replacement functions somewhere *  else, and modify the makefile to use your version instead. *  Thanks.  Manuel * *  ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION! *//* May 23, 2002     Initial Notes: * * I'm still tweaking this stuff, but it passes the tests I've thrown * at it, and Erik needs it for the gcc port.  The glibc extension * __wcsnrtombs() hasn't been tested, as I didn't find a test for it * in the glibc source.  I also need to fix the behavior of * _wchar_utf8sntowcs() if the max number of wchars to convert is 0. * * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt * file on my platform (x86) show about 5-10% faster conversion speed than * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with * individual mbrtowc()/wcrtomb() calls. * * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled * as a fail-safe UTF-8 decoder appropriate for a terminal, etc.  which * needs to deal gracefully with whatever is sent to it.  In that mode, * it passes Markus Kuhn's UTF-8-test.txt stress test.  I plan to add * an arg to force that behavior, so the interface will be changing. * * I need to fix the error checking for 16-bit wide chars.  This isn't * an issue for uClibc, but may be for ELKS.  I'm currently not sure * if I'll use 16-bit, 32-bit, or configureable wchars in ELKS. * * July 1, 2002 * * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case. * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit *    locales. * Enabled building of a C/POSIX-locale-only version, so full locale support *    no longer needs to be enabled. * * Nov 4, 2002 * * Fixed a bug in _wchar_wcsntoutf8s().  Don't store wcs position if dst is NULL. * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in *   order to support %ls in printf.  See comments below for details. * Change behaviour of wc<->mb functions when in the C locale.  Now they do *   a 1-1 map for the range 0x80-UCHAR_MAX.  This is for backwards compatibility *   and consistency with the stds requirements that a printf format string by *   a valid multibyte string beginning and ending in it's initial shift state. * * Nov 5, 2002 * * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday. * * Nov 7, 2002 * * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08. *   Added some size/speed optimizations and integrated it into my locale *   framework.  Minimally tested at the moment, but the stub C-locale *   version (which most people would probably be using) should be fine. * * Nov 21, 2002 * * Revert the wc<->mb changes from earlier this month involving the C-locale. * Add a couple of ugly hacks to support *wprintf. * Add a mini iconv() and iconv implementation (requires locale support). * * Aug 1, 2003 * Bug fix for mbrtowc. * * Aug 18, 2003 * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ. * * Feb 11, 2004 * Bug fix: Fix size check for remaining output space in iconv(). * * Manuel *//* keep libgen before string.h - and porting.h to use the * XPG version of basename */#include <libgen.h>#include "porting.h"#include <string.h>#include <iconv.h>#include <stdarg.h>#include <wchar.h>#include "wchar.c" /* for _UC_iconv_t and __iconv_codesets */extern const unsigned char __iconv_codesets[];#define IBUF BUFSIZ#define OBUF BUFSIZstatic char *progname;static int hide_errors;static void error_msg(const char *fmt, ...)	 __attribute__ ((noreturn, format (printf, 1, 2)));static void error_msg(const char *fmt, ...){	va_list arg;	if (!hide_errors) {		fprintf(stderr, "%s: ", progname);		va_start(arg, fmt);		vfprintf(stderr, fmt, arg);		va_end(arg);	}	exit(EXIT_FAILURE);}int main(int argc, char **argv){	FILE *ifile;	FILE *ofile = stdout;	const char *p;	const char *s;	static const char opt_chars[] = "tfocsl";	                              /* 012345 */	const char *opts[sizeof(opt_chars)]; /* last is infile name */	iconv_t ic;	char ibuf[IBUF];	char obuf[OBUF];	char *pi;	char *po;	size_t ni, no, r, pos;	hide_errors = 0;	for (s = opt_chars ; *s ; s++) {		opts[ s - opt_chars ] = NULL;	}	progname = *argv;	while (--argc) {		p = *++argv;		if ((*p != '-') || (*++p == 0)) {			break;		}		do {			if ((s = strchr(opt_chars,*p)) == NULL) {			USAGE:				s = basename(progname);				fprintf(stderr,						"%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"						"  or\n%s -l\n", s, s);				return EXIT_FAILURE;			}			if ((s - opt_chars) < 3) {				if ((--argc == 0) || opts[s - opt_chars]) {					goto USAGE;				}				opts[s - opt_chars] = *++argv;			} else {				opts[s - opt_chars] = p;			}		} while (*++p);	}	if (opts[5]) {				/* -l */		fprintf(stderr, "Recognized codesets:\n");		for (s = (char *)__iconv_codesets ; *s ; s += *s) {			fprintf(stderr,"  %s\n", s+2);		}		s = __LOCALE_DATA_CODESET_LIST;		do {			fprintf(stderr,"  %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));		} while (*++s);		return EXIT_SUCCESS;	}	if (opts[4]) {		hide_errors = 1;	}	if (!opts[0] || !opts[1]) {		goto USAGE;	}	if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {		error_msg( "unsupported codeset in %s -> %s conversion\n", opts[1], opts[0]);	}	if (opts[3]) {				/* -c */		((_UC_iconv_t *) ic)->skip_invalid_input = 1;	}	if ((s = opts[2]) != NULL) {		if (!(ofile = fopen(s, "w"))) {			error_msg( "couldn't open %s for writing\n", s);		}	}	pos = ni = 0;	do {		if (!argc || ((**argv == '-') && !((*argv)[1]))) {			ifile = stdin;		/* we don't check for duplicates */		} else if (!(ifile = fopen(*argv, "r"))) {			error_msg( "couldn't open %s for reading\n", *argv);		}		while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {			pos += r;			ni += r;			no = OBUF;			pi = ibuf;			po = obuf;			if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {				if ((errno != EINVAL) && (errno != E2BIG)) {					error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));				}			}			if ((r = OBUF - no) > 0) {				if (fwrite(obuf, 1, OBUF - no, ofile) < r) {					error_msg( "write error\n");				}			}			if (ni) {			/* still bytes in buffer! */				memmove(ibuf, pi, ni);			}		}		if (ferror(ifile)) {			error_msg( "read error\n");		}		++argv;		if (ifile != stdin) {			fclose(ifile);		}	} while (--argc > 0);	iconv_close(ic);	if (ni) {		error_msg( "incomplete sequence\n");	}	return (((_UC_iconv_t *) ic)->skip_invalid_input < 2)		? EXIT_SUCCESS : EXIT_FAILURE;}
 |