|
@@ -0,0 +1,264 @@
|
|
|
+
|
|
|
+
|
|
|
+ *
|
|
|
+ * This library is free software; you can redistribute it and/or
|
|
|
+ * modify it under the terms of the GNU Library General Public
|
|
|
+ * License as published by the Free Software Foundation; either
|
|
|
+ * version 2 of the License, or (at your option) any later version.
|
|
|
+ *
|
|
|
+ * This library is distributed in the hope that it will be useful,
|
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
+ * Library General Public License for more details.
|
|
|
+ *
|
|
|
+ * You should have received a copy of the GNU Library General Public
|
|
|
+ * License along with this library; if not, write to the Free
|
|
|
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
+ */
|
|
|
+
|
|
|
+
|
|
|
+ *
|
|
|
+ * Besides uClibc, I'm using this code in my libc for elks, which is
|
|
|
+ * a 16-bit environment with a fairly limited compiler. It would make
|
|
|
+ * things much easier for me if this file isn't modified unnecessarily.
|
|
|
+ * In particular, please put any new or replacement functions somewhere
|
|
|
+ * else, and modify the makefile to use your version instead.
|
|
|
+ * Thanks. Manuel
|
|
|
+ *
|
|
|
+ * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ *
|
|
|
+ * I'm still tweaking this stuff, but it passes the tests I've thrown
|
|
|
+ * at it, and Erik needs it for the gcc port. The glibc extension
|
|
|
+ * __wcsnrtombs() hasn't been tested, as I didn't find a test for it
|
|
|
+ * in the glibc source. I also need to fix the behavior of
|
|
|
+ * _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
|
|
|
+ *
|
|
|
+ * UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
|
|
|
+ * file on my platform (x86) show about 5-10% faster conversion speed than
|
|
|
+ * glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
|
|
|
+ * individual mbrtowc()/wcrtomb() calls.
|
|
|
+ *
|
|
|
+ * If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
|
|
|
+ * as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
|
|
|
+ * needs to deal gracefully with whatever is sent to it. In that mode,
|
|
|
+ * it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
|
|
|
+ * an arg to force that behavior, so the interface will be changing.
|
|
|
+ *
|
|
|
+ * I need to fix the error checking for 16-bit wide chars. This isn't
|
|
|
+ * an issue for uClibc, but may be for ELKS. I'm currently not sure
|
|
|
+ * if I'll use 16-bit, 32-bit, or configureable wchars in ELKS.
|
|
|
+ *
|
|
|
+ * July 1, 2002
|
|
|
+ *
|
|
|
+ * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
|
|
|
+ * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
|
|
|
+ * locales.
|
|
|
+ * Enabled building of a C/POSIX-locale-only version, so full locale support
|
|
|
+ * no longer needs to be enabled.
|
|
|
+ *
|
|
|
+ * Nov 4, 2002
|
|
|
+ *
|
|
|
+ * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
|
|
|
+ * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
|
|
|
+ * order to support %ls in printf. See comments below for details.
|
|
|
+ * Change behaviour of wc<->mb functions when in the C locale. Now they do
|
|
|
+ * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
|
|
|
+ * and consistency with the stds requirements that a printf format string by
|
|
|
+ * a valid multibyte string beginning and ending in it's initial shift state.
|
|
|
+ *
|
|
|
+ * Nov 5, 2002
|
|
|
+ *
|
|
|
+ * Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
|
|
|
+ *
|
|
|
+ * Nov 7, 2002
|
|
|
+ *
|
|
|
+ * Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
|
|
|
+ * Added some size/speed optimizations and integrated it into my locale
|
|
|
+ * framework. Minimally tested at the moment, but the stub C-locale
|
|
|
+ * version (which most people would probably be using) should be fine.
|
|
|
+ *
|
|
|
+ * Nov 21, 2002
|
|
|
+ *
|
|
|
+ * Revert the wc<->mb changes from earlier this month involving the C-locale.
|
|
|
+ * Add a couple of ugly hacks to support *wprintf.
|
|
|
+ * Add a mini iconv() and iconv implementation (requires locale support).
|
|
|
+ *
|
|
|
+ * Aug 1, 2003
|
|
|
+ * Bug fix for mbrtowc.
|
|
|
+ *
|
|
|
+ * Aug 18, 2003
|
|
|
+ * Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
|
|
|
+ *
|
|
|
+ * Feb 11, 2004
|
|
|
+ * Bug fix: Fix size check for remaining output space in iconv().
|
|
|
+ *
|
|
|
+ * Manuel
|
|
|
+ */
|
|
|
+
|
|
|
+#include "porting.h"
|
|
|
+#include <string.h>
|
|
|
+#include <iconv.h>
|
|
|
+#include <stdarg.h>
|
|
|
+#include <libgen.h>
|
|
|
+#include <wchar.h>
|
|
|
+#include "wchar.c"
|
|
|
+
|
|
|
+extern const unsigned char __iconv_codesets[];
|
|
|
+
|
|
|
+#define IBUF BUFSIZ
|
|
|
+#define OBUF BUFSIZ
|
|
|
+
|
|
|
+static char *progname;
|
|
|
+static int hide_errors;
|
|
|
+
|
|
|
+static void error_msg(const char *fmt, ...)
|
|
|
+ __attribute__ ((noreturn, format (printf, 1, 2)));
|
|
|
+
|
|
|
+static void error_msg(const char *fmt, ...)
|
|
|
+{
|
|
|
+ va_list arg;
|
|
|
+
|
|
|
+ if (!hide_errors) {
|
|
|
+ fprintf(stderr, "%s: ", progname);
|
|
|
+ va_start(arg, fmt);
|
|
|
+ vfprintf(stderr, fmt, arg);
|
|
|
+ va_end(arg);
|
|
|
+ }
|
|
|
+
|
|
|
+ exit(EXIT_FAILURE);
|
|
|
+}
|
|
|
+
|
|
|
+int main(int argc, char **argv)
|
|
|
+{
|
|
|
+ FILE *ifile;
|
|
|
+ FILE *ofile = stdout;
|
|
|
+ const char *p;
|
|
|
+ const char *s;
|
|
|
+ static const char opt_chars[] = "tfocsl";
|
|
|
+
|
|
|
+ const char *opts[sizeof(opt_chars)];
|
|
|
+ iconv_t ic;
|
|
|
+ char ibuf[IBUF];
|
|
|
+ char obuf[OBUF];
|
|
|
+ char *pi;
|
|
|
+ char *po;
|
|
|
+ size_t ni, no, r, pos;
|
|
|
+
|
|
|
+ hide_errors = 0;
|
|
|
+
|
|
|
+ for (s = opt_chars ; *s ; s++) {
|
|
|
+ opts[ s - opt_chars ] = NULL;
|
|
|
+ }
|
|
|
+
|
|
|
+ progname = *argv;
|
|
|
+ while (--argc) {
|
|
|
+ p = *++argv;
|
|
|
+ if ((*p != '-') || (*++p == 0)) {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ do {
|
|
|
+ if ((s = strchr(opt_chars,*p)) == NULL) {
|
|
|
+ USAGE:
|
|
|
+ s = basename(progname);
|
|
|
+ fprintf(stderr,
|
|
|
+ "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
|
|
|
+ " or\n%s -l\n", s, s);
|
|
|
+ return EXIT_FAILURE;
|
|
|
+ }
|
|
|
+ if ((s - opt_chars) < 3) {
|
|
|
+ if ((--argc == 0) || opts[s - opt_chars]) {
|
|
|
+ goto USAGE;
|
|
|
+ }
|
|
|
+ opts[s - opt_chars] = *++argv;
|
|
|
+ } else {
|
|
|
+ opts[s - opt_chars] = p;
|
|
|
+ }
|
|
|
+ } while (*++p);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (opts[5]) {
|
|
|
+ fprintf(stderr, "Recognized codesets:\n");
|
|
|
+ for (s = (char *)__iconv_codesets ; *s ; s += *s) {
|
|
|
+ fprintf(stderr," %s\n", s+2);
|
|
|
+ }
|
|
|
+ s = __LOCALE_DATA_CODESET_LIST;
|
|
|
+ do {
|
|
|
+ fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
|
|
|
+ } while (*++s);
|
|
|
+
|
|
|
+ return EXIT_SUCCESS;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (opts[4]) {
|
|
|
+ hide_errors = 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!opts[0] || !opts[1]) {
|
|
|
+ goto USAGE;
|
|
|
+ }
|
|
|
+ if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
|
|
|
+ error_msg( "unsupported codeset in %s -> %s conversion\n", opts[1], opts[0]);
|
|
|
+ }
|
|
|
+ if (opts[3]) {
|
|
|
+ ((_UC_iconv_t *) ic)->skip_invalid_input = 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ if ((s = opts[2]) != NULL) {
|
|
|
+ if (!(ofile = fopen(s, "w"))) {
|
|
|
+ error_msg( "couldn't open %s for writing\n", s);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ pos = ni = 0;
|
|
|
+ do {
|
|
|
+ if (!argc || ((**argv == '-') && !((*argv)[1]))) {
|
|
|
+ ifile = stdin;
|
|
|
+ } else if (!(ifile = fopen(*argv, "r"))) {
|
|
|
+ error_msg( "couldn't open %s for reading\n", *argv);
|
|
|
+ }
|
|
|
+
|
|
|
+ while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
|
|
|
+ pos += r;
|
|
|
+ ni += r;
|
|
|
+ no = OBUF;
|
|
|
+ pi = ibuf;
|
|
|
+ po = obuf;
|
|
|
+ if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {
|
|
|
+ if ((errno != EINVAL) && (errno != E2BIG)) {
|
|
|
+ error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if ((r = OBUF - no) > 0) {
|
|
|
+ if (fwrite(obuf, 1, OBUF - no, ofile) < r) {
|
|
|
+ error_msg( "write error\n");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (ni) {
|
|
|
+ memmove(ibuf, pi, ni);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (ferror(ifile)) {
|
|
|
+ error_msg( "read error\n");
|
|
|
+ }
|
|
|
+
|
|
|
+ ++argv;
|
|
|
+
|
|
|
+ if (ifile != stdin) {
|
|
|
+ fclose(ifile);
|
|
|
+ }
|
|
|
+
|
|
|
+ } while (--argc > 0);
|
|
|
+
|
|
|
+ iconv_close(ic);
|
|
|
+
|
|
|
+ if (ni) {
|
|
|
+ error_msg( "incomplete sequence\n");
|
|
|
+ }
|
|
|
+
|
|
|
+ return (((_UC_iconv_t *) ic)->skip_invalid_input < 2)
|
|
|
+ ? EXIT_SUCCESS : EXIT_FAILURE;
|
|
|
+}
|