Browse Source

Add printf wchar support for %lc (%C) and %ls (%S).
Require printf format strings to be valid multibyte strings beginning and
ending in their initial shift state, as per the stds.

Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
order to support %ls in printf. See comments below for details.
Change behaviour of wc<->mb functions when in the C locale. Now they do
a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
and consistency with the stds requirements that a printf format string be
a valid multibyte string beginning and ending in its initial shift state.

Manuel Novoa III 21 years ago
parent
commit
a854cf512a
2 changed files with 140 additions and 29 deletions
  1. 41 15
      libc/misc/wchar/wchar.c
  2. 99 14
      libc/stdio/printf.c

+ 41 - 15
libc/misc/wchar/wchar.c

@@ -58,6 +58,16 @@
  * Enabled building of a C/POSIX-locale-only version, so full locale support
  *    no longer needs to be enabled.
  *
+ * Nov 4, 2002
+ *
+ * Fixed a bug in _wchar_wcsntoutf8s().  Don't store wcs position if dst is NULL.
+ * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
+ *   order to support %ls in printf.  See comments below for details.
+ * Change behaviour of wc<->mb functions when in the C locale.  Now they do
+ *   a 1-1 map for the range 0x80-UCHAR_MAX.  This is for backwards compatibility
+ *   and consistency with the stds requirements that a printf format string be
+ *   a valid multibyte string beginning and ending in its initial shift state.
+ *
  * Manuel
  */
 
@@ -481,9 +491,19 @@ size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
 	char m;
 
 	store = 1;
-	if (!s) {
-		s = buf;
-		n = SIZE_MAX;
+	/* NOTE: The following is an AWFUL HACK!  In order to support %ls in
+	 * printf, we need to be able to compute the number of bytes needed
+	 * for the mbs conversion, not to exceed the precision specified.
+	 * But if dst is NULL, the return value is the length assuming a
+	 * sufficiently sized buffer.  So, we allow passing of (char *) src
+	 * as dst in order to flag that we really want the length, subject
+	 * to the restricted buffer size and no partial conversions.
+	 * See wcsnrtombs() as well. */
+	if (!s || (s == ((char *) src))) {
+		if (!s) {
+			n = SIZE_MAX;
+		}
+	    s = buf;
 		store = 0;
 	}
 
@@ -553,7 +573,9 @@ size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
 		}
 	}
 
-	*src = (const wchar_t *) swc;
+	if (store) {
+		*src = (const wchar_t *) swc;
+	}
 
 	return n - t;
 }
@@ -614,7 +636,8 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
 						  (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
 						   << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
 				if (!wc) {
-					goto BAD;
+					__set_errno(EILSEQ);
+					return (size_t) -1;
 				}
 			}
 			if (!(*dst = wc)) {
@@ -641,13 +664,6 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
 			s = NULL;
 			break;
 		}
-		if (*dst >= 0x80) {
-#ifdef __CTYPE_HAS_8_BIT_LOCALES
-		BAD:
-#endif
-			__set_errno(EILSEQ);
-			return (size_t) -1;
-		}
 		++s;
 		dst += incr;
 		--count;
@@ -686,9 +702,19 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
 #endif /* __CTYPE_HAS_UTF_8_LOCALES */
 
 	incr = 1;
-	if (!dst) {
+	/* NOTE: The following is an AWFUL HACK!  In order to support %ls in
+	 * printf, we need to be able to compute the number of bytes needed
+	 * for the mbs conversion, not to exceed the precision specified.
+	 * But if dst is NULL, the return value is the length assuming a
+	 * sufficiently sized buffer.  So, we allow passing of (char *) src
+	 * as dst in order to flag that we really want the length, subject
+	 * to the restricted buffer size and no partial conversions.
+	 * See _wchar_wcsntoutf8s() as well. */
+	if (!dst || (dst == ((char *) src))) {
+		if (!dst) {
+			len = SIZE_MAX;
+		}
 		dst = buf;
-		len = SIZE_MAX;
 		incr = 0;
 	}
 
@@ -749,7 +775,7 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
 #endif
 
 	while (count) {
-		if (*s >= 0x80) {
+		if (*s > UCHAR_MAX) {
 #if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
 		BAD:
 #endif

+ 99 - 14
libc/stdio/printf.c

@@ -31,25 +31,45 @@
  *
  *  ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION! */
 
-/* 4-01-2002
+
+/* April 1, 2002
  * Initialize thread locks for fake files in vsnprintf and vdprintf.
  *    reported by Erik Andersen (andersen@codepoet.com)
  * Fix an arg promotion handling bug in _do_one_spec for %c. 
  *    reported by Ilguiz Latypov <ilatypov@superbt.com>
  *
- * 5-10-2002
+ * May 10, 2002
  * Remove __isdigit and use new ctype.h version.
  * Add conditional setting of QUAL_CHARS for size_t and ptrdiff_t.
  *
- * 8-16-2002
+ * Aug 16, 2002
  * Fix two problems that showed up with the python 2.2.1 tests; one
  *    involving %o and one involving %f.
  *
- * 10-28-2002
+ * Oct 28, 2002
  * Fix a problem in vasprintf (reported by vodz a while back) when built
  *    without custom stream support.  In that case, it is necessary to do
  *    a va_copy.
  * Make sure each va_copy has a matching va_end, as required by C99.
+ *
+ * Nov 4, 2002
+ * Add locale-specific grouping support for integer decimal conversion.
+ * Add locale-specific decimal point support for floating point conversion.
+ *   Note: grouping will have to wait for _dtostr() rewrite.
+ * Add printf wchar support for %lc (%C) and %ls (%S).
+ * Require printf format strings to be valid multibyte strings beginning and
+ *   ending in their initial shift state, as per the stds.
+ */
+
+/* TODO:
+ *
+ * Should we validate that *printf format strings are valid multibyte
+ *   strings in the current locale?  ANSI/ISO C99 seems to imply this
+ *   and Plauger's printf implementation in his Standard C Library book
+ *   treats this as an error.
+ *
+ * Implement %a, %A, and locale-specific grouping for the printf floating
+ *   point conversions.  To be done in the rewrite of _dtostr().
  */
 
 
@@ -75,6 +95,10 @@
 #include <pthread.h>
 #endif /* __STDIO_THREADSAFE */
 
+#ifdef __UCLIBC_HAS_WCHAR__
+#include <wchar.h>
+#endif /* __UCLIBC_HAS_WCHAR__ */
+
 /**********************************************************************/
 
 /* These provide some control over printf's feature set */
@@ -335,7 +359,7 @@ typedef struct {
 extern size_t _dtostr(FILE * fp, long double x, struct printf_info *info);
 #endif
 
-#define _outnstr(stream, string, len)	_stdio_fwrite(s, len, stream)	/* TODO */
+#define _outnstr(stream, string, len)	_stdio_fwrite(string, len, stream)	/* TODO */
 
 extern int _do_one_spec(FILE * __restrict stream, ppfs_t *ppfs, int *count);
 
@@ -431,7 +455,7 @@ int vfprintf(FILE * __restrict stream, register const char * __restrict format,
 	s = format;
 
 	if (_ppfs_init(&ppfs, format) < 0) { /* Bad format string. */
-		_outnstr(stream, format, strlen(format));
+		_outnstr(stream, ppfs.fmtpos, strlen(ppfs.fmtpos));
 		count = -1;
 	} else {
 		_ppfs_prepargs(&ppfs, arg);	/* This did a va_copy!!! */
@@ -481,11 +505,29 @@ int vfprintf(FILE * __restrict stream, register const char * __restrict format,
 
 int _ppfs_init(register ppfs_t *ppfs, const char *fmt0)
 {
+#if defined(__UCLIBC_HAS_WCHAR__) && defined(__UCLIBC_HAS_LOCALE__)
+	static const char invalid_mbs[] = "Invalid multibyte format string.";
+#endif /* defined(__UCLIBC_HAS_WCHAR__) && defined(__UCLIBC_HAS_LOCALE__) */
 	int r;
 
 	/* First, zero out everything... argnumber[], argtype[], argptr[] */
 	memset(ppfs, 0, sizeof(ppfs_t)); /* TODO: nonportable???? */
 	--ppfs->maxposarg;			/* set to -1 */
+	ppfs->fmtpos = fmt0;
+#if defined(__UCLIBC_HAS_WCHAR__) && defined(__UCLIBC_HAS_LOCALE__)
+	/* Note: We don't need to check if we don't have wide chars or we only
+	 * support the C locale. */
+	{
+		mbstate_t mbstate;
+		const char *p;
+		mbstate.mask = 0;	/* Initialize the mbstate. */
+		p = fmt0;
+		if (mbsrtowcs(NULL, &p, SIZE_MAX, &mbstate) == ((size_t)(-1))) {
+			ppfs->fmtpos = invalid_mbs;
+			return -1;
+		}
+	}
+#endif /* defined(__UCLIBC_HAS_WCHAR__) && defined(__UCLIBC_HAS_LOCALE__) */
 	/* now set all argtypes to no-arg */
 	{
 #if 1
@@ -1098,6 +1140,10 @@ int _do_one_spec(FILE * __restrict stream, register ppfs_t *ppfs, int *count)
 	const void * argptr[MAX_ARGS_PER_SPEC];
 #endif
 	int *argtype;
+#ifdef __UCLIBC_HAS_WCHAR__
+	const wchar_t *ws = NULL;
+	mbstate_t mbstate;
+#endif /* __UCLIBC_HAS_WCHAR__ */
 	size_t slen;
 	int base;
 	int numpad;
@@ -1223,18 +1269,39 @@ int _do_one_spec(FILE * __restrict stream, register ppfs_t *ppfs, int *count)
 							  &ppfs->info);
 			return 0;
 #else  /* __STDIO_PRINTF_FLOAT */
-			return -1;			/* TODO -- try ton continue? */
+			return -1;			/* TODO -- try to continue? */
 #endif /* __STDIO_PRINTF_FLOAT */
 		} else if (ppfs->conv_num <= CONV_S) {	/* wide char or string */
-#if 1
-			return -1;			/* TODO -- wide */
-#else
+#ifdef __UCLIBC_HAS_WCHAR__
+			mbstate.mask = 0;	/* Initialize the mbstate. */
 			if (ppfs->conv_num == CONV_S) { /* wide string */
-
+				if (!(ws = *((const wchar_t **) *argptr))) {
+					goto NULL_STRING;
+				}
+				/* We use an awful uClibc-specific hack here, passing
+				 * (char*) &ws as the conversion destination.  This signals
+				 * uClibc's wcsrtombs that we want a "restricted" length
+				 * such that the mbs fits in a buffer of the specified
+				 * size with no partial conversions. */
+				if ((slen = wcsrtombs((char *) &ws, &ws, /* Use awful hack! */
+									  ((ppfs->info.prec >= 0)
+									   ? ppfs->info.prec
+									   : SIZE_MAX), &mbstate))
+					== ((size_t)-1)
+					) {
+					return -1;	/* EILSEQ */
+				}
 			} else {			/* wide char */
-				
+				s = buf;
+				slen = wcrtomb(s, (*((const wchar_t *) *argptr)), &mbstate);
+				if (slen == ((size_t)-1)) {
+					return -1;	/* EILSEQ */
+				}
+				s[slen] = 0;	/* TODO - Is this necessary? */
 			}
-#endif
+#else  /* __UCLIBC_HAS_WCHAR__ */
+			return -1;
+#endif /* __UCLIBC_HAS_WCHAR__ */
 		} else if (ppfs->conv_num <= CONV_s) {	/* char or string */
 			if (ppfs->conv_num == CONV_s) { /* string */
 				s = *((char **) (*argptr));
@@ -1243,11 +1310,12 @@ int _do_one_spec(FILE * __restrict stream, register ppfs_t *ppfs, int *count)
 					slen = strnlen(s, ((ppfs->info.prec >= 0)
 									   ? ppfs->info.prec : SIZE_MAX));
 				} else {
+				NULL_STRING:
 					s = "(null)";
 					slen = 6;
 				}
 			} else {			/* char */
-				s = (char *) buf;
+				s = buf;
 				*s = (unsigned char)(*((const int *) *argptr));
 				s[1] = 0;
 				slen = 1;
@@ -1301,7 +1369,24 @@ int _do_one_spec(FILE * __restrict stream, register ppfs_t *ppfs, int *count)
 		}
 		output(stream, prefix + prefix_num);
 		_charpad(stream, '0', numfill);
+#ifdef __UCLIBC_HAS_WCHAR__
+		if (!ws) {
+			_outnstr(stream, s, slen);
+		} else {				/* wide string */
+			size_t t;
+			mbstate.mask = 0;	/* Initialize the mbstate. */
+			while (slen) {
+				t = (slen <= sizeof(buf)) ? slen : sizeof(buf);
+				t = wcsrtombs(buf, &ws, t, &mbstate);
+				assert (t != ((size_t)(-1)));
+				_outnstr(stream, buf, t);
+				slen -= t;
+			}
+			ws = NULL;			/* Reset ws. */
+		}
+#else  /* __UCLIBC_HAS_WCHAR__ */
 		_outnstr(stream, s, slen);
+#endif /* __UCLIBC_HAS_WCHAR__ */
 		_charpad(stream, ' ', numpad);
 	}