Browse Source

Create a typedef for the ctype bitmask table entries.
Hack a fix for ctype support of 8-bit codeset locales.

Note: toupper/tolower mappings do not handle the special cases for the
tr_TR and az_AZ locales, since the wide versions do not currently handle
them either. That will be addressed when I rewrite the data generation tools
and the libc locale code.

Manuel Novoa III 20 years ago
parent
commit
9c6f2391ed

+ 6 - 6
include/ctype.h

@@ -32,7 +32,7 @@ __BEGIN_DECLS
 #ifndef _ISbit
 /* These are all the characteristics of characters.
    If there get to be more than 16 distinct characteristics,
-   many things must be changed that use `__uint16_t's. */
+   __ctype_mask_t will need to be adjusted. */
 
 # define _ISbit(bit)	(1 << (bit))
 
@@ -98,13 +98,13 @@ enum
  */
 
 /* Pointers to the default C-locale data. */
-extern const __uint16_t *__C_ctype_b;
+extern const __ctype_mask_t *__C_ctype_b;
 extern const __ctype_touplow_t *__C_ctype_toupper;
 extern const __ctype_touplow_t *__C_ctype_tolower;
 
 #ifdef __UCLIBC_HAS_XLOCALE__
 
-extern __const __uint16_t **__ctype_b_loc (void)
+extern __const __ctype_mask_t **__ctype_b_loc (void)
      __attribute__ ((__const));
 extern __const __ctype_touplow_t **__ctype_tolower_loc (void)
      __attribute__ ((__const));
@@ -118,7 +118,7 @@ extern __const __ctype_touplow_t **__ctype_toupper_loc (void)
 #else  /* __UCLIBC_HAS_XLOCALE__ */
 
 /* Pointers to the current global locale data in use. */
-extern const __uint16_t *__ctype_b;
+extern const __ctype_mask_t *__ctype_b;
 extern const __ctype_touplow_t *__ctype_toupper;
 extern const __ctype_touplow_t *__ctype_tolower;
 
@@ -129,7 +129,7 @@ extern const __ctype_touplow_t *__ctype_tolower;
 #endif /* __UCLIBC_HAS_XLOCALE__ */
 
 #define __isctype(c, type) \
-  ((__UCLIBC_CTYPE_B)[(int) (c)] & (__uint16_t) type)
+  ((__UCLIBC_CTYPE_B)[(int) (c)] & (__ctype_mask_t) type)
 
 #define	__isascii(c)	(((c) & ~0x7f) == 0)	/* If C is a 7 bit value.  */
 #define	__toascii(c)	((c) & 0x7f)		/* Mask off high bits.  */
@@ -292,7 +292,7 @@ toupper (int __c) __THROW
 /* These definitions are similar to the ones above but all functions
    take as an argument a handle for the locale which shall be used.  */
 #  define __isctype_l(c, type, locale) \
-  ((locale)->__ctype_b[(int) (c)] & (__uint16_t) type)
+  ((locale)->__ctype_b[(int) (c)] & (__ctype_mask_t) type)
 
 # define __exctype_l(name) 						      \
   extern int name (int, __locale_t) __THROW

+ 8 - 28
libc/misc/ctype/ctype.c

@@ -378,7 +378,7 @@ int isctype(int c, int mask)
 
 #ifdef __UCLIBC_HAS_XLOCALE__
 
-const uint16_t **__ctype_b_loc(void)
+const __ctype_mask_t **__ctype_b_loc(void)
 {
 	return &(__UCLIBC_CURLOCALE_DATA).__ctype_b;
 }
@@ -415,7 +415,7 @@ const __ctype_touplow_t **__ctype_toupper_loc(void)
 /**********************************************************************/
 #ifdef L___C_ctype_b
 
-const uint16_t __C_ctype_b_data[] = {
+const __ctype_mask_t __C_ctype_b_data[] = {
 #ifdef __UCLIBC_HAS_CTYPE_SIGNED__
 	/* -128  M-^@ */ 0,
 	/* -127  M-^A */ 0,
@@ -804,19 +804,11 @@ const uint16_t __C_ctype_b_data[] = {
 	/*  255  M-^? */ 0
 };
 
-const uint16_t *__C_ctype_b = __C_ctype_b_data + 1
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
-	+ 127
-#endif
-	;
+const __ctype_mask_t *__C_ctype_b = __C_ctype_b_data + __UCLIBC_CTYPE_B_TBL_OFFSET;
 
 #ifndef __UCLIBC_HAS_XLOCALE__
 
-const uint16_t *__ctype_b = __C_ctype_b_data + 1
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
-	+ 127
-#endif
-	;
+const __ctype_mask_t *__ctype_b = __C_ctype_b_data + __UCLIBC_CTYPE_B_TBL_OFFSET;
 
 #endif
 
@@ -926,18 +918,12 @@ const __ctype_touplow_t __C_ctype_tolower_data[] = {
 };
 
 const __ctype_touplow_t *__C_ctype_tolower = __C_ctype_tolower_data
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
-	+ 128
-#endif
-	;
+											+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
 
 #ifndef __UCLIBC_HAS_XLOCALE__
 
 const __ctype_touplow_t *__ctype_tolower = __C_ctype_tolower_data
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
-	+ 128
-#endif
-	;
+											+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
 
 #endif
 
@@ -1047,18 +1033,12 @@ const __ctype_touplow_t __C_ctype_toupper_data[] = {
 };
 
 const __ctype_touplow_t *__C_ctype_toupper = __C_ctype_toupper_data
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
-	+ 128
-#endif
-	;
+											+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
 
 #ifndef __UCLIBC_HAS_XLOCALE__
 
 const __ctype_touplow_t *__ctype_toupper = __C_ctype_toupper_data
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
-	+ 128
-#endif
-	;
+											+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
 
 #endif
 

+ 118 - 6
libc/misc/locale/locale.c

@@ -54,7 +54,6 @@
 #include <assert.h>
 #include <errno.h>
 #include <ctype.h>
-#warning devel code
 #include <stdio.h>
 
 #undef __LOCALE_C_ONLY
@@ -639,6 +638,122 @@ int _locale_set_l(const unsigned char *p, __locale_t base)
 					base->idx8wc2c = c8b->idx8wc2c;
 					/* translit  */
 #endif /* __UCLIBC_HAS_WCHAR__ */
+
+					/* What follows is fairly bloated, but it is just a hack
+					 * to get the 8-bit codeset ctype stuff functioning.
+					 * All of this will be replaced in the next generation
+					 * of locale support anyway... */
+
+					memcpy(base->__ctype_b_data,
+						   __C_ctype_b - __UCLIBC_CTYPE_B_TBL_OFFSET,
+						   (256 + __UCLIBC_CTYPE_B_TBL_OFFSET)
+						   * sizeof(__ctype_mask_t));
+					memcpy(base->__ctype_tolower_data,
+						   __C_ctype_tolower - __UCLIBC_CTYPE_TO_TBL_OFFSET,
+						   (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET)
+						   * sizeof(__ctype_touplow_t));
+					memcpy(base->__ctype_toupper_data,
+						   __C_ctype_toupper - __UCLIBC_CTYPE_TO_TBL_OFFSET,
+						   (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET)
+						   * sizeof(__ctype_touplow_t));
+
+#define Cctype_TBL_MASK		((1 << __LOCALE_DATA_Cctype_IDX_SHIFT) - 1)
+#define Cctype_IDX_OFFSET	(128 >> __LOCALE_DATA_Cctype_IDX_SHIFT)
+
+					{
+						int u;
+						__ctype_mask_t m;
+
+						for (u=0 ; u < 128 ; u++) {
+#ifdef __LOCALE_DATA_Cctype_PACKED
+							c = base->tbl8ctype
+								[ ((int)(c8b->idx8ctype
+										 [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ])
+								   << (__LOCALE_DATA_Cctype_IDX_SHIFT - 1))
+								  + ((u & Cctype_TBL_MASK) >> 1)];
+							c = (u & 1) ? (c >> 4) : (c & 0xf);
+#else
+							c = base->tbl8ctype
+								[ ((int)(c8b->idx8ctype
+										 [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ])
+								   << __LOCALE_DATA_Cctype_IDX_SHIFT)
+								  + (u & Cctype_TBL_MASK) ];
+#endif
+
+							m = base->code2flag[c];
+
+							base->__ctype_b_data
+								[128 + __UCLIBC_CTYPE_B_TBL_OFFSET + u]
+								= m;
+
+#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
+							if (((signed char)(128 + u)) != -1) {
+								base->__ctype_b_data[__UCLIBC_CTYPE_B_TBL_OFFSET
+													 + ((signed char)(128 + u))]
+									= m;
+							}
+#endif
+
+							base->__ctype_tolower_data
+								[128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
+								= 128 + u;
+							base->__ctype_toupper_data
+								[128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
+								= 128 + u;
+
+							if (m & (_ISlower|_ISupper)) {
+								c = base->tbl8uplow
+									[ ((int)(c8b->idx8uplow
+											 [u >> __LOCALE_DATA_Cuplow_IDX_SHIFT])
+									   << __LOCALE_DATA_Cuplow_IDX_SHIFT)
+									  + ((128 + u) 
+										 & ((1 << __LOCALE_DATA_Cuplow_IDX_SHIFT)
+											- 1)) ];
+								if (m & _ISlower) {
+									base->__ctype_toupper_data
+										[128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
+										= (unsigned char)(128 + u + c);
+#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
+									if (((signed char)(128 + u)) != -1) {
+										base->__ctype_toupper_data
+											[__UCLIBC_CTYPE_TO_TBL_OFFSET
+											 + ((signed char)(128 + u))]
+											= (unsigned char)(128 + u + c);
+									}
+#endif
+								} else {
+									base->__ctype_tolower_data
+										[128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
+										= (unsigned char)(128 + u - c);
+#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
+									if (((signed char)(128 + u)) != -1) {
+										base->__ctype_tolower_data
+											[__UCLIBC_CTYPE_TO_TBL_OFFSET
+											 + ((signed char)(128 + u))]
+											= (unsigned char)(128 + u - c);
+									}
+#endif
+								}
+							}
+						}
+					}
+
+#ifdef __UCLIBC_HAS_XLOCALE__
+					base->__ctype_b = base->__ctype_b_data
+						+ __UCLIBC_CTYPE_B_TBL_OFFSET;
+					base->__ctype_tolower = base->__ctype_tolower_data
+						+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
+					base->__ctype_toupper = base->__ctype_toupper_data
+						+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
+#else  /* __UCLIBC_HAS_XLOCALE__ */
+					__ctype_b = base->__ctype_b_data
+						+ __UCLIBC_CTYPE_B_TBL_OFFSET;
+					__ctype_tolower = base->__ctype_tolower_data
+						+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
+					__ctype_toupper = base->__ctype_toupper_data
+						+ __UCLIBC_CTYPE_TO_TBL_OFFSET;
+#endif /* __UCLIBC_HAS_XLOCALE__ */
+
 #endif /* __CTYPE_HAS_8_BIT_LOCALES */
 				}
 #ifdef __UCLIBC_MJN3_ONLY__
@@ -741,11 +856,8 @@ void _locale_init_l(__locale_t base)
 	/* width?? */
 #endif /* __UCLIBC_HAS_WCHAR__ */
 
-
-
-#ifdef __UCLIBC_MJN3_ONLY__
-#warning wrong for now, but always set ctype arrays to global C version
-#endif
+	/* Initially, set things up to use the global C ctype tables.
+	 * This is correct for C (ASCII) and UTF-8 based locales (except tr_TR). */
 #ifdef __UCLIBC_HAS_XLOCALE__
 	base->__ctype_b = __C_ctype_b;
 	base->__ctype_tolower = __C_ctype_tolower;

+ 6 - 1
libc/sysdeps/linux/common/bits/uClibc_locale.h

@@ -141,11 +141,16 @@ typedef struct {
 
 typedef struct {
 #ifdef __UCLIBC_HAS_XLOCALE__
-	const __uint16_t *__ctype_b;
+	const __ctype_mask_t *__ctype_b;
 	const __ctype_touplow_t *__ctype_tolower;
 	const __ctype_touplow_t *__ctype_toupper;
 #endif
 
+	/* For now, just embed this in the structure. */
+	__ctype_mask_t __ctype_b_data[256 + __UCLIBC_CTYPE_B_TBL_OFFSET];
+	__ctype_touplow_t __ctype_tolower_data[256 + __UCLIBC_CTYPE_TO_TBL_OFFSET];
+	__ctype_touplow_t __ctype_toupper_data[256 + __UCLIBC_CTYPE_TO_TBL_OFFSET];
+
 /*  	int tables_loaded; */
 /*  	unsigned char lctypes[LOCALE_STRING_SIZE]; */
 	unsigned char cur_locale[LOCALE_STRING_SIZE];

+ 10 - 0
libc/sysdeps/linux/common/bits/uClibc_touplow.h

@@ -34,10 +34,20 @@
 
 /* glibc uses the equivalent of - typedef __int32_t __ctype_touplow_t; */
 
+typedef __uint16_t __ctype_mask_t;
+
 #ifdef __UCLIBC_HAS_CTYPE_SIGNED__
+
 typedef __int16_t __ctype_touplow_t;
+#define __UCLIBC_CTYPE_B_TBL_OFFSET       128
+#define __UCLIBC_CTYPE_TO_TBL_OFFSET      128
+
 #else  /* __UCLIBC_HAS_CTYPE_SIGNED__ */
+
 typedef unsigned char __ctype_touplow_t;
+#define __UCLIBC_CTYPE_B_TBL_OFFSET       1
+#define __UCLIBC_CTYPE_TO_TBL_OFFSET      0
+
 #endif /* __UCLIBC_HAS_CTYPE_SIGNED__ */
 
 #endif /* _UCLIBC_TOUPLOW_H */