12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319 |
- #define _GNU_SOURCE
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include <limits.h>
- #include <assert.h>
- #include <locale.h>
- #include <langinfo.h>
- #include <nl_types.h>
- #include <stdint.h>
- #include "c8tables.h"
/* Number of locale categories this generator emits tables for. */
#define __LOCALE_DATA_CATEGORIES 6

/*
 * Category names, indexed by category number.
 * Must agree with ordering of gen_mmap!
 *
 * The elements are plain `char` strings: string literals have type char[],
 * so storing them through `unsigned char *` is an incompatible-pointer
 * initialisation, and every consumer (strlen, "%s") expects `char *`.
 */
static const char *const lc_names[] = {
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_MONETARY",
	"LC_TIME",
	"LC_COLLATE",
	"LC_MESSAGES",
#if __LOCALE_DATA_CATEGORIES == 12
	"LC_PAPER",
	"LC_NAME",
	"LC_ADDRESS",
	"LC_TELEPHONE",
	"LC_MEASUREMENT",
	"LC_IDENTIFICATION",
#elif __LOCALE_DATA_CATEGORIES != 6
#error unsupported __LOCALE_DATA_CATEGORIES value!
#endif
};
- typedef struct {
- char *glibc_name;
- char name[5];
- char dot_cs; /* 0 if no codeset specified */
- char cs;
- unsigned char idx_name;
- unsigned char lc_time_row;
- unsigned char lc_numeric_row;
- unsigned char lc_monetary_row;
- unsigned char lc_messages_row;
- unsigned char lc_ctype_row;
- #if __LOCALE_DATA_CATEGORIES != 6
- #error unsupported __LOCALE_DATA_CATEGORIES value
- #endif
- } locale_entry;
/* Input file readers, called in this order by main(). */
static void read_at_mappings(void);
static void read_enable_disable(void);
static void read_locale_list(void);

/* Lookup and ordering helpers. */
static int find_codeset_num(const char *cs);
static int find_at_string_num(const char *as);
static int le_cmp(const void *a, const void *b);

/* Emitters that write one C array definition to the output file. */
static void dump_table8(const char *name, const char *tbl, int len);
static void dump_table8c(const char *name, const char *tbl, int len);
static void dump_table16(const char *name, const int *tbl, int len);

/* Per-category table generators. */
static void do_lc_time(void);
static void do_lc_numeric(void);
static void do_lc_monetary(void);
static void do_lc_messages(void);
static void do_lc_ctype(void);
- static FILE *fp;
- static FILE *ofp;
- static char line_buf[80];
- static char at_mappings[256];
- static char at_mapto[256];
- static char at_strings[1024];
- static char *at_strings_end;
- static locale_entry locales[700];
- static char glibc_locale_names[60000];
- static int num_locales;
- static int default_utf8;
- static int default_8bit;
- static int total_size;
- static int null_count;
- static void do_locale_names(void)
- {
- /* "C" locale name is handled specially by the setlocale code. */
- int uniq = 0;
- int i;
- if (num_locales <= 1) {
- /* printf("error - only C locale?\n"); */
- /* exit(EXIT_FAILURE); */
- fprintf(ofp, "static const unsigned char __locales[%d];\n", (3 + __LOCALE_DATA_CATEGORIES));
- fprintf(ofp, "static const unsigned char __locale_names5[5];\n");
- } else {
- if (default_utf8) {
- fprintf(ofp, "#define __CTYPE_HAS_UTF_8_LOCALES\t\t\t1\n");
- }
- fprintf(ofp, "#define __LOCALE_DATA_CATEGORIES\t\t\t%d\n", __LOCALE_DATA_CATEGORIES);
- fprintf(ofp, "#define __LOCALE_DATA_WIDTH_LOCALES\t\t\t%d\n", 3+__LOCALE_DATA_CATEGORIES);
- fprintf(ofp, "#define __LOCALE_DATA_NUM_LOCALES\t\t\t%d\n", num_locales);
- fprintf(ofp, "static const unsigned char __locales[%d] = {\n",
- (num_locales) * (3 + __LOCALE_DATA_CATEGORIES));
- for (i=0 ; i < num_locales ; i++) {
- if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) {
- locales[i].idx_name = uniq;
- ++uniq;
- } else {
- locales[i].idx_name = uniq - 1;
- }
- fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].idx_name));
- fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].dot_cs));
- fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].cs));
- /* lc_ctype would store translit flags and turkish up/low flag. */
- fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_ctype_row));
- fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_numeric_row));
- fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_monetary_row));
- fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_time_row));
- #if 1
- /* lc_collate */
- if (strlen(locales[i].glibc_name) >= 5) {
- fprintf(ofp, "COL_IDX_%.2s_%.2s, ", locales[i].glibc_name, locales[i].glibc_name+3);
- } else if (!strcmp(locales[i].glibc_name, "C")) {
- fprintf(ofp, "COL_IDX_C , ");
- } else {
- printf("don't know how to handle COL_IDX_ for %s\n", locales[i].glibc_name);
- exit(EXIT_FAILURE);
- }
- #else
- fprintf(ofp, "%#4x, ", 0); /* place holder for lc_collate */
- #endif
- fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_messages_row));
- fprintf(ofp, "\t/* %s */\n", locales[i].glibc_name);
- }
- fprintf(ofp, "};\n\n");
- fprintf(ofp, "#define __LOCALE_DATA_NUM_LOCALE_NAMES\t\t%d\n", uniq );
- fprintf(ofp, "static const unsigned char __locale_names5[%d] = \n\t", uniq * 5);
- uniq = 0;
- for (i=1 ; i < num_locales ; i++) {
- if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) {
- fprintf(ofp, "\"%5.5s\" ", locales[i].name);
- ++uniq;
- if ((uniq % 8) == 0) {
- fprintf(ofp, "\n\t");
- }
- }
- }
- fprintf(ofp,";\n\n");
- if (at_strings_end > at_strings) {
- int i, j;
- char *p;
- i = 0;
- p = at_strings;
- while (*p) {
- ++i;
- p += 1 + (unsigned char) *p;
- }
- /* len, char, string\0 */
- fprintf(ofp, "#define __LOCALE_DATA_AT_MODIFIERS_LENGTH\t\t%d\n",
- i + (at_strings_end - at_strings));
- fprintf(ofp, "static const unsigned char __locale_at_modifiers[%d] = {",
- i + (at_strings_end - at_strings));
- i = 0;
- p = at_strings;
- while (*p) {
- fprintf(ofp, "\n\t%4d, '%c',",
- (unsigned char) *p, /* len of string\0 */
- at_mapto[i]);
- for (j=1 ; j < ((unsigned char) *p) ; j++) {
- fprintf(ofp, " '%c',", p[j]);
- }
- fprintf(ofp, " 0,");
- ++i;
- p += 1 + (unsigned char) *p;
- }
- fprintf(ofp, "\n};\n\n");
- }
- {
- int pos[__LOCALE_DATA_CATEGORIES];
- pos[0] = __LOCALE_DATA_CATEGORIES;
- for (i=0 ; i < __LOCALE_DATA_CATEGORIES ; i++) {
- fprintf(ofp, "#define __%s\t\t%d\n", lc_names[i], i);
- if (i + 1 < __LOCALE_DATA_CATEGORIES) {
- pos[i+1] = 1 + strlen(lc_names[i]) + pos[i];
- }
- }
- if (pos[__LOCALE_DATA_CATEGORIES-1] > 255) {
- printf("error - lc_names is too big (%d)\n", pos[__LOCALE_DATA_CATEGORIES-1]);
- exit(EXIT_FAILURE);
- }
- fprintf(ofp, "#define __LC_ALL\t\t%d\n\n", i);
- fprintf(ofp, "#define __lc_names_LEN\t\t%d\n",
- pos[__LOCALE_DATA_CATEGORIES-1] + strlen(lc_names[__LOCALE_DATA_CATEGORIES-1]) + 1);
- total_size += pos[__LOCALE_DATA_CATEGORIES-1] + strlen(lc_names[__LOCALE_DATA_CATEGORIES-1]) + 1;
- fprintf(ofp, "static unsigned const char lc_names[%d] =\n",
- pos[__LOCALE_DATA_CATEGORIES-1] + strlen(lc_names[__LOCALE_DATA_CATEGORIES-1]) + 1);
- fprintf(ofp, "\t\"");
- for (i=0 ; i < __LOCALE_DATA_CATEGORIES ; i++) {
- fprintf(ofp, "\\x%02x", (unsigned char) pos[i]);
- }
- fprintf(ofp, "\"");
- for (i=0 ; i < __LOCALE_DATA_CATEGORIES ; i++) {
- fprintf(ofp, "\n\t\"%s\\0\"", lc_names[i]);
- }
- fprintf(ofp, ";\n\n");
- }
- printf("locale data = %d name data = %d for %d uniq\n",
- num_locales * (3 + __LOCALE_DATA_CATEGORIES), uniq * 5, uniq);
- total_size += num_locales * (3 + __LOCALE_DATA_CATEGORIES) + uniq * 5;
- }
- }
- static void read_at_mappings(void)
- {
- char *p;
- char *m;
- int mc = 0;
- do {
- if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) {
- if (!fgets(line_buf, sizeof(line_buf), fp)) {
- if (ferror(fp)) {
- printf("error reading file\n");
- exit(EXIT_FAILURE);
- }
- return; /* EOF */
- }
- if ((*line_buf == '#') && (line_buf[1] == '-')) {
- break;
- }
- continue;
- }
- if (*p == '@') {
- if (p[1] == 0) {
- printf("error: missing @modifier name\n");
- exit(EXIT_FAILURE);
- }
- m = p; /* save the modifier name */
- if (!(p = strtok(NULL, " \t\n")) || p[1] || (((unsigned char) *p) > 0x7f)) {
- printf("error: missing or illegal @modifier mapping char\n");
- exit(EXIT_FAILURE);
- }
- if (at_mappings[(int)((unsigned char) *p)]) {
- printf("error: reused @modifier mapping char\n");
- exit(EXIT_FAILURE);
- }
- at_mappings[(int)((unsigned char) *p)] = 1;
- at_mapto[mc] = *p;
- ++mc;
- *at_strings_end = (char)( (unsigned char) (strlen(m)) );
- strcpy(++at_strings_end, m+1);
- at_strings_end += (unsigned char) at_strings_end[-1];
- printf("@mapping: \"%s\" to '%c'\n", m, *p);
- if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) {
- printf("ignoring trailing text: %s...\n", p);
- }
- *line_buf = 0;
- continue;
- }
- break;
- } while (1);
- #if 0
- {
- p = at_strings;
-
- if (!*p) {
- printf("no @ strings\n");
- return;
- }
-
- do {
- printf("%s\n", p+1);
- p += 1 + (unsigned char) *p;
- } while (*p);
- }
- #endif
- }
- static void read_enable_disable(void)
- {
- char *p;
- do {
- if (!(p = strtok(line_buf, " =\t\n")) || (*p == '#')) {
- if (!fgets(line_buf, sizeof(line_buf), fp)) {
- if (ferror(fp)) {
- printf("error reading file\n");
- exit(EXIT_FAILURE);
- }
- return; /* EOF */
- }
- if ((*line_buf == '#') && (line_buf[1] == '-')) {
- break;
- }
- continue;
- }
- if (!strcmp(p, "UTF-8")) {
- if (!(p = strtok(NULL, " =\t\n"))
- || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) {
- printf("error: missing or illegal UTF-8 setting\n");
- exit(EXIT_FAILURE);
- }
- default_utf8 = (toupper(*p) == 'Y');
- printf("UTF-8 locales are %sabled\n", "dis\0en"+ (default_utf8 << 2));
- } else if (!strcmp(p, "8-BIT")) {
- if (!(p = strtok(NULL, " =\t\n"))
- || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) {
- printf("error: missing or illegal 8-BIT setting\n");
- exit(EXIT_FAILURE);
- }
- default_8bit = (toupper(*p) == 'Y');
- printf("8-BIT locales are %sabled\n", "dis\0en" + (default_8bit << 2));
- } else {
- break;
- }
- if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) {
- printf("ignoring trailing text: %s...\n", p);
- }
- *line_buf = 0;
- continue;
- } while (1);
- }
/*
 * Map a codeset name to its number.
 * 7-bit is 1, UTF-8 is 2, 8-bits are > 2.
 *
 * When __LOCALE_DATA_CODESET_LIST is defined, it is scanned as a table of
 * one-byte offsets, each locating a codeset name inside the list itself;
 * the n-th offset (counting from 0) corresponds to codeset number 3 + n.
 * Exits the program if the codeset is not supported.
 */
static int find_codeset_num(const char *cs)
{
	if (strcmp(cs, "UTF-8") == 0) {
		return 2;
	}

#ifdef __LOCALE_DATA_CODESET_LIST
	{
		const char *list = __LOCALE_DATA_CODESET_LIST;
		const char *off;
		int num = 3;

		for (off = list ; *off ; ++off, ++num) {
			if (strcmp(list + ((unsigned char) *off), cs) == 0) {
				return num;
			}
		}
	}
#endif

	printf("error: unsupported codeset %s\n", cs);
	exit(EXIT_FAILURE);
}
- static int find_at_string_num(const char *as)
- {
- int i = 0;
- char *p = at_strings;
- while (*p) {
- if (!strcmp(p+1, as)) {
- return i;
- }
- ++i;
- p += 1 + (unsigned char) *p;
- }
- printf("error: unmapped @string %s\n", as);
- exit(EXIT_FAILURE);
- }
- static void read_locale_list(void)
- {
- char *p;
- char *s;
- char *ln; /* locale name */
- char *ls; /* locale name ll_CC */
- char *as; /* at string */
- char *ds; /* dot string */
- char *cs; /* codeset */
- int i;
- typedef struct {
- char *glibc_name;
- char name[5];
- char dot_cs; /* 0 if no codeset specified */
- char cs;
- } locale_entry;
- /* First the C locale. */
- locales[0].glibc_name = locales[0].name;
- strncpy(locales[0].name,"C",5);
- locales[0].dot_cs = 0;
- locales[0].cs = 1; /* 7-bit encoding */
- ++num_locales;
- do {
- if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) {
- if (!fgets(line_buf, sizeof(line_buf), fp)) {
- if (ferror(fp)) {
- printf("error reading file\n");
- exit(EXIT_FAILURE);
- }
- return; /* EOF */
- }
- if ((*line_buf == '#') && (line_buf[1] == '-')) {
- break;
- }
- continue;
- }
- s = glibc_locale_names;
- for (i=0 ; i < num_locales ; i++) {
- if (!strcmp(s+1, p)) {
- break;
- }
- s += 1 + ((unsigned char) *s);
- }
- if (i < num_locales) {
- printf("ignoring dulplicate locale name: %s", p);
- *line_buf = 0;
- continue;
- }
- /* New locale, but don't increment num until codeset verified! */
- *s = (char)((unsigned char) (strlen(p) + 1));
- strcpy(s+1, p);
- locales[num_locales].glibc_name = s+1;
- ln = p; /* save locale name */
- if (!(p = strtok(NULL, " \t\n"))) {
- printf("error: missing codeset for locale %s\n", ln);
- exit(EXIT_FAILURE);
- }
- cs = p;
- i = find_codeset_num(p);
- if ((i == 2) && !default_utf8) {
- printf("ignoring UTF-8 locale %s\n", ln);
- *line_buf = 0;
- continue;
- } else if ((i > 2) && !default_8bit) {
- printf("ignoring 8-bit codeset locale %s\n", ln);
- *line_buf = 0;
- continue;
- }
- locales[num_locales].cs = (char)((unsigned char) i);
- if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) {
- printf("ignoring trailing text: %s...\n", p);
- }
- /* Now go back to locale string for .codeset and @modifier */
- as = strtok(ln, "@");
- if (as) {
- as = strtok(NULL, "@");
- }
- ds = strtok(ln, ".");
- if (ds) {
- ds = strtok(NULL, ".");
- }
- ls = ln;
- if ((strlen(ls) != 5) || (ls[2] != '_')) {
- printf("error: illegal locale name %s\n", ls);
- exit(EXIT_FAILURE);
- }
- i = 0; /* value for unspecified codeset */
- if (ds) {
- i = find_codeset_num(ds);
- if ((i == 2) && !default_utf8) {
- printf("ignoring UTF-8 locale %s\n", ln);
- *line_buf = 0;
- continue;
- } else if ((i > 2) && !default_8bit) {
- printf("ignoring 8-bit codeset locale %s\n", ln);
- *line_buf = 0;
- continue;
- }
- }
- locales[num_locales].dot_cs = (char)((unsigned char) i);
- if (as) {
- i = find_at_string_num(as);
- ls[2] = at_mapto[i];
- }
- memcpy(locales[num_locales].name, ls, 5);
- /* printf("locale: %5.5s %2d %2d %s\n", */
- /* locales[num_locales].name, */
- /* locales[num_locales].cs, */
- /* locales[num_locales].dot_cs, */
- /* locales[num_locales].glibc_name */
- /* ); */
- ++num_locales;
- *line_buf = 0;
- } while (1);
- }
- static int le_cmp(const void *a, const void *b)
- {
- const locale_entry *p;
- const locale_entry *q;
- int r;
- p = (const locale_entry *) a;
- q = (const locale_entry *) b;
- if (!(r = p->name[0] - q->name[0])
- && !(r = p->name[1] - q->name[1])
- && !(r = p->name[3] - q->name[3])
- && !(r = p->name[4] - q->name[4])
- && !(r = p->name[2] - q->name[2])
- && !(r = -(p->cs - q->cs))
- ) {
- r = -(p->dot_cs - q->dot_cs);
- /* Reverse the ordering of the codesets so UTF-8 comes last.
- * Work-around (hopefully) for glibc bug affecting at least
- * the euro currency symbol. */
- }
- return r;
- }
- int main(int argc, char **argv)
- {
- if ((argc != 2) || (!(fp = fopen(*++argv, "r")))) {
- printf("error: missing filename or file!\n");
- return EXIT_FAILURE;
- }
- at_strings_end = at_strings;
- read_at_mappings();
- read_enable_disable();
- read_locale_list();
- fclose(fp);
- /* handle C locale specially */
- qsort(locales+1, num_locales-1, sizeof(locale_entry), le_cmp);
- #if 0
- for (i=0 ; i < num_locales ; i++) {
- printf("locale: %5.5s %2d %2d %s\n",
- locales[i].name,
- locales[i].cs,
- locales[i].dot_cs,
- locales[i].glibc_name
- );
- }
- #endif
- if (!(ofp = fopen("locale_tables.h", "w"))) {
- printf("error: can not open locale_tables.h for writing!\n");
- return EXIT_FAILURE;
- }
- do_lc_time();
- do_lc_numeric();
- do_lc_monetary();
- do_lc_messages();
- do_lc_ctype();
- do_locale_names();
- fclose(ofp);
- printf("total data size = %d\n", total_size);
- printf("null count = %d\n", null_count);
- return EXIT_SUCCESS;
- }
/*
 * String pool shared by the category passes; each pass resets it.
 * addblock()/addstring() append to buf and record entry starts in idx.
 */
static char buf[100000];	/* pooled bytes */
static char *idx[10000];	/* start of each pooled entry, in insertion order */
static char *last;		/* first free byte in buf */
static int uniq;		/* number of entries in idx */
- static int addblock(const char *s, size_t n) /* l includes nul terminator */
- {
- int j;
- if (!s) {
- ++null_count;
- return 0;
- }
- for (j=0 ; (j < uniq) && (idx[j] + n < last) ; j++) {
- if (!memcmp(s, idx[j], n)) {
- return idx[j] - buf;
- }
- }
- if (uniq >= sizeof(idx)) {
- printf("too many uniq strings!\n");
- exit(EXIT_FAILURE);
- }
- if (last + n >= buf + sizeof(buf)) {
- printf("need to increase size of buf!\n");
- exit(EXIT_FAILURE);
- }
- idx[uniq] = last;
- ++uniq;
- memcpy(last, s, n);
- last += n;
- return idx[uniq - 1] - buf;
- }
- static int addstring(const char *s)
- {
- int j;
- size_t l;
- if (!s) {
- ++null_count;
- return 0;
- }
- for (j=0 ; j < uniq ; j++) {
- if (!strcmp(s, idx[j])) {
- return idx[j] - buf;
- }
- }
- if (uniq >= sizeof(idx)) {
- printf("too many uniq strings!\n");
- exit(EXIT_FAILURE);
- }
- l = strlen(s) + 1;
- if (last + l >= buf + sizeof(buf)) {
- printf("need to increase size of buf!\n");
- exit(EXIT_FAILURE);
- }
- idx[uniq] = last;
- ++uniq;
- strcpy(last, s);
- last += l;
- return idx[uniq - 1] - buf;
- }
/*
 * Tail shared by every do_lc_<CATEGORY>() function.  It expands to a plain
 * statement sequence (invoke it without a trailing semicolon) and uses the
 * caller's int variables i, k and m as scratch.
 *
 * Prints size statistics, adds them to total_size, and emits four tables:
 *   __lc_<CATEGORY>_data         - the string pool,
 *   __lc_<CATEGORY>_rows         - the unique rows (buf is reused as scratch),
 *   __lc_<CATEGORY>_item_offsets - running sum of the per-item value counts,
 *   __lc_<CATEGORY>_item_idx     - pool offsets of every item's values.
 */
#define DO_LC_COMMON(CATEGORY) \
	printf("buf-size=%d uniq=%d rows=%d\n", \
		   (int)(last - buf), uniq, lc_##CATEGORY##_uniq); \
	i = num_locales + lc_##CATEGORY##_uniq*NUM_NL_##CATEGORY + (int)(last - buf); \
	printf("total = %d + %d * %d + %d = %d\n", \
		   num_locales, lc_##CATEGORY##_uniq, NUM_NL_##CATEGORY, (int)(last - buf), i); \
	total_size += i; \
	dump_table8c("__lc_" #CATEGORY "_data", buf, (int)(last - buf)); \
	for (i=0 ; i < lc_##CATEGORY##_uniq ; i++) { \
		for (k=0 ; k < NUM_NL_##CATEGORY ; k++) { \
			buf[NUM_NL_##CATEGORY*i + k] = (char)((unsigned char) lc_##CATEGORY##_uniq_X[i][k]); \
		} \
	} \
	dump_table8("__lc_" #CATEGORY "_rows", buf, lc_##CATEGORY##_uniq * NUM_NL_##CATEGORY); \
	buf16[0] = 0; \
	for (i=0 ; i < NUM_NL_##CATEGORY - 1 ; i++) { \
		buf16[i+1] = buf16[i] + lc_##CATEGORY##_count[i]; \
	} \
	dump_table16("__lc_" #CATEGORY "_item_offsets", buf16, NUM_NL_##CATEGORY); \
	m = 0; \
	for (k=0 ; k < NUM_NL_##CATEGORY ; k++) { \
		for (i=0 ; i < lc_##CATEGORY##_count[k] ; i++) { \
			buf16[m] = lc_##CATEGORY##_item[k][i]; \
			++m; \
		} \
	} \
	dump_table16("__lc_" #CATEGORY "_item_idx", buf16, m);
/*
 * End of the per-locale loop body in every do_lc_<CATEGORY>() function.
 * Expands to statements (invoke it without a trailing semicolon) and uses
 * the caller's i (locale index) and k (number of items just collected).
 *
 * The row just collected sits in lc_<CATEGORY>_uniq_X[lc_<CATEGORY>_uniq].
 * If it equals an earlier row, that row's number is used; otherwise the
 * scratch row is kept as a new row.  The row number is stored in
 * locales[i].lc_<CATEGORY>_row.  Exits on too many items or rows.
 */
#define DL_LC_LOOPTAIL(CATEGORY) \
	if (k > NUM_NL_##CATEGORY) { \
		printf("error -- lc_" #CATEGORY " nl_item count > %d!\n", NUM_NL_##CATEGORY); \
		exit(EXIT_FAILURE); \
	} \
	{ \
		int row = 0; \
		while ((row < lc_##CATEGORY##_uniq) \
			   && (memcmp(lc_##CATEGORY##_uniq_X[lc_##CATEGORY##_uniq], \
						  lc_##CATEGORY##_uniq_X[row], NUM_NL_##CATEGORY) != 0)) { \
			++row; \
		} \
		if (row == lc_##CATEGORY##_uniq) { /* new locale row */ \
			++lc_##CATEGORY##_uniq; \
			if (lc_##CATEGORY##_uniq > 255) { \
				printf("too many unique lc_" #CATEGORY " rows!\n"); \
				exit(EXIT_FAILURE); \
			} \
		} \
		locales[i].lc_##CATEGORY##_row = row; \
	}
/* Scratch array in which DO_LC_COMMON builds the tables it hands to dump_table16(). */
static int buf16[100 * 256];
- static void dump_table8(const char *name, const char *tbl, int len)
- {
- int i;
- fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len);
- fprintf(ofp, "static const unsigned char %s[%d] = {", name, len);
- for (i=0 ; i < len ; i++) {
- if ((i % 12) == 0) {
- fprintf(ofp, "\n\t");
- }
- fprintf(ofp, "%#4x, ", (int)((unsigned char) tbl[i]));
- }
- fprintf(ofp, "\n};\n\n");
- }
/*
 * Locale-independent ASCII classifiers, usable on char or int operands.
 * For a char-sized operand the shifted value is reduced to unsigned char
 * before the range test; for anything wider it is compared as unsigned int.
 */
#define __C_isdigit(c) \
	(((sizeof(c) == sizeof(char)) \
	  ? (unsigned int)((unsigned char)((c) - '0')) \
	  : (unsigned int)((c) - '0')) < 10u)

/* Setting bit 0x20 folds 'A'-'Z' onto 'a'-'z' before the range test. */
#define __C_isalpha(c) \
	(((sizeof(c) == sizeof(char)) \
	  ? (unsigned int)((unsigned char)(((c) | 0x20) - 'a')) \
	  : (unsigned int)(((c) | 0x20) - 'a')) < 26u)

#define __C_isalnum(c)	(__C_isalpha(c) || __C_isdigit(c))
- static void dump_table8c(const char *name, const char *tbl, int len)
- {
- int i;
- fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len);
- fprintf(ofp, "static const unsigned char %s[%d] = {", name, len);
- for (i=0 ; i < len ; i++) {
- if ((i % 12) == 0) {
- fprintf(ofp, "\n\t");
- }
- if (__C_isalnum(tbl[i]) || (tbl[i] == ' ')) {
- fprintf(ofp, " '%c', ", (int)((unsigned char) tbl[i]));
- } else {
- fprintf(ofp, "%#4x, ", (int)((unsigned char) tbl[i]));
- }
- }
- fprintf(ofp, "\n};\n\n");
- }
- static void dump_table16(const char *name, const int *tbl, int len)
- {
- int i;
- fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len);
- fprintf(ofp, "static const uint16_t %s[%d] = {", name, len);
- for (i=0 ; i < len ; i++) {
- if ((i % 8) == 0) {
- fprintf(ofp, "\n\t");
- }
- if (tbl[i] != (uint16_t) tbl[i]) {
- printf("error - falls outside uint16 range!\n");
- exit(EXIT_FAILURE);
- }
- fprintf(ofp, "%#6x, ", tbl[i]);
- }
- fprintf(ofp, "\n};\n\n");
- }
/* ---- LC_TIME ---- */

/* Number of nl_langinfo items collected per locale. */
#define NUM_NL_time	50

/* Per item: the distinct pool offsets seen so far, and how many there are. */
static int lc_time_item[NUM_NL_time][256];
static int lc_time_count[NUM_NL_time];

/* One row per distinct locale; row [lc_time_uniq] is the one being built. */
static unsigned char lc_time_uniq_X[700][NUM_NL_time];
static int lc_time_uniq;

/* Collect item X into the next column; relies on the caller's `k`. */
#define DO_NL_S(X)	lc_time_S(X, k++)
- static void lc_time_S(int X, int k)
- {
- size_t len;
- int j, m;
- const char *s = nl_langinfo(X);
- const char *p;
- static const char nulbuf[] = "";
- if (X == ALT_DIGITS) {
- len = 1;
- if (!s) {
- s = nulbuf;
- }
- if (*s) {
- p = s;
- for (j = 0 ; j < 100 ; j++) {
- while (*p) {
- ++p;
- }
- ++p;
- }
- len = p - s;
- }
- j = addblock(s, len);
- /* if (len > 1) fprintf(stderr, "alt_digit: called addblock with len %zd\n", len); */
- } else if (X == ERA) {
- if (!s) {
- s = nulbuf;
- }
- p = s;
- while (*p) {
- while (*p) {
- ++p;
- }
- ++p;
- }
- ++p;
- j = addblock(s, p - s);
- /* if (p-s > 1) fprintf(stderr, "era: called addblock with len %d\n", p-s); */
- } else {
- j = addstring(s);
- }
- for (m=0 ; m < lc_time_count[k] ; m++) {
- if (lc_time_item[k][m] == j) {
- break;
- }
- }
- if (m == lc_time_count[k]) { /* new for this nl_item */
- if (m > 255) {
- printf("too many nl_item %d entries in lc_time\n", k);
- exit(EXIT_FAILURE);
- }
- lc_time_item[k][m] = j;
- ++lc_time_count[k];
- }
- lc_time_uniq_X[lc_time_uniq][k] = m;
- }
- static void do_lc_time(void)
- {
- int i, k, m;
- last = buf+1;
- uniq = 1;
- *buf = 0;
- *idx = buf;
- for (i=0 ; i < num_locales ; i++) {
- k = 0;
- if (!setlocale(LC_ALL, locales[i].glibc_name)) {
- printf("setlocale(LC_ALL,%s) failed!\n",
- locales[i].glibc_name);
- }
- DO_NL_S(ABDAY_1);
- DO_NL_S(ABDAY_2);
- DO_NL_S(ABDAY_3);
- DO_NL_S(ABDAY_4);
- DO_NL_S(ABDAY_5);
- DO_NL_S(ABDAY_6);
- DO_NL_S(ABDAY_7);
- DO_NL_S(DAY_1);
- DO_NL_S(DAY_2);
- DO_NL_S(DAY_3);
- DO_NL_S(DAY_4);
- DO_NL_S(DAY_5);
- DO_NL_S(DAY_6);
- DO_NL_S(DAY_7);
- DO_NL_S(ABMON_1);
- DO_NL_S(ABMON_2);
- DO_NL_S(ABMON_3);
- DO_NL_S(ABMON_4);
- DO_NL_S(ABMON_5);
- DO_NL_S(ABMON_6);
- DO_NL_S(ABMON_7);
- DO_NL_S(ABMON_8);
- DO_NL_S(ABMON_9);
- DO_NL_S(ABMON_10);
- DO_NL_S(ABMON_11);
- DO_NL_S(ABMON_12);
- DO_NL_S(MON_1);
- DO_NL_S(MON_2);
- DO_NL_S(MON_3);
- DO_NL_S(MON_4);
- DO_NL_S(MON_5);
- DO_NL_S(MON_6);
- DO_NL_S(MON_7);
- DO_NL_S(MON_8);
- DO_NL_S(MON_9);
- DO_NL_S(MON_10);
- DO_NL_S(MON_11);
- DO_NL_S(MON_12);
- DO_NL_S(AM_STR);
- DO_NL_S(PM_STR);
- DO_NL_S(D_T_FMT);
- DO_NL_S(D_FMT);
- DO_NL_S(T_FMT);
- DO_NL_S(T_FMT_AMPM);
- DO_NL_S(ERA);
- DO_NL_S(ERA_YEAR); /* non SuSv3 */
- DO_NL_S(ERA_D_FMT);
- DO_NL_S(ALT_DIGITS);
- DO_NL_S(ERA_D_T_FMT);
- DO_NL_S(ERA_T_FMT);
- DL_LC_LOOPTAIL(time)
- }
- DO_LC_COMMON(time)
- }
- #undef DO_NL_S
/* ---- LC_NUMERIC ---- */

/* Number of nl_langinfo items collected per locale. */
#define NUM_NL_numeric	3

/* Per item: the distinct pool offsets seen so far, and how many there are. */
static int lc_numeric_item[NUM_NL_numeric][256];
static int lc_numeric_count[NUM_NL_numeric];

/* One row per distinct locale; row [lc_numeric_uniq] is the one being built. */
static unsigned char lc_numeric_uniq_X[700][NUM_NL_numeric];
static int lc_numeric_uniq;

/* Collect item X into the next column; relies on the caller's `k`. */
#define DO_NL_S(X)	lc_numeric_S(X, k++)
- static void lc_numeric_S(int X, int k)
- {
- int j, m;
- char buf[256];
- char *e;
- char *s;
- char c;
- s = nl_langinfo(X);
- if (X == GROUPING) {
- if (s) {
- if ((*s == CHAR_MAX) || (*s == -1)) { /* stupid glibc... :-( */
- s = "";
- }
- e = s;
- c = 0;
- while (*e) { /* find end of string */
- if (*e == CHAR_MAX) {
- c = CHAR_MAX;
- ++e;
- break;
- }
- ++e;
- }
- if ((e - s) > sizeof(buf)) {
- printf("grouping specifier too long\n");
- exit(EXIT_FAILURE);
- }
- strncpy(buf, s, (e-s));
- e = buf + (e-s);
- *e = 0; /* Make sure we're null-terminated. */
- if (c != CHAR_MAX) { /* remove duplicate repeats */
- while (e > buf) {
- --e;
- if (*e != e[-1]) {
- break;
- }
- }
- *++e = 0;
- }
- s = buf;
- }
- }
- j = addstring(s);
- for (m=0 ; m < lc_numeric_count[k] ; m++) {
- if (lc_numeric_item[k][m] == j) {
- break;
- }
- }
- if (m == lc_numeric_count[k]) { /* new for this nl_item */
- if (m > 255) {
- printf("too many nl_item %d entries in lc_numeric\n", k);
- exit(EXIT_FAILURE);
- }
- lc_numeric_item[k][m] = j;
- ++lc_numeric_count[k];
- }
- /* printf("\\x%02x", m); */
- lc_numeric_uniq_X[lc_numeric_uniq][k] = m;
- }
- static void do_lc_numeric(void)
- {
- int i, k, m;
- last = buf+1;
- uniq = 1;
- *buf = 0;
- *idx = buf;
- for (i=0 ; i < num_locales ; i++) {
- k = 0;
- if (!setlocale(LC_ALL, locales[i].glibc_name)) {
- printf("setlocale(LC_ALL,%s) failed!\n",
- locales[i].glibc_name);
- }
- DO_NL_S(RADIXCHAR); /* DECIMAL_POINT */
- DO_NL_S(THOUSEP); /* THOUSANDS_SEP */
- DO_NL_S(GROUPING);
- DL_LC_LOOPTAIL(numeric)
- }
- DO_LC_COMMON(numeric)
- }
- #undef DO_NL_S
/* ---- LC_MONETARY ---- */

/* Number of nl_langinfo items collected per locale. */
#define NUM_NL_monetary	(7+14+1)

/* Per item: the distinct pool offsets seen so far, and how many there are. */
static int lc_monetary_item[NUM_NL_monetary][256];
static int lc_monetary_count[NUM_NL_monetary];

/* One row per distinct locale; row [lc_monetary_uniq] is the one being built. */
static unsigned char lc_monetary_uniq_X[700][NUM_NL_monetary];
static int lc_monetary_uniq;

/* Collect string item X into the next column; relies on the caller's `k`. */
#define DO_NL_S(X)	lc_monetary_S(X, k++)

/* Collect single-char item X into the next column; relies on the caller's `k`. */
#define DO_NL_C(X)	lc_monetary_C(X, k++)
- static void lc_monetary_C(int X, int k)
- {
- int j, m;
- char c_buf[2];
- #warning fix the char entries for monetary... target signedness of char may be different!
- c_buf[1] = 0;
- c_buf[0] = *nl_langinfo(X);
- j = addstring(c_buf);
- for (m=0 ; m < lc_monetary_count[k] ; m++) {
- if (lc_monetary_item[k][m] == j) {
- break;
- }
- }
- if (m == lc_monetary_count[k]) { /* new for this nl_item */
- if (m > 255) {
- printf("too many nl_item %d entries in lc_monetary\n", k);
- exit(EXIT_FAILURE);
- }
- lc_monetary_item[k][m] = j;
- ++lc_monetary_count[k];
- }
- /* printf("\\x%02x", m); */
- lc_monetary_uniq_X[lc_monetary_uniq][k] = m;
- }
- static void lc_monetary_S(int X, int k)
- {
- int j, m;
- char buf[256];
- char *e;
- char *s;
- char c;
- s = nl_langinfo(X);
- if (X == MON_GROUPING) {
- if (s) {
- if ((*s == CHAR_MAX) || (*s == -1)) { /* stupid glibc... :-( */
- s = "";
- }
- e = s;
- c = 0;
- while (*e) { /* find end of string */
- if (*e == CHAR_MAX) {
- c = CHAR_MAX;
- ++e;
- break;
- }
- ++e;
- }
- if ((e - s) > sizeof(buf)) {
- printf("mon_grouping specifier too long\n");
- exit(EXIT_FAILURE);
- }
- strncpy(buf, s, (e-s));
- e = buf + (e-s);
- *e = 0; /* Make sure we're null-terminated. */
- if (c != CHAR_MAX) { /* remove duplicate repeats */
- while (e > buf) {
- --e;
- if (*e != e[-1]) {
- break;
- }
- }
- *++e = 0;
- }
- s = buf;
- }
- }
- j = addstring(s);
- for (m=0 ; m < lc_monetary_count[k] ; m++) {
- if (lc_monetary_item[k][m] == j) {
- break;
- }
- }
- if (m == lc_monetary_count[k]) { /* new for this nl_item */
- if (m > 255) {
- printf("too many nl_item %d entries in lc_monetary\n", k);
- exit(EXIT_FAILURE);
- }
- lc_monetary_item[k][m] = j;
- ++lc_monetary_count[k];
- }
- /* printf("\\x%02x", m); */
- lc_monetary_uniq_X[lc_monetary_uniq][k] = m;
- }
- static void do_lc_monetary(void)
- {
- int i, k, m;
- last = buf+1;
- uniq = 1;
- *buf = 0;
- *idx = buf;
- for (i=0 ; i < num_locales ; i++) {
- k = 0;
- if (!setlocale(LC_ALL, locales[i].glibc_name)) {
- printf("setlocale(LC_ALL,%s) failed!\n",
- locales[i].glibc_name);
- }
- /* non SUSv3 */
- DO_NL_S(INT_CURR_SYMBOL);
- DO_NL_S(CURRENCY_SYMBOL);
- DO_NL_S(MON_DECIMAL_POINT);
- DO_NL_S(MON_THOUSANDS_SEP);
- DO_NL_S(MON_GROUPING);
- DO_NL_S(POSITIVE_SIGN);
- DO_NL_S(NEGATIVE_SIGN);
- DO_NL_C(INT_FRAC_DIGITS);
- DO_NL_C(FRAC_DIGITS);
- DO_NL_C(P_CS_PRECEDES);
- DO_NL_C(P_SEP_BY_SPACE);
- DO_NL_C(N_CS_PRECEDES);
- DO_NL_C(N_SEP_BY_SPACE);
- DO_NL_C(P_SIGN_POSN);
- DO_NL_C(N_SIGN_POSN);
- DO_NL_C(INT_P_CS_PRECEDES);
- DO_NL_C(INT_P_SEP_BY_SPACE);
- DO_NL_C(INT_N_CS_PRECEDES);
- DO_NL_C(INT_N_SEP_BY_SPACE);
- DO_NL_C(INT_P_SIGN_POSN);
- DO_NL_C(INT_N_SIGN_POSN);
- DO_NL_S(CRNCYSTR); /* CURRENCY_SYMBOL */
- DL_LC_LOOPTAIL(monetary)
- }
- DO_LC_COMMON(monetary)
- }
- #undef DO_NL_S
/* ---- LC_MESSAGES ---- */

/* Number of nl_langinfo items collected per locale. */
#define NUM_NL_messages	4

/* Per item: the distinct pool offsets seen so far, and how many there are. */
static int lc_messages_item[NUM_NL_messages][256];
static int lc_messages_count[NUM_NL_messages];

/* One row per distinct locale; row [lc_messages_uniq] is the one being built. */
static unsigned char lc_messages_uniq_X[700][NUM_NL_messages];
static int lc_messages_uniq;

/* Collect item X into the next column; relies on the caller's `k`. */
#define DO_NL_S(X)	lc_messages_S(X, k++)
- static void lc_messages_S(int X, int k)
- {
- int j, m;
- j = addstring(nl_langinfo(X));
- for (m=0 ; m < lc_messages_count[k] ; m++) {
- if (lc_messages_item[k][m] == j) {
- break;
- }
- }
- if (m == lc_messages_count[k]) { /* new for this nl_item */
- if (m > 255) {
- printf("too many nl_item %d entries in lc_messages\n", k);
- exit(EXIT_FAILURE);
- }
- lc_messages_item[k][m] = j;
- ++lc_messages_count[k];
- }
- /* printf("\\x%02x", m); */
- lc_messages_uniq_X[lc_messages_uniq][k] = m;
- }
- static void do_lc_messages(void)
- {
- int i, k, m;
- last = buf+1;
- uniq = 1;
- *buf = 0;
- *idx = buf;
- for (i=0 ; i < num_locales ; i++) {
- k = 0;
- if (!setlocale(LC_ALL, locales[i].glibc_name)) {
- printf("setlocale(LC_ALL,%s) failed!\n",
- locales[i].glibc_name);
- }
- DO_NL_S(YESEXPR);
- DO_NL_S(NOEXPR);
- DO_NL_S(YESSTR);
- DO_NL_S(NOSTR);
- DL_LC_LOOPTAIL(messages)
- }
- DO_LC_COMMON(messages)
- }
- #undef DO_NL_S
/* ---- LC_CTYPE ---- */

/* Number of nl_langinfo items collected per locale. */
#define NUM_NL_ctype	10

/* Per item: the distinct pool offsets seen so far, and how many there are. */
static int lc_ctype_item[NUM_NL_ctype][256];
static int lc_ctype_count[NUM_NL_ctype];

/* One row per distinct locale; row [lc_ctype_uniq] is the one being built. */
static unsigned char lc_ctype_uniq_X[700][NUM_NL_ctype];
static int lc_ctype_uniq;

/* Collect item X into the next column; relies on the caller's `k`. */
#define DO_NL_S(X)	lc_ctype_S(X, k++)
- static void lc_ctype_S(int X, int k)
- {
- int j, m;
- j = addstring(nl_langinfo(X));
- for (m=0 ; m < lc_ctype_count[k] ; m++) {
- if (lc_ctype_item[k][m] == j) {
- break;
- }
- }
- if (m == lc_ctype_count[k]) { /* new for this nl_item */
- if (m > 255) {
- printf("too many nl_item %d entries in lc_ctype\n", k);
- exit(EXIT_FAILURE);
- }
- lc_ctype_item[k][m] = j;
- ++lc_ctype_count[k];
- }
- /* printf("\\x%02x", m); */
- lc_ctype_uniq_X[lc_ctype_uniq][k] = m;
- }
- static void do_lc_ctype(void)
- {
- int i, k, m;
- last = buf+1;
- uniq = 1;
- *buf = 0;
- *idx = buf;
- for (i=0 ; i < num_locales ; i++) {
- k = 0;
- if (!setlocale(LC_ALL, locales[i].glibc_name)) {
- printf("setlocale(LC_ALL,%s) failed!\n",
- locales[i].glibc_name);
- }
- DO_NL_S(_NL_CTYPE_OUTDIGIT0_MB);
- DO_NL_S(_NL_CTYPE_OUTDIGIT1_MB);
- DO_NL_S(_NL_CTYPE_OUTDIGIT2_MB);
- DO_NL_S(_NL_CTYPE_OUTDIGIT3_MB);
- DO_NL_S(_NL_CTYPE_OUTDIGIT4_MB);
- DO_NL_S(_NL_CTYPE_OUTDIGIT5_MB);
- DO_NL_S(_NL_CTYPE_OUTDIGIT6_MB);
- DO_NL_S(_NL_CTYPE_OUTDIGIT7_MB);
- DO_NL_S(_NL_CTYPE_OUTDIGIT8_MB);
- DO_NL_S(_NL_CTYPE_OUTDIGIT9_MB);
- DL_LC_LOOPTAIL(ctype)
- }
- DO_LC_COMMON(ctype)
- }
|