/* gen_locale.c -- generates locale_tables.h from the host C library's locale data. */
  1. #define _GNU_SOURCE
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <ctype.h>
  6. #include <assert.h>
  7. #include <locale.h>
  8. #include <langinfo.h>
  9. #include <nl_types.h>
  10. #include <stdint.h>
  11. #define __CTYPE_HAS_8_BIT_LOCALES
  12. #include "c8tables.h"
  13. #define CATEGORIES 6
  14. /* must agree with ordering of gen_mmap! */
  15. static const unsigned char *lc_names[] = {
  16. "LC_CTYPE",
  17. "LC_NUMERIC",
  18. "LC_MONETARY",
  19. "LC_TIME",
  20. "LC_COLLATE",
  21. "LC_MESSAGES",
  22. #if CATEGORIES == 12
  23. "LC_PAPER",
  24. "LC_NAME",
  25. "LC_ADDRESS",
  26. "LC_TELEPHONE",
  27. "LC_MEASUREMENT",
  28. "LC_IDENTIFICATION",
  29. #elif CATEGORIES != 6
  30. #error unsupported CATEGORIES value!
  31. #endif
  32. };
  33. typedef struct {
  34. char *glibc_name;
  35. char name[5];
  36. char dot_cs; /* 0 if no codeset specified */
  37. char cs;
  38. unsigned char idx_name;
  39. unsigned char lc_time_row;
  40. unsigned char lc_numeric_row;
  41. unsigned char lc_monetary_row;
  42. unsigned char lc_messages_row;
  43. #if CATEGORIES != 6
  44. #error unsupported CATEGORIES value
  45. #endif
  46. } locale_entry;
  47. static void read_at_mappings(void);
  48. static void read_enable_disable(void);
  49. static void read_locale_list(void);
  50. static int find_codeset_num(const char *cs);
  51. static int find_at_string_num(const char *as);
  52. static int le_cmp(const void *, const void *);
  53. static void dump_table8(const char *name, const char *tbl, int len);
  54. static void dump_table8c(const char *name, const char *tbl, int len);
  55. static void dump_table16(const char *name, const int *tbl, int len);
  56. static void do_lc_time(void);
  57. static void do_lc_numeric(void);
  58. static void do_lc_monetary(void);
  59. static void do_lc_messages(void);
  60. static FILE *fp;
  61. static FILE *ofp;
  62. static char line_buf[80];
  63. static char at_mappings[256];
  64. static char at_mapto[256];
  65. static char at_strings[1024];
  66. static char *at_strings_end;
  67. static locale_entry locales[700];
  68. static char glibc_locale_names[60000];
  69. static int num_locales;
  70. static int default_utf8;
  71. static int default_8bit;
  72. static int total_size;
  73. static int null_count;
  74. static void do_locale_names(void)
  75. {
  76. /* "C" locale name is handled specially by the setlocale code. */
  77. int uniq = 0;
  78. int i;
  79. if (num_locales <= 1) {
  80. /* printf("error - only C locale?\n"); */
  81. /* exit(EXIT_FAILURE); */
  82. fprintf(ofp, "static const unsigned char __locales[%d];\n", (3 + CATEGORIES));
  83. fprintf(ofp, "static const unsigned char __locale_names5[5];\n");
  84. } else {
  85. fprintf(ofp, "#define CATEGORIES\t\t\t%d\n", CATEGORIES);
  86. fprintf(ofp, "#define WIDTH_LOCALES\t\t\t%d\n", 3+CATEGORIES);
  87. fprintf(ofp, "#define NUM_LOCALES\t\t\t%d\n", num_locales);
  88. fprintf(ofp, "static const unsigned char __locales[%d] = {\n",
  89. (num_locales) * (3 + CATEGORIES));
  90. for (i=0 ; i < num_locales ; i++) {
  91. if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) {
  92. locales[i].idx_name = uniq;
  93. ++uniq;
  94. } else {
  95. locales[i].idx_name = uniq - 1;
  96. }
  97. fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].idx_name));
  98. fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].dot_cs));
  99. fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].cs));
  100. /* lc_ctype would store translit flags and turkish up/low flag. */
  101. fprintf(ofp, "%#4x, ", 0); /* place holder for lc_ctype */
  102. fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_numeric_row));
  103. fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_monetary_row));
  104. fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_time_row));
  105. fprintf(ofp, "%#4x, ", 0); /* place holder for lc_collate */
  106. fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_messages_row));
  107. fprintf(ofp, "\t/* %s */\n", locales[i].glibc_name);
  108. }
  109. fprintf(ofp, "};\n\n");
  110. fprintf(ofp, "#define NUM_LOCALE_NAMES\t\t%d\n", uniq );
  111. fprintf(ofp, "static const unsigned char __locale_names5[%d] = \n\t", uniq * 5);
  112. uniq = 0;
  113. for (i=1 ; i < num_locales ; i++) {
  114. if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) {
  115. fprintf(ofp, "\"%5.5s\" ", locales[i].name);
  116. ++uniq;
  117. if ((uniq % 8) == 0) {
  118. fprintf(ofp, "\n\t");
  119. }
  120. }
  121. }
  122. fprintf(ofp,";\n\n");
  123. if (at_strings_end > at_strings) {
  124. int i, j;
  125. char *p;
  126. i = 0;
  127. p = at_strings;
  128. while (*p) {
  129. ++i;
  130. p += 1 + (unsigned char) *p;
  131. }
  132. /* len, char, string\0 */
  133. fprintf(ofp, "#define LOCALE_AT_MODIFIERS_LENGTH\t\t%d\n",
  134. i + (at_strings_end - at_strings));
  135. fprintf(ofp, "static const unsigned char __locale_at_modifiers[%d] = {",
  136. i + (at_strings_end - at_strings));
  137. i = 0;
  138. p = at_strings;
  139. while (*p) {
  140. fprintf(ofp, "\n\t%4d, '%c',",
  141. (unsigned char) *p, /* len of string\0 */
  142. at_mapto[i]);
  143. for (j=1 ; j < ((unsigned char) *p) ; j++) {
  144. fprintf(ofp, " '%c',", p[j]);
  145. }
  146. fprintf(ofp, " 0,");
  147. ++i;
  148. p += 1 + (unsigned char) *p;
  149. }
  150. fprintf(ofp, "\n};\n\n");
  151. }
  152. {
  153. int pos[CATEGORIES];
  154. pos[0] = CATEGORIES;
  155. for (i=0 ; i < CATEGORIES ; i++) {
  156. fprintf(ofp, "#define __%s\t\t%d\n", lc_names[i], i);
  157. if (i + 1 < CATEGORIES) {
  158. pos[i+1] = 1 + strlen(lc_names[i]) + pos[i];
  159. }
  160. }
  161. if (pos[CATEGORIES-1] > 255) {
  162. printf("error - lc_names is too big (%d)\n", pos[CATEGORIES-1]);
  163. exit(EXIT_FAILURE);
  164. }
  165. fprintf(ofp, "#define __LC_ALL\t\t%d\n\n", i);
  166. fprintf(ofp, "#define lc_names_LEN\t\t%d\n",
  167. pos[CATEGORIES-1] + strlen(lc_names[CATEGORIES-1]) + 1);
  168. total_size += pos[CATEGORIES-1] + strlen(lc_names[CATEGORIES-1]) + 1;
  169. fprintf(ofp, "static unsigned const char lc_names[%d] =\n",
  170. pos[CATEGORIES-1] + strlen(lc_names[CATEGORIES-1]) + 1);
  171. fprintf(ofp, "\t\"");
  172. for (i=0 ; i < CATEGORIES ; i++) {
  173. fprintf(ofp, "\\x%02x", (unsigned char) pos[i]);
  174. }
  175. fprintf(ofp, "\"");
  176. for (i=0 ; i < CATEGORIES ; i++) {
  177. fprintf(ofp, "\n\t\"%s\\0\"", lc_names[i]);
  178. }
  179. fprintf(ofp, ";\n\n");
  180. }
  181. printf("locale data = %d name data = %d for %d uniq\n",
  182. num_locales * (3 + CATEGORIES), uniq * 5, uniq);
  183. total_size += num_locales * (3 + CATEGORIES) + uniq * 5;
  184. }
  185. }
  186. int main(int argc, char **argv)
  187. {
  188. if ((argc != 2) || (!(fp = fopen(*++argv, "r")))) {
  189. printf("error: missing filename or file!\n");
  190. return EXIT_FAILURE;
  191. }
  192. at_strings_end = at_strings;
  193. read_at_mappings();
  194. read_enable_disable();
  195. read_locale_list();
  196. fclose(fp);
  197. /* handle C locale specially */
  198. qsort(locales+1, num_locales-1, sizeof(locale_entry), le_cmp);
  199. #if 0
  200. for (i=0 ; i < num_locales ; i++) {
  201. printf("locale: %5.5s %2d %2d %s\n",
  202. locales[i].name,
  203. locales[i].cs,
  204. locales[i].dot_cs,
  205. locales[i].glibc_name
  206. );
  207. }
  208. #endif
  209. if (!(ofp = fopen("locale_tables.h", "w"))) {
  210. printf("error: can not open locale_tables.h for writing!\n");
  211. return EXIT_FAILURE;
  212. }
  213. do_lc_time();
  214. do_lc_numeric();
  215. do_lc_monetary();
  216. do_lc_messages();
  217. do_locale_names();
  218. fclose(ofp);
  219. printf("total data size = %d\n", total_size);
  220. printf("null count = %d\n", null_count);
  221. return EXIT_SUCCESS;
  222. }
  223. static char *idx[10000];
  224. static char buf[100000];
  225. static char *last;
  226. static int uniq;
  227. static int addstring(const char *s)
  228. {
  229. int j;
  230. size_t l;
  231. if (!s) {
  232. ++null_count;
  233. return 0;
  234. }
  235. for (j=0 ; j < uniq ; j++) {
  236. if (!strcmp(s, idx[j])) {
  237. return idx[j] - buf;
  238. }
  239. }
  240. if (uniq >= sizeof(idx)) {
  241. printf("too many uniq strings!\n");
  242. exit(EXIT_FAILURE);
  243. }
  244. l = strlen(s) + 1;
  245. if (last + l >= buf + sizeof(buf)) {
  246. printf("need to increase size of buf!\n");
  247. exit(EXIT_FAILURE);
  248. }
  249. idx[uniq] = last;
  250. ++uniq;
  251. strcpy(last, s);
  252. last += l;
  253. return idx[uniq - 1] - buf;
  254. }
  255. static int buf16[50*256];
  256. static int lc_time_item[50][256];
  257. static int lc_time_count[50];
  258. static unsigned char lc_time_uniq_50[700][50];
  259. static int lc_time_uniq;
  260. #define DO_NL_S(X) lc_time_S(X, k++)
  261. static void lc_time_S(int X, int k)
  262. {
  263. int j, m;
  264. j = addstring(nl_langinfo(X));
  265. for (m=0 ; m < lc_time_count[k] ; m++) {
  266. if (lc_time_item[k][m] == j) {
  267. break;
  268. }
  269. }
  270. if (m == lc_time_count[k]) { /* new for this nl_item */
  271. if (m > 255) {
  272. printf("too many nl_item %d entries in lc_time\n", k);
  273. exit(EXIT_FAILURE);
  274. }
  275. lc_time_item[k][m] = j;
  276. ++lc_time_count[k];
  277. }
  278. /* printf("\\x%02x", m); */
  279. lc_time_uniq_50[lc_time_uniq][k] = m;
  280. }
  281. static void do_lc_time(void)
  282. {
  283. int i, k, m;
  284. last = buf+1;
  285. uniq = 1;
  286. *buf = 0;
  287. *idx = buf;
  288. /* printf("processing lc_time..."); */
  289. for (i=0 ; i < num_locales ; i++) {
  290. k = 0;
  291. /* printf(" %d", i); fflush(stdout); */
  292. if (!setlocale(LC_ALL, locales[i].glibc_name)) {
  293. printf("setlocale(LC_ALL,%s) failed!\n",
  294. locales[i].glibc_name);
  295. }
  296. DO_NL_S(ABDAY_1);
  297. DO_NL_S(ABDAY_2);
  298. DO_NL_S(ABDAY_3);
  299. DO_NL_S(ABDAY_4);
  300. DO_NL_S(ABDAY_5);
  301. DO_NL_S(ABDAY_6);
  302. DO_NL_S(ABDAY_7);
  303. DO_NL_S(DAY_1);
  304. DO_NL_S(DAY_2);
  305. DO_NL_S(DAY_3);
  306. DO_NL_S(DAY_4);
  307. DO_NL_S(DAY_5);
  308. DO_NL_S(DAY_6);
  309. DO_NL_S(DAY_7);
  310. DO_NL_S(ABMON_1);
  311. DO_NL_S(ABMON_2);
  312. DO_NL_S(ABMON_3);
  313. DO_NL_S(ABMON_4);
  314. DO_NL_S(ABMON_5);
  315. DO_NL_S(ABMON_6);
  316. DO_NL_S(ABMON_7);
  317. DO_NL_S(ABMON_8);
  318. DO_NL_S(ABMON_9);
  319. DO_NL_S(ABMON_10);
  320. DO_NL_S(ABMON_11);
  321. DO_NL_S(ABMON_12);
  322. DO_NL_S(MON_1);
  323. DO_NL_S(MON_2);
  324. DO_NL_S(MON_3);
  325. DO_NL_S(MON_4);
  326. DO_NL_S(MON_5);
  327. DO_NL_S(MON_6);
  328. DO_NL_S(MON_7);
  329. DO_NL_S(MON_8);
  330. DO_NL_S(MON_9);
  331. DO_NL_S(MON_10);
  332. DO_NL_S(MON_11);
  333. DO_NL_S(MON_12);
  334. DO_NL_S(AM_STR);
  335. DO_NL_S(PM_STR);
  336. DO_NL_S(D_T_FMT);
  337. DO_NL_S(D_FMT);
  338. DO_NL_S(T_FMT);
  339. DO_NL_S(T_FMT_AMPM);
  340. DO_NL_S(ERA);
  341. DO_NL_S(ERA_YEAR); /* non SuSv3 */
  342. DO_NL_S(ERA_D_FMT);
  343. DO_NL_S(ALT_DIGITS);
  344. DO_NL_S(ERA_D_T_FMT);
  345. DO_NL_S(ERA_T_FMT);
  346. if (k > 50) {
  347. printf("error -- lc_time nl_item count > 50!\n");
  348. exit(EXIT_FAILURE);
  349. }
  350. {
  351. int r;
  352. for (r=0 ; r < lc_time_uniq ; r++) {
  353. if (!memcmp(lc_time_uniq_50[lc_time_uniq],
  354. lc_time_uniq_50[r], 50)) {
  355. break;
  356. }
  357. }
  358. if (r == lc_time_uniq) { /* new locale row */
  359. ++lc_time_uniq;
  360. if (lc_time_uniq > 255) {
  361. printf("too many unique lc_time rows!\n");
  362. exit(EXIT_FAILURE);
  363. }
  364. }
  365. locales[i].lc_time_row = r;
  366. }
  367. /* printf("\n"); */
  368. }
  369. /* printf(" done\n"); */
  370. m = k = 0;
  371. for (i=0 ; i < 50 ; i++) {
  372. k += lc_time_count[i];
  373. if (m < lc_time_count[i]) {
  374. m = lc_time_count[i];
  375. }
  376. }
  377. printf("buf-size=%d uniq=%d item_offsets=%d max=%d rows=%d\n",
  378. (int)(last - buf), uniq, k, m, lc_time_uniq);
  379. /* printf("total = %d * 50 + 2 * (50 + %d) + %d = %d\n", */
  380. /* num_locales, k, (int)(last - buf), */
  381. /* num_locales*50 + 2*(50 + k) + (int)(last - buf)); */
  382. printf("total = %d + %d * 50 + 2 * (50 + %d) + %d = %d\n",
  383. num_locales, lc_time_uniq, k, (int)(last - buf),
  384. i = num_locales + lc_time_uniq*50 + 2*(50 + k) + (int)(last - buf));
  385. total_size += i;
  386. dump_table8c("__lc_time_data", buf, (int)(last - buf));
  387. for (i=0 ; i < lc_time_uniq ; i++) {
  388. m = locales[i].lc_time_row;
  389. for (k=0 ; k < 50 ; k++) {
  390. buf[50*i + k] = (char)((unsigned char) lc_time_uniq_50[i][k]);
  391. }
  392. }
  393. dump_table8("__lc_time_rows", buf, lc_time_uniq * 50);
  394. buf16[0] =0;
  395. for (i=0 ; i < 50 - 1 ; i++) {
  396. buf16[i+1] = buf16[i] + lc_time_count[i];
  397. }
  398. dump_table16("__lc_time_item_offsets", buf16, 50);
  399. m = 0;
  400. for (k=0 ; k < 50 ; k++) {
  401. for (i=0 ; i < lc_time_count[k] ; i++) {
  402. buf16[m] = lc_time_item[k][i];
  403. ++m;
  404. }
  405. }
  406. dump_table16("__lc_time_item_idx", buf16, m);
  407. }
  408. static void dump_table8(const char *name, const char *tbl, int len)
  409. {
  410. int i;
  411. fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len);
  412. fprintf(ofp, "static const unsigned char %s[%d] = {", name, len);
  413. for (i=0 ; i < len ; i++) {
  414. if ((i % 12) == 0) {
  415. fprintf(ofp, "\n\t");
  416. }
  417. fprintf(ofp, "%#4x, ", (int)((unsigned char) tbl[i]));
  418. }
  419. fprintf(ofp, "\n};\n\n");
  420. }
  421. #define __C_isdigit(c) \
  422. ((sizeof(c) == sizeof(char)) \
  423. ? (((unsigned char)((c) - '0')) < 10) \
  424. : (((unsigned int)((c) - '0')) < 10))
  425. #define __C_isalpha(c) \
  426. ((sizeof(c) == sizeof(char)) \
  427. ? (((unsigned char)(((c) | 0x20) - 'a')) < 26) \
  428. : (((unsigned int)(((c) | 0x20) - 'a')) < 26))
  429. #define __C_isalnum(c) (__C_isalpha(c) || __C_isdigit(c))
  430. static void dump_table8c(const char *name, const char *tbl, int len)
  431. {
  432. int i;
  433. fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len);
  434. fprintf(ofp, "static const unsigned char %s[%d] = {", name, len);
  435. for (i=0 ; i < len ; i++) {
  436. if ((i % 12) == 0) {
  437. fprintf(ofp, "\n\t");
  438. }
  439. if (__C_isalnum(tbl[i]) || (tbl[i] == ' ')) {
  440. fprintf(ofp, " '%c', ", (int)((unsigned char) tbl[i]));
  441. } else {
  442. fprintf(ofp, "%#4x, ", (int)((unsigned char) tbl[i]));
  443. }
  444. }
  445. fprintf(ofp, "\n};\n\n");
  446. }
  447. static void dump_table16(const char *name, const int *tbl, int len)
  448. {
  449. int i;
  450. fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len);
  451. fprintf(ofp, "static const uint16_t %s[%d] = {", name, len);
  452. for (i=0 ; i < len ; i++) {
  453. if ((i % 8) == 0) {
  454. fprintf(ofp, "\n\t");
  455. }
  456. if (tbl[i] != (uint16_t) tbl[i]) {
  457. printf("error - falls outside uint16 range!\n");
  458. exit(EXIT_FAILURE);
  459. }
  460. fprintf(ofp, "%#6x, ", tbl[i]);
  461. }
  462. fprintf(ofp, "\n};\n\n");
  463. }
  464. #undef DO_NL_S
  465. static int lc_numeric_item[3][256];
  466. static int lc_numeric_count[3];
  467. static unsigned char lc_numeric_uniq_3[700][3];
  468. static int lc_numeric_uniq;
  469. #define DO_NL_S(X) lc_numeric_S(X, k++)
  470. static void lc_numeric_S(int X, int k)
  471. {
  472. int j, m;
  473. j = addstring(nl_langinfo(X));
  474. for (m=0 ; m < lc_numeric_count[k] ; m++) {
  475. if (lc_numeric_item[k][m] == j) {
  476. break;
  477. }
  478. }
  479. if (m == lc_numeric_count[k]) { /* new for this nl_item */
  480. if (m > 255) {
  481. printf("too many nl_item %d entries in lc_numeric\n", k);
  482. exit(EXIT_FAILURE);
  483. }
  484. lc_numeric_item[k][m] = j;
  485. ++lc_numeric_count[k];
  486. }
  487. /* printf("\\x%02x", m); */
  488. lc_numeric_uniq_3[lc_numeric_uniq][k] = m;
  489. }
  490. static void do_lc_numeric(void)
  491. {
  492. int i, k, m;
  493. last = buf+1;
  494. uniq = 1;
  495. *buf = 0;
  496. *idx = buf;
  497. for (i=0 ; i < num_locales ; i++) {
  498. k = 0;
  499. if (!setlocale(LC_ALL, locales[i].glibc_name)) {
  500. printf("setlocale(LC_ALL,%s) failed!\n",
  501. locales[i].glibc_name);
  502. }
  503. DO_NL_S(RADIXCHAR); /* DECIMAL_POINT */
  504. DO_NL_S(THOUSEP); /* THOUSANDS_SEP */
  505. DO_NL_S(GROUPING);
  506. if (k > 3) {
  507. printf("error -- lc_numeric nl_item count > 3!\n");
  508. exit(EXIT_FAILURE);
  509. }
  510. {
  511. int r;
  512. for (r=0 ; r < lc_numeric_uniq ; r++) {
  513. if (!memcmp(lc_numeric_uniq_3[lc_numeric_uniq],
  514. lc_numeric_uniq_3[r], 3)) {
  515. break;
  516. }
  517. }
  518. if (r == lc_numeric_uniq) { /* new locale row */
  519. ++lc_numeric_uniq;
  520. if (lc_numeric_uniq > 255) {
  521. printf("too many unique lc_numeric rows!\n");
  522. exit(EXIT_FAILURE);
  523. }
  524. }
  525. locales[i].lc_numeric_row = r;
  526. }
  527. }
  528. printf("buf-size=%d uniq=%d rows=%d\n",
  529. (int)(last - buf), uniq, lc_numeric_uniq);
  530. printf("total = %d + %d * 3 + %d = %d\n",
  531. num_locales, lc_numeric_uniq, (int)(last - buf),
  532. i = num_locales + lc_numeric_uniq*3 + (int)(last - buf));
  533. total_size += i;
  534. /* printf("buf-size=%d uniq=%d\n", (int)(last - buf), uniq); */
  535. dump_table8c("__lc_numeric_data", buf, (int)(last - buf));
  536. for (i=0 ; i < lc_numeric_uniq ; i++) {
  537. m = locales[i].lc_numeric_row;
  538. for (k=0 ; k < 3 ; k++) {
  539. buf[3*i + k] = (char)((unsigned char) lc_numeric_uniq_3[i][k]);
  540. }
  541. }
  542. dump_table8("__lc_numeric_rows", buf, lc_numeric_uniq * 3);
  543. buf16[0] =0;
  544. for (i=0 ; i < 3 - 1 ; i++) {
  545. buf16[i+1] = buf16[i] + lc_numeric_count[i];
  546. }
  547. dump_table16("__lc_numeric_item_offsets", buf16, 3);
  548. m = 0;
  549. for (k=0 ; k < 3 ; k++) {
  550. for (i=0 ; i < lc_numeric_count[k] ; i++) {
  551. buf16[m] = lc_numeric_item[k][i];
  552. ++m;
  553. }
  554. }
  555. dump_table16("__lc_numeric_item_idx", buf16, m);
  556. }
  557. #undef DO_NL_S
  558. /* #define NUM_NL_MONETARY 7 */
  559. #define NUM_NL_MONETARY (7+14+1)
  560. static int lc_monetary_item[NUM_NL_MONETARY][256];
  561. static int lc_monetary_count[NUM_NL_MONETARY];
  562. static unsigned char lc_monetary_uniq_X[700][NUM_NL_MONETARY];
  563. static int lc_monetary_uniq;
  564. #define DO_NL_S(X) lc_monetary_S(X, k++)
  565. /* #define DO_NL_C(X) printf("%#02x", (int)(unsigned char)(*nl_langinfo(X))); */
  566. #define DO_NL_C(X) lc_monetary_C(X, k++)
  567. static void lc_monetary_C(int X, int k)
  568. {
  569. int j, m;
  570. char c_buf[2];
  571. #warning fix the char entries for monetary... target signedness of char may be different!
  572. c_buf[1] = 0;
  573. c_buf[0] = *nl_langinfo(X);
  574. j = addstring(c_buf);
  575. for (m=0 ; m < lc_monetary_count[k] ; m++) {
  576. if (lc_monetary_item[k][m] == j) {
  577. break;
  578. }
  579. }
  580. if (m == lc_monetary_count[k]) { /* new for this nl_item */
  581. if (m > 255) {
  582. printf("too many nl_item %d entries in lc_monetary\n", k);
  583. exit(EXIT_FAILURE);
  584. }
  585. lc_monetary_item[k][m] = j;
  586. ++lc_monetary_count[k];
  587. }
  588. /* printf("\\x%02x", m); */
  589. lc_monetary_uniq_X[lc_monetary_uniq][k] = m;
  590. }
  591. static void lc_monetary_S(int X, int k)
  592. {
  593. int j, m;
  594. j = addstring(nl_langinfo(X));
  595. for (m=0 ; m < lc_monetary_count[k] ; m++) {
  596. if (lc_monetary_item[k][m] == j) {
  597. break;
  598. }
  599. }
  600. if (m == lc_monetary_count[k]) { /* new for this nl_item */
  601. if (m > 255) {
  602. printf("too many nl_item %d entries in lc_monetary\n", k);
  603. exit(EXIT_FAILURE);
  604. }
  605. lc_monetary_item[k][m] = j;
  606. ++lc_monetary_count[k];
  607. }
  608. /* printf("\\x%02x", m); */
  609. lc_monetary_uniq_X[lc_monetary_uniq][k] = m;
  610. }
  611. static void do_lc_monetary(void)
  612. {
  613. int i, k, m;
  614. last = buf+1;
  615. uniq = 1;
  616. *buf = 0;
  617. *idx = buf;
  618. for (i=0 ; i < num_locales ; i++) {
  619. k = 0;
  620. if (!setlocale(LC_ALL, locales[i].glibc_name)) {
  621. printf("setlocale(LC_ALL,%s) failed!\n",
  622. locales[i].glibc_name);
  623. }
  624. /* non SUSv3 */
  625. DO_NL_S(INT_CURR_SYMBOL);
  626. DO_NL_S(CURRENCY_SYMBOL);
  627. DO_NL_S(MON_DECIMAL_POINT);
  628. DO_NL_S(MON_THOUSANDS_SEP);
  629. DO_NL_S(MON_GROUPING);
  630. DO_NL_S(POSITIVE_SIGN);
  631. DO_NL_S(NEGATIVE_SIGN);
  632. DO_NL_C(INT_FRAC_DIGITS);
  633. DO_NL_C(FRAC_DIGITS);
  634. DO_NL_C(P_CS_PRECEDES);
  635. DO_NL_C(P_SEP_BY_SPACE);
  636. DO_NL_C(N_CS_PRECEDES);
  637. DO_NL_C(N_SEP_BY_SPACE);
  638. DO_NL_C(P_SIGN_POSN);
  639. DO_NL_C(N_SIGN_POSN);
  640. DO_NL_C(INT_P_CS_PRECEDES);
  641. DO_NL_C(INT_P_SEP_BY_SPACE);
  642. DO_NL_C(INT_N_CS_PRECEDES);
  643. DO_NL_C(INT_N_SEP_BY_SPACE);
  644. DO_NL_C(INT_P_SIGN_POSN);
  645. DO_NL_C(INT_N_SIGN_POSN);
  646. DO_NL_S(CRNCYSTR); /* CURRENCY_SYMBOL */
  647. /* printf("\n"); */
  648. if (k > NUM_NL_MONETARY) {
  649. printf("error -- lc_monetary nl_item count > %d!\n", NUM_NL_MONETARY);
  650. exit(EXIT_FAILURE);
  651. }
  652. {
  653. int r;
  654. for (r=0 ; r < lc_monetary_uniq ; r++) {
  655. if (!memcmp(lc_monetary_uniq_X[lc_monetary_uniq],
  656. lc_monetary_uniq_X[r], NUM_NL_MONETARY)) {
  657. break;
  658. }
  659. }
  660. if (r == lc_monetary_uniq) { /* new locale row */
  661. ++lc_monetary_uniq;
  662. if (lc_monetary_uniq > 255) {
  663. printf("too many unique lc_monetary rows!\n");
  664. exit(EXIT_FAILURE);
  665. }
  666. }
  667. locales[i].lc_monetary_row = r;
  668. }
  669. }
  670. printf("buf-size=%d uniq=%d rows=%d\n",
  671. (int)(last - buf), uniq, lc_monetary_uniq);
  672. printf("total = %d + %d * %d + %d = %d\n",
  673. num_locales, lc_monetary_uniq, NUM_NL_MONETARY, (int)(last - buf),
  674. i = num_locales + lc_monetary_uniq*NUM_NL_MONETARY + (int)(last - buf));
  675. total_size += i;
  676. dump_table8c("__lc_monetary_data", buf, (int)(last - buf));
  677. for (i=0 ; i < lc_monetary_uniq ; i++) {
  678. m = locales[i].lc_monetary_row;
  679. for (k=0 ; k < NUM_NL_MONETARY ; k++) {
  680. buf[NUM_NL_MONETARY*i + k] = (char)((unsigned char) lc_monetary_uniq_X[i][k]);
  681. }
  682. }
  683. dump_table8("__lc_monetary_rows", buf, lc_monetary_uniq * NUM_NL_MONETARY);
  684. buf16[0] =0;
  685. for (i=0 ; i < NUM_NL_MONETARY - 1 ; i++) {
  686. buf16[i+1] = buf16[i] + lc_monetary_count[i];
  687. }
  688. dump_table16("__lc_monetary_item_offsets", buf16, NUM_NL_MONETARY);
  689. m = 0;
  690. for (k=0 ; k < NUM_NL_MONETARY ; k++) {
  691. for (i=0 ; i < lc_monetary_count[k] ; i++) {
  692. buf16[m] = lc_monetary_item[k][i];
  693. ++m;
  694. }
  695. }
  696. dump_table16("__lc_monetary_item_idx", buf16, m);
  697. }
  698. #undef DO_NL_S
  699. static int lc_messages_item[2][256];
  700. static int lc_messages_count[2];
  701. static unsigned char lc_messages_uniq_2[700][2];
  702. static int lc_messages_uniq;
  703. #define DO_NL_S(X) lc_messages_S(X, k++)
  704. static void lc_messages_S(int X, int k)
  705. {
  706. int j, m;
  707. j = addstring(nl_langinfo(X));
  708. for (m=0 ; m < lc_messages_count[k] ; m++) {
  709. if (lc_messages_item[k][m] == j) {
  710. break;
  711. }
  712. }
  713. if (m == lc_messages_count[k]) { /* new for this nl_item */
  714. if (m > 255) {
  715. printf("too many nl_item %d entries in lc_messages\n", k);
  716. exit(EXIT_FAILURE);
  717. }
  718. lc_messages_item[k][m] = j;
  719. ++lc_messages_count[k];
  720. }
  721. /* printf("\\x%02x", m); */
  722. lc_messages_uniq_2[lc_messages_uniq][k] = m;
  723. }
  724. static void do_lc_messages(void)
  725. {
  726. int i, k, m;
  727. last = buf+1;
  728. uniq = 1;
  729. *buf = 0;
  730. *idx = buf;
  731. for (i=0 ; i < num_locales ; i++) {
  732. k = 0;
  733. if (!setlocale(LC_ALL, locales[i].glibc_name)) {
  734. printf("setlocale(LC_ALL,%s) failed!\n",
  735. locales[i].glibc_name);
  736. }
  737. DO_NL_S(YESEXPR);
  738. DO_NL_S(NOEXPR);
  739. if (k > 2) {
  740. printf("error -- lc_messages nl_item count > 2!\n");
  741. exit(EXIT_FAILURE);
  742. }
  743. {
  744. int r;
  745. for (r=0 ; r < lc_messages_uniq ; r++) {
  746. if (!memcmp(lc_messages_uniq_2[lc_messages_uniq],
  747. lc_messages_uniq_2[r], 2)) {
  748. break;
  749. }
  750. }
  751. if (r == lc_messages_uniq) { /* new locale row */
  752. ++lc_messages_uniq;
  753. if (lc_messages_uniq > 255) {
  754. printf("too many unique lc_messages rows!\n");
  755. exit(EXIT_FAILURE);
  756. }
  757. }
  758. locales[i].lc_messages_row = r;
  759. }
  760. }
  761. printf("buf-size=%d uniq=%d rows=%d\n",
  762. (int)(last - buf), uniq, lc_messages_uniq);
  763. printf("total = %d + %d * 2 + %d = %d\n",
  764. num_locales, lc_messages_uniq, (int)(last - buf),
  765. i = num_locales + lc_messages_uniq*2 + (int)(last - buf));
  766. total_size += i;
  767. /* printf("buf-size=%d uniq=%d\n", (int)(last - buf), uniq); */
  768. dump_table8c("__lc_messages_data", buf, (int)(last - buf));
  769. for (i=0 ; i < lc_messages_uniq ; i++) {
  770. m = locales[i].lc_messages_row;
  771. for (k=0 ; k < 2 ; k++) {
  772. buf[2*i + k] = (char)((unsigned char) lc_messages_uniq_2[i][k]);
  773. }
  774. }
  775. dump_table8("__lc_messages_rows", buf, lc_messages_uniq * 2);
  776. buf16[0] =0;
  777. for (i=0 ; i < 2 - 1 ; i++) {
  778. buf16[i+1] = buf16[i] + lc_messages_count[i];
  779. }
  780. dump_table16("__lc_messages_item_offsets", buf16, 2);
  781. m = 0;
  782. for (k=0 ; k < 2 ; k++) {
  783. for (i=0 ; i < lc_messages_count[k] ; i++) {
  784. buf16[m] = lc_messages_item[k][i];
  785. ++m;
  786. }
  787. }
  788. dump_table16("__lc_messages_item_idx", buf16, m);
  789. }
  790. static void read_at_mappings(void)
  791. {
  792. char *p;
  793. char *m;
  794. int mc = 0;
  795. do {
  796. if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) {
  797. if (!fgets(line_buf, sizeof(line_buf), fp)) {
  798. if (ferror(fp)) {
  799. printf("error reading file\n");
  800. exit(EXIT_FAILURE);
  801. }
  802. return; /* EOF */
  803. }
  804. if ((*line_buf == '#') && (line_buf[1] == '-')) {
  805. break;
  806. }
  807. continue;
  808. }
  809. if (*p == '@') {
  810. if (p[1] == 0) {
  811. printf("error: missing @modifier name\n");
  812. exit(EXIT_FAILURE);
  813. }
  814. m = p; /* save the modifier name */
  815. if (!(p = strtok(NULL, " \t\n")) || p[1] || (((unsigned char) *p) > 0x7f)) {
  816. printf("error: missing or illegal @modifier mapping char\n");
  817. exit(EXIT_FAILURE);
  818. }
  819. if (at_mappings[(int)((unsigned char) *p)]) {
  820. printf("error: reused @modifier mapping char\n");
  821. exit(EXIT_FAILURE);
  822. }
  823. at_mappings[(int)((unsigned char) *p)] = 1;
  824. at_mapto[mc] = *p;
  825. ++mc;
  826. *at_strings_end = (char)( (unsigned char) (strlen(m)) );
  827. strcpy(++at_strings_end, m+1);
  828. at_strings_end += (unsigned char) at_strings_end[-1];
  829. printf("@mapping: \"%s\" to '%c'\n", m, *p);
  830. if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) {
  831. printf("ignoring trailing text: %s...\n", p);
  832. }
  833. *line_buf = 0;
  834. continue;
  835. }
  836. break;
  837. } while (1);
  838. #if 0
  839. {
  840. p = at_strings;
  841. if (!*p) {
  842. printf("no @ strings\n");
  843. return;
  844. }
  845. do {
  846. printf("%s\n", p+1);
  847. p += 1 + (unsigned char) *p;
  848. } while (*p);
  849. }
  850. #endif
  851. }
  852. static void read_enable_disable(void)
  853. {
  854. char *p;
  855. do {
  856. if (!(p = strtok(line_buf, " =\t\n")) || (*p == '#')) {
  857. if (!fgets(line_buf, sizeof(line_buf), fp)) {
  858. if (ferror(fp)) {
  859. printf("error reading file\n");
  860. exit(EXIT_FAILURE);
  861. }
  862. return; /* EOF */
  863. }
  864. if ((*line_buf == '#') && (line_buf[1] == '-')) {
  865. break;
  866. }
  867. continue;
  868. }
  869. if (!strcmp(p, "UTF-8")) {
  870. if (!(p = strtok(NULL, " =\t\n"))
  871. || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) {
  872. printf("error: missing or illegal UTF-8 setting\n");
  873. exit(EXIT_FAILURE);
  874. }
  875. default_utf8 = (toupper(*p) == 'Y');
  876. printf("UTF-8 locales are %sabled\n", "dis\0en"+ (default_utf8 << 2));
  877. } else if (!strcmp(p, "8-BIT")) {
  878. if (!(p = strtok(NULL, " =\t\n"))
  879. || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) {
  880. printf("error: missing or illegal 8-BIT setting\n");
  881. exit(EXIT_FAILURE);
  882. }
  883. default_8bit = (toupper(*p) == 'Y');
  884. printf("8-BIT locales are %sabled\n", "dis\0en" + (default_8bit << 2));
  885. } else {
  886. break;
  887. }
  888. if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) {
  889. printf("ignoring trailing text: %s...\n", p);
  890. }
  891. *line_buf = 0;
  892. continue;
  893. } while (1);
  894. }
  895. #ifdef CODESET_LIST
  896. static int find_codeset_num(const char *cs)
  897. {
  898. int r = 2;
  899. char *s = CODESET_LIST;
  900. /* 7-bit is 1, UTF-8 is 2, 8-bits are > 2 */
  901. if (strcmp(cs, "UTF-8") != 0) {
  902. ++r;
  903. while (*s && strcmp(CODESET_LIST+ ((unsigned char) *s), cs)) {
  904. /* printf("tried %s\n", CODESET_LIST + ((unsigned char) *s)); */
  905. ++r;
  906. ++s;
  907. }
  908. if (!*s) {
  909. printf("error: unsupported codeset %s\n", cs);
  910. exit(EXIT_FAILURE);
  911. }
  912. }
  913. return r;
  914. }
  915. #else
  916. static int find_codeset_num(const char *cs)
  917. {
  918. int r = 2;
  919. /* 7-bit is 1, UTF-8 is 2, 8-bits are > 2 */
  920. if (strcmp(cs, "UTF-8") != 0) {
  921. printf("error: unsupported codeset %s\n", cs);
  922. exit(EXIT_FAILURE);
  923. }
  924. return r;
  925. }
  926. #endif
  927. static int find_at_string_num(const char *as)
  928. {
  929. int i = 0;
  930. char *p = at_strings;
  931. while (*p) {
  932. if (!strcmp(p+1, as)) {
  933. return i;
  934. }
  935. ++i;
  936. p += 1 + (unsigned char) *p;
  937. }
  938. printf("error: unmapped @string %s\n", as);
  939. exit(EXIT_FAILURE);
  940. }
  941. static void read_locale_list(void)
  942. {
  943. char *p;
  944. char *s;
  945. char *ln; /* locale name */
  946. char *ls; /* locale name ll_CC */
  947. char *as; /* at string */
  948. char *ds; /* dot string */
  949. char *cs; /* codeset */
  950. int i;
  951. typedef struct {
  952. char *glibc_name;
  953. char name[5];
  954. char dot_cs; /* 0 if no codeset specified */
  955. char cs;
  956. } locale_entry;
  957. /* First the C locale. */
  958. locales[0].glibc_name = locales[0].name;
  959. strncpy(locales[0].name,"C",5);
  960. locales[0].dot_cs = 0;
  961. locales[0].cs = 1; /* 7-bit encoding */
  962. ++num_locales;
  963. do {
  964. if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) {
  965. if (!fgets(line_buf, sizeof(line_buf), fp)) {
  966. if (ferror(fp)) {
  967. printf("error reading file\n");
  968. exit(EXIT_FAILURE);
  969. }
  970. return; /* EOF */
  971. }
  972. if ((*line_buf == '#') && (line_buf[1] == '-')) {
  973. break;
  974. }
  975. continue;
  976. }
  977. s = glibc_locale_names;
  978. for (i=0 ; i < num_locales ; i++) {
  979. if (!strcmp(s+1, p)) {
  980. break;
  981. }
  982. s += 1 + ((unsigned char) *s);
  983. }
  984. if (i < num_locales) {
  985. printf("ignoring dulplicate locale name: %s", p);
  986. *line_buf = 0;
  987. continue;
  988. }
  989. /* New locale, but don't increment num until codeset verified! */
  990. *s = (char)((unsigned char) (strlen(p) + 1));
  991. strcpy(s+1, p);
  992. locales[num_locales].glibc_name = s+1;
  993. ln = p; /* save locale name */
  994. if (!(p = strtok(NULL, " \t\n"))) {
  995. printf("error: missing codeset for locale %s\n", ln);
  996. exit(EXIT_FAILURE);
  997. }
  998. cs = p;
  999. i = find_codeset_num(p);
  1000. if ((i == 2) && !default_utf8) {
  1001. printf("ignoring UTF-8 locale %s\n", ln);
  1002. *line_buf = 0;
  1003. continue;
  1004. } else if ((i > 2) && !default_8bit) {
  1005. printf("ignoring 8-bit codeset locale %s\n", ln);
  1006. *line_buf = 0;
  1007. continue;
  1008. }
  1009. locales[num_locales].cs = (char)((unsigned char) i);
  1010. if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) {
  1011. printf("ignoring trailing text: %s...\n", p);
  1012. }
  1013. /* Now go back to locale string for .codeset and @modifier */
  1014. as = strtok(ln, "@");
  1015. if (as) {
  1016. as = strtok(NULL, "@");
  1017. }
  1018. ds = strtok(ln, ".");
  1019. if (ds) {
  1020. ds = strtok(NULL, ".");
  1021. }
  1022. ls = ln;
  1023. if ((strlen(ls) != 5) || (ls[2] != '_')) {
  1024. printf("error: illegal locale name %s\n", ls);
  1025. exit(EXIT_FAILURE);
  1026. }
  1027. i = 0; /* value for unspecified codeset */
  1028. if (ds) {
  1029. i = find_codeset_num(ds);
  1030. if ((i == 2) && !default_utf8) {
  1031. printf("ignoring UTF-8 locale %s\n", ln);
  1032. *line_buf = 0;
  1033. continue;
  1034. } else if ((i > 2) && !default_8bit) {
  1035. printf("ignoring 8-bit codeset locale %s\n", ln);
  1036. *line_buf = 0;
  1037. continue;
  1038. }
  1039. }
  1040. locales[num_locales].dot_cs = (char)((unsigned char) i);
  1041. if (as) {
  1042. i = find_at_string_num(as);
  1043. ls[2] = at_mapto[i];
  1044. }
  1045. memcpy(locales[num_locales].name, ls, 5);
  1046. /* printf("locale: %5.5s %2d %2d %s\n", */
  1047. /* locales[num_locales].name, */
  1048. /* locales[num_locales].cs, */
  1049. /* locales[num_locales].dot_cs, */
  1050. /* locales[num_locales].glibc_name */
  1051. /* ); */
  1052. ++num_locales;
  1053. *line_buf = 0;
  1054. } while (1);
  1055. }
  1056. static int le_cmp(const void *a, const void *b)
  1057. {
  1058. const locale_entry *p;
  1059. const locale_entry *q;
  1060. int r;
  1061. p = (const locale_entry *) a;
  1062. q = (const locale_entry *) b;
  1063. if (!(r = p->name[0] - q->name[0])
  1064. && !(r = p->name[1] - q->name[1])
  1065. && !(r = p->name[3] - q->name[3])
  1066. && !(r = p->name[4] - q->name[4])
  1067. && !(r = p->name[2] - q->name[2])
  1068. && !(r = -(p->cs - q->cs))
  1069. ) {
  1070. r = -(p->dot_cs - q->dot_cs);
  1071. /* Reverse the ordering of the codesets so UTF-8 comes last.
  1072. * Work-around (hopefully) for glibc bug affecting at least
  1073. * the euro currency symbol. */
  1074. }
  1075. return r;
  1076. }