/* gen_wc8bit.c -- generates "c8tables.h" (8-bit codeset ctype, case and
 * wide-char conversion tables) from the codeset mapping files named on
 * the command line. */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#include <stddef.h>
#include <wctype.h>
#include <limits.h>
  9. /* #define CTYPE_PACKED */
  10. #define UPLOW_IDX_SHIFT 3
  11. /* best if 2 unpacked or 3 packed */
  12. #define CTYPE_IDX_SHIFT 3
  13. /* 3 or 4 are very similar */
  14. #define C2WC_IDX_SHIFT 3
  15. #define CTYPE_IDX_LEN (128 >> (CTYPE_IDX_SHIFT))
  16. #define UPLOW_IDX_LEN (128 >> (UPLOW_IDX_SHIFT))
  17. #define C2WC_IDX_LEN (128 >> (C2WC_IDX_SHIFT))
  18. /* #ifdef CTYPE_PACKED */
  19. /* #define CTYPE_ROW_LEN (1 << ((CTYPE_IDX_SHIFT)-1)) */
  20. /* #else */
  21. #define CTYPE_ROW_LEN (1 << (CTYPE_IDX_SHIFT))
  22. /* #endif */
  23. #define UPLOW_ROW_LEN (1 << (UPLOW_IDX_SHIFT))
  24. #define C2WC_ROW_LEN (1 << (C2WC_IDX_SHIFT))
  25. #define MAX_WCHAR (0x2600-1)
  26. static unsigned char ctype_tbl[256 * CTYPE_ROW_LEN];
  27. static unsigned char uplow_tbl[256 * UPLOW_ROW_LEN];
  28. #ifdef DO_WIDE_CHAR
  29. static unsigned short c2wc_tbl[256 * C2WC_ROW_LEN];
  30. #endif
  31. static unsigned char tt[MAX_WCHAR+1];
  32. static unsigned char ti[MAX_WCHAR+1];
  33. static unsigned char xi[MAX_WCHAR+1];
  34. static int n_ctype_rows;
  35. static int n_uplow_rows;
  36. #ifdef DO_WIDE_CHAR
  37. static int n_c2wc_rows;
  38. #endif
  39. static int tt_num;
  40. static int ti_num;
  41. #define RANGE MAX_WCHAR
  42. #define TT_SHIFT 4
  43. #define TI_SHIFT 4
  44. #define II_LEN ((MAX_WCHAR+1) >> (TT_SHIFT+TI_SHIFT))
  45. typedef struct {
  46. unsigned long c2w[256];
  47. unsigned char w2c[MAX_WCHAR];
  48. unsigned char ii[II_LEN];
  49. unsigned char ctype_idx[CTYPE_IDX_LEN];
  50. unsigned char uplow_idx[UPLOW_IDX_LEN];
  51. unsigned char c2wc_idx[C2WC_IDX_LEN];
  52. } charset_data;
  53. /* Taking advantage of the C99 mutual-exclusion guarantees for the various
  54. * (w)ctype classes, including the descriptions of printing and control
  55. * (w)chars, we can place each in one of the following mutually-exlusive
  56. * subsets. Since there are less than 16, we can store the data for
  57. * each (w)chars in a nibble. In contrast, glibc uses an unsigned int
  58. * per (w)char, with one bit flag for each is* type. While this allows
  59. * a simple '&' operation to determine the type vs. a range test and a
  60. * little special handling for the "blank" and "xdigit" types in my
  61. * approach, it also uses 8 times the space for the tables on the typical
  62. * 32-bit archs we supported.*/
  63. enum {
  64. __CTYPE_unclassified = 0,
  65. __CTYPE_alpha_nonupper_nonlower,
  66. __CTYPE_alpha_lower,
  67. __CTYPE_alpha_upper_lower,
  68. __CTYPE_alpha_upper,
  69. __CTYPE_digit,
  70. __CTYPE_punct,
  71. __CTYPE_graph,
  72. __CTYPE_print_space_nonblank,
  73. __CTYPE_print_space_blank,
  74. __CTYPE_space_nonblank_noncntrl,
  75. __CTYPE_space_blank_noncntrl,
  76. __CTYPE_cntrl_space_nonblank,
  77. __CTYPE_cntrl_space_blank,
  78. __CTYPE_cntrl_nonspace,
  79. };
  80. int main(int argc, char **argv)
  81. {
  82. FILE *fp;
  83. FILE *out;
  84. charset_data csd[20];
  85. unsigned long max_wchar;
  86. unsigned char *p;
  87. int numsets;
  88. int i;
  89. int j;
  90. char buf[80];
  91. unsigned char row[256];
  92. #ifdef DO_WIDE_CHAR
  93. unsigned short wrow[256];
  94. #endif
  95. char codeset_list[500];
  96. char codeset_index[30];
  97. int codeset_list_end = 0;
  98. int total_size = 0;
  99. if (!setlocale(LC_CTYPE, "en_US.UTF-8")) {
  100. printf("setlocale(LC_CTYPE,\"en_US.UTF-8\") failed!\n");
  101. return EXIT_FAILURE;
  102. }
  103. if (!(out = fopen("c8tables.h","w"))) {
  104. printf("error: couldn't open file \"c8tables.h\"\n");
  105. return EXIT_FAILURE;
  106. }
  107. #if 0
  108. if (argc == 1) {
  109. /* User requested 8-bit codesets, but didn't list any... */
  110. /* Allow to build, just so this feature can be left on in config. */
  111. fprintf(out, "#ifdef __CTYPE_HAS_8_BIT_LOCALES\n");
  112. fprintf(out, "#warning ignoring 8 bit codesets request"
  113. " as no codesets specified.\n");
  114. fprintf(out, "#endif\n");
  115. fprintf(out, "#undef __CTYPE_HAS_8_BIT_LOCALES\n\n");
  116. fprintf(out, "#define NUM_CODESETS\t\t0\n");
  117. fprintf(out, "#define CODESET_LIST\t\t\"\"\n");
  118. fclose(out);
  119. return EXIT_SUCCESS;
  120. }
  121. /* fprintf(out, "#define __CTYPE_HAS_8_BIT_LOCALES\t1\n\n"); */
  122. fprintf(out, "#ifdef __CTYPE_HAS_8_BIT_LOCALES\n\n");
  123. #endif
  124. if (argc == 1) {
  125. fprintf(out, "#undef __CTYPE_HAS_8_BIT_LOCALES\n\n");
  126. fprintf(out, "#define NUM_CODESETS\t\t0\n");
  127. fprintf(out, "#define CODESET_LIST\t\t\"\"\n");
  128. } else {
  129. fprintf(out, "#define __CTYPE_HAS_8_BIT_LOCALES\t\t1\n\n");
  130. }
  131. fprintf(out, "#define Cctype_IDX_SHIFT\t%d\n", CTYPE_IDX_SHIFT);
  132. fprintf(out, "#define Cctype_IDX_LEN\t\t%d\n", CTYPE_IDX_LEN);
  133. #ifdef CTYPE_PACKED
  134. fprintf(out, "#define Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN >> 1);
  135. fprintf(out, "#define Cctype_PACKED\t\t1\n");
  136. #else
  137. fprintf(out, "#define Cctype_ROW_LEN\t\t%d\n", CTYPE_ROW_LEN);
  138. fprintf(out, "#undef Cctype_PACKED\n");
  139. #endif
  140. fprintf(out, "\n#define Cuplow_IDX_SHIFT\t%d\n", UPLOW_IDX_SHIFT);
  141. fprintf(out, "#define Cuplow_IDX_LEN\t\t%d\n", UPLOW_IDX_LEN);
  142. fprintf(out, "#define Cuplow_ROW_LEN\t\t%d\n", UPLOW_ROW_LEN);
  143. #ifdef DO_WIDE_CHAR
  144. fprintf(out, "\n#define Cc2wc_IDX_LEN\t\t%d\n", C2WC_IDX_LEN);
  145. fprintf(out, "#define Cc2wc_IDX_SHIFT\t\t%d\n", C2WC_IDX_SHIFT);
  146. fprintf(out, "#define Cc2wc_ROW_LEN\t\t%d\n", C2WC_ROW_LEN);
  147. #endif
  148. fprintf(out, "\ntypedef struct {\n");
  149. fprintf(out, "\tunsigned char idx8ctype[%d];\n", CTYPE_IDX_LEN);
  150. fprintf(out, "\tunsigned char idx8uplow[%d];\n", UPLOW_IDX_LEN);
  151. #ifdef DO_WIDE_CHAR
  152. fprintf(out, "\tunsigned char idx8c2wc[%d];\n", C2WC_IDX_LEN);
  153. fprintf(out, "\tunsigned char idx8wc2c[%d];\n", II_LEN);
  154. #endif
  155. fprintf(out, "} codeset_8_bit_t;\n\n");
  156. fprintf(out, "#ifdef WANT_DATA\n\n");
  157. fprintf(out, "static const codeset_8_bit_t codeset_8_bit[%d] = {\n", argc-1);
  158. max_wchar = 0x7f;
  159. numsets = 0;
  160. codeset_index[0] = 0;
  161. while (--argc) {
  162. if (!(fp = fopen(*++argv,"r"))) {
  163. printf("error: couldn't open file \"%s\"\n", *argv);
  164. return EXIT_FAILURE;
  165. }
  166. printf("processing %s... ", *argv);
  167. {
  168. char *s0;
  169. char *s1;
  170. int n;
  171. s0 = strrchr(*argv, '/');
  172. if (!s0) {
  173. s0 = *argv;
  174. } else {
  175. ++s0;
  176. }
  177. s1 = strchr(s0, '.');
  178. if (!s1) {
  179. n = strlen(s0);
  180. } else {
  181. n = s1 - s0;
  182. }
  183. /* if ((numsets == 0) && strncmp("ASCII", s0, n)) { */
  184. /* printf("error - first codeset isn't ASCII!\n"); */
  185. /* return EXIT_FAILURE; */
  186. /* } */
  187. if (numsets >= sizeof(codeset_index)) {
  188. printf("error - too many codesets!\n");
  189. return EXIT_FAILURE;
  190. }
  191. if (codeset_list_end + n + 1 + numsets + 1 + 1 >= 256) {
  192. printf("error - codeset list to big!\n");
  193. return EXIT_FAILURE;
  194. }
  195. codeset_index[numsets+1] = codeset_index[numsets] + n+1;
  196. strncpy(codeset_list + codeset_list_end, s0, n);
  197. codeset_list_end += (n+1);
  198. codeset_list[codeset_list_end - 1] = 0;
  199. fprintf(out, "\t{ /* %.*s */", n, s0);
  200. }
  201. memset(&csd[numsets],sizeof(charset_data),0);
  202. memset(xi, sizeof(xi), 0);
  203. {
  204. unsigned long c, wc;
  205. int lines;
  206. lines = 0;
  207. while (fgets(buf,sizeof(buf),fp)) {
  208. if ((2 != sscanf(buf, "{ %lx , %lx", &c, &wc))
  209. || (c >= 256) || (wc > MAX_WCHAR)) {
  210. printf("error: scanf failure! \"%s\"\n", buf);
  211. return EXIT_FAILURE;
  212. }
  213. /* don't put in w2c... dynamicly build tt instead. */
  214. if (c <= 0x7f) { /* check the 7bit entries but don't store */
  215. if (c != wc) {
  216. printf("error: c != wc in %s\n", buf);
  217. return EXIT_FAILURE;
  218. }
  219. csd[numsets].c2w[c] = wc;
  220. csd[numsets].w2c[wc] = 0; /* ignore */
  221. if (wc > max_wchar) {
  222. max_wchar = wc;
  223. }
  224. } else {
  225. csd[numsets].c2w[c] = wc;
  226. csd[numsets].w2c[wc] = c;
  227. if (wc > max_wchar) {
  228. max_wchar = wc;
  229. }
  230. }
  231. ++lines;
  232. }
  233. printf("%d lines ", lines);
  234. for (i = 0 ; i <= MAX_WCHAR ; i += (1 << TT_SHIFT)) {
  235. p = &csd[numsets].w2c[i];
  236. for (j = 0 ; j < tt_num ; j++) {
  237. if (!memcmp(p, &tt[j << TT_SHIFT], (1 << TT_SHIFT))) {
  238. break;
  239. }
  240. }
  241. if (j == tt_num) { /* new entry */
  242. memcpy(&tt[j << TT_SHIFT], p, (1 << TT_SHIFT));
  243. ++tt_num;
  244. }
  245. xi[i >> TT_SHIFT] = j;
  246. }
  247. for (i = 0 ; i <= (MAX_WCHAR >> TT_SHIFT) ; i += (1 << TI_SHIFT)) {
  248. p = &xi[i];
  249. for (j = 0 ; j < ti_num ; j++) {
  250. if (!memcmp(p, &ti[j << TI_SHIFT], (1 << TI_SHIFT))) {
  251. break;
  252. }
  253. }
  254. if (j == ti_num) { /* new entry */
  255. memcpy(&ti[j << TI_SHIFT], p, (1 << TI_SHIFT));
  256. ++ti_num;
  257. }
  258. csd[numsets].ii[i >> TI_SHIFT] = j;
  259. /* printf("%d ", i >> TI_SHIFT); */
  260. }
  261. #if 1
  262. fprintf(out, "\n\t\t/* idx8ctype data */\n\t\t{");
  263. for (i = 128 ; i < 256 ; i++) {
  264. wchar_t c;
  265. unsigned int d;
  266. /* if (!(i & 0x7)) { */
  267. /* fprintf(out, "\n"); */
  268. /* } */
  269. c = csd[numsets].c2w[i];
  270. if (c == 0) { /* non-existant char in codeset */
  271. d = __CTYPE_unclassified;
  272. } else if (iswdigit(c)) {
  273. d = __CTYPE_digit;
  274. } else if (iswalpha(c)) {
  275. d = __CTYPE_alpha_nonupper_nonlower;
  276. if (iswlower(c)) {
  277. d = __CTYPE_alpha_lower;
  278. if (iswupper(c)) {
  279. d = __CTYPE_alpha_upper_lower;
  280. }
  281. } else if (iswupper(c)) {
  282. d = __CTYPE_alpha_upper;
  283. }
  284. } else if (iswpunct(c)) {
  285. d = __CTYPE_punct;
  286. } else if (iswgraph(c)) {
  287. d = __CTYPE_graph;
  288. } else if (iswprint(c)) {
  289. d = __CTYPE_print_space_nonblank;
  290. if (iswblank(c)) {
  291. d = __CTYPE_print_space_blank;
  292. }
  293. } else if (iswspace(c) && !iswcntrl(c)) {
  294. d = __CTYPE_space_nonblank_noncntrl;
  295. if (iswblank(c)) {
  296. d = __CTYPE_space_blank_noncntrl;
  297. }
  298. } else if (iswcntrl(c)) {
  299. d = __CTYPE_cntrl_nonspace;
  300. if (iswspace(c)) {
  301. d = __CTYPE_cntrl_space_nonblank;
  302. if (iswblank(c)) {
  303. d = __CTYPE_cntrl_space_blank;
  304. }
  305. }
  306. } else {
  307. d = __CTYPE_unclassified;
  308. }
  309. #if 1
  310. row[i & (CTYPE_ROW_LEN-1)] = d;
  311. if ((i & (CTYPE_ROW_LEN-1)) == (CTYPE_ROW_LEN-1)) {
  312. p = ctype_tbl;
  313. for (j=0 ; j < n_ctype_rows ; j++) {
  314. if (!memcmp(p, row, CTYPE_ROW_LEN)) {
  315. break;
  316. }
  317. p += CTYPE_ROW_LEN;
  318. }
  319. if (j == n_ctype_rows) { /* new entry */
  320. if (++n_ctype_rows > 256) {
  321. printf("error -- to many ctype rows!\n");
  322. return EXIT_FAILURE;
  323. }
  324. memcpy(p, row, CTYPE_ROW_LEN);
  325. }
  326. csd[numsets].ctype_idx[i >> CTYPE_IDX_SHIFT] = j;
  327. if (!((i >> CTYPE_IDX_SHIFT) & 0x7)
  328. && (i != (127 + CTYPE_ROW_LEN))
  329. ) {
  330. fprintf(out, "\n\t\t ");
  331. }
  332. fprintf(out, " %#4x,", j);
  333. }
  334. #else
  335. fprintf(out, " %#4x,", d);
  336. #endif
  337. }
  338. #endif
  339. fprintf(out, " }");
  340. #if 1
  341. fprintf(out, ",\n\t\t/* idx8uplow data */\n\t\t{");
  342. for (i = 128 ; i < 256 ; i++) {
  343. wchar_t c, u, l;
  344. /* if (!(i & 0x7)) { */
  345. /* fprintf(out, "\n"); */
  346. /* } */
  347. c = csd[numsets].c2w[i];
  348. if ((c != 0) || 1) {
  349. u = towupper(c);
  350. l = towlower(c);
  351. if (u >= 0x80) u = csd[numsets].w2c[u];
  352. if (l >= 0x80) l = csd[numsets].w2c[l];
  353. if (u == 0) u = i; /* upper is missing, so ignore */
  354. if (l == 0) l = i; /* lower is missing, so ignore */
  355. #if 1
  356. /* store as unsigned char and let overflow handle it. */
  357. /* if ((((u-i) < CHAR_MIN) || ((u-i) > CHAR_MAX)) */
  358. /* || (((i-l) < CHAR_MIN) || ((i-l) > CHAR_MAX)) */
  359. /* ) { */
  360. /* printf("error - uplow diff out of range! %d %ld %ld\n", */
  361. /* i, u, l); */
  362. /* return EXIT_FAILURE; */
  363. /* } */
  364. row[i & (UPLOW_ROW_LEN-1)] = ((l==i) ? (u-i) : (i-l));
  365. if ((i & (UPLOW_ROW_LEN-1)) == (UPLOW_ROW_LEN-1)) {
  366. p = uplow_tbl;
  367. for (j=0 ; j < n_uplow_rows ; j++) {
  368. if (!memcmp(p, row, UPLOW_ROW_LEN)) {
  369. break;
  370. }
  371. p += UPLOW_ROW_LEN;
  372. }
  373. if (j == n_uplow_rows) { /* new entry */
  374. if (++n_uplow_rows > 256) {
  375. printf("error -- to many uplow rows!\n");
  376. return EXIT_FAILURE;
  377. }
  378. memcpy(p, row, UPLOW_ROW_LEN);
  379. }
  380. csd[numsets].uplow_idx[i >> UPLOW_IDX_SHIFT] = j;
  381. if (!((i >> UPLOW_IDX_SHIFT) & 0x7)
  382. && (i != (127 + UPLOW_ROW_LEN))
  383. ) {
  384. fprintf(out, "\n\t\t ");
  385. }
  386. fprintf(out, " %#4x,", j);
  387. }
  388. #elif 0
  389. if (!(i & 0x7) && i) {
  390. fprintf(out, "\n");
  391. }
  392. fprintf(out, " %4ld,", (l==i) ? (u-i) : (i-l));
  393. /* fprintf(out, " %4ld,", (l==i) ? u : l); */
  394. #else
  395. if ((u != i) || (l != i)) {
  396. #if 0
  397. fprintf(out, " %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, %#08lx, \n",
  398. (unsigned long) i,
  399. (unsigned long) c,
  400. (unsigned long) l,
  401. (unsigned long) towlower(c),
  402. (unsigned long) u,
  403. (unsigned long) towupper(c));
  404. #else
  405. fprintf(out, " %#08lx, %8ld, %d, %8ld, %d, %#08lx\n",
  406. (unsigned long) i,
  407. (long) (l - i),
  408. iswupper(c),
  409. (long) (i - u),
  410. iswlower(c),
  411. (unsigned long) c);
  412. #endif
  413. }
  414. #endif
  415. }
  416. }
  417. fprintf(out, " }");
  418. #endif
  419. #ifndef DO_WIDE_CHAR
  420. fprintf(out,"\n");
  421. #else /* DO_WIDE_CHAR */
  422. #if 1
  423. fprintf(out, ",\n\t\t/* idx8c2wc data */\n\t\t{");
  424. for (i = 128 ; i < 256 ; i++) {
  425. #if 1
  426. wrow[i & (C2WC_ROW_LEN-1)] = csd[numsets].c2w[i];
  427. if ((i & (C2WC_ROW_LEN-1)) == (C2WC_ROW_LEN-1)) {
  428. p = (char *) c2wc_tbl;
  429. for (j=0 ; j < n_c2wc_rows ; j++) {
  430. if (!memcmp(p, (char *) wrow, 2*C2WC_ROW_LEN)) {
  431. break;
  432. }
  433. p += 2*C2WC_ROW_LEN;
  434. }
  435. if (j == n_c2wc_rows) { /* new entry */
  436. if (++n_c2wc_rows > 256) {
  437. printf("error -- to many c2wc rows!\n");
  438. return EXIT_FAILURE;
  439. }
  440. memcpy(p, (char *) wrow, 2*C2WC_ROW_LEN);
  441. }
  442. csd[numsets].c2wc_idx[i >> C2WC_IDX_SHIFT] = j;
  443. if (!((i >> C2WC_IDX_SHIFT) & 0x7)
  444. && (i != (127 + C2WC_ROW_LEN))
  445. ) {
  446. fprintf(out, "\n\t\t ");
  447. }
  448. fprintf(out, " %#4x,", j);
  449. }
  450. #else
  451. if (!(i & 0x7) && i) {
  452. fprintf(out, "\n");
  453. }
  454. fprintf(out, " %#6lx,", csd[numsets].c2w[i]);
  455. #endif
  456. }
  457. fprintf(out, " },\n");
  458. #endif
  459. #if 1
  460. /* fprintf(out, "\nII_LEN = %d\n", II_LEN); */
  461. fprintf(out, "\t\t/* idx8wc2c data */\n\t\t{");
  462. for (i = 0 ; i < II_LEN ; i++) {
  463. if (!(i & 0x7) && i) {
  464. fprintf(out, "\n\t\t ");
  465. }
  466. fprintf(out, " %#4x,", csd[numsets].ii[i]);
  467. }
  468. fprintf(out, " }\n");
  469. #endif
  470. #endif /* DO_WIDE_CHAR */
  471. fprintf(out, "\t},\n");
  472. }
  473. ++numsets;
  474. printf("done\n");
  475. }
  476. fprintf(out, "};\n");
  477. fprintf(out, "\n#endif /* WANT_DATA */\n");
  478. #ifdef DO_WIDE_CHAR
  479. fprintf(out, "\n");
  480. fprintf(out, "#define Cwc2c_DOMAIN_MAX\t%#x\n", RANGE);
  481. fprintf(out, "#define Cwc2c_TI_SHIFT\t\t%d\n", TI_SHIFT);
  482. fprintf(out, "#define Cwc2c_TT_SHIFT\t\t%d\n", TT_SHIFT);
  483. fprintf(out, "#define Cwc2c_II_LEN\t\t%d\n", II_LEN);
  484. fprintf(out, "#define Cwc2c_TI_LEN\t\t%d\n", ti_num << TI_SHIFT);
  485. fprintf(out, "#define Cwc2c_TT_LEN\t\t%d\n", tt_num << TT_SHIFT);
  486. fprintf(out, "\n");
  487. fprintf(out, "\n#define Cwc2c_TBL_LEN\t\t%d\n",
  488. (ti_num << TI_SHIFT) + (tt_num << TT_SHIFT));
  489. fprintf(out, "#ifdef WANT_DATA\n\n");
  490. fprintf(out, "static const unsigned char Cwc2c_data[%d] = {\n",
  491. (ti_num << TI_SHIFT) + (tt_num << TT_SHIFT));
  492. fprintf(out, "\t/* ti_table */\n\t");
  493. for (i=0 ; i < ti_num << TI_SHIFT ; i++) {
  494. if (!(i & 7) && i) {
  495. fprintf(out, "\n\t");
  496. }
  497. fprintf(out, " %#4x,", ti[i]);
  498. }
  499. fprintf(out, "\n");
  500. fprintf(out, "\t/* tt_table */\n\t");
  501. for (i=0 ; i < tt_num << TT_SHIFT ; i++) {
  502. if (!(i & 7) && i) {
  503. fprintf(out, "\n\t");
  504. }
  505. fprintf(out, " %#4x,", tt[i]);
  506. }
  507. fprintf(out, "\n};\n");
  508. fprintf(out, "\n#endif /* WANT_DATA */\n");
  509. #endif /* DO_WIDE_CHAR */
  510. fprintf(out, "\n#define Cuplow_TBL_LEN\t\t%d\n",
  511. n_uplow_rows * UPLOW_ROW_LEN);
  512. fprintf(out, "\n#ifdef WANT_DATA\n\n");
  513. fprintf(out, "\nstatic const unsigned char Cuplow_data[%d] = {\n",
  514. n_uplow_rows * UPLOW_ROW_LEN);
  515. p = uplow_tbl;
  516. for (j=0 ; j < n_uplow_rows ; j++) {
  517. fprintf(out, "\t");
  518. for (i=0 ; i < UPLOW_ROW_LEN ; i++) {
  519. fprintf(out, " %#4x,", (unsigned int)((unsigned char) p[i]));
  520. }
  521. fprintf(out, "\n");
  522. p += UPLOW_ROW_LEN;
  523. }
  524. fprintf(out, "};\n");
  525. fprintf(out, "\n#endif /* WANT_DATA */\n");
  526. fprintf(out, "\n#define Cctype_TBL_LEN\t\t%d\n",
  527. #ifdef CTYPE_PACKED
  528. n_ctype_rows * CTYPE_ROW_LEN / 2
  529. #else
  530. n_ctype_rows * CTYPE_ROW_LEN
  531. #endif
  532. );
  533. fprintf(out, "\n#ifdef WANT_DATA\n\n");
  534. fprintf(out, "\nstatic const unsigned char Cctype_data[%d] = {\n",
  535. #ifdef CTYPE_PACKED
  536. n_ctype_rows * CTYPE_ROW_LEN / 2
  537. #else
  538. n_ctype_rows * CTYPE_ROW_LEN
  539. #endif
  540. );
  541. p = ctype_tbl;
  542. for (j=0 ; j < n_ctype_rows ; j++) {
  543. fprintf(out, "\t");
  544. for (i=0 ; i < CTYPE_ROW_LEN ; i++) {
  545. #ifdef CTYPE_PACKED
  546. fprintf(out, " %#4x,", (unsigned int)(p[i] + (p[i+1] << 4)));
  547. ++i;
  548. #else
  549. fprintf(out, " %#4x,", (unsigned int)p[i]);
  550. #endif
  551. }
  552. fprintf(out, "\n");
  553. p += CTYPE_ROW_LEN;
  554. }
  555. fprintf(out, "};\n");
  556. fprintf(out, "\n#endif /* WANT_DATA */\n");
  557. #ifdef DO_WIDE_CHAR
  558. fprintf(out, "\n#define Cc2wc_TBL_LEN\t\t%d\n",
  559. n_c2wc_rows * C2WC_ROW_LEN);
  560. fprintf(out, "\n#ifdef WANT_DATA\n\n");
  561. fprintf(out, "\nstatic const unsigned short Cc2wc_data[%d] = {\n",
  562. n_c2wc_rows * C2WC_ROW_LEN);
  563. p = (char *) c2wc_tbl;
  564. for (j=0 ; j < n_c2wc_rows ; j++) {
  565. fprintf(out, "\t");
  566. for (i=0 ; i < C2WC_ROW_LEN ; i++) {
  567. fprintf(out, " %#6x,", (unsigned int)(((unsigned short *)p)[i]));
  568. }
  569. fprintf(out, "\n");
  570. p += 2*C2WC_ROW_LEN;
  571. }
  572. fprintf(out, "};\n");
  573. fprintf(out, "\n#endif /* WANT_DATA */\n");
  574. #endif /* DO_WIDE_CHAR */
  575. fprintf(out, "\n\n");
  576. fprintf(out, "#define NUM_CODESETS\t\t%d\n", numsets);
  577. fprintf(out, "#define CODESET_LIST \\\n\t\"");
  578. for (i=0 ; i < numsets ; i++) {
  579. fprintf(out, "\\x%02x", numsets + 1 + (unsigned char) codeset_index[i]);
  580. if (((i & 7) == 7) && (i + 1 < numsets)) {
  581. fprintf(out, "\" \\\n\t\"");
  582. }
  583. }
  584. fprintf(out, "\" \\\n\t\"\\0\"");
  585. for (i=0 ; i < numsets ; i++) {
  586. fprintf(out, " \\\n\t\"%s\\0\"",
  587. codeset_list + ((unsigned char)codeset_index[i]));
  588. }
  589. fprintf(out, "\n\n");
  590. for (i=0 ; i < numsets ; i++) {
  591. char buf[30];
  592. char *z;
  593. strcpy(buf, codeset_list + ((unsigned char)codeset_index[i]));
  594. for (z=buf ; *z ; z++) {
  595. if (*z == '-') {
  596. *z = '_';
  597. }
  598. }
  599. fprintf(out, "#define __CTYPE_HAS_CODESET_%s\n", buf);
  600. }
  601. #ifdef DO_WIDE_CHAR
  602. fprintf(out, "#define __CTYPE_HAS_CODESET_UTF_8\n");
  603. #endif /* DO_WIDE_CHAR */
  604. #if 0
  605. fprintf(out, "\n#endif /* __CTYPE_HAS_8_BIT_LOCALES */\n\n");
  606. #endif
  607. fclose(out);
  608. total_size = 0;
  609. #ifdef DO_WIDE_CHAR
  610. printf("tt_num = %d ti_num = %d\n", tt_num, ti_num);
  611. printf("max_wchar = %#lx\n", max_wchar);
  612. printf("size is %d * %d + %d * %d + %d * %d = %d\n",
  613. tt_num, 1 << TT_SHIFT, ti_num, 1 << TI_SHIFT,
  614. ((MAX_WCHAR >> (TT_SHIFT + TI_SHIFT)) + 1), numsets,
  615. j = tt_num * (1 << TT_SHIFT) + ti_num * (1 << TI_SHIFT)
  616. + ((MAX_WCHAR >> (TT_SHIFT + TI_SHIFT)) + 1) * numsets);
  617. total_size += j;
  618. #endif /* DO_WIDE_CHAR */
  619. #ifdef CTYPE_PACKED
  620. i = 2;
  621. #else
  622. i = 1;
  623. #endif
  624. printf("ctype - CTYPE_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n",
  625. CTYPE_IDX_SHIFT, numsets, CTYPE_IDX_LEN, n_ctype_rows, CTYPE_ROW_LEN / i,
  626. j = numsets * CTYPE_IDX_LEN + n_ctype_rows * CTYPE_ROW_LEN / i);
  627. total_size += j;
  628. printf("uplow - UPLOW_IDX_SHIFT = %d -- %d * %d + %d * %d = %d\n",
  629. UPLOW_IDX_SHIFT, numsets, UPLOW_IDX_LEN, n_uplow_rows, UPLOW_ROW_LEN,
  630. j = numsets * UPLOW_IDX_LEN + n_uplow_rows * UPLOW_ROW_LEN);
  631. total_size += j;
  632. #ifdef DO_WIDE_CHAR
  633. printf("c2wc - C2WC_IDX_SHIFT = %d -- %d * %d + 2 * %d * %d = %d\n",
  634. C2WC_IDX_SHIFT, numsets, C2WC_IDX_LEN, n_c2wc_rows, C2WC_ROW_LEN,
  635. j = numsets * C2WC_IDX_LEN + 2 * n_c2wc_rows * C2WC_ROW_LEN);
  636. total_size += j;
  637. #endif /* DO_WIDE_CHAR */
  638. printf("total size = %d\n", total_size);
  639. /* for (i=0 ; i < numsets ; i++) { */
  640. /* printf("codeset_index[i] = %d codeset_list[ci[i]] = \"%s\"\n", */
  641. /* (unsigned char) codeset_index[i], */
  642. /* codeset_list + ((unsigned char)codeset_index[i])); */
  643. /* } */
  644. return EXIT_SUCCESS;
  645. }