gen_wctype.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815
  1. /*
  2. * Copyright (C) 2000-2006 Erik Andersen <andersen@uclibc.org>
  3. *
  4. * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
  5. */
  6. #define _GNU_SOURCE
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <locale.h>
  11. #include <wctype.h>
  12. #include <limits.h>
  13. #include <stdint.h>
  14. #include <wchar.h>
  15. #include <ctype.h>
  16. #include "include/bits/uClibc_charclass.h"
  17. /* 0x9 : space blank */
  18. /* 0xa : space */
  19. /* 0xb : space */
  20. /* 0xc : space */
  21. /* 0xd : space */
  22. /* 0x20 : space blank */
  23. /* 0x1680 : space blank */
  24. /* 0x2000 : space blank */
  25. /* 0x2001 : space blank */
  26. /* 0x2002 : space blank */
  27. /* 0x2003 : space blank */
  28. /* 0x2004 : space blank */
  29. /* 0x2005 : space blank */
  30. /* 0x2006 : space blank */
  31. /* 0x2008 : space blank */
  32. /* 0x2009 : space blank */
  33. /* 0x200a : space blank */
  34. /* 0x200b : space blank */
  35. /* 0x2028 : space */
  36. /* 0x2029 : space */
  37. /* 0x3000 : space blank */
  38. /* typecount[ 0] = 88670 C_alpha_nonupper_nonlower */
  39. /* typecount[ 1] = 742 C_alpha_lower */
  40. /* typecount[ 2] = 4 C_alpha_upper_lower */
  41. /* typecount[ 3] = 731 C_alpha_upper */
  42. /* typecount[ 4] = 10 C_digit */
  43. /* typecount[ 5] = 10270 C_punct */
  44. /* typecount[ 6] = 0 C_graph */
  45. /* typecount[ 7] = 0 C_print_space_nonblank */
  46. /* typecount[ 8] = 14 C_print_space_blank */
  47. /* typecount[ 9] = 0 C_space_nonblank_noncntrl */
  48. /* typecount[10] = 0 C_space_blank_noncntrl */
  49. /* typecount[11] = 6 C_cntrl_space_nonblank */
  50. /* typecount[12] = 1 C_cntrl_space_blank */
  51. /* typecount[13] = 60 C_cntrl_nonspace */
  52. /* typecount[14] = 96100 C_unclassified */
  53. /* typecount[15] = 0 empty_slot */
  /* RANGE is the highest wide character value covered by the generated
   * tables.  The last branch is the active one; change the first "#if 0"
   * to "#if 1" to restrict wchars to 16 bits, or enable the middle branch
   * to stop at 0x1ffff. */
  #if 0
  #define RANGE 0xffffUL /* Restrict for 16-bit wchar_t... */
  #elif 0
  #define RANGE 0x1ffffUL
  #else
  #define RANGE 0x2ffffUL
  #endif
  /* Class-test macros.  D is a class designator (one of the __CTYPE_*
   * values) and C is the wide character itself; only mywxdigit() looks
   * at C.  The range tests rely on the numeric ordering of the __CTYPE_*
   * designators (NOTE(review): they come from uClibc_charclass.h, which
   * is not visible here -- confirm the ordering there before changing
   * any bound).
   *
   * Every argument is parenthesized so that an expression argument
   * (e.g. a conditional) expands with the intended precedence. */
  #define mywalnum(D,C) ((unsigned)((D) - 1) <= (__CTYPE_digit - 1))
  #define mywalpha(D,C) ((unsigned)((D) - 1) <= (__CTYPE_alpha_upper - 1))
  #define mywblank(D,C) ((unsigned)((D) - __CTYPE_print_space_nonblank) <= 5 && ((D) & 1))
  #define mywcntrl(D,C) ((unsigned)((D) - __CTYPE_cntrl_space_nonblank) <= 2)
  #define mywdigit(D,C) ((D) == __CTYPE_digit)
  #define mywgraph(D,C) ((unsigned)((D) - 1) <= (__CTYPE_graph - 1))
  #define mywlower(D,C) ((unsigned)((D) - __CTYPE_alpha_lower) <= 1)
  #define mywprint(D,C) ((unsigned)((D) - 1) <= (__CTYPE_print_space_blank - 1))
  #define mywpunct(D,C) ((D) == __CTYPE_punct)
  #define mywspace(D,C) ((unsigned)((D) - __CTYPE_print_space_nonblank) <= 5)
  #define mywupper(D,C) ((unsigned)((D) - __CTYPE_alpha_upper_lower) <= 1)
  /* isxdigit cannot be derived from the designator alone.
   * But that's ok as isxdigit() (and isdigit() too) are locale-invariant,
   * so the letters a-f/A-F are tested directly on the character value. */
  #define mywxdigit(D,C) (mywdigit(D,C) || (unsigned)(((C) | 0x20) - 'a') <= 5)
  /* One distinct pair of case-conversion offsets: main() stores
   * towlower(c) - c in `l` and towupper(c) - c in `u`. */
  typedef struct {
      short l, u;
  } uldiff_entry;
  /* Result of newopt(): three byte arrays (ii, ti, ut) laid out back to
   * back in the single buffer that `ii` points to, plus their lengths and
   * the two shift counts used to split a character value into indices. */
  typedef struct {
      uint16_t ii_len, ti_len, ut_len;    /* byte lengths of ii, ti and ut */
      unsigned char ii_shift, ti_shift;   /* shift counts chosen by newopt() */
      unsigned char *ii, *ti, *ut;        /* ti = ii + ii_len, ut = ti + ti_len */
  } table_data;
  /* Verbosity level; incremented once per "-v" argument in main(). */
  static unsigned verbose;

  /* Print a printf-style diagnostic to stderr when verbose is nonzero.
   * Wrapped in do { } while (0) so the macro behaves as one statement and
   * cannot capture an `else` that follows the invocation. */
  #define verbose_msg(...) \
      do { \
          if (verbose) { \
              fprintf(stderr, __VA_ARGS__); \
          } \
      } while (0)
  94. void output_table(const char *name, table_data *tbl)
  95. {
  96. size_t i;
  97. printf("#define __LOCALE_DATA_WC%s_II_LEN %7u\n", name, tbl->ii_len);
  98. printf("#define __LOCALE_DATA_WC%s_TI_LEN %7u\n", name, tbl->ti_len);
  99. printf("#define __LOCALE_DATA_WC%s_UT_LEN %7u\n", name, tbl->ut_len);
  100. printf("#define __LOCALE_DATA_WC%s_II_SHIFT %7u\n", name, tbl->ii_shift);
  101. printf("#define __LOCALE_DATA_WC%s_TI_SHIFT %7u\n", name, tbl->ti_shift);
  102. printf("\n#ifdef WANT_WC%s_data\n", name);
  103. i = tbl->ii_len + tbl->ti_len + tbl->ut_len;
  104. printf("\nstatic const unsigned char __LOCALE_DATA_WC%s_data[%zu] = {", name, i);
  105. for (i = 0; i < tbl->ii_len; i++) {
  106. if (i % 12 == 0) {
  107. printf("\n");
  108. }
  109. printf(" %#04x,", tbl->ii[i]);
  110. }
  111. for (i = 0; i < tbl->ti_len; i++) {
  112. if (i % 12 == 0) {
  113. printf("\n");
  114. }
  115. printf(" %#04x,", tbl->ti[i]);
  116. }
  117. for (i = 0; i < tbl->ut_len; i++) {
  118. if (i % 12 == 0) {
  119. printf("\n");
  120. }
  121. printf(" %#04x,", tbl->ut[i]);
  122. }
  123. printf("\n};\n\n");
  124. printf("#endif /* WANT_WC%s_data */\n\n", name);
  125. }
  126. static void dump_table_data(table_data *tbl)
  127. {
  128. verbose_msg("ii_shift = %d ti_shift = %d\n"
  129. "ii_len = %d ti_len = %d ut_len = %d\n"
  130. "total = %d\n",
  131. tbl->ii_shift, tbl->ti_shift,
  132. tbl->ii_len, tbl->ti_len, tbl->ut_len,
  133. (int) tbl->ii_len + (int) tbl->ti_len + (int) tbl->ut_len);
  134. }
  /* Number of bytes nu_memcmp() compares; set by the caller before qsort(). */
  static size_t nu_val;

  /* qsort() comparator for arrays of `unsigned char *`: orders two block
   * pointers by the first nu_val bytes of the blocks they point to. */
  int nu_memcmp(const void *a, const void *b)
  {
      unsigned char *const *lhs = a;
      unsigned char *const *rhs = b;

      return memcmp(*lhs, *rhs, nu_val);
  }
  141. static size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl);
  142. #define MAXTO 255 /* Restrict to minimal unsigned char max. */
  143. int main(int argc, char **argv)
  144. {
  145. long int u, l, tt;
  146. size_t smallest, t;
  147. unsigned int c;
  148. unsigned int d;
  149. int i, n;
  150. int ul_count = 0;
  151. uldiff_entry uldiff[MAXTO];
  152. table_data cttable;
  153. table_data ultable;
  154. #if 0
  155. table_data combtable;
  156. table_data widthtable;
  157. long int last_comb = 0;
  158. #endif
  159. unsigned char wct[(RANGE/2)+1]; /* wctype table (nibble per wchar) */
  160. unsigned char ult[RANGE+1]; /* upper/lower table */
  161. unsigned char combt[(RANGE/4)+1]; /* combining */
  162. unsigned char widtht[(RANGE/4)+1]; /* width */
  163. wctrans_t totitle;
  164. wctype_t is_comb, is_comb3;
  165. long int typecount[16];
  166. const char *typename[16];
  167. static const char empty_slot[] = "empty_slot";
  168. int built = 0;
  169. #define INIT_TYPENAME(X) typename[__CTYPE_##X] = "C_" #X
  170. for (i = 0; i < 16; i++) {
  171. typename[i] = empty_slot;
  172. }
  173. INIT_TYPENAME(unclassified);
  174. INIT_TYPENAME(alpha_nonupper_nonlower);
  175. INIT_TYPENAME(alpha_lower);
  176. INIT_TYPENAME(alpha_upper_lower);
  177. INIT_TYPENAME(alpha_upper);
  178. INIT_TYPENAME(digit);
  179. INIT_TYPENAME(punct);
  180. INIT_TYPENAME(graph);
  181. INIT_TYPENAME(print_space_nonblank);
  182. INIT_TYPENAME(print_space_blank);
  183. INIT_TYPENAME(space_nonblank_noncntrl);
  184. INIT_TYPENAME(space_blank_noncntrl);
  185. INIT_TYPENAME(cntrl_space_nonblank);
  186. INIT_TYPENAME(cntrl_space_blank);
  187. INIT_TYPENAME(cntrl_nonspace);
  188. memset(&cttable, 0, sizeof(table_data));
  189. memset(&ultable, 0, sizeof(table_data));
  190. #if 0
  191. memset(combtable, 0, sizeof(table_data));
  192. memset(widthtable, 0, sizeof(table_data));
  193. #endif
  194. setvbuf(stdout, NULL, _IONBF, 0);
  195. while (--argc) {
  196. ++argv;
  197. if (!strcmp(*argv, "-v")) {
  198. ++verbose;
  199. continue;
  200. }
  201. /* setlocale might be just a stub */
  202. /* if (!setlocale(LC_CTYPE, *argv)) {
  203. verbose_msg("setlocale(LC_CTYPE,%s) failed! Skipping this locale...\n", *argv);
  204. continue;
  205. }
  206. */
  207. if (!(totitle = wctrans("totitle"))) {
  208. verbose_msg("no totitle transformation.\n");
  209. }
  210. if (!(is_comb = wctype("combining"))) {
  211. verbose_msg("no combining wctype.\n");
  212. }
  213. if (!(is_comb3 = wctype("combining_level3"))) {
  214. verbose_msg("no combining_level3 wctype.\n");
  215. }
  216. if (!built) {
  217. built = 1;
  218. ul_count = 1;
  219. uldiff[0].u = uldiff[0].l = 0;
  220. memset(wct, 0, sizeof(wct));
  221. memset(combt, 0, sizeof(combt));
  222. memset(widtht, 0, sizeof(widtht));
  223. for (i = 0; i < 16; i++) {
  224. typecount[i] = 0;
  225. }
  226. for (c = 0; c <= RANGE; c++) {
  227. if (iswdigit(c)) {
  228. d = __CTYPE_digit;
  229. } else if (iswalpha(c)) {
  230. d = __CTYPE_alpha_nonupper_nonlower;
  231. if (iswlower(c)) {
  232. d = __CTYPE_alpha_lower;
  233. if (iswupper(c)) {
  234. d = __CTYPE_alpha_upper_lower;
  235. }
  236. } else if (iswupper(c)) {
  237. d = __CTYPE_alpha_upper;
  238. }
  239. } else if (iswpunct(c)) {
  240. d = __CTYPE_punct;
  241. } else if (iswgraph(c)) {
  242. d = __CTYPE_graph;
  243. } else if (iswprint(c)) {
  244. d = __CTYPE_print_space_nonblank;
  245. if (iswblank(c)) {
  246. d = __CTYPE_print_space_blank;
  247. }
  248. } else if (iswspace(c) && !iswcntrl(c)) {
  249. d = __CTYPE_space_nonblank_noncntrl;
  250. if (iswblank(c)) {
  251. d = __CTYPE_space_blank_noncntrl;
  252. }
  253. } else if (iswcntrl(c)) {
  254. d = __CTYPE_cntrl_nonspace;
  255. if (iswspace(c)) {
  256. d = __CTYPE_cntrl_space_nonblank;
  257. if (iswblank(c)) {
  258. d = __CTYPE_cntrl_space_blank;
  259. }
  260. }
  261. } else {
  262. d = __CTYPE_unclassified;
  263. }
  264. ++typecount[d];
  265. #if 0
  266. if (iswspace(c)) {
  267. if (iswblank(c)) {
  268. verbose_msg("%#8x : space blank\n", c);
  269. } else {
  270. verbose_msg("%#8x : space\n", c);
  271. }
  272. }
  273. #endif
  274. #if 0
  275. if (c < 256) {
  276. unsigned int curr_stdclib;
  277. curr_stdclib = 0;
  278. if (isalnum(c)) ++curr_stdclib; curr_stdclib <<= 1;
  279. if (isalpha(c)) ++curr_stdclib; curr_stdclib <<= 1;
  280. if (isblank(c)) ++curr_stdclib; curr_stdclib <<= 1;
  281. if (iscntrl(c)) ++curr_stdclib; curr_stdclib <<= 1;
  282. if (isdigit(c)) ++curr_stdclib; curr_stdclib <<= 1;
  283. if (isgraph(c)) ++curr_stdclib; curr_stdclib <<= 1;
  284. if (islower(c)) ++curr_stdclib; curr_stdclib <<= 1;
  285. if (isprint(c)) ++curr_stdclib; curr_stdclib <<= 1;
  286. if (ispunct(c)) ++curr_stdclib; curr_stdclib <<= 1;
  287. if (isspace(c)) ++curr_stdclib; curr_stdclib <<= 1;
  288. if (isupper(c)) ++curr_stdclib; curr_stdclib <<= 1;
  289. if (isxdigit(c)) ++curr_stdclib;
  290. verbose_msg("%#8x : ctype %#4x\n", c, curr_stdclib);
  291. }
  292. #endif
  293. #if 1
  294. /* Paranoid checking... */
  295. {
  296. unsigned int curr_stdclib;
  297. unsigned int mine;
  298. curr_stdclib = 0;
  299. if (iswalnum(c)) ++curr_stdclib; curr_stdclib <<= 1;
  300. if (iswalpha(c)) ++curr_stdclib; curr_stdclib <<= 1;
  301. if (iswblank(c)) ++curr_stdclib; curr_stdclib <<= 1;
  302. if (iswcntrl(c)) ++curr_stdclib; curr_stdclib <<= 1;
  303. if (iswdigit(c)) ++curr_stdclib; curr_stdclib <<= 1;
  304. if (iswgraph(c)) ++curr_stdclib; curr_stdclib <<= 1;
  305. if (iswlower(c)) ++curr_stdclib; curr_stdclib <<= 1;
  306. if (iswprint(c)) ++curr_stdclib; curr_stdclib <<= 1;
  307. if (iswpunct(c)) ++curr_stdclib; curr_stdclib <<= 1;
  308. if (iswspace(c)) ++curr_stdclib; curr_stdclib <<= 1;
  309. if (iswupper(c)) ++curr_stdclib; curr_stdclib <<= 1;
  310. if (iswxdigit(c)) ++curr_stdclib;
  311. mine = 0;
  312. if (mywalnum(d,c)) ++mine; mine <<= 1;
  313. if (mywalpha(d,c)) ++mine; mine <<= 1;
  314. if (mywblank(d,c)) ++mine; mine <<= 1;
  315. if (mywcntrl(d,c)) ++mine; mine <<= 1;
  316. if (mywdigit(d,c)) ++mine; mine <<= 1;
  317. if (mywgraph(d,c)) ++mine; mine <<= 1;
  318. if (mywlower(d,c)) ++mine; mine <<= 1;
  319. if (mywprint(d,c)) ++mine; mine <<= 1;
  320. if (mywpunct(d,c)) ++mine; mine <<= 1;
  321. if (mywspace(d,c)) ++mine; mine <<= 1;
  322. if (mywupper(d,c)) ++mine; mine <<= 1;
  323. if (mywxdigit(d,c)) ++mine;
  324. if (curr_stdclib != mine) {
  325. verbose_msg("%#8x : curr_stdclib %#4x != %#4x mine %u\n", c, curr_stdclib, mine, d);
  326. return EXIT_FAILURE;
  327. }
  328. #if 0
  329. if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
  330. /* if (!iswpunct(c)) { */
  331. verbose_msg("%#8x : %d %d %#4x\n",
  332. c, iswctype(c,is_comb),iswctype(c,is_comb3), curr_stdclib);
  333. /* } */
  334. }
  335. #endif
  336. #if 0
  337. if (iswctype(c,is_comb) || iswctype(c,is_comb3)) {
  338. if (!last_comb) {
  339. verbose_msg("%#8x - ", c);
  340. last_comb = c;
  341. } else if (last_comb + 1 < c) {
  342. verbose_msg("%#8x\n%#8x - ", last_comb, c);
  343. last_comb = c;
  344. } else {
  345. last_comb = c;
  346. }
  347. }
  348. #endif
  349. }
  350. #endif
  351. combt[c/4] |= ((((!!iswctype(c,is_comb)) << 1) | !!iswctype(c,is_comb3))
  352. << ((c & 3) << 1));
  353. /* comb3t[c/8] |= ((!!iswctype(c,is_comb3)) << (c & 7)); */
  354. /* widtht[c/4] |= (wcwidth(c) << ((c & 3) << 1)); */
  355. if (c & 1) { /* Use the high nibble for odd numbered wchars. */
  356. d <<= 4;
  357. }
  358. wct[c/2] |= d;
  359. l = (long)(int) towlower(c) - c;
  360. u = (long)(int) towupper(c) - c;
  361. ult[c] = 0;
  362. if (l || u) {
  363. if ((l != (short)l) || (u != (short)u)) {
  364. verbose_msg("range assumption error! %x %ld %ld\n", c, l, u);
  365. return EXIT_FAILURE;
  366. }
  367. for (i = 0; i < ul_count; i++) {
  368. if ((l == uldiff[i].l) && (u == uldiff[i].u)) {
  369. goto found;
  370. }
  371. }
  372. uldiff[ul_count].l = l;
  373. uldiff[ul_count].u = u;
  374. ++ul_count;
  375. if (ul_count > MAXTO) {
  376. verbose_msg("too many touppers/tolowers!\n");
  377. return EXIT_FAILURE;
  378. }
  379. found:
  380. ult[c] = i;
  381. }
  382. }
  383. for (i = 0; i < 16; i++) {
  384. verbose_msg("typecount[%2d] = %8ld %s\n", i, typecount[i], typename[i]);
  385. }
  386. verbose_msg("optimizing is* table..\n");
  387. n = -1;
  388. smallest = SIZE_MAX;
  389. cttable.ii = NULL;
  390. for (i = 0; i < 14; i++) {
  391. t = newopt(wct, (RANGE/2)+1, i, &cttable);
  392. if (smallest >= t) {
  393. n = i;
  394. smallest = t;
  395. /* } else { */
  396. /* break; */
  397. }
  398. }
  399. verbose_msg("smallest = %zu\n", smallest);
  400. if (!(cttable.ii = malloc(smallest))) {
  401. verbose_msg("couldn't allocate space!\n");
  402. return EXIT_FAILURE;
  403. }
  404. smallest = SIZE_MAX;
  405. newopt(wct, (RANGE/2)+1, n, &cttable);
  406. ++cttable.ti_shift; /* correct for nibble mode */
  407. verbose_msg("optimizing u/l-to table..\n");
  408. smallest = SIZE_MAX;
  409. ultable.ii = NULL;
  410. for (i = 0; i < 14; i++) {
  411. t = newopt(ult, RANGE+1, i, &ultable);
  412. if (smallest >= t) {
  413. n = i;
  414. smallest = t;
  415. /* } else { */
  416. /* break; */
  417. }
  418. }
  419. verbose_msg("%lu (smallest) + %lu (u/l diffs) = %lu\n",
  420. (unsigned long) smallest,
  421. (unsigned long) (4 * ul_count),
  422. (unsigned long) (smallest + 4 * ul_count)
  423. );
  424. verbose_msg("smallest = %zu\n", smallest);
  425. if (!(ultable.ii = malloc(smallest))) {
  426. verbose_msg("couldn't allocate space!\n");
  427. return EXIT_FAILURE;
  428. }
  429. smallest = SIZE_MAX;
  430. newopt(ult, RANGE+1, n, &ultable);
  431. #if 0
  432. verbose_msg("optimizing comb table..\n");
  433. smallest = SIZE_MAX;
  434. combtable.ii = NULL;
  435. for (i = 0; i < 14; i++) {
  436. t = newopt(combt, sizeof(combt), i, &combtable);
  437. if (smallest >= t) {
  438. n = i;
  439. smallest = t;
  440. /* } else { */
  441. /* break; */
  442. }
  443. }
  444. verbose_msg("smallest = %zu\n", smallest);
  445. if (!(combtable.ii = malloc(smallest))) {
  446. verbose_msg("couldn't allocate space!\n");
  447. return EXIT_FAILURE;
  448. }
  449. smallest = SIZE_MAX;
  450. newopt(combt, sizeof(combt), n, &combtable);
  451. combtable.ti_shift += 4; /* correct for 4 entries per */
  452. #endif
  453. #if 0
  454. verbose_msg("optimizing width table..\n");
  455. smallest = SIZE_MAX;
  456. widthtable.ii = NULL;
  457. for (i = 0; i < 14; i++) {
  458. t = newopt(widtht, sizeof(widtht), i, &widthtable);
  459. if (smallest >= t) {
  460. n = i;
  461. smallest = t;
  462. /* } else { */
  463. /* break; */
  464. }
  465. }
  466. verbose_msg("smallest = %zu\n", smallest);
  467. if (!(widthtable.ii = malloc(smallest))) {
  468. verbose_msg("couldn't allocate space!\n");
  469. return EXIT_FAILURE;
  470. }
  471. smallest = SIZE_MAX;
  472. newopt(widtht, sizeof(widtht), n, &widthtable);
  473. widthtable.ti_shift += 4; /* correct for 4 entries per */
  474. #endif
  475. #if 0
  476. verbose_msg("optimizing comb3 table..\n");
  477. smallest = SIZE_MAX;
  478. comb3table.ii = NULL;
  479. for (i = 0; i < 14; i++) {
  480. t = newopt(comb3t, sizeof(comb3t), i, &comb3table);
  481. if (smallest >= t) {
  482. n = i;
  483. smallest = t;
  484. /* } else { */
  485. /* break; */
  486. }
  487. }
  488. verbose_msg("smallest = %zu\n", smallest);
  489. if (!(comb3table.ii = malloc(smallest))) {
  490. verbose_msg("couldn't allocate space!\n");
  491. return EXIT_FAILURE;
  492. }
  493. smallest = SIZE_MAX;
  494. newopt(comb3t, sizeof(comb3t), n, &comb3table);
  495. comb3table.ti_shift += 8; /* correct for 4 entries per */
  496. #endif
  497. dump_table_data(&cttable);
  498. dump_table_data(&ultable);
  499. #if 0
  500. dump_table_data(&combtable);
  501. #endif
  502. }
  503. verbose_msg("verifying for %s...\n", *argv);
  504. #if RANGE == 0xffffU
  505. for (c = 0; c <= 0xffffUL; c++)
  506. #else
  507. for (c = 0; c <= 0x10ffffUL; c++)
  508. #endif
  509. {
  510. unsigned int curr_stdclib;
  511. unsigned int mine;
  512. unsigned int upper, lower;
  513. #if 0
  514. #if RANGE < 0x10000UL
  515. if (c == 0x10000UL) {
  516. c = 0x30000UL; /* skip 1st and 2nd sup planes */
  517. }
  518. #elif RANGE < 0x20000UL
  519. if (c == 0x20000UL) {
  520. c = 0x30000UL; /* skip 2nd sup planes */
  521. }
  522. #endif
  523. #endif
  524. curr_stdclib = 0;
  525. if (iswalnum(c)) ++curr_stdclib; curr_stdclib <<= 1;
  526. if (iswalpha(c)) ++curr_stdclib; curr_stdclib <<= 1;
  527. if (iswblank(c)) ++curr_stdclib; curr_stdclib <<= 1;
  528. if (iswcntrl(c)) ++curr_stdclib; curr_stdclib <<= 1;
  529. if (iswdigit(c)) ++curr_stdclib; curr_stdclib <<= 1;
  530. if (iswgraph(c)) ++curr_stdclib; curr_stdclib <<= 1;
  531. if (iswlower(c)) ++curr_stdclib; curr_stdclib <<= 1;
  532. if (iswprint(c)) ++curr_stdclib; curr_stdclib <<= 1;
  533. if (iswpunct(c)) ++curr_stdclib; curr_stdclib <<= 1;
  534. if (iswspace(c)) ++curr_stdclib; curr_stdclib <<= 1;
  535. if (iswupper(c)) ++curr_stdclib; curr_stdclib <<= 1;
  536. if (iswxdigit(c)) ++curr_stdclib;
  537. {
  538. unsigned int u;
  539. int n = 0, sc = 0; /* = 0 for verbose_msg only */
  540. int i0 = 0, i1 = 0;
  541. u = c;
  542. if (u <= RANGE) {
  543. sc = u & ((1 << cttable.ti_shift) - 1);
  544. u >>= cttable.ti_shift;
  545. n = u & ((1 << cttable.ii_shift) - 1);
  546. u >>= cttable.ii_shift;
  547. i0 = cttable.ii[u];
  548. i0 <<= cttable.ii_shift;
  549. i1 = cttable.ti[i0 + n];
  550. i1 <<= (cttable.ti_shift - 1);
  551. d = cttable.ut[i1 + (sc >> 1)];
  552. if (sc & 1) {
  553. d >>= 4;
  554. }
  555. d &= 0x0f;
  556. } else if (((unsigned)(c - 0xe0020UL) <= 0x5f) || (c == 0xe0001UL)) {
  557. d = __CTYPE_punct;
  558. } else if ((unsigned)(c - 0xf0000UL) < 0x20000UL) {
  559. if ((c & 0xffffU) <= 0xfffdU) {
  560. d = __CTYPE_punct;
  561. } else {
  562. d = __CTYPE_unclassified;
  563. }
  564. } else {
  565. d = __CTYPE_unclassified;
  566. }
  567. mine = 0;
  568. if (mywalnum(d,c)) ++mine; mine <<= 1;
  569. if (mywalpha(d,c)) ++mine; mine <<= 1;
  570. if (mywblank(d,c)) ++mine; mine <<= 1;
  571. if (mywcntrl(d,c)) ++mine; mine <<= 1;
  572. if (mywdigit(d,c)) ++mine; mine <<= 1;
  573. if (mywgraph(d,c)) ++mine; mine <<= 1;
  574. if (mywlower(d,c)) ++mine; mine <<= 1;
  575. if (mywprint(d,c)) ++mine; mine <<= 1;
  576. if (mywpunct(d,c)) ++mine; mine <<= 1;
  577. if (mywspace(d,c)) ++mine; mine <<= 1;
  578. if (mywupper(d,c)) ++mine; mine <<= 1;
  579. if (mywxdigit(d,c)) ++mine;
  580. if (curr_stdclib != mine) {
  581. verbose_msg("%#8x : curr_stdclib %#4x != %#4x mine %d\n", c, curr_stdclib, mine, d);
  582. if (c < 0x30000UL) {
  583. verbose_msg("sc=%#x u=%#x n=%#x i0=%#x i1=%#x\n", sc, u, n, i0, i1);
  584. }
  585. }
  586. upper = lower = u = c;
  587. if (u <= RANGE) {
  588. sc = u & ((1 << ultable.ti_shift) - 1);
  589. u >>= ultable.ti_shift;
  590. n = u & ((1 << ultable.ii_shift) - 1);
  591. u >>= ultable.ii_shift;
  592. i0 = ultable.ii[u];
  593. i0 <<= ultable.ii_shift;
  594. i1 = ultable.ti[i0 + n];
  595. i1 <<= (ultable.ti_shift);
  596. i1 += sc;
  597. i0 = ultable.ut[i1];
  598. upper = c + uldiff[i0].u;
  599. lower = c + uldiff[i0].l;
  600. }
  601. if (towupper(c) != upper) {
  602. verbose_msg("%#8x : towupper curr_stdclib %#4x != %#4x mine\n",
  603. c, towupper(c), upper);
  604. }
  605. if (towlower(c) != lower) {
  606. verbose_msg("%#8x : towlower curr_stdclib %#4x != %#4x mine i0 = %d\n",
  607. c, towlower(c), lower, i0);
  608. }
  609. if (totitle && ((tt = towctrans(c, totitle)) != upper)) {
  610. verbose_msg("%#8x : totitle curr_stdclib %#4lx != %#4x mine i0 = %d\n",
  611. c, tt, upper, i0);
  612. }
  613. }
  614. if ((c & 0xfff) == 0xfff) verbose_msg(".");
  615. }
  616. verbose_msg("done\n");
  617. }
  618. if (built) {
  619. printf("#define __LOCALE_DATA_WC_TABLE_DOMAIN_MAX %#8lx\n\n",
  620. (unsigned long) RANGE);
  621. output_table("ctype", &cttable);
  622. output_table("uplow", &ultable);
  623. #warning fix the upper bound on the upper/lower tables... save 200 bytes or so
  624. printf("#define __LOCALE_DATA_WCuplow_diffs %7u\n", ul_count);
  625. printf("\n#ifdef WANT_WCuplow_diff_data\n\n");
  626. printf("\nstatic const short __LOCALE_DATA_WCuplow_diff_data[%zu] = {",
  627. 2 * (size_t) ul_count);
  628. for (i = 0; i < ul_count; i++) {
  629. if (i % 4 == 0) {
  630. printf("\n");
  631. }
  632. printf(" %6d, %6d,", uldiff[i].u, uldiff[i].l);
  633. }
  634. printf("\n};\n\n");
  635. printf("#endif /* WANT_WCuplow_diff_data */\n\n");
  636. /* output_table("comb", &combtable); */
  637. /* output_table("width", &widthtable); */
  638. }
  639. return !built;
  640. }
  641. size_t newopt(unsigned char *ut, size_t usize, int shift, table_data *tbl)
  642. {
  643. static int recurse;
  644. unsigned char *ti[RANGE+1]; /* table index */
  645. size_t numblocks;
  646. size_t blocksize;
  647. size_t uniq;
  648. size_t i, j;
  649. size_t smallest, t;
  650. unsigned char *ii_save;
  651. int uniqblock[256];
  652. unsigned char uit[RANGE+1];
  653. int shift2;
  654. memset(uniqblock, 0x00, sizeof(uniqblock));
  655. ii_save = NULL;
  656. blocksize = 1 << shift;
  657. numblocks = usize >> shift;
  658. /* init table index */
  659. for (i=j = 0; i < numblocks; i++) {
  660. ti[i] = ut + j;
  661. j += blocksize;
  662. }
  663. /* sort */
  664. nu_val = blocksize;
  665. qsort(ti, numblocks, sizeof(unsigned char *), nu_memcmp);
  666. uniq = 1;
  667. uit[(ti[0]-ut)/blocksize] = 0;
  668. for (i=1; i < numblocks; i++) {
  669. if (memcmp(ti[i-1], ti[i], blocksize) < 0) {
  670. if (++uniq > 255) {
  671. break;
  672. }
  673. uniqblock[uniq - 1] = i;
  674. }
  675. #if 1
  676. else if (memcmp(ti[i-1], ti[i], blocksize) > 0) {
  677. verbose_msg("bad sort %li!\n", (long) i);
  678. abort();
  679. }
  680. #endif
  681. uit[(ti[i]-ut)/blocksize] = uniq - 1;
  682. }
  683. smallest = SIZE_MAX;
  684. shift2 = -1;
  685. if (uniq > 255)
  686. return SIZE_MAX;
  687. smallest = numblocks + uniq * blocksize;
  688. if (!recurse) {
  689. ++recurse;
  690. for (j=1; j < 14; j++) {
  691. if ((numblocks >> j) < 2) break;
  692. if (tbl) {
  693. ii_save = tbl->ii;
  694. tbl->ii = NULL;
  695. }
  696. if ((t = newopt(uit, numblocks, j, tbl)) < SIZE_MAX) {
  697. t += uniq * blocksize;
  698. }
  699. if (tbl) {
  700. tbl->ii = ii_save;
  701. }
  702. if (smallest >= t) {
  703. shift2 = j;
  704. smallest = t;
  705. if (!tbl->ii) {
  706. verbose_msg("ishift %u tshift %u size %lu\n",
  707. shift2, shift, (unsigned long) t);
  708. }
  709. /* } else { */
  710. /* break; */
  711. }
  712. }
  713. --recurse;
  714. }
  715. if (tbl->ii) {
  716. if (recurse) {
  717. tbl->ii_shift = shift;
  718. tbl->ii_len = numblocks;
  719. memcpy(tbl->ii, uit, numblocks);
  720. tbl->ti = tbl->ii + tbl->ii_len;
  721. tbl->ti_len = uniq * blocksize;
  722. for (i = 0; i < uniq; i++) {
  723. memcpy(tbl->ti + i * blocksize, ti[uniqblock[i]], blocksize);
  724. }
  725. } else {
  726. ++recurse;
  727. verbose_msg("setting ishift %u tshift %u\n",
  728. shift2, shift);
  729. newopt(uit, numblocks, shift2, tbl);
  730. --recurse;
  731. tbl->ti_shift = shift;
  732. tbl->ut_len = uniq * blocksize;
  733. tbl->ut = tbl->ti + tbl->ti_len;
  734. for (i = 0; i < uniq; i++) {
  735. memcpy(tbl->ut + i * blocksize, ti[uniqblock[i]], blocksize);
  736. }
  737. }
  738. }
  739. return smallest;
  740. }
  741. /* vi: set sw=4 ts=4: */