_collate.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686
  1. /*
  2. * Copyright (C) 2002 Manuel Novoa III
  3. * Copyright (C) 2000-2005 Erik Andersen <andersen@uclibc.org>
  4. *
  5. * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
  6. */
  7. /* Dec 20, 2002
  8. * Initial test implementation of strcoll, strxfrm, wcscoll, and wcsxfrm.
  9. * The code needs to be cleaned up a good bit, but I'd like to see people
  10. * test it out.
  11. *
  12. */
  13. #include "_string.h"
  14. #include <ctype.h>
  15. #include <locale.h>
  16. #include <stdlib.h>
  17. #include <errno.h>
  18. #include <assert.h>
  19. extern size_t __strlcpy(char *__restrict dst, const char *__restrict src,
  20. size_t n) attribute_hidden;
  21. #ifdef WANT_WIDE
  22. extern int __wcscmp (__const wchar_t *__s1, __const wchar_t *__s2) attribute_hidden;
  23. extern size_t __wcsxfrm (wchar_t *__restrict __s1,
  24. __const wchar_t *__restrict __s2, size_t __n) attribute_hidden;
  25. #endif
  26. #ifdef __UCLIBC_HAS_XLOCALE__
  27. extern int __strcoll_l (__const char *__s1, __const char *__s2, __locale_t __l) attribute_hidden;
  28. extern size_t __strxfrm_l (char *__dest, __const char *__src, size_t __n, __locale_t __l) attribute_hidden;
  29. extern int __wcscoll_l (__const wchar_t *__s1, __const wchar_t *__s2, __locale_t __loc) attribute_hidden;
  30. extern size_t __wcsxfrm_l (wchar_t *__s1, __const wchar_t *__s2, size_t __n, __locale_t __loc) attribute_hidden;
  31. #endif
  32. #ifdef __UCLIBC_HAS_LOCALE__
  33. #if defined(L_strxfrm) || defined(L_strxfrm_l) || defined(L_wcsxfrm) || defined(L_wcsxfrm_l)
  34. #ifdef L_strxfrm
  35. #ifndef WANT_WIDE
  36. #error WANT_WIDE should be defined for L_strxfrm
  37. #endif
  38. #ifdef L_wcsxfrm
  39. #error L_wcsxfrm already defined for L_strxfrm
  40. #endif
  41. #endif /* L_strxfrm */
  42. #if defined(L_strxfrm) || defined(L_strxfrm_l)
  43. #define wcscoll strcoll
  44. #define __wcscoll __strcoll
  45. #define wcscoll_l strcoll_l
  46. #define __wcscoll_l __strcoll_l
  47. #define wcsxfrm strxfrm
  48. #define __wcsxfrm __strxfrm
  49. #define wcsxfrm_l strxfrm_l
  50. #define __wcsxfrm_l __strxfrm_l
  51. #undef WANT_WIDE
  52. #undef Wvoid
  53. #undef Wchar
  54. #undef Wuchar
  55. #undef Wint
  56. #define Wchar char
  57. #endif /* defined(L_strxfrm) || defined(L_strxfrm_l) */
  58. #if defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE)
  59. int attribute_hidden __wcscoll (const Wchar *s0, const Wchar *s1)
  60. {
  61. return __wcscoll_l(s0, s1, __UCLIBC_CURLOCALE );
  62. }
  63. strong_alias(__wcscoll,wcscoll)
  64. size_t attribute_hidden __wcsxfrm(Wchar *__restrict ws1, const Wchar *__restrict ws2, size_t n)
  65. {
  66. return __wcsxfrm_l(ws1, ws2, n, __UCLIBC_CURLOCALE );
  67. }
  68. strong_alias(__wcsxfrm,wcsxfrm)
  69. #else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
  70. #if 0
  71. #define CUR_COLLATE (&__UCLIBC_CURLOCALE_DATA.collate)
  72. #else
  73. #define CUR_COLLATE (& __LOCALE_PTR->collate)
  74. #endif
  75. #define MAX_PENDING 8
  76. typedef struct {
  77. const Wchar *s;
  78. const Wchar *eob; /* end of backward */
  79. __uwchar_t weight;
  80. __uwchar_t ui_weight; /* undefined or invalid */
  81. int colitem;
  82. int weightidx;
  83. int rule;
  84. size_t position;
  85. /* should be wchar_t. if wchar < 0 do EILSEQ? */
  86. __uwchar_t *cip;
  87. __uwchar_t ci_pending[MAX_PENDING]; /* nul-terminated */
  88. char *back_buf;
  89. char *bbe; /* end of back_buf (actual last... not 1 past end) */
  90. char *bp; /* ptr into backbuf, NULL if not in backward mode */
  91. char ibb[128];
  92. size_t bb_size;
  93. int ru_pushed;
  94. } col_state_t;
  95. #define WEIGHT_MASK 0x3fffU
  96. #define RULE_MASK 0xc000U
  97. #define RULE_FORWARD (1 << 14)
  98. #define RULE_POSITION (1 << 15)
  99. #define UI_IDX (WEIGHT_MASK-6)
  100. #define POSIT_IDX (WEIGHT_MASK-5)
  101. #define RANGE_IDX (WEIGHT_MASK-4)
  102. #define UNDEF_IDX (WEIGHT_MASK-3)
  103. #define INVAL_IDX (WEIGHT_MASK-2)
  104. #define DITTO_IDX (WEIGHT_MASK-1)
  105. #undef TRACE
  106. #if 0
  107. #define TRACE(X) printf X
  108. #else
  109. #define TRACE(X) ((void)0)
  110. #endif
  111. static int lookup(wchar_t wc __LOCALE_PARAM )
  112. {
  113. unsigned int sc, n, i0, i1;
  114. if (((__uwchar_t) wc) > 0xffffU) {
  115. return 0;
  116. }
  117. sc = wc & CUR_COLLATE->ti_mask;
  118. wc >>= CUR_COLLATE->ti_shift;
  119. n = wc & CUR_COLLATE->ii_mask;
  120. wc >>= CUR_COLLATE->ii_shift;
  121. i0 = CUR_COLLATE->wcs2colidt_tbl[wc];
  122. i0 <<= CUR_COLLATE->ii_shift;
  123. i1 = CUR_COLLATE->wcs2colidt_tbl[CUR_COLLATE->ii_len + i0 + n];
  124. i1 <<= CUR_COLLATE->ti_shift;
  125. return CUR_COLLATE->wcs2colidt_tbl[CUR_COLLATE->ii_len + CUR_COLLATE->ti_len + i1 + sc];
  126. }
  127. static void init_col_state(col_state_t *cs, const Wchar *wcs)
  128. {
  129. __memset(cs, 0, sizeof(col_state_t));
  130. cs->s = wcs;
  131. cs->bp = cs->back_buf = cs->ibb;
  132. cs->bb_size = 128;
  133. cs->bbe = cs->back_buf + (cs->bb_size -1);
  134. }
  135. static void next_weight(col_state_t *cs, int pass __LOCALE_PARAM )
  136. {
  137. int r, w, ru, ri, popping_backup_stack;
  138. ssize_t n;
  139. const uint16_t *p;
  140. #ifdef WANT_WIDE
  141. #define WC (*cs->s)
  142. #define N (1)
  143. #else /* WANT_WIDE */
  144. wchar_t WC;
  145. size_t n0, nx;
  146. #define N n0
  147. #endif /* WANT_WIDE */
  148. do {
  149. if (cs->ru_pushed) {
  150. ru = cs->ru_pushed;
  151. TRACE(("ru_pushed = %d\n", ru));
  152. cs->ru_pushed = 0;
  153. goto POSITION_SKIP;
  154. }
  155. #ifdef __UCLIBC_MJN3_ONLY__
  156. #warning should we walk pendings backwards?
  157. #endif
  158. if (cs->cip) { /* possible pending weight */
  159. if ((r = *(cs->cip++)) == 0) {
  160. cs->cip = NULL;
  161. continue;
  162. }
  163. cs->weightidx = r & WEIGHT_MASK;
  164. assert(cs->weightidx);
  165. /* assert(cs->weightidx != WEIGHT_MASK); */
  166. } else { /* get the next collation item from the string */
  167. TRACE(("clearing popping flag\n"));
  168. popping_backup_stack = 0;
  169. IGNORE_LOOP:
  170. /* keep first pos as 0 for a sentinal */
  171. if (*cs->bp) { /* pending backward chars */
  172. POP_BACKUP:
  173. popping_backup_stack = 1;
  174. TRACE(("setting popping flag\n"));
  175. n = 0;
  176. if (*cs->bp > 0) { /* singles pending */
  177. cs->s -= 1;
  178. if ((*cs->bp -= 1) == 0) {
  179. cs->bp -= 1;
  180. }
  181. } else { /* last was a multi */
  182. cs->s += *cs->bp;
  183. cs->bp -= 1;
  184. }
  185. } else if (!*cs->s) { /* not in backward mode and end of string */
  186. cs->weight = 0;
  187. return;
  188. } else {
  189. cs->position += 1;
  190. }
  191. BACK_LOOP:
  192. #ifdef WANT_WIDE
  193. n = 1;
  194. cs->colitem = r = lookup(*cs->s __LOCALE_ARG );
  195. #else /* WANT_WIDE */
  196. n = n0 = __locale_mbrtowc_l(&WC, cs->s, __LOCALE_PTR);
  197. if (n < 0) {
  198. __set_errno(EILSEQ);
  199. cs->weight = 0;
  200. return;
  201. }
  202. cs->colitem = r = lookup(WC __LOCALE_ARG );
  203. #endif /* WANT_WIDE */
  204. TRACE((" r=%d WC=%#lx\n", r, (unsigned long)(WC)));
  205. if (r > CUR_COLLATE->max_col_index) { /* starting char for one or more sequences */
  206. p = CUR_COLLATE->multistart_tbl;
  207. p += p[r-CUR_COLLATE->max_col_index -1];
  208. do {
  209. n = N;
  210. r = *p++;
  211. do {
  212. if (!*p) { /* found it */
  213. cs->colitem = r;
  214. TRACE((" found multi %d\n", n));
  215. goto FOUND;
  216. }
  217. #ifdef WANT_WIDE
  218. /* the lookup check here is safe since we're assured that *p is a valid colidx */
  219. if (!cs->s[n] || (lookup(cs->s[n] __LOCALE_ARG ) != *p)) {
  220. do {} while (*p++);
  221. break;
  222. }
  223. ++p;
  224. ++n;
  225. #else /* WANT_WIDE */
  226. if (cs->s[n]) {
  227. nx = __locale_mbrtowc_l(&WC, cs->s + n, __LOCALE_PTR);
  228. if (nx < 0) {
  229. __set_errno(EILSEQ);
  230. cs->weight = 0;
  231. return;
  232. }
  233. }
  234. if (!cs->s[n] || (lookup(WC __LOCALE_ARG ) != *p)) {
  235. do {} while (*p++);
  236. break;
  237. }
  238. ++p;
  239. n += nx; /* Only gets here if cs->s[n] != 0, so nx is set. */
  240. #endif /* WANT_WIDE */
  241. } while (1);
  242. } while (1);
  243. } else if (r == 0) { /* illegal, undefined, or part of a range */
  244. if ((CUR_COLLATE->range_count)
  245. #ifdef __UCLIBC_MJN3_ONLY__
  246. #warning .. need to introduce range as a collating item?
  247. #endif
  248. && (((__uwchar_t)(WC - CUR_COLLATE->range_low)) <= CUR_COLLATE->range_count)
  249. ) { /* part of a range */
  250. /* Note: cs->colitem = 0 already. */
  251. TRACE((" found range\n"));
  252. ru = CUR_COLLATE->ruletable[CUR_COLLATE->range_rule_offset*CUR_COLLATE->MAX_WEIGHTS + pass];
  253. assert((ru & WEIGHT_MASK) != DITTO_IDX);
  254. if ((ru & WEIGHT_MASK) == WEIGHT_MASK) {
  255. ru = (ru & RULE_MASK) | RANGE_IDX;
  256. cs->weight = CUR_COLLATE->range_base_weight + (WC - CUR_COLLATE->range_low);
  257. }
  258. goto RANGE_SKIP_TO;
  259. } else if (((__uwchar_t)(WC)) <= 0x7fffffffUL) { /* legal but undefined */
  260. UNDEFINED:
  261. /* Note: cs->colitem = 0 already. */
  262. ri = CUR_COLLATE->undefined_idx;
  263. assert(ri != 0); /* implicit undefined isn't supported */
  264. TRACE((" found explicit UNDEFINED\n"));
  265. #ifdef __UCLIBC_MJN3_ONLY__
  266. #warning right now single weight locales do not support ..
  267. #endif
  268. if (CUR_COLLATE->num_weights == 1) {
  269. TRACE((" single weight UNDEFINED\n"));
  270. cs->weightidx = RANGE_IDX;
  271. cs->weight = ri;
  272. cs->s += n;
  273. goto PROCESS_WEIGHT;
  274. }
  275. ri = CUR_COLLATE->index2ruleidx[ri - 1];
  276. ru = CUR_COLLATE->ruletable[ri * CUR_COLLATE->MAX_WEIGHTS + pass];
  277. assert((ru & WEIGHT_MASK) != WEIGHT_MASK); /* TODO: handle ".." */
  278. if ((ru & WEIGHT_MASK) == DITTO_IDX) {
  279. cs->colitem = CUR_COLLATE->undefined_idx;
  280. }
  281. goto RANGE_SKIP_TO;
  282. } else { /* illegal */
  283. TRACE((" found illegal\n"));
  284. __set_errno(EINVAL);
  285. /* We put all illegals in the same equiv class with maximal weight,
  286. * and ignore them after the first pass. */
  287. if (pass > 0) {
  288. cs->s += n;
  289. goto IGNORE_LOOP;
  290. }
  291. ru = (RULE_FORWARD | RANGE_IDX);
  292. cs->weight = 0xffffU;
  293. goto RANGE_SKIP_TO;
  294. }
  295. } else if (CUR_COLLATE->num_weights == 1) {
  296. TRACE((" single weight\n"));
  297. cs->weightidx = RANGE_IDX;
  298. cs->weight = cs->colitem;
  299. cs->s += n;
  300. goto PROCESS_WEIGHT;
  301. } else {
  302. TRACE((" normal\n"));
  303. }
  304. /* if we get here, it is a normal char either singlely weighted, undefined, or in a range */
  305. FOUND:
  306. ri = CUR_COLLATE->index2ruleidx[cs->colitem - 1];
  307. TRACE((" ri=%d ", ri));
  308. #ifdef __UCLIBC_MJN3_ONLY__
  309. #warning make sure this is correct
  310. #endif
  311. if (!ri) {
  312. TRACE(("NOT IN THIS LOCALE\n"));
  313. goto UNDEFINED;
  314. }
  315. ru = CUR_COLLATE->ruletable[ri * CUR_COLLATE->MAX_WEIGHTS + pass];
  316. RANGE_SKIP_TO:
  317. #ifdef __UCLIBC_MJN3_ONLY__
  318. #warning ignoreables probably should not interrupt backwards processing, but this is wrong
  319. #endif
  320. /* if (!(ru & WEIGHT_MASK)) { */
  321. /* TRACE(("IGNORE\n")); */
  322. /* cs->s += n; */
  323. /* continue; */
  324. /* } */
  325. TRACE((" rule = %#x weight = %#x popping = %d s = %p eob = %p\n",
  326. ru & RULE_MASK, ru & WEIGHT_MASK, popping_backup_stack,
  327. cs->s, cs->eob));
  328. /* now we need to check if we're going backwards... */
  329. if (!popping_backup_stack) {
  330. if (!(ru & RULE_MASK)) { /* backward */
  331. TRACE(("backwards\n"));
  332. assert(cs->bp <= cs->bbe);
  333. if (cs->bp == cs->bbe) {
  334. if (cs->back_buf == cs->ibb) { /* was using internal buffer */
  335. cs->bp = malloc(cs->bb_size + 128);
  336. if (!cs->bp) {
  337. __set_errno(ENOMEM);
  338. #ifdef __UCLIBC_MJN3_ONLY__
  339. #warning what to do here?
  340. #endif
  341. cs->weight = 0;
  342. return;
  343. }
  344. __memcpy(cs->bp, cs->back_buf, cs->bb_size);
  345. } else {
  346. cs->bp = realloc(cs->back_buf, cs->bb_size + 128);
  347. if (!cs->bp) {
  348. __set_errno(ENOMEM);
  349. #ifdef __UCLIBC_MJN3_ONLY__
  350. #warning what to do here?
  351. #endif
  352. cs->weight = 0;
  353. return;
  354. }
  355. }
  356. cs->bb_size += 128;
  357. cs->bbe = cs->bp + (cs->bbe - cs->back_buf);
  358. cs->back_buf = cs->bp;
  359. cs->bp = cs->bbe;
  360. }
  361. if (n==1) { /* single char */
  362. if (*cs->bp && (((unsigned char)(*cs->bp)) < CHAR_MAX)) {
  363. *cs->bp += 1; /* increment last single's count */
  364. } else { /* last was a multi, or just starting */
  365. if (!cs->bp) {
  366. cs->bp = cs->back_buf;
  367. } else {
  368. assert(cs->bp < cs->bbe);
  369. ++cs->bp;
  370. }
  371. *cs->bp = 1;
  372. }
  373. } else { /* multichar */
  374. assert(n>1);
  375. assert(cs->bp < cs->bbe);
  376. *++cs->bp = -n;
  377. }
  378. cs->s += n;
  379. if (*cs->s) {
  380. goto BACK_LOOP;
  381. }
  382. /* end-of-string so start popping */
  383. cs->eob = cs->s;
  384. TRACE(("popping\n"));
  385. goto POP_BACKUP;
  386. } else if (*cs->bp) { /* was going backward but this element isn't */
  387. /* discard current and use previous backward element */
  388. assert(!cs->cip);
  389. cs->eob = cs->s;
  390. TRACE(("popping\n"));
  391. goto POP_BACKUP;
  392. } else { /* was and still going forward */
  393. TRACE(("forwards\n"));
  394. if ((ru & (RULE_POSITION|WEIGHT_MASK)) > RULE_POSITION) {
  395. assert(ru & WEIGHT_MASK);
  396. cs->ru_pushed = ru;
  397. cs->weight = cs->position;
  398. #ifdef __UCLIBC_MJN3_ONLY__
  399. #warning devel code
  400. #endif
  401. cs->position = 0; /* reset to reduce size for strcoll? */
  402. cs->s += n;
  403. cs->weightidx = RANGE_IDX;
  404. goto PROCESS_WEIGHT;
  405. }
  406. }
  407. } else { /* popping backwards stack */
  408. TRACE(("popping (continued)\n"));
  409. if (!*cs->bp) {
  410. cs->s = cs->eob;
  411. }
  412. cs->s -= n;
  413. }
  414. cs->s += n;
  415. POSITION_SKIP:
  416. cs->weightidx = ru & WEIGHT_MASK;
  417. cs->rule = ru & RULE_MASK;
  418. }
  419. #ifdef __UCLIBC_MJN3_ONLY__
  420. #warning for pending we only want the weight... _not_ the rule
  421. #endif
  422. if (!cs->weightidx) { /* ignore */
  423. continue;
  424. }
  425. PROCESS_WEIGHT:
  426. assert(cs->weightidx);
  427. if (((unsigned int)(cs->weightidx - UI_IDX)) <= (INVAL_IDX-UI_IDX)) {
  428. if (cs->weightidx == UI_IDX) {
  429. cs->weight = cs->ui_weight;
  430. }
  431. return;
  432. }
  433. assert(cs->weightidx != WEIGHT_MASK);
  434. if (cs->weightidx == DITTO_IDX) { /* want the weight of the current collating item */
  435. TRACE(("doing ditto\n"));
  436. w = CUR_COLLATE->index2weight[cs->colitem -1];
  437. } else if (cs->weightidx <= CUR_COLLATE->max_col_index) { /* normal */
  438. TRACE(("doing normal\n"));
  439. w = CUR_COLLATE->index2weight[cs->weightidx -1];
  440. } else { /* a string */
  441. TRACE(("doing string\n"));
  442. assert(!(cs->weightidx & RULE_MASK));
  443. /* note: iso14561 allows null string here */
  444. p = CUR_COLLATE->weightstr + (cs->weightidx - (CUR_COLLATE->max_col_index + 2));
  445. if (*p & WEIGHT_MASK) {
  446. r = 0;
  447. do {
  448. assert(r < MAX_PENDING);
  449. cs->ci_pending[r++] = *p++;
  450. } while (*p & WEIGHT_MASK);
  451. cs->cip = cs->ci_pending;
  452. }
  453. continue;
  454. }
  455. cs->weight = w;
  456. return;
  457. } while (1);
  458. }
  459. int attribute_hidden __UCXL(wcscoll) (const Wchar *s0, const Wchar *s1 __LOCALE_PARAM )
  460. {
  461. col_state_t ws[2];
  462. int pass;
  463. if (!CUR_COLLATE->num_weights) { /* C locale */
  464. #ifdef WANT_WIDE
  465. return __wcscmp(s0, s1);
  466. #else /* WANT_WIDE */
  467. return __strcmp(s0, s1);
  468. #endif /* WANT_WIDE */
  469. }
  470. pass = 0;
  471. do { /* loop through the weights levels */
  472. init_col_state(ws, s0);
  473. init_col_state(ws+1, s1);
  474. do { /* loop through the strings */
  475. /* for each string, get the next weight */
  476. next_weight(ws, pass __LOCALE_ARG );
  477. next_weight(ws+1, pass __LOCALE_ARG );
  478. TRACE(("w0=%lu w1=%lu\n",
  479. (unsigned long) ws[0].weight,
  480. (unsigned long) ws[1].weight));
  481. if (ws[0].weight != ws[1].weight) {
  482. return ws[0].weight - ws[1].weight;
  483. }
  484. } while (ws[0].weight);
  485. } while (++pass < CUR_COLLATE->num_weights);
  486. return 0;
  487. }
  488. __UCXL_ALIAS(wcscoll)
  489. #ifdef WANT_WIDE
  490. size_t attribute_hidden __UCXL(wcsxfrm)(wchar_t *__restrict ws1, const wchar_t *__restrict ws2,
  491. size_t n __LOCALE_PARAM )
  492. {
  493. col_state_t cs;
  494. size_t count;
  495. int pass;
  496. if (!CUR_COLLATE->num_weights) { /* C locale */
  497. return __wcsxfrm(ws1, ws2, n);
  498. }
  499. #ifdef __UCLIBC_MJN3_ONLY__
  500. #warning handle empty string as a special case
  501. #endif
  502. count = pass = 0;
  503. do { /* loop through the weights levels */
  504. init_col_state(&cs, ws2);
  505. do { /* loop through the string */
  506. next_weight(&cs, pass __LOCALE_ARG );
  507. TRACE(("weight=%lu (%#lx)\n", (unsigned long) cs.weight, (unsigned long) cs.weight));
  508. if (count < n) {
  509. ws1[count] = cs.weight +1;
  510. }
  511. ++count;
  512. TRACE(("--------------------------------------------\n"));
  513. } while (cs.weight);
  514. if (count <= n) { /* overwrite the trailing 0 end-of-pass marker */
  515. ws1[count-1] = 1;
  516. }
  517. TRACE(("-------------------- pass %d --------------------\n", pass));
  518. } while (++pass < CUR_COLLATE->num_weights);
  519. if (count <= n) { /* oops... change it back */
  520. ws1[count-1] = 0;
  521. }
  522. return count-1;
  523. }
  524. __UCXL_ALIAS(wcsxfrm)
  525. #else /* WANT_WIDE */
  526. static const unsigned long bound[] = {
  527. 1UL << 7,
  528. 1UL << 11,
  529. 1UL << 16,
  530. 1UL << 21,
  531. 1UL << 26,
  532. };
  533. static unsigned char first[] = {
  534. 0x0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
  535. };
  536. /* Use an extension of UTF-8 to store a 32 bit val in max 6 bytes. */
  537. static size_t store(unsigned char *s, size_t count, size_t n, __uwchar_t weight)
  538. {
  539. int i, r;
  540. i = 0;
  541. do {
  542. if (weight < bound[i]) {
  543. break;
  544. }
  545. } while (++i < sizeof(bound)/sizeof(bound[0]));
  546. r = i+1;
  547. if (i + count < n) {
  548. s += count;
  549. s[0] = first[i];
  550. while (i) {
  551. s[i] = 0x80 | (weight & 0x3f);
  552. weight >>= 6;
  553. --i;
  554. }
  555. s[0] |= weight;
  556. }
  557. return r;
  558. }
  559. size_t attribute_hidden __UCXL(strxfrm)(char *__restrict ws1, const char *__restrict ws2, size_t n
  560. __LOCALE_PARAM )
  561. {
  562. col_state_t cs;
  563. size_t count, inc;
  564. int pass;
  565. if (!CUR_COLLATE->num_weights) { /* C locale */
  566. return __strlcpy(ws1, ws2, n);
  567. }
  568. #ifdef __UCLIBC_MJN3_ONLY__
  569. #warning handle empty string as a special case
  570. #endif
  571. inc = count = pass = 0;
  572. do { /* loop through the weights levels */
  573. init_col_state(&cs, ws2);
  574. do { /* loop through the string */
  575. next_weight(&cs, pass __LOCALE_ARG );
  576. TRACE(("weight=%lu (%#lx)\n", (unsigned long) cs.weight, (unsigned long) cs.weight));
  577. inc = store((unsigned char *)ws1, count, n, cs.weight + 1);
  578. count += inc;
  579. TRACE(("--------------------------------------------\n"));
  580. } while (cs.weight);
  581. /* overwrite the trailing 0 end-of-pass marker */
  582. assert(inc == 1);
  583. if (count <= n) {
  584. ws1[count-1] = 1;
  585. }
  586. TRACE(("-------------------- pass %d --------------------\n", pass));
  587. } while (++pass < CUR_COLLATE->num_weights);
  588. if (count <= n) { /* oops... change it back */
  589. ws1[count-1] = 0;
  590. }
  591. return count-1;
  592. }
  593. __UCXL_ALIAS(strxfrm)
  594. #endif /* WANT_WIDE */
  595. #endif /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */
  596. #endif /* defined(L_strxfrm) || defined(L_strxfrm_l) || defined(L_wcsxfrm) || defined(L_wcsxfrm_l) */
  597. #endif /* __UCLIBC_HAS_LOCALE__ */
  598. /**********************************************************************/