wctype.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599
  1. /* Copyright (C) 2002 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
  18. *
  19. * Besides uClibc, I'm using this code in my libc for elks, which is
  20. * a 16-bit environment with a fairly limited compiler. It would make
  21. * things much easier for me if this file isn't modified unnecessarily.
  22. * In particular, please put any new or replacement functions somewhere
  23. * else, and modify the makefile to use your version instead.
  24. * Thanks. Manuel
  25. *
  26. * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
  27. #define _GNU_SOURCE
  28. #define __NO_CTYPE
  29. #include <wctype.h>
  30. #include <assert.h>
  31. #include <string.h>
  32. #include <errno.h>
  33. #include <locale.h>
  34. #include <ctype.h>
  35. /* We know wide char support is enabled. We wouldn't be here otherwise. */
  36. /* Define this if you want to unify the towupper and towlower code in the
  37. * towctrans function. */
  38. /* #define SMALL_UPLOW */
  39. #ifndef __LOCALE_C_ONLY
  40. #define __WCTYPE_WITH_LOCALE
  41. #endif
  42. /**********************************************************************/
  43. #ifndef __PASTE
  44. #define __PASTE(X,Y) X ## Y
  45. #endif
  46. #define C_MACRO(X) __PASTE(__C_,X)(wc)
  47. #define CT_MACRO(X) __PASTE(__ctype_,X)(wc)
  48. /**********************************************************************/
  49. /* TODO: fix this! */
  #ifdef __WCTYPE_WITH_LOCALE
  /* Shorthand for the wide-character tables and the encoding tag held in
   * __global_locale. */
  #define WCctype (__global_locale.tblwctype)
  #define WCuplow (__global_locale.tblwuplow)
  #define WCcomb (__global_locale.tblwcomb)
  /* Earlier misspelling of WCcomb; kept as an alias so any remaining user
   * of the old name still builds. */
  #define WCcmob WCcomb
  #define WCuplow_diff (__global_locale.tblwuplow_diff)
  #define ENCODING (__global_locale.encoding)
  /* Locale build: define classifier NAME(wc) as a call to iswctype() with the
   * descriptor _CTYPE_<NAME>. */
  #define ISW_FUNC_BODY(NAME) \
  int NAME (wint_t wc) \
  { \
      return iswctype(wc, __PASTE(_CTYPE_,NAME)); \
  }
  #else /* __WCTYPE_WITH_LOCALE */
  /* C/POSIX-only build: define classifier NAME(wc) through C_MACRO(NAME),
   * i.e. the __C_<NAME>(wc) macro. */
  #define ISW_FUNC_BODY(NAME) \
  int NAME (wint_t wc) \
  { \
      return C_MACRO(NAME); \
  }
  #endif /* __WCTYPE_WITH_LOCALE */
  68. /**********************************************************************/
  69. #ifdef L_iswalnum
  70. ISW_FUNC_BODY(iswalnum);
  71. #endif
  72. /**********************************************************************/
  73. #ifdef L_iswalpha
  74. ISW_FUNC_BODY(iswalpha);
  75. #endif
  76. /**********************************************************************/
  77. #ifdef L_iswblank
  78. ISW_FUNC_BODY(iswblank);
  79. #endif
  80. /**********************************************************************/
  81. #ifdef L_iswcntrl
  82. ISW_FUNC_BODY(iswcntrl);
  83. #endif
  84. /**********************************************************************/
  85. #ifdef L_iswdigit
  /* iswdigit(): report whether wc is a digit.  The answer comes straight from
   * the __C_iswdigit() macro; no locale table is consulted here. */
  int iswdigit(wint_t wc)
  {
      const int verdict = __C_iswdigit(wc);

      return verdict;
  }
  90. #endif
  91. /**********************************************************************/
  92. #ifdef L_iswgraph
  93. ISW_FUNC_BODY(iswgraph);
  94. #endif
  95. /**********************************************************************/
  96. #ifdef L_iswlower
  97. ISW_FUNC_BODY(iswlower);
  98. #endif
  99. /**********************************************************************/
  100. #ifdef L_iswprint
  101. ISW_FUNC_BODY(iswprint);
  102. #endif
  103. /**********************************************************************/
  104. #ifdef L_iswpunct
  105. ISW_FUNC_BODY(iswpunct);
  106. #endif
  107. /**********************************************************************/
  108. #ifdef L_iswspace
  109. ISW_FUNC_BODY(iswspace);
  110. #endif
  111. /**********************************************************************/
  112. #ifdef L_iswupper
  113. ISW_FUNC_BODY(iswupper);
  114. #endif
  115. /**********************************************************************/
  116. #ifdef L_iswxdigit
  /* iswxdigit(): report whether wc is a hexadecimal digit.  The answer comes
   * straight from the __C_iswxdigit() macro; no locale table is consulted. */
  int iswxdigit(wint_t wc)
  {
      const int verdict = __C_iswxdigit(wc);

      return verdict;
  }
  121. #endif
  122. /**********************************************************************/
  123. #ifdef L_towlower
  124. #ifdef __WCTYPE_WITH_LOCALE
  125. #ifdef SMALL_UPLOW
  126. wint_t towlower(wint_t wc)
  127. {
  128. return towctrans(wc, _CTYPE_tolower);
  129. }
  130. #else
  131. wint_t towlower(wint_t wc)
  132. {
  133. unsigned int sc, n, i;
  134. __uwchar_t u = wc;
  135. if (ENCODING == __ctype_encoding_7_bit) {
  136. /* We're in the C/POSIX locale, so ignore the tables. */
  137. return __C_towlower(wc);
  138. }
  139. if (u <= WC_TABLE_DOMAIN_MAX) {
  140. sc = u & ((1 << WCuplow_TI_SHIFT) - 1);
  141. u >>= WCuplow_TI_SHIFT;
  142. n = u & ((1 << WCuplow_II_SHIFT) - 1);
  143. u >>= WCuplow_II_SHIFT;
  144. i = ((unsigned int) WCuplow[u]) << WCuplow_II_SHIFT;
  145. i = ((unsigned int) WCuplow[WCuplow_II_LEN + i + n])
  146. << WCuplow_TI_SHIFT;
  147. i = ((unsigned int) WCuplow[WCuplow_II_LEN + WCuplow_TI_LEN
  148. + i + sc]) << 1;
  149. wc += WCuplow_diff[i + 1];
  150. }
  151. return wc;
  152. }
  153. #endif
  154. #else /* __WCTYPE_WITH_LOCALE */
  /* towlower(), C/POSIX-only build: delegate to the __C_towlower() macro. */
  wint_t towlower(wint_t wc)
  {
      const wint_t lowered = __C_towlower(wc);

      return lowered;
  }
  159. #endif /* __WCTYPE_WITH_LOCALE */
  160. #endif
  161. /**********************************************************************/
  162. #ifdef L_towupper
  163. #ifdef __WCTYPE_WITH_LOCALE
  164. #ifdef SMALL_UPLOW
  165. wint_t towupper(wint_t wc)
  166. {
  167. return towctrans(wc, _CTYPE_toupper);
  168. }
  169. #else
  170. wint_t towupper(wint_t wc)
  171. {
  172. unsigned int sc, n, i;
  173. __uwchar_t u = wc;
  174. if (ENCODING == __ctype_encoding_7_bit) {
  175. /* We're in the C/POSIX locale, so ignore the tables. */
  176. return __C_towupper(wc);
  177. }
  178. if (u <= WC_TABLE_DOMAIN_MAX) {
  179. sc = u & ((1 << WCuplow_TI_SHIFT) - 1);
  180. u >>= WCuplow_TI_SHIFT;
  181. n = u & ((1 << WCuplow_II_SHIFT) - 1);
  182. u >>= WCuplow_II_SHIFT;
  183. i = ((unsigned int) WCuplow[u]) << WCuplow_II_SHIFT;
  184. i = ((unsigned int) WCuplow[WCuplow_II_LEN + i + n])
  185. << WCuplow_TI_SHIFT;
  186. i = ((unsigned int) WCuplow[WCuplow_II_LEN + WCuplow_TI_LEN
  187. + i + sc]) << 1;
  188. wc += WCuplow_diff[i];
  189. }
  190. return wc;
  191. }
  192. #endif
  193. #else /* __WCTYPE_WITH_LOCALE */
  /* towupper(), C/POSIX-only build: delegate to the __C_towupper() macro. */
  wint_t towupper(wint_t wc)
  {
      const wint_t raised = __C_towupper(wc);

      return raised;
  }
  198. #endif /* __WCTYPE_WITH_LOCALE */
  199. #endif
  200. /**********************************************************************/
  201. #ifdef L_wctype
  202. static const unsigned char typestring[] = __CTYPE_TYPESTRING;
  203. /* extern const unsigned char typestring[]; */
  204. wctype_t wctype(const char *property)
  205. {
  206. const unsigned char *p;
  207. int i;
  208. p = typestring;
  209. i = 1;
  210. do {
  211. if (!strcmp(property, ++p)) {
  212. return i;
  213. }
  214. ++i;
  215. p += p[-1];
  216. } while (*p);
  217. /* TODO - Add locale-specific classifications. */
  218. return 0;
  219. }
  220. #endif
  221. /**********************************************************************/
  222. #ifdef L_iswctype
  223. #warning duh... replace the range-based classification with table lookup!
  224. #ifdef __WCTYPE_WITH_LOCALE
  225. #warning TODO: need to fix locale ctype table lookup stuff
  226. #if 0
  227. extern const char ctype_range[];
  228. #else
  229. static const char ctype_range[] = {
  230. __CTYPE_RANGES
  231. };
  232. #endif
  233. #warning TODO: need to handle combining class!
  234. #define WCctype_TI_MASK ((1 << WCctype_TI_SHIFT) - 1)
  235. #define WCctype_II_MASK ((1 << WCctype_II_SHIFT) - 1)
  236. int iswctype(wint_t wc, wctype_t desc)
  237. {
  238. unsigned int sc, n, i0, i1;
  239. unsigned char d = __CTYPE_unclassified;
  240. if ((ENCODING != __ctype_encoding_7_bit) || (((__uwchar_t) wc) <= 0x7f)){
  241. if (desc < _CTYPE_iswxdigit) {
  242. if (((__uwchar_t) wc) <= WC_TABLE_DOMAIN_MAX) {
  243. /* From here on, we know wc > 0. */
  244. sc = wc & WCctype_TI_MASK;
  245. wc >>= WCctype_TI_SHIFT;
  246. n = wc & WCctype_II_MASK;
  247. wc >>= WCctype_II_SHIFT;
  248. i0 = WCctype[wc];
  249. i0 <<= WCctype_II_SHIFT;
  250. i1 = WCctype[WCctype_II_LEN + i0 + n];
  251. i1 <<= (WCctype_TI_SHIFT-1);
  252. d = WCctype[WCctype_II_LEN + WCctype_TI_LEN + i1 + (sc >> 1)];
  253. d = (sc & 1) ? (d >> 4) : (d & 0xf);
  254. } else if ( ((((__uwchar_t)(wc - 0xe0020UL)) <= 0x5f)
  255. || (wc == 0xe0001UL))
  256. || ( (((__uwchar_t)(wc - 0xf0000UL)) < 0x20000UL)
  257. && ((wc & 0xffffU) <= 0xfffdU))
  258. ) {
  259. d = __CTYPE_punct;
  260. }
  261. return ( ((unsigned char)(d - ctype_range[2*desc]))
  262. <= ctype_range[2*desc + 1] )
  263. && ((desc != _CTYPE_iswblank) || (d & 1));
  264. }
  265. /* TODO - Add locale-specific classifications. */
  266. return (desc == _CTYPE_iswxdigit) ? __C_iswxdigit(wc) : 0;
  267. }
  268. return 0;
  269. }
  270. #else
  271. static const unsigned char WCctype[] = {
  272. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  273. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  274. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  275. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  276. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_space_blank << 4),
  277. __CTYPE_cntrl_space_nonblank | (__CTYPE_cntrl_space_nonblank << 4),
  278. __CTYPE_cntrl_space_nonblank | (__CTYPE_cntrl_space_nonblank << 4),
  279. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  280. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  281. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  282. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  283. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  284. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  285. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  286. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  287. __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4),
  288. __CTYPE_print_space_blank | (__CTYPE_punct << 4),
  289. __CTYPE_punct | (__CTYPE_punct << 4),
  290. __CTYPE_punct | (__CTYPE_punct << 4),
  291. __CTYPE_punct | (__CTYPE_punct << 4),
  292. __CTYPE_punct | (__CTYPE_punct << 4),
  293. __CTYPE_punct | (__CTYPE_punct << 4),
  294. __CTYPE_punct | (__CTYPE_punct << 4),
  295. __CTYPE_punct | (__CTYPE_punct << 4),
  296. __CTYPE_digit | (__CTYPE_digit << 4),
  297. __CTYPE_digit | (__CTYPE_digit << 4),
  298. __CTYPE_digit | (__CTYPE_digit << 4),
  299. __CTYPE_digit | (__CTYPE_digit << 4),
  300. __CTYPE_digit | (__CTYPE_digit << 4),
  301. __CTYPE_punct | (__CTYPE_punct << 4),
  302. __CTYPE_punct | (__CTYPE_punct << 4),
  303. __CTYPE_punct | (__CTYPE_punct << 4),
  304. __CTYPE_punct | (__CTYPE_alpha_upper << 4),
  305. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  306. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  307. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  308. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  309. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  310. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  311. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  312. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  313. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  314. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  315. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  316. __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4),
  317. __CTYPE_alpha_upper | (__CTYPE_punct << 4),
  318. __CTYPE_punct | (__CTYPE_punct << 4),
  319. __CTYPE_punct | (__CTYPE_punct << 4),
  320. __CTYPE_punct | (__CTYPE_alpha_lower << 4),
  321. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  322. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  323. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  324. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  325. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  326. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  327. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  328. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  329. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  330. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  331. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  332. __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4),
  333. __CTYPE_alpha_lower | (__CTYPE_punct << 4),
  334. __CTYPE_punct | (__CTYPE_punct << 4),
  335. __CTYPE_punct | (__CTYPE_cntrl_nonspace << 4),
  336. };
  337. static const char ctype_range[] = {
  338. __CTYPE_RANGES
  339. };
  340. int iswctype(wint_t wc, wctype_t desc)
  341. {
  342. unsigned char d = __CTYPE_unclassified;
  343. if (((__uwchar_t) wc) <= 0x7f) {
  344. if (desc < _CTYPE_iswxdigit) {
  345. d = WCctype[wc >> 1];
  346. d = (wc & 1) ? (d >> 4) : (d & 0xf);
  347. return ( ((unsigned char)(d - ctype_range[2*desc]))
  348. <= ctype_range[2*desc + 1] )
  349. && ((desc != _CTYPE_iswblank) || (d & 1));
  350. }
  351. if (desc == _CTYPE_iswxdigit) {
  352. return __C_isxdigit(((char) wc));
  353. }
  354. }
  355. return 0;
  356. }
  357. #endif
  358. #endif
  359. /**********************************************************************/
  360. #ifdef L_towctrans
  361. #ifdef __WCTYPE_WITH_LOCALE
  362. #ifdef SMALL_UPLOW
  363. wint_t towctrans(wint_t wc, wctrans_t desc)
  364. {
  365. unsigned int sc, n, i;
  366. __uwchar_t u = wc;
  367. /* TODO - clean up */
  368. if (ENCODING == __ctype_encoding_7_bit) {
  369. if ((((__uwchar_t) wc) > 0x7f)
  370. || (((unsigned int)(desc - _CTYPE_tolower))
  371. > (_CTYPE_toupper - _CTYPE_tolower))
  372. ){
  373. /* We're in the C/POSIX locale, so ignore non-ASCII values
  374. * as well an any mappings other than toupper or tolower. */
  375. return wc;
  376. }
  377. }
  378. if (((unsigned int)(desc - _CTYPE_tolower))
  379. <= (_CTYPE_totitle - _CTYPE_tolower)
  380. ) {
  381. if (u <= WC_TABLE_DOMAIN_MAX) {
  382. sc = u & ((1 << WCuplow_TI_SHIFT) - 1);
  383. u >>= WCuplow_TI_SHIFT;
  384. n = u & ((1 << WCuplow_II_SHIFT) - 1);
  385. u >>= WCuplow_II_SHIFT;
  386. i = ((unsigned int) WCuplow[u]) << WCuplow_II_SHIFT;
  387. i = ((unsigned int) WCuplow[WCuplow_II_LEN + i + n])
  388. << WCuplow_TI_SHIFT;
  389. i = ((unsigned int) WCuplow[WCuplow_II_LEN + WCuplow_TI_LEN
  390. + i + sc]) << 1;
  391. if (desc == _CTYPE_tolower) {
  392. ++i;
  393. }
  394. wc += WCuplow_diff[i];
  395. if (desc == _CTYPE_totitle) {
  396. /* WARNING! These special cases work for glibc 2.2.4. Changes
  397. * may be needed if the glibc locale tables are updated. */
  398. if ( (((__uwchar_t)(wc - 0x1c4)) <= (0x1cc - 0x1c4))
  399. || (wc == 0x1f1)
  400. ) {
  401. ++wc;
  402. }
  403. }
  404. }
  405. } else {
  406. /* TODO - Deal with other transliterations. */
  407. __set_errno(EINVAL);
  408. }
  409. return wc;
  410. }
  411. #else
  412. wint_t towctrans(wint_t wc, wctrans_t desc)
  413. {
  414. if (ENCODING == __ctype_encoding_7_bit) {
  415. if ((((__uwchar_t) wc) > 0x7f)
  416. || (((unsigned int)(desc - _CTYPE_tolower))
  417. > (_CTYPE_toupper - _CTYPE_tolower))
  418. ){
  419. /* We're in the C/POSIX locale, so ignore non-ASCII values
  420. * as well an any mappings other than toupper or tolower. */
  421. return wc;
  422. }
  423. }
  424. if (desc == _CTYPE_tolower) {
  425. return towlower(wc);
  426. } else if (((unsigned int)(desc - _CTYPE_toupper))
  427. <= (_CTYPE_totitle - _CTYPE_toupper)
  428. ) {
  429. wc = towupper(wc);
  430. if (desc == _CTYPE_totitle) {
  431. /* WARNING! These special cases work for glibc 2.2.4. Changes
  432. * may be needed if the glibc locale tables are updated. */
  433. if ( (((__uwchar_t)(wc - 0x1c4)) <= (0x1cc - 0x1c4))
  434. || (wc == 0x1f1)
  435. ) {
  436. ++wc;
  437. }
  438. }
  439. } else {
  440. /* TODO - Deal with other transliterations. */
  441. __set_errno(EINVAL);
  442. }
  443. return wc;
  444. }
  445. #endif
  446. #else /* __WCTYPE_WITH_LOCALE */
  447. /* Minimal support for C/POSIX locale. */
  448. wint_t towctrans(wint_t wc, wctrans_t desc)
  449. {
  450. if (((unsigned int)(desc - _CTYPE_tolower))
  451. <= (_CTYPE_toupper - _CTYPE_tolower)
  452. ) {
  453. /* Transliteration is either tolower or toupper. */
  454. if (((__uwchar_t) wc) <= 0x7f) {
  455. return (desc == _CTYPE_tolower) ? _tolower(wc) : _toupper(wc);
  456. }
  457. } else {
  458. __set_errno(EINVAL); /* Invalid transliteration. */
  459. }
  460. return wc;
  461. }
  462. #endif /* __WCTYPE_WITH_LOCALE */
  463. #endif
  464. /**********************************************************************/
  465. #ifdef L_wctrans
  466. static const char transstring[] = __CTYPE_TRANSTRING;
  467. wctrans_t wctrans(const char *property)
  468. {
  469. const unsigned char *p;
  470. int i;
  471. p = transstring;
  472. i = 1;
  473. do {
  474. if (!strcmp(property, ++p)) {
  475. return i;
  476. }
  477. ++i;
  478. p += p[-1];
  479. } while (*p);
  480. /* TODO - Add locale-specific translations. */
  481. return 0;
  482. }
  483. #endif
  484. /**********************************************************************/