wctype.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480
  1. /* Copyright (C) 2002 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
  18. *
  19. * Besides uClibc, I'm using this code in my libc for elks, which is
  20. * a 16-bit environment with a fairly limited compiler. It would make
  21. * things much easier for me if this file isn't modified unnecessarily.
  22. * In particular, please put any new or replacement functions somewhere
  23. * else, and modify the makefile to use your version instead.
  24. * Thanks. Manuel
  25. *
  26. * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
  27. #define _GNU_SOURCE
  28. #define __NO_CTYPE
  29. #include <wctype.h>
  30. #include <assert.h>
  31. #include <string.h>
  32. #include <errno.h>
  33. #include <locale.h>
  34. /* We know wide char support is enabled. We wouldn't be here otherwise. */
  35. /* Define this if you want to unify the towupper and towlower code in the
  36. * towctrans function. */
  37. /* #define SMALL_UPLOW */
  38. #define __WCTYPE_WITH_LOCALE
  39. /**********************************************************************/
  /* Token-pasting helper: glues its two arguments into a single token.
   * Left alone if an earlier header already supplied it. */
  #if !defined(__PASTE)
  #define __PASTE(LEFT,RIGHT) LEFT ## RIGHT
  #endif
  /* Both helpers expect a variable named `wc' to be in scope where they
   * are expanded:
   *   C_MACRO(foo)  ->  __C_foo(wc)
   *   CT_MACRO(foo) ->  __ctype_foo(wc)
   */
  #define C_MACRO(NAME) __PASTE(__C_,NAME)(wc)
  #define CT_MACRO(NAME) __PASTE(__ctype_,NAME)(wc)
  45. /**********************************************************************/
  46. /* TODO: fix this! */
  #ifdef __WCTYPE_WITH_LOCALE

  /* Shorthand names for the wide-character tables and the encoding tag
   * stored in the global locale object. */
  #define WCctype (__global_locale.tblwctype)
  #define WCuplow (__global_locale.tblwuplow)
  #define WCcomb (__global_locale.tblwcomb)
  /* Historical misspelling of WCcomb; kept so existing users still build. */
  #define WCcmob WCcomb
  #define WCuplow_diff (__global_locale.tblwuplow_diff)
  #define ENCODING (__global_locale.encoding)

  /* Expands to the complete definition of classification function NAME,
   * implemented by asking iswctype() about descriptor _CTYPE_<NAME>.
   * The expansion ends with the closing brace of the function, so no
   * semicolon is needed after an invocation. */
  #define ISW_FUNC_BODY(NAME) \
  int NAME (wint_t wc) \
  { \
      return iswctype(wc, __PASTE(_CTYPE_,NAME)); \
  }

  #else /* __WCTYPE_WITH_LOCALE */

  /* Without locale support NAME simply evaluates the __C_<NAME>(wc) macro.
   * The expansion ends with the closing brace of the function, so no
   * semicolon is needed after an invocation. */
  #define ISW_FUNC_BODY(NAME) \
  int NAME (wint_t wc) \
  { \
      return C_MACRO(NAME); \
  }

  #endif /* __WCTYPE_WITH_LOCALE */
  65. /**********************************************************************/
  66. #ifdef L_iswalnum
  67. ISW_FUNC_BODY(iswalnum);
  68. #endif
  69. /**********************************************************************/
  70. #ifdef L_iswalpha
  71. ISW_FUNC_BODY(iswalpha);
  72. #endif
  73. /**********************************************************************/
  74. #ifdef L_iswblank
  75. ISW_FUNC_BODY(iswblank);
  76. #endif
  77. /**********************************************************************/
  78. #ifdef L_iswcntrl
  79. ISW_FUNC_BODY(iswcntrl);
  80. #endif
  81. /**********************************************************************/
  82. #ifdef L_iswdigit
  /* Digit test for a wide character.
   *
   * Unlike the ISW_FUNC_BODY-generated classifiers, this one always goes
   * through the __C_iswdigit() macro, whether or not locale support is
   * compiled in.  Returns whatever that macro yields for wc. */
  int iswdigit(wint_t wc)
  {
      const int is_digit = __C_iswdigit(wc);

      return is_digit;
  }
  87. #endif
  88. /**********************************************************************/
  89. #ifdef L_iswgraph
  90. ISW_FUNC_BODY(iswgraph);
  91. #endif
  92. /**********************************************************************/
  93. #ifdef L_iswlower
  94. ISW_FUNC_BODY(iswlower);
  95. #endif
  96. /**********************************************************************/
  97. #ifdef L_iswprint
  98. ISW_FUNC_BODY(iswprint);
  99. #endif
  100. /**********************************************************************/
  101. #ifdef L_iswpunct
  102. ISW_FUNC_BODY(iswpunct);
  103. #endif
  104. /**********************************************************************/
  105. #ifdef L_iswspace
  106. ISW_FUNC_BODY(iswspace);
  107. #endif
  108. /**********************************************************************/
  109. #ifdef L_iswupper
  110. ISW_FUNC_BODY(iswupper);
  111. #endif
  112. /**********************************************************************/
  113. #ifdef L_iswxdigit
  /* Hexadecimal-digit test for a wide character.
   *
   * Always evaluated through the __C_iswxdigit() macro, whether or not
   * locale support is compiled in.  Returns whatever that macro yields
   * for wc. */
  int iswxdigit(wint_t wc)
  {
      const int is_xdigit = __C_iswxdigit(wc);

      return is_xdigit;
  }
  118. #endif
  119. /**********************************************************************/
  120. #ifdef L_towlower
  #ifdef __WCTYPE_WITH_LOCALE
  #ifdef SMALL_UPLOW

  /* Size-optimised build: share the lookup code that lives in towctrans(). */
  wint_t towlower(wint_t wc)
  {
      return towctrans(wc, _CTYPE_tolower);
  }

  #else /* !SMALL_UPLOW */

  /* Map wc to lower case using the locale's WCuplow tables.
   *
   * With the 7-bit encoding the tables are bypassed in favour of
   * __C_towlower().  Values above WC_TABLE_DOMAIN_MAX are returned
   * unchanged.  Otherwise wc is split into three bit fields that drive a
   * three-stage walk through WCuplow; the final entry, doubled, selects a
   * pair in WCuplow_diff, and the second member of that pair is added
   * to wc. */
  wint_t towlower(wint_t wc)
  {
      unsigned int low_bits, mid_bits, idx;
      __uwchar_t code = wc;

      if (ENCODING == __ctype_encoding_7_bit) {
          /* C/POSIX locale: the tables are not consulted. */
          return __C_towlower(wc);
      }

      if (code > WC_TABLE_DOMAIN_MAX) {
          /* Outside the range the tables describe. */
          return wc;
      }

      /* Peel the index fields off, least significant first. */
      low_bits = code & ((1 << WCuplow_TI_SHIFT) - 1);
      code >>= WCuplow_TI_SHIFT;
      mid_bits = code & ((1 << WCuplow_II_SHIFT) - 1);
      code >>= WCuplow_II_SHIFT;

      /* Stage 1: the remaining high bits index the first table section. */
      idx = ((unsigned int) WCuplow[code]) << WCuplow_II_SHIFT;
      /* Stage 2: second section, located after WCuplow_II_LEN entries. */
      idx = ((unsigned int) WCuplow[WCuplow_II_LEN + idx + mid_bits])
          << WCuplow_TI_SHIFT;
      /* Stage 3: third section; doubling turns the entry into a pair index. */
      idx = ((unsigned int) WCuplow[WCuplow_II_LEN + WCuplow_TI_LEN
                                    + idx + low_bits]) << 1;

      /* The odd slot of the pair holds the to-lower adjustment. */
      wc += WCuplow_diff[idx + 1];
      return wc;
  }

  #endif /* SMALL_UPLOW */
  #else /* !__WCTYPE_WITH_LOCALE */

  /* No locale support compiled in: only the __C_towlower() mapping exists. */
  wint_t towlower(wint_t wc)
  {
      return __C_towlower(wc);
  }

  #endif /* __WCTYPE_WITH_LOCALE */
  157. #endif
  158. /**********************************************************************/
  159. #ifdef L_towupper
  #ifdef __WCTYPE_WITH_LOCALE
  #ifdef SMALL_UPLOW

  /* Size-optimised build: share the lookup code that lives in towctrans(). */
  wint_t towupper(wint_t wc)
  {
      return towctrans(wc, _CTYPE_toupper);
  }

  #else /* !SMALL_UPLOW */

  /* Map wc to upper case using the locale's WCuplow tables.
   *
   * With the 7-bit encoding the tables are bypassed in favour of
   * __C_towupper().  Values above WC_TABLE_DOMAIN_MAX are returned
   * unchanged.  Otherwise wc is split into three bit fields that drive a
   * three-stage walk through WCuplow; the final entry, doubled, selects a
   * pair in WCuplow_diff, and the first member of that pair is added
   * to wc. */
  wint_t towupper(wint_t wc)
  {
      unsigned int low_bits, mid_bits, idx;
      __uwchar_t code = wc;

      if (ENCODING == __ctype_encoding_7_bit) {
          /* C/POSIX locale: the tables are not consulted. */
          return __C_towupper(wc);
      }

      if (code > WC_TABLE_DOMAIN_MAX) {
          /* Outside the range the tables describe. */
          return wc;
      }

      /* Peel the index fields off, least significant first. */
      low_bits = code & ((1 << WCuplow_TI_SHIFT) - 1);
      code >>= WCuplow_TI_SHIFT;
      mid_bits = code & ((1 << WCuplow_II_SHIFT) - 1);
      code >>= WCuplow_II_SHIFT;

      /* Stage 1: the remaining high bits index the first table section. */
      idx = ((unsigned int) WCuplow[code]) << WCuplow_II_SHIFT;
      /* Stage 2: second section, located after WCuplow_II_LEN entries. */
      idx = ((unsigned int) WCuplow[WCuplow_II_LEN + idx + mid_bits])
          << WCuplow_TI_SHIFT;
      /* Stage 3: third section; doubling turns the entry into a pair index. */
      idx = ((unsigned int) WCuplow[WCuplow_II_LEN + WCuplow_TI_LEN
                                    + idx + low_bits]) << 1;

      /* The even slot of the pair holds the to-upper adjustment. */
      wc += WCuplow_diff[idx];
      return wc;
  }

  #endif /* SMALL_UPLOW */
  #else /* !__WCTYPE_WITH_LOCALE */

  /* No locale support compiled in: only the __C_towupper() mapping exists. */
  wint_t towupper(wint_t wc)
  {
      return __C_towupper(wc);
  }

  #endif /* __WCTYPE_WITH_LOCALE */
  196. #endif
  197. /**********************************************************************/
  198. #ifdef L_wctype
  199. static const unsigned char typestring[] = __CTYPE_TYPESTRING;
  200. /* extern const unsigned char typestring[]; */
  201. wctype_t wctype(const char *property)
  202. {
  203. const unsigned char *p;
  204. int i;
  205. p = typestring;
  206. i = 1;
  207. do {
  208. if (!strcmp(property, ++p)) {
  209. return i;
  210. }
  211. ++i;
  212. p += p[-1];
  213. } while (*p);
  214. /* TODO - Add locale-specific classifications. */
  215. return 0;
  216. }
  217. #endif
  218. /**********************************************************************/
  219. #ifdef L_iswctype
  220. #warning TODO: need to fix locale ctype table lookup stuff
  221. #if 0
  222. extern const char ctype_range[];
  223. #else
  224. static const char ctype_range[] = {
  225. __CTYPE_RANGES
  226. };
  227. #endif
  228. #warning TODO: need to handle combining class!
  229. #define WCctype_TI_MASK ((1 << WCctype_TI_SHIFT) - 1)
  230. #define WCctype_II_MASK ((1 << WCctype_II_SHIFT) - 1)
  231. int iswctype(wint_t wc, wctype_t desc)
  232. {
  233. unsigned int sc, n, i0, i1;
  234. unsigned char d = __CTYPE_unclassified;
  235. if ((ENCODING != __ctype_encoding_7_bit) || (((__uwchar_t) wc) <= 0x7f)){
  236. if (desc < _CTYPE_iswxdigit) {
  237. if (((__uwchar_t) wc) <= WC_TABLE_DOMAIN_MAX) {
  238. /* From here on, we know wc > 0. */
  239. sc = wc & WCctype_TI_MASK;
  240. wc >>= WCctype_TI_SHIFT;
  241. n = wc & WCctype_II_MASK;
  242. wc >>= WCctype_II_SHIFT;
  243. i0 = WCctype[wc];
  244. i0 <<= WCctype_II_SHIFT;
  245. i1 = WCctype[WCctype_II_LEN + i0 + n];
  246. i1 <<= (WCctype_TI_SHIFT-1);
  247. d = WCctype[WCctype_II_LEN + WCctype_TI_LEN + i1 + (sc >> 1)];
  248. d = (sc & 1) ? (d >> 4) : (d & 0xf);
  249. } else if ( ((((__uwchar_t)(wc - 0xe0020UL)) <= 0x5f)
  250. || (wc == 0xe0001UL))
  251. || ( (((__uwchar_t)(wc - 0xf0000UL)) < 0x20000UL)
  252. && ((wc & 0xffffU) <= 0xfffdU))
  253. ) {
  254. d = __CTYPE_punct;
  255. }
  256. return ( ((unsigned char)(d - ctype_range[2*desc]))
  257. <= ctype_range[2*desc + 1] )
  258. && ((desc != _CTYPE_iswblank) || (d & 1));
  259. }
  260. /* TODO - Add locale-specific classifications. */
  261. return (desc == _CTYPE_iswxdigit) ? __C_iswxdigit(wc) : 0;
  262. }
  263. return 0;
  264. }
  265. #endif
  266. /**********************************************************************/
  267. #ifdef L_towctrans
  #ifdef __WCTYPE_WITH_LOCALE
  #ifdef SMALL_UPLOW

  /* Apply the case mapping selected by desc to wc (self-contained variant
   * that carries the table lookup used by towlower()/towupper()).
   *
   * - 7-bit encoding: wc is returned untouched when it is above 0x7f or
   *   when desc is anything other than tolower/toupper.
   * - desc outside tolower..totitle: errno is set to EINVAL and wc is
   *   returned unchanged.
   * - wc above WC_TABLE_DOMAIN_MAX: returned unchanged.
   * - Otherwise the WCuplow tables yield a pair index into WCuplow_diff;
   *   tolower adds the odd slot, toupper and totitle add the even slot.
   *   totitle then bumps a handful of results by one (see below). */
  wint_t towctrans(wint_t wc, wctrans_t desc)
  {
      unsigned int low_bits, mid_bits, idx;
      __uwchar_t code = wc;

      /* TODO - clean up */
      if (ENCODING == __ctype_encoding_7_bit) {
          if ((((__uwchar_t) wc) > 0x7f)
              || (((unsigned int)(desc - _CTYPE_tolower))
                  > (_CTYPE_toupper - _CTYPE_tolower))
              ){
              /* We're in the C/POSIX locale, so ignore non-ASCII values
               * as well as any mappings other than toupper or tolower. */
              return wc;
          }
      }

      if (((unsigned int)(desc - _CTYPE_tolower))
          > (_CTYPE_totitle - _CTYPE_tolower)
          ) {
          /* TODO - Deal with other transliterations. */
          __set_errno(EINVAL);
          return wc;
      }

      if (code > WC_TABLE_DOMAIN_MAX) {
          return wc;
      }

      /* Three-stage walk through WCuplow, least significant field first. */
      low_bits = code & ((1 << WCuplow_TI_SHIFT) - 1);
      code >>= WCuplow_TI_SHIFT;
      mid_bits = code & ((1 << WCuplow_II_SHIFT) - 1);
      code >>= WCuplow_II_SHIFT;

      idx = ((unsigned int) WCuplow[code]) << WCuplow_II_SHIFT;
      idx = ((unsigned int) WCuplow[WCuplow_II_LEN + idx + mid_bits])
          << WCuplow_TI_SHIFT;
      idx = ((unsigned int) WCuplow[WCuplow_II_LEN + WCuplow_TI_LEN
                                    + idx + low_bits]) << 1;

      if (desc == _CTYPE_tolower) {
          ++idx;    /* odd slot of the pair is the to-lower adjustment */
      }
      wc += WCuplow_diff[idx];

      if ((desc == _CTYPE_totitle)
          /* WARNING! These special cases work for glibc 2.2.4.  Changes
           * may be needed if the glibc locale tables are updated. */
          && ( (((__uwchar_t)(wc - 0x1c4)) <= (0x1cc - 0x1c4))
               || (wc == 0x1f1) )
          ) {
          ++wc;
      }
      return wc;
  }

  #else /* !SMALL_UPLOW */

  /* Apply the case mapping selected by desc to wc, delegating the actual
   * conversion to towlower()/towupper().
   *
   * - 7-bit encoding: wc is returned untouched when it is above 0x7f or
   *   when desc is anything other than tolower/toupper.
   * - tolower: result of towlower(wc).
   * - toupper/totitle: result of towupper(wc); totitle then bumps a
   *   handful of results by one (see below).
   * - any other desc: errno is set to EINVAL and wc is returned unchanged. */
  wint_t towctrans(wint_t wc, wctrans_t desc)
  {
      if (ENCODING == __ctype_encoding_7_bit) {
          if ((((__uwchar_t) wc) > 0x7f)
              || (((unsigned int)(desc - _CTYPE_tolower))
                  > (_CTYPE_toupper - _CTYPE_tolower))
              ){
              /* We're in the C/POSIX locale, so ignore non-ASCII values
               * as well as any mappings other than toupper or tolower. */
              return wc;
          }
      }

      if (desc == _CTYPE_tolower) {
          return towlower(wc);
      }

      if (((unsigned int)(desc - _CTYPE_toupper))
          > (_CTYPE_totitle - _CTYPE_toupper)
          ) {
          /* TODO - Deal with other transliterations. */
          __set_errno(EINVAL);
          return wc;
      }

      wc = towupper(wc);
      if ((desc == _CTYPE_totitle)
          /* WARNING! These special cases work for glibc 2.2.4.  Changes
           * may be needed if the glibc locale tables are updated. */
          && ( (((__uwchar_t)(wc - 0x1c4)) <= (0x1cc - 0x1c4))
               || (wc == 0x1f1) )
          ) {
          ++wc;
      }
      return wc;
  }

  #endif /* SMALL_UPLOW */
  #else /* !__WCTYPE_WITH_LOCALE */
  /* No towctrans() is provided when locale support is compiled out. */
  #endif /* __WCTYPE_WITH_LOCALE */
  355. #endif
  356. /**********************************************************************/
  357. #ifdef L_wctrans
  358. static const char transstring[] = __CTYPE_TRANSTRING;
  359. wctrans_t wctrans(const char *property)
  360. {
  361. const unsigned char *p;
  362. int i;
  363. p = transstring;
  364. i = 1;
  365. do {
  366. if (!strcmp(property, ++p)) {
  367. return i;
  368. }
  369. ++i;
  370. p += p[-1];
  371. } while (*p);
  372. /* TODO - Add locale-specific translations. */
  373. return 0;
  374. }
  375. #endif
  376. /**********************************************************************/