locale.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. /* Copyright (C) 2002 Manuel Novoa III
  2. *
  3. * This library is free software; you can redistribute it and/or
  4. * modify it under the terms of the GNU Library General Public
  5. * License as published by the Free Software Foundation; either
  6. * version 2 of the License, or (at your option) any later version.
  7. *
  8. * This library is distributed in the hope that it will be useful,
  9. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. * Library General Public License for more details.
  12. *
  13. * You should have received a copy of the GNU Library General Public
  14. * License along with this library; if not, write to the Free
  15. * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. */
  17. /* TODO:
  18. * Implement the shared mmap code so non-mmu platforms can use this.
  19. * Implement nl_langinfo() for the stub locale support.
  20. * Add some basic collate functionality similar to what the previous
  21. * locale support had (8-bit codesets only).
  22. */
  23. #define _GNU_SOURCE
  24. #include <locale.h>
  25. #include <string.h>
  26. #include <stdlib.h>
  27. #include <stddef.h>
  28. #include <limits.h>
  29. #include <stdint.h>
  30. #include <assert.h>
  31. #ifdef __LOCALE_C_ONLY
  32. #ifdef __WCHAR_ENABLED
  33. #error wide char support requires full locale support
  34. #endif
  35. #else /* __LOCALE_C_ONLY */
  36. #define CUR_LOCALE_SPEC (__global_locale.cur_locale)
  37. #undef CODESET_LIST
  38. #define CODESET_LIST (__locale_mmap->codeset_list)
  39. /* TODO: Optional... See below. */
  40. #define __LOCALE_STRICTER_SETLOCALE
  41. #endif /* __LOCALE_C_ONLY */
  42. /**********************************************************************/
  43. #ifdef L_setlocale
  44. #ifdef __LOCALE_C_ONLY
  45. link_warning(setlocale,"the 'setlocale' function supports only C|POSIX locales");
  /* The one and only locale name this build ever reports. */
  static const char C_string[] = "C";

  /*
   * setlocale() for builds that support nothing but the C/POSIX locale.
   *
   * category: any value in the range 0..LC_ALL; anything else is rejected.
   * locale:   NULL (query), "" (implementation default), "C" or "POSIX".
   *
   * Returns a pointer to the string "C" when the request can be honoured,
   * or NULL for an invalid category or any other locale name.  No state is
   * changed, since the process is always in the C locale.
   */
  char *setlocale(int category, register const char *locale)
  {
      /* The unsigned comparison rejects negative categories as well. */
      if (((unsigned int) category) > LC_ALL) {
          return NULL;
      }

      /* A query, or a request for the default: both resolve to "C". */
      if ((locale == NULL) || (*locale == '\0')) {
          return (char *) C_string;
      }

      if ((strcmp(locale, "C") == 0) || (strcmp(locale, "POSIX") == 0)) {
          return (char *) C_string;
      }

      return NULL;    /* Any other locale is unsupported. */
  }
  57. #else /* ---------------------------------------------- __LOCALE_C_ONLY */
  58. #if !defined(NUM_LOCALES) || (NUM_LOCALES <= 1)
  59. #error locales enabled, but not data other than for C locale!
  60. #endif
  61. static unsigned char setlocale_buf[LOCALE_STRING_SIZE];
  62. #define LOCALE_NAMES (__locale_mmap->locale_names5)
  63. #define LOCALES (__locale_mmap->locales)
  64. #define LOCALE_AT_MODIFIERS (__locale_mmap->locale_at_modifiers)
  65. #define CATEGORY_NAMES (__locale_mmap->lc_names)
  66. static const char posix[] = "POSIX";
  67. static int find_locale(int category, const char *p, unsigned char *new_locale)
  68. {
  69. int i;
  70. const unsigned char *s;
  71. uint16_t n;
  72. unsigned char lang_cult, codeset;
  73. #if defined(LOCALE_AT_MODIFIERS_LENGTH) && 1
  74. /* Support standard locale handling for @-modifiers. */
  75. char buf[18]; /* TODO: 7+{max codeset name length} */
  76. const char *q;
  77. if ((q = strchr(p,'@')) != NULL) {
  78. if ((((size_t)((q-p)-5)) > (sizeof(buf) - 5)) || (p[2] != '_')) {
  79. return 0;
  80. }
  81. /* locale name at least 5 chars long and 3rd char is '_' */
  82. s = LOCALE_AT_MODIFIERS;
  83. do {
  84. if (!strcmp(s+2, q+1)) {
  85. break;
  86. }
  87. s += 2 + *s; /* TODO - fix this throughout */
  88. } while (*s);
  89. if (!*s) {
  90. return 0;
  91. }
  92. memcpy(buf, p, q-p);
  93. buf[q-p] = 0;
  94. buf[2] = s[1];
  95. p = buf;
  96. }
  97. #endif
  98. lang_cult = codeset = 0; /* Assume C and default codeset. */
  99. if (((*p == 'C') && !p[1]) || !strcmp(p, posix)) {
  100. goto FIND_LOCALE;
  101. }
  102. if (p[5] == '.') { /* Codeset specified in locale name? */
  103. /* TODO: maybe CODESET_LIST + *s ??? */
  104. /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
  105. codeset = 2;
  106. if (strcmp("UTF-8",p+6) != 0) {/* TODO - fix! */
  107. s = CODESET_LIST;
  108. do {
  109. ++codeset; /* Increment codeset first. */
  110. if (!strcmp(CODESET_LIST+*s, p+6)) {
  111. goto FIND_LANG_CULT;
  112. }
  113. } while (*++s);
  114. return 0; /* No matching codeset! */
  115. }
  116. }
  117. FIND_LANG_CULT: /* Find language_culture number. */
  118. s = LOCALE_NAMES;
  119. do { /* TODO -- do a binary search? */
  120. /* TODO -- fix gen_mmap!*/
  121. ++lang_cult; /* Increment first since C/POSIX is 0. */
  122. if (!strncmp(s,p,5)) { /* Found a matching locale name; */
  123. goto FIND_LOCALE;
  124. }
  125. s += 5;
  126. } while (lang_cult < NUM_LOCALE_NAMES);
  127. return 0; /* No matching language_culture! */
  128. FIND_LOCALE: /* Find locale row matching name and codeset */
  129. s = LOCALES;
  130. n = 1;
  131. do { /* TODO -- do a binary search? */
  132. if ((lang_cult == *s) && ((codeset == s[1]) || (codeset == s[2]))) {
  133. i = ((category == LC_ALL) ? 0 : category);
  134. s = new_locale + 2*i;
  135. do {
  136. /* Encode current locale row number. */
  137. *((unsigned char *) ++s) = (n >> 8) | 0x80;
  138. *((unsigned char *) ++s) = n & 0xff;
  139. } while (++i < category);
  140. return i; /* Return non-zero */
  141. }
  142. s += WIDTH_LOCALES;
  143. ++n;
  144. } while (n <= NUM_LOCALES); /* We started at 1!!! */
  145. return 0; /* Unsupported locale. */
  146. }
  147. char *setlocale(int category, const char *locale)
  148. {
  149. const unsigned char *p;
  150. unsigned char *s;
  151. int i;
  152. unsigned lc_mask;
  153. unsigned char new_locale[LOCALE_STRING_SIZE];
  154. if (((unsigned int)(category)) > LC_ALL) {
  155. /* TODO - set errno? SUSv3 doesn't say too. */
  156. return NULL; /* Illegal/unsupported category. */
  157. }
  158. lc_mask = 1 << category;
  159. if (category == LC_ALL) {
  160. --lc_mask;
  161. }
  162. if (!locale) { /* Request for locale category string... */
  163. DONE:
  164. strcpy(setlocale_buf, CUR_LOCALE_SPEC);
  165. #ifdef __LOCALE_STRICTER_SETLOCALE
  166. /* The standard says you can only use the string returned to restore
  167. * the category (categories) requested. This could be optional.
  168. * See below as well. */
  169. s = setlocale_buf + 1;
  170. lc_mask |= (1 << LC_ALL);
  171. do {
  172. if (!(lc_mask & 1)) {
  173. /* Encode non-selected locale flag. */
  174. s[1] = *s = 0xff;
  175. }
  176. s += 2;
  177. } while ((lc_mask >>= 1) > 1);
  178. #endif /* __LOCALE_STRICTER_SETLOCALE */
  179. return (char *) setlocale_buf;
  180. }
  181. strcpy(new_locale, CUR_LOCALE_SPEC); /* Start with current. */
  182. if (!*locale) { /* locale == "", so check environment. */
  183. i = ((category == LC_ALL) ? 0 : category);
  184. do {
  185. /* Note: SUSv3 doesn't define a fallback mechanism here. So,
  186. * if LC_ALL is invalid, we do _not_ continue trying the other
  187. * environment vars. */
  188. if (!(p = getenv("LC_ALL"))) {
  189. if (!(p = getenv(CATEGORY_NAMES + CATEGORY_NAMES[i]))) {
  190. if (!(p = getenv("LANG"))) {
  191. p = posix;
  192. }
  193. }
  194. }
  195. /* The user set something... is it valid? */
  196. /* Note: Since we don't support user-supplied locales and
  197. * alternate paths, we don't need to worry about special
  198. * handling for suid/sgid apps. */
  199. if (!find_locale(i, p, new_locale)) {
  200. return NULL;
  201. }
  202. } while (++i < category);
  203. } else if (*locale == '#') { /* Previsouly returned value. */
  204. assert(strlen(locale) == LOCALE_STRING_SIZE - 1);
  205. i = ((category == LC_ALL) ? 0 : category);
  206. p = locale + 2*i;
  207. s = new_locale + 2*i;
  208. do {
  209. #ifdef __LOCALE_STRICTER_SETLOCALE
  210. /* Only set categories that were selected in the previous
  211. * return value. Could be optional. See above as well.
  212. * NOTE: This still isn't quite right for non-LC_ALL
  213. * as it only checks the category selected to set. */
  214. if ((*p == 0xff) && (p[1] == 0xff)) {
  215. return NULL;
  216. }
  217. #endif /* __LOCALE_STRICTER_SETLOCALE */
  218. /* Note: Validate settings below. */
  219. *++s = *++p;
  220. *++s = *++p;
  221. } while (++i < category);
  222. } else if (!find_locale(category, locale, new_locale)) {
  223. return NULL;
  224. }
  225. /* TODO: Ok, everything checks out, so install the new locale. */
  226. _locale_set(new_locale);
  227. /* Everything ok, so make a copy in setlocale_buf and return. */
  228. goto DONE;
  229. }
  230. #endif /* __LOCALE_C_ONLY */
  231. #endif
  232. /**********************************************************************/
  233. #ifdef L_localeconv
  /* Note: We assume here that the compiler does the sane thing regarding
   * placement of the fields in the struct. If necessary, we could ensure
   * this using an array of offsets but at some size cost. */
  237. #ifdef __LOCALE_C_ONLY
  238. #warning localeconv is hardwired for C/POSIX locale only
  239. link_warning(localeconv,"the 'localeconv' function is hardwired for C/POSIX locale only");
  /* Result object shared by every call to localeconv(). */
  static struct lconv the_lconv;

  /* "." for the decimal point; decpt + 1 doubles as the empty string. */
  static const char decpt[] = ".";

  /*
   * localeconv() for builds that only support the C/POSIX locale.
   *
   * Returns a pointer to a static struct lconv in which decimal_point is
   * ".", every other string member is "", and every char member is
   * CHAR_MAX.  The members are assigned by name, so the result does not
   * depend on how the compiler lays out or pads the structure.
   */
  struct lconv *localeconv(void)
  {
      char *const none = (char *) (decpt + 1);    /* "" */

      the_lconv.decimal_point = (char *) decpt;
      the_lconv.thousands_sep = none;
      the_lconv.grouping = none;
      the_lconv.int_curr_symbol = none;
      the_lconv.currency_symbol = none;
      the_lconv.mon_decimal_point = none;
      the_lconv.mon_thousands_sep = none;
      the_lconv.mon_grouping = none;
      the_lconv.positive_sign = none;
      the_lconv.negative_sign = none;

      the_lconv.int_frac_digits = CHAR_MAX;
      the_lconv.frac_digits = CHAR_MAX;
      the_lconv.p_cs_precedes = CHAR_MAX;
      the_lconv.p_sep_by_space = CHAR_MAX;
      the_lconv.n_cs_precedes = CHAR_MAX;
      the_lconv.n_sep_by_space = CHAR_MAX;
      the_lconv.p_sign_posn = CHAR_MAX;
      the_lconv.n_sign_posn = CHAR_MAX;
      the_lconv.int_p_cs_precedes = CHAR_MAX;
      the_lconv.int_p_sep_by_space = CHAR_MAX;
      the_lconv.int_n_cs_precedes = CHAR_MAX;
      the_lconv.int_n_sep_by_space = CHAR_MAX;
      the_lconv.int_p_sign_posn = CHAR_MAX;
      the_lconv.int_n_sign_posn = CHAR_MAX;

      return &the_lconv;
  }
  257. #else /* __LOCALE_C_ONLY */
  258. static struct lconv the_lconv;
  259. struct lconv *localeconv(void)
  260. {
  261. register char *p = (char *) &the_lconv;
  262. register char **q = (char **) &__global_locale.decimal_point;
  263. do {
  264. *((char **)p) = *q;
  265. p += sizeof(char **);
  266. ++q;
  267. } while (p < &the_lconv.int_frac_digits);
  268. do {
  269. *p = **q;
  270. ++p;
  271. ++q;
  272. } while (p <= &the_lconv.int_n_sign_posn);
  273. return &the_lconv;
  274. }
  275. #endif /* __LOCALE_C_ONLY */
  276. #endif
  277. /**********************************************************************/
  278. #ifdef L__locale_init
  279. #ifndef __LOCALE_C_ONLY
  280. #define C_LOCALE_SELECTOR "\x23\x80\x01\x80\x01\x80\x01\x80\x01\x80\x01\x80\x01"
  281. #define LOCALE_INIT_FAILED "locale init failed!\n"
  282. #define CUR_LOCALE_SPEC (__global_locale.cur_locale)
  283. __locale_t __global_locale;
  284. void _locale_init(void)
  285. {
  286. /* TODO: mmap the locale file */
  287. /* TODO - ??? */
  288. memset(CUR_LOCALE_SPEC, 0, LOCALE_STRING_SIZE);
  289. CUR_LOCALE_SPEC[0] = '#';
  290. memcpy(__global_locale.category_item_count,
  291. __locale_mmap->lc_common_item_offsets_LEN,
  292. LC_ALL);
  293. __global_locale.category_offsets[0] = offsetof(__locale_t, codeset);
  294. __global_locale.category_offsets[1] = offsetof(__locale_t, decimal_point);
  295. __global_locale.category_offsets[2] = offsetof(__locale_t, int_curr_symbol);
  296. __global_locale.category_offsets[3] = offsetof(__locale_t, abday_1);
  297. /* __global_locale.category_offsets[4] = offsetof(__locale_t, collate???); */
  298. __global_locale.category_offsets[5] = offsetof(__locale_t, yesexpr);
  299. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  300. __global_locale.tbl8ctype
  301. = (const unsigned char *) &__locale_mmap->tbl8ctype;
  302. __global_locale.tbl8uplow
  303. = (const unsigned char *) &__locale_mmap->tbl8uplow;
  304. #ifdef __WCHAR_ENABLED
  305. __global_locale.tbl8c2wc
  306. = (const uint16_t *) &__locale_mmap->tbl8c2wc;
  307. __global_locale.tbl8wc2c
  308. = (const unsigned char *) &__locale_mmap->tbl8wc2c;
  309. /* translit */
  310. #endif /* __WCHAR_ENABLED */
  311. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  312. #ifdef __WCHAR_ENABLED
  313. __global_locale.tblwctype
  314. = (const unsigned char *) &__locale_mmap->tblwctype;
  315. __global_locale.tblwuplow
  316. = (const unsigned char *) &__locale_mmap->tblwuplow;
  317. __global_locale.tblwuplow_diff
  318. = (const uint16_t *) &__locale_mmap->tblwuplow_diff;
  319. __global_locale.tblwcomb
  320. = (const unsigned char *) &__locale_mmap->tblwcomb;
  321. /* width?? */
  322. #endif /* __WCHAR_ENABLED */
  323. _locale_set(C_LOCALE_SELECTOR);
  324. }
  325. static const char ascii[] = "ASCII";
  326. static const char utf8[] = "UTF-8";
  327. void _locale_set(const unsigned char *p)
  328. {
  329. const char **x;
  330. unsigned char *s = CUR_LOCALE_SPEC + 1;
  331. const size_t *stp;
  332. const unsigned char *r;
  333. const uint16_t *io;
  334. const uint16_t *ii;
  335. const unsigned char *d;
  336. int row; /* locale row */
  337. int crow; /* category row */
  338. int len;
  339. int c;
  340. int i = 0;
  341. ++p;
  342. do {
  343. if ((*p != *s) || (p[1] != s[1])) {
  344. row = (((int)(*p & 0x7f)) << 8) + p[1] - 1;
  345. #ifndef NDEBUG
  346. assert(row < NUM_LOCALES);
  347. #endif
  348. *s = *p;
  349. s[1] = p[1];
  350. if (i == LC_CTYPE) {
  351. c = __locale_mmap->locales[ WIDTH_LOCALES * row + 2 ]; /* codeset */
  352. if (c <= 2) {
  353. if (c == 2) {
  354. __global_locale.codeset = utf8;
  355. __global_locale.encoding = __ctype_encoding_utf8;
  356. /* TODO - fix for bcc */
  357. __global_locale.mb_cur_max = 6;
  358. } else {
  359. assert(c==1);
  360. __global_locale.codeset = ascii;
  361. __global_locale.encoding = __ctype_encoding_7_bit;
  362. __global_locale.mb_cur_max = 1;
  363. }
  364. } else {
  365. const codeset_8_bit_t *c8b;
  366. r = CODESET_LIST;
  367. __global_locale.codeset = r + r[c -= 3];
  368. __global_locale.encoding = __ctype_encoding_8_bit;
  369. #warning REMINDER: update 8 bit mb_cur_max when trasnlit implemented!
  370. /* TODO - update when translit implemented! */
  371. __global_locale.mb_cur_max = 1;
  372. c8b = __locale_mmap->codeset_8_bit + c;
  373. #ifdef __CTYPE_HAS_8_BIT_LOCALES
  374. __global_locale.idx8ctype = c8b->idx8ctype;
  375. __global_locale.idx8uplow = c8b->idx8uplow;
  376. #ifdef __WCHAR_ENABLED
  377. __global_locale.idx8c2wc = c8b->idx8c2wc;
  378. __global_locale.idx8wc2c = c8b->idx8wc2c;
  379. /* translit */
  380. #endif /* __WCHAR_ENABLED */
  381. #endif /* __CTYPE_HAS_8_BIT_LOCALES */
  382. }
  383. } else if ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0) {
  384. crow = __locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ]
  385. * len;
  386. x = (const char **)(((char *) &__global_locale)
  387. + __global_locale.category_offsets[i]);
  388. stp = __locale_mmap->lc_common_tbl_offsets + 4*i;
  389. r = (const unsigned char *)( ((char *)__locale_mmap) + *stp );
  390. io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
  391. ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
  392. d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp );
  393. for (c=0 ; c < len ; c++) {
  394. *(x + c) = d + ii[ r[crow + c] + io[c] ];
  395. }
  396. }
  397. }
  398. ++i;
  399. p += 2;
  400. s += 2;
  401. } while (i < LC_ALL);
  402. }
  403. #endif /* __LOCALE_C_ONLY */
  404. #endif
  405. /**********************************************************************/
  406. #ifdef L_nl_langinfo
  407. #ifndef __LOCALE_C_ONLY
  408. #include <langinfo.h>
  409. #include <nl_types.h>
  410. static const char empty[] = "";
  411. char *nl_langinfo(nl_item item)
  412. {
  413. unsigned int c = _NL_ITEM_CATEGORY(item);
  414. unsigned int i = _NL_ITEM_INDEX(item);
  415. if ((c < LC_ALL) && (i < __global_locale.category_item_count[c])) {
  416. return ((char **)(((char *) &__global_locale)
  417. + __global_locale.category_offsets[c]))[i];
  418. }
  419. return (char *) empty;
  420. }
  421. #endif /* __LOCALE_C_ONLY */
  422. #endif
  423. /**********************************************************************/