wstdio.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. /*
  2. * ANSI/ISO C99 says
  3. 9 Although both text and binary wide-oriented streams are conceptually sequences of wide
  4. characters, the external file associated with a wide-oriented stream is a sequence of
  5. multibyte characters, generalized as follows:
  6. --- Multibyte encodings within files may contain embedded null bytes (unlike multibyte
  7. encodings valid for use internal to the program).
  8. --- A file need not begin nor end in the initial shift state. 225)
  9. * How do we deal with this?
  10. * Should auto_wr_transition init the mbstate object?
  11. */
  12. #define _GNU_SOURCE
  13. #include <stdio.h>
  14. #include <wchar.h>
  15. #include <limits.h>
  16. #include <errno.h>
  17. #include <assert.h>
  /*
   * Entry-point generators for the wide-character stdio routines.
   *
   * Each macro is written directly in front of a function body and supplies
   * the function header(s) for it:
   *
   *   - Thread-safe build (__STDIO_THREADSAFE defined): NAME is emitted as a
   *     complete wrapper that locks STREAM, calls NAME##_unlocked ARGS,
   *     unlocks STREAM again and returns the result.  The body that follows
   *     the macro invocation becomes NAME##_unlocked.
   *
   *   - Otherwise: the body that follows becomes NAME itself, and
   *     NAME##_unlocked is requested as a second symbol for it, via
   *     strong_alias() or, for bcc, an asm export directive.
   *
   * UNLOCKED is UNLOCKED_STREAM for functions whose stream parameter is
   * literally named "stream"; UNLOCKED_VOID_RETURN is the void-returning
   * variant (it also expects a parameter named "stream").
   */
  #ifdef __STDIO_THREADSAFE

  #include <pthread.h>

  #define UNLOCKED_STREAM(RETURNTYPE,NAME,PARAMS,ARGS,STREAM) \
      RETURNTYPE NAME PARAMS \
      { \
          RETURNTYPE retval; \
          __STDIO_THREADLOCK(STREAM); \
          retval = NAME##_unlocked ARGS ; \
          __STDIO_THREADUNLOCK(STREAM); \
          return retval; \
      } \
      RETURNTYPE NAME##_unlocked PARAMS

  #define UNLOCKED_VOID_RETURN(NAME,PARAMS,ARGS) \
      void NAME PARAMS \
      { \
          __STDIO_THREADLOCK(stream); \
          NAME##_unlocked ARGS ; \
          __STDIO_THREADUNLOCK(stream); \
      } \
      void NAME##_unlocked PARAMS

  /* Lock operations on the open-stream list; all three act on
   * _stdio_openlist_lock, which is declared outside this section. */
  #define __STDIO_THREADLOCK_OPENLIST \
      pthread_mutex_lock(&_stdio_openlist_lock)

  #define __STDIO_THREADUNLOCK_OPENLIST \
      pthread_mutex_unlock(&_stdio_openlist_lock)

  #define __STDIO_THREADTRYLOCK_OPENLIST \
      pthread_mutex_trylock(&_stdio_openlist_lock)

  #else /* !__STDIO_THREADSAFE */

  #ifdef __BCC__

  /* NOTE(review): NAME appears inside string literals here, so this
   * presumably relies on bcc's preprocessor substituting macro parameters
   * within strings -- confirm before touching the parameter name. */
  #define UNLOCKED_STREAM(RETURNTYPE,NAME,PARAMS,ARGS,STREAM) \
      asm(".text\nexport _" "NAME" "_unlocked\n_" "NAME" "_unlocked = _" "NAME"); \
      RETURNTYPE NAME PARAMS

  #define UNLOCKED_VOID_RETURN(NAME,PARAMS,ARGS) \
      asm(".text\nexport _" "NAME" "_unlocked\n_" "NAME" "_unlocked = _" "NAME"); \
      void NAME PARAMS

  #else /* !__BCC__ */

  #define UNLOCKED_STREAM(RETURNTYPE,NAME,PARAMS,ARGS,STREAM) \
      strong_alias(NAME,NAME##_unlocked) \
      RETURNTYPE NAME PARAMS

  #define UNLOCKED_VOID_RETURN(NAME,PARAMS,ARGS) \
      strong_alias(NAME,NAME##_unlocked) \
      void NAME PARAMS

  #endif /* __BCC__ */

  /* Without thread support the open-list lock operations expand to nothing. */
  #define __STDIO_THREADLOCK_OPENLIST
  #define __STDIO_THREADUNLOCK_OPENLIST

  #endif /* __STDIO_THREADSAFE */

  /* Same definition in both configurations: the locked stream is the
   * parameter named "stream". */
  #define UNLOCKED(RETURNTYPE,NAME,PARAMS,ARGS) \
      UNLOCKED_STREAM(RETURNTYPE,NAME,PARAMS,ARGS,stream)
  70. #ifndef __STDIO_BUFFERS
  71. #error stdio buffers are currently required for wide i/o
  72. #endif
  73. /**********************************************************************/
  74. #ifdef L_fwide
  75. /* TODO: According to SUSv3 should return EBADF if invalid stream. */
  76. int fwide(register FILE *stream, int mode)
  77. {
  78. __STDIO_THREADLOCK(stream);
  79. if (mode && !(stream->modeflags & (__FLAG_WIDE|__FLAG_NARROW))) {
  80. stream->modeflags |= ((mode > 0) ? __FLAG_WIDE : __FLAG_NARROW);
  81. }
  82. mode = (stream->modeflags & __FLAG_WIDE)
  83. - (stream->modeflags & __FLAG_NARROW);
  84. __STDIO_THREADUNLOCK(stream);
  85. return mode;
  86. }
  87. #endif
  88. /**********************************************************************/
  89. #ifdef L_fgetwc
  90. static void munge_stream(register FILE *stream, unsigned char *buf)
  91. {
  92. #ifdef __STDIO_GETC_MACRO
  93. stream->bufgetc =
  94. #endif
  95. #ifdef __STDIO_PUTC_MACRO
  96. stream->bufputc =
  97. #endif
  98. stream->bufpos = stream->bufread = stream->bufend = stream->bufstart = buf;
  99. }
  100. UNLOCKED(wint_t,fgetwc,(register FILE *stream),(stream))
  101. {
  102. wint_t wi;
  103. wchar_t wc[1];
  104. int n;
  105. size_t r;
  106. unsigned char c[1];
  107. unsigned char sbuf[1];
  108. unsigned char ungot_width; /* Support ftell after wscanf ungetwc. */
  109. wi = WEOF; /* Prepare for failure. */
  110. if (stream->modeflags & __FLAG_NARROW) {
  111. stream->modeflags |= __FLAG_ERROR;
  112. __set_errno(EBADF);
  113. goto DONE;
  114. }
  115. stream->modeflags |= __FLAG_WIDE;
  116. if (stream->modeflags & __MASK_UNGOT) {/* Any ungetwc()s? */
  117. assert( (stream->modeflags & (__FLAG_READING|__FLAG_ERROR))
  118. == __FLAG_READING);
  119. wi = stream->ungot[(--stream->modeflags) & __MASK_UNGOT];
  120. stream->ungot[1] = 0;
  121. goto DONE;
  122. }
  123. if (!stream->bufstart) { /* Ugh... stream isn't buffered! */
  124. /* Munge the stream temporarily to use a 1-byte buffer. */
  125. munge_stream(stream, sbuf);
  126. ++stream->bufend;
  127. }
  128. ungot_width = 0;
  129. LOOP:
  130. if ((n = stream->bufread - stream->bufpos) == 0) {
  131. goto FILL_BUFFER;
  132. }
  133. r = mbrtowc(wc, stream->bufpos, n, &stream->state);
  134. if (((ssize_t) r) >= 0) { /* Single byte... */
  135. if (r == 0) { /* Nul wide char... means 0 byte for us so */
  136. ++r; /* increment r and handle below as single. */
  137. }
  138. stream->bufpos += r;
  139. stream->ungot_width[0] = ungot_width + r;
  140. wi = *wc;
  141. goto DONE;
  142. }
  143. if (r == ((size_t) -2)) {
  144. /* Potentially valid but incomplete and no more buffered. */
  145. stream->bufpos += n; /* Update bufpos for stream. */
  146. ungot_width += n;
  147. FILL_BUFFER:
  148. if (_stdio_fread(c, (size_t) 1, stream) > 0) {
  149. assert(stream->bufpos == stream->bufstart + 1);
  150. *--stream->bufpos = *c; /* Insert byte into buffer. */
  151. goto LOOP;
  152. }
  153. if (!__FERROR(stream)) { /* EOF with no error. */
  154. if (!stream->state.mask) { /* No partially complete wchar. */
  155. goto DONE;
  156. }
  157. /* EOF but partially complete wchar. */
  158. /* TODO: should EILSEQ be set? */
  159. __set_errno(EILSEQ);
  160. }
  161. }
  162. /* If we reach here, either r == ((size_t)-1) and mbrtowc set errno
  163. * to EILSEQ, or r == ((size_t)-2) and stream is in an error state
  164. * or at EOF with a partially complete wchar. Make sure stream's
  165. * error indicator is set. */
  166. stream->modeflags |= __FLAG_ERROR;
  167. DONE:
  168. if (stream->bufstart == sbuf) { /* Need to un-munge the stream. */
  169. munge_stream(stream, NULL);
  170. }
  171. return wi;
  172. }
  173. strong_alias(fgetwc_unlocked,getwc_unlocked);
  174. strong_alias(fgetwc,getwc);
  175. #endif
  176. /**********************************************************************/
  177. #ifdef L_getwchar
  178. UNLOCKED_STREAM(wint_t,getwchar,(void),(),stdin)
  179. {
  180. register FILE *stream = stdin; /* This helps bcc optimize. */
  181. return fgetwc_unlocked(stream);
  182. }
  183. #endif
  184. /**********************************************************************/
  185. #ifdef L_fgetws
  186. UNLOCKED(wchar_t *,fgetws,(wchar_t *__restrict ws, int n,
  187. FILE *__restrict stream),(ws, n, stream))
  188. {
  189. register wchar_t *p = ws;
  190. wint_t wi;
  191. while ((n > 1)
  192. && ((wi = fgetwc_unlocked(stream)) != WEOF)
  193. && ((*p++ = wi) != '\n')
  194. ) {
  195. --n;
  196. }
  197. if (p == ws) {
  198. /* TODO -- should we set errno? */
  199. /* if (n <= 0) { */
  200. /* errno = EINVAL; */
  201. /* } */
  202. return NULL;
  203. }
  204. *p = 0;
  205. return ws;
  206. }
  207. #endif
  208. /**********************************************************************/
  209. #ifdef L_fputwc
  210. UNLOCKED(wint_t,fputwc,(wchar_t wc, FILE *stream),(wc, stream))
  211. {
  212. #if 0
  213. size_t r;
  214. char buf[MB_LEN_MAX];
  215. if (stream->modeflags & __FLAG_NARROW) {
  216. stream->modeflags |= __FLAG_ERROR;
  217. __set_errno(EBADF);
  218. return WEOF;
  219. }
  220. stream->modeflags |= __FLAG_WIDE;
  221. /* TODO:
  222. * If stream is in reading state with bad mbstate object, what to do?
  223. * Should we check the state first? Should we check error indicator?
  224. * Should we check reading or even read-only?
  225. */
  226. /* It looks like the only ANSI/ISO C99 - blessed way of manipulating
  227. * the stream's mbstate object is through fgetpos/fsetpos. */
  228. r = wcrtomb(buf, wc, &stream->state);
  229. return (r != ((size_t) -1) && (r == _stdio_fwrite(buf, r, stream)))
  230. ? wc : WEOF;
  231. #elif 0
  232. /* this is broken if wc == 0 !!! */
  233. wchar_t wbuf[2];
  234. wbuf[0] = wc;
  235. wbuf[1] = 0;
  236. return (fputws_unlocked(wbuf, stream) > 0) ? wc : WEOF;
  237. #else
  238. size_t n;
  239. char buf[MB_LEN_MAX];
  240. if (stream->modeflags & __FLAG_NARROW) {
  241. stream->modeflags |= __FLAG_ERROR;
  242. __set_errno(EBADF);
  243. return WEOF;
  244. }
  245. stream->modeflags |= __FLAG_WIDE;
  246. return (((n = wcrtomb(buf, wc, &stream->state)) != ((size_t)-1)) /* EILSEQ */
  247. && (_stdio_fwrite(buf, n, stream) != n))/* Didn't write everything. */
  248. ? wc : WEOF;
  249. #endif
  250. }
  251. strong_alias(fputwc_unlocked,putwc_unlocked);
  252. strong_alias(fputwc,putwc);
  253. #endif
  254. /**********************************************************************/
  255. #ifdef L_putwchar
  256. UNLOCKED_STREAM(wint_t,putwchar,(wchar_t wc),(wc),stdout)
  257. {
  258. register FILE *stream = stdout; /* This helps bcc optimize. */
  259. return fputwc_unlocked(wc, stream);
  260. }
  261. #endif
  262. /**********************************************************************/
  263. #ifdef L_fputws
  264. UNLOCKED(int,fputws,(const wchar_t *__restrict ws,
  265. register FILE *__restrict stream),(ws, stream))
  266. {
  267. #if 1
  268. size_t n;
  269. char buf[64];
  270. if (stream->modeflags & __FLAG_NARROW) {
  271. stream->modeflags |= __FLAG_ERROR;
  272. __set_errno(EBADF);
  273. return -1;
  274. }
  275. stream->modeflags |= __FLAG_WIDE;
  276. while ((n = wcsrtombs(buf, &ws, sizeof(buf), &stream->state)) != 0) {
  277. /* Wasn't an empty wide string. */
  278. if ((n == ((size_t) -1))/* Encoding error! */
  279. || (_stdio_fwrite(buf, n, stream) != n)/* Didn't write everything. */
  280. ) {
  281. return -1;
  282. }
  283. if (!ws) { /* Done? */
  284. break;
  285. }
  286. }
  287. return 1;
  288. #elif 1
  289. int result;
  290. size_t n;
  291. size_t len;
  292. register char *s;
  293. unsigned char *bufend;
  294. char sbuf[MB_LEN_MAX];
  295. if (stream->modeflags & __FLAG_NARROW) {
  296. RETURN_BADF:
  297. stream->modeflags |= __FLAG_ERROR;
  298. __set_errno(EBADF);
  299. return -1;
  300. }
  301. stream->modeflags |= __FLAG_WIDE;
  302. /* Note: What follows is setup grabbed from _stdio_fwrite and modified
  303. * slightly. Since this is a wide stream, we can ignore bufgetc and
  304. * bufputc if present. They always == bufstart.
  305. * It is unfortunate that we need to duplicate so much code here, but
  306. * we need to do the stream setup before starting the wc->mb conversion. */
  307. if ((stream->modeflags & __FLAG_READONLY)
  308. #ifndef __STDIO_AUTO_RW_TRANSITION
  309. /* ANSI/ISO requires either at EOF or currently not reading. */
  310. || ((stream->modeflags & (__FLAG_READING|__FLAG_EOF))
  311. == __FLAG_READING)
  312. #endif /* __STDIO_AUTO_RW_TRANSITION */
  313. ) {
  314. /* TODO: This is for posix behavior if readonly. To save space, we
  315. * use this errno for write attempt while reading, as no errno is
  316. * specified by posix for this case, even though the restriction is
  317. * mentioned in fopen(). */
  318. goto RETURN_BADF;
  319. }
  320. #ifdef __STDIO_AUTO_RW_TRANSITION
  321. /* If reading, deal with ungots and read-buffered chars. */
  322. if (stream->modeflags & __FLAG_READING) {
  323. if (((stream->bufpos < stream->bufread)
  324. || (stream->modeflags & __MASK_UNGOT))
  325. /* If appending, we might as well seek to end to save a seek. */
  326. /* TODO: set EOF in fseek when appropriate? */
  327. && fseek(stream, 0L,
  328. ((stream->modeflags & __FLAG_APPEND)
  329. ? SEEK_END : SEEK_CUR))
  330. ) {
  331. /* Note: This differs from glibc's apparent behavior of
  332. not setting the error flag and discarding the buffered
  333. read data. */
  334. stream->modeflags |= __FLAG_ERROR; /* fseek may not set this. */
  335. return -1; /* Fail if we need to fseek but can't. */
  336. }
  337. /* Always reset even if fseek called (saves a test). */
  338. stream->bufpos = stream->bufread = stream->bufstart;
  339. stream->modeflags &= ~__FLAG_READING;
  340. }
  341. #endif
  342. /* Ok, the boilerplate from _stdio_fwrite is done. */
  343. if (stream->bufpos > stream->bufstart) { /* Pending writes.. */
  344. /* This is a performance penalty, but it simplifies the code below.
  345. * If this is removed, the buffer sharing and while loop condition
  346. * need to be modified below (at least). We at least save a little
  347. * on the overhead by calling _stdio_fwrite directly instead of
  348. * fflush_unlocked. */
  349. if (_stdio_fwrite(NULL, 0, stream) > 0) {/* fflush incomplete! */
  350. return -1;
  351. }
  352. }
  353. stream->modeflags |= __FLAG_WRITING; /* Ensure Writing flag is set. */
  354. /* Next, we "steal" the stream's buffer and do the wc->mb conversion
  355. * straight into it. This will cause the equivalent of an fflush
  356. * for each string write. :-( */
  357. bufend = NULL;
  358. s = stream->bufstart;
  359. if ((len = stream->bufend - stream->bufstart) < MB_LEN_MAX) {
  360. /* Stream is unbuffered or buffer is too small, so deactivate. */
  361. bufend = stream->bufend;
  362. stream->bufend = stream->bufstart;
  363. s = sbuf;
  364. len = MB_LEN_MAX;
  365. }
  366. result = 1; /* Assume success. */
  367. while (ws && (n = wcsrtombs(s, &ws, len, &stream->state)) != 0) {
  368. if ((n == ((size_t) -1)) /* Encoding error! */
  369. /* TODO - maybe call write directly? but what about custom streams? */
  370. || (_stdio_fwrite(s, n, stream) != n)/* Didn't write everything. */
  371. ) {
  372. result = -1;
  373. break;
  374. }
  375. }
  376. if (bufend) { /* If deactivated stream buffer, renable it. */
  377. stream->bufend = bufend;
  378. }
  379. return result;
  380. #else /* slow, dumb version */
  381. while (*ws) {
  382. if (fputwc_unlocked(*ws, stream) == WEOF) {
  383. return -1;
  384. }
  385. ++ws;
  386. }
  387. return 1;
  388. #endif
  389. }
  390. #endif
  391. /**********************************************************************/
  392. #ifdef L_ungetwc
  393. /*
  394. * Note: This is the application-callable ungetwc. If wscanf calls this, it
  395. * should also set stream->ungot[1] to 0 if this is the only ungot.
  396. */
  397. /* Reentrant. */
  398. wint_t ungetwc(wint_t c, register FILE *stream)
  399. {
  400. __STDIO_THREADLOCK(stream);
  401. __stdio_validate_FILE(stream); /* debugging only */
  402. if (stream->modeflags & __FLAG_NARROW) {
  403. stream->modeflags |= __FLAG_ERROR;
  404. c = WEOF;
  405. goto DONE;
  406. }
  407. stream->modeflags |= __FLAG_WIDE;
  408. /* If can't read or there's been an error, or c == EOF, or ungot slots
  409. * already filled, then return EOF */
  410. if ((stream->modeflags
  411. & (__MASK_UNGOT2|__FLAG_WRITEONLY
  412. #ifndef __STDIO_AUTO_RW_TRANSITION
  413. |__FLAG_WRITING /* Note: technically no, but yes in spirit */
  414. #endif /* __STDIO_AUTO_RW_TRANSITION */
  415. ))
  416. || ((stream->modeflags & __MASK_UNGOT1) && (stream->ungot[1]))
  417. || (c == WEOF) ) {
  418. c = WEOF;
  419. goto DONE;;
  420. }
  421. /* ungot_width */
  422. #ifdef __STDIO_BUFFERS
  423. /* TODO: shouldn't allow writing??? */
  424. if (stream->modeflags & __FLAG_WRITING) {
  425. fflush_unlocked(stream); /* Commit any write-buffered chars. */
  426. }
  427. #endif /* __STDIO_BUFFERS */
  428. /* Clear EOF and WRITING flags, and set READING FLAG */
  429. stream->modeflags &= ~(__FLAG_EOF|__FLAG_WRITING);
  430. stream->modeflags |= __FLAG_READING;
  431. stream->ungot[1] = 1; /* Flag as app ungetc call; wscanf fixes up. */
  432. stream->ungot[(stream->modeflags++) & __MASK_UNGOT] = c;
  433. __stdio_validate_FILE(stream); /* debugging only */
  434. DONE:
  435. __STDIO_THREADUNLOCK(stream);
  436. return c;
  437. }
  438. #endif
  439. /**********************************************************************/