scanf.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698
  1. /*
  2. * Modified by Manuel Novoa III Mar 13, 2001
  3. *
  4. * The vfscanf routine was completely rewritten to add features and remove
  5. * bugs. The function __strtold, based on my strtod code in stdlib, was
  6. * added to provide floating point support for the scanf functions.
  7. *
  8. * So far they pass the test cases from glibc-2.1.3, except in two instances.
  9. * In one case, the test appears to be broken. The other case is something
  10. * I need to research further. This version of scanf assumes it can only
  11. * peek one character ahead. Apparently, glibc looks further. The difference
  12. * can be seen when parsing a floating point value in the character
  13. * sequence "100ergs". glibc is able to back up before the 'e' and return
  14. * a value of 100, whereas this scanf reports a bad match with the stream
  15. * pointer at 'r'. A similar situation can also happen when parsing hex
  16. * values prefixed by 0x or 0X; a failure would occur for "0xg". In order to
  17. * fix this, I need to rework the "ungetc" machinery in stdio.c again.
  18. * I do have one reference though, that seems to imply scanf has a single
  19. * character of lookahead.
  20. */
  21. #include <stdlib.h>
  22. #include <unistd.h>
  23. #include <stdio.h>
  24. #include <ctype.h>
  25. #include <string.h>
  26. #include <stdarg.h>
  27. #ifdef L_scanf
  /*
   * scanf - read formatted input from stdin.
   *
   * Collects the variable arguments and hands them to vfscanf(); the value
   * returned by vfscanf() is passed back unchanged.
   */
  #ifdef __STDC__
  int scanf(const char *fmt, ...)
  #else
  int scanf(fmt, va_alist)
  __const char *fmt;
  va_dcl
  #endif
  {
      va_list args;
      int converted;

      va_start(args, fmt);
      converted = vfscanf(stdin, fmt, args);
      va_end(args);

      return converted;
  }
  43. #endif
  44. #ifdef L_sscanf
  45. #ifdef __STDC__
  46. int sscanf(const char *sp, const char *fmt, ...)
  47. #else
  48. int sscanf(sp, fmt, va_alist)
  49. __const char *sp;
  50. __const char *fmt;
  51. va_dcl
  52. #endif
  53. {
  54. FILE string[1] = {
  55. {0, (unsigned char *) ((unsigned) -1), 0, 0, (char *) ((unsigned) -1),
  56. 0, -1, _IOFBF}
  57. };
  58. va_list ptr;
  59. int rv;
  60. string->bufpos = (unsigned char *) ((void *) sp);
  61. va_start(ptr, fmt);
  62. rv = vfscanf(string, fmt, ptr);
  63. va_end(ptr);
  64. return rv;
  65. }
  66. #endif
  67. #ifdef L_fscanf
  /*
   * fscanf - read formatted input from the stream fp.
   *
   * Collects the variable arguments and hands them to vfscanf(); the value
   * returned by vfscanf() is passed back unchanged.
   */
  #ifdef __STDC__
  int fscanf(FILE * fp, const char *fmt, ...)
  #else
  int fscanf(fp, fmt, va_alist)
  FILE *fp;
  __const char *fmt;
  va_dcl
  #endif
  {
      va_list args;
      int converted;

      va_start(args, fmt);
      converted = vfscanf(fp, fmt, args);
      va_end(args);

      return converted;
  }
  84. #endif
  85. #ifdef L_vscanf
  /*
   * vscanf - read formatted input from stdin using an already started
   * argument list.
   *
   *   fmt - conversion template, interpreted by vfscanf()
   *   ap  - argument list holding the destination pointers
   *
   * Returns whatever vfscanf() returns.
   *
   * Written with a prototype (like vsscanf) rather than as an old-style
   * definition, so the parameter types are checked against <stdio.h>.
   */
  int vscanf(const char *fmt, va_list ap)
  {
      return vfscanf(stdin, fmt, ap);
  }
  92. #endif
  93. #ifdef L_vsscanf
  94. int vsscanf(__const char *sp, __const char *fmt, va_list ap)
  95. {
  96. FILE string[1] = {
  97. {0, (unsigned char *) ((unsigned) -1), 0, 0, (char *) ((unsigned) -1),
  98. 0, -1, _IOFBF}
  99. };
  100. string->bufpos = (unsigned char *) sp;
  101. return vfscanf(string, fmt, ap);
  102. }
  103. #endif
  104. #ifdef L_vfscanf
  105. #include <assert.h>
  106. #include <ctype.h>
  107. #include <limits.h>
  /*
   * valid_digit - test whether c is a digit in the given base.
   *
   *   c    - candidate character (callers may also pass a truncated EOF/NUL)
   *   base - 16 selects the hexadecimal digit test; any other value accepts
   *          the decimal digits below '0' + base (so base 0 accepts nothing)
   *
   * Returns nonzero when c is an acceptable digit, 0 otherwise.
   *
   * The <ctype.h> classifiers are only defined for EOF and values that fit
   * in an unsigned char, so the character is converted before being tested;
   * passing a negative plain char directly would be undefined behavior.
   */
  static int valid_digit(char c, char base)
  {
      const unsigned char uc = (unsigned char) c;

      if (base == 16) {
          return isxdigit(uc);
      }
      return (isdigit(uc) && (uc < '0' + base));
  }
  /*
   * String-to-integer workers defined elsewhere in the library.  vfscanf()
   * calls one of them through its STRTO_L_ macro with the collected digit
   * string, a NULL end pointer, the conversion base and the usflag value.
   */
  extern unsigned long long _strto_ll(const char *str, char **endptr,
                                      int base, int uflag);
  extern unsigned long _strto_l(const char *str, char **endptr,
                                int base, int uflag);
  /*
   * Per-call scanning state shared by vfscanf() and its helpers.  It wraps
   * the input stream with a single character of lookahead plus the
   * bookkeeping needed for field widths and for %n.
   */
  struct scan_cookie {
      FILE *fp;           /* stream being read */
      int nread;          /* characters consumed so far (EOF/NUL not counted) */
      int width;          /* characters still allowed for the current field */
      int width_flag;     /* nonzero if the last read was charged to width */
      int ungot_char;     /* most recent character fetched from fp */
      int ungot_flag;     /* nonzero while ungot_char is waiting to be re-read */
  };
  /*
   * Conversion tables used by vfscanf().
   *
   * qual[] lists the size qualifiers that may precede a conversion letter and
   * qsz[i] is the amount added to vfscanf's size selector when qual[i] is
   * seen ('h' and 'l' may be doubled, adding their value twice).  The
   * selector values are:
   *     char = -2, short = -1, int = 0, long = 1, long long = 2
   *
   * qsz[] is explicitly signed: it stores -1, and plain char is unsigned on
   * some targets.
   */
  #ifdef __UCLIBC_HAS_LONG_LONG__
  static const char qual[] = "hl" /* "jtz" */ "Lq";
  static const signed char qsz[] = { -1, 1, 2, 2 };
  #else
  static const char qual[] = "hl" /* "jtz" */;
  static const signed char qsz[] = { -1, 1, };
  #endif

  /*
   * spec[] lists the conversion letters; vfscanf() dispatches on the index
   * of the matching letter, so the order matters:
   *     index 0 '%', 1 'n', 2 '[', 3 'c', 4 's', 5..11 integer conversions,
   *     12 and up floating point conversions (only with float support).
   */
  #ifdef __UCLIBC_HAS_FLOATS__
  static int __strtold(long double *ld, struct scan_cookie *sc);
                             /*01234567890123456 */
  static const char spec[]  = "%n[csoupxXidfeEgG";
  #else
  static const char spec[]  = "%n[csoupxXid";
  #endif

  /* radix[i] <-> spec[i+5]     o   u   p   x   X  i   d  (0 = detect from input) */
  static const char radix[] = { 8, 10, 16, 16, 16, 0, 10 };
  145. static void init_scan_cookie(struct scan_cookie *sc, FILE *fp)
  146. {
  147. sc->fp = fp;
  148. sc->nread = 0;
  149. sc->width_flag = 0;
  150. sc->ungot_flag = 0;
  151. if ((sc->ungot_char = getc(fp)) > 0) { /* not EOF or EOS */
  152. sc->ungot_flag = 1;
  153. }
  154. }
  155. static int scan_getc_nw(struct scan_cookie *sc)
  156. {
  157. if (sc->ungot_flag == 0) {
  158. sc->ungot_char = getc(sc->fp);
  159. } else {
  160. sc->ungot_flag = 0;
  161. }
  162. if (sc->ungot_char > 0) {
  163. ++sc->nread;
  164. }
  165. sc->width_flag = 0;
  166. return sc->ungot_char;
  167. }
  168. static int scan_getc(struct scan_cookie *sc)
  169. {
  170. if (sc->ungot_flag == 0) {
  171. sc->ungot_char = getc(sc->fp);
  172. }
  173. sc->width_flag = 1;
  174. if (--sc->width < 0) {
  175. sc->ungot_flag = 1;
  176. return 0;
  177. }
  178. sc->ungot_flag = 0;
  179. if (sc->ungot_char > 0) {
  180. ++sc->nread;
  181. }
  182. return sc->ungot_char;
  183. }
  184. static void scan_ungetc(struct scan_cookie *sc)
  185. {
  186. if (sc->ungot_flag != 0) {
  187. assert(sc->width < 0);
  188. return;
  189. }
  190. if (sc->width_flag) {
  191. ++sc->width;
  192. }
  193. sc->ungot_flag = 1;
  194. if (sc->ungot_char > 0) { /* not EOF or EOS */
  195. --sc->nread;
  196. }
  197. }
  198. static void kill_scan_cookie(struct scan_cookie *sc)
  199. {
  200. if (sc->ungot_flag) {
  201. ungetc(sc->ungot_char,sc->fp);
  202. }
  203. }
  204. int vfscanf(fp, format, ap)
  205. FILE *fp;
  206. const char *format;
  207. va_list ap;
  208. {
  209. #ifdef __UCLIBC_HAS_LONG_LONG__
  210. #define STRTO_L_(s,e,b,u) _strto_ll(s,e,b,u)
  211. #define MAX_DIGITS 64
  212. #define UV_TYPE unsigned long long
  213. #define V_TYPE long long
  214. #else
  215. #define STRTO_L_(s,e,b,u) _strto_l(s,e,b,u)
  216. #define MAX_DIGITS 32
  217. #define UV_TYPE unsigned long
  218. #define V_TYPE long
  219. #endif
  220. #ifdef __UCLIBC_HAS_FLOATS__
  221. long double ld;
  222. #endif
  223. UV_TYPE uv;
  224. struct scan_cookie sc;
  225. unsigned const char *fmt;
  226. const char *p;
  227. unsigned char *b;
  228. void *vp;
  229. int cc, i, cnt;
  230. signed char lval;
  231. unsigned char store, usflag, base, invert, r0, r1;
  232. unsigned char buf[MAX_DIGITS+2];
  233. unsigned char scanset[UCHAR_MAX + 1];
  234. init_scan_cookie(&sc,fp);
  235. fmt = (unsigned const char *) format;
  236. cnt = 0;
  237. while (*fmt) {
  238. store = 1;
  239. lval = 0;
  240. sc.width = INT_MAX;
  241. if (*fmt == '%') { /* Conversion specification. */
  242. ++fmt;
  243. if (*fmt == '*') { /* Suppress assignment. */
  244. store = 0;
  245. ++fmt;
  246. }
  247. for (i = 0 ; isdigit(*fmt) ; sc.width = i) {
  248. i = (i * 10) + (*fmt++ - '0'); /* Get specified width. */
  249. }
  250. for (i = 0 ; i < sizeof(qual) ; i++) { /* Optional qualifier. */
  251. if (qual[i] == *fmt) {
  252. ++fmt;
  253. lval += qsz[i];
  254. if ((i < 2) && (qual[i] == *fmt)) { /* Double h or l. */
  255. ++fmt;
  256. lval += qsz[i];
  257. }
  258. break;
  259. }
  260. }
  261. for (p = spec ; *p ; p++) { /* Process format specifier. */
  262. if (*fmt != *p) continue;
  263. if (p-spec < 1) { /* % - match a '%'*/
  264. goto matchchar;
  265. }
  266. if (p-spec < 2) { /* n - store number of chars read */
  267. *(va_arg(ap, int *)) = sc.nread;
  268. scan_getc_nw(&sc);
  269. goto nextfmt;
  270. }
  271. if (p-spec > 3) { /* skip white space if not c or [ */
  272. while (isspace(scan_getc_nw(&sc)))
  273. {}
  274. scan_ungetc(&sc);
  275. }
  276. if (p-spec < 5) { /* [,c,s - string conversions */
  277. invert = 0;
  278. if (*p == 'c') {
  279. invert = 1;
  280. if (sc.width == INT_MAX) {
  281. sc.width = 1;
  282. }
  283. }
  284. for (i=0 ; i<= UCHAR_MAX ; i++) {
  285. scanset[i] = ((*p == 's') ? (isspace(i) == 0) : 0);
  286. }
  287. if (*p == '[') { /* need to build a scanset */
  288. if (*++fmt == '^') {
  289. invert = 1;
  290. ++fmt;
  291. }
  292. if (*fmt == ']') {
  293. scanset[(int)']'] = 1;
  294. ++fmt;
  295. }
  296. r0 = 0;
  297. while (*fmt && *fmt !=']') { /* build scanset */
  298. if ((*fmt == '-') && r0 && (fmt[1] != ']')) {
  299. /* range */
  300. ++fmt;
  301. if (*fmt < r0) {
  302. r1 = r0;
  303. r0 = *fmt;
  304. } else {
  305. r1 = *fmt;
  306. }
  307. for (i=r0 ; i<= r1 ; i++) {
  308. scanset[i] = 1;
  309. }
  310. r0 = 0;
  311. } else {
  312. r0 = *fmt;
  313. scanset[r0] = 1;
  314. }
  315. ++fmt;
  316. }
  317. if (!*fmt) { /* format string exhausted! */
  318. goto done;
  319. }
  320. }
  321. /* ok -- back to common work */
  322. if (sc.width <= 0) {
  323. goto done;
  324. }
  325. if (store) {
  326. b = va_arg(ap, unsigned char *);
  327. } else {
  328. b = buf;
  329. }
  330. i = 0;
  331. cc = scan_getc(&sc);
  332. while ((cc>0) && (scanset[cc] != invert)) {
  333. i = store; /* yes, we stored something */
  334. *b = cc;
  335. b += store;
  336. cc = scan_getc(&sc);
  337. }
  338. if (*p != 'c') { /* nul-terminate the stored string */
  339. *b = 0;
  340. cnt += i;
  341. goto nextfmt;
  342. } else if (sc.width < 0) { /* case 'c' */
  343. cnt += store;
  344. goto nextfmt;
  345. }
  346. scan_ungetc(&sc);
  347. goto done;
  348. }
  349. if (p-spec < 12) { /* o,u,p,x,X,i,d - (un)signed integer */
  350. if (*p == 'p') {
  351. /* assume pointer same size as int or long. */
  352. lval = (sizeof(char *) == sizeof(long));
  353. }
  354. usflag = ((p-spec) < 10); /* (1)0 if (un)signed */
  355. base = radix[(int)(p-spec) - 5];
  356. b = buf;
  357. if (sc.width <= 0) {
  358. goto done;
  359. }
  360. cc = scan_getc(&sc);
  361. if ((cc == '+') || (cc == '-')) { /* Handle leading sign.*/
  362. *b++ = cc;
  363. cc = scan_getc(&sc);
  364. }
  365. if (cc == '0') { /* Possibly set base and handle prefix. */
  366. if ((base == 0) || (base == 16)) {
  367. cc = scan_getc(&sc);
  368. if ((cc == 'x') || (cc == 'X')) {
  369. /* We're committed to base 16 now. */
  370. base = 16;
  371. cc = scan_getc(&sc);
  372. } else { /* oops... back up */
  373. scan_ungetc(&sc);
  374. cc = '0';
  375. if (base == 0) {
  376. base = 8;
  377. }
  378. }
  379. }
  380. }
  381. /* At this point, we're ready to start reading digits. */
  382. if (cc == '0') {
  383. *b++ = cc; /* Store first leading 0 */
  384. do { /* but ignore others. */
  385. cc = scan_getc(&sc);
  386. } while (cc == '0');
  387. }
  388. while (valid_digit(cc,base)) { /* Now for nonzero digits.*/
  389. if (b - buf < MAX_DIGITS) {
  390. *b++ = cc;
  391. }
  392. cc = scan_getc(&sc);
  393. }
  394. *b = 0; /* null-terminate */
  395. if ((b == buf) || (*--b == '+') || (*b == '-')) {
  396. scan_ungetc(&sc);
  397. goto done; /* No digits! */
  398. }
  399. if (store) {
  400. if (*buf == '-') {
  401. usflag = 0;
  402. }
  403. uv = STRTO_L_(buf, NULL, base, usflag);
  404. vp = va_arg(ap, void *);
  405. switch (lval) {
  406. case 2: /* If no long long, treat as long . */
  407. #ifdef __UCLIBC_HAS_LONG_LONG__
  408. *((unsigned long long *)vp) = uv;
  409. break;
  410. #endif
  411. case 1:
  412. #if ULONG_MAX == UINT_MAX
  413. case 0: /* int and long int are the same */
  414. #endif
  415. #ifdef __UCLIBC_HAS_LONG_LONG__
  416. if (usflag) {
  417. if (uv > ULONG_MAX) {
  418. uv = ULONG_MAX;
  419. }
  420. } else if (((V_TYPE)uv) > LONG_MAX) {
  421. uv = LONG_MAX;
  422. } else if (((V_TYPE)uv) < LONG_MIN) {
  423. uv = (UV_TYPE) LONG_MIN;
  424. }
  425. #endif
  426. *((unsigned long *)vp) = (unsigned long)uv;
  427. break;
  428. #if ULONG_MAX != UINT_MAX
  429. case 0: /* int and long int are different */
  430. if (usflag) {
  431. if (uv > UINT_MAX) {
  432. uv = UINT_MAX;
  433. }
  434. } else if (((V_TYPE)uv) > INT_MAX) {
  435. uv = INT_MAX;
  436. } else if (((V_TYPE)uv) < INT_MIN) {
  437. uv = (UV_TYPE) INT_MIN;
  438. }
  439. *((unsigned int *)vp) = (unsigned int)uv;
  440. break;
  441. #endif
  442. case -1:
  443. if (usflag) {
  444. if (uv > USHRT_MAX) {
  445. uv = USHRT_MAX;
  446. }
  447. } else if (((V_TYPE)uv) > SHRT_MAX) {
  448. uv = SHRT_MAX;
  449. } else if (((V_TYPE)uv) < SHRT_MIN) {
  450. uv = (UV_TYPE) SHRT_MIN;
  451. }
  452. *((unsigned short *)vp) = (unsigned short)uv;
  453. break;
  454. case -2:
  455. if (usflag) {
  456. if (uv > UCHAR_MAX) {
  457. uv = UCHAR_MAX;
  458. }
  459. } else if (((V_TYPE)uv) > CHAR_MAX) {
  460. uv = CHAR_MAX;
  461. } else if (((V_TYPE)uv) < CHAR_MIN) {
  462. uv = (UV_TYPE) CHAR_MIN;
  463. }
  464. *((unsigned char *)vp) = (unsigned char) uv;
  465. break;
  466. default:
  467. assert(0);
  468. }
  469. ++cnt;
  470. }
  471. goto nextfmt;
  472. }
  473. #ifdef __UCLIBC_HAS_FLOATS__
  474. else { /* floating point */
  475. if (sc.width <= 0) {
  476. goto done;
  477. }
  478. if (__strtold(&ld, &sc)) { /* Success! */
  479. if (store) {
  480. vp = va_arg(ap, void *);
  481. switch (lval) {
  482. case 2:
  483. *((long double *)vp) = ld;
  484. break;
  485. case 1:
  486. *((double *)vp) = (double) ld;
  487. break;
  488. case 0:
  489. *((float *)vp) = (float) ld;
  490. break;
  491. default: /* Illegal qualifier! */
  492. assert(0);
  493. goto done;
  494. }
  495. ++cnt;
  496. }
  497. goto nextfmt;
  498. }
  499. }
  500. #else
  501. assert(0);
  502. #endif
  503. goto done;
  504. }
  505. /* Unrecognized specifier! */
  506. goto done;
  507. } if (isspace(*fmt)) { /* Consume all whitespace. */
  508. while (isspace(scan_getc_nw(&sc)))
  509. {}
  510. } else { /* Match the current fmt char. */
  511. matchchar:
  512. if (scan_getc_nw(&sc) != *fmt) {
  513. goto done;
  514. }
  515. scan_getc_nw(&sc);
  516. }
  517. nextfmt:
  518. scan_ungetc(&sc);
  519. ++fmt;
  520. }
  521. done: /* end of scan */
  522. kill_scan_cookie(&sc);
  523. if ((sc.ungot_char <= 0) && (cnt == 0) && (*fmt)) {
  524. return (EOF);
  525. }
  526. return (cnt);
  527. }
  528. /*****************************************************************************/
  529. #ifdef __UCLIBC_HAS_FLOATS__
  #include <float.h>
  /* Number of leading significant digits __strtold folds into the value;
   * digits beyond this only adjust the decimal exponent. */
  #define MAX_SIG_DIGITS 20
  /* Used only to size MAX_ALLOWED_EXP below. */
  #define MAX_IGNORED_DIGITS 2000
  /* Once the exponent accumulated by __strtold reaches this value, further
   * exponent digits are read but no longer added in. */
  #define MAX_ALLOWED_EXP (MAX_SIG_DIGITS + MAX_IGNORED_DIGITS + LDBL_MAX_10_EXP)
  /* MAX_SIG_DIGITS must cover the decimal precision of long double. */
  #if LDBL_DIG > MAX_SIG_DIGITS
  #error need to adjust MAX_SIG_DIGITS
  #endif
  #include <limits.h>
  /* The exponent is kept in an int, so the cap has to fit in one. */
  #if MAX_ALLOWED_EXP > INT_MAX
  #error size assumption violated for MAX_ALLOWED_EXP
  #endif
  541. int __strtold(long double *ld, struct scan_cookie *sc)
  542. {
  543. long double number;
  544. long double p10;
  545. int exponent_power;
  546. int exponent_temp;
  547. int negative;
  548. int num_digits;
  549. int since_decimal;
  550. int c;
  551. c = scan_getc(sc); /* Decrements width. */
  552. negative = 0;
  553. switch(c) { /* Handle optional sign. */
  554. case '-': negative = 1; /* Fall through to get next char. */
  555. case '+': c = scan_getc(sc);
  556. }
  557. number = 0.;
  558. num_digits = -1;
  559. exponent_power = 0;
  560. since_decimal = INT_MIN;
  561. LOOP:
  562. while (isdigit(c)) { /* Process string of digits. */
  563. ++since_decimal;
  564. if (num_digits < 0) { /* First time through? */
  565. ++num_digits; /* We've now seen a digit. */
  566. }
  567. if (num_digits || (c != '0')) { /* had/have nonzero */
  568. ++num_digits;
  569. if (num_digits <= MAX_SIG_DIGITS) { /* Is digit significant? */
  570. number = number * 10. + (c - '0');
  571. }
  572. }
  573. c = scan_getc(sc);
  574. }
  575. if ((c == '.') && (since_decimal < 0)) { /* If no previous decimal pt, */
  576. since_decimal = 0; /* save position of decimal point */
  577. c = scan_getc(sc); /* and process rest of digits */
  578. goto LOOP;
  579. }
  580. if (num_digits<0) { /* Must have at least one digit. */
  581. goto FAIL;
  582. }
  583. if (num_digits > MAX_SIG_DIGITS) { /* Adjust exp for skipped digits. */
  584. exponent_power += num_digits - MAX_SIG_DIGITS;
  585. }
  586. if (since_decimal >= 0) { /* Adjust exponent for decimal point. */
  587. exponent_power -= since_decimal;
  588. }
  589. if (negative) { /* Correct for sign. */
  590. number = -number;
  591. negative = 0; /* Reset for exponent processing below. */
  592. }
  593. /* Process an exponent string. */
  594. if (c == 'e' || c == 'E') {
  595. c = scan_getc(sc);
  596. switch(c) { /* Handle optional sign. */
  597. case '-': negative = 1; /* Fall through to get next char. */
  598. case '+': c = scan_getc(sc);
  599. }
  600. num_digits = 0;
  601. exponent_temp = 0;
  602. while (isdigit(c)) { /* Process string of digits. */
  603. if (exponent_temp < MAX_ALLOWED_EXP) { /* overflow check */
  604. exponent_temp = exponent_temp * 10 + (c - '0');
  605. }
  606. c = scan_getc(sc);
  607. ++num_digits;
  608. }
  609. if (num_digits == 0) { /* Were there no exp digits? */
  610. goto FAIL;
  611. } /* else */
  612. if (negative) {
  613. exponent_power -= exponent_temp;
  614. } else {
  615. exponent_power += exponent_temp;
  616. }
  617. }
  618. if (number != 0.) {
  619. /* Now scale the result. */
  620. exponent_temp = exponent_power;
  621. p10 = 10.;
  622. if (exponent_temp < 0) {
  623. exponent_temp = -exponent_temp;
  624. }
  625. while (exponent_temp) {
  626. if (exponent_temp & 1) {
  627. if (exponent_power < 0) {
  628. number /= p10;
  629. } else {
  630. number *= p10;
  631. }
  632. }
  633. exponent_temp >>= 1;
  634. p10 *= p10;
  635. }
  636. }
  637. *ld = number;
  638. return 1;
  639. FAIL:
  640. scan_ungetc(sc);
  641. return 0;
  642. }
  643. #endif /* __UCLIBC_HAS_FLOATS__ */
  644. #endif