scanf.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694
  1. /*
  2. * Modified by Manuel Novoa III Mar 13, 2001
  3. *
  4. * The vfscanf routine was completely rewritten to add features and remove
  5. * bugs. The function __strtold, based on my strtod code in stdlib, was
  6. * added to provide floating point support for the scanf functions.
  7. *
  8. * So far they pass the test cases from glibc-2.1.3, except in two instances.
  9. * In one case, the test appears to be broken. The other case is something
  10. * I need to research further. This version of scanf assumes it can only
  11. * peek one character ahead. Apparently, glibc looks further. The difference
  12. * can be seen when parsing a floating point value in the character
  13. * sequence "100ergs". glibc is able to back up before the 'e' and return
  14. * a value of 100, whereas this scanf reports a bad match with the stream
  15. * pointer at 'r'. A similar situation can also happen when parsing hex
  16. * values prefixed by 0x or 0X; a failure would occur for "0xg". In order to
  17. * fix this, I need to rework the "ungetc" machinery in stdio.c again.
  18. * I do have one reference though, that seems to imply scanf has a single
  19. * character of lookahead.
  20. */
  21. #include <stdlib.h>
  22. #include <unistd.h>
  23. #include <stdio.h>
  24. #include <ctype.h>
  25. #include <string.h>
  26. #include <stdarg.h>
  27. #ifdef L_scanf
  /*
   * scanf - read formatted input from stdin.
   *
   * fmt: conversion format; the variadic arguments are the pointers that
   *      receive each stored conversion.
   *
   * Returns the value produced by vfscanf() for stdin.
   *
   * Only the ANSI prototype form is provided.  The former non-__STDC__
   * variant combined va_dcl (a <varargs.h> facility this file never
   * includes) with the two-argument <stdarg.h> va_start, so it could not
   * have compiled.
   */
  int scanf(const char *fmt, ...)
  {
      va_list ptr;
      int rv;

      va_start(ptr, fmt);
      rv = vfscanf(stdin, fmt, ptr);
      va_end(ptr);
      return rv;
  }
  43. #endif
  44. #ifdef L_sscanf
  45. #ifdef __STDC__
  46. int sscanf(const char *sp, const char *fmt, ...)
  47. #else
  48. int sscanf(sp, fmt, va_alist)
  49. __const char *sp;
  50. __const char *fmt;
  51. va_dcl
  52. #endif
  53. {
  54. FILE string[1] = {
  55. {0, (unsigned char *) ((unsigned) -1), 0, 0, (char *) ((unsigned) -1),
  56. 0, -1, _IOFBF}
  57. };
  58. va_list ptr;
  59. int rv;
  60. string->bufpos = (unsigned char *) ((void *) sp);
  61. va_start(ptr, fmt);
  62. rv = vfscanf(string, fmt, ptr);
  63. va_end(ptr);
  64. return rv;
  65. }
  66. #endif
  67. #ifdef L_fscanf
  /*
   * fscanf - read formatted input from the stream fp.
   *
   * fp:  stream to read from.
   * fmt: conversion format; the variadic arguments are the pointers that
   *      receive each stored conversion.
   *
   * Returns the value produced by vfscanf() for fp.
   *
   * Only the ANSI prototype form is provided.  The former non-__STDC__
   * variant combined va_dcl (a <varargs.h> facility this file never
   * includes) with the two-argument <stdarg.h> va_start, so it could not
   * have compiled.
   */
  int fscanf(FILE *fp, const char *fmt, ...)
  {
      va_list ptr;
      int rv;

      va_start(ptr, fmt);
      rv = vfscanf(fp, fmt, ptr);
      va_end(ptr);
      return rv;
  }
  84. #endif
  85. #ifdef L_vscanf
  /*
   * vscanf - va_list form of scanf: read formatted input from stdin.
   *
   * fmt: conversion format.
   * ap:  argument list holding the pointers that receive each stored
   *      conversion.
   *
   * Returns the value produced by vfscanf() for stdin.
   *
   * Defined with a prototype; the previous old-style (K&R) parameter list
   * gave callers no argument checking and is no longer valid in current C.
   */
  int vscanf(const char *fmt, va_list ap)
  {
      return vfscanf(stdin, fmt, ap);
  }
  92. #endif
  93. #ifdef L_vsscanf
  94. int vsscanf(__const char *sp, __const char *fmt, va_list ap)
  95. {
  96. FILE string[1] = {
  97. {0, (unsigned char *) ((unsigned) -1), 0, 0, (char *) ((unsigned) -1),
  98. 0, -1, _IOFBF}
  99. };
  100. string->bufpos = (unsigned char *) sp;
  101. return vfscanf(string, fmt, ap);
  102. }
  103. #endif
  104. #ifdef L_vfscanf
  105. #include <assert.h>
  106. #include <ctype.h>
  107. #include <limits.h>
  /*
   * valid_digit - test whether c is a digit in the given base.
   *
   * c:    candidate character.
   * base: 16 accepts hexadecimal digits; any other value accepts the decimal
   *       digits strictly below '0' + base.
   *
   * Returns nonzero if c is acceptable, 0 otherwise.
   */
  static int valid_digit(char c, char base)
  {
      /*
       * The <ctype.h> classifiers are only defined for EOF and values
       * representable as unsigned char.  Where plain char is signed, an input
       * byte >= 0x80 arrives here negative, so convert before classifying.
       */
      unsigned char uc = (unsigned char) c;

      if (base == 16) {
          return isxdigit(uc);
      }
      return (isdigit(uc) && (uc < '0' + base));
  }
  /* String-to-integer converters used by vfscanf; defined outside this file. */
  extern unsigned long long
  _strto_ll(const char *str, char **endptr, int base, int uflag);
  extern unsigned long
  _strto_l(const char *str, char **endptr, int base, int uflag);

  /*
   * Size qualifiers.  qual[i] is a qualifier character and qsz[i] is the
   * amount it adds to the size level, where
   *     char = -2, short = -1, int = 0, long = 1, long long = 2.
   * 'h' and 'l' may be doubled, which applies their step twice.
   *
   * qsz is explicitly signed char: it holds -1, and plain char is unsigned
   * on some targets.
   */
  #if WANT_LONG_LONG || WANT_LONG_LONG_ERROR
  static const char qual[] = "hl" /* "jtz" */ "Lq";
  static const signed char qsz[] = { -1, 1, 2, 2 };
  #else
  static const char qual[] = "hl" /* "jtz" */;
  static const signed char qsz[] = { -1, 1, };
  #endif

  /*
   * Conversion characters.  vfscanf dispatches on the index of the matching
   * character, so the order here is significant.
   */
  #if WANT_DOUBLE || WANT_DOUBLE_ERROR
                             /*01234567890123456 */
  static const char spec[] = "%n[csoupxXidfeEgG";
  #else
  static const char spec[] = "%n[csoupxXid";
  #endif

  /* radix[i] <-> spec[i+5]:       o   u   p   x   X  i   d   (0 = auto-detect) */
  static const char radix[] = {    8, 10, 16, 16, 16, 0, 10 };
  /*
   * Scanner state shared by the scan_* helpers: a stream plus a single
   * character of lookahead.
   */
  struct scan_cookie {
      FILE *fp;          /* stream the characters are fetched from */
      int nread;         /* running count of consumed characters (> 0 only) */
      int width;         /* remaining field width; scan_getc decrements it */
      int ungot_char;    /* most recent character obtained from fp */
      int ungot_flag;    /* nonzero while ungot_char is still unconsumed */
  };

  /* Floating-point field parser; defined after vfscanf. */
  static int __strtold(long double *ld, struct scan_cookie *sc);
  145. static void init_scan_cookie(struct scan_cookie *sc, FILE *fp)
  146. {
  147. sc->fp = fp;
  148. sc->nread = 0;
  149. sc->ungot_flag = 0;
  150. if ((sc->ungot_char = getc(fp)) > 0) { /* not EOF or EOS */
  151. sc->ungot_flag = 1;
  152. }
  153. }
  154. static int scan_getc_nw(struct scan_cookie *sc)
  155. {
  156. if (sc->ungot_flag == 0) {
  157. sc->ungot_char = getc(sc->fp);
  158. } else {
  159. sc->ungot_flag = 0;
  160. }
  161. if (sc->ungot_char > 0) {
  162. ++sc->nread;
  163. }
  164. return sc->ungot_char;
  165. }
  166. static int scan_getc(struct scan_cookie *sc)
  167. {
  168. if (sc->ungot_flag == 0) {
  169. sc->ungot_char = getc(sc->fp);
  170. }
  171. if (--sc->width < 0) {
  172. sc->ungot_flag = 1;
  173. return 0;
  174. }
  175. sc->ungot_flag = 0;
  176. if (sc->ungot_char > 0) {
  177. ++sc->nread;
  178. }
  179. return sc->ungot_char;
  180. }
  181. static void scan_ungetc(struct scan_cookie *sc)
  182. {
  183. if (sc->ungot_flag != 0) {
  184. assert(sc->width < 0);
  185. return;
  186. }
  187. sc->ungot_flag = 1;
  188. if (sc->ungot_char > 0) { /* not EOF or EOS */
  189. --sc->nread;
  190. }
  191. }
  192. static void kill_scan_cookie(struct scan_cookie *sc)
  193. {
  194. if (sc->ungot_flag) {
  195. ungetc(sc->ungot_char,sc->fp);
  196. }
  197. }
  198. int vfscanf(fp, format, ap)
  199. FILE *fp;
  200. const char *format;
  201. va_list ap;
  202. {
  203. #if WANT_LONG_LONG
  204. #define STRTO_L_(s,e,b,u) _strto_ll(s,e,b,u)
  205. #define MAX_DIGITS 64
  206. #define UV_TYPE unsigned long long
  207. #define V_TYPE long long
  208. #else
  209. #define STRTO_L_(s,e,b,u) _strto_l(s,e,b,u)
  210. #define MAX_DIGITS 32
  211. #define UV_TYPE unsigned long
  212. #define V_TYPE long
  213. #endif
  214. #if WANT_DOUBLE
  215. long double ld;
  216. #endif
  217. UV_TYPE uv;
  218. struct scan_cookie sc;
  219. unsigned const char *fmt;
  220. const char *p;
  221. unsigned char *b;
  222. void *vp;
  223. int cc, i, cnt;
  224. signed char lval;
  225. unsigned char store, usflag, base, invert, r0, r1;
  226. unsigned char buf[MAX_DIGITS+2];
  227. unsigned char scanset[UCHAR_MAX + 1];
  228. init_scan_cookie(&sc,fp);
  229. fmt = (unsigned const char *) format;
  230. cnt = 0;
  231. while (*fmt) {
  232. store = 1;
  233. lval = 0;
  234. sc.width = INT_MAX;
  235. if (*fmt == '%') { /* Conversion specification. */
  236. ++fmt;
  237. if (*fmt == '*') { /* Suppress assignment. */
  238. store = 0;
  239. ++fmt;
  240. }
  241. for (i = 0 ; isdigit(*fmt) ; sc.width = i) {
  242. i = (i * 10) + (*fmt++ - '0'); /* Get specified width. */
  243. }
  244. for (i = 0 ; i < sizeof(qual) ; i++) { /* Optional qualifier. */
  245. if (qual[i] == *fmt) {
  246. ++fmt;
  247. lval += qsz[i];
  248. if ((i < 2) && (qual[i] == *fmt)) { /* Double h or l. */
  249. ++fmt;
  250. lval += qsz[i];
  251. }
  252. break;
  253. }
  254. }
  255. for (p = spec ; *p ; p++) { /* Process format specifier. */
  256. if (*fmt != *p) continue;
  257. if (p-spec < 1) { /* % - match a '%'*/
  258. goto matchchar;
  259. }
  260. if (p-spec < 2) { /* n - store number of chars read */
  261. *(va_arg(ap, int *)) = sc.nread;
  262. scan_getc_nw(&sc);
  263. goto nextfmt;
  264. }
  265. if (p-spec > 3) { /* skip white space if not c or [ */
  266. while (isspace(scan_getc_nw(&sc)))
  267. {}
  268. scan_ungetc(&sc);
  269. }
  270. if (p-spec < 5) { /* [,c,s - string conversions */
  271. invert = 0;
  272. if (*p == 'c') {
  273. invert = 1;
  274. if (sc.width == INT_MAX) {
  275. sc.width = 1;
  276. }
  277. }
  278. for (i=0 ; i<= UCHAR_MAX ; i++) {
  279. scanset[i] = ((*p == 's') ? (isspace(i) == 0) : 0);
  280. }
  281. if (*p == '[') { /* need to build a scanset */
  282. if (*++fmt == '^') {
  283. invert = 1;
  284. ++fmt;
  285. }
  286. if (*fmt == ']') {
  287. scanset[(int)']'] = 1;
  288. ++fmt;
  289. }
  290. r0 = 0;
  291. while (*fmt && *fmt !=']') { /* build scanset */
  292. if ((*fmt == '-') && r0 && (fmt[1] != ']')) {
  293. /* range */
  294. ++fmt;
  295. if (*fmt < r0) {
  296. r1 = r0;
  297. r0 = *fmt;
  298. } else {
  299. r1 = *fmt;
  300. }
  301. for (i=r0 ; i<= r1 ; i++) {
  302. scanset[i] = 1;
  303. }
  304. r0 = 0;
  305. } else {
  306. r0 = *fmt;
  307. scanset[r0] = 1;
  308. }
  309. ++fmt;
  310. }
  311. if (!*fmt) { /* format string exhausted! */
  312. goto done;
  313. }
  314. }
  315. /* ok -- back to common work */
  316. if (sc.width <= 0) {
  317. goto done;
  318. }
  319. if (store) {
  320. b = va_arg(ap, unsigned char *);
  321. } else {
  322. b = buf;
  323. }
  324. i = 0;
  325. cc = scan_getc(&sc);
  326. while ((cc>0) && (scanset[cc] != invert)) {
  327. i = store; /* yes, we stored something */
  328. *b = cc;
  329. b += store;
  330. cc = scan_getc(&sc);
  331. }
  332. if (*p != 'c') { /* nul-terminate the stored string */
  333. *b = 0;
  334. cnt += i;
  335. goto nextfmt;
  336. } else if (sc.width < 0) { /* case 'c' */
  337. cnt += store;
  338. goto nextfmt;
  339. }
  340. scan_ungetc(&sc);
  341. goto done;
  342. }
  343. if (p-spec < 12) { /* o,u,p,x,X,i,d - (un)signed integer */
  344. if (*p == 'p') {
  345. /* assume pointer same size as int or long. */
  346. lval = (sizeof(char *) == sizeof(long));
  347. }
  348. usflag = ((p-spec) < 10); /* (1)0 if (un)signed */
  349. base = radix[(int)(p-spec) - 5];
  350. b = buf;
  351. if (sc.width <= 0) {
  352. goto done;
  353. }
  354. cc = scan_getc(&sc);
  355. if ((cc == '+') || (cc == '-')) { /* Handle leading sign.*/
  356. *b++ = cc;
  357. cc = scan_getc(&sc);
  358. }
  359. if (cc == '0') { /* Possibly set base and handle prefix. */
  360. if ((base == 0) || (base == 16)) {
  361. cc = scan_getc(&sc);
  362. if ((cc == 'x') || (cc == 'X')) {
  363. /* We're committed to base 16 now. */
  364. base = 16;
  365. cc = scan_getc(&sc);
  366. } else { /* oops... back up */
  367. scan_ungetc(&sc);
  368. cc = '0';
  369. if (base == 0) {
  370. base = 8;
  371. }
  372. }
  373. }
  374. }
  375. /* At this point, we're ready to start reading digits. */
  376. if (cc == '0') {
  377. *b++ = cc; /* Store first leading 0 */
  378. do { /* but ignore others. */
  379. cc = scan_getc(&sc);
  380. } while (cc == '0');
  381. }
  382. while (valid_digit(cc,base)) { /* Now for nonzero digits.*/
  383. if (b - buf < MAX_DIGITS) {
  384. *b++ = cc;
  385. }
  386. cc = scan_getc(&sc);
  387. }
  388. *b = 0; /* null-terminate */
  389. if ((b == buf) || (*--b == '+') || (*b == '-')) {
  390. scan_ungetc(&sc);
  391. goto done; /* No digits! */
  392. }
  393. if (store) {
  394. if (*buf == '-') {
  395. usflag = 0;
  396. }
  397. uv = STRTO_L_(buf, NULL, base, usflag);
  398. vp = va_arg(ap, void *);
  399. switch (lval) {
  400. case 2: /* If no long long, treat as long . */
  401. #if WANT_LONG_LONG
  402. *((unsigned long long *)vp) = uv;
  403. break;
  404. #endif
  405. case 1:
  406. #if ULONG_MAX == UINT_MAX
  407. case 0: /* int and long int are the same */
  408. #endif
  409. #if WANT_LONG_LONG
  410. if (usflag) {
  411. if (uv > ULONG_MAX) {
  412. uv = ULONG_MAX;
  413. }
  414. } else if (((V_TYPE)uv) > LONG_MAX) {
  415. uv = LONG_MAX;
  416. } else if (((V_TYPE)uv) < LONG_MIN) {
  417. uv = (UV_TYPE) LONG_MIN;
  418. }
  419. #endif
  420. *((unsigned long *)vp) = (unsigned long)uv;
  421. break;
  422. #if ULONG_MAX != UINT_MAX
  423. case 0: /* int and long int are different */
  424. if (usflag) {
  425. if (uv > UINT_MAX) {
  426. uv = UINT_MAX;
  427. }
  428. } else if (((V_TYPE)uv) > INT_MAX) {
  429. uv = INT_MAX;
  430. } else if (((V_TYPE)uv) < INT_MIN) {
  431. uv = (UV_TYPE) INT_MIN;
  432. }
  433. *((unsigned int *)vp) = (unsigned int)uv;
  434. break;
  435. #endif
  436. case -1:
  437. if (usflag) {
  438. if (uv > USHRT_MAX) {
  439. uv = USHRT_MAX;
  440. }
  441. } else if (((V_TYPE)uv) > SHRT_MAX) {
  442. uv = SHRT_MAX;
  443. } else if (((V_TYPE)uv) < SHRT_MIN) {
  444. uv = (UV_TYPE) SHRT_MIN;
  445. }
  446. *((unsigned short *)vp) = (unsigned short)uv;
  447. break;
  448. case -2:
  449. if (usflag) {
  450. if (uv > UCHAR_MAX) {
  451. uv = UCHAR_MAX;
  452. }
  453. } else if (((V_TYPE)uv) > CHAR_MAX) {
  454. uv = CHAR_MAX;
  455. } else if (((V_TYPE)uv) < CHAR_MIN) {
  456. uv = (UV_TYPE) CHAR_MIN;
  457. }
  458. *((unsigned char *)vp) = (unsigned char) uv;
  459. break;
  460. default:
  461. assert(0);
  462. }
  463. ++cnt;
  464. }
  465. goto nextfmt;
  466. }
  467. #if WANT_DOUBLE
  468. else { /* floating point */
  469. if (sc.width <= 0) {
  470. goto done;
  471. }
  472. if (__strtold(&ld, &sc)) { /* Success! */
  473. if (store) {
  474. vp = va_arg(ap, void *);
  475. switch (lval) {
  476. case 2:
  477. *((long double *)vp) = ld;
  478. break;
  479. case 1:
  480. *((double *)vp) = (double) ld;
  481. break;
  482. case 0:
  483. *((float *)vp) = (float) ld;
  484. break;
  485. default: /* Illegal qualifier! */
  486. assert(0);
  487. goto done;
  488. }
  489. ++cnt;
  490. }
  491. goto nextfmt;
  492. }
  493. }
  494. #else
  495. assert(0);
  496. #endif
  497. goto done;
  498. }
  499. /* Unrecognized specifier! */
  500. goto done;
  501. } if (isspace(*fmt)) { /* Consume all whitespace. */
  502. while (isspace(scan_getc_nw(&sc)))
  503. {}
  504. } else { /* Match the current fmt char. */
  505. matchchar:
  506. if (scan_getc_nw(&sc) != *fmt) {
  507. goto done;
  508. }
  509. scan_getc_nw(&sc);
  510. }
  511. nextfmt:
  512. scan_ungetc(&sc);
  513. ++fmt;
  514. }
  515. done: /* end of scan */
  516. kill_scan_cookie(&sc);
  517. if ((sc.ungot_char <= 0) && (cnt == 0) && (*fmt)) {
  518. return (EOF);
  519. }
  520. return (cnt);
  521. }
  522. /*****************************************************************************/
  523. #if WANT_DOUBLE
  524. #include <float.h>
  #include <limits.h>

  /* Leading digits of the mantissa that are accumulated into the value. */
  #define MAX_SIG_DIGITS 20
  /* Used only in the exponent bound below. */
  #define MAX_IGNORED_DIGITS 2000
  /* Bound after which further exponent digits are no longer accumulated. */
  #define MAX_ALLOWED_EXP (MAX_SIG_DIGITS + MAX_IGNORED_DIGITS + LDBL_MAX_10_EXP)

  /* Build-time sanity checks for the limits above. */
  #if LDBL_DIG > MAX_SIG_DIGITS
  #error need to adjust MAX_SIG_DIGITS
  #endif
  #if MAX_ALLOWED_EXP > INT_MAX
  #error size assumption violated for MAX_ALLOWED_EXP
  #endif
  535. int __strtold(long double *ld, struct scan_cookie *sc)
  536. {
  537. long double number;
  538. long double p10;
  539. int exponent_power;
  540. int exponent_temp;
  541. int negative;
  542. int num_digits;
  543. int since_decimal;
  544. int c;
  545. c = scan_getc(sc); /* Decrements width. */
  546. negative = 0;
  547. switch(c) { /* Handle optional sign. */
  548. case '-': negative = 1; /* Fall through to get next char. */
  549. case '+': c = scan_getc(sc);
  550. }
  551. number = 0.;
  552. num_digits = -1;
  553. exponent_power = 0;
  554. since_decimal = INT_MIN;
  555. LOOP:
  556. while (isdigit(c)) { /* Process string of digits. */
  557. ++since_decimal;
  558. if (num_digits < 0) { /* First time through? */
  559. ++num_digits; /* We've now seen a digit. */
  560. }
  561. if (num_digits || (c != '0')) { /* had/have nonzero */
  562. ++num_digits;
  563. if (num_digits <= MAX_SIG_DIGITS) { /* Is digit significant? */
  564. number = number * 10. + (c - '0');
  565. }
  566. }
  567. c = scan_getc(sc);
  568. }
  569. if ((c == '.') && (since_decimal < 0)) { /* If no previous decimal pt, */
  570. since_decimal = 0; /* save position of decimal point */
  571. c = scan_getc(sc); /* and process rest of digits */
  572. goto LOOP;
  573. }
  574. if (num_digits<0) { /* Must have at least one digit. */
  575. goto FAIL;
  576. }
  577. if (num_digits > MAX_SIG_DIGITS) { /* Adjust exp for skipped digits. */
  578. exponent_power += num_digits - MAX_SIG_DIGITS;
  579. }
  580. if (since_decimal >= 0) { /* Adjust exponent for decimal point. */
  581. exponent_power -= since_decimal;
  582. }
  583. if (negative) { /* Correct for sign. */
  584. number = -number;
  585. negative = 0; /* Reset for exponent processing below. */
  586. }
  587. /* Process an exponent string. */
  588. if (c == 'e' || c == 'E') {
  589. c = scan_getc(sc);
  590. switch(c) { /* Handle optional sign. */
  591. case '-': negative = 1; /* Fall through to get next char. */
  592. case '+': c = scan_getc(sc);
  593. }
  594. num_digits = 0;
  595. exponent_temp = 0;
  596. while (isdigit(c)) { /* Process string of digits. */
  597. if (exponent_temp < MAX_ALLOWED_EXP) { /* overflow check */
  598. exponent_temp = exponent_temp * 10 + (c - '0');
  599. }
  600. c = scan_getc(sc);
  601. ++num_digits;
  602. }
  603. if (num_digits == 0) { /* Were there no exp digits? */
  604. goto FAIL;
  605. } /* else */
  606. if (negative) {
  607. exponent_power -= exponent_temp;
  608. } else {
  609. exponent_power += exponent_temp;
  610. }
  611. }
  612. if (number != 0.) {
  613. /* Now scale the result. */
  614. exponent_temp = exponent_power;
  615. p10 = 10.;
  616. if (exponent_temp < 0) {
  617. exponent_temp = -exponent_temp;
  618. }
  619. while (exponent_temp) {
  620. if (exponent_temp & 1) {
  621. if (exponent_power < 0) {
  622. number /= p10;
  623. } else {
  624. number *= p10;
  625. }
  626. }
  627. exponent_temp >>= 1;
  628. p10 *= p10;
  629. }
  630. }
  631. *ld = number;
  632. return 1;
  633. FAIL:
  634. scan_ungetc(sc);
  635. return 0;
  636. }
  637. #endif /* WANT_DOUBLE */
  638. #endif