scanf.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691
  1. /*
  2. * Modified by Manuel Novoa III Mar 13, 2001
  3. *
  4. * The vfscanf routine was completely rewritten to add features and remove
  5. * bugs. The function __strtold, based on my strtod code in stdlib, was
  6. * added to provide floating point support for the scanf functions.
  7. *
  8. * So far they pass the test cases from glibc-2.1.3, except in two instances.
  9. * In one case, the test appears to be broken. The other case is something
  10. * I need to research further. This version of scanf assumes it can only
  11. * peek one character ahead. Apparently, glibc looks further. The difference
  12. * can be seen when parsing a floating point value in the character
  13. * sequence "100ergs". glibc is able to back up before the 'e' and return
  14. * a value of 100, whereas this scanf reports a bad match with the stream
  15. * pointer at 'r'. A similar situation can also happen when parsing hex
  16. * values prefixed by 0x or 0X; a failure would occur for "0xg". In order to
  17. * fix this, I need to rework the "ungetc" machinery in stdio.c again.
  18. * I do have one reference though, that seems to imply scanf has a single
  19. * character of lookahead.
  20. */
  21. #include <stdlib.h>
  22. #include <unistd.h>
  23. #include <stdio.h>
  24. #include <ctype.h>
  25. #include <string.h>
  26. #include <stdarg.h>
  27. #ifdef L_scanf
  /*
   * scanf - read formatted input from stdin.
   *
   * Gathers the variable arguments into a va_list and forwards them, with
   * fmt, to vfscanf() on stdin.  The return value is whatever vfscanf()
   * returns.
   */
  #ifdef __STDC__
  int scanf(const char *fmt, ...)
  #else
  int scanf(fmt, va_alist)
  __const char *fmt;
  va_dcl
  #endif
  {
      va_list args;
      int result;

      va_start(args, fmt);
      result = vfscanf(stdin, fmt, args);
      va_end(args);

      return result;
  }
  43. #endif
  44. #ifdef L_sscanf
  45. #ifdef __STDC__
  46. int sscanf(const char *sp, const char *fmt, ...)
  47. #else
  48. int sscanf(sp, fmt, va_alist)
  49. __const char *sp;
  50. __const char *fmt;
  51. va_dcl
  52. #endif
  53. {
  54. FILE string[1] = {
  55. {0, (unsigned char *) ((unsigned) -1), 0, 0, (char *) ((unsigned) -1),
  56. 0, -1, _IOFBF}
  57. };
  58. va_list ptr;
  59. int rv;
  60. string->bufpos = (unsigned char *) ((void *) sp);
  61. va_start(ptr, fmt);
  62. rv = vfscanf(string, fmt, ptr);
  63. va_end(ptr);
  64. return rv;
  65. }
  66. #endif
  67. #ifdef L_fscanf
  /*
   * fscanf - read formatted input from the stream fp.
   *
   * Gathers the variable arguments into a va_list and forwards them, with
   * fp and fmt, to vfscanf().  The return value is whatever vfscanf()
   * returns.
   */
  #ifdef __STDC__
  int fscanf(FILE * fp, const char *fmt, ...)
  #else
  int fscanf(fp, fmt, va_alist)
  FILE *fp;
  __const char *fmt;
  va_dcl
  #endif
  {
      va_list args;
      int result;

      va_start(args, fmt);
      result = vfscanf(fp, fmt, args);
      va_end(args);

      return result;
  }
  84. #endif
  85. #ifdef L_vscanf
  /*
   * vscanf - read formatted input from stdin using an already started
   * va_list.
   *
   * Simply forwards to vfscanf() on stdin and returns its result.  Defined
   * with a prototype (like vsscanf in this file) so that callers' arguments
   * are type checked; the standard const keyword replaces the non-standard
   * __const spelling.
   */
  int vscanf(const char *fmt, va_list ap)
  {
      return vfscanf(stdin, fmt, ap);
  }
  92. #endif
  93. #ifdef L_vsscanf
  94. int vsscanf(__const char *sp, __const char *fmt, va_list ap)
  95. {
  96. FILE string[1] = {
  97. {0, (unsigned char *) ((unsigned) -1), 0, 0, (char *) ((unsigned) -1),
  98. 0, -1, _IOFBF}
  99. };
  100. string->bufpos = (unsigned char *) sp;
  101. return vfscanf(string, fmt, ap);
  102. }
  103. #endif
  104. #ifdef L_vfscanf
  105. #include <assert.h>
  106. #include <ctype.h>
  107. #include <limits.h>
  /*
   * valid_digit - test whether c is a digit in the given base.
   *
   * base is 16 (hexadecimal digits accepted) or a value up to 10, in which
   * case only the decimal digits below '0' + base are accepted.
   *
   * Returns nonzero when c is a valid digit, 0 otherwise.
   */
  static int valid_digit(char c, char base)
  {
      /*
       * The <ctype.h> classifiers are only defined for values representable
       * as unsigned char (or EOF); a plain char may be negative, so convert
       * before classifying.
       */
      const unsigned char uc = (unsigned char) c;

      if (base == 16) {
          return isxdigit(uc);
      }
      return (isdigit(uc) && (uc < '0' + base));
  }
  /*
   * Integer parsers defined elsewhere in the library.  vfscanf() reaches
   * them through its STRTO_L_ macro to convert the digit string it has
   * collected, passing the numeric base and a flag selecting unsigned
   * conversion.
   */
  extern unsigned long long _strto_ll(const char *str, char **endptr,
                                      int base, int uflag);
  extern unsigned long _strto_l(const char *str, char **endptr,
                                int base, int uflag);
  /*
   * Scanner state shared by vfscanf() and its helpers: the input stream plus
   * a one-character lookahead.
   */
  struct scan_cookie {
      FILE *fp;           /* stream the characters are read from */
      int nread;          /* count of characters > 0 consumed so far */
      int width;          /* remaining field width; scan_getc() decrements it */
      int ungot_char;     /* most recent result of getc(fp) */
      int ungot_flag;     /* nonzero while ungot_char is still unread */
  };
  /*
   * Length qualifiers recognised after '%' and the amount each one adds to
   * vfscanf()'s size level (char = -2, short = -1, int = 0, long = 1,
   * long long = 2).  qsz[i] belongs to qual[i]; 'h' and 'l' may be doubled,
   * in which case their value is added twice.
   *
   * qsz holds a negative value, so it is declared signed char: the
   * signedness of plain char is implementation-defined and -1 would
   * otherwise be stored as 255 on unsigned-char targets.
   */
  #if __UCLIBC_HAS_LONG_LONG__
  static const char qual[] = "hl" /* "jtz" */ "Lq";
  static const signed char qsz[] = { -1, 1, 2, 2 };
  #else
  static const char qual[] = "hl" /* "jtz" */;
  static const signed char qsz[] = { -1, 1, };
  #endif

  /*
   * Conversion characters.  vfscanf() dispatches on the index of the match
   * in this string, so the order of the characters is significant.
   */
  #if __UCLIBC_HAS_FLOATS__
  static int __strtold(long double *ld, struct scan_cookie *sc);
                             /*01234567890123456 */
  static const char spec[] = "%n[csoupxXidfeEgG";
  #else
  static const char spec[] = "%n[csoupxXid";
  #endif

  /* radix[i] <-> spec[i+5]          o   u   p   x   X  i   d */
  static const char radix[] = {      8, 10, 16, 16, 16, 0, 10 };
  144. static void init_scan_cookie(struct scan_cookie *sc, FILE *fp)
  145. {
  146. sc->fp = fp;
  147. sc->nread = 0;
  148. sc->ungot_flag = 0;
  149. if ((sc->ungot_char = getc(fp)) > 0) { /* not EOF or EOS */
  150. sc->ungot_flag = 1;
  151. }
  152. }
  153. static int scan_getc_nw(struct scan_cookie *sc)
  154. {
  155. if (sc->ungot_flag == 0) {
  156. sc->ungot_char = getc(sc->fp);
  157. } else {
  158. sc->ungot_flag = 0;
  159. }
  160. if (sc->ungot_char > 0) {
  161. ++sc->nread;
  162. }
  163. return sc->ungot_char;
  164. }
  165. static int scan_getc(struct scan_cookie *sc)
  166. {
  167. if (sc->ungot_flag == 0) {
  168. sc->ungot_char = getc(sc->fp);
  169. }
  170. if (--sc->width < 0) {
  171. sc->ungot_flag = 1;
  172. return 0;
  173. }
  174. sc->ungot_flag = 0;
  175. if (sc->ungot_char > 0) {
  176. ++sc->nread;
  177. }
  178. return sc->ungot_char;
  179. }
  180. static void scan_ungetc(struct scan_cookie *sc)
  181. {
  182. if (sc->ungot_flag != 0) {
  183. assert(sc->width < 0);
  184. return;
  185. }
  186. sc->ungot_flag = 1;
  187. if (sc->ungot_char > 0) { /* not EOF or EOS */
  188. --sc->nread;
  189. }
  190. }
  191. static void kill_scan_cookie(struct scan_cookie *sc)
  192. {
  193. if (sc->ungot_flag) {
  194. ungetc(sc->ungot_char,sc->fp);
  195. }
  196. }
  197. int vfscanf(fp, format, ap)
  198. FILE *fp;
  199. const char *format;
  200. va_list ap;
  201. {
  202. #if __UCLIBC_HAS_LONG_LONG__
  203. #define STRTO_L_(s,e,b,u) _strto_ll(s,e,b,u)
  204. #define MAX_DIGITS 64
  205. #define UV_TYPE unsigned long long
  206. #define V_TYPE long long
  207. #else
  208. #define STRTO_L_(s,e,b,u) _strto_l(s,e,b,u)
  209. #define MAX_DIGITS 32
  210. #define UV_TYPE unsigned long
  211. #define V_TYPE long
  212. #endif
  213. #if __UCLIBC_HAS_FLOATS__
  214. long double ld;
  215. #endif
  216. UV_TYPE uv;
  217. struct scan_cookie sc;
  218. unsigned const char *fmt;
  219. const char *p;
  220. unsigned char *b;
  221. void *vp;
  222. int cc, i, cnt;
  223. signed char lval;
  224. unsigned char store, usflag, base, invert, r0, r1;
  225. unsigned char buf[MAX_DIGITS+2];
  226. unsigned char scanset[UCHAR_MAX + 1];
  227. init_scan_cookie(&sc,fp);
  228. fmt = (unsigned const char *) format;
  229. cnt = 0;
  230. while (*fmt) {
  231. store = 1;
  232. lval = 0;
  233. sc.width = INT_MAX;
  234. if (*fmt == '%') { /* Conversion specification. */
  235. ++fmt;
  236. if (*fmt == '*') { /* Suppress assignment. */
  237. store = 0;
  238. ++fmt;
  239. }
  240. for (i = 0 ; isdigit(*fmt) ; sc.width = i) {
  241. i = (i * 10) + (*fmt++ - '0'); /* Get specified width. */
  242. }
  243. for (i = 0 ; i < sizeof(qual) ; i++) { /* Optional qualifier. */
  244. if (qual[i] == *fmt) {
  245. ++fmt;
  246. lval += qsz[i];
  247. if ((i < 2) && (qual[i] == *fmt)) { /* Double h or l. */
  248. ++fmt;
  249. lval += qsz[i];
  250. }
  251. break;
  252. }
  253. }
  254. for (p = spec ; *p ; p++) { /* Process format specifier. */
  255. if (*fmt != *p) continue;
  256. if (p-spec < 1) { /* % - match a '%'*/
  257. goto matchchar;
  258. }
  259. if (p-spec < 2) { /* n - store number of chars read */
  260. *(va_arg(ap, int *)) = sc.nread;
  261. scan_getc_nw(&sc);
  262. goto nextfmt;
  263. }
  264. if (p-spec > 3) { /* skip white space if not c or [ */
  265. while (isspace(scan_getc_nw(&sc)))
  266. {}
  267. scan_ungetc(&sc);
  268. }
  269. if (p-spec < 5) { /* [,c,s - string conversions */
  270. invert = 0;
  271. if (*p == 'c') {
  272. invert = 1;
  273. if (sc.width == INT_MAX) {
  274. sc.width = 1;
  275. }
  276. }
  277. for (i=0 ; i<= UCHAR_MAX ; i++) {
  278. scanset[i] = ((*p == 's') ? (isspace(i) == 0) : 0);
  279. }
  280. if (*p == '[') { /* need to build a scanset */
  281. if (*++fmt == '^') {
  282. invert = 1;
  283. ++fmt;
  284. }
  285. if (*fmt == ']') {
  286. scanset[(int)']'] = 1;
  287. ++fmt;
  288. }
  289. r0 = 0;
  290. while (*fmt && *fmt !=']') { /* build scanset */
  291. if ((*fmt == '-') && r0 && (fmt[1] != ']')) {
  292. /* range */
  293. ++fmt;
  294. if (*fmt < r0) {
  295. r1 = r0;
  296. r0 = *fmt;
  297. } else {
  298. r1 = *fmt;
  299. }
  300. for (i=r0 ; i<= r1 ; i++) {
  301. scanset[i] = 1;
  302. }
  303. r0 = 0;
  304. } else {
  305. r0 = *fmt;
  306. scanset[r0] = 1;
  307. }
  308. ++fmt;
  309. }
  310. if (!*fmt) { /* format string exhausted! */
  311. goto done;
  312. }
  313. }
  314. /* ok -- back to common work */
  315. if (sc.width <= 0) {
  316. goto done;
  317. }
  318. if (store) {
  319. b = va_arg(ap, unsigned char *);
  320. } else {
  321. b = buf;
  322. }
  323. i = 0;
  324. cc = scan_getc(&sc);
  325. while ((cc>0) && (scanset[cc] != invert)) {
  326. i = store; /* yes, we stored something */
  327. *b = cc;
  328. b += store;
  329. cc = scan_getc(&sc);
  330. }
  331. if (*p != 'c') { /* nul-terminate the stored string */
  332. *b = 0;
  333. cnt += i;
  334. goto nextfmt;
  335. } else if (sc.width < 0) { /* case 'c' */
  336. cnt += store;
  337. goto nextfmt;
  338. }
  339. scan_ungetc(&sc);
  340. goto done;
  341. }
  342. if (p-spec < 12) { /* o,u,p,x,X,i,d - (un)signed integer */
  343. if (*p == 'p') {
  344. /* assume pointer same size as int or long. */
  345. lval = (sizeof(char *) == sizeof(long));
  346. }
  347. usflag = ((p-spec) < 10); /* (1)0 if (un)signed */
  348. base = radix[(int)(p-spec) - 5];
  349. b = buf;
  350. if (sc.width <= 0) {
  351. goto done;
  352. }
  353. cc = scan_getc(&sc);
  354. if ((cc == '+') || (cc == '-')) { /* Handle leading sign.*/
  355. *b++ = cc;
  356. cc = scan_getc(&sc);
  357. }
  358. if (cc == '0') { /* Possibly set base and handle prefix. */
  359. if ((base == 0) || (base == 16)) {
  360. cc = scan_getc(&sc);
  361. if ((cc == 'x') || (cc == 'X')) {
  362. /* We're committed to base 16 now. */
  363. base = 16;
  364. cc = scan_getc(&sc);
  365. } else { /* oops... back up */
  366. scan_ungetc(&sc);
  367. cc = '0';
  368. if (base == 0) {
  369. base = 8;
  370. }
  371. }
  372. }
  373. }
  374. /* At this point, we're ready to start reading digits. */
  375. if (cc == '0') {
  376. *b++ = cc; /* Store first leading 0 */
  377. do { /* but ignore others. */
  378. cc = scan_getc(&sc);
  379. } while (cc == '0');
  380. }
  381. while (valid_digit(cc,base)) { /* Now for nonzero digits.*/
  382. if (b - buf < MAX_DIGITS) {
  383. *b++ = cc;
  384. }
  385. cc = scan_getc(&sc);
  386. }
  387. *b = 0; /* null-terminate */
  388. if ((b == buf) || (*--b == '+') || (*b == '-')) {
  389. scan_ungetc(&sc);
  390. goto done; /* No digits! */
  391. }
  392. if (store) {
  393. if (*buf == '-') {
  394. usflag = 0;
  395. }
  396. uv = STRTO_L_(buf, NULL, base, usflag);
  397. vp = va_arg(ap, void *);
  398. switch (lval) {
  399. case 2: /* If no long long, treat as long . */
  400. #if __UCLIBC_HAS_LONG_LONG__
  401. *((unsigned long long *)vp) = uv;
  402. break;
  403. #endif
  404. case 1:
  405. #if ULONG_MAX == UINT_MAX
  406. case 0: /* int and long int are the same */
  407. #endif
  408. #if __UCLIBC_HAS_LONG_LONG__
  409. if (usflag) {
  410. if (uv > ULONG_MAX) {
  411. uv = ULONG_MAX;
  412. }
  413. } else if (((V_TYPE)uv) > LONG_MAX) {
  414. uv = LONG_MAX;
  415. } else if (((V_TYPE)uv) < LONG_MIN) {
  416. uv = (UV_TYPE) LONG_MIN;
  417. }
  418. #endif
  419. *((unsigned long *)vp) = (unsigned long)uv;
  420. break;
  421. #if ULONG_MAX != UINT_MAX
  422. case 0: /* int and long int are different */
  423. if (usflag) {
  424. if (uv > UINT_MAX) {
  425. uv = UINT_MAX;
  426. }
  427. } else if (((V_TYPE)uv) > INT_MAX) {
  428. uv = INT_MAX;
  429. } else if (((V_TYPE)uv) < INT_MIN) {
  430. uv = (UV_TYPE) INT_MIN;
  431. }
  432. *((unsigned int *)vp) = (unsigned int)uv;
  433. break;
  434. #endif
  435. case -1:
  436. if (usflag) {
  437. if (uv > USHRT_MAX) {
  438. uv = USHRT_MAX;
  439. }
  440. } else if (((V_TYPE)uv) > SHRT_MAX) {
  441. uv = SHRT_MAX;
  442. } else if (((V_TYPE)uv) < SHRT_MIN) {
  443. uv = (UV_TYPE) SHRT_MIN;
  444. }
  445. *((unsigned short *)vp) = (unsigned short)uv;
  446. break;
  447. case -2:
  448. if (usflag) {
  449. if (uv > UCHAR_MAX) {
  450. uv = UCHAR_MAX;
  451. }
  452. } else if (((V_TYPE)uv) > CHAR_MAX) {
  453. uv = CHAR_MAX;
  454. } else if (((V_TYPE)uv) < CHAR_MIN) {
  455. uv = (UV_TYPE) CHAR_MIN;
  456. }
  457. *((unsigned char *)vp) = (unsigned char) uv;
  458. break;
  459. default:
  460. assert(0);
  461. }
  462. ++cnt;
  463. }
  464. goto nextfmt;
  465. }
  466. #if __UCLIBC_HAS_FLOATS__
  467. else { /* floating point */
  468. if (sc.width <= 0) {
  469. goto done;
  470. }
  471. if (__strtold(&ld, &sc)) { /* Success! */
  472. if (store) {
  473. vp = va_arg(ap, void *);
  474. switch (lval) {
  475. case 2:
  476. *((long double *)vp) = ld;
  477. break;
  478. case 1:
  479. *((double *)vp) = (double) ld;
  480. break;
  481. case 0:
  482. *((float *)vp) = (float) ld;
  483. break;
  484. default: /* Illegal qualifier! */
  485. assert(0);
  486. goto done;
  487. }
  488. ++cnt;
  489. }
  490. goto nextfmt;
  491. }
  492. }
  493. #else
  494. assert(0);
  495. #endif
  496. goto done;
  497. }
  498. /* Unrecognized specifier! */
  499. goto done;
  500. } if (isspace(*fmt)) { /* Consume all whitespace. */
  501. while (isspace(scan_getc_nw(&sc)))
  502. {}
  503. } else { /* Match the current fmt char. */
  504. matchchar:
  505. if (scan_getc_nw(&sc) != *fmt) {
  506. goto done;
  507. }
  508. scan_getc_nw(&sc);
  509. }
  510. nextfmt:
  511. scan_ungetc(&sc);
  512. ++fmt;
  513. }
  514. done: /* end of scan */
  515. kill_scan_cookie(&sc);
  516. if ((sc.ungot_char <= 0) && (cnt == 0) && (*fmt)) {
  517. return (EOF);
  518. }
  519. return (cnt);
  520. }
  521. /*****************************************************************************/
  522. #if __UCLIBC_HAS_FLOATS__
  #include <float.h>

  /* Digits that contribute to the value; further digits only shift the exponent. */
  #define MAX_SIG_DIGITS 20
  #define MAX_IGNORED_DIGITS 2000
  #define MAX_ALLOWED_EXP (MAX_SIG_DIGITS + MAX_IGNORED_DIGITS + LDBL_MAX_10_EXP)

  #if LDBL_DIG > MAX_SIG_DIGITS
  #error need to adjust MAX_SIG_DIGITS
  #endif

  #include <limits.h>

  #if MAX_ALLOWED_EXP > INT_MAX
  #error size assumption violated for MAX_ALLOWED_EXP
  #endif

  /*
   * __strtold - scan a decimal floating point number from the input.
   *
   * Accepted form: optional sign, digits with at most one '.', and an
   * optional exponent ('e' or 'E', optional sign, at least one digit).
   * Characters are fetched with scan_getc(), so the field width applies.
   *
   * On success the value is stored in *ld and 1 is returned.  On failure
   * (no digit at all, or an exponent marker without digits) the last
   * character is pushed back with scan_ungetc() and 0 is returned.
   */
  int __strtold(long double *ld, struct scan_cookie *sc)
  {
      long double value = 0.;
      long double scale;
      int exp10 = 0;              /* decimal exponent to apply to value */
      int exp_abs;
      int is_neg = 0;
      int ndigits = -1;           /* -1 until the first digit is seen */
      int frac_digits = INT_MIN;  /* negative until a '.' has been seen */
      int exp_digits;
      int ch;

      ch = scan_getc(sc);         /* Decrements width. */
      if (ch == '-') {            /* Handle optional sign. */
          is_neg = 1;
          ch = scan_getc(sc);
      } else if (ch == '+') {
          ch = scan_getc(sc);
      }

      /* Mantissa: a run of digits, optionally a '.', then more digits. */
      for (;;) {
          for ( ; isdigit(ch) ; ch = scan_getc(sc)) {
              ++frac_digits;
              if (ndigits < 0) {              /* first digit of all */
                  ++ndigits;
              }
              if (ndigits || (ch != '0')) {   /* leading zeros do not count */
                  ++ndigits;
                  if (ndigits <= MAX_SIG_DIGITS) {
                      value = value * 10. + (ch - '0');
                  }
              }
          }
          if ((ch != '.') || (frac_digits >= 0)) {
              break;                          /* not a (first) decimal point */
          }
          frac_digits = 0;                    /* start counting fraction digits */
          ch = scan_getc(sc);
      }

      if (ndigits < 0) {          /* Must have at least one digit. */
          scan_ungetc(sc);
          return 0;
      }

      if (ndigits > MAX_SIG_DIGITS) {     /* digits dropped from the value */
          exp10 += ndigits - MAX_SIG_DIGITS;
      }
      if (frac_digits >= 0) {             /* digits after the decimal point */
          exp10 -= frac_digits;
      }
      if (is_neg) {
          value = -value;
          is_neg = 0;             /* reused for the exponent sign below */
      }

      /* Optional exponent. */
      if (ch == 'e' || ch == 'E') {
          ch = scan_getc(sc);
          if (ch == '-') {
              is_neg = 1;
              ch = scan_getc(sc);
          } else if (ch == '+') {
              ch = scan_getc(sc);
          }

          exp_abs = 0;
          for (exp_digits = 0 ; isdigit(ch) ; ++exp_digits) {
              if (exp_abs < MAX_ALLOWED_EXP) {    /* overflow check */
                  exp_abs = exp_abs * 10 + (ch - '0');
              }
              ch = scan_getc(sc);
          }
          if (exp_digits == 0) {  /* exponent marker without digits */
              scan_ungetc(sc);
              return 0;
          }

          if (is_neg) {
              exp10 -= exp_abs;
          } else {
              exp10 += exp_abs;
          }
      }

      if (value != 0.) {
          /* Scale by 10^exp10 using binary exponentiation. */
          exp_abs = (exp10 < 0) ? -exp10 : exp10;
          for (scale = 10. ; exp_abs ; exp_abs >>= 1, scale *= scale) {
              if (exp_abs & 1) {
                  if (exp10 < 0) {
                      value /= scale;
                  } else {
                      value *= scale;
                  }
              }
          }
      }

      *ld = value;
      return 1;
  }
  636. #endif /* __UCLIBC_HAS_FLOATS__ */
  637. #endif