process.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. /* $OpenBSD: process.c,v 1.32 2017/02/22 14:09:09 tom Exp $ */
  2. /*-
  3. * Copyright (c) 2015
  4. * mirabilos <m@mirbsd.org>
  5. * Copyright (c) 1992 Diomidis Spinellis.
  6. * Copyright (c) 1992, 1993
  7. * The Regents of the University of California. All rights reserved.
  8. *
  9. * This code is derived from software contributed to Berkeley by
  10. * Diomidis Spinellis of Imperial College, University of London.
  11. *
  12. * Redistribution and use in source and binary forms, with or without
  13. * modification, are permitted provided that the following conditions
  14. * are met:
  15. * 1. Redistributions of source code must retain the above copyright
  16. * notice, this list of conditions and the following disclaimer.
  17. * 2. Redistributions in binary form must reproduce the above copyright
  18. * notice, this list of conditions and the following disclaimer in the
  19. * documentation and/or other materials provided with the distribution.
  20. * 3. Neither the name of the University nor the names of its contributors
  21. * may be used to endorse or promote products derived from this software
  22. * without specific prior written permission.
  23. *
  24. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34. * SUCH DAMAGE.
  35. */
  36. #include <sys/types.h>
  37. #include <sys/stat.h>
  38. #include <sys/uio.h>
  39. __SCCSID("@(#)process.c 8.1 (Berkeley) 6/6/93");
  40. __RCSID("$MirOS: src/usr.bin/sed/process.c,v 1.5 2017/11/20 01:23:57 tg Exp $");
  41. #include <ctype.h>
  42. #include <errno.h>
  43. #include <fcntl.h>
  44. #include <limits.h>
  45. #include <regex.h>
  46. #include <stdio.h>
  47. #include <stdlib.h>
  48. #include <string.h>
  49. #include <unistd.h>
  50. #include "defs.h"
  51. #include "extern.h"
  52. static SPACE HS, PS, SS;
  53. #define pd PS.deleted
  54. #define ps PS.space
  55. #define psl PS.len
  56. #define psanl PS.append_newline
  57. #define hs HS.space
  58. #define hsl HS.len
  59. static inline int applies(struct s_command *);
  60. static void flush_appends(void);
  61. static void lputs(char *);
  62. static inline int regexec_e(regex_t *, const char *, int, int, size_t,
  63. size_t);
  64. static void regsub(SPACE *, char *, char *);
  65. static int substitute(struct s_command *);
  66. struct s_appends *appends; /* Array of pointers to strings to append. */
  67. static size_t appendx; /* Index into appends array. */
  68. size_t appendnum; /* Size of appends array. */
  69. static int lastaddr; /* Set by applies if last address of a range. */
  70. static int sdone; /* If any substitutes since last line input. */
  71. /* Iov structure for 'w' commands. */
  72. static regex_t *defpreg;
  73. size_t maxnsub;
  74. regmatch_t *match;
  75. #define OUT() do {\
  76. fwrite(ps, 1, psl, outfile);\
  77. if (psanl) fputc('\n', outfile);\
  78. } while (0)
  79. void
  80. process(void)
  81. {
  82. struct s_command *cp;
  83. SPACE tspace;
  84. size_t len, oldpsl;
  85. char *p;
  86. for (linenum = 0; mf_fgets(&PS, REPLACE);) {
  87. pd = 0;
  88. top:
  89. cp = prog;
  90. redirect:
  91. while (cp != NULL) {
  92. if (!applies(cp)) {
  93. cp = cp->next;
  94. continue;
  95. }
  96. switch (cp->code) {
  97. case '{':
  98. cp = cp->u.c;
  99. goto redirect;
  100. case 'a':
  101. if (appendx >= appendnum) {
  102. appends = xreallocarray(appends,
  103. appendnum,
  104. 2 * sizeof(struct s_appends));
  105. appendnum *= 2;
  106. }
  107. appends[appendx].type = AP_STRING;
  108. appends[appendx].s = cp->t;
  109. appends[appendx].len = strlen(cp->t);
  110. appendx++;
  111. break;
  112. case 'b':
  113. cp = cp->u.c;
  114. goto redirect;
  115. case 'c':
  116. pd = 1;
  117. psl = 0;
  118. if (cp->a2 == NULL || lastaddr || lastline())
  119. (void)fprintf(outfile, "%s", cp->t);
  120. break;
  121. case 'd':
  122. pd = 1;
  123. goto new;
  124. case 'D':
  125. if (pd)
  126. goto new;
  127. if (psl == 0 ||
  128. (p = memchr(ps, '\n', psl)) == NULL) {
  129. pd = 1;
  130. goto new;
  131. } else {
  132. psl -= (p + 1) - ps;
  133. memmove(ps, p + 1, psl);
  134. goto top;
  135. }
  136. case 'g':
  137. cspace(&PS, hs, hsl, REPLACE);
  138. break;
  139. case 'G':
  140. cspace(&PS, "\n", 1, 0);
  141. cspace(&PS, hs, hsl, 0);
  142. break;
  143. case 'h':
  144. cspace(&HS, ps, psl, REPLACE);
  145. break;
  146. case 'H':
  147. cspace(&HS, "\n", 1, 0);
  148. cspace(&HS, ps, psl, 0);
  149. break;
  150. case 'i':
  151. (void)fprintf(outfile, "%s", cp->t);
  152. break;
  153. case 'l':
  154. lputs(ps);
  155. break;
  156. case 'n':
  157. if (!nflag && !pd)
  158. OUT();
  159. flush_appends();
  160. if (!mf_fgets(&PS, REPLACE))
  161. exit(0);
  162. pd = 0;
  163. break;
  164. case 'N':
  165. flush_appends();
  166. cspace(&PS, "\n", 1, 0);
  167. if (!mf_fgets(&PS, 0))
  168. exit(0);
  169. break;
  170. case 'p':
  171. if (pd)
  172. break;
  173. OUT();
  174. break;
  175. case 'P':
  176. if (pd)
  177. break;
  178. if ((p = memchr(ps, '\n', psl)) != NULL) {
  179. oldpsl = psl;
  180. psl = p - ps;
  181. psanl = 1;
  182. OUT();
  183. psl = oldpsl;
  184. } else {
  185. OUT();
  186. }
  187. break;
  188. case 'q':
  189. if (!nflag && !pd)
  190. OUT();
  191. flush_appends();
  192. exit(0);
  193. case 'r':
  194. if (appendx >= appendnum) {
  195. appends = xreallocarray(appends,
  196. appendnum,
  197. 2 * sizeof(struct s_appends));
  198. appendnum *= 2;
  199. }
  200. appends[appendx].type = AP_FILE;
  201. appends[appendx].s = cp->t;
  202. appends[appendx].len = strlen(cp->t);
  203. appendx++;
  204. break;
  205. case 's':
  206. sdone |= substitute(cp);
  207. break;
  208. case 't':
  209. if (sdone) {
  210. sdone = 0;
  211. cp = cp->u.c;
  212. goto redirect;
  213. }
  214. break;
  215. case 'w':
  216. if (pd)
  217. break;
  218. if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
  219. O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
  220. DEFFILEMODE)) == -1)
  221. error(FATAL, "%s: %s",
  222. cp->t, strerror(errno));
  223. if ((size_t)write(cp->u.fd, ps, psl) != psl ||
  224. write(cp->u.fd, "\n", 1) != 1)
  225. error(FATAL, "%s: %s",
  226. cp->t, strerror(errno));
  227. break;
  228. case 'x':
  229. if (hs == NULL)
  230. cspace(&HS, "", 0, REPLACE);
  231. tspace = PS;
  232. PS = HS;
  233. psanl = tspace.append_newline;
  234. HS = tspace;
  235. break;
  236. case 'y':
  237. if (pd || psl == 0)
  238. break;
  239. for (p = ps, len = psl; len--; ++p)
  240. *p = cp->u.y[(unsigned char)*p];
  241. break;
  242. case ':':
  243. case '}':
  244. break;
  245. case '=':
  246. (void)fprintf(outfile, "%lu\n", linenum);
  247. }
  248. cp = cp->next;
  249. } /* for all cp */
  250. new: if (!nflag && !pd)
  251. OUT();
  252. flush_appends();
  253. } /* for all lines */
  254. }
  255. /*
  256. * TRUE if the address passed matches the current program state
  257. * (lastline, linenumber, ps).
  258. */
  259. #define MATCH(a) \
  260. (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) : \
  261. (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()
  262. /*
  263. * Return TRUE if the command applies to the current line. Sets the inrange
  264. * flag to process ranges. Interprets the non-select (``!'') flag.
  265. */
  266. static inline int
  267. applies(struct s_command *cp)
  268. {
  269. int r;
  270. lastaddr = 0;
  271. if (cp->a1 == NULL && cp->a2 == NULL)
  272. r = 1;
  273. else if (cp->a2)
  274. if (cp->inrange) {
  275. if (MATCH(cp->a2)) {
  276. cp->inrange = 0;
  277. lastaddr = 1;
  278. }
  279. r = 1;
  280. } else if (MATCH(cp->a1)) {
  281. /*
  282. * If the second address is a number less than or
  283. * equal to the line number first selected, only
  284. * one line shall be selected.
  285. * -- POSIX 1003.2
  286. */
  287. if (cp->a2->type == AT_LINE &&
  288. linenum >= cp->a2->u.l)
  289. lastaddr = 1;
  290. else
  291. cp->inrange = 1;
  292. r = 1;
  293. } else
  294. r = 0;
  295. else
  296. r = MATCH(cp->a1);
  297. return (cp->nonsel ? !r : r);
  298. }
  299. /*
  300. * Reset all inrange markers.
  301. */
  302. void
  303. resetranges(void)
  304. {
  305. struct s_command *cp;
  306. for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
  307. if (cp->a2)
  308. cp->inrange = 0;
  309. }
  310. /*
  311. * substitute --
  312. * Do substitutions in the pattern space. Currently, we build a
  313. * copy of the new pattern space in the substitute space structure
  314. * and then swap them.
  315. */
  316. static int
  317. substitute(struct s_command *cp)
  318. {
  319. SPACE tspace;
  320. regex_t *re;
  321. regoff_t slen;
  322. int n, lastempty;
  323. size_t le = 0;
  324. char *s;
  325. s = ps;
  326. re = cp->u.s->re;
  327. if (re == NULL) {
  328. if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
  329. linenum = cp->u.s->linenum;
  330. error(COMPILE, "\\%zu not defined in the RE",
  331. cp->u.s->maxbref);
  332. }
  333. }
  334. if (!regexec_e(re, ps, 0, 0, 0, psl))
  335. return (0);
  336. SS.len = 0; /* Clean substitute space. */
  337. slen = psl;
  338. n = cp->u.s->n;
  339. lastempty = 1;
  340. do {
  341. /* Copy the leading retained string. */
  342. if (n <= 1 && (match[0].rm_so > le))
  343. cspace(&SS, s, match[0].rm_so - le, APPEND);
  344. /* Skip zero-length matches right after other matches. */
  345. if (lastempty || (match[0].rm_so - le) ||
  346. match[0].rm_so != match[0].rm_eo) {
  347. if (n <= 1) {
  348. /* Want this match: append replacement. */
  349. regsub(&SS, ps, cp->u.s->new);
  350. if (n == 1)
  351. n = -1;
  352. } else {
  353. /* Want a later match: append original. */
  354. if (match[0].rm_eo - le)
  355. cspace(&SS, s, match[0].rm_eo - le,
  356. APPEND);
  357. n--;
  358. }
  359. }
  360. /* Move past this match. */
  361. s = ps + match[0].rm_eo;
  362. slen = psl - match[0].rm_eo;
  363. le = match[0].rm_eo;
  364. /*
  365. * After a zero-length match, advance one byte,
  366. * and at the end of the line, terminate.
  367. */
  368. if (match[0].rm_so == match[0].rm_eo) {
  369. if (*s == '\0' || *s == '\n')
  370. slen = -1;
  371. else
  372. slen--;
  373. if (*s != '\0') {
  374. cspace(&SS, s++, 1, APPEND);
  375. le++;
  376. }
  377. lastempty = 1;
  378. } else
  379. lastempty = 0;
  380. } while (n >= 0 && slen >= 0 &&
  381. regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
  382. /* Did not find the requested number of matches. */
  383. if (n > 0)
  384. return (0);
  385. /* Copy the trailing retained string. */
  386. if (slen > 0)
  387. cspace(&SS, s, slen, APPEND);
  388. /*
  389. * Swap the substitute space and the pattern space, and make sure
  390. * that any leftover pointers into stdio memory get lost.
  391. */
  392. tspace = PS;
  393. PS = SS;
  394. psanl = tspace.append_newline;
  395. SS = tspace;
  396. SS.space = SS.back;
  397. /* Handle the 'p' flag. */
  398. if (cp->u.s->p)
  399. OUT();
  400. /* Handle the 'w' flag. */
  401. if (cp->u.s->wfile && !pd) {
  402. if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
  403. O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
  404. error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
  405. if ((size_t)write(cp->u.s->wfd, ps, psl) != psl ||
  406. write(cp->u.s->wfd, "\n", 1) != 1)
  407. error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
  408. }
  409. return (1);
  410. }
  411. /*
  412. * Flush append requests. Always called before reading a line,
  413. * therefore it also resets the substitution done (sdone) flag.
  414. */
  415. static void
  416. flush_appends(void)
  417. {
  418. FILE *f;
  419. int count;
  420. size_t i;
  421. char buf[8 * 1024];
  422. for (i = 0; i < appendx; i++)
  423. switch (appends[i].type) {
  424. case AP_STRING:
  425. fwrite(appends[i].s, sizeof(char), appends[i].len,
  426. outfile);
  427. break;
  428. case AP_FILE:
  429. /*
  430. * Read files probably shouldn't be cached. Since
  431. * it's not an error to read a non-existent file,
  432. * it's possible that another program is interacting
  433. * with the sed script through the filesystem. It
  434. * would be truly bizarre, but possible. It's probably
  435. * not that big a performance win, anyhow.
  436. */
  437. if ((f = fopen(appends[i].s, "r")) == NULL)
  438. break;
  439. while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
  440. (void)fwrite(buf, sizeof(char), count, outfile);
  441. (void)fclose(f);
  442. break;
  443. }
  444. if (ferror(outfile))
  445. error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
  446. appendx = sdone = 0;
  447. }
  448. static void
  449. lputs(char *s)
  450. {
  451. int count;
  452. extern int termwidth;
  453. const char *escapes;
  454. char *p;
  455. for (count = 0; *s; ++s) {
  456. if (count >= termwidth) {
  457. (void)fprintf(outfile, "\\\n");
  458. count = 0;
  459. }
  460. if (isascii((unsigned char)*s) && isprint((unsigned char)*s)
  461. && *s != '\\') {
  462. (void)fputc(*s, outfile);
  463. count++;
  464. } else {
  465. escapes = "\\\a\b\f\n\r\t\v";
  466. if (*s == '\n' && s[1] == '\0') {
  467. /* omit trailing newline */
  468. break;
  469. }
  470. (void)fputc('\\', outfile);
  471. if ((p = strchr(escapes, *s))) {
  472. (void)fputc("\\abfnrtv"[p - escapes], outfile);
  473. count += 2;
  474. } else {
  475. (void)fprintf(outfile, "%03o", *(u_char *)s);
  476. count += 4;
  477. }
  478. }
  479. }
  480. (void)fputc('$', outfile);
  481. (void)fputc('\n', outfile);
  482. if (ferror(outfile))
  483. error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
  484. }
  485. static inline int
  486. regexec_e(regex_t *preg, const char *string, int eflags,
  487. int nomatch, size_t start, size_t stop)
  488. {
  489. int eval;
  490. if (preg == NULL) {
  491. if (defpreg == NULL)
  492. error(FATAL, "first RE may not be empty");
  493. } else
  494. defpreg = preg;
  495. /* Set anchors */
  496. match[0].rm_so = start;
  497. match[0].rm_eo = stop;
  498. eval = regexec(defpreg, string,
  499. nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
  500. switch (eval) {
  501. case 0:
  502. return (1);
  503. case REG_NOMATCH:
  504. return (0);
  505. }
  506. error(FATAL, "RE error: %s", strregerror(eval, defpreg));
  507. }
  508. /*
  509. * regsub - perform substitutions after a regexp match
  510. * Based on a routine by Henry Spencer
  511. */
  512. static void
  513. regsub(SPACE *sp, char *string, char *src)
  514. {
  515. int len, no;
  516. char c, *dst;
  517. #define NEEDSP(reqlen) \
  518. if (sp->len + (reqlen) + 1 >= sp->blen) { \
  519. size_t newlen = sp->blen + (reqlen) + 1024; \
  520. sp->space = sp->back = xrealloc(sp->back, newlen); \
  521. sp->blen = newlen; \
  522. dst = sp->space + sp->len; \
  523. }
  524. dst = sp->space + sp->len;
  525. while ((c = *src++) != '\0') {
  526. if (c == '&')
  527. no = 0;
  528. else if (c == '\\' && isdigit((unsigned char)*src))
  529. no = *src++ - '0';
  530. else
  531. no = -1;
  532. if (no < 0) { /* Ordinary character. */
  533. if (c == '\\' && (*src == '\\' || *src == '&'))
  534. c = *src++;
  535. NEEDSP(1);
  536. *dst++ = c;
  537. ++sp->len;
  538. } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
  539. len = match[no].rm_eo - match[no].rm_so;
  540. NEEDSP(len);
  541. memmove(dst, string + match[no].rm_so, len);
  542. dst += len;
  543. sp->len += len;
  544. }
  545. }
  546. NEEDSP(1);
  547. *dst = '\0';
  548. }
  549. /*
  550. * aspace --
  551. * Append the source space to the destination space, allocating new
  552. * space as necessary.
  553. */
  554. void
  555. cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
  556. {
  557. size_t tlen;
  558. /* Make sure SPACE has enough memory and ramp up quickly. */
  559. tlen = sp->len + len + 1;
  560. if (tlen > sp->blen) {
  561. size_t newlen = tlen + 1024;
  562. sp->space = sp->back = xrealloc(sp->back, newlen);
  563. sp->blen = newlen;
  564. }
  565. if (spflag == REPLACE)
  566. sp->len = 0;
  567. memmove(sp->space + sp->len, p, len);
  568. sp->space[sp->len += len] = '\0';
  569. }
  570. /*
  571. * Close all cached opened files and report any errors
  572. */
  573. void
  574. cfclose(struct s_command *cp, struct s_command *end)
  575. {
  576. for (; cp != end; cp = cp->next)
  577. switch (cp->code) {
  578. case 's':
  579. if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
  580. error(FATAL,
  581. "%s: %s", cp->u.s->wfile, strerror(errno));
  582. cp->u.s->wfd = -1;
  583. break;
  584. case 'w':
  585. if (cp->u.fd != -1 && close(cp->u.fd))
  586. error(FATAL, "%s: %s", cp->t, strerror(errno));
  587. cp->u.fd = -1;
  588. break;
  589. case '{':
  590. cfclose(cp->u.c, cp->next);
  591. break;
  592. }
  593. }