123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695 |
- /* $NetBSD: grep.c,v 1.11 2012/05/06 22:27:00 joerg Exp $ */
- /* $FreeBSD: head/usr.bin/grep/grep.c 211519 2010-08-19 22:55:17Z delphij $ */
- /* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */
- /*-
- * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
- * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
- #if HAVE_NBTOOL_CONFIG_H
- #include "nbtool_config.h"
- #endif
- #include <sys/cdefs.h>
- __RCSID("$NetBSD: grep.c,v 1.11 2012/05/06 22:27:00 joerg Exp $");
- #include <sys/stat.h>
- #include <sys/types.h>
- #include <ctype.h>
- #include <err.h>
- #include <errno.h>
- #include <getopt.h>
- #include <limits.h>
- #include <libgen.h>
- #include <locale.h>
- #include <stdbool.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h>
- #include "grep.h"
- /*
- * Default messags to use when NLS is disabled or no catalogue
- * is found.
- */
- const char *errstr[] = {
- "",
- /* 1*/ "(standard input)",
- /* 2*/ "cannot read bzip2 compressed file",
- /* 3*/ "unknown %s option",
- /* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n",
- /* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n",
- /* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n",
- /* 7*/ "\t[pattern] [file ...]\n",
- /* 8*/ "Binary file %s matches\n",
- /* 9*/ "%s (BSD grep) %s\n",
- };
- /* Flags passed to regcomp() and regexec() */
- int cflags = 0;
- int eflags = REG_STARTEND;
- /* Searching patterns */
- unsigned int patterns, pattern_sz;
- char **pattern;
- regex_t *r_pattern;
- fastgrep_t *fg_pattern;
- /* Filename exclusion/inclusion patterns */
- unsigned int fpatterns, fpattern_sz;
- unsigned int dpatterns, dpattern_sz;
- struct epat *dpattern, *fpattern;
- /* For regex errors */
- char re_error[RE_ERROR_BUF + 1];
- /* Command-line flags */
- unsigned long long Aflag; /* -A x: print x lines trailing each match */
- unsigned long long Bflag; /* -B x: print x lines leading each match */
- bool Hflag; /* -H: always print file name */
- bool Lflag; /* -L: only show names of files with no matches */
- bool bflag; /* -b: show block numbers for each match */
- bool cflag; /* -c: only show a count of matching lines */
- bool hflag; /* -h: don't print filename headers */
- bool iflag; /* -i: ignore case */
- bool lflag; /* -l: only show names of files with matches */
- bool mflag; /* -m x: stop reading the files after x matches */
- unsigned long long mcount; /* count for -m */
- bool nflag; /* -n: show line numbers in front of matching lines */
- bool oflag; /* -o: print only matching part */
- bool qflag; /* -q: quiet mode (don't output anything) */
- bool sflag; /* -s: silent mode (ignore errors) */
- bool vflag; /* -v: only show non-matching lines */
- bool wflag; /* -w: pattern must start and end on word boundaries */
- bool xflag; /* -x: pattern must match entire line */
- bool lbflag; /* --line-buffered */
- bool nullflag; /* --null */
- bool nulldataflag; /* --null-data */
- unsigned char line_sep = '\n'; /* 0 for --null-data */
- char *label; /* --label */
- const char *color; /* --color */
- int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */
- int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */
- int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */
- int devbehave = DEV_READ; /* -D: handling of devices */
- int dirbehave = DIR_READ; /* -dRr: handling of directories */
- int linkbehave = LINK_READ; /* -OpS: handling of symlinks */
- bool dexclude, dinclude; /* --exclude-dir and --include-dir */
- bool fexclude, finclude; /* --exclude and --include */
- enum {
- BIN_OPT = CHAR_MAX + 1,
- COLOR_OPT,
- DECOMPRESS_OPT,
- HELP_OPT,
- MMAP_OPT,
- LINEBUF_OPT,
- LABEL_OPT,
- R_EXCLUDE_OPT,
- R_INCLUDE_OPT,
- R_DEXCLUDE_OPT,
- R_DINCLUDE_OPT
- };
- static inline const char *init_color(const char *);
- /* Housekeeping */
- int tail; /* lines left to print */
- bool notfound; /* file not found */
- extern char *__progname;
- /*
- * Prints usage information and returns 2.
- */
- __dead static void
- usage(void)
- {
- fprintf(stderr, getstr(4), __progname);
- fprintf(stderr, "%s", getstr(5));
- fprintf(stderr, "%s", getstr(5));
- fprintf(stderr, "%s", getstr(6));
- fprintf(stderr, "%s", getstr(7));
- exit(2);
- }
- static const char optstr[] =
- "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxyz";
- struct option long_options[] =
- {
- {"binary-files", required_argument, NULL, BIN_OPT},
- {"decompress", no_argument, NULL, DECOMPRESS_OPT},
- {"help", no_argument, NULL, HELP_OPT},
- {"mmap", no_argument, NULL, MMAP_OPT},
- {"line-buffered", no_argument, NULL, LINEBUF_OPT},
- {"label", required_argument, NULL, LABEL_OPT},
- {"color", optional_argument, NULL, COLOR_OPT},
- {"colour", optional_argument, NULL, COLOR_OPT},
- {"exclude", required_argument, NULL, R_EXCLUDE_OPT},
- {"include", required_argument, NULL, R_INCLUDE_OPT},
- {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT},
- {"include-dir", required_argument, NULL, R_DINCLUDE_OPT},
- {"after-context", required_argument, NULL, 'A'},
- {"text", no_argument, NULL, 'a'},
- {"before-context", required_argument, NULL, 'B'},
- {"byte-offset", no_argument, NULL, 'b'},
- {"context", optional_argument, NULL, 'C'},
- {"count", no_argument, NULL, 'c'},
- {"devices", required_argument, NULL, 'D'},
- {"directories", required_argument, NULL, 'd'},
- {"extended-regexp", no_argument, NULL, 'E'},
- {"regexp", required_argument, NULL, 'e'},
- {"fixed-strings", no_argument, NULL, 'F'},
- {"file", required_argument, NULL, 'f'},
- {"basic-regexp", no_argument, NULL, 'G'},
- {"no-filename", no_argument, NULL, 'h'},
- {"with-filename", no_argument, NULL, 'H'},
- {"ignore-case", no_argument, NULL, 'i'},
- {"bz2decompress", no_argument, NULL, 'J'},
- {"files-with-matches", no_argument, NULL, 'l'},
- {"files-without-match", no_argument, NULL, 'L'},
- {"max-count", required_argument, NULL, 'm'},
- {"line-number", no_argument, NULL, 'n'},
- {"only-matching", no_argument, NULL, 'o'},
- {"quiet", no_argument, NULL, 'q'},
- {"silent", no_argument, NULL, 'q'},
- {"recursive", no_argument, NULL, 'r'},
- {"no-messages", no_argument, NULL, 's'},
- {"binary", no_argument, NULL, 'U'},
- {"unix-byte-offsets", no_argument, NULL, 'u'},
- {"invert-match", no_argument, NULL, 'v'},
- {"version", no_argument, NULL, 'V'},
- {"word-regexp", no_argument, NULL, 'w'},
- {"line-regexp", no_argument, NULL, 'x'},
- {"null", no_argument, NULL, 'Z'},
- {"null-data", no_argument, NULL, 'z'},
- {NULL, no_argument, NULL, 0}
- };
- /*
- * Adds a searching pattern to the internal array.
- */
- static void
- add_pattern(char *pat, size_t len)
- {
- /* TODO: Check for empty patterns and shortcut */
- /* Increase size if necessary */
- if (patterns == pattern_sz) {
- pattern_sz *= 2;
- pattern = grep_realloc(pattern, ++pattern_sz *
- sizeof(*pattern));
- }
- if (len > 0 && pat[len - 1] == '\n')
- --len;
- /* pat may not be NUL-terminated */
- pattern[patterns] = grep_malloc(len + 1);
- memcpy(pattern[patterns], pat, len);
- pattern[patterns][len] = '\0';
- ++patterns;
- }
- /*
- * Adds a file include/exclude pattern to the internal array.
- */
- static void
- add_fpattern(const char *pat, int mode)
- {
- /* Increase size if necessary */
- if (fpatterns == fpattern_sz) {
- fpattern_sz *= 2;
- fpattern = grep_realloc(fpattern, ++fpattern_sz *
- sizeof(struct epat));
- }
- fpattern[fpatterns].pat = grep_strdup(pat);
- fpattern[fpatterns].mode = mode;
- ++fpatterns;
- }
- /*
- * Adds a directory include/exclude pattern to the internal array.
- */
- static void
- add_dpattern(const char *pat, int mode)
- {
- /* Increase size if necessary */
- if (dpatterns == dpattern_sz) {
- dpattern_sz *= 2;
- dpattern = grep_realloc(dpattern, ++dpattern_sz *
- sizeof(struct epat));
- }
- dpattern[dpatterns].pat = grep_strdup(pat);
- dpattern[dpatterns].mode = mode;
- ++dpatterns;
- }
- /*
- * Reads searching patterns from a file and adds them with add_pattern().
- */
- static void
- read_patterns(const char *fn)
- {
- FILE *f;
- char *line;
- size_t len;
- ssize_t rlen;
- if ((f = fopen(fn, "r")) == NULL)
- err(2, "%s", fn);
- line = NULL;
- len = 0;
- #ifndef ANDROID
- while ((rlen = getline(&line, &len, f)) != -1)
- add_pattern(line, *line == '\n' ? 0 : (size_t)rlen);
- #endif
- free(line);
- if (ferror(f))
- err(2, "%s", fn);
- fclose(f);
- }
- static inline const char *
- init_color(const char *d)
- {
- char *c;
- c = getenv("GREP_COLOR");
- return (c != NULL ? c : d);
- }
- int
- main(int argc, char *argv[])
- {
- char **aargv, **eargv, *eopts;
- char *ep;
- unsigned long long l;
- unsigned int aargc, eargc, i, j;
- int c, lastc, needpattern, newarg, prevoptind;
- /* Check what is the program name of the binary. In this
- way we can have all the funcionalities in one binary
- without the need of scripting and using ugly hacks. */
- switch (__progname[0]) {
- case 'e':
- grepbehave = GREP_EXTENDED;
- break;
- case 'f':
- grepbehave = GREP_FIXED;
- break;
- case 'g':
- grepbehave = GREP_BASIC;
- break;
- case 'z':
- filebehave = FILE_GZIP;
- switch(__progname[1]) {
- case 'e':
- grepbehave = GREP_EXTENDED;
- break;
- case 'f':
- grepbehave = GREP_FIXED;
- break;
- case 'g':
- grepbehave = GREP_BASIC;
- break;
- }
- break;
- }
- lastc = '\0';
- newarg = 1;
- prevoptind = 1;
- needpattern = 1;
- eopts = getenv("GREP_OPTIONS");
- /* support for extra arguments in GREP_OPTIONS */
- eargc = 0;
- if (eopts != NULL) {
- char *str;
- /* make an estimation of how many extra arguments we have */
- for (j = 0; j < strlen(eopts); j++)
- if (eopts[j] == ' ')
- eargc++;
- eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1));
- eargc = 0;
- /* parse extra arguments */
- while ((str = strsep(&eopts, " ")) != NULL)
- eargv[eargc++] = grep_strdup(str);
- aargv = (char **)grep_calloc(eargc + argc + 1,
- sizeof(char *));
- aargv[0] = argv[0];
- for (i = 0; i < eargc; i++)
- aargv[i + 1] = eargv[i];
- for (j = 1; j < (unsigned int)argc; j++, i++)
- aargv[i + 1] = argv[j];
- aargc = eargc + argc;
- } else {
- aargv = argv;
- aargc = argc;
- }
- while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) !=
- -1)) {
- switch (c) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- if (newarg || !isdigit(lastc))
- Aflag = 0;
- else if (Aflag > LLONG_MAX / 10) {
- errno = ERANGE;
- err(2, NULL);
- }
- Aflag = Bflag = (Aflag * 10) + (c - '0');
- break;
- case 'C':
- if (optarg == NULL) {
- Aflag = Bflag = 2;
- break;
- }
- /* FALLTHROUGH */
- case 'A':
- /* FALLTHROUGH */
- case 'B':
- errno = 0;
- l = strtoull(optarg, &ep, 10);
- if (((errno == ERANGE) && (l == ULLONG_MAX)) ||
- ((errno == EINVAL) && (l == 0)))
- err(2, NULL);
- else if (ep[0] != '\0') {
- errno = EINVAL;
- err(2, NULL);
- }
- if (c == 'A')
- Aflag = l;
- else if (c == 'B')
- Bflag = l;
- else
- Aflag = Bflag = l;
- break;
- case 'a':
- binbehave = BINFILE_TEXT;
- break;
- case 'b':
- bflag = true;
- break;
- case 'c':
- cflag = true;
- break;
- case 'D':
- if (strcasecmp(optarg, "skip") == 0)
- devbehave = DEV_SKIP;
- else if (strcasecmp(optarg, "read") == 0)
- devbehave = DEV_READ;
- else
- errx(2, getstr(3), "--devices");
- break;
- case 'd':
- if (strcasecmp("recurse", optarg) == 0) {
- Hflag = true;
- dirbehave = DIR_RECURSE;
- } else if (strcasecmp("skip", optarg) == 0)
- dirbehave = DIR_SKIP;
- else if (strcasecmp("read", optarg) == 0)
- dirbehave = DIR_READ;
- else
- errx(2, getstr(3), "--directories");
- break;
- case 'E':
- grepbehave = GREP_EXTENDED;
- break;
- case 'e':
- add_pattern(optarg, strlen(optarg));
- needpattern = 0;
- break;
- case 'F':
- grepbehave = GREP_FIXED;
- break;
- case 'f':
- read_patterns(optarg);
- needpattern = 0;
- break;
- case 'G':
- grepbehave = GREP_BASIC;
- break;
- case 'H':
- Hflag = true;
- break;
- case 'h':
- Hflag = false;
- hflag = true;
- break;
- case 'I':
- binbehave = BINFILE_SKIP;
- break;
- case 'i':
- case 'y':
- iflag = true;
- cflags |= REG_ICASE;
- break;
- case 'J':
- filebehave = FILE_BZIP;
- break;
- case 'L':
- lflag = false;
- Lflag = true;
- break;
- case 'l':
- Lflag = false;
- lflag = true;
- break;
- case 'm':
- mflag = true;
- errno = 0;
- mcount = strtoull(optarg, &ep, 10);
- if (((errno == ERANGE) && (mcount == ULLONG_MAX)) ||
- ((errno == EINVAL) && (mcount == 0)))
- err(2, NULL);
- else if (ep[0] != '\0') {
- errno = EINVAL;
- err(2, NULL);
- }
- break;
- case 'n':
- nflag = true;
- break;
- case 'O':
- linkbehave = LINK_EXPLICIT;
- break;
- case 'o':
- oflag = true;
- break;
- case 'p':
- linkbehave = LINK_SKIP;
- break;
- case 'q':
- qflag = true;
- break;
- case 'S':
- linkbehave = LINK_READ;
- break;
- case 'R':
- case 'r':
- dirbehave = DIR_RECURSE;
- Hflag = true;
- break;
- case 's':
- sflag = true;
- break;
- case 'U':
- binbehave = BINFILE_BIN;
- break;
- case 'u':
- case MMAP_OPT:
- /* noop, compatibility */
- break;
- case 'V':
- printf(getstr(9), __progname, VERSION);
- exit(0);
- case 'v':
- vflag = true;
- break;
- case 'w':
- wflag = true;
- break;
- case 'x':
- xflag = true;
- break;
- case 'Z':
- nullflag = true;
- break;
- case 'z':
- nulldataflag = true;
- line_sep = '\0';
- break;
- case BIN_OPT:
- if (strcasecmp("binary", optarg) == 0)
- binbehave = BINFILE_BIN;
- else if (strcasecmp("without-match", optarg) == 0)
- binbehave = BINFILE_SKIP;
- else if (strcasecmp("text", optarg) == 0)
- binbehave = BINFILE_TEXT;
- else
- errx(2, getstr(3), "--binary-files");
- break;
- case COLOR_OPT:
- color = NULL;
- if (optarg == NULL || strcasecmp("auto", optarg) == 0 ||
- strcasecmp("tty", optarg) == 0 ||
- strcasecmp("if-tty", optarg) == 0) {
- char *term;
- term = getenv("TERM");
- if (isatty(STDOUT_FILENO) && term != NULL &&
- strcasecmp(term, "dumb") != 0)
- color = init_color("01;31");
- } else if (strcasecmp("always", optarg) == 0 ||
- strcasecmp("yes", optarg) == 0 ||
- strcasecmp("force", optarg) == 0) {
- color = init_color("01;31");
- } else if (strcasecmp("never", optarg) != 0 &&
- strcasecmp("none", optarg) != 0 &&
- strcasecmp("no", optarg) != 0)
- errx(2, getstr(3), "--color");
- break;
- case DECOMPRESS_OPT:
- filebehave = FILE_GZIP;
- break;
- case LABEL_OPT:
- label = optarg;
- break;
- case LINEBUF_OPT:
- lbflag = true;
- break;
- case R_INCLUDE_OPT:
- finclude = true;
- add_fpattern(optarg, INCL_PAT);
- break;
- case R_EXCLUDE_OPT:
- fexclude = true;
- add_fpattern(optarg, EXCL_PAT);
- break;
- case R_DINCLUDE_OPT:
- dinclude = true;
- add_dpattern(optarg, INCL_PAT);
- break;
- case R_DEXCLUDE_OPT:
- dexclude = true;
- add_dpattern(optarg, EXCL_PAT);
- break;
- case HELP_OPT:
- default:
- usage();
- }
- lastc = c;
- newarg = optind != prevoptind;
- prevoptind = optind;
- }
- aargc -= optind;
- aargv += optind;
- /* Fail if we don't have any pattern */
- if (aargc == 0 && needpattern)
- usage();
- /* Process patterns from command line */
- if (aargc != 0 && needpattern) {
- add_pattern(*aargv, strlen(*aargv));
- --aargc;
- ++aargv;
- }
- switch (grepbehave) {
- case GREP_FIXED:
- case GREP_BASIC:
- break;
- case GREP_EXTENDED:
- cflags |= REG_EXTENDED;
- break;
- default:
- /* NOTREACHED */
- usage();
- }
- fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
- r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
- /*
- * XXX: fgrepcomp() and fastcomp() are workarounds for regexec() performance.
- * Optimizations should be done there.
- */
- /* Check if cheating is allowed (always is for fgrep). */
- if (grepbehave == GREP_FIXED) {
- for (i = 0; i < patterns; ++i)
- fgrepcomp(&fg_pattern[i], pattern[i]);
- } else {
- for (i = 0; i < patterns; ++i) {
- if (fastcomp(&fg_pattern[i], pattern[i])) {
- /* Fall back to full regex library */
- c = regcomp(&r_pattern[i], pattern[i], cflags);
- if (c != 0) {
- regerror(c, &r_pattern[i], re_error,
- RE_ERROR_BUF);
- errx(2, "%s", re_error);
- }
- }
- }
- }
- if (lbflag)
- setlinebuf(stdout);
- if ((aargc == 0 || aargc == 1) && !Hflag)
- hflag = true;
- if (aargc == 0)
- exit(!procfile("-"));
- if (dirbehave == DIR_RECURSE)
- c = grep_tree(aargv);
- else
- for (c = 0; aargc--; ++aargv) {
- if ((finclude || fexclude) && !file_matching(*aargv))
- continue;
- c+= procfile(*aargv);
- }
- /* Find out the correct return value according to the
- results and the command line option. */
- exit(c ? (notfound ? (qflag ? 0 : 2) : 0) : (notfound ? 2 : 1));
- }
|