original openbsd grep (freegrep)

This commit is contained in:
Ben Gras 2010-06-20 11:54:40 +00:00
parent f19304bf22
commit bb830fc0d3
10 changed files with 2185 additions and 0 deletions

27
commands/grep/Makefile Normal file
View file

@ -0,0 +1,27 @@
#	$OpenBSD: Makefile,v 1.5 2003/06/23 07:52:18 deraadt Exp $

PROG=	grep
SRCS=	binary.c file.c grep.c mmfile.c queue.c util.c

# Hard links for the alternate names the program can be invoked under.
# The last entry must not end in a backslash: a trailing continuation
# would fold the following "OPSYS!= uname" assignment into LINKS and
# leave OPSYS undefined for the conditional below.
LINKS=	${BINDIR}/grep ${BINDIR}/egrep \
	${BINDIR}/grep ${BINDIR}/fgrep \
	${BINDIR}/grep ${BINDIR}/zgrep \
	${BINDIR}/grep ${BINDIR}/zegrep \
	${BINDIR}/grep ${BINDIR}/zfgrep

# Name of the build host's operating system, as printed by uname.
OPSYS!=	uname

MLINKS=	grep.1 egrep.1 \
	grep.1 fgrep.1 \
	grep.1 zgrep.1 \
	grep.1 zegrep.1 \
	grep.1 zfgrep.1

CFLAGS+= -Wall

# On Minix build without zlib (NOZ); elsewhere link against libz.
.if ${OPSYS} == "Minix"
CPPFLAGS+= -DNOZ -D_POSIX_SOURCE -D_MINIX
.include <minix.prog.mk>
.else
LDADD=	-lz
DPADD=	${LIBZ}
.include <bsd.prog.mk>
.endif

96
commands/grep/binary.c Normal file
View file

@ -0,0 +1,96 @@
/* $OpenBSD: binary.c,v 1.14 2005/02/07 08:47:18 otto Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <ctype.h>
#include <err.h>
#include <stdio.h>
#include "grep.h"
/*
 * A byte is considered "binary" when it is neither printable nor
 * whitespace and is not a backspace.
 *
 * The argument is converted to unsigned char before it reaches the
 * <ctype.h> classifiers: handing them a negative plain char (any byte
 * >= 0x80 on platforms where char is signed) is undefined behaviour.
 * Note that the argument is evaluated more than once.
 */
#define isbinary(ch) \
	(!isprint((unsigned char)(ch)) && !isspace((unsigned char)(ch)) && \
	    (ch) != '\b')

/*
 * Guess whether the stdio stream f holds binary data by inspecting at
 * most its first BUFSIZ bytes.
 *
 * Returns 1 if a binary byte (see isbinary) is found, 0 otherwise.
 * 0 is also returned when the stream cannot be repositioned to its
 * start or when nothing can be read from it.  After a scan the stream
 * is rewound so the caller can read it from the beginning.
 */
int
bin_file(FILE *f)
{
	char buf[BUFSIZ];
	size_t i, m;
	int ret = 0;

	if (fseek(f, 0L, SEEK_SET) == -1)
		return 0;
	if ((m = fread(buf, 1, BUFSIZ, f)) == 0)
		return 0;
	for (i = 0; i < m; i++)
		if (isbinary(buf[i])) {
			ret = 1;
			break;
		}
	rewind(f);
	return ret;
}
#ifndef NOZ
/*
 * gzip counterpart of bin_file(): seek to the start of the stream, read
 * up to BUFSIZ decompressed bytes and report whether any of them is a
 * binary byte (see isbinary).
 *
 * Returns 1 when a binary byte is found, 0 otherwise; 0 is also
 * returned when the seek fails or no data can be read.  The stream is
 * rewound afterwards; failure to rewind terminates the program.
 */
int
gzbin_file(gzFile *f)
{
	char chunk[BUFSIZ];
	int nread, pos, found;

	if (gzseek(f, (z_off_t)0, SEEK_SET) == -1)
		return 0;

	nread = gzread(f, chunk, BUFSIZ);
	if (nread <= 0)
		return 0;

	found = 0;
	pos = 0;
	while (!found && pos < nread) {
		if (isbinary(chunk[pos]))
			found = 1;
		pos++;
	}

	if (gzrewind(f) != 0)
		err(1, "gzbin_file");
	return found;
}
#endif
/*
 * Memory-mapped counterpart of bin_file(): examine the first BUFSIZ
 * bytes of the mapping (or all of it, if shorter) and return 1 as soon
 * as a binary byte (see isbinary) is seen, 0 if none is found.
 */
int
mmbin_file(mmf_t *f)
{
	size_t limit, pos;

	/* XXX knows too much about mmf internals */
	limit = f->len < BUFSIZ ? f->len : BUFSIZ;
	for (pos = 0; pos < limit; pos++) {
		if (isbinary(f->base[pos]))
			return 1;
	}
	return 0;
}

218
commands/grep/file.c Normal file
View file

@ -0,0 +1,218 @@
/* $OpenBSD: file.c,v 1.9 2006/02/09 09:54:46 otto Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/param.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "grep.h"
/*
 * Name recorded by the most recent grep_open()/grep_fdopen() call;
 * gzfgetln() uses it in its error messages.
 */
static char fname[MAXPATHLEN];
#ifndef NOZ
/* Line buffer shared by all gzfgetln() calls, and its current capacity. */
static char *lnbuf;
static size_t lnbuflen;
#endif
/* Values for the type member of struct file: which backend serves the handle. */
#define FILE_STDIO 0
#define FILE_MMAP 1
#define FILE_GZIP 2
/*
 * An open input.  The open functions fill in only the backend member
 * that corresponds to type; the others are left unset.
 */
struct file {
/* one of the FILE_* values above */
int type;
/* when nonzero, grep_bin_file() skips inspection and reports "not binary" */
int noseek;
/* stdio stream, used when type is FILE_STDIO */
FILE *f;
/* memory-mapped file, used when type is FILE_MMAP */
mmf_t *mmf;
#ifndef NOZ
/* zlib stream, used when type is FILE_GZIP */
gzFile *gzf;
#endif
};
#ifndef NOZ
/*
 * Read one line from the gzip stream f into the shared lnbuf buffer,
 * growing the buffer as required.
 *
 * On success the buffer is returned and *len is set to the number of
 * bytes stored; the terminating newline is not stored and the buffer is
 * not NUL-terminated.  NULL is returned when end of file is reached
 * before any byte of a line was read.  A read error terminates the
 * program with status 2.
 */
static char *
gzfgetln(gzFile *f, size_t *len)
{
	size_t used = 0;
	int ch;

	for (;;) {
		ch = gzgetc(f);
		if (ch == -1 && gzeof(f))
			break;
		if (ch == -1) {
			const char *msg;
			int code;

			msg = gzerror(f, &code);
			if (code == Z_ERRNO)
				err(2, "%s", fname);
			errx(2, "%s: %s", fname, msg);
		}
		/* Grow to 2n + 1 bytes whenever the buffer is full. */
		if (used >= lnbuflen) {
			lnbuflen = lnbuflen * 2 + 1;
			lnbuf = grep_realloc(lnbuf, lnbuflen);
		}
		if (ch == '\n')
			break;
		lnbuf[used++] = ch;
	}

	if (used == 0 && gzeof(f))
		return NULL;
	*len = used;
	return lnbuf;
}
#endif
/*
 * Wrap an already-open file descriptor in a file_t.
 *
 * The descriptor is recorded in fname as "(standard input)" or
 * "(fd N)".  With Zflag set (and zlib support compiled in) the
 * descriptor is opened through gzdopen(); otherwise through fdopen().
 * Returns the new handle, or NULL if the underlying open fails.
 */
file_t *
grep_fdopen(int fd, char *mode)
{
	file_t *handle;

	if (fd != STDIN_FILENO)
		snprintf(fname, sizeof fname, "(fd %d)", fd);
	else
		snprintf(fname, sizeof fname, "(standard input)");

	handle = grep_malloc(sizeof *handle);

#ifndef NOZ
	if (Zflag) {
		handle->type = FILE_GZIP;
		/* an unseekable descriptor cannot be inspected for binary data */
		handle->noseek = lseek(fd, 0L, SEEK_SET) == -1;
		handle->gzf = gzdopen(fd, mode);
		if (handle->gzf != NULL)
			return handle;
		free(handle);
		return NULL;
	}
#endif

	handle->type = FILE_STDIO;
	handle->noseek = isatty(fd);
	handle->f = fdopen(fd, mode);
	if (handle->f != NULL)
		return handle;

	free(handle);
	return NULL;
}
/*
 * Open the file named by path and return a file_t for it.
 *
 * The name is copied into fname first.  With Zflag set (and zlib
 * support compiled in) the file is opened with gzopen().  Otherwise a
 * memory mapping is attempted and, failing that, a stdio stream.
 * Returns NULL when no backend could open the file.
 */
file_t *
grep_open(char *path, char *mode)
{
	file_t *handle;

	snprintf(fname, sizeof fname, "%s", path);

	handle = grep_malloc(sizeof *handle);
	handle->noseek = 0;

#ifndef NOZ
	if (Zflag) {
		handle->type = FILE_GZIP;
		handle->gzf = gzopen(fname, mode);
		if (handle->gzf != NULL)
			return handle;
		free(handle);
		return NULL;
	}
#endif

	/* try mmap first; if it fails, try stdio */
	handle->mmf = mmopen(fname, mode);
	if (handle->mmf != NULL) {
		handle->type = FILE_MMAP;
		return handle;
	}

	handle->type = FILE_STDIO;
	handle->f = fopen(path, mode);
	if (handle->f != NULL)
		return handle;

	free(handle);
	return NULL;
}
/*
 * Report whether f looks like a binary file by delegating to the
 * checker that matches its backend.  Handles flagged noseek are never
 * inspected and always yield 0.  An unknown type terminates the
 * program with status 2.
 */
int
grep_bin_file(file_t *f)
{
	if (f->noseek)
		return 0;

	if (f->type == FILE_STDIO)
		return bin_file(f->f);
	if (f->type == FILE_MMAP)
		return mmbin_file(f->mmf);
#ifndef NOZ
	if (f->type == FILE_GZIP)
		return gzbin_file(f->gzf);
#endif
	/* can't happen */
	errx(2, "invalid file type");
}
/*
 * Fetch the next line from f using the line reader that matches its
 * backend, storing the line length in *l.  The return value is whatever
 * the backend reader returns.  An unknown type terminates the program
 * with status 2.
 */
char *
grep_fgetln(file_t *f, size_t *l)
{
	if (f->type == FILE_STDIO)
		return fgetln(f->f, l);
	if (f->type == FILE_MMAP)
		return mmfgetln(f->mmf, l);
#ifndef NOZ
	if (f->type == FILE_GZIP)
		return gzfgetln(f->gzf, l);
#endif
	/* can't happen */
	errx(2, "invalid file type");
}
/*
 * Close the backend behind f and release the handle itself.  An
 * unknown type terminates the program with status 2.
 */
void
grep_close(file_t *f)
{
	if (f->type == FILE_STDIO)
		fclose(f->f);
	else if (f->type == FILE_MMAP)
		mmclose(f->mmf);
#ifndef NOZ
	else if (f->type == FILE_GZIP)
		gzclose(f->gzf);
#endif
	else
		errx(2, "invalid file type");	/* can't happen */

	free(f);
}

388
commands/grep/grep.1 Executable file
View file

@ -0,0 +1,388 @@
.\" $OpenBSD: grep.1,v 1.35 2007/05/31 19:20:10 jmc Exp $
.\" Copyright (c) 1980, 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" @(#)grep.1 8.3 (Berkeley) 4/18/94
.\"
.Dd $Mdocdate: May 31 2007 $
.Dt GREP 1
.Os
.Sh NAME
.Nm grep , egrep , fgrep ,
.Nm zgrep , zegrep , zfgrep
.Nd file pattern searcher
.Sh SYNOPSIS
.Nm grep
.Bk -words
.Op Fl abcEFGHhIiLlnoPqRSsUVvwxZ
.Op Fl A Ar num
.Op Fl B Ar num
.Op Fl C Ns Op Ar num
.Op Fl e Ar pattern
.Op Fl f Ar file
.Op Fl -binary-files Ns = Ns Ar value
.Op Fl -context Ns Op = Ns Ar num
.Op Fl -line-buffered
.Op Ar pattern
.Op Ar
.Ek
.Sh DESCRIPTION
The
.Nm grep
utility searches any given input files,
selecting lines that match one or more patterns.
By default, a pattern matches an input line if the regular expression
(RE) in the pattern matches the input line
without its trailing newline.
An empty expression matches every line.
Each input line that matches at least one of the patterns is written
to the standard output.
.Pp
.Nm grep
is used for simple patterns and
basic regular expressions
.Pq BREs ;
.Nm egrep
can handle extended regular expressions
.Pq EREs .
See
.Xr re_format 7
for more information on regular expressions.
.Nm fgrep
is quicker than both
.Nm grep
and
.Nm egrep ,
but can only handle fixed patterns
(i.e. it does not interpret regular expressions).
Patterns may consist of one or more lines,
allowing any of the pattern lines to match a portion of the input.
.Pp
.Nm zgrep ,
.Nm zegrep ,
and
.Nm zfgrep
act like
.Nm grep ,
.Nm egrep ,
and
.Nm fgrep ,
respectively, but accept input files compressed with the
.Xr compress 1
or
.Xr gzip 1
compression utilities.
.Pp
The following options are available:
.Bl -tag -width indent
.It Fl A Ar num
Print
.Ar num
lines of trailing context after each match.
See also the
.Fl B
and
.Fl C
options.
.It Fl a
Treat all files as ASCII text.
Normally
.Nm
will simply print
.Dq Binary file ... matches
if files contain binary characters.
Use of this option forces
.Nm
to output lines matching the specified pattern.
.It Fl B Ar num
Print
.Ar num
lines of leading context before each match.
See also the
.Fl A
and
.Fl C
options.
.It Fl b
The offset in bytes of a matched pattern is
displayed in front of the respective matched line.
.It Fl C Ns Op Ar num
Print
.Ar num
lines of leading and trailing context surrounding each match.
The default is 2 and is equivalent to
.Fl A
.Ar 2
.Fl B
.Ar 2 .
Note:
no whitespace may be given between the option and its argument.
.It Fl c
Only a count of selected lines is written to standard output.
.It Fl E
Interpret
.Ar pattern
as an extended regular expression
(i.e. force
.Nm grep
to behave as
.Nm egrep ) .
.It Fl e Ar pattern
Specify a pattern used during the search of the input:
an input line is selected if it matches any of the specified patterns.
This option is most useful when multiple
.Fl e
options are used to specify multiple patterns,
or when a pattern begins with a dash
.Pq Sq - .
.It Fl F
Interpret
.Ar pattern
as a set of fixed strings
(i.e. force
.Nm grep
to behave as
.Nm fgrep ) .
.It Fl f Ar file
Read one or more newline separated patterns from
.Ar file .
Empty pattern lines match every input line.
Newlines are not considered part of a pattern.
If
.Ar file
is empty, nothing is matched.
.It Fl G
Interpret
.Ar pattern
as a basic regular expression
(i.e. force
.Nm grep
to behave as traditional
.Nm grep ) .
.It Fl H
If
.Fl R
is specified, follow symbolic links only if they were explicitly listed
on the command line.
The default is not to follow symbolic links.
.It Fl h
Never print filename headers
.Pq i.e. filenames
with output lines.
.It Fl I
Ignore binary files.
.It Fl i
Perform case insensitive matching.
By default,
.Nm grep
is case sensitive.
.It Fl L
Only the names of files not containing selected lines are written to
standard output.
Pathnames are listed once per file searched.
If the standard input is searched, the string
.Dq (standard input)
is written.
.It Fl l
Only the names of files containing selected lines are written to
standard output.
.Nm grep
will only search a file until a match has been found,
making searches potentially less expensive.
Pathnames are listed once per file searched.
If the standard input is searched, the string
.Dq (standard input)
is written.
.It Fl n
Each output line is preceded by its relative line number in the file,
starting at line 1.
The line number counter is reset for each file processed.
This option is ignored if
.Fl c ,
.Fl L ,
.Fl l ,
or
.Fl q
is
specified.
.It Fl o
Always print filename headers with output lines.
.It Fl P
If
.Fl R
is specified, no symbolic links are followed.
This is the default.
.It Fl q
Quiet mode:
suppress normal output.
.Nm grep
will only search a file until a match has been found,
making searches potentially less expensive.
.It Fl R
Recursively search subdirectories listed.
.It Fl S
If
.Fl R
is specified, all symbolic links are followed.
The default is not to follow symbolic links.
.It Fl s
Silent mode.
Nonexistent and unreadable files are ignored
(i.e. their error messages are suppressed).
.It Fl U
Search binary files, but do not attempt to print them.
.It Fl V
Display version information.
All other options are ignored.
.It Fl v
Selected lines are those
.Em not
matching any of the specified patterns.
.It Fl w
The expression is searched for as a word (as if surrounded by
.Sq [[:<:]]
and
.Sq [[:>:]] ;
see
.Xr re_format 7 ) .
.It Fl x
Only input lines selected against an entire fixed string or regular
expression are considered to be matching lines.
.It Fl Z
Force
.Nm grep
to behave as
.Nm zgrep .
.It Fl Fl binary-files Ns = Ns Ar value
Controls searching and printing of binary files.
Options are
.Ar binary ,
the default: search binary files but do not print them;
.Ar without-match :
do not search binary files;
and
.Ar text :
treat all files as text.
.Sm off
.It Fl Fl context Op = Ar num
.Sm on
Print
.Ar num
lines of leading and trailing context.
The default is 2.
.It Fl Fl line-buffered
Force output to be line buffered.
By default, output is line buffered when standard output is a terminal
and block buffered otherwise.
.Pp
.El
If no file arguments are specified, the standard input is used.
.Sh RETURN VALUES
The
.Nm grep
utility exits with one of the following values:
.Pp
.Bl -tag -width flag -compact
.It Li 0
One or more lines were selected.
.It Li 1
No lines were selected.
.It Li \*(Gt1
An error occurred.
.El
.Sh EXAMPLES
To find all occurrences of the word
.Sq patricia
in a file:
.Pp
.Dl $ grep 'patricia' myfile
.Pp
To find all occurrences of the pattern
.Ql .Pp
at the beginning of a line:
.Pp
.Dl $ grep '^\e.Pp' myfile
.Pp
The apostrophes ensure the entire expression is evaluated by
.Nm grep
instead of by the user's shell.
The caret
.Ql ^
matches the null string at the beginning of a line,
and the
.Ql \e
escapes the
.Ql \&. ,
which would otherwise match any character.
.Pp
To find all lines in a file which do not contain the words
.Sq foo
or
.Sq bar :
.Pp
.Dl $ grep -v -e 'foo' -e 'bar' myfile
.Pp
A simple example of an extended regular expression:
.Pp
.Dl $ egrep '19|20|25' calendar
.Pp
Peruses the file
.Sq calendar
looking for either 19, 20, or 25.
.Sh SEE ALSO
.Xr ed 1 ,
.Xr ex 1 ,
.Xr gzip 1 ,
.Xr sed 1 ,
.Xr re_format 7
.Sh STANDARDS
The
.Nm
utility is compliant with the
.St -p1003.1-2008
specification.
.Pp
The flags
.Op Fl AaBbCGHhILoPRSUVwZ
are extensions to that specification, and the behaviour of the
.Fl f
flag when used with an empty pattern file is left undefined.
.Pp
All long options are provided for compatibility with
GNU versions of this utility.
.Pp
Historic versions of the
.Nm grep
utility also supported the flags
.Op Fl ruy .
This implementation supports those options;
however, their use is strongly discouraged.
.Sh HISTORY
The
.Nm grep
command first appeared in
.At v6 .

494
commands/grep/grep.c Executable file
View file

@ -0,0 +1,494 @@
/* $OpenBSD: grep.c,v 1.38 2007/02/13 21:48:20 kili Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/queue.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <getopt.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "grep.h"
/* Flags passed to regcomp() and regexec() */
int cflags;
int eflags = REG_STARTEND;
/* Set by add_pattern() when an empty pattern is added without -x. */
int matchall; /* shortcut */
/* Number of stored patterns and current capacity of the pattern array. */
int patterns, pattern_sz;
/* Pattern strings; main() compiles each into fg_pattern[] and, as a fallback, r_pattern[]. */
char **pattern;
regex_t *r_pattern;
fastgrep_t *fg_pattern;
/* For regex errors */
char re_error[RE_ERROR_BUF + 1];
/* Command-line flags */
int Aflag; /* -A x: print x lines trailing each match */
int Bflag; /* -B x: print x lines leading each match */
int Eflag; /* -E: interpret pattern as extended regexp */
int Fflag; /* -F: interpret pattern as list of fixed strings */
int Gflag; /* -G: interpret pattern as basic regexp */
int Hflag; /* -H: if -R, follow explicitly listed symlinks */
int Lflag; /* -L: only show names of files with no matches */
int Pflag; /* -P: if -R, no symlinks are followed */
int Rflag; /* -R: recursively search directory trees */
int Sflag; /* -S: if -R, follow all symlinks */
#ifndef NOZ
int Zflag; /* -Z: decompress input before processing */
#endif
int bflag; /* -b: show byte offset in front of each matched line */
int cflag; /* -c: only show a count of matching lines */
int hflag; /* -h: don't print filename headers */
int iflag; /* -i: ignore case */
int lflag; /* -l: only show names of files with matches */
int nflag; /* -n: show line numbers in front of matching lines */
int oflag; /* -o: always print file name */
int qflag; /* -q: quiet mode (don't output anything) */
int sflag; /* -s: silent mode (ignore errors) */
int vflag; /* -v: only show non-matching lines */
int wflag; /* -w: pattern must start and end on word boundaries */
int xflag; /* -x: pattern must match entire line */
int lbflag; /* --line-buffered */
/* Binary-file handling, one of the BIN_FILE_* values; set by -a, -I, -U and --binary-files. */
int binbehave = BIN_FILE_BIN;
/*
 * getopt_long() return values for long options that have no short
 * equivalent; numbered above CHAR_MAX so they cannot collide with
 * option characters.
 */
enum {
BIN_OPT = CHAR_MAX + 1,
HELP_OPT,
MMAP_OPT,
LINEBUF_OPT
};
/* Housekeeping */
int first; /* flag whether or not this is our first match */
int tail; /* lines left to print */
/* One -f argument; main() queues these and reads them after option parsing. */
struct patfile {
const char *pf_file;
SLIST_ENTRY(patfile) pf_next;
};
SLIST_HEAD(, patfile) patfilelh;
/* Program name; main() inspects its first letters to pick the default mode. */
extern char *__progname;
/*
 * Write the usage synopsis to standard error and exit with status 2.
 * The -Z flag is listed only when zlib support is compiled in.
 */
static void
usage(void)
{
#ifdef NOZ
	static const char synopsis[] =
	    "usage: %s [-abcEFGHhIiLlnoPqRSsUVvwx] [-A num] [-B num] [-C[num]]\n"
#else
	static const char synopsis[] =
	    "usage: %s [-abcEFGHhIiLlnoPqRSsUVvwxZ] [-A num] [-B num] [-C[num]]\n"
#endif
	    "\t[-e pattern] [-f file] [--binary-files=value] [--context[=num]]\n"
	    "\t[--line-buffered] [pattern] [file ...]\n";

	fprintf(stderr, synopsis, __progname);
	exit(2);
}
/*
 * Short option string for getopt_long().  The digits allow a context
 * size to be given as -NUM; 'Z' is accepted only when zlib support is
 * compiled in.
 */
#ifdef NOZ
static char *optstr = "0123456789A:B:CEFGHILPSRUVabce:f:hilnoqrsuvwxy";
#else
static char *optstr = "0123456789A:B:CEFGHILPSRUVZabce:f:hilnoqrsuvwxy";
#endif
/*
 * Long options.  Most map onto the equivalent short option character;
 * the first four return the *_OPT enum values instead.
 *
 * "devices" returns 'D', for which main() has no case, so it ends up in
 * the default branch and prints the usage message.
 * NOTE(review): "revert-match" looks like it is meant to mirror GNU
 * grep's --invert-match; confirm before renaming or adding an alias.
 */
struct option long_options[] =
{
{"binary-files", required_argument, NULL, BIN_OPT},
{"help", no_argument, NULL, HELP_OPT},
{"mmap", no_argument, NULL, MMAP_OPT},
{"line-buffered", no_argument, NULL, LINEBUF_OPT},
{"after-context", required_argument, NULL, 'A'},
{"before-context", required_argument, NULL, 'B'},
{"context", optional_argument, NULL, 'C'},
{"devices", required_argument, NULL, 'D'},
{"extended-regexp", no_argument, NULL, 'E'},
{"fixed-strings", no_argument, NULL, 'F'},
{"basic-regexp", no_argument, NULL, 'G'},
{"binary", no_argument, NULL, 'U'},
{"version", no_argument, NULL, 'V'},
{"text", no_argument, NULL, 'a'},
{"byte-offset", no_argument, NULL, 'b'},
{"count", no_argument, NULL, 'c'},
{"regexp", required_argument, NULL, 'e'},
{"file", required_argument, NULL, 'f'},
{"no-filename", no_argument, NULL, 'h'},
{"ignore-case", no_argument, NULL, 'i'},
{"files-without-match", no_argument, NULL, 'L'},
{"files-with-matches", no_argument, NULL, 'l'},
{"line-number", no_argument, NULL, 'n'},
{"quiet", no_argument, NULL, 'q'},
{"silent", no_argument, NULL, 'q'},
{"recursive", no_argument, NULL, 'r'},
{"no-messages", no_argument, NULL, 's'},
{"revert-match", no_argument, NULL, 'v'},
{"word-regexp", no_argument, NULL, 'w'},
{"line-regexp", no_argument, NULL, 'x'},
{"unix-byte-offsets", no_argument, NULL, 'u'},
#ifndef NOZ
{"decompress", no_argument, NULL, 'Z'},
#endif
/* terminator */
{NULL, no_argument, NULL, 0}
};
/*
 * Append one pattern to the global pattern[] array.
 *
 * pat points at len bytes of pattern text and need not be
 * NUL-terminated; one trailing newline, if present, is dropped.
 *
 * Without -x, an empty pattern (or any pattern once matchall is set)
 * just sets matchall and stores nothing.  With -w and without -F the
 * pattern is stored wrapped in word-boundary markers and a group, with
 * a leading '^' and a trailing '$' kept outside the wrapper.
 */
static void
add_pattern(char *pat, size_t len)
{
	char *copy;

	if (!xflag && (matchall || len == 0)) {
		matchall = 1;
		return;
	}

	/* Grow the table to 2n + 1 slots whenever it is full. */
	if (patterns == pattern_sz) {
		pattern_sz = pattern_sz * 2 + 1;
		pattern = grep_realloc(pattern, pattern_sz * sizeof(*pattern));
	}

	if (len > 0 && pat[len - 1] == '\n')
		len--;

	/* pat may not be NUL-terminated */
	if (!wflag || Fflag) {
		copy = grep_malloc(len + 1);
		memcpy(copy, pat, len);
		copy[len] = '\0';
	} else {
		const char *lparen = Eflag ? "(" : "\\(";
		const char *rparen = Eflag ? ")" : "\\)";
		int bol = (pat[0] == '^');
		int eol = (len > 0 && pat[len - 1] == '$');
		int extra = Eflag ? 2 : 4;	/* bytes taken by the group delimiters */
		size_t sz = len + 15 + extra;

		copy = grep_malloc(sz);
		snprintf(copy, sz,
		    "%s[[:<:]]%s%.*s%s[[:>:]]%s",
		    bol ? "^" : "",
		    lparen,
		    (int)len - bol - eol, pat + bol,
		    rparen,
		    eol ? "$" : "");
	}

	pattern[patterns++] = copy;
}
/*
 * Split pats at newline characters and hand every piece, including the
 * final one after the last newline, to add_pattern().
 */
static void
add_patterns(char *pats)
{
	char *cur = pats;
	char *eol;

	for (eol = strchr(cur, '\n'); eol != NULL; eol = strchr(cur, '\n')) {
		add_pattern(cur, eol - cur);
		cur = eol + 1;
	}
	add_pattern(cur, strlen(cur));
}
/*
 * Read patterns from the file fn, one per line, and add each with
 * add_pattern().  A line that starts with a newline is added as an
 * empty pattern.  Failure to open or read the file terminates the
 * program with status 2.
 */
static void
read_patterns(const char *fn)
{
	FILE *fp;
	char *ln;
	size_t lnlen;

	fp = fopen(fn, "r");
	if (fp == NULL)
		err(2, "%s", fn);

	for (;;) {
		ln = fgetln(fp, &lnlen);
		if (ln == NULL)
			break;
		if (*ln == '\n')
			lnlen = 0;
		add_pattern(ln, lnlen);
	}

	if (ferror(fp))
		err(2, "%s", fn);
	fclose(fp);
}
/*
 * Program entry point.
 *
 * Picks a default matching mode from the program name, parses the
 * command line, collects patterns from -e, -f and/or the first
 * non-option argument, compiles them, and then searches standard input
 * (no file arguments), the listed files, or directory trees (-R).
 *
 * Exits 0 when the search routines report a nonzero result, 1 when
 * they report zero; usage and other errors exit with status 2.
 */
int
main(int argc, char *argv[])
{
int c, lastc, prevoptind, newarg, i, needpattern;
struct patfile *patfile, *pf_next;
long l;
char *ep;
SLIST_INIT(&patfilelh);
#ifdef __minix
setprogname(argv[0]);
#endif
/*
 * Default mode from the invocation name: a leading 'e', 'f' or 'g'
 * selects -E, -F or -G; a leading 'z' turns on -Z and applies the same
 * test to the second letter.
 */
switch (__progname[0]) {
case 'e':
Eflag++;
break;
case 'f':
Fflag++;
break;
case 'g':
Gflag++;
break;
#ifndef NOZ
case 'z':
Zflag++;
switch(__progname[1]) {
case 'e':
Eflag++;
break;
case 'f':
Fflag++;
break;
case 'g':
Gflag++;
break;
}
break;
#endif
}
/*
 * lastc, newarg and prevoptind track whether the current option
 * character directly follows a digit within the same argv element, so
 * that consecutive digits accumulate into one -NUM context size.
 */
lastc = '\0';
newarg = 1;
prevoptind = 1;
/* Cleared once a pattern is supplied through -e or -f. */
needpattern = 1;
while ((c = getopt_long(argc, argv, optstr,
long_options, NULL)) != -1) {
switch (c) {
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
/* -NUM: start a new number, or append a digit to the current one. */
/* NOTE(review): the guard allows Aflag == INT_MAX / 10, so the multiply-add below can still overflow for a large final digit. */
if (newarg || !isdigit(lastc))
Aflag = 0;
else if (Aflag > INT_MAX / 10)
errx(2, "context out of range");
Aflag = Bflag = (Aflag * 10) + (c - '0');
break;
case 'A':
case 'B':
/* The argument must be a complete decimal number in 1 .. INT_MAX - 1. */
l = strtol(optarg, &ep, 10);
if (ep == optarg || *ep != '\0' ||
l <= 0 || l >= INT_MAX)
errx(2, "context out of range");
if (c == 'A')
Aflag = (int)l;
else
Bflag = (int)l;
break;
case 'C':
/* Optional argument; the default is two lines on each side. */
if (optarg == NULL)
Aflag = Bflag = 2;
else {
l = strtol(optarg, &ep, 10);
if (ep == optarg || *ep != '\0' ||
l <= 0 || l >= INT_MAX)
errx(2, "context out of range");
Aflag = Bflag = (int)l;
}
break;
/* -E, -F and -G are mutually exclusive: the last one given wins. */
case 'E':
Fflag = Gflag = 0;
Eflag++;
break;
case 'F':
Eflag = Gflag = 0;
Fflag++;
break;
case 'G':
Eflag = Fflag = 0;
Gflag++;
break;
case 'H':
Hflag++;
break;
case 'I':
binbehave = BIN_FILE_SKIP;
break;
case 'L':
/* -L overrides -l and implies quiet mode. */
lflag = 0;
Lflag = qflag = 1;
break;
case 'P':
Pflag++;
break;
case 'S':
Sflag++;
break;
case 'R':
case 'r':
/* Recursive search also turns on filename headers. */
Rflag++;
oflag++;
break;
case 'U':
binbehave = BIN_FILE_BIN;
break;
case 'V':
fprintf(stderr, "grep version %u.%u\n", VER_MAJ, VER_MIN);
exit(0);
break;
#ifndef NOZ
case 'Z':
Zflag++;
break;
#endif
case 'a':
binbehave = BIN_FILE_TEXT;
break;
case 'b':
bflag = 1;
break;
case 'c':
cflag = 1;
break;
case 'e':
add_patterns(optarg);
needpattern = 0;
break;
case 'f':
/*
 * Only queue the file name here; the files are read after option
 * parsing.  Entries are inserted at the head of the list, so they are
 * later read in the reverse of command-line order.
 */
patfile = grep_malloc(sizeof(*patfile));
patfile->pf_file = optarg;
SLIST_INSERT_HEAD(&patfilelh, patfile, pf_next);
needpattern = 0;
break;
case 'h':
oflag = 0;
hflag = 1;
break;
case 'i':
case 'y':
iflag = 1;
cflags |= REG_ICASE;
break;
case 'l':
/* -l overrides -L and implies quiet mode. */
Lflag = 0;
lflag = qflag = 1;
break;
case 'n':
nflag = 1;
break;
case 'o':
hflag = 0;
oflag = 1;
break;
case 'q':
qflag = 1;
break;
case 's':
sflag = 1;
break;
case 'v':
vflag = 1;
break;
case 'w':
wflag = 1;
break;
case 'x':
xflag = 1;
break;
case BIN_OPT:
if (strcmp("binary", optarg) == 0)
binbehave = BIN_FILE_BIN;
else if (strcmp("without-match", optarg) == 0)
binbehave = BIN_FILE_SKIP;
else if (strcmp("text", optarg) == 0)
binbehave = BIN_FILE_TEXT;
else
errx(2, "Unknown binary-files option");
break;
case 'u':
case MMAP_OPT:
/* default, compatibility */
break;
case LINEBUF_OPT:
lbflag = 1;
break;
case HELP_OPT:
default:
usage();
}
/* Remember this option and whether getopt moved on to a new argv element. */
lastc = c;
newarg = optind != prevoptind;
prevoptind = optind;
}
argc -= optind;
argv += optind;
/* Read the queued -f pattern files, freeing each list entry after use. */
for (patfile = SLIST_FIRST(&patfilelh); patfile != NULL;
patfile = pf_next) {
pf_next = SLIST_NEXT(patfile, pf_next);
read_patterns(patfile->pf_file);
free(patfile);
}
/* Without -e or -f the first remaining argument is the pattern. */
if (argc == 0 && needpattern)
usage();
if (argc != 0 && needpattern) {
add_patterns(*argv);
--argc;
++argv;
}
if (Eflag)
cflags |= REG_EXTENDED;
fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
for (i = 0; i < patterns; ++i) {
/* Check if cheating is allowed (always is for fgrep). */
if (Fflag) {
fgrepcomp(&fg_pattern[i], pattern[i]);
} else {
/* A nonzero return from fastcomp() means the fast matcher cannot be used. */
if (fastcomp(&fg_pattern[i], pattern[i])) {
/* Fall back to full regex library */
c = regcomp(&r_pattern[i], pattern[i], cflags);
if (c != 0) {
regerror(c, &r_pattern[i], re_error,
RE_ERROR_BUF);
errx(2, "%s", re_error);
}
}
}
}
#ifndef __minix
if (lbflag)
setlinebuf(stdout);
#endif
/* With at most one input, suppress filename headers unless they were requested. */
if ((argc == 0 || argc == 1) && !oflag)
hflag = 1;
/* No file arguments: procfile(NULL) handles the default input. */
if (argc == 0)
exit(!procfile(NULL));
if (Rflag)
c = grep_tree(argv);
else
for (c = 0; argc--; ++argv)
c += procfile(*argv);
/* Exit status is 0 when c is nonzero, 1 otherwise. */
exit(!c);
}

127
commands/grep/grep.h Normal file
View file

@ -0,0 +1,127 @@
/* $OpenBSD: grep.h,v 1.13 2006/02/09 09:54:47 otto Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/types.h>
#include <limits.h>
#include <regex.h>
#include <stdio.h>
#ifndef NOZ
#include <zlib.h>
#endif
/* Version reported by -V as "grep version VER_MAJ.VER_MIN". */
#define VER_MAJ 1
#define VER_MIN 2
/* Values for binbehave: how binary files are treated. */
#define BIN_FILE_BIN 0
#define BIN_FILE_SKIP 1
#define BIN_FILE_TEXT 2
/*
 * One line of input as passed to printline() and enqueue().
 * NOTE(review): field meanings below are inferred from their names;
 * confirm against util.c and queue.c.
 */
typedef struct {
size_t len; /* presumably the length of dat in bytes */
int line_no; /* presumably the line number within the file */
off_t off; /* presumably the byte offset of the line */
char *file; /* presumably the name of the file the line came from */
char *dat; /* presumably the line contents */
} str_t;
/*
 * Compiled form of a pattern for the fast matcher, filled in by
 * fastcomp() or fgrepcomp().
 * NOTE(review): the compile and match routines are not in this header;
 * field meanings below are inferred from their names.
 */
typedef struct {
unsigned char *pattern; /* pattern bytes */
int patternLen; /* presumably the number of bytes in pattern */
int qsBc[UCHAR_MAX + 1]; /* one entry per possible byte value; presumably a shift table */
/* flags */
int bol;
int eol;
int wmatch;
int reversedSearch;
} fastgrep_t;
/* Flags passed to regcomp() and regexec() */
extern int cflags, eflags;
/*
 * Command line flags (defined in grep.c).
 * Note: Zflag is declared here unconditionally although grep.c defines
 * it only when NOZ is not defined; oflag and lbflag are defined in
 * grep.c but not declared here.
 */
extern int Aflag, Bflag, Eflag, Fflag, Gflag, Hflag, Lflag, Pflag,
Sflag, Rflag, Zflag,
bflag, cflag, hflag, iflag, lflag, nflag, qflag, sflag,
vflag, wflag, xflag;
/* Binary-file handling mode, one of the BIN_FILE_* values. */
extern int binbehave;
extern int first, matchall, patterns, tail;
/* Pattern strings and their compiled forms, indexed 0 .. patterns - 1. */
extern char **pattern;
extern fastgrep_t *fg_pattern;
extern regex_t *r_pattern;
/* For regex errors */
#define RE_ERROR_BUF 512
extern char re_error[RE_ERROR_BUF + 1]; /* Seems big enough */
/* util.c */
/* Search one file; main() passes NULL when no file arguments were given. */
int procfile(char *fn);
/* Search the directory trees named in argv (used with -R). */
int grep_tree(char **argv);
/* Allocation wrappers. */
void *grep_malloc(size_t size);
void *grep_calloc(size_t nmemb, size_t size);
void *grep_realloc(void *ptr, size_t size);
void printline(str_t *line, int sep);
/* Fast-matcher compilers; main() falls back to regcomp() when fastcomp() returns nonzero. */
int fastcomp(fastgrep_t *, const char *);
void fgrepcomp(fastgrep_t *, const char *);
/* queue.c */
void initqueue(void);
void enqueue(str_t *x);
void printqueue(void);
void clearqueue(void);
/* mmfile.c */
/* A read-only memory-mapped file together with a read cursor. */
typedef struct mmfile {
int fd; /* descriptor backing the mapping */
size_t len; /* length of the mapping in bytes */
char *base, *end, *ptr; /* start of mapping, one past its end, read cursor */
} mmf_t;
mmf_t *mmopen(char *fn, char *mode);
void mmclose(mmf_t *mmf);
char *mmfgetln(mmf_t *mmf, size_t *l);
/* file.c */
/* Opaque input handle; struct file is defined in file.c. */
struct file;
typedef struct file file_t;
file_t *grep_fdopen(int fd, char *mode);
file_t *grep_open(char *path, char *mode);
int grep_bin_file(file_t *f);
char *grep_fgetln(file_t *f, size_t *l);
void grep_close(file_t *f);
/* binary.c */
/* Each returns 1 when the start of the input contains binary bytes, 0 otherwise. */
int bin_file(FILE * f);
#ifndef NOZ
int gzbin_file(gzFile * f);
#endif
int mmbin_file(mmf_t *f);

102
commands/grep/mmfile.c Normal file
View file

@ -0,0 +1,102 @@
/* $OpenBSD: mmfile.c,v 1.11 2006/09/19 05:52:23 otto Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/param.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <err.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
#include "grep.h"
/*
* NOTE(review): not referenced by any code visible in this file; presumably
* meant as a cap on the mapping size (1048576 bytes) -- confirm before use.
*/
#define MAX_MAP_LEN 1048576
/*
 * Open fn read-only and map the whole file into memory.
 *
 * Only regular files whose size fits in a size_t are mapped.  On success
 * the returned handle has ptr at the start of the mapping and end one
 * past its last byte.  On any failure everything acquired so far is
 * released and NULL is returned.
 */
mmf_t *
mmopen(char *fn, char *mode)
{
	struct stat sb;
	mmf_t *m;

	/* XXX ignore mode for now */
	(void)mode;

	m = grep_malloc(sizeof *m);
	m->fd = open(fn, O_RDONLY);
	if (m->fd == -1) {
		free(m);
		return NULL;
	}

	if (fstat(m->fd, &sb) == -1 ||
	    sb.st_size > SIZE_T_MAX ||		/* too big to mmap */
	    !S_ISREG(sb.st_mode))		/* only mmap regular files */
		goto fail;

	m->len = (size_t)sb.st_size;
	m->base = mmap(NULL, m->len, PROT_READ, MAP_PRIVATE, m->fd, (off_t)0);
	if (m->base == MAP_FAILED)
		goto fail;

	m->ptr = m->base;
	m->end = m->base + m->len;
#ifndef __minix
	madvise(m->base, m->len, MADV_SEQUENTIAL);
#endif
	return m;

fail:
	close(m->fd);
	free(m);
	return NULL;
}
/*
 * Release everything held by mmf: the mapping, the descriptor and the
 * handle itself.  mmf must not be used after this call.
 */
void
mmclose(mmf_t *mmf)
{
	char *mapping = mmf->base;
	size_t maplen = mmf->len;
	int fd = mmf->fd;

	munmap(mapping, maplen);
	close(fd);
	free(mmf);
}
/*
 * Return the next line of the mapped file.
 *
 * The returned pointer refers directly into the mapping and the line is
 * not NUL-terminated.  *l receives the line length, not counting the
 * terminating newline; a final line without a newline is returned as is.
 * Returns NULL, leaving *l untouched, once all data has been consumed.
 */
char *
mmfgetln(mmf_t *mmf, size_t *l)
{
	char *line = mmf->ptr;
	char *end = mmf->end;
	char *p;

	if (line >= end)
		return NULL;

	/* Scan with locals so the loop does not reload the mmf fields. */
	for (p = line; p < end; ++p)
		if (*p == '\n')
			break;

	*l = (size_t)(p - line);

	/*
	 * Step over the newline, but never advance beyond the end of the
	 * mapping: forming a pointer past one-past-the-end is undefined.
	 */
	mmf->ptr = (p < end) ? p + 1 : end;
	return line;
}

122
commands/grep/queue.c Normal file
View file

@ -0,0 +1,122 @@
/* $OpenBSD: queue.c,v 1.4 2003/06/25 05:31:11 deraadt Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* A really poor man's queue. It does only what it has to and gets out of
* Dodge.
*/
#include <sys/param.h>
#include <stdlib.h>
#include <string.h>
#include "grep.h"
/* Singly linked node holding one buffered line. */
typedef struct queue {
struct queue *next; /* following node, or NULL for the newest entry */
str_t data; /* line record; enqueue() points data.dat at bytes stored right after this node */
} queue_t;
/* Oldest and newest queued nodes; dequeue() leaves both NULL once the queue is empty. */
static queue_t *q_head, *q_tail;
/* Number of nodes currently queued; enqueue() drops the oldest node once this exceeds Bflag. */
static int count;
static queue_t *dequeue(void);
/*
 * Reset the head and tail pointers.  Nodes still linked are not freed
 * and the element count is left as it is.
 */
void
initqueue(void)
{
	q_head = NULL;
	q_tail = NULL;
}
/*
* Release a node. The line bytes were allocated together with the node
* by enqueue(), so one free() covers both. free(NULL) is a no-op, which is
* why enqueue() may pass the result of dequeue() straight through.
*/
static void
free_item(queue_t *item)
{
free(item);
}
/*
 * Append a private copy of line x to the queue.
 *
 * The node and the copied bytes share a single allocation, with the
 * bytes placed immediately after the node.  Once more than Bflag lines
 * are queued, the oldest one is discarded.
 */
void
enqueue(str_t *x)
{
	queue_t *node;

	node = grep_malloc(sizeof *node + x->len);
	node->next = NULL;
	node->data.file = x->file;
	node->data.line_no = x->line_no;
	node->data.off = x->off;
	node->data.len = x->len;
	node->data.dat = (char *)node + sizeof *node;
	memcpy(node->data.dat, x->dat, x->len);

	/* Link at the tail; an empty queue gets the node as its head too. */
	if (q_head == NULL)
		q_head = node;
	else
		q_tail->next = node;
	q_tail = node;

	count++;
	if (count > Bflag)
		free_item(dequeue());
}
/*
 * Unlink and return the oldest node, or NULL when the queue is empty.
 * The caller becomes responsible for freeing the returned node.
 */
static queue_t *
dequeue(void)
{
	queue_t *oldest = q_head;

	if (oldest != NULL) {
		count--;
		q_head = oldest->next;
		if (q_head == NULL)
			q_tail = NULL;
	}
	return oldest;
}
/*
 * Print every queued line, oldest first, using '-' as the field
 * separator, and free each node as it is printed.
 */
void
printqueue(void)
{
	queue_t *node;

	for (node = dequeue(); node != NULL; node = dequeue()) {
		printline(&node->data, '-');
		free_item(node);
	}
}
/* Discard every queued line without printing it. */
void
clearqueue(void)
{
	for (;;) {
		queue_t *node = dequeue();

		if (node == NULL)
			break;
		free_item(node);
	}
}

17
commands/grep/readme.md Normal file
View file

@ -0,0 +1,17 @@
FreeGrep
========
The grep utility searches any given input files, selecting lines
that match one or more patterns. By default, a pattern matches an
input line if the regular expression in the pattern matches the
input line without its trailing newline. An empty expression matches
every line. Each input line that matches at least one of the patterns
is written to the standard output. grep is used for simple patterns
and basic regular expressions; egrep can handle extended regular
expressions. fgrep is quicker than both grep and egrep, but can
only handle fixed patterns (i.e. it does not interpret regular
expressions). Patterns may consist of one or more lines, allowing
any of the pattern lines to match a portion of the input. zgrep,
zegrep, and zfgrep act like grep, egrep, and fgrep, respectively,
but accept input files compressed with the compress or gzip compression
utilities.

594
commands/grep/util.c Normal file
View file

@ -0,0 +1,594 @@
/* $OpenBSD: util.c,v 1.35 2007/09/02 15:19:32 deraadt Exp $ */
/*-
* Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fts.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "grep.h"
/*
* Process a file line by line...
*/
/* Lines handed to enqueue() since procfile() started the file or procline() last printed a match. */
static int linesqueued;
/* Match one line against all patterns and print it if required. */
static int procline(str_t *l, int);
/* Fast-path matcher for patterns prepared by fgrepcomp()/fastcomp(). */
static int grep_search(fastgrep_t *, unsigned char *, size_t, regmatch_t *pmatch);
/* Pattern/data comparison; returns -1 on a match, else the failing index. */
static int grep_cmp(const unsigned char *, const unsigned char *, size_t);
/* In-place reversal of a byte string of the given length. */
static void grep_revstr(unsigned char *, int);
/*
 * Walk the file hierarchies named in argv and run procfile() on every
 * entry that is not reported as a directory visit.
 *
 * Hflag, Pflag and Sflag select the symlink policy; when several are
 * set the one tested last wins.  Unreadable directories and traversal
 * errors terminate the program with status 2.
 *
 * Returns the sum of the procfile() results.
 */
int
grep_tree(char **argv)
{
	FTS *fts;
	FTSENT *p;
	int c, fts_flags;

	c = fts_flags = 0;

	if (Hflag)
		fts_flags = FTS_COMFOLLOW;
	if (Pflag)
		fts_flags = FTS_PHYSICAL;
	if (Sflag)
		fts_flags = FTS_LOGICAL;

	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;

	if (!(fts = fts_open(argv, fts_flags, NULL)))
		err(2, NULL);
	while ((p = fts_read(fts)) != NULL) {
		switch (p->fts_info) {
		case FTS_DNR:
			/* FALL THROUGH */
		case FTS_ERR:
			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
			break;
		case FTS_D:
		case FTS_DP:
			break;
		default:
			c += procfile(p->fts_path);
			break;
		}
	}
	/* fts_read() returned NULL: a non-zero errno is treated as failure. */
	if (errno)
		err(2, "fts_read");

	/* Release the traversal handle; it was previously leaked. */
	(void)fts_close(fts);

	return c;
}
/*
 * Run the matcher over one input and print the per-file summaries.
 *
 * fn names the file to read; NULL selects standard input, reported as
 * "(standard input)".  Returns the accumulated procline() results, or 0
 * when the input cannot be opened or is skipped as a binary file.
 */
int
procfile(char *fn)
{
	file_t *f;
	str_t ln;
	int matches, hit, tail_before, nottext;

	if (fn != NULL) {
		f = grep_open(fn, "r");
	} else {
		fn = "(standard input)";
		f = grep_fdopen(STDIN_FILENO, "r");
	}
	if (f == NULL) {
		if (!sflag)
			warn("%s", fn);
		return 0;
	}

	nottext = grep_bin_file(f);
	if (nottext && binbehave == BIN_FILE_SKIP) {
		grep_close(f);
		return 0;
	}

	ln.file = fn;
	ln.line_no = 0;
	ln.len = 0;
	ln.off = -1;
	linesqueued = 0;
	tail = 0;

	if (Bflag > 0)
		initqueue();

	matches = 0;
	/* When lflag or qflag is set, the first match ends the scan. */
	while (matches == 0 || (!lflag && !qflag)) {
		/* Advance past the previous line and its newline. */
		ln.off += ln.len + 1;
		ln.dat = grep_fgetln(f, &ln.len);
		if (ln.dat == NULL)
			break;
		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
			--ln.len;
		ln.line_no++;

		/* procline() may change tail, so sample it beforehand. */
		tail_before = tail;

		hit = procline(&ln, nottext);
		if (hit == 0 && Bflag > 0 && tail_before == 0) {
			enqueue(&ln);
			linesqueued++;
		}
		matches += hit;
	}

	if (Bflag > 0)
		clearqueue();
	grep_close(f);

	if (cflag) {
		if (!hflag)
			printf("%s:", ln.file);
		printf("%u\n", matches);
	}
	if (matches != 0 && lflag)
		printf("%s\n", fn);
	if (matches == 0 && Lflag)
		printf("%s\n", fn);
	if (matches && nottext && binbehave == BIN_FILE_BIN &&
	    !cflag && !lflag && !Lflag && !qflag)
		printf("Binary file %s matches\n", fn);

	return matches;
}
/*
 * Process an individual line in a file.  Return non-zero if it matches.
 *
 * Besides matching, this prints the line when it is selected or when it
 * falls within the trailing context of an earlier match, and it keeps
 * the context bookkeeping (first, tail, linesqueued) up to date.
 */

#define isword(x) (isalnum(x) || (x) == '_')

static int
procline(str_t *l, int nottext)
{
	regmatch_t pmatch;
	int matched, i, r;

	if (matchall) {
		matched = !vflag;
	} else {
		matched = 0;
		for (i = 0; i < patterns; i++) {
			if (fg_pattern[i].pattern != NULL) {
				r = grep_search(&fg_pattern[i],
				    (unsigned char *)l->dat, l->len, &pmatch);
			} else {
				pmatch.rm_so = 0;
				pmatch.rm_eo = l->len;
				r = regexec(&r_pattern[i], l->dat, 1, &pmatch,
				    eflags);
			}
			/* With xflag the match has to span the entire line. */
			if (r == 0 && xflag &&
			    (pmatch.rm_so != 0 || pmatch.rm_eo != l->len))
				r = REG_NOMATCH;
			if (r == 0) {
				matched = 1;
				break;
			}
		}
		if (vflag)
			matched = !matched;
	}

	if (matched && binbehave == BIN_FILE_BIN && nottext)
		return matched;	/* Binary file */

	/* Nothing is printed per line when counting or running quietly. */
	if (cflag || qflag)
		return matched;

	if (matched) {
		if (first > 0 && tail == 0 && (Bflag < linesqueued) &&
		    (Aflag || Bflag))
			printf("--\n");
		first = 1;
		tail = Aflag;
		if (Bflag > 0)
			printqueue();
		linesqueued = 0;
		printline(l, ':');
	} else if (tail > 0) {
		/* Trailing context of an earlier match. */
		printline(l, '-');
		tail--;
	}
	return matched;
}
/*
 * Prepare fg for searching the fixed string pattern.
 *
 * The pattern is used as is; when iflag is set an upper-cased copy is
 * stored instead.  The shift table fg->qsBc, consulted by grep_search(),
 * is filled in for every byte value.
 */
void
fgrepcomp(fastgrep_t *fg, const char *pattern)
{
	int i;

	/* Initialize. */
	fg->patternLen = strlen(pattern);
	fg->bol = 0;
	fg->eol = 0;
	fg->wmatch = wflag;
	fg->reversedSearch = 0;

	/*
	 * Make a copy and upper case it for later if in -i mode,
	 * else just copy the pointer.
	 */
	if (iflag) {
		fg->pattern = grep_malloc(fg->patternLen + 1);
		/*
		 * pattern is plain char; the <ctype.h> functions need a
		 * value representable as unsigned char, so convert before
		 * the call to keep bytes above 0x7F well defined.
		 */
		for (i = 0; i < fg->patternLen; i++)
			fg->pattern[i] = toupper((unsigned char)pattern[i]);
		fg->pattern[fg->patternLen] = '\0';
	} else
		fg->pattern = (unsigned char *)pattern;	/* really const */

	/* Preprocess pattern. */
	for (i = 0; i <= UCHAR_MAX; i++)
		fg->qsBc[i] = fg->patternLen;
	for (i = 1; i < fg->patternLen; i++) {
		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
		/*
		 * If case is ignored, make the jump apply to both upper and
		 * lower cased characters.  As the pattern is stored in upper
		 * case, apply the same to the lower case equivalents.
		 */
		if (iflag)
			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
	}
}
/*
 * Try to prepare pattern for the fast matcher instead of the regex
 * engine.
 *
 * A leading '^' and trailing '$' are stripped and remembered in fg->bol
 * and fg->eol, as are enclosing word-match markers.  What is left may
 * consist only of alphanumerics, white space, the characters _ , = - : /
 * and the wildcard '.'.
 *
 * Returns: -1 on failure (fg->pattern is freed and set to NULL),
 *           0 on success
 */
int
fastcomp(fastgrep_t *fg, const char *pattern)
{
	int i;
	int bol = 0;
	int eol = 0;
	int shiftPatternLen;
	int hasDot = 0;
	int firstHalfDot = -1;
	int firstLastHalfDot = -1;
	int lastHalfDot = 0;

	/* Initialize. */
	fg->patternLen = strlen(pattern);
	fg->bol = 0;
	fg->eol = 0;
	fg->wmatch = 0;
	fg->reversedSearch = 0;

	/*
	 * Remove end-of-line character ('$').  The length check keeps an
	 * empty pattern from reading pattern[-1].
	 */
	if (fg->patternLen > 0 && pattern[fg->patternLen - 1] == '$') {
		eol++;
		fg->eol = 1;
		fg->patternLen--;
	}

	/* Remove beginning-of-line character ('^'). */
	if (pattern[0] == '^') {
		bol++;
		fg->bol = 1;
		fg->patternLen--;
	}

	/* Remove enclosing [[:<:]] and [[:>:]] (word match). */
	if (wflag) {
		/* basic re's use \( \), extended re's ( ) */
		int extra = Eflag ? 1 : 2;
		fg->patternLen -= 14 + 2 * extra;
		fg->wmatch = 7 + extra;
	} else if (fg->patternLen >= 14 &&
	    strncmp(pattern + fg->bol, "[[:<:]]", 7) == 0 &&
	    strncmp(pattern + fg->bol + fg->patternLen - 7, "[[:>:]]", 7) == 0) {
		fg->patternLen -= 14;
		fg->wmatch = 7;
	}

	/*
	 * Copy pattern minus '^' and '$' characters as well as word
	 * match character classes at the beginning and ending of the
	 * string respectively.
	 */
	fg->pattern = grep_malloc(fg->patternLen + 1);
	memcpy(fg->pattern, pattern + bol + fg->wmatch, fg->patternLen);
	fg->pattern[fg->patternLen] = '\0';

	/* Look for ways to cheat...er...avoid the full regex engine. */
	for (i = 0; i < fg->patternLen; i++)
	{
		/* Can still cheat? */
		if ((isalnum(fg->pattern[i])) || isspace(fg->pattern[i]) ||
		    (fg->pattern[i] == '_') || (fg->pattern[i] == ',') ||
		    (fg->pattern[i] == '=') || (fg->pattern[i] == '-') ||
		    (fg->pattern[i] == ':') || (fg->pattern[i] == '/')) {
			/* As long as it is good, upper case it for later. */
			if (iflag)
				fg->pattern[i] = toupper(fg->pattern[i]);
		} else if (fg->pattern[i] == '.') {
			hasDot = i;
			if (i < fg->patternLen / 2) {
				if (firstHalfDot < 0)
					/* Closest dot to the beginning */
					firstHalfDot = i;
			} else {
				/* Closest dot to the end of the pattern. */
				lastHalfDot = i;
				if (firstLastHalfDot < 0)
					firstLastHalfDot = i;
			}
		} else {
			/* Free memory and let others know this is empty. */
			free(fg->pattern);
			fg->pattern = NULL;
			return (-1);
		}
	}

	/*
	 * Determine if a reverse search would be faster based on the placement
	 * of the dots.
	 */
	if ((!(lflag || cflag)) && ((!(bol || eol)) &&
	    ((lastHalfDot) && ((firstHalfDot < 0) ||
	    ((fg->patternLen - (lastHalfDot + 1)) < firstHalfDot))))) {
		fg->reversedSearch = 1;
		hasDot = fg->patternLen - (firstHalfDot < 0 ?
		    firstLastHalfDot : firstHalfDot) - 1;
		grep_revstr(fg->pattern, fg->patternLen);
	}

	/*
	 * Normal Quick Search would require a shift based on the position the
	 * next character after the comparison is within the pattern.  With
	 * wildcards, the position of the last dot effects the maximum shift
	 * distance.
	 * The closer to the end the wild card is the slower the search.  A
	 * reverse version of this algorithm would be useful for wildcards near
	 * the end of the string.
	 *
	 * Examples:
	 * Pattern	Max shift
	 * -------	---------
	 * this		5
	 * .his		4
	 * t.is		3
	 * th.s		2
	 * thi.		1
	 */

	/* Adjust the shift based on location of the last dot ('.'). */
	shiftPatternLen = fg->patternLen - hasDot;

	/* Preprocess pattern. */
	for (i = 0; i <= UCHAR_MAX; i++)
		fg->qsBc[i] = shiftPatternLen;
	for (i = hasDot + 1; i < fg->patternLen; i++) {
		fg->qsBc[fg->pattern[i]] = fg->patternLen - i;
		/*
		 * If case is ignored, make the jump apply to both upper and
		 * lower cased characters.  As the pattern is stored in upper
		 * case, apply the same to the lower case equivalents.
		 */
		if (iflag)
			fg->qsBc[tolower(fg->pattern[i])] = fg->patternLen - i;
	}

	/*
	 * Put pattern back to normal after pre-processing to allow for easy
	 * comparisons later.
	 */
	if (fg->reversedSearch)
		grep_revstr(fg->pattern, fg->patternLen);

	return (0);
}
/*
* Word boundaries using regular expressions are defined as the point
* of transition from a non-word char to a word char, or vice versa.
* This means that grep -w +a and grep -w a+ never match anything,
* because they lack a starting or ending transition, but grep -w a+b
* does match a line containing a+b.
*
* wmatch(d, l, s, e): d is the line data, l its length, and s/e the start
* and end offsets of the candidate match (e exclusive). The arguments are
* expanded unparenthesized and more than once, so pass only simple
* expressions without side effects.
*/
#define wmatch(d, l, s, e) \
((s == 0 || !isword(d[s-1])) && (e == l || !isword(d[e])) && \
e > s && isword(d[s]) && isword(d[e-1]))
/*
* Search the dataLen bytes at data for the prepared pattern in fg.
*
* Returns 0 on a match, with pmatch->rm_so/rm_eo set to the start and end
* offsets of the match, or REG_NOMATCH otherwise. pmatch is reset to -1/-1
* on entry; note that a candidate rejected by the word-match test leaves
* its offsets behind in pmatch even though REG_NOMATCH is returned.
*
* Three strategies are used: a single comparison for anchored patterns,
* a scan from the end of the line when fg->reversedSearch is set, and a
* scan from the start of the line otherwise.
*/
static int
grep_search(fastgrep_t *fg, unsigned char *data, size_t dataLen, regmatch_t *pmatch)
{
int j;
int rtrnVal = REG_NOMATCH;
pmatch->rm_so = -1;
pmatch->rm_eo = -1;
/* No point in going farther if we do not have enough data. */
if (dataLen < fg->patternLen)
return (rtrnVal);
/* Only try once at the beginning or ending of the line. */
if (fg->bol || fg->eol) {
/* Simple text comparison. */
/* Verify data is >= pattern length before searching on it. */
if (dataLen >= fg->patternLen) {
/* Determine where in data to start search at. */
if (fg->eol)
j = dataLen - fg->patternLen;
else
j = 0;
/* With both anchors set the line must be exactly as long as the pattern. */
if (!((fg->bol && fg->eol) && (dataLen != fg->patternLen)))
if (grep_cmp(fg->pattern, data + j,
fg->patternLen) == -1) {
pmatch->rm_so = j;
pmatch->rm_eo = j + fg->patternLen;
if (!fg->wmatch || wmatch(data, dataLen,
pmatch->rm_so, pmatch->rm_eo))
rtrnVal = 0;
}
}
} else if (fg->reversedSearch) {
/* Quick Search algorithm. */
/* j is the end offset of the window; it starts at the end of the line and moves left. */
j = dataLen;
do {
if (grep_cmp(fg->pattern, data + j - fg->patternLen,
fg->patternLen) == -1) {
pmatch->rm_so = j - fg->patternLen;
pmatch->rm_eo = j;
if (!fg->wmatch || wmatch(data, dataLen,
pmatch->rm_so, pmatch->rm_eo)) {
rtrnVal = 0;
break;
}
}
/* Shift if within bounds, otherwise, we are done. */
if (j == fg->patternLen)
break;
/* The shift is looked up from the byte just before the window. */
j -= fg->qsBc[data[j - fg->patternLen - 1]];
} while (j >= fg->patternLen);
} else {
/* Quick Search algorithm. */
/* j is the start offset of the window; it starts at 0 and moves right. */
j = 0;
do {
if (grep_cmp(fg->pattern, data + j, fg->patternLen) == -1) {
pmatch->rm_so = j;
pmatch->rm_eo = j + fg->patternLen;
/* An empty pattern matches without the word-boundary test. */
if (fg->patternLen == 0 || !fg->wmatch ||
wmatch(data, dataLen, pmatch->rm_so,
pmatch->rm_eo)) {
rtrnVal = 0;
break;
}
}
/* Shift if within bounds, otherwise, we are done. */
if (j + fg->patternLen == dataLen)
break;
else
/* The shift is looked up from the byte just after the window. */
j += fg->qsBc[data[j + fg->patternLen]];
} while (j <= (dataLen - fg->patternLen));
}
return (rtrnVal);
}
/*
 * malloc() wrapper that never returns NULL: if the allocation fails the
 * program exits with status 2 through err().
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return mem;
}
/*
 * calloc() wrapper that never returns NULL: if the allocation fails the
 * program exits with status 2 through err().
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return mem;
}
/*
 * realloc() wrapper that never returns NULL: if the reallocation fails
 * the program exits with status 2 through err().
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (resized == NULL)
		err(2, "realloc");
	return resized;
}
/*
 * Compare len bytes of data against pattern.
 *
 * A position is accepted when the bytes are equal, when the pattern byte
 * is '.' and Fflag is not set, or when iflag is set and the pattern byte
 * equals the upper-cased data byte.
 *
 * Returns: i >= 0 on failure (position that it failed)
 *          -1 on success
 */
static int
grep_cmp(const unsigned char *pattern, const unsigned char *data, size_t len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (pattern[i] == data[i])
			continue;
		if (!Fflag && pattern[i] == '.')
			continue;
		if (iflag && pattern[i] == toupper(data[i]))
			continue;
		return (i);
	}
	return (-1);
}
/*
 * Reverse the first len bytes of str in place.  A len of one or less
 * leaves the buffer untouched.
 */
static void
grep_revstr(unsigned char *str, int len)
{
	/*
	 * The temporary has the same type as the data, so bytes above 0x7F
	 * are not pushed through an implementation-defined conversion to a
	 * possibly signed char.
	 */
	unsigned char tmp;
	int lo, hi;

	for (lo = 0, hi = len - 1; lo < hi; lo++, hi--) {
		tmp = str[lo];
		str[lo] = str[hi];
		str[hi] = tmp;
	}
}
/*
 * Write one line to standard output.
 *
 * Depending on hflag, nflag and bflag the line is prefixed with the file
 * name, the line number and the byte offset.  Each prefix field present
 * is followed by sep, and a newline is written after the line data.
 */
void
printline(str_t *line, int sep)
{
	int fields = 0;

	if (!hflag) {
		fputs(line->file, stdout);
		fields++;
	}
	if (nflag) {
		if (fields != 0)
			putchar(sep);
		printf("%d", line->line_no);
		fields++;
	}
	if (bflag) {
		if (fields != 0)
			putchar(sep);
#ifndef __minix
		printf("%lld", (long long)line->off);
#else
		printf("%ld", (long)line->off);
#endif
		fields++;
	}
	if (fields != 0)
		putchar(sep);
	fwrite(line->dat, line->len, 1, stdout);
	putchar('\n');
}