New sed
This commit is contained in:
parent
64dbdd855d
commit
aa8206941c
6 changed files with 1973 additions and 0 deletions
10
commands/sed/BUGS
Normal file
10
commands/sed/BUGS
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
|
||||||
|
So far no regression over the historic sed are known. If you find a bug,
|
||||||
|
please provide a test-case (.sed, .in and .out, look into tests/) - if
|
||||||
|
possible try to debug the problem and propose a patch.
|
||||||
|
|
||||||
|
We will focus on POSIX conformance and small size - GNU sed extensions are
|
||||||
|
most likely not accepted.
|
||||||
|
|
||||||
|
Please report issues to: Rene Rebe <rene@exactcode.de>
|
||||||
|
|
22
commands/sed/Makefile
Normal file
22
commands/sed/Makefile
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
# Makefile for minised
|
||||||
|
|
||||||
|
# If your compiler does not support this flags, just remove them.
|
||||||
|
# They only ensure that no new warning regressions make it into the source.
|
||||||
|
CFLAGS = -Wall -Wwrite-strings
|
||||||
|
|
||||||
|
minised: sedcomp.o sedexec.o
|
||||||
|
$(CC) $(LFLAGS) sedcomp.o sedexec.o -o minised
|
||||||
|
|
||||||
|
sedcomp.o: sedcomp.c sed.h
|
||||||
|
sedexec.o: sedexec.c sed.h
|
||||||
|
|
||||||
|
install: minised
|
||||||
|
install -o bin -m 755 minised /usr/bin/
|
||||||
|
install -o bin -m 755 minised /bin/
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f minised sedcomp.o sedexec.o
|
||||||
|
|
||||||
|
check: minised
|
||||||
|
cd tests; ./run ../minised
|
||||||
|
|
81
commands/sed/README
Normal file
81
commands/sed/README
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
small-sed
|
||||||
|
by Eric S. Raymond, <esr@snark.thyrsus.com>
|
||||||
|
and Rene Rebe <rene@exactcode.de>
|
||||||
|
|
||||||
|
This is a smaller, cheaper, and faster SED utility. Minix uses it. GNU used
|
||||||
|
to use it, until they built their own sed around an extended (some would
|
||||||
|
say over-extended) regexp package and it is used for embedded tasks (for
|
||||||
|
example by the T2 SDE - http://www.t2-project.org).
|
||||||
|
|
||||||
|
The original sed 1.0 was written in three pieces; sed.h, sedcomp.c, sedexec.c.
|
||||||
|
Some Minix hacker ran them together into a single-file version, mnsed.c which
|
||||||
|
is not supported and shipped these days; if changes are needed for Minix please
|
||||||
|
send a patch to the normal source.
|
||||||
|
|
||||||
|
The 1.2 version (9 Oct 1996) add mnsed's support for detecting
|
||||||
|
truncated hold spaces. The mnsed version is missing one feature in
|
||||||
|
of the 1.2 version; support of +. Also, the multiple-file I/O is
|
||||||
|
organized slightly differently.
|
||||||
|
|
||||||
|
The 1.3 version added a bug fix by Tom Oehser, and the `L' command. Also
|
||||||
|
this program is now distributed under GPL.
|
||||||
|
|
||||||
|
The 1.5 version incooperated a lot of bug fixes by Rene Rebe as well as
|
||||||
|
a real test suite. Also the function declaration and definition have been
|
||||||
|
converted from the K&R C to ANSI C.
|
||||||
|
|
||||||
|
The 1.6 version includes support for the n'th match for the substitude command
|
||||||
|
as well as support for predefined character classes and only writes lines
|
||||||
|
with newline if one was present in the input line (compatible with GNU sed).
|
||||||
|
|
||||||
|
The 1.7 version fixed a segmentation fault with empty regular expressions,
|
||||||
|
not to leak other buffer content for groups of commands and escaping
|
||||||
|
numerical seperators in regular expressions by disabling obscure code.
|
||||||
|
Additionally compilation with older compilers as well as warnings with the
|
||||||
|
latest gcc versions have been corrected.
|
||||||
|
|
||||||
|
The 1.8 version fixes matching of some escaped characters (a regression
|
||||||
|
introduced with \+ star matching), \+ star matching to corretly copy
|
||||||
|
and mark the internal bytecode representation, back references inside lhs
|
||||||
|
regular expressions matching (to work at all) and marking the correct
|
||||||
|
regular expression for star matches.
|
||||||
|
|
||||||
|
The 1.9 version included a microoptimization shaving some bytes off the
|
||||||
|
binary and some cpu cycles at run time, reusing the previous regular
|
||||||
|
expressions for empty ones, predefined character classes with control
|
||||||
|
characters, handling of escaped ampesands and support for backreference
|
||||||
|
\0 and Kleene star operator on groups.
|
||||||
|
|
||||||
|
The 1.10 version fixed a special case of grouped star matching where
|
||||||
|
\+1..n overwrote the last match, not to infinite loop on certain zero match
|
||||||
|
grouped star cases and not to crash on w(rite to file). The version also
|
||||||
|
no longer falls into the conservative end-of-file matching mode when just
|
||||||
|
end-of-line matching was used.
|
||||||
|
|
||||||
|
The 1.11 version again fixed w(rite to file) handling to correctly honor
|
||||||
|
/dev/stdout and /dev/stderr as GNU sed does and thus keep the streams in
|
||||||
|
sync. Some unused variables have been removed and a two diagnostics
|
||||||
|
fixed to be printed correctly.
|
||||||
|
|
||||||
|
The 1.12 version fixed the l(ist) command to actually work, some tiny
|
||||||
|
optimizations have been performed as well as some more compiler warnings
|
||||||
|
fixed.
|
||||||
|
|
||||||
|
Makefile -- how to build sed
|
||||||
|
sed.h -- declarations and structures
|
||||||
|
sedcomp.c -- sed pattern compilation
|
||||||
|
sedexec.c -- sed program execution
|
||||||
|
sed.1 -- source for the man page
|
||||||
|
tests/ -- a small set of sed tests
|
||||||
|
|
||||||
|
For some releases the man page in the man format.
|
||||||
|
|
||||||
|
Surf to
|
||||||
|
|
||||||
|
http://www.exactcode.de/oss/minised/
|
||||||
|
http://www.catb.org/~esr/
|
||||||
|
|
||||||
|
for updates of this software. There is a sed FAQ kept at these
|
||||||
|
locations:
|
||||||
|
|
||||||
|
http://www.dreamwvr.com/sed-info/sed-faq.html
|
85
commands/sed/sed.h
Normal file
85
commands/sed/sed.h
Normal file
|
@ -0,0 +1,85 @@
|
||||||
|
/* sed.h -- types and constants for the stream editor
|
||||||
|
Copyright (C) 1995-2003 Eric S. Raymond
|
||||||
|
Copyright (C) 2004-2005 Rene Rebe
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define TRUE 1
|
||||||
|
#define FALSE 0
|
||||||
|
|
||||||
|
/* data area sizes used by both modules */
|
||||||
|
#define MAXBUF 4000 /* current line buffer size */
|
||||||
|
#define MAXAPPENDS 20 /* maximum number of appends */
|
||||||
|
#define MAXTAGS 9 /* tagged patterns are \1 to \9 */
|
||||||
|
#define MAXCMDS 200 /* maximum number of compiled commands */
|
||||||
|
#define MAXLINES 256 /* max # numeric addresses to compile */
|
||||||
|
|
||||||
|
/* constants for compiled-command representation */
|
||||||
|
#define EQCMD 0x01 /* = -- print current line number */
|
||||||
|
#define ACMD 0x02 /* a -- append text after current line */
|
||||||
|
#define BCMD 0x03 /* b -- branch to label */
|
||||||
|
#define CCMD 0x04 /* c -- change current line */
|
||||||
|
#define DCMD 0x05 /* d -- delete all of pattern space */
|
||||||
|
#define CDCMD 0x06 /* D -- delete first line of pattern space */
|
||||||
|
#define GCMD 0x07 /* g -- copy hold space to pattern space */
|
||||||
|
#define CGCMD 0x08 /* G -- append hold space to pattern space */
|
||||||
|
#define HCMD 0x09 /* h -- copy pattern space to hold space */
|
||||||
|
#define CHCMD 0x0A /* H -- append hold space to pattern space */
|
||||||
|
#define ICMD 0x0B /* i -- insert text before current line */
|
||||||
|
#define LCMD 0x0C /* l -- print pattern space in escaped form */
|
||||||
|
#define CLCMD 0x20 /* L -- hexdump */
|
||||||
|
#define NCMD 0x0D /* n -- get next line into pattern space */
|
||||||
|
#define CNCMD 0x0E /* N -- append next line to pattern space */
|
||||||
|
#define PCMD 0x0F /* p -- print pattern space to output */
|
||||||
|
#define CPCMD 0x10 /* P -- print first line of pattern space */
|
||||||
|
#define QCMD 0x11 /* q -- exit the stream editor */
|
||||||
|
#define RCMD 0x12 /* r -- read in a file after current line */
|
||||||
|
#define SCMD 0x13 /* s -- regular-expression substitute */
|
||||||
|
#define TCMD 0x14 /* t -- branch on last substitute successful */
|
||||||
|
#define CTCMD 0x15 /* T -- branch on last substitute failed */
|
||||||
|
#define WCMD 0x16 /* w -- write pattern space to file */
|
||||||
|
#define CWCMD 0x17 /* W -- write first line of pattern space */
|
||||||
|
#define XCMD 0x18 /* x -- exhange pattern and hold spaces */
|
||||||
|
#define YCMD 0x19 /* y -- transliterate text */
|
||||||
|
|
||||||
|
typedef struct cmd_t /* compiled-command representation */
|
||||||
|
{
|
||||||
|
char *addr1; /* first address for command */
|
||||||
|
char *addr2; /* second address for command */
|
||||||
|
union
|
||||||
|
{
|
||||||
|
char *lhs; /* s command lhs */
|
||||||
|
struct cmd_t *link; /* label link */
|
||||||
|
} u;
|
||||||
|
char command; /* command code */
|
||||||
|
char *rhs; /* s command replacement string */
|
||||||
|
FILE *fout; /* associated output file descriptor */
|
||||||
|
struct
|
||||||
|
{
|
||||||
|
unsigned allbut : 1; /* was negation specified? */
|
||||||
|
unsigned global : 1; /* was p postfix specified? */
|
||||||
|
unsigned print : 2; /* was g postfix specified? */
|
||||||
|
unsigned inrange : 1; /* in an address range? */
|
||||||
|
} flags;
|
||||||
|
unsigned nth; /* sed nth occurance */
|
||||||
|
}
|
||||||
|
sedcmd; /* use this name for declarations */
|
||||||
|
|
||||||
|
#define BAD ((char *) -1) /* guaranteed not a string ptr */
|
||||||
|
|
||||||
|
/* address and regular expression compiled-form markers */
|
||||||
|
#define STAR 1 /* marker for Kleene star */
|
||||||
|
#define CCHR 2 /* non-newline character to be matched follows */
|
||||||
|
#define CDOT 4 /* dot wild-card marker */
|
||||||
|
#define CCL 6 /* character class follows */
|
||||||
|
#define CNL 8 /* match line start */
|
||||||
|
#define CDOL 10 /* match line end */
|
||||||
|
#define CBRA 12 /* tagged pattern start marker */
|
||||||
|
#define CKET 14 /* tagged pattern end marker */
|
||||||
|
#define CBACK 16 /* backslash-digit pair marker */
|
||||||
|
#define CLNUM 18 /* numeric-address index follows */
|
||||||
|
#define CEND 20 /* symbol for end-of-source */
|
||||||
|
#define CEOF 22 /* end-of-field mark */
|
||||||
|
|
||||||
|
#define bits(b) (1 << (b))
|
||||||
|
|
||||||
|
/* sed.h ends here */
|
956
commands/sed/sedcomp.c
Normal file
956
commands/sed/sedcomp.c
Normal file
|
@ -0,0 +1,956 @@
|
||||||
|
/* sedcomp.c -- stream editor main and compilation phase
|
||||||
|
Copyright (C) 1995-2003 Eric S. Raymond
|
||||||
|
Copyright (C) 2004-2006 Rene Rebe
|
||||||
|
|
||||||
|
The stream editor compiles its command input (from files or -e options)
|
||||||
|
into an internal form using compile() then executes the compiled form using
|
||||||
|
execute(). Main() just initializes data structures, interprets command line
|
||||||
|
options, and calls compile() and execute() in appropriate sequence.
|
||||||
|
The data structure produced by compile() is an array of compiled-command
|
||||||
|
structures (type sedcmd). These contain several pointers into pool[], the
|
||||||
|
regular-expression and text-data pool, plus a command code and g & p flags.
|
||||||
|
In the special case that the command is a label the struct will hold a ptr
|
||||||
|
into the labels array labels[] during most of the compile, until resolve()
|
||||||
|
resolves references at the end.
|
||||||
|
The operation of execute() is described in its source module.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h> /* exit */
|
||||||
|
#include <stdio.h> /* uses getc, fprintf, fopen, fclose */
|
||||||
|
#include <ctype.h> /* isdigit */
|
||||||
|
#include <string.h> /* strcmp */
|
||||||
|
#include "sed.h" /* command type struct and name defines */
|
||||||
|
|
||||||
|
/***** public stuff ******/
|
||||||
|
|
||||||
|
#define MAXCMDS 200 /* maximum number of compiled commands */
|
||||||
|
#define MAXLINES 256 /* max # numeric addresses to compile */
|
||||||
|
|
||||||
|
/* main data areas */
|
||||||
|
char linebuf[MAXBUF+1]; /* current-line buffer */
|
||||||
|
sedcmd cmds[MAXCMDS+1]; /* hold compiled commands */
|
||||||
|
long linenum[MAXLINES]; /* numeric-addresses table */
|
||||||
|
|
||||||
|
/* miscellaneous shared variables */
|
||||||
|
int nflag; /* -n option flag */
|
||||||
|
int eargc; /* scratch copy of argument count */
|
||||||
|
sedcmd *pending = NULL; /* next command to be executed */
|
||||||
|
|
||||||
|
int last_line_used = 0; /* last line address ($) was used */
|
||||||
|
|
||||||
|
void die (const char* msg) {
|
||||||
|
fprintf(stderr, "sed: ");
|
||||||
|
fprintf(stderr, msg, linebuf);
|
||||||
|
fprintf(stderr, "\n");
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/***** module common stuff *****/
|
||||||
|
|
||||||
|
#define POOLSIZE 10000 /* size of string-pool space */
|
||||||
|
#define WFILES 10 /* max # w output files that can be compiled */
|
||||||
|
#define RELIMIT 256 /* max chars in compiled RE */
|
||||||
|
#define MAXDEPTH 20 /* maximum {}-nesting level */
|
||||||
|
#define MAXLABS 50 /* max # of labels that can be handled */
|
||||||
|
|
||||||
|
#define SKIPWS(pc) while ((*pc==' ') || (*pc=='\t')) pc++
|
||||||
|
#define IFEQ(x, v) if (*x == v) x++ , /* do expression */
|
||||||
|
|
||||||
|
/* error messages */
|
||||||
|
static char AGMSG[] = "garbled address %s";
|
||||||
|
static char CGMSG[] = "garbled command %s";
|
||||||
|
static char TMTXT[] = "too much text: %s";
|
||||||
|
static char AD1NG[] = "no addresses allowed for %s";
|
||||||
|
static char AD2NG[] = "only one address allowed for %s";
|
||||||
|
static char TMCDS[] = "too many commands, last was %s";
|
||||||
|
static char COCFI[] = "cannot open command-file %s";
|
||||||
|
static char UFLAG[] = "unknown flag %c";
|
||||||
|
/*static char COOFI[] = "cannot open %s";*/
|
||||||
|
static char CCOFI[] = "cannot create %s";
|
||||||
|
static char ULABL[] = "undefined label %s";
|
||||||
|
static char TMLBR[] = "too many {'s";
|
||||||
|
static char FRENL[] = "first RE must be non-null";
|
||||||
|
static char NSCAX[] = "no such command as %s";
|
||||||
|
static char TMRBR[] = "too many }'s";
|
||||||
|
static char DLABL[] = "duplicate label %s";
|
||||||
|
static char TMLAB[] = "too many labels: %s";
|
||||||
|
static char TMWFI[] = "too many w files";
|
||||||
|
static char REITL[] = "RE too long: %s";
|
||||||
|
static char TMLNR[] = "too many line numbers";
|
||||||
|
static char TRAIL[] = "command \"%s\" has trailing garbage";
|
||||||
|
static char RETER[] = "RE not terminated: %s";
|
||||||
|
static char CCERR[] = "unknown character class: %s";
|
||||||
|
|
||||||
|
/* cclass to c function mapping ,-) */
|
||||||
|
const char* cclasses[] = {
|
||||||
|
"alnum", "a-zA-Z0-9",
|
||||||
|
"lower", "a-z",
|
||||||
|
"space", " \f\n\r\t\v",
|
||||||
|
"alpha", "a-zA-Z",
|
||||||
|
"digit", "0-9",
|
||||||
|
"upper", "A-Z",
|
||||||
|
"blank", " \t",
|
||||||
|
"xdigit", "0-9A-Fa-f",
|
||||||
|
"cntrl", "\x01-\x1f\x7e",
|
||||||
|
"print", " -\x7e",
|
||||||
|
"graph", "!-\x7e",
|
||||||
|
"punct", "!-/:-@[-`{-\x7e",
|
||||||
|
NULL, NULL};
|
||||||
|
|
||||||
|
typedef struct /* represent a command label */
|
||||||
|
{
|
||||||
|
char *name; /* the label name */
|
||||||
|
sedcmd *last; /* it's on the label search list */
|
||||||
|
sedcmd *address; /* pointer to the cmd it labels */
|
||||||
|
} label;
|
||||||
|
|
||||||
|
/* label handling */
|
||||||
|
static label labels[MAXLABS]; /* here's the label table */
|
||||||
|
static label *lab = labels + 1; /* pointer to current label */
|
||||||
|
static label *lablst = labels; /* header for search list */
|
||||||
|
|
||||||
|
/* string pool for regular expressions, append text, etc. etc. */
|
||||||
|
static char pool[POOLSIZE]; /* the pool */
|
||||||
|
static char *fp = pool; /* current pool pointer */
|
||||||
|
static char *poolend = pool + POOLSIZE; /* pointer past pool end */
|
||||||
|
|
||||||
|
/* compilation state */
|
||||||
|
static FILE *cmdf = NULL; /* current command source */
|
||||||
|
static char *cp = linebuf; /* compile pointer */
|
||||||
|
static sedcmd *cmdp = cmds; /* current compiled-cmd ptr */
|
||||||
|
static char *lastre = NULL; /* old RE pointer */
|
||||||
|
static int bdepth = 0; /* current {}-nesting level */
|
||||||
|
static int bcount = 0; /* # tagged patterns in current RE */
|
||||||
|
static char **eargv; /* scratch copy of argument list */
|
||||||
|
|
||||||
|
/* compilation flags */
|
||||||
|
static int eflag; /* -e option flag */
|
||||||
|
static int gflag; /* -g option flag */
|
||||||
|
|
||||||
|
/* prototypes */
|
||||||
|
static char *address(char *expbuf);
|
||||||
|
static char *gettext(char* txp);
|
||||||
|
static char *recomp(char *expbuf, char redelim);
|
||||||
|
static char *rhscomp(char* rhsp, char delim);
|
||||||
|
static char *ycomp(char *ep, char delim);
|
||||||
|
static int cmdcomp(char cchar);
|
||||||
|
static int cmdline(char *cbuf);
|
||||||
|
static label *search(label *ptr);
|
||||||
|
static void compile(void);
|
||||||
|
static void resolve(void);
|
||||||
|
|
||||||
|
/* sedexec.c protypes */
|
||||||
|
void execute(char* file);
|
||||||
|
|
||||||
|
/* main sequence of the stream editor */
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
eargc = argc; /* set local copy of argument count */
|
||||||
|
eargv = argv; /* set local copy of argument list */
|
||||||
|
cmdp->addr1 = pool; /* 1st addr expand will be at pool start */
|
||||||
|
if (eargc == 1)
|
||||||
|
exit(0); /* exit immediately if no arguments */
|
||||||
|
|
||||||
|
/* scan through the arguments, interpreting each one */
|
||||||
|
while ((--eargc > 0) && (**++eargv == '-'))
|
||||||
|
switch (eargv[0][1])
|
||||||
|
{
|
||||||
|
case 'e':
|
||||||
|
eflag++; compile(); /* compile with e flag on */
|
||||||
|
eflag = 0;
|
||||||
|
continue; /* get another argument */
|
||||||
|
case 'f':
|
||||||
|
if (eargc-- <= 0) /* barf if no -f file */
|
||||||
|
exit(2);
|
||||||
|
if ((cmdf = fopen(*++eargv, "r")) == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, COCFI, *eargv);
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
compile(); /* file is O.K., compile it */
|
||||||
|
fclose(cmdf);
|
||||||
|
continue; /* go back for another argument */
|
||||||
|
case 'g':
|
||||||
|
gflag++; /* set global flag on all s cmds */
|
||||||
|
continue;
|
||||||
|
case 'n':
|
||||||
|
nflag++; /* no print except on p flag or w */
|
||||||
|
continue;
|
||||||
|
default:
|
||||||
|
fprintf(stdout, UFLAG, eargv[0][1]);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cmdp == cmds) /* no commands have been compiled */
|
||||||
|
{
|
||||||
|
eargv--; eargc++;
|
||||||
|
eflag++; compile(); eflag = 0;
|
||||||
|
eargv++; eargc--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bdepth) /* we have unbalanced squigglies */
|
||||||
|
die(TMLBR);
|
||||||
|
|
||||||
|
lablst->address = cmdp; /* set up header of label linked list */
|
||||||
|
resolve(); /* resolve label table indirections */
|
||||||
|
if (eargc <= 0) /* if there were no -e commands */
|
||||||
|
execute(NULL); /* execute commands from stdin only */
|
||||||
|
else while(--eargc>=0) /* else execute only -e commands */
|
||||||
|
execute(*eargv++);
|
||||||
|
exit(0); /* everything was O.K. if we got here */
|
||||||
|
}
|
||||||
|
|
||||||
|
#define H 0x80 /* 128 bit, on if there's really code for command */
|
||||||
|
#define LOWCMD 56 /* = '8', lowest char indexed in cmdmask */
|
||||||
|
|
||||||
|
/* indirect through this to get command internal code, if it exists */
|
||||||
|
static char cmdmask[] =
|
||||||
|
{
|
||||||
|
0, 0, H, 0, 0, H+EQCMD,0, 0,
|
||||||
|
0, 0, 0, 0, H+CDCMD,0, 0, CGCMD,
|
||||||
|
CHCMD, 0, 0, 0, H+CLCMD,0, CNCMD, 0,
|
||||||
|
CPCMD, 0, 0, 0, H+CTCMD,0, 0, H+CWCMD,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, H+ACMD, H+BCMD, H+CCMD, DCMD, 0, 0, GCMD,
|
||||||
|
HCMD, H+ICMD, 0, 0, H+LCMD, 0, NCMD, 0,
|
||||||
|
PCMD, H+QCMD, H+RCMD, H+SCMD, H+TCMD, 0, 0, H+WCMD,
|
||||||
|
XCMD, H+YCMD, 0, H+BCMD, 0, H, 0, 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* precompile sed commands out of a file */
|
||||||
|
static void compile(void)
|
||||||
|
{
|
||||||
|
char ccode;
|
||||||
|
|
||||||
|
for(;;) /* main compilation loop */
|
||||||
|
{
|
||||||
|
SKIPWS(cp);
|
||||||
|
if (*cp == ';') {
|
||||||
|
cp++;
|
||||||
|
SKIPWS(cp);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*cp == '\0' || *cp == '#') /* get a new command line */
|
||||||
|
if (cmdline(cp = linebuf) < 0)
|
||||||
|
break;
|
||||||
|
SKIPWS(cp);
|
||||||
|
|
||||||
|
if (*cp == '\0' || *cp == '#') /* a comment */
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* compile first address */
|
||||||
|
if (fp > poolend)
|
||||||
|
die(TMTXT);
|
||||||
|
else if ((fp = address(cmdp->addr1 = fp)) == BAD)
|
||||||
|
die(AGMSG);
|
||||||
|
|
||||||
|
if (fp == cmdp->addr1) /* if empty RE was found */
|
||||||
|
{
|
||||||
|
if (lastre) /* if there was previous RE */
|
||||||
|
cmdp->addr1 = lastre; /* use it */
|
||||||
|
else
|
||||||
|
die(FRENL);
|
||||||
|
}
|
||||||
|
else if (fp == NULL) /* if fp was NULL */
|
||||||
|
{
|
||||||
|
fp = cmdp->addr1; /* use current pool location */
|
||||||
|
cmdp->addr1 = NULL;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
lastre = cmdp->addr1;
|
||||||
|
if (*cp == ',' || *cp == ';') /* there's 2nd addr */
|
||||||
|
{
|
||||||
|
cp++;
|
||||||
|
if (fp > poolend) die(TMTXT);
|
||||||
|
fp = address(cmdp->addr2 = fp);
|
||||||
|
if (fp == BAD || fp == NULL) die(AGMSG);
|
||||||
|
if (fp == cmdp->addr2)
|
||||||
|
cmdp->addr2 = lastre;
|
||||||
|
else
|
||||||
|
lastre = cmdp->addr2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
cmdp->addr2 = NULL; /* no 2nd address */
|
||||||
|
}
|
||||||
|
if (fp > poolend) die(TMTXT);
|
||||||
|
|
||||||
|
SKIPWS(cp); /* discard whitespace after address */
|
||||||
|
|
||||||
|
if (*cp == '!') {
|
||||||
|
cmdp->flags.allbut = 1;
|
||||||
|
cp++; SKIPWS(cp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get cmd char, range-check it */
|
||||||
|
if ((*cp < LOWCMD) || (*cp > '~')
|
||||||
|
|| ((ccode = cmdmask[*cp - LOWCMD]) == 0))
|
||||||
|
die(NSCAX);
|
||||||
|
|
||||||
|
cmdp->command = ccode & ~H; /* fill in command value */
|
||||||
|
if ((ccode & H) == 0) /* if no compile-time code */
|
||||||
|
cp++; /* discard command char */
|
||||||
|
else if (cmdcomp(*cp++)) /* execute it; if ret = 1 */
|
||||||
|
continue; /* skip next line read */
|
||||||
|
|
||||||
|
if (++cmdp >= cmds + MAXCMDS) die(TMCDS);
|
||||||
|
|
||||||
|
SKIPWS(cp); /* look for trailing stuff */
|
||||||
|
if (*cp != '\0')
|
||||||
|
{
|
||||||
|
if (*cp == ';')
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (*cp != '#' && *cp != '}')
|
||||||
|
die(TRAIL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* compile a single command */
|
||||||
|
static int cmdcomp(char cchar)
|
||||||
|
{
|
||||||
|
static sedcmd **cmpstk[MAXDEPTH]; /* current cmd stack for {} */
|
||||||
|
static const char *fname[WFILES]; /* w file name pointers */
|
||||||
|
static FILE *fout[WFILES]; /* w file file ptrs */
|
||||||
|
static int nwfiles = 2; /* count of open w files */
|
||||||
|
int i; /* indexing dummy used in w */
|
||||||
|
sedcmd *sp1, *sp2; /* temps for label searches */
|
||||||
|
label *lpt; /* ditto, and the searcher */
|
||||||
|
char redelim; /* current RE delimiter */
|
||||||
|
|
||||||
|
fout[0] = stdout;
|
||||||
|
fout[1] = stderr;
|
||||||
|
|
||||||
|
fname[0] = "/dev/stdout";
|
||||||
|
fname[1] = "/dev/stderr";
|
||||||
|
|
||||||
|
switch(cchar)
|
||||||
|
{
|
||||||
|
case '{': /* start command group */
|
||||||
|
cmdp->flags.allbut = !cmdp->flags.allbut;
|
||||||
|
cmpstk[bdepth++] = &(cmdp->u.link);
|
||||||
|
if (++cmdp >= cmds + MAXCMDS) die(TMCDS);
|
||||||
|
if (*cp == '\0') *cp++ = ';', *cp = '\0'; /* get next cmd w/o lineread */
|
||||||
|
return(1);
|
||||||
|
|
||||||
|
case '}': /* end command group */
|
||||||
|
if (cmdp->addr1) die(AD1NG); /* no addresses allowed */
|
||||||
|
if (--bdepth < 0) die(TMRBR); /* too many right braces */
|
||||||
|
*cmpstk[bdepth] = cmdp; /* set the jump address */
|
||||||
|
return(1);
|
||||||
|
|
||||||
|
case '=': /* print current source line number */
|
||||||
|
case 'q': /* exit the stream editor */
|
||||||
|
if (cmdp->addr2) die(AD2NG);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ':': /* label declaration */
|
||||||
|
if (cmdp->addr1) die(AD1NG); /* no addresses allowed */
|
||||||
|
fp = gettext(lab->name = fp); /* get the label name */
|
||||||
|
if ((lpt = search(lab))) /* does it have a double? */
|
||||||
|
{
|
||||||
|
if (lpt->address) die(DLABL); /* yes, abort */
|
||||||
|
}
|
||||||
|
else /* check that it doesn't overflow label table */
|
||||||
|
{
|
||||||
|
lab->last = NULL;
|
||||||
|
lpt = lab;
|
||||||
|
if (++lab >= labels + MAXLABS) die(TMLAB);
|
||||||
|
}
|
||||||
|
lpt->address = cmdp;
|
||||||
|
return(1);
|
||||||
|
|
||||||
|
case 'b': /* branch command */
|
||||||
|
case 't': /* branch-on-succeed command */
|
||||||
|
case 'T': /* branch-on-fail command */
|
||||||
|
SKIPWS(cp);
|
||||||
|
if (*cp == '\0') /* if branch is to start of cmds... */
|
||||||
|
{
|
||||||
|
/* add current command to end of label last */
|
||||||
|
if ((sp1 = lablst->last))
|
||||||
|
{
|
||||||
|
while((sp2 = sp1->u.link))
|
||||||
|
sp1 = sp2;
|
||||||
|
sp1->u.link = cmdp;
|
||||||
|
}
|
||||||
|
else /* lablst->last == NULL */
|
||||||
|
lablst->last = cmdp;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
fp = gettext(lab->name = fp); /* else get label into pool */
|
||||||
|
if ((lpt = search(lab))) /* enter branch to it */
|
||||||
|
{
|
||||||
|
if (lpt->address)
|
||||||
|
cmdp->u.link = lpt->address;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
sp1 = lpt->last;
|
||||||
|
while((sp2 = sp1->u.link))
|
||||||
|
sp1 = sp2;
|
||||||
|
sp1->u.link = cmdp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else /* matching named label not found */
|
||||||
|
{
|
||||||
|
lab->last = cmdp; /* add the new label */
|
||||||
|
lab->address = NULL; /* it's forward of here */
|
||||||
|
if (++lab >= labels + MAXLABS) /* overflow if last */
|
||||||
|
die(TMLAB);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'a': /* append text */
|
||||||
|
case 'i': /* insert text */
|
||||||
|
case 'r': /* read file into stream */
|
||||||
|
if (cmdp->addr2) die(AD2NG);
|
||||||
|
case 'c': /* change text */
|
||||||
|
if ((*cp == '\\') && (*++cp == '\n')) cp++;
|
||||||
|
fp = gettext(cmdp->u.lhs = fp);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'D': /* delete current line in hold space */
|
||||||
|
cmdp->u.link = cmds;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 's': /* substitute regular expression */
|
||||||
|
if (*cp == 0) /* get delimiter from 1st ch */
|
||||||
|
die(RETER);
|
||||||
|
else
|
||||||
|
redelim = *cp++;
|
||||||
|
|
||||||
|
if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD)
|
||||||
|
die(CGMSG);
|
||||||
|
if (fp == cmdp->u.lhs) { /* if compiled RE zero len */
|
||||||
|
if (lastre) {
|
||||||
|
cmdp->u.lhs = lastre; /* use the previous one */
|
||||||
|
cp++; /* skip delim */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
die(FRENL);
|
||||||
|
}
|
||||||
|
else /* otherwise */
|
||||||
|
lastre = cmdp->u.lhs; /* save the one just found */
|
||||||
|
|
||||||
|
if ((cmdp->rhs = fp) > poolend) die(TMTXT);
|
||||||
|
if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) die(CGMSG);
|
||||||
|
if (gflag) cmdp->flags.global++;
|
||||||
|
while (*cp == 'g' || *cp == 'p' || *cp == 'P' || isdigit(*cp))
|
||||||
|
{
|
||||||
|
IFEQ(cp, 'g') cmdp->flags.global++;
|
||||||
|
IFEQ(cp, 'p') cmdp->flags.print = 1;
|
||||||
|
IFEQ(cp, 'P') cmdp->flags.print = 2;
|
||||||
|
if(isdigit(*cp))
|
||||||
|
{
|
||||||
|
if (cmdp->nth)
|
||||||
|
break; /* no multiple n args */
|
||||||
|
|
||||||
|
cmdp->nth = atoi(cp); /* check 0? */
|
||||||
|
while (isdigit(*cp)) cp++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
case 'l': /* list pattern space */
|
||||||
|
case 'L': /* dump pattern space */
|
||||||
|
if (*cp == 'w')
|
||||||
|
cp++; /* and execute a w command! */
|
||||||
|
else
|
||||||
|
break; /* s or L or l is done */
|
||||||
|
|
||||||
|
case 'w': /* write-pattern-space command */
|
||||||
|
case 'W': /* write-first-line command */
|
||||||
|
if (nwfiles >= WFILES) die(TMWFI);
|
||||||
|
fname[nwfiles] = fp;
|
||||||
|
fp = gettext((fname[nwfiles] = fp, fp)); /* filename will be in pool */
|
||||||
|
for(i = nwfiles-1; i >= 0; i--) /* match it in table */
|
||||||
|
if (strcmp(fname[nwfiles], fname[i]) == 0)
|
||||||
|
{
|
||||||
|
cmdp->fout = fout[i];
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
/* if didn't find one, open new out file */
|
||||||
|
if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, CCOFI, fname[nwfiles]);
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
fout[nwfiles++] = cmdp->fout;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'y': /* transliterate text */
|
||||||
|
fp = ycomp(cmdp->u.lhs = fp, *cp++); /* compile translit */
|
||||||
|
if (fp == BAD) die(CGMSG); /* fail on bad form */
|
||||||
|
if (fp > poolend) die(TMTXT); /* fail on overflow */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return(0); /* succeeded in interpreting one command */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* generate replacement string for substitute command right hand side
|
||||||
|
rhsp: place to compile expression to
|
||||||
|
delim: regular-expression end-mark to look for */
|
||||||
|
static char *rhscomp(char* rhsp, char delim) /* uses bcount */
|
||||||
|
{
|
||||||
|
register char *p = cp;
|
||||||
|
|
||||||
|
for(;;)
|
||||||
|
/* copy for the likely case it is not s.th. special */
|
||||||
|
if ((*rhsp = *p++) == '\\') /* back reference or escape */
|
||||||
|
{
|
||||||
|
if (*p >= '0' && *p <= '9') /* back reference */
|
||||||
|
{
|
||||||
|
dobackref:
|
||||||
|
*rhsp = *p++;
|
||||||
|
/* check validity of pattern tag */
|
||||||
|
if (*rhsp > bcount + '0')
|
||||||
|
return(BAD);
|
||||||
|
*rhsp++ |= 0x80; /* mark the good ones */
|
||||||
|
}
|
||||||
|
else /* escape */
|
||||||
|
{
|
||||||
|
switch (*p) {
|
||||||
|
case 'n': *rhsp = '\n'; break;
|
||||||
|
case 'r': *rhsp = '\r'; break;
|
||||||
|
case 't': *rhsp = '\t'; break;
|
||||||
|
default: *rhsp = *p;
|
||||||
|
}
|
||||||
|
rhsp++; p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (*rhsp == delim) /* found RE end, hooray... */
|
||||||
|
{
|
||||||
|
*rhsp++ = '\0'; /* cap the expression string */
|
||||||
|
cp = p;
|
||||||
|
return(rhsp); /* pt at 1 past the RE */
|
||||||
|
}
|
||||||
|
else if (*rhsp == '&') /* special case, convert to backref \0 */
|
||||||
|
{
|
||||||
|
*--p = '0';
|
||||||
|
goto dobackref;
|
||||||
|
}
|
||||||
|
else if (*rhsp++ == '\0') /* last ch not RE end, help! */
|
||||||
|
return(BAD);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* compile a regular expression to internal form
|
||||||
|
expbuf: place to compile it to
|
||||||
|
redelim: RE end-marker to look for */
|
||||||
|
static char *recomp(char *expbuf, char redelim) /* uses cp, bcount */
|
||||||
|
{
|
||||||
|
register char *ep = expbuf; /* current-compiled-char pointer */
|
||||||
|
register char *sp = cp; /* source-character ptr */
|
||||||
|
register int c; /* current-character pointer */
|
||||||
|
char negclass; /* all-but flag */
|
||||||
|
char *lastep; /* ptr to last expr compiled */
|
||||||
|
char *lastep2; /* dito, but from the last loop */
|
||||||
|
char *svclass; /* start of current char class */
|
||||||
|
char brnest[MAXTAGS]; /* bracket-nesting array */
|
||||||
|
char *brnestp; /* ptr to current bracket-nest */
|
||||||
|
char *pp; /* scratch pointer */
|
||||||
|
int classct; /* class element count */
|
||||||
|
int tags; /* # of closed tags */
|
||||||
|
|
||||||
|
if (*cp == redelim) { /* if first char is RE endmarker */
|
||||||
|
return(ep);
|
||||||
|
}
|
||||||
|
|
||||||
|
lastep = lastep2 = NULL; /* there's no previous RE */
|
||||||
|
brnestp = brnest; /* initialize ptr to brnest array */
|
||||||
|
tags = bcount = 0; /* initialize counters */
|
||||||
|
|
||||||
|
if ((*ep++ = (*sp == '^'))) /* check for start-of-line syntax */
|
||||||
|
sp++;
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (*sp == 0) /* no termination */
|
||||||
|
die (RETER);
|
||||||
|
if (ep >= expbuf + RELIMIT) /* match is too large */
|
||||||
|
return(cp = sp, BAD);
|
||||||
|
if ((c = *sp++) == redelim) /* found the end of the RE */
|
||||||
|
{
|
||||||
|
cp = sp;
|
||||||
|
if (brnestp != brnest) /* \(, \) unbalanced */
|
||||||
|
return(BAD);
|
||||||
|
*ep++ = CEOF; /* write end-of-pattern mark */
|
||||||
|
return(ep); /* return ptr to compiled RE */
|
||||||
|
}
|
||||||
|
|
||||||
|
lastep = lastep2;
|
||||||
|
lastep2 = ep;
|
||||||
|
|
||||||
|
switch (c)
|
||||||
|
{
|
||||||
|
case '\\':
|
||||||
|
if ((c = *sp++) == '(') /* start tagged section */
|
||||||
|
{
|
||||||
|
if (bcount >= MAXTAGS)
|
||||||
|
return(cp = sp, BAD);
|
||||||
|
*brnestp++ = bcount; /* update tag stack */
|
||||||
|
*ep++ = CBRA; /* enter tag-start */
|
||||||
|
*ep++ = bcount++; /* bump tag count */
|
||||||
|
lastep2 = NULL;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (c == ')') /* end tagged section */
|
||||||
|
{
|
||||||
|
if (brnestp <= brnest) /* extra \) */
|
||||||
|
return(cp = sp, BAD);
|
||||||
|
*ep++ = CKET; /* enter end-of-tag */
|
||||||
|
*ep++ = *--brnestp; /* pop tag stack */
|
||||||
|
tags++; /* count closed tags */
|
||||||
|
for (lastep2 = ep-1; *lastep2 != CBRA; )
|
||||||
|
--lastep2; /* FIXME: lastep becomes start */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (c >= '1' && c <= '9' && c != redelim) /* tag use, if !delim */
|
||||||
|
{
|
||||||
|
if ((c -= '1') >= tags) /* too few */
|
||||||
|
return(BAD);
|
||||||
|
*ep++ = CBACK; /* enter tag mark */
|
||||||
|
*ep++ = c; /* and the number */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (c == '\n') /* escaped newline no good */
|
||||||
|
return(cp = sp, BAD);
|
||||||
|
else if (c == 'n') /* match a newline */
|
||||||
|
c = '\n';
|
||||||
|
else if (c == 't') /* match a tab */
|
||||||
|
c = '\t';
|
||||||
|
else if (c == 'r') /* match a return */
|
||||||
|
c = '\r';
|
||||||
|
else if (c == '+') /* 1..n repeat of previous pattern */
|
||||||
|
{
|
||||||
|
if (lastep == NULL) /* if + not first on line */
|
||||||
|
goto defchar; /* match a literal + */
|
||||||
|
pp = ep; /* else save old ep */
|
||||||
|
*ep++ = *lastep++ | STAR; /* flag the copy */
|
||||||
|
while (lastep < pp) /* so we can blt the pattern */
|
||||||
|
*ep++ = *lastep++;
|
||||||
|
lastep2 = lastep; /* no new expression */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
goto defchar; /* else match \c */
|
||||||
|
|
||||||
|
case '\0': /* ignore nuls */
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case '\n': /* trailing pattern delimiter is missing */
|
||||||
|
return(cp = sp, BAD);
|
||||||
|
|
||||||
|
case '.': /* match any char except newline */
|
||||||
|
*ep++ = CDOT;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case '*': /* 0..n repeat of previous pattern */
|
||||||
|
if (lastep == NULL) /* if * isn't first on line */
|
||||||
|
goto defchar; /* match a literal * */
|
||||||
|
*lastep |= STAR; /* flag previous pattern */
|
||||||
|
lastep2 = lastep; /* no new expression */
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case '$': /* match only end-of-line */
|
||||||
|
if (*sp != redelim) /* if we're not at end of RE */
|
||||||
|
goto defchar; /* match a literal $ */
|
||||||
|
*ep++ = CDOL; /* insert end-symbol mark */
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case '[': /* begin character set pattern */
|
||||||
|
if (ep + 17 >= expbuf + RELIMIT)
|
||||||
|
die(REITL);
|
||||||
|
*ep++ = CCL; /* insert class mark */
|
||||||
|
if ((negclass = ((c = *sp++) == '^')))
|
||||||
|
c = *sp++;
|
||||||
|
svclass = sp; /* save ptr to class start */
|
||||||
|
do {
|
||||||
|
if (c == '\0') die(CGMSG);
|
||||||
|
/* handle predefined character classes */
|
||||||
|
if (c == '[' && *sp == ':')
|
||||||
|
{
|
||||||
|
/* look for the matching ":]]" */
|
||||||
|
char *p;
|
||||||
|
const char *p2;
|
||||||
|
for (p = sp+3; *p; p++)
|
||||||
|
if (*p == ']' &&
|
||||||
|
*(p-1) == ']' &&
|
||||||
|
*(p-2) == ':')
|
||||||
|
{
|
||||||
|
char cc[8];
|
||||||
|
const char **it;
|
||||||
|
p2 = sp+1;
|
||||||
|
for (p2 = sp+1;
|
||||||
|
p2 < p-2 && p2-sp-1 < sizeof(cc);
|
||||||
|
p2++)
|
||||||
|
cc[p2-sp-1] = *p2;
|
||||||
|
cc[p2-sp-1] = 0; /* termination */
|
||||||
|
|
||||||
|
it = cclasses;
|
||||||
|
while (*it && strcmp(*it, cc))
|
||||||
|
it +=2;
|
||||||
|
if (!*it++)
|
||||||
|
die(CCERR);
|
||||||
|
|
||||||
|
/* generate mask */
|
||||||
|
p2 = *it;
|
||||||
|
while (*p2) {
|
||||||
|
if (p2[1] == '-' && p2[2]) {
|
||||||
|
for (c = *p2; c <= p2[2]; c++)
|
||||||
|
ep[c >> 3] |= bits(c & 7);
|
||||||
|
p2 += 3;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
c = *p2++;
|
||||||
|
ep[c >> 3] |= bits(c & 7);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sp = p; c = 0; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* handle character ranges */
|
||||||
|
if (c == '-' && sp > svclass && *sp != ']')
|
||||||
|
for (c = sp[-2]; c < *sp; c++)
|
||||||
|
ep[c >> 3] |= bits(c & 7);
|
||||||
|
|
||||||
|
/* handle escape sequences in sets */
|
||||||
|
if (c == '\\')
|
||||||
|
{
|
||||||
|
if ((c = *sp++) == 'n')
|
||||||
|
c = '\n';
|
||||||
|
else if (c == 't')
|
||||||
|
c = '\t';
|
||||||
|
else if (c == 'r')
|
||||||
|
c = '\r';
|
||||||
|
}
|
||||||
|
|
||||||
|
/* enter (possibly translated) char in set */
|
||||||
|
if (c)
|
||||||
|
ep[c >> 3] |= bits(c & 7);
|
||||||
|
} while
|
||||||
|
((c = *sp++) != ']');
|
||||||
|
|
||||||
|
/* invert the bitmask if all-but was specified */
|
||||||
|
if (negclass)
|
||||||
|
for(classct = 0; classct < 16; classct++)
|
||||||
|
ep[classct] ^= 0xFF;
|
||||||
|
ep[0] &= 0xFE; /* never match ASCII 0 */
|
||||||
|
ep += 16; /* advance ep past set mask */
|
||||||
|
continue;
|
||||||
|
|
||||||
|
defchar: /* match literal character */
|
||||||
|
default: /* which is what we'd do by default */
|
||||||
|
*ep++ = CCHR; /* insert character mark */
|
||||||
|
*ep++ = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* read next command from -e argument or command file */
|
||||||
|
static int cmdline(char *cbuf) /* uses eflag, eargc, cmdf */
|
||||||
|
{
|
||||||
|
register int inc; /* not char because must hold EOF */
|
||||||
|
|
||||||
|
cbuf--; /* so pre-increment points us at cbuf */
|
||||||
|
|
||||||
|
/* e command flag is on */
|
||||||
|
if (eflag)
|
||||||
|
{
|
||||||
|
register char *p; /* ptr to current -e argument */
|
||||||
|
static char *savep; /* saves previous value of p */
|
||||||
|
|
||||||
|
if (eflag > 0) /* there are pending -e arguments */
|
||||||
|
{
|
||||||
|
eflag = -1;
|
||||||
|
if (eargc-- <= 0)
|
||||||
|
exit(2); /* if no arguments, barf */
|
||||||
|
|
||||||
|
/* else transcribe next e argument into cbuf */
|
||||||
|
p = *++eargv;
|
||||||
|
while((*++cbuf = *p++))
|
||||||
|
if (*cbuf == '\\')
|
||||||
|
{
|
||||||
|
if ((*++cbuf = *p++) == '\0')
|
||||||
|
return(savep = NULL, -1);
|
||||||
|
else
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (*cbuf == '\n') /* end of 1 cmd line */
|
||||||
|
{
|
||||||
|
*cbuf = '\0';
|
||||||
|
return(savep = p, 1);
|
||||||
|
/* we'll be back for the rest... */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* found end-of-string; can advance to next argument */
|
||||||
|
return(savep = NULL, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((p = savep) == NULL)
|
||||||
|
return(-1);
|
||||||
|
|
||||||
|
while((*++cbuf = *p++))
|
||||||
|
if (*cbuf == '\\')
|
||||||
|
{
|
||||||
|
if ((*++cbuf = *p++) == '0')
|
||||||
|
return(savep = NULL, -1);
|
||||||
|
else
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (*cbuf == '\n')
|
||||||
|
{
|
||||||
|
*cbuf = '\0';
|
||||||
|
return(savep = p, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return(savep = NULL, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if no -e flag read from command file descriptor */
|
||||||
|
while((inc = getc(cmdf)) != EOF) /* get next char */
|
||||||
|
if ((*++cbuf = inc) == '\\') /* if it's escape */
|
||||||
|
*++cbuf = inc = getc(cmdf); /* get next char */
|
||||||
|
else if (*cbuf == '\n') /* end on newline */
|
||||||
|
return(*cbuf = '\0', 1); /* cap the string */
|
||||||
|
|
||||||
|
return(*++cbuf = '\0', -1); /* end-of-file, no more chars */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* expand an address at *cp... into expbuf, return ptr at following char */
|
||||||
|
static char *address(char *expbuf) /* uses cp, linenum */
|
||||||
|
{
|
||||||
|
static int numl = 0; /* current ind in addr-number table */
|
||||||
|
register char *rcp; /* temp compile ptr for forwd look */
|
||||||
|
long lno; /* computed value of numeric address */
|
||||||
|
|
||||||
|
if (*cp == '$') /* end-of-source address */
|
||||||
|
{
|
||||||
|
*expbuf++ = CEND; /* write symbolic end address */
|
||||||
|
*expbuf++ = CEOF; /* and the end-of-address mark (!) */
|
||||||
|
cp++; /* go to next source character */
|
||||||
|
last_line_used = TRUE;
|
||||||
|
return(expbuf); /* we're done */
|
||||||
|
}
|
||||||
|
if (*cp == '/') /* start of regular-expression match */
|
||||||
|
return(recomp(expbuf, *cp++)); /* compile the RE */
|
||||||
|
|
||||||
|
rcp = cp; lno = 0; /* now handle a numeric address */
|
||||||
|
while(*rcp >= '0' && *rcp <= '9') /* collect digits */
|
||||||
|
lno = lno*10 + *rcp++ - '0'; /* compute their value */
|
||||||
|
|
||||||
|
if (rcp > cp) /* if we caught a number... */
|
||||||
|
{
|
||||||
|
*expbuf++ = CLNUM; /* put a numeric-address marker */
|
||||||
|
*expbuf++ = numl; /* and the address table index */
|
||||||
|
linenum[numl++] = lno; /* and set the table entry */
|
||||||
|
if (numl >= MAXLINES) /* oh-oh, address table overflow */
|
||||||
|
die(TMLNR); /* abort with error message */
|
||||||
|
*expbuf++ = CEOF; /* write the end-of-address marker */
|
||||||
|
cp = rcp; /* point compile past the address */
|
||||||
|
return(expbuf); /* we're done */
|
||||||
|
}
|
||||||
|
|
||||||
|
return(NULL); /* no legal address was found */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* accept multiline input from *cp..., discarding leading whitespace
|
||||||
|
txp: where to put the text */
|
||||||
|
static char *gettext(char* txp) /* uses global cp */
|
||||||
|
{
|
||||||
|
register char *p = cp;
|
||||||
|
|
||||||
|
SKIPWS(p); /* discard whitespace */
|
||||||
|
do {
|
||||||
|
if ((*txp = *p++) == '\\') /* handle escapes */
|
||||||
|
*txp = *p++;
|
||||||
|
if (*txp == '\0') /* we're at end of input */
|
||||||
|
return(cp = --p, ++txp);
|
||||||
|
else if (*txp == '\n') /* also SKIPWS after newline */
|
||||||
|
SKIPWS(p);
|
||||||
|
} while (txp++); /* keep going till we find that nul */
|
||||||
|
return(txp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* find the label matching *ptr, return NULL if none */
|
||||||
|
static label *search(label *ptr) /* uses global lablst */
|
||||||
|
{
|
||||||
|
register label *rp;
|
||||||
|
for(rp = lablst; rp < ptr; rp++)
|
||||||
|
if ((rp->name != NULL) && (strcmp(rp->name, ptr->name) == 0))
|
||||||
|
return(rp);
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* write label links into the compiled-command space */
|
||||||
|
static void resolve(void) /* uses global lablst */
|
||||||
|
{
|
||||||
|
register label *lptr;
|
||||||
|
register sedcmd *rptr, *trptr;
|
||||||
|
|
||||||
|
/* loop through the label table */
|
||||||
|
for(lptr = lablst; lptr < lab; lptr++)
|
||||||
|
if (lptr->address == NULL) /* barf if not defined */
|
||||||
|
{
|
||||||
|
fprintf(stderr, ULABL, lptr->name);
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
else if (lptr->last) /* if last is non-null */
|
||||||
|
{
|
||||||
|
rptr = lptr->last; /* chase it */
|
||||||
|
while((trptr = rptr->u.link)) /* resolve refs */
|
||||||
|
{
|
||||||
|
rptr->u.link = lptr->address;
|
||||||
|
rptr = trptr;
|
||||||
|
}
|
||||||
|
rptr->u.link = lptr->address;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* compile a y (transliterate) command
|
||||||
|
ep: where to compile to
|
||||||
|
delim: end delimiter to look for */
|
||||||
|
static char *ycomp(char *ep, char delim)
|
||||||
|
{
|
||||||
|
char *tp, *sp;
|
||||||
|
int c;
|
||||||
|
|
||||||
|
/* scan the 'from' section for invalid chars */
|
||||||
|
for(sp = tp = cp; *tp != delim; tp++)
|
||||||
|
{
|
||||||
|
if (*tp == '\\')
|
||||||
|
tp++;
|
||||||
|
if ((*tp == '\n') || (*tp == '\0'))
|
||||||
|
return(BAD);
|
||||||
|
}
|
||||||
|
tp++; /* tp now points at first char of 'to' section */
|
||||||
|
|
||||||
|
/* now rescan the 'from' section */
|
||||||
|
while((c = *sp++ & 0x7F) != delim)
|
||||||
|
{
|
||||||
|
if (c == '\\' && *sp == 'n')
|
||||||
|
{
|
||||||
|
sp++;
|
||||||
|
c = '\n';
|
||||||
|
}
|
||||||
|
if ((ep[c] = *tp++) == '\\' && *tp == 'n')
|
||||||
|
{
|
||||||
|
ep[c] = '\n';
|
||||||
|
tp++;
|
||||||
|
}
|
||||||
|
if ((ep[c] == delim) || (ep[c] == '\0'))
|
||||||
|
return(BAD);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*tp != delim) /* 'to', 'from' parts have unequal lengths */
|
||||||
|
return(BAD);
|
||||||
|
|
||||||
|
cp = ++tp; /* point compile ptr past translit */
|
||||||
|
|
||||||
|
for(c = 0; c < 128; c++) /* fill in self-map entries in table */
|
||||||
|
if (ep[c] == 0)
|
||||||
|
ep[c] = c;
|
||||||
|
|
||||||
|
return(ep + 0x80); /* return first free location past table end */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* sedcomp.c ends here */
|
819
commands/sed/sedexec.c
Normal file
819
commands/sed/sedexec.c
Normal file
|
@ -0,0 +1,819 @@
|
||||||
|
/* sedexec.c -- axecute compiled form of stream editor commands
|
||||||
|
Copyright (C) 1995-2003 Eric S. Raymond
|
||||||
|
Copyright (C) 2004-2006 Rene Rebe
|
||||||
|
|
||||||
|
The single entry point of this module is the function execute(). It
|
||||||
|
may take a string argument (the name of a file to be used as text) or
|
||||||
|
the argument NULL which tells it to filter standard input. It executes
|
||||||
|
the compiled commands in cmds[] on each line in turn.
|
||||||
|
The function command() does most of the work. match() and advance()
|
||||||
|
are used for matching text against precompiled regular expressions and
|
||||||
|
dosub() does right-hand-side substitution. Getline() does text input;
|
||||||
|
readout() and memcmp() are output and string-comparison utilities.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdlib.h> /* exit */
|
||||||
|
#include <stdio.h> /* {f}puts, {f}printf, getc/putc, f{re}open, fclose */
|
||||||
|
#include <ctype.h> /* for isprint(), isdigit(), toascii() macros */
|
||||||
|
#include <string.h> /* for memcmp(3) */
|
||||||
|
#include "sed.h" /* command type structures & miscellaneous constants */
|
||||||
|
|
||||||
|
/***** shared variables imported from the main ******/
|
||||||
|
|
||||||
|
/* main data areas */
|
||||||
|
extern char linebuf[]; /* current-line buffer */
|
||||||
|
extern sedcmd cmds[]; /* hold compiled commands */
|
||||||
|
extern long linenum[]; /* numeric-addresses table */
|
||||||
|
|
||||||
|
/* miscellaneous shared variables */
|
||||||
|
extern int nflag; /* -n option flag */
|
||||||
|
extern int eargc; /* scratch copy of argument count */
|
||||||
|
extern sedcmd *pending; /* ptr to command waiting to be executed */
|
||||||
|
|
||||||
|
extern int last_line_used; /* last line address ($) used */
|
||||||
|
|
||||||
|
/***** end of imported stuff *****/
|
||||||
|
|
||||||
|
#define MAXHOLD MAXBUF /* size of the hold space */
|
||||||
|
#define GENSIZ MAXBUF /* maximum genbuf size */
|
||||||
|
|
||||||
|
static char LTLMSG[] = "sed: line too long\n";
|
||||||
|
|
||||||
|
static char *spend; /* current end-of-line-buffer pointer */
|
||||||
|
static long lnum = 0L; /* current source line number */
|
||||||
|
|
||||||
|
/* append buffer maintenance */
|
||||||
|
static sedcmd *appends[MAXAPPENDS]; /* array of ptrs to a,i,c commands */
|
||||||
|
static sedcmd **aptr = appends; /* ptr to current append */
|
||||||
|
|
||||||
|
/* genbuf and its pointers */
|
||||||
|
static char genbuf[GENSIZ];
|
||||||
|
static char *loc1;
|
||||||
|
static char *loc2;
|
||||||
|
static char *locs;
|
||||||
|
|
||||||
|
/* command-logic flags */
|
||||||
|
static int lastline; /* do-line flag */
|
||||||
|
static int line_with_newline; /* line had newline */
|
||||||
|
static int jump; /* jump to cmd's link address if set */
|
||||||
|
static int delete; /* delete command flag */
|
||||||
|
static int needs_advance; /* needs inc after substitution */
|
||||||
|
/* ugly HACK - neds REWORK */
|
||||||
|
|
||||||
|
/* tagged-pattern tracking */
|
||||||
|
static char *bracend[MAXTAGS]; /* tagged pattern start pointers */
|
||||||
|
static char *brastart[MAXTAGS]; /* tagged pattern end pointers */
|
||||||
|
|
||||||
|
/* prototypes */
|
||||||
|
static char *getline(char *buf, int max);
|
||||||
|
static char *place(char* asp, char* al1, char* al2);
|
||||||
|
static int advance(char* lp, char* ep, char** eob);
|
||||||
|
static int match(char *expbuf, int gf);
|
||||||
|
static int selected(sedcmd *ipc);
|
||||||
|
static int substitute(sedcmd *ipc);
|
||||||
|
static void command(sedcmd *ipc);
|
||||||
|
static void dosub(char *rhsbuf);
|
||||||
|
static void dumpto(char *p1, FILE *fp);
|
||||||
|
static void listto(char *p1, FILE *fp);
|
||||||
|
static void readout(void);
|
||||||
|
static void truncated(int h);
|
||||||
|
|
||||||
|
/* execute the compiled commands in cmds[] on a file
|
||||||
|
file: name of text source file to be filtered */
|
||||||
|
void execute(char* file)
|
||||||
|
{
|
||||||
|
register sedcmd *ipc; /* ptr to current command */
|
||||||
|
char *execp; /* ptr to source */
|
||||||
|
|
||||||
|
if (file != NULL) /* filter text from a named file */
|
||||||
|
if (freopen(file, "r", stdin) == NULL)
|
||||||
|
fprintf(stderr, "sed: can't open %s\n", file);
|
||||||
|
|
||||||
|
if (pending) /* there's a command waiting */
|
||||||
|
{
|
||||||
|
ipc = pending; /* it will be first executed */
|
||||||
|
pending = FALSE; /* turn off the waiting flag */
|
||||||
|
goto doit; /* go to execute it immediately */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* here's the main command-execution loop */
|
||||||
|
for(;;)
|
||||||
|
{
|
||||||
|
/* get next line to filter */
|
||||||
|
if ((execp = getline(linebuf, MAXBUF+1)) == BAD)
|
||||||
|
return;
|
||||||
|
spend = execp;
|
||||||
|
|
||||||
|
/* loop through compiled commands, executing them */
|
||||||
|
for(ipc = cmds; ipc->command; )
|
||||||
|
{
|
||||||
|
/* address command to select? - If not address
|
||||||
|
but allbut then invert, that is skip, the commmand */
|
||||||
|
if (ipc->addr1 || ipc->flags.allbut) {
|
||||||
|
if (!ipc->addr1 || !selected(ipc)) {
|
||||||
|
ipc++; /* not selected, next cmd */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
doit:
|
||||||
|
command(ipc); /* execute the command pointed at */
|
||||||
|
|
||||||
|
if (delete) /* if delete flag is set */
|
||||||
|
break; /* don't exec rest of compiled cmds */
|
||||||
|
|
||||||
|
if (jump) /* if jump set, follow cmd's link */
|
||||||
|
{
|
||||||
|
jump = FALSE;
|
||||||
|
if ((ipc = ipc->u.link) == 0)
|
||||||
|
{
|
||||||
|
ipc = cmds;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else /* normal goto next command */
|
||||||
|
ipc++;
|
||||||
|
}
|
||||||
|
/* we've now done all modification commands on the line */
|
||||||
|
|
||||||
|
/* here's where the transformed line is output */
|
||||||
|
if (!nflag && !delete)
|
||||||
|
{
|
||||||
|
fwrite(linebuf, spend - linebuf, 1, stdout);
|
||||||
|
if (line_with_newline)
|
||||||
|
putc('\n', stdout);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if we've been set up for append, emit the text from it */
|
||||||
|
if (aptr > appends)
|
||||||
|
readout();
|
||||||
|
|
||||||
|
delete = FALSE; /* clear delete flag; about to get next cmd */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* is current command selected */
|
||||||
|
static int selected(sedcmd *ipc)
|
||||||
|
{
|
||||||
|
register char *p1 = ipc->addr1; /* point p1 at first address */
|
||||||
|
register char *p2 = ipc->addr2; /* and p2 at second */
|
||||||
|
unsigned char c;
|
||||||
|
int selected = FALSE;
|
||||||
|
|
||||||
|
if (ipc->flags.inrange)
|
||||||
|
{
|
||||||
|
selected = TRUE;
|
||||||
|
if (*p2 == CEND)
|
||||||
|
;
|
||||||
|
else if (*p2 == CLNUM)
|
||||||
|
{
|
||||||
|
c = p2[1];
|
||||||
|
if (lnum >= linenum[c])
|
||||||
|
ipc->flags.inrange = FALSE;
|
||||||
|
}
|
||||||
|
else if (match(p2, 0))
|
||||||
|
ipc->flags.inrange = FALSE;
|
||||||
|
}
|
||||||
|
else if (*p1 == CEND)
|
||||||
|
{
|
||||||
|
if (lastline)
|
||||||
|
selected = TRUE;
|
||||||
|
}
|
||||||
|
else if (*p1 == CLNUM)
|
||||||
|
{
|
||||||
|
c = p1[1];
|
||||||
|
if (lnum == linenum[c]) {
|
||||||
|
selected = TRUE;
|
||||||
|
if (p2)
|
||||||
|
ipc->flags.inrange = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (match(p1, 0))
|
||||||
|
{
|
||||||
|
selected = TRUE;
|
||||||
|
if (p2)
|
||||||
|
ipc->flags.inrange = TRUE;
|
||||||
|
}
|
||||||
|
return ipc->flags.allbut ? !selected : selected;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* match RE at expbuf against linebuf; if gf set, copy linebuf from genbuf */
|
||||||
|
static int match(char *expbuf, int gf) /* uses genbuf */
|
||||||
|
{
|
||||||
|
char *p1, *p2, c;
|
||||||
|
|
||||||
|
if (gf)
|
||||||
|
{
|
||||||
|
if (*expbuf)
|
||||||
|
return(FALSE);
|
||||||
|
p1 = linebuf; p2 = genbuf;
|
||||||
|
while ((*p1++ = *p2++));
|
||||||
|
if (needs_advance) {
|
||||||
|
loc2++;
|
||||||
|
}
|
||||||
|
locs = p1 = loc2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
p1 = linebuf + needs_advance;
|
||||||
|
locs = FALSE;
|
||||||
|
}
|
||||||
|
needs_advance = 0;
|
||||||
|
|
||||||
|
p2 = expbuf;
|
||||||
|
if (*p2++)
|
||||||
|
{
|
||||||
|
loc1 = p1;
|
||||||
|
if(*p2 == CCHR && p2[1] != *p1) /* 1st char is wrong */
|
||||||
|
return(FALSE); /* so fail */
|
||||||
|
return(advance(p1, p2, NULL)); /* else try to match rest */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* quick check for 1st character if it's literal */
|
||||||
|
if (*p2 == CCHR)
|
||||||
|
{
|
||||||
|
c = p2[1]; /* pull out character to search for */
|
||||||
|
do {
|
||||||
|
if (*p1 != c)
|
||||||
|
continue; /* scan the source string */
|
||||||
|
if (advance(p1, p2,NULL)) /* found it, match the rest */
|
||||||
|
return(loc1 = p1, 1);
|
||||||
|
} while
|
||||||
|
(*p1++);
|
||||||
|
return(FALSE); /* didn't find that first char */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* else try for unanchored match of the pattern */
|
||||||
|
do {
|
||||||
|
if (advance(p1, p2, NULL))
|
||||||
|
return(loc1 = p1, 1);
|
||||||
|
} while
|
||||||
|
(*p1++);
|
||||||
|
|
||||||
|
/* if got here, didn't match either way */
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* attempt to advance match pointer by one pattern element
|
||||||
|
lp: source (linebuf) ptr
|
||||||
|
ep: regular expression element ptr */
|
||||||
|
static int advance(char* lp, char* ep, char** eob)
|
||||||
|
{
|
||||||
|
char *curlp; /* save ptr for closures */
|
||||||
|
char c; /* scratch character holder */
|
||||||
|
char *bbeg;
|
||||||
|
int ct;
|
||||||
|
signed int bcount = -1;
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
switch (*ep++)
|
||||||
|
{
|
||||||
|
case CCHR: /* literal character */
|
||||||
|
if (*ep++ == *lp++) /* if chars are equal */
|
||||||
|
continue; /* matched */
|
||||||
|
return(FALSE); /* else return false */
|
||||||
|
|
||||||
|
case CDOT: /* anything but newline */
|
||||||
|
if (*lp++) /* first NUL is at EOL */
|
||||||
|
continue; /* keep going if didn't find */
|
||||||
|
return(FALSE); /* else return false */
|
||||||
|
|
||||||
|
case CNL: /* start-of-line */
|
||||||
|
case CDOL: /* end-of-line */
|
||||||
|
if (*lp == 0) /* found that first NUL? */
|
||||||
|
continue; /* yes, keep going */
|
||||||
|
return(FALSE); /* else return false */
|
||||||
|
|
||||||
|
case CEOF: /* end-of-address mark */
|
||||||
|
loc2 = lp; /* set second loc */
|
||||||
|
return(TRUE); /* return true */
|
||||||
|
|
||||||
|
case CCL: /* a closure */
|
||||||
|
c = *lp++ & 0177;
|
||||||
|
if (ep[c>>3] & bits(c & 07)) /* is char in set? */
|
||||||
|
{
|
||||||
|
ep += 16; /* then skip rest of bitmask */
|
||||||
|
continue; /* and keep going */
|
||||||
|
}
|
||||||
|
return(FALSE); /* else return false */
|
||||||
|
|
||||||
|
case CBRA: /* start of tagged pattern */
|
||||||
|
brastart[(unsigned char)*ep++] = lp; /* mark it */
|
||||||
|
continue; /* and go */
|
||||||
|
|
||||||
|
case CKET: /* end of tagged pattern */
|
||||||
|
bcount = *ep;
|
||||||
|
if (eob) {
|
||||||
|
*eob = lp;
|
||||||
|
return (TRUE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
bracend[(unsigned char)*ep++] = lp; /* mark it */
|
||||||
|
continue; /* and go */
|
||||||
|
|
||||||
|
case CBACK: /* match back reference */
|
||||||
|
bbeg = brastart[(unsigned char)*ep];
|
||||||
|
ct = bracend[(unsigned char)*ep++] - bbeg;
|
||||||
|
|
||||||
|
if (memcmp(bbeg, lp, ct) == 0)
|
||||||
|
{
|
||||||
|
lp += ct;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
return(FALSE);
|
||||||
|
|
||||||
|
case CBRA|STAR: /* \(...\)* */
|
||||||
|
{
|
||||||
|
char *lastlp;
|
||||||
|
curlp = lp;
|
||||||
|
|
||||||
|
if (*ep > bcount)
|
||||||
|
brastart[(unsigned char)*ep] = bracend[(unsigned char)*ep] = lp;
|
||||||
|
|
||||||
|
while (advance(lastlp=lp, ep+1, &lp)) {
|
||||||
|
if (*ep > bcount && lp != lastlp) {
|
||||||
|
bracend[(unsigned char)*ep] = lp; /* mark it */
|
||||||
|
brastart[(unsigned char)*ep] = lastlp;
|
||||||
|
}
|
||||||
|
if (lp == lastlp) break;
|
||||||
|
}
|
||||||
|
ep++;
|
||||||
|
|
||||||
|
/* FIXME: scan for the brace end */
|
||||||
|
while (*ep != CKET)
|
||||||
|
ep++;
|
||||||
|
ep+=2;
|
||||||
|
|
||||||
|
needs_advance = 1;
|
||||||
|
if (lp == curlp) /* 0 matches */
|
||||||
|
continue;
|
||||||
|
lp++;
|
||||||
|
goto star;
|
||||||
|
}
|
||||||
|
case CBACK|STAR: /* \n* */
|
||||||
|
bbeg = brastart[(unsigned char)*ep];
|
||||||
|
ct = bracend[(unsigned char)*ep++] - bbeg;
|
||||||
|
curlp = lp;
|
||||||
|
while(memcmp(bbeg, lp, ct) == 0)
|
||||||
|
lp += ct;
|
||||||
|
|
||||||
|
while(lp >= curlp)
|
||||||
|
{
|
||||||
|
if (advance(lp, ep, eob))
|
||||||
|
return(TRUE);
|
||||||
|
lp -= ct;
|
||||||
|
}
|
||||||
|
return(FALSE);
|
||||||
|
|
||||||
|
case CDOT|STAR: /* match .* */
|
||||||
|
curlp = lp; /* save closure start loc */
|
||||||
|
while (*lp++); /* match anything */
|
||||||
|
goto star; /* now look for followers */
|
||||||
|
|
||||||
|
case CCHR|STAR: /* match <literal char>* */
|
||||||
|
curlp = lp; /* save closure start loc */
|
||||||
|
while (*lp++ == *ep); /* match many of that char */
|
||||||
|
ep++; /* to start of next element */
|
||||||
|
goto star; /* match it and followers */
|
||||||
|
|
||||||
|
case CCL|STAR: /* match [...]* */
|
||||||
|
curlp = lp; /* save closure start loc */
|
||||||
|
do {
|
||||||
|
c = *lp++ & 0x7F; /* match any in set */
|
||||||
|
} while
|
||||||
|
(ep[c>>3] & bits(c & 07));
|
||||||
|
ep += 16; /* skip past the set */
|
||||||
|
goto star; /* match followers */
|
||||||
|
|
||||||
|
star: /* the recursion part of a * or + match */
|
||||||
|
needs_advance = 1;
|
||||||
|
if (--lp == curlp) { /* 0 matches */
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
#if 0
|
||||||
|
if (*ep == CCHR)
|
||||||
|
{
|
||||||
|
c = ep[1];
|
||||||
|
do {
|
||||||
|
if (*lp != c)
|
||||||
|
continue;
|
||||||
|
if (advance(lp, ep, eob))
|
||||||
|
return(TRUE);
|
||||||
|
} while
|
||||||
|
(lp-- > curlp);
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*ep == CBACK)
|
||||||
|
{
|
||||||
|
c = *(brastart[ep[1]]);
|
||||||
|
do {
|
||||||
|
if (*lp != c)
|
||||||
|
continue;
|
||||||
|
if (advance(lp, ep, eob))
|
||||||
|
return(TRUE);
|
||||||
|
} while
|
||||||
|
(lp-- > curlp);
|
||||||
|
return(FALSE);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/* match followers, try shorter match, if needed */
|
||||||
|
do {
|
||||||
|
if (lp == locs)
|
||||||
|
break;
|
||||||
|
if (advance(lp, ep, eob))
|
||||||
|
return(TRUE);
|
||||||
|
} while
|
||||||
|
(lp-- > curlp);
|
||||||
|
return(FALSE);
|
||||||
|
|
||||||
|
default:
|
||||||
|
fprintf(stderr, "sed: internal RE error, %o\n", *--ep);
|
||||||
|
exit (2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* perform s command
|
||||||
|
ipc: ptr to s command struct */
|
||||||
|
static int substitute(sedcmd *ipc)
|
||||||
|
{
|
||||||
|
unsigned int n = 1;
|
||||||
|
/* find a match */
|
||||||
|
/* the needs_advance code got a bit tricky - might needs a clean
|
||||||
|
refactoring */
|
||||||
|
while (match(ipc->u.lhs, 0)) {
|
||||||
|
/* nth 0 is implied 1 */
|
||||||
|
if (!ipc->nth || n == ipc->nth) {
|
||||||
|
dosub(ipc->rhs); /* perform it once */
|
||||||
|
n++; /* mark for return */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
needs_advance = n++;
|
||||||
|
}
|
||||||
|
if (n == 1)
|
||||||
|
return(FALSE); /* command fails */
|
||||||
|
|
||||||
|
if (ipc->flags.global) /* if global flag enabled */
|
||||||
|
do { /* cycle through possibles */
|
||||||
|
if (match(ipc->u.lhs, 1)) { /* found another */
|
||||||
|
dosub(ipc->rhs); /* so substitute */
|
||||||
|
}
|
||||||
|
else /* otherwise, */
|
||||||
|
break; /* we're done */
|
||||||
|
} while (*loc2);
|
||||||
|
return(TRUE); /* we succeeded */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* generate substituted right-hand side (of s command)
|
||||||
|
rhsbuf: where to put the result */
|
||||||
|
static void dosub(char *rhsbuf) /* uses linebuf, genbuf, spend */
|
||||||
|
{
|
||||||
|
char *lp, *sp, *rp;
|
||||||
|
int c;
|
||||||
|
|
||||||
|
/* copy linebuf to genbuf up to location 1 */
|
||||||
|
lp = linebuf; sp = genbuf;
|
||||||
|
while (lp < loc1) *sp++ = *lp++;
|
||||||
|
|
||||||
|
for (rp = rhsbuf; (c = *rp++); )
|
||||||
|
{
|
||||||
|
if (c & 0200 && (c & 0177) == '0')
|
||||||
|
{
|
||||||
|
sp = place(sp, loc1, loc2);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (c & 0200 && (c &= 0177) >= '1' && c < MAXTAGS+'1')
|
||||||
|
{
|
||||||
|
sp = place(sp, brastart[c-'1'], bracend[c-'1']);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
*sp++ = c & 0177;
|
||||||
|
if (sp >= genbuf + MAXBUF)
|
||||||
|
fprintf(stderr, LTLMSG);
|
||||||
|
|
||||||
|
}
|
||||||
|
lp = loc2;
|
||||||
|
loc2 = sp - genbuf + linebuf;
|
||||||
|
while ((*sp++ = *lp++))
|
||||||
|
if (sp >= genbuf + MAXBUF)
|
||||||
|
fprintf(stderr, LTLMSG);
|
||||||
|
lp = linebuf; sp = genbuf;
|
||||||
|
while ((*lp++ = *sp++));
|
||||||
|
spend = lp-1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* place chars at *al1...*(al1 - 1) at asp... in genbuf[] */
|
||||||
|
static char *place(char* asp, char* al1, char* al2) /* uses genbuf */
|
||||||
|
{
|
||||||
|
while (al1 < al2)
|
||||||
|
{
|
||||||
|
*asp++ = *al1++;
|
||||||
|
if (asp >= genbuf + MAXBUF)
|
||||||
|
fprintf(stderr, LTLMSG);
|
||||||
|
}
|
||||||
|
return(asp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* list the pattern space in visually unambiguous form *p1... to fp
|
||||||
|
p1: the source
|
||||||
|
fp: output stream to write to */
|
||||||
|
static void listto(char *p1, FILE *fp)
|
||||||
|
{
|
||||||
|
for (; p1<spend; p1++)
|
||||||
|
if (isprint(*p1))
|
||||||
|
putc(*p1, fp); /* pass it through */
|
||||||
|
else
|
||||||
|
{
|
||||||
|
putc('\\', fp); /* emit a backslash */
|
||||||
|
switch(*p1)
|
||||||
|
{
|
||||||
|
case '\b': putc('b', fp); break; /* BS */
|
||||||
|
case '\t': putc('t', fp); break; /* TAB */
|
||||||
|
case '\n': putc('n', fp); break; /* NL */
|
||||||
|
case '\r': putc('r', fp); break; /* CR */
|
||||||
|
case '\033': putc('e', fp); break; /* ESC */
|
||||||
|
default: fprintf(fp, "%02x", *p1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
putc('\n', fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* write a hex dump expansion of *p1... to fp
|
||||||
|
p1: source
|
||||||
|
fp: output */
|
||||||
|
static void dumpto(char *p1, FILE *fp)
|
||||||
|
{
|
||||||
|
for (; p1<spend; p1++)
|
||||||
|
fprintf(fp, "%02x", *p1);
|
||||||
|
fprintf(fp, "%02x", '\n');
|
||||||
|
putc('\n', fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void truncated(int h)
|
||||||
|
{
|
||||||
|
static long last = 0L;
|
||||||
|
|
||||||
|
if (lnum == last) return;
|
||||||
|
last = lnum;
|
||||||
|
|
||||||
|
fprintf(stderr, "sed: ");
|
||||||
|
fprintf(stderr, h ? "hold space" : "line %ld", lnum);
|
||||||
|
fprintf(stderr, " truncated to %d characters\n", MAXBUF);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* execute compiled command pointed at by ipc */
|
||||||
|
static void command(sedcmd *ipc)
|
||||||
|
{
|
||||||
|
static int didsub; /* true if last s succeeded */
|
||||||
|
static char holdsp[MAXHOLD]; /* the hold space */
|
||||||
|
static char *hspend = holdsp; /* hold space end pointer */
|
||||||
|
register char *p1, *p2;
|
||||||
|
char *execp;
|
||||||
|
|
||||||
|
needs_advance = 0;
|
||||||
|
switch(ipc->command)
|
||||||
|
{
|
||||||
|
case ACMD: /* append */
|
||||||
|
*aptr++ = ipc;
|
||||||
|
if (aptr >= appends + MAXAPPENDS)
|
||||||
|
fprintf(stderr,
|
||||||
|
"sed: too many appends after line %ld\n",
|
||||||
|
lnum);
|
||||||
|
*aptr = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CCMD: /* change pattern space */
|
||||||
|
delete = TRUE;
|
||||||
|
if (!ipc->flags.inrange || lastline)
|
||||||
|
printf("%s\n", ipc->u.lhs);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case DCMD: /* delete pattern space */
|
||||||
|
delete++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CDCMD: /* delete a line in hold space */
|
||||||
|
p1 = p2 = linebuf;
|
||||||
|
while(*p1 != '\n')
|
||||||
|
if ((delete = (*p1++ == 0)))
|
||||||
|
return;
|
||||||
|
p1++;
|
||||||
|
while((*p2++ = *p1++)) continue;
|
||||||
|
spend = p2-1;
|
||||||
|
jump++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case EQCMD: /* show current line number */
|
||||||
|
fprintf(stdout, "%ld\n", lnum);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case GCMD: /* copy hold space to pattern space */
|
||||||
|
p1 = linebuf; p2 = holdsp; while((*p1++ = *p2++));
|
||||||
|
spend = p1-1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CGCMD: /* append hold space to pattern space */
|
||||||
|
*spend++ = '\n';
|
||||||
|
p1 = spend; p2 = holdsp;
|
||||||
|
do {
|
||||||
|
if (p1 > linebuf + MAXBUF) {
|
||||||
|
truncated(FALSE);
|
||||||
|
p1[-1] = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while((*p1++ = *p2++));
|
||||||
|
|
||||||
|
spend = p1-1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case HCMD: /* copy pattern space to hold space */
|
||||||
|
p1 = holdsp; p2 = linebuf; while((*p1++ = *p2++));
|
||||||
|
hspend = p1-1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CHCMD: /* append pattern space to hold space */
|
||||||
|
*hspend++ = '\n';
|
||||||
|
p1 = hspend; p2 = linebuf;
|
||||||
|
do {
|
||||||
|
if (p1 > holdsp + MAXBUF) {
|
||||||
|
truncated(TRUE);
|
||||||
|
p1[-1] = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while((*p1++ = *p2++));
|
||||||
|
|
||||||
|
hspend = p1-1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ICMD: /* insert text */
|
||||||
|
printf("%s\n", ipc->u.lhs);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BCMD: /* branch to label */
|
||||||
|
jump = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case LCMD: /* list text */
|
||||||
|
listto(linebuf, (ipc->fout != NULL)?ipc->fout:stdout); break;
|
||||||
|
|
||||||
|
case CLCMD: /* dump text */
|
||||||
|
dumpto(linebuf, (ipc->fout != NULL)?ipc->fout:stdout); break;
|
||||||
|
|
||||||
|
case NCMD: /* read next line into pattern space */
|
||||||
|
if (!nflag)
|
||||||
|
puts(linebuf); /* flush out the current line */
|
||||||
|
if (aptr > appends)
|
||||||
|
readout(); /* do pending a, r commands */
|
||||||
|
if ((execp = getline(linebuf, MAXBUF+1)) == BAD)
|
||||||
|
{
|
||||||
|
pending = ipc;
|
||||||
|
delete = TRUE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
spend = execp;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CNCMD: /* append next line to pattern space */
|
||||||
|
if (aptr > appends)
|
||||||
|
readout();
|
||||||
|
*spend++ = '\n';
|
||||||
|
if ((execp = getline(spend,
|
||||||
|
linebuf + MAXBUF+1 - spend)) == BAD)
|
||||||
|
{
|
||||||
|
pending = ipc;
|
||||||
|
delete = TRUE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
spend = execp;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCMD: /* print pattern space */
|
||||||
|
puts(linebuf);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CPCMD: /* print one line from pattern space */
|
||||||
|
cpcom: /* so s command can jump here */
|
||||||
|
for(p1 = linebuf; *p1 != '\n' && *p1 != '\0'; )
|
||||||
|
putc(*p1++, stdout);
|
||||||
|
putc('\n', stdout);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QCMD: /* quit the stream editor */
|
||||||
|
if (!nflag)
|
||||||
|
puts(linebuf); /* flush out the current line */
|
||||||
|
if (aptr > appends)
|
||||||
|
readout(); /* do any pending a and r commands */
|
||||||
|
exit(0);
|
||||||
|
|
||||||
|
case RCMD: /* read a file into the stream */
|
||||||
|
*aptr++ = ipc;
|
||||||
|
if (aptr >= appends + MAXAPPENDS)
|
||||||
|
fprintf(stderr,
|
||||||
|
"sed: too many reads after line %ld\n",
|
||||||
|
lnum);
|
||||||
|
*aptr = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SCMD: /* substitute RE */
|
||||||
|
didsub = substitute(ipc);
|
||||||
|
if (ipc->flags.print && didsub)
|
||||||
|
{
|
||||||
|
if (ipc->flags.print == TRUE)
|
||||||
|
puts(linebuf);
|
||||||
|
else
|
||||||
|
goto cpcom;
|
||||||
|
}
|
||||||
|
if (didsub && ipc->fout)
|
||||||
|
fprintf(ipc->fout, "%s\n", linebuf);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case TCMD: /* branch on last s successful */
|
||||||
|
case CTCMD: /* branch on last s failed */
|
||||||
|
if (didsub == (ipc->command == CTCMD))
|
||||||
|
break; /* no branch if last s failed, else */
|
||||||
|
didsub = FALSE;
|
||||||
|
jump = TRUE; /* set up to jump to assoc'd label */
|
||||||
|
break;
|
||||||
|
|
||||||
|
case CWCMD: /* write one line from pattern space */
|
||||||
|
for(p1 = linebuf; *p1 != '\n' && *p1 != '\0'; )
|
||||||
|
putc(*p1++, ipc->fout);
|
||||||
|
putc('\n', ipc->fout);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case WCMD: /* write pattern space to file */
|
||||||
|
fprintf(ipc->fout, "%s\n", linebuf);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case XCMD: /* exchange pattern and hold spaces */
|
||||||
|
p1 = linebuf; p2 = genbuf; while((*p2++ = *p1++)) continue;
|
||||||
|
p1 = holdsp; p2 = linebuf; while((*p2++ = *p1++)) continue;
|
||||||
|
spend = p2 - 1;
|
||||||
|
p1 = genbuf; p2 = holdsp; while((*p2++ = *p1++)) continue;
|
||||||
|
hspend = p2 - 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case YCMD:
|
||||||
|
p1 = linebuf; p2 = ipc->u.lhs;
|
||||||
|
while((*p1 = p2[(unsigned char)*p1]))
|
||||||
|
p1++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get next line of text to be filtered
|
||||||
|
buf: where to send the input
|
||||||
|
max: max chars to read */
|
||||||
|
static char *getline(char *buf, int max)
|
||||||
|
{
|
||||||
|
if (fgets(buf, max, stdin) != NULL)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
|
||||||
|
lnum++; /* note that we got another line */
|
||||||
|
/* find the end of the input and overwrite a possible '\n' */
|
||||||
|
while (*buf != '\n' && *buf != 0)
|
||||||
|
buf++;
|
||||||
|
line_with_newline = *buf == '\n';
|
||||||
|
*buf=0;
|
||||||
|
|
||||||
|
/* detect last line - but only if the address was used in a command */
|
||||||
|
if (last_line_used) {
|
||||||
|
if ((c = fgetc(stdin)) != EOF)
|
||||||
|
ungetc (c, stdin);
|
||||||
|
else {
|
||||||
|
if (eargc == 0) /* if no more args */
|
||||||
|
lastline = TRUE; /* set a flag */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return(buf); /* return ptr to terminating null */
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return(BAD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* write file indicated by r command to output */
|
||||||
|
static void readout(void)
|
||||||
|
{
|
||||||
|
register int t; /* hold input char or EOF */
|
||||||
|
FILE *fi; /* ptr to file to be read */
|
||||||
|
|
||||||
|
aptr = appends - 1; /* arrange for pre-increment to work right */
|
||||||
|
while(*++aptr)
|
||||||
|
if ((*aptr)->command == ACMD) /* process "a" cmd */
|
||||||
|
printf("%s\n", (*aptr)->u.lhs);
|
||||||
|
else /* process "r" cmd */
|
||||||
|
{
|
||||||
|
if ((fi = fopen((*aptr)->u.lhs, "r")) == NULL)
|
||||||
|
continue;
|
||||||
|
while((t = getc(fi)) != EOF)
|
||||||
|
putc((char) t, stdout);
|
||||||
|
fclose(fi);
|
||||||
|
}
|
||||||
|
aptr = appends; /* reset the append ptr */
|
||||||
|
*aptr = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* sedexec.c ends here */
|
Loading…
Reference in a new issue