This commit is contained in:
Ben Gras 2006-11-02 16:48:15 +00:00
parent 64dbdd855d
commit aa8206941c
6 changed files with 1973 additions and 0 deletions

10
commands/sed/BUGS Normal file
View file

@ -0,0 +1,10 @@
So far no regressions over the historic sed are known. If you find a bug,
please provide a test-case (.sed, .in and .out, look into tests/) - if
possible try to debug the problem and propose a patch.
We will focus on POSIX conformance and small size - GNU sed extensions are
most likely not accepted.
Please report issues to: Rene Rebe <rene@exactcode.de>

22
commands/sed/Makefile Normal file
View file

@ -0,0 +1,22 @@
# Makefile for minised
# If your compiler does not support these flags, just remove them.
# They only ensure that no new warning regressions make it into the source.
CFLAGS = -Wall -Wwrite-strings
# Link the two object files into the minised binary.
# LFLAGS is not set in this file; it is empty unless supplied by the caller.
minised: sedcomp.o sedexec.o
$(CC) $(LFLAGS) sedcomp.o sedexec.o -o minised
# Both objects depend on the shared header and are built by make's implicit rules.
sedcomp.o: sedcomp.c sed.h
sedexec.o: sedexec.c sed.h
# Install the binary into both /usr/bin and /bin, owned by bin, mode 755.
install: minised
install -o bin -m 755 minised /usr/bin/
install -o bin -m 755 minised /bin/
# Remove the binary and the object files.
clean:
rm -f minised sedcomp.o sedexec.o
# Run the test script in tests/ against the freshly built binary.
check: minised
cd tests; ./run ../minised

81
commands/sed/README Normal file
View file

@ -0,0 +1,81 @@
small-sed
by Eric S. Raymond, <esr@snark.thyrsus.com>
and Rene Rebe <rene@exactcode.de>
This is a smaller, cheaper, and faster SED utility. Minix uses it. GNU used
to use it, until they built their own sed around an extended (some would
say over-extended) regexp package. It is also used for embedded tasks (for
example by the T2 SDE - http://www.t2-project.org).
The original sed 1.0 was written in three pieces; sed.h, sedcomp.c, sedexec.c.
Some Minix hacker ran them together into a single-file version, mnsed.c which
is not supported and shipped these days; if changes are needed for Minix please
send a patch to the normal source.
The 1.2 version (9 Oct 1996) added mnsed's support for detecting
truncated hold spaces. The mnsed version is missing one feature
of the 1.2 version: support of +. Also, the multiple-file I/O is
organized slightly differently.
The 1.3 version added a bug fix by Tom Oehser, and the `L' command. Also
this program is now distributed under GPL.
The 1.5 version incorporated a lot of bug fixes by Rene Rebe as well as
a real test suite. Also the function declaration and definition have been
converted from the K&R C to ANSI C.
The 1.6 version includes support for the n'th match for the substitute command
as well as support for predefined character classes and only writes lines
with newline if one was present in the input line (compatible with GNU sed).
The 1.7 version fixed a segmentation fault with empty regular expressions,
not to leak other buffer content for groups of commands and escaping
numerical separators in regular expressions by disabling obscure code.
Additionally compilation with older compilers as well as warnings with the
latest gcc versions have been corrected.
The 1.8 version fixes matching of some escaped characters (a regression
introduced with \+ star matching), \+ star matching to correctly copy
and mark the internal bytecode representation, back references inside lhs
regular expressions matching (to work at all) and marking the correct
regular expression for star matches.
The 1.9 version included a microoptimization shaving some bytes off the
binary and some cpu cycles at run time, reusing the previous regular
expressions for empty ones, predefined character classes with control
characters, handling of escaped ampersands and support for backreference
\0 and Kleene star operator on groups.
The 1.10 version fixed a special case of grouped star matching where
\+1..n overwrote the last match, not to infinite loop on certain zero match
grouped star cases and not to crash on w(rite to file). The version also
no longer falls into the conservative end-of-file matching mode when just
end-of-line matching was used.
The 1.11 version again fixed w(rite to file) handling to correctly honor
/dev/stdout and /dev/stderr as GNU sed does and thus keep the streams in
sync. Some unused variables have been removed and two diagnostics
were fixed to be printed correctly.
The 1.12 version fixed the l(ist) command to actually work, some tiny
optimizations have been performed as well as some more compiler warnings
fixed.
Makefile -- how to build sed
sed.h -- declarations and structures
sedcomp.c -- sed pattern compilation
sedexec.c -- sed program execution
sed.1 -- source for the man page
tests/ -- a small set of sed tests
For some releases the man page in the man format.
Surf to
http://www.exactcode.de/oss/minised/
http://www.catb.org/~esr/
for updates of this software. There is a sed FAQ kept at these
locations:
http://www.dreamwvr.com/sed-info/sed-faq.html

85
commands/sed/sed.h Normal file
View file

@ -0,0 +1,85 @@
/* sed.h -- types and constants for the stream editor
Copyright (C) 1995-2003 Eric S. Raymond
Copyright (C) 2004-2005 Rene Rebe
*/
#define TRUE 1
#define FALSE 0
/* data area sizes used by both modules */
#define MAXBUF 4000 /* current line buffer size */
#define MAXAPPENDS 20 /* maximum number of appends */
#define MAXTAGS 9 /* tagged patterns are \1 to \9 */
#define MAXCMDS 200 /* maximum number of compiled commands */
#define MAXLINES 256 /* max # numeric addresses to compile */
/* constants for compiled-command representation */
/* These values are stored in the `command' member of sedcmd. */
#define EQCMD 0x01 /* = -- print current line number */
#define ACMD 0x02 /* a -- append text after current line */
#define BCMD 0x03 /* b -- branch to label */
#define CCMD 0x04 /* c -- change current line */
#define DCMD 0x05 /* d -- delete all of pattern space */
#define CDCMD 0x06 /* D -- delete first line of pattern space */
#define GCMD 0x07 /* g -- copy hold space to pattern space */
#define CGCMD 0x08 /* G -- append hold space to pattern space */
#define HCMD 0x09 /* h -- copy pattern space to hold space */
#define CHCMD 0x0A /* H -- append pattern space to hold space */
#define ICMD 0x0B /* i -- insert text before current line */
#define LCMD 0x0C /* l -- print pattern space in escaped form */
#define CLCMD 0x20 /* L -- hexdump (value lies outside the otherwise consecutive 0x01..0x19 range) */
#define NCMD 0x0D /* n -- get next line into pattern space */
#define CNCMD 0x0E /* N -- append next line to pattern space */
#define PCMD 0x0F /* p -- print pattern space to output */
#define CPCMD 0x10 /* P -- print first line of pattern space */
#define QCMD 0x11 /* q -- exit the stream editor */
#define RCMD 0x12 /* r -- read in a file after current line */
#define SCMD 0x13 /* s -- regular-expression substitute */
#define TCMD 0x14 /* t -- branch on last substitute successful */
#define CTCMD 0x15 /* T -- branch on last substitute failed */
#define WCMD 0x16 /* w -- write pattern space to file */
#define CWCMD 0x17 /* W -- write first line of pattern space */
#define XCMD 0x18 /* x -- exchange pattern and hold spaces */
#define YCMD 0x19 /* y -- transliterate text */
/* NOTE: this header includes nothing itself; <stdio.h> must be included
before it so that FILE is declared (sedcomp.c and sedexec.c both do so). */
typedef struct cmd_t /* compiled-command representation */
{
char *addr1; /* first address for command */
char *addr2; /* second address for command */
union
{
char *lhs; /* s command lhs */
struct cmd_t *link; /* label link */
} u;
char command; /* command code */
char *rhs; /* s command replacement string */
FILE *fout; /* associated output file descriptor */
struct
{
unsigned allbut : 1; /* was negation specified? */
unsigned global : 1; /* was g postfix specified? */
unsigned print : 2; /* was p postfix specified? (1 for p, 2 for P) */
unsigned inrange : 1; /* in an address range? */
} flags;
unsigned nth; /* sed nth occurrence (numeric flag of the s command) */
}
sedcmd; /* use this name for declarations */
#define BAD ((char *) -1) /* guaranteed not a string ptr */
/* address and regular expression compiled-form markers */
/* Every marker other than STAR is an even number, which leaves bit 0 free:
the RE compiler ORs STAR into a marker byte to flag a repeated element. */
#define STAR 1 /* marker for Kleene star */
#define CCHR 2 /* non-newline character to be matched follows */
#define CDOT 4 /* dot wild-card marker */
#define CCL 6 /* character class follows */
#define CNL 8 /* match line start */
#define CDOL 10 /* match line end */
#define CBRA 12 /* tagged pattern start marker */
#define CKET 14 /* tagged pattern end marker */
#define CBACK 16 /* backslash-digit pair marker */
#define CLNUM 18 /* numeric-address index follows */
#define CEND 20 /* symbol for end-of-source */
#define CEOF 22 /* end-of-field mark */
/* single-bit mask for bit number b; used to build character-class bitmaps */
#define bits(b) (1 << (b))
/* sed.h ends here */

956
commands/sed/sedcomp.c Normal file
View file

@ -0,0 +1,956 @@
/* sedcomp.c -- stream editor main and compilation phase
Copyright (C) 1995-2003 Eric S. Raymond
Copyright (C) 2004-2006 Rene Rebe
The stream editor compiles its command input (from files or -e options)
into an internal form using compile() then executes the compiled form using
execute(). Main() just initializes data structures, interprets command line
options, and calls compile() and execute() in appropriate sequence.
The data structure produced by compile() is an array of compiled-command
structures (type sedcmd). These contain several pointers into pool[], the
regular-expression and text-data pool, plus a command code and g & p flags.
In the special case that the command is a label the struct will hold a ptr
into the labels array labels[] during most of the compile, until resolve()
resolves references at the end.
The operation of execute() is described in its source module.
*/
#include <stdlib.h> /* exit */
#include <stdio.h> /* uses getc, fprintf, fopen, fclose */
#include <ctype.h> /* isdigit */
#include <string.h> /* strcmp */
#include "sed.h" /* command type struct and name defines */
/***** public stuff ******/
/* NOTE: these two repeat the MAXCMDS/MAXLINES definitions of sed.h with the same values */
#define MAXCMDS 200 /* maximum number of compiled commands */
#define MAXLINES 256 /* max # numeric addresses to compile */
/* main data areas */
char linebuf[MAXBUF+1]; /* current-line buffer; during compilation it holds the script line being compiled */
sedcmd cmds[MAXCMDS+1]; /* hold compiled commands */
long linenum[MAXLINES]; /* numeric-addresses table */
/* miscellaneous shared variables */
int nflag; /* -n option flag */
int eargc; /* scratch copy of argument count */
sedcmd *pending = NULL; /* next command to be executed */
int last_line_used = 0; /* last line address ($) was used */
/* Report a fatal error and terminate with exit status 2.
   msg is used as a printf format with linebuf as its only argument, so it
   may contain at most one %s conversion; the output on stderr is
   "sed: <formatted msg>" followed by a newline. */
void die (const char* msg) {
    fputs("sed: ", stderr);
    fprintf(stderr, msg, linebuf);
    fputc('\n', stderr);
    exit(2);
}
/***** module common stuff *****/
#define POOLSIZE 10000 /* size of string-pool space */
#define WFILES 10 /* max # w output files that can be compiled */
#define RELIMIT 256 /* max chars in compiled RE */
#define MAXDEPTH 20 /* maximum {}-nesting level */
#define MAXLABS 50 /* max # of labels that can be handled */
/* advance pointer pc past blanks and tabs; pc is evaluated several times */
#define SKIPWS(pc) while ((*pc==' ') || (*pc=='\t')) pc++
/* The expansion ends in a comma operator: if *x equals v, x is advanced and
the expression written right after the macro invocation is evaluated as
part of the same statement; otherwise that expression is skipped. */
#define IFEQ(x, v) if (*x == v) x++ , /* do expression */
/* error messages */
/* Most of these are passed to die(), which formats them with linebuf as the
single argument, so a %s in them shows the script line being compiled.
UFLAG, COCFI, CCOFI and ULABL are instead printed directly with fprintf()
and receive their own argument at the call site. */
static char AGMSG[] = "garbled address %s";
static char CGMSG[] = "garbled command %s";
static char TMTXT[] = "too much text: %s";
static char AD1NG[] = "no addresses allowed for %s";
static char AD2NG[] = "only one address allowed for %s";
static char TMCDS[] = "too many commands, last was %s";
static char COCFI[] = "cannot open command-file %s";
static char UFLAG[] = "unknown flag %c";
/*static char COOFI[] = "cannot open %s";*/
static char CCOFI[] = "cannot create %s";
static char ULABL[] = "undefined label %s";
static char TMLBR[] = "too many {'s";
static char FRENL[] = "first RE must be non-null";
static char NSCAX[] = "no such command as %s";
static char TMRBR[] = "too many }'s";
static char DLABL[] = "duplicate label %s";
static char TMLAB[] = "too many labels: %s";
static char TMWFI[] = "too many w files";
static char REITL[] = "RE too long: %s";
static char TMLNR[] = "too many line numbers";
static char TRAIL[] = "command \"%s\" has trailing garbage";
static char RETER[] = "RE not terminated: %s";
static char CCERR[] = "unknown character class: %s";
/* Predefined character classes ([:name:]) and their members.
   The table is a flat list of (name, members) string pairs terminated by a
   NULL pair.  In a members string "a-b" denotes the inclusive range a..b and
   any other character stands for itself; recomp() expands these strings into
   the character-class bitmap.
   The cntrl class ends with DEL (0x7f); '~' (0x7e) is a printable character
   and belongs to print/graph/punct only. */
const char* cclasses[] = {
    "alnum", "a-zA-Z0-9",
    "lower", "a-z",
    "space", " \f\n\r\t\v",
    "alpha", "a-zA-Z",
    "digit", "0-9",
    "upper", "A-Z",
    "blank", " \t",
    "xdigit", "0-9A-Fa-f",
    "cntrl", "\x01-\x1f\x7f",
    "print", " -\x7e",
    "graph", "!-\x7e",
    "punct", "!-/:-@[-`{-\x7e",
    NULL, NULL};
typedef struct /* represent a command label */
{
char *name; /* the label name */
sedcmd *last; /* head of the chain (linked through u.link) of branch commands that refer to this label; resolve() walks it */
sedcmd *address; /* pointer to the cmd it labels; NULL while the label is only referenced, not yet defined */
} label;
/* label handling */
/* labels[0] is the list header (lablst): branches without a label are chained
onto it and main() sets its address once compilation has finished.  Named
labels are therefore allocated starting at labels[1]. */
static label labels[MAXLABS]; /* here's the label table */
static label *lab = labels + 1; /* pointer to current label */
static label *lablst = labels; /* header for search list */
/* string pool for regular expressions, append text, etc. etc. */
static char pool[POOLSIZE]; /* the pool */
static char *fp = pool; /* current pool pointer */
static char *poolend = pool + POOLSIZE; /* pointer past pool end */
/* compilation state */
static FILE *cmdf = NULL; /* current command source */
static char *cp = linebuf; /* compile pointer */
static sedcmd *cmdp = cmds; /* current compiled-cmd ptr */
static char *lastre = NULL; /* old RE pointer */
static int bdepth = 0; /* current {}-nesting level */
static int bcount = 0; /* # tagged patterns in current RE */
static char **eargv; /* scratch copy of argument list */
/* compilation flags */
static int eflag; /* -e option flag */
static int gflag; /* -g option flag */
/* prototypes */
static char *address(char *expbuf);
static char *gettext(char* txp);
static char *recomp(char *expbuf, char redelim);
static char *rhscomp(char* rhsp, char delim);
static char *ycomp(char *ep, char delim);
static int cmdcomp(char cchar);
static int cmdline(char *cbuf);
static label *search(label *ptr);
static void compile(void);
static void resolve(void);
/* sedexec.c prototypes */
void execute(char* file);
/* main sequence of the stream editor

   Interprets the command line, compiles the script and runs it:
     -e script   compile the following argument as script text
     -f file     compile the script contained in file
     -g          treat every s command as if it carried the g flag
     -n          suppress automatic printing
   If no command was compiled from -e/-f, the first non-option argument is
   compiled as the script.  The remaining arguments are input files; with
   none left, standard input is filtered.
   Exits with status 0 on success and 2 on usage or compilation errors. */
int main(int argc, char *argv[])
{
    eargc = argc;               /* set local copy of argument count */
    eargv = argv;               /* set local copy of argument list */
    cmdp->addr1 = pool;         /* 1st addr expand will be at pool start */
    if (eargc == 1)
        exit(0);                /* exit immediately if no arguments */

    /* scan through the arguments, interpreting each one;
       inside the loop eargc counts the current argument plus all that follow */
    while ((--eargc > 0) && (**++eargv == '-'))
        switch (eargv[0][1])
        {
        case 'e':
            eflag++; compile(); /* compile with e flag on */
            eflag = 0;
            continue;           /* get another argument */
        case 'f':
            /* The file name must follow -f.  Pre-decrement so that a
               trailing "-f" is rejected here instead of handing the
               argv terminator (NULL) to fopen(). */
            if (--eargc <= 0)   /* barf if no -f file */
                exit(2);
            if ((cmdf = fopen(*++eargv, "r")) == NULL)
            {
                fprintf(stderr, COCFI, *eargv);
                exit(2);
            }
            compile();          /* file is O.K., compile it */
            fclose(cmdf);
            continue;           /* go back for another argument */
        case 'g':
            gflag++;            /* set global flag on all s cmds */
            continue;
        case 'n':
            nflag++;            /* no print except on p flag or w */
            continue;
        default:
            /* diagnostics belong on stderr, not in the edited output */
            fprintf(stderr, UFLAG, eargv[0][1]);
            fputc('\n', stderr);
            continue;
        }

    if (cmdp == cmds)           /* no commands have been compiled */
    {
        /* step back so that compile() picks up the current argument
           as if it had been given with -e */
        eargv--; eargc++;
        eflag++; compile(); eflag = 0;
        eargv++; eargc--;
    }

    if (bdepth)                 /* we have unbalanced squigglies */
        die(TMLBR);

    lablst->address = cmdp;     /* set up header of label linked list */
    resolve();                  /* resolve label table indirections */

    if (eargc <= 0)             /* if there are no input file arguments */
        execute(NULL);          /* filter standard input only */
    else while(--eargc>=0)      /* else process each named file in turn */
        execute(*eargv++);
    exit(0);                    /* everything was O.K. if we got here */
}
#define H 0x80 /* 128 bit, on if there's really code for command */
#define LOWCMD 56 /* = '8', lowest char indexed in cmdmask */
/* indirect through this to get command internal code, if it exists */
/* The table is indexed by (command character - LOWCMD), eight characters
per row.  An entry of 0 means "no such command".  The low seven bits are the
command code stored in sedcmd.command; the H bit tells compile() to call
cmdcomp() for the character.  ':' and '}' carry only H (command code 0) and
'{' is compiled as a BCMD. */
static char cmdmask[] =
{
0, 0, H, 0, 0, H+EQCMD,0, 0, /* 8 9 : ; < = > ? */
0, 0, 0, 0, H+CDCMD,0, 0, CGCMD, /* @ A B C D E F G */
CHCMD, 0, 0, 0, H+CLCMD,0, CNCMD, 0, /* H I J K L M N O */
CPCMD, 0, 0, 0, H+CTCMD,0, 0, H+CWCMD, /* P Q R S T U V W */
0, 0, 0, 0, 0, 0, 0, 0, /* X Y Z [ \ ] ^ _ */
0, H+ACMD, H+BCMD, H+CCMD, DCMD, 0, 0, GCMD, /* ` a b c d e f g */
HCMD, H+ICMD, 0, 0, H+LCMD, 0, NCMD, 0, /* h i j k l m n o */
PCMD, H+QCMD, H+RCMD, H+SCMD, H+TCMD, 0, 0, H+WCMD, /* p q r s t u v w */
XCMD, H+YCMD, 0, H+BCMD, 0, H, 0, 0, /* x y z { | } ~ DEL */
};
/* precompile sed commands out of a file or a -e argument

   Repeatedly fetches a script line with cmdline() into linebuf and compiles
   the commands on it into consecutive slots of cmds[] (through cmdp), with
   addresses, regular expressions and text going into the string pool (fp).
   Returns when cmdline() reports that the current source is exhausted; any
   syntax error ends the program through die().

   An empty regular expression used as an address (//) reuses the most
   recently compiled address or RE.  recomp() leaves cp on the closing
   delimiter in that case, so the delimiter is skipped here, in the same way
   cmdcomp() does for an empty s command pattern. */
static void compile(void)
{
    char ccode;

    for(;;)                     /* main compilation loop */
    {
        SKIPWS(cp);
        if (*cp == ';') {
            cp++;
            SKIPWS(cp);
        }

        if (*cp == '\0' || *cp == '#')  /* get a new command line */
            if (cmdline(cp = linebuf) < 0)
                break;
        SKIPWS(cp);

        if (*cp == '\0' || *cp == '#')  /* a comment */
            continue;

        /* compile first address */
        if (fp > poolend)
            die(TMTXT);
        else if ((fp = address(cmdp->addr1 = fp)) == BAD)
            die(AGMSG);

        if (fp == NULL)         /* no address was given */
        {
            fp = cmdp->addr1;   /* use current pool location */
            cmdp->addr1 = NULL;
        }
        else
        {
            if (fp == cmdp->addr1)      /* if empty RE was found */
            {
                if (lastre == NULL)     /* nothing to reuse */
                    die(FRENL);
                cmdp->addr1 = lastre;   /* use the previous one */
                cp++;                   /* skip closing delimiter */
            }
            else
                lastre = cmdp->addr1;

            if (*cp == ',' || *cp == ';')       /* there's 2nd addr */
            {
                cp++;
                if (fp > poolend) die(TMTXT);
                fp = address(cmdp->addr2 = fp);
                if (fp == BAD || fp == NULL) die(AGMSG);
                if (fp == cmdp->addr2)  /* empty RE as 2nd address */
                {
                    cmdp->addr2 = lastre;
                    cp++;               /* skip closing delimiter */
                }
                else
                    lastre = cmdp->addr2;
            }
            else
                cmdp->addr2 = NULL;     /* no 2nd address */
        }
        if (fp > poolend) die(TMTXT);

        SKIPWS(cp);             /* discard whitespace after address */

        if (*cp == '!') {
            cmdp->flags.allbut = 1;
            cp++; SKIPWS(cp);
        }

        /* get cmd char, range-check it */
        if ((*cp < LOWCMD) || (*cp > '~')
            || ((ccode = cmdmask[*cp - LOWCMD]) == 0))
            die(NSCAX);

        cmdp->command = ccode & ~H;     /* fill in command value */
        if ((ccode & H) == 0)           /* if no compile-time code */
            cp++;                       /* discard command char */
        else if (cmdcomp(*cp++))        /* execute it; if ret = 1 */
            continue;                   /* skip next line read */

        if (++cmdp >= cmds + MAXCMDS) die(TMCDS);

        SKIPWS(cp);             /* look for trailing stuff */
        if (*cp != '\0')
        {
            if (*cp == ';')
            {
                continue;
            }
            else if (*cp != '#' && *cp != '}')
                die(TRAIL);
        }
    }
}
/* compile a single command */
static int cmdcomp(char cchar)
{
static sedcmd **cmpstk[MAXDEPTH]; /* current cmd stack for {} */
static const char *fname[WFILES]; /* w file name pointers */
static FILE *fout[WFILES]; /* w file file ptrs */
static int nwfiles = 2; /* count of open w files */
int i; /* indexing dummy used in w */
sedcmd *sp1, *sp2; /* temps for label searches */
label *lpt; /* ditto, and the searcher */
char redelim; /* current RE delimiter */
fout[0] = stdout;
fout[1] = stderr;
fname[0] = "/dev/stdout";
fname[1] = "/dev/stderr";
switch(cchar)
{
case '{': /* start command group */
cmdp->flags.allbut = !cmdp->flags.allbut;
cmpstk[bdepth++] = &(cmdp->u.link);
if (++cmdp >= cmds + MAXCMDS) die(TMCDS);
if (*cp == '\0') *cp++ = ';', *cp = '\0'; /* get next cmd w/o lineread */
return(1);
case '}': /* end command group */
if (cmdp->addr1) die(AD1NG); /* no addresses allowed */
if (--bdepth < 0) die(TMRBR); /* too many right braces */
*cmpstk[bdepth] = cmdp; /* set the jump address */
return(1);
case '=': /* print current source line number */
case 'q': /* exit the stream editor */
if (cmdp->addr2) die(AD2NG);
break;
case ':': /* label declaration */
if (cmdp->addr1) die(AD1NG); /* no addresses allowed */
fp = gettext(lab->name = fp); /* get the label name */
if ((lpt = search(lab))) /* does it have a double? */
{
if (lpt->address) die(DLABL); /* yes, abort */
}
else /* check that it doesn't overflow label table */
{
lab->last = NULL;
lpt = lab;
if (++lab >= labels + MAXLABS) die(TMLAB);
}
lpt->address = cmdp;
return(1);
case 'b': /* branch command */
case 't': /* branch-on-succeed command */
case 'T': /* branch-on-fail command */
SKIPWS(cp);
if (*cp == '\0') /* if branch is to start of cmds... */
{
/* add current command to end of label last */
if ((sp1 = lablst->last))
{
while((sp2 = sp1->u.link))
sp1 = sp2;
sp1->u.link = cmdp;
}
else /* lablst->last == NULL */
lablst->last = cmdp;
break;
}
fp = gettext(lab->name = fp); /* else get label into pool */
if ((lpt = search(lab))) /* enter branch to it */
{
if (lpt->address)
cmdp->u.link = lpt->address;
else
{
sp1 = lpt->last;
while((sp2 = sp1->u.link))
sp1 = sp2;
sp1->u.link = cmdp;
}
}
else /* matching named label not found */
{
lab->last = cmdp; /* add the new label */
lab->address = NULL; /* it's forward of here */
if (++lab >= labels + MAXLABS) /* overflow if last */
die(TMLAB);
}
break;
case 'a': /* append text */
case 'i': /* insert text */
case 'r': /* read file into stream */
if (cmdp->addr2) die(AD2NG);
case 'c': /* change text */
if ((*cp == '\\') && (*++cp == '\n')) cp++;
fp = gettext(cmdp->u.lhs = fp);
break;
case 'D': /* delete current line in hold space */
cmdp->u.link = cmds;
break;
case 's': /* substitute regular expression */
if (*cp == 0) /* get delimiter from 1st ch */
die(RETER);
else
redelim = *cp++;
if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD)
die(CGMSG);
if (fp == cmdp->u.lhs) { /* if compiled RE zero len */
if (lastre) {
cmdp->u.lhs = lastre; /* use the previous one */
cp++; /* skip delim */
}
else
die(FRENL);
}
else /* otherwise */
lastre = cmdp->u.lhs; /* save the one just found */
if ((cmdp->rhs = fp) > poolend) die(TMTXT);
if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) die(CGMSG);
if (gflag) cmdp->flags.global++;
while (*cp == 'g' || *cp == 'p' || *cp == 'P' || isdigit(*cp))
{
IFEQ(cp, 'g') cmdp->flags.global++;
IFEQ(cp, 'p') cmdp->flags.print = 1;
IFEQ(cp, 'P') cmdp->flags.print = 2;
if(isdigit(*cp))
{
if (cmdp->nth)
break; /* no multiple n args */
cmdp->nth = atoi(cp); /* check 0? */
while (isdigit(*cp)) cp++;
}
}
case 'l': /* list pattern space */
case 'L': /* dump pattern space */
if (*cp == 'w')
cp++; /* and execute a w command! */
else
break; /* s or L or l is done */
case 'w': /* write-pattern-space command */
case 'W': /* write-first-line command */
if (nwfiles >= WFILES) die(TMWFI);
fname[nwfiles] = fp;
fp = gettext((fname[nwfiles] = fp, fp)); /* filename will be in pool */
for(i = nwfiles-1; i >= 0; i--) /* match it in table */
if (strcmp(fname[nwfiles], fname[i]) == 0)
{
cmdp->fout = fout[i];
return(0);
}
/* if didn't find one, open new out file */
if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL)
{
fprintf(stderr, CCOFI, fname[nwfiles]);
exit(2);
}
fout[nwfiles++] = cmdp->fout;
break;
case 'y': /* transliterate text */
fp = ycomp(cmdp->u.lhs = fp, *cp++); /* compile translit */
if (fp == BAD) die(CGMSG); /* fail on bad form */
if (fp > poolend) die(TMTXT); /* fail on overflow */
break;
}
return(0); /* succeeded in interpreting one command */
}
/* generate replacement string for substitute command right hand side
   rhsp:  place to compile expression to
   delim: regular-expression end-mark to look for

   Reads the source text at cp.  Back references (\0..\9, and & which is
   stored as \0) are written as the digit character with bit 0x80 set; the
   escapes \n, \r and \t become the respective control characters and any
   other escaped character stands for itself.  On success the compiled
   string is NUL-terminated, cp is left behind the closing delimiter and the
   address following the terminator is returned.  BAD is returned if a back
   reference exceeds the number of tagged patterns (bcount) or if the line
   ends before the delimiter -- including a line that ends in a lone
   backslash, which must not be scanned past.  The source line is left
   unmodified. */
static char *rhscomp(char* rhsp, char delim)    /* uses bcount */
{
    register char *p = cp;

    for(;;)
    {
        char c = *p++;
        char tag;               /* digit of a back reference */

        if (c == '\\')          /* back reference or escape */
        {
            if (*p >= '0' && *p <= '9')
                tag = *p++;
            else                /* escape */
            {
                if (*p == '\0') /* nothing follows the backslash */
                    return(BAD);
                switch (*p) {
                case 'n': *rhsp = '\n'; break;
                case 'r': *rhsp = '\r'; break;
                case 't': *rhsp = '\t'; break;
                default: *rhsp = *p;
                }
                rhsp++; p++;
                continue;
            }
        }
        else if (c == delim)    /* found RE end, hooray... */
        {
            *rhsp++ = '\0';     /* cap the expression string */
            cp = p;
            return(rhsp);       /* pt at 1 past the RE */
        }
        else if (c == '&')      /* special case, convert to backref \0 */
            tag = '0';
        else if (c == '\0')     /* last ch not RE end, help! */
            return(BAD);
        else                    /* ordinary character, copy it */
        {
            *rhsp++ = c;
            continue;
        }

        /* check validity of pattern tag */
        if (tag > bcount + '0')
            return(BAD);
        *rhsp++ = tag | 0x80;   /* mark the good ones */
    }
}
/* compile a regular expression to internal form
   expbuf:  place to compile it to
   redelim: RE end-marker to look for

   Reads the RE source at cp.  The compiled form starts with one byte that
   is nonzero if the RE is anchored with ^, followed by marker bytes (see
   sed.h) with their operands, and ends with CEOF.
   Returns the address following the compiled RE and leaves cp behind the
   closing delimiter.  If the RE is empty (cp already points at redelim),
   expbuf itself is returned, nothing is written and cp is NOT advanced.
   Returns BAD on malformed input; an unterminated RE, an oversized
   character class and an unknown class name are fatal (die()).
   Sets bcount to the number of \( groups opened. */
static char *recomp(char *expbuf, char redelim)     /* uses cp, bcount */
{
    register char *ep = expbuf; /* current-compiled-char pointer */
    register char *sp = cp;     /* source-character ptr */
    register int c;             /* current-character pointer */
    char negclass;              /* all-but flag */
    char *lastep;               /* ptr to last expr compiled */
    char *lastep2;              /* dito, but from the last loop */
    char *svclass;              /* start of current char class */
    char brnest[MAXTAGS];       /* bracket-nesting array */
    char *brnestp;              /* ptr to current bracket-nest */
    char *pp;                   /* scratch pointer */
    int classct;                /* class element count */
    int tags;                   /* # of closed tags */

    if (*cp == redelim) {       /* if first char is RE endmarker */
        return(ep);
    }

    lastep = lastep2 = NULL;    /* there's no previous RE */
    brnestp = brnest;           /* initialize ptr to brnest array */
    tags = bcount = 0;          /* initialize counters */

    if ((*ep++ = (*sp == '^'))) /* check for start-of-line syntax */
        sp++;

    for (;;)
    {
        if (*sp == 0)           /* no termination */
            die (RETER);
        if (ep >= expbuf + RELIMIT)     /* match is too large */
            return(cp = sp, BAD);
        if ((c = *sp++) == redelim)     /* found the end of the RE */
        {
            cp = sp;
            if (brnestp != brnest)      /* \(, \) unbalanced */
                return(BAD);
            *ep++ = CEOF;       /* write end-of-pattern mark */
            return(ep);         /* return ptr to compiled RE */
        }

        /* lastep is the start of the element compiled in the previous
           iteration, i.e. the one a following * or \+ applies to */
        lastep = lastep2;
        lastep2 = ep;

        switch (c)
        {
        case '\\':
            if ((c = *sp++) == '(')     /* start tagged section */
            {
                if (bcount >= MAXTAGS)
                    return(cp = sp, BAD);
                *brnestp++ = bcount;    /* update tag stack */
                *ep++ = CBRA;           /* enter tag-start */
                *ep++ = bcount++;       /* bump tag count */
                lastep2 = NULL;
                continue;
            }
            else if (c == ')')          /* end tagged section */
            {
                if (brnestp <= brnest)  /* extra \) */
                    return(cp = sp, BAD);
                *ep++ = CKET;           /* enter end-of-tag */
                *ep++ = *--brnestp;     /* pop tag stack */
                tags++;                 /* count closed tags */
                for (lastep2 = ep-1; *lastep2 != CBRA; )
                    --lastep2;          /* FIXME: lastep becomes start */
                continue;
            }
            else if (c >= '1' && c <= '9' && c != redelim)  /* tag use, if !delim */
            {
                if ((c -= '1') >= tags) /* too few */
                    return(BAD);
                *ep++ = CBACK;          /* enter tag mark */
                *ep++ = c;              /* and the number */
                continue;
            }
            else if (c == '\n')         /* escaped newline no good */
                return(cp = sp, BAD);
            else if (c == 'n')          /* match a newline */
                c = '\n';
            else if (c == 't')          /* match a tab */
                c = '\t';
            else if (c == 'r')          /* match a return */
                c = '\r';
            else if (c == '+')          /* 1..n repeat of previous pattern */
            {
                if (lastep == NULL)     /* if + not first on line */
                    goto defchar;       /* match a literal + */
                pp = ep;                /* else save old ep */
                *ep++ = *lastep++ | STAR;   /* flag the copy */
                while (lastep < pp)     /* so we can blt the pattern */
                    *ep++ = *lastep++;
                lastep2 = lastep;       /* no new expression */
                continue;
            }
            goto defchar;               /* else match \c */

        case '\0':              /* ignore nuls */
            continue;

        case '\n':              /* trailing pattern delimiter is missing */
            return(cp = sp, BAD);

        case '.':               /* match any char except newline */
            *ep++ = CDOT;
            continue;

        case '*':               /* 0..n repeat of previous pattern */
            if (lastep == NULL) /* if * isn't first on line */
                goto defchar;   /* match a literal * */
            *lastep |= STAR;    /* flag previous pattern */
            lastep2 = lastep;   /* no new expression */
            continue;

        case '$':               /* match only end-of-line */
            if (*sp != redelim) /* if we're not at end of RE */
                goto defchar;   /* match a literal $ */
            *ep++ = CDOL;       /* insert end-symbol mark */
            continue;

        case '[':               /* begin character set pattern */
            if (ep + 17 >= expbuf + RELIMIT)
                die(REITL);
            *ep++ = CCL;        /* insert class mark */
            if ((negclass = ((c = *sp++) == '^')))
                c = *sp++;
            svclass = sp;       /* save ptr to class start */
            do {
                if (c == '\0') die(CGMSG);

                /* handle predefined character classes */
                if (c == '[' && *sp == ':')
                {
                    /* look for the matching ":]]" */
                    char *p;
                    const char *p2;
                    for (p = sp+3; *p; p++)
                        if (*p == ']' &&
                            *(p-1) == ']' &&
                            *(p-2) == ':')
                        {
                            char cc[8];
                            const char **it;
                            /* copy the class name, keeping one byte of
                               cc[] free for the terminator; an overlong
                               name is cut short and thus never matches */
                            for (p2 = sp+1;
                                 p2 < p-2 &&
                                 (size_t)(p2-sp-1) < sizeof(cc) - 1;
                                 p2++)
                                cc[p2-sp-1] = *p2;
                            cc[p2-sp-1] = 0;    /* termination */

                            it = cclasses;
                            while (*it && strcmp(*it, cc))
                                it +=2;
                            if (!*it++)
                                die(CCERR);

                            /* generate mask */
                            p2 = *it;
                            while (*p2) {
                                if (p2[1] == '-' && p2[2]) {
                                    for (c = *p2; c <= p2[2]; c++)
                                        ep[c >> 3] |= bits(c & 7);
                                    p2 += 3;
                                }
                                else {
                                    c = *p2++;
                                    ep[c >> 3] |= bits(c & 7);
                                }
                            }
                            sp = p; c = 0; break;
                        }
                }

                /* handle character ranges */
                if (c == '-' && sp > svclass && *sp != ']')
                    for (c = sp[-2]; c < *sp; c++)
                        ep[c >> 3] |= bits(c & 7);

                /* handle escape sequences in sets */
                if (c == '\\')
                {
                    if ((c = *sp++) == 'n')
                        c = '\n';
                    else if (c == 't')
                        c = '\t';
                    else if (c == 'r')
                        c = '\r';
                }

                /* enter (possibly translated) char in set */
                if (c)
                    ep[c >> 3] |= bits(c & 7);
            } while
                ((c = *sp++) != ']');

            /* invert the bitmask if all-but was specified */
            if (negclass)
                for(classct = 0; classct < 16; classct++)
                    ep[classct] ^= 0xFF;
            ep[0] &= 0xFE;      /* never match ASCII 0 */
            ep += 16;           /* advance ep past set mask */
            continue;

        defchar:                /* match literal character */
        default:                /* which is what we'd do by default */
            *ep++ = CCHR;       /* insert character mark */
            *ep++ = c;
        }
    }
}
/* Copy one script line from the string p into the buffer that starts right
   after cbuf.  A backslash and the character following it are copied
   verbatim, so an escaped newline does not end the line.
   Returns 1 with a complete line in the buffer: *nextp is the rest of the
   string if the line was ended by a newline, or NULL if the string is used
   up.  Returns -1 (and sets *nextp to NULL) if the string ends right after
   a backslash. */
static int copy_earg_line(char *cbuf, char *p, char **nextp)
{
    while((*++cbuf = *p++))
        if (*cbuf == '\\')
        {
            if ((*++cbuf = *p++) == '\0')
                return(*nextp = NULL, -1);
        }
        else if (*cbuf == '\n')     /* end of 1 cmd line */
        {
            *cbuf = '\0';
            return(*nextp = p, 1);
            /* we'll be back for the rest... */
        }

    /* found end-of-string; can advance to next argument */
    return(*nextp = NULL, 1);
}

/* read next command from -e argument or command file
   cbuf: buffer receiving the NUL-terminated line
   Returns 1 if a line was read and -1 if the current source is exhausted.

   With eflag set, the script text is taken from the next command-line
   argument; a multi-line argument is handed out one line per call.  Every
   line is copied by the same helper, so that an escaped character -- such
   as the back reference \0 -- is treated alike on the first and on later
   lines of the argument.
   Otherwise the line is read from the command file cmdf. */
static int cmdline(char *cbuf)      /* uses eflag, eargc, cmdf */
{
    register int inc;   /* not char because must hold EOF */

    cbuf--;             /* so pre-increment points us at cbuf */

    /* e command flag is on */
    if (eflag)
    {
        register char *p;       /* ptr to current -e argument */
        static char *savep;     /* saves previous value of p */

        if (eflag > 0)          /* there are pending -e arguments */
        {
            eflag = -1;
            if (eargc-- <= 0)
                exit(2);        /* if no arguments, barf */

            /* argv is terminated by a null pointer, which is what we
               get here if the script text is missing */
            if ((p = *++eargv) == NULL)
                exit(2);

            /* else transcribe next e argument into cbuf */
            return(copy_earg_line(cbuf, p, &savep));
        }

        /* continue with the rest of a multi-line argument, if any */
        if ((p = savep) == NULL)
            return(-1);

        return(copy_earg_line(cbuf, p, &savep));
    }

    /* if no -e flag read from command file descriptor */
    while((inc = getc(cmdf)) != EOF)        /* get next char */
        if ((*++cbuf = inc) == '\\')        /* if it's escape */
            *++cbuf = inc = getc(cmdf);     /* get next char */
        else if (*cbuf == '\n')             /* end on newline */
            return(*cbuf = '\0', 1);        /* cap the string */

    return(*++cbuf = '\0', -1);     /* end-of-file, no more chars */
}
/* expand an address at *cp... into expbuf, return ptr at following char */
static char *address(char *expbuf) /* uses cp, linenum */
{
static int numl = 0; /* current ind in addr-number table */
register char *rcp; /* temp compile ptr for forwd look */
long lno; /* computed value of numeric address */
if (*cp == '$') /* end-of-source address */
{
*expbuf++ = CEND; /* write symbolic end address */
*expbuf++ = CEOF; /* and the end-of-address mark (!) */
cp++; /* go to next source character */
last_line_used = TRUE;
return(expbuf); /* we're done */
}
if (*cp == '/') /* start of regular-expression match */
return(recomp(expbuf, *cp++)); /* compile the RE */
rcp = cp; lno = 0; /* now handle a numeric address */
while(*rcp >= '0' && *rcp <= '9') /* collect digits */
lno = lno*10 + *rcp++ - '0'; /* compute their value */
if (rcp > cp) /* if we caught a number... */
{
*expbuf++ = CLNUM; /* put a numeric-address marker */
*expbuf++ = numl; /* and the address table index */
linenum[numl++] = lno; /* and set the table entry */
if (numl >= MAXLINES) /* oh-oh, address table overflow */
die(TMLNR); /* abort with error message */
*expbuf++ = CEOF; /* write the end-of-address marker */
cp = rcp; /* point compile past the address */
return(expbuf); /* we're done */
}
return(NULL); /* no legal address was found */
}
/* accept multiline input from *cp..., discarding leading whitespace
txp: where to put the text */
static char *gettext(char* txp) /* uses global cp */
{
register char *p = cp;
SKIPWS(p); /* discard whitespace */
do {
if ((*txp = *p++) == '\\') /* handle escapes */
*txp = *p++;
if (*txp == '\0') /* we're at end of input */
return(cp = --p, ++txp);
else if (*txp == '\n') /* also SKIPWS after newline */
SKIPWS(p);
} while (txp++); /* keep going till we find that nul */
return(txp);
}
/* Look through the label table, from the list header up to but excluding
   ptr, for an entry carrying the same name as *ptr.
   Returns the first such entry, or NULL if there is none. */
static label *search(label *ptr)    /* uses global lablst */
{
    label *cand = lablst;

    while (cand < ptr)
    {
        if (cand->name != NULL && strcmp(cand->name, ptr->name) == 0)
            return(cand);
        cand++;
    }
    return(NULL);
}
/* Replace the chains of pending branch commands by their jump targets.
   While compiling, the branch commands referring to one label are chained
   through their u.link members, starting at the label's `last' member.
   Here the u.link of each chained command is set to the label's address.
   A label that was referenced but never defined is a fatal error (exit
   status 2). */
static void resolve(void)       /* uses global lablst */
{
    label *lptr;

    for (lptr = lablst; lptr < lab; lptr++)
    {
        sedcmd *cur, *next;

        if (lptr->address == NULL)      /* the label was never defined */
        {
            fprintf(stderr, ULABL, lptr->name);
            exit(2);
        }

        /* walk the chain; the successor has to be fetched before the
           link is overwritten with the target */
        for (cur = lptr->last; cur != NULL; cur = next)
        {
            next = cur->u.link;
            cur->u.link = lptr->address;
        }
    }
}
/* compile a y (transliterate) command
ep: where to compile to
delim: end delimiter to look for */
static char *ycomp(char *ep, char delim)
{
char *tp, *sp;
int c;
/* scan the 'from' section for invalid chars */
for(sp = tp = cp; *tp != delim; tp++)
{
if (*tp == '\\')
tp++;
if ((*tp == '\n') || (*tp == '\0'))
return(BAD);
}
tp++; /* tp now points at first char of 'to' section */
/* now rescan the 'from' section */
while((c = *sp++ & 0x7F) != delim)
{
if (c == '\\' && *sp == 'n')
{
sp++;
c = '\n';
}
if ((ep[c] = *tp++) == '\\' && *tp == 'n')
{
ep[c] = '\n';
tp++;
}
if ((ep[c] == delim) || (ep[c] == '\0'))
return(BAD);
}
if (*tp != delim) /* 'to', 'from' parts have unequal lengths */
return(BAD);
cp = ++tp; /* point compile ptr past translit */
for(c = 0; c < 128; c++) /* fill in self-map entries in table */
if (ep[c] == 0)
ep[c] = c;
return(ep + 0x80); /* return first free location past table end */
}
/* sedcomp.c ends here */

819
commands/sed/sedexec.c Normal file
View file

@ -0,0 +1,819 @@
/* sedexec.c -- execute compiled form of stream editor commands
Copyright (C) 1995-2003 Eric S. Raymond
Copyright (C) 2004-2006 Rene Rebe
The single entry point of this module is the function execute(). It
may take a string argument (the name of a file to be used as text) or
the argument NULL which tells it to filter standard input. It executes
the compiled commands in cmds[] on each line in turn.
The function command() does most of the work. match() and advance()
are used for matching text against precompiled regular expressions and
dosub() does right-hand-side substitution. Getline() does text input;
readout() and memcmp() are output and string-comparison utilities.
*/
#include <stdlib.h> /* exit */
#include <stdio.h> /* {f}puts, {f}printf, getc/putc, f{re}open, fclose */
#include <ctype.h> /* for isprint(), isdigit(), toascii() macros */
#include <string.h> /* for memcmp(3) */
#include "sed.h" /* command type structures & miscellaneous constants */
/***** shared variables imported from the main ******/
/* main data areas */
extern char linebuf[]; /* current-line buffer (the pattern space) */
extern sedcmd cmds[]; /* hold compiled commands */
extern long linenum[]; /* numeric-addresses table */
/* miscellaneous shared variables */
extern int nflag; /* -n option flag */
extern int eargc; /* scratch copy of argument count */
extern sedcmd *pending; /* ptr to command waiting to be executed */
extern int last_line_used; /* last line address ($) used */
/***** end of imported stuff *****/
#define MAXHOLD MAXBUF /* size of the hold space */
#define GENSIZ MAXBUF /* maximum genbuf size */
static char LTLMSG[] = "sed: line too long\n";
static char *spend; /* current end-of-line-buffer pointer */
static long lnum = 0L; /* current source line number */
/* append buffer maintenance */
static sedcmd *appends[MAXAPPENDS]; /* NULL-terminated queue of a and r commands, flushed by readout() */
static sedcmd **aptr = appends; /* ptr to current append */
/* genbuf and its pointers */
static char genbuf[GENSIZ]; /* scratch buffer: dosub() builds the substituted line here */
static char *loc1; /* start of the most recent match (set by match()) */
static char *loc2; /* end of the most recent match (set by advance() at CEOF) */
static char *locs; /* position at which advance() stops backing up a closure; 0 if none */
/* command-logic flags */
static int lastline; /* set by getline() once the last input line has been read */
static int line_with_newline; /* line had newline */
static int jump; /* jump to cmd's link address if set */
static int delete; /* delete command flag */
static int needs_advance; /* needs inc after substitution */
/* ugly HACK - needs REWORK */
/* tagged-pattern tracking */
static char *bracend[MAXTAGS]; /* tagged pattern end pointers */
static char *brastart[MAXTAGS]; /* tagged pattern start pointers */
/* prototypes */
static char *getline(char *buf, int max);
static char *place(char* asp, char* al1, char* al2);
static int advance(char* lp, char* ep, char** eob);
static int match(char *expbuf, int gf);
static int selected(sedcmd *ipc);
static int substitute(sedcmd *ipc);
static void command(sedcmd *ipc);
static void dosub(char *rhsbuf);
static void dumpto(char *p1, FILE *fp);
static void listto(char *p1, FILE *fp);
static void readout(void);
static void truncated(int h);
/* execute the compiled commands in cmds[] on a file
   file:  name of text source file to be filtered, or NULL to keep
          reading whatever stdin currently is

   The named file is attached to stdin with freopen().  If that fails a
   diagnostic is printed and the file is skipped: a failed freopen() leaves
   stdin closed, so nothing may be read from it any more.

   If an earlier call left a command in `pending` (n or N ran out of
   input), that command is resumed first, before a new line is read.
   Otherwise each line is read, every selected command is run on it, the
   pattern space is written unless -n or a delete suppressed it, and any
   queued a/r text is emitted. */
void execute(char* file)
{
	register sedcmd	*ipc;		/* ptr to current command */
	char		*execp;		/* end of the line just read */

	if (file != NULL && freopen(file, "r", stdin) == NULL)
	{
		fprintf(stderr, "sed: can't open %s\n", file);
		return;			/* stdin is unusable now; skip this file */
	}

	if (pending)			/* there's a command waiting */
	{
		ipc = pending;		/* it will be first executed */
		pending = NULL;		/* turn off the waiting flag */
		goto doit;		/* go to execute it immediately */
	}

	/* here's the main command-execution loop */
	for(;;)
	{
		/* get next line to filter */
		if ((execp = getline(linebuf, MAXBUF+1)) == BAD)
			return;
		spend = execp;

		/* loop through compiled commands, executing them */
		for(ipc = cmds; ipc->command; )
		{
			/* address command to select? - If not address
			   but allbut then invert, that is skip, the command */
			if (ipc->addr1 || ipc->flags.allbut) {
				if (!ipc->addr1 || !selected(ipc)) {
					ipc++;	/* not selected, next cmd */
					continue;
				}
			}
	doit:
			command(ipc);	/* execute the command pointed at */

			if (delete)	/* if delete flag is set */
				break;	/* don't exec rest of compiled cmds */

			if (jump)	/* if jump set, follow cmd's link */
			{
				jump = FALSE;
				if ((ipc = ipc->u.link) == 0)
				{
					/* no target: end this cycle */
					ipc = cmds;
					break;
				}
			}
			else		/* normal goto next command */
				ipc++;
		}

		/* we've now done all modification commands on the line */

		/* here's where the transformed line is output */
		if (!nflag && !delete)
		{
			fwrite(linebuf, spend - linebuf, 1, stdout);
			if (line_with_newline)
				putc('\n', stdout);
		}

		/* if we've been set up for append, emit the text from it */
		if (aptr > appends)
			readout();

		delete = FALSE;	/* clear delete flag; about to get next cmd */
	}
}
/* is current command selected
   ipc:     command whose address(es) are tested against the current line
   returns: non-zero if the command applies to the current line; the
            result is inverted when the command carries the allbut flag

   A two-address command keeps its own inrange flag between lines.  The
   range opens on the line matching addr1 and closes on the line matching
   addr2 (a line number that has been reached, or a matching RE; `$`
   never closes it).  As POSIX requires, when addr2 is a line number that
   is not greater than the line on which the range would open, only that
   single line is selected and the range is not opened.
   The caller must ensure addr1 is non-NULL. */
static int selected(sedcmd *ipc)
{
	register char	*p1 = ipc->addr1;	/* point p1 at first address */
	register char	*p2 = ipc->addr2;	/* and p2 at second */
	unsigned char	c;			/* index into linenum[] */
	int		hit = FALSE;		/* line is selected */
	int		opened = FALSE;		/* addr1 matched on this line */

	if (ipc->flags.inrange)
	{
		hit = TRUE;
		if (*p2 == CEND)
			;			/* range runs to end of input */
		else if (*p2 == CLNUM)
		{
			c = p2[1];
			if (lnum >= linenum[c])
				ipc->flags.inrange = FALSE;
		}
		else if (match(p2, 0))
			ipc->flags.inrange = FALSE;
	}
	else if (*p1 == CEND)
	{
		if (lastline)
			hit = TRUE;
	}
	else if (*p1 == CLNUM)
	{
		c = p1[1];
		if (lnum == linenum[c])
			opened = TRUE;
	}
	else if (match(p1, 0))
		opened = TRUE;

	if (opened)
	{
		hit = TRUE;
		if (p2)
		{
			/* a numeric end that is already behind us (or is this
			   very line) selects just this one line */
			if (*p2 == CLNUM) {
				c = p2[1];
				if (linenum[c] > lnum)
					ipc->flags.inrange = TRUE;
			}
			else
				ipc->flags.inrange = TRUE;
		}
	}

	return ipc->flags.allbut ? !hit : hit;
}
/* match RE at expbuf against linebuf; if gf set, copy linebuf from genbuf
   expbuf:  compiled RE; its first byte is a flag, the elements follow it
   gf:      zero for a first match on the line; non-zero to look for a
            further match after the previous one (used by substitute()
            for the global flag)
   returns: non-zero on a match, with loc1 at the start of the match
            (loc2, the end, is set by advance()); FALSE otherwise
   Always resets needs_advance to 0 once the start position is chosen. */
static int match(char *expbuf, int gf) /* uses genbuf */
{
char *p1, *p2, c;
if (gf)
{
/* a pattern whose flag byte is set is only ever tried at one position,
   so it cannot match a second time */
if (*expbuf)
return(FALSE);
/* bring the line built in genbuf back into linebuf */
p1 = linebuf; p2 = genbuf;
while ((*p1++ = *p2++));
/* step one char past the previous match end if a closure asked for it */
if (needs_advance) {
loc2++;
}
/* resume after the previous match; locs stops closures backing up to it */
locs = p1 = loc2;
}
else
{
/* start at the beginning of the line, offset by needs_advance */
p1 = linebuf + needs_advance;
locs = FALSE;
}
needs_advance = 0;
p2 = expbuf;
/* non-zero flag byte: try the pattern at p1 only, without scanning
   (presumably the '^' anchor flag written by the compiler - confirm
   in sedcomp.c) */
if (*p2++)
{
loc1 = p1;
if(*p2 == CCHR && p2[1] != *p1) /* 1st char is wrong */
return(FALSE); /* so fail */
return(advance(p1, p2, NULL)); /* else try to match rest */
}
/* quick check for 1st character if it's literal */
if (*p2 == CCHR)
{
c = p2[1]; /* pull out character to search for */
do {
if (*p1 != c)
continue; /* scan the source string */
if (advance(p1, p2,NULL)) /* found it, match the rest */
return(loc1 = p1, 1);
} while
(*p1++);
return(FALSE); /* didn't find that first char */
}
/* else try for unanchored match of the pattern: attempt it at every
   position up to and including the terminating NUL */
do {
if (advance(p1, p2, NULL))
return(loc1 = p1, 1);
} while
(*p1++);
/* if got here, didn't match either way */
return(FALSE);
}
/* attempt to advance match pointer by one pattern element
   lp: source (linebuf) ptr
   ep: regular expression element ptr
   eob: if non-NULL, reaching a CKET element stores the current text
        position in *eob and returns TRUE at once (this is how the
        CBRA|STAR case matches one repetition of a group); if NULL, CKET
        records the position in bracend[] and matching goes on
   returns: TRUE if the elements from ep match the text at lp (loc2 is
            set to the end of the match when CEOF is reached), else FALSE.
            An unknown element code prints an error and exits with status 2.
   Closure cases set needs_advance to 1 and share the backtracking loop at
   the `star` label. */
static int advance(char* lp, char* ep, char** eob)
{
char *curlp; /* save ptr for closures */
char c; /* scratch character holder */
char *bbeg; /* start of the text a back reference refers to */
int ct; /* length of that text */
signed int bcount = -1; /* tag index of the last CKET seen in this call, -1 if none */
for (;;)
switch (*ep++)
{
case CCHR: /* literal character */
if (*ep++ == *lp++) /* if chars are equal */
continue; /* matched */
return(FALSE); /* else return false */
case CDOT: /* anything but newline */
if (*lp++) /* first NUL is at EOL */
continue; /* keep going if didn't find */
return(FALSE); /* else return false */
case CNL: /* start-of-line */
case CDOL: /* end-of-line */
if (*lp == 0) /* found that first NUL? */
continue; /* yes, keep going */
return(FALSE); /* else return false */
case CEOF: /* end-of-address mark */
loc2 = lp; /* set second loc */
return(TRUE); /* return true */
case CCL: /* character class: 16-byte bitmask follows */
c = *lp++ & 0177;
if (ep[c>>3] & bits(c & 07)) /* is char in set? */
{
ep += 16; /* then skip rest of bitmask */
continue; /* and keep going */
}
return(FALSE); /* else return false */
case CBRA: /* start of tagged pattern */
brastart[(unsigned char)*ep++] = lp; /* mark it */
continue; /* and go */
case CKET: /* end of tagged pattern */
bcount = *ep;
if (eob) {
/* caller only wanted one repetition of the group: report where it ended */
*eob = lp;
return (TRUE);
}
else
bracend[(unsigned char)*ep++] = lp; /* mark it */
continue; /* and go */
case CBACK: /* match back reference */
bbeg = brastart[(unsigned char)*ep];
ct = bracend[(unsigned char)*ep++] - bbeg;
if (memcmp(bbeg, lp, ct) == 0)
{
lp += ct;
continue;
}
return(FALSE);
case CBRA|STAR: /* \(...\)* */
{
char *lastlp; /* where the current repetition started */
curlp = lp;
/* start with an empty tag at the current position */
if (*ep > bcount)
brastart[(unsigned char)*ep] = bracend[(unsigned char)*ep] = lp;
/* match the group repeatedly; each successful call moves lp to the
   end of that repetition via eob */
while (advance(lastlp=lp, ep+1, &lp)) {
if (*ep > bcount && lp != lastlp) {
bracend[(unsigned char)*ep] = lp; /* mark it */
brastart[(unsigned char)*ep] = lastlp;
}
if (lp == lastlp) break; /* empty repetition: stop, no progress */
}
ep++;
/* FIXME: scan for the brace end */
while (*ep != CKET)
ep++;
ep+=2; /* step over CKET and the byte after it */
needs_advance = 1;
if (lp == curlp) /* 0 matches */
continue;
lp++; /* star: starts by decrementing lp */
goto star;
}
case CBACK|STAR: /* \n* */
bbeg = brastart[(unsigned char)*ep];
ct = bracend[(unsigned char)*ep++] - bbeg;
curlp = lp;
/* consume as many copies of the referenced text as possible */
while(memcmp(bbeg, lp, ct) == 0)
lp += ct;
/* then give copies back one at a time until the rest matches */
while(lp >= curlp)
{
if (advance(lp, ep, eob))
return(TRUE);
lp -= ct;
}
return(FALSE);
case CDOT|STAR: /* match .* */
curlp = lp; /* save closure start loc */
while (*lp++); /* match anything */
goto star; /* now look for followers */
case CCHR|STAR: /* match <literal char>* */
curlp = lp; /* save closure start loc */
while (*lp++ == *ep); /* match many of that char */
ep++; /* to start of next element */
goto star; /* match it and followers */
case CCL|STAR: /* match [...]* */
curlp = lp; /* save closure start loc */
do {
c = *lp++ & 0x7F; /* match any in set */
} while
(ep[c>>3] & bits(c & 07));
ep += 16; /* skip past the set */
goto star; /* match followers */
star: /* the recursion part of a * or + match */
/* on entry lp is one past the first char the closure failed to match */
needs_advance = 1;
if (--lp == curlp) { /* 0 matches */
continue;
}
#if 0
if (*ep == CCHR)
{
c = ep[1];
do {
if (*lp != c)
continue;
if (advance(lp, ep, eob))
return(TRUE);
} while
(lp-- > curlp);
return(FALSE);
}
if (*ep == CBACK)
{
c = *(brastart[ep[1]]);
do {
if (*lp != c)
continue;
if (advance(lp, ep, eob))
return(TRUE);
} while
(lp-- > curlp);
return(FALSE);
}
#endif
/* match followers, try shorter match, if needed */
do {
if (lp == locs) /* do not back up to the position held in locs */
break;
if (advance(lp, ep, eob))
return(TRUE);
} while
(lp-- > curlp);
return(FALSE);
default:
fprintf(stderr, "sed: internal RE error, %o\n", *--ep);
exit (2);
}
}
/* perform s command
   ipc:     ptr to s command struct
   returns: TRUE if a substitution was actually made, FALSE otherwise

   The first loop looks for the occurrence selected by ipc->nth (0 means
   the first) and replaces it.  If that occurrence is never reached the
   command has failed: FALSE is returned and the line is left alone.  In
   particular the global pass below is not entered, because match() with
   gf set copies genbuf over linebuf and genbuf only holds this line
   after dosub() has run.
   With the global flag, further matches after the replaced one are
   substituted until none is found or the end of the line is reached. */
static int substitute(sedcmd *ipc)
{
	unsigned int	n = 1;			/* ordinal of the match in hand */
	int		replaced = FALSE;	/* a substitution was made */

	/* find a match */
	/* the needs_advance code got a bit tricky - might need a clean
	   refactoring */
	/* NOTE(review): a retry makes match() start at linebuf +
	   needs_advance, i.e. at an offset equal to the attempt count rather
	   than after the previous match - confirm this picks the intended
	   occurrence */
	while (match(ipc->u.lhs, 0)) {
		/* nth 0 is implied 1 */
		if (!ipc->nth || n == ipc->nth) {
			dosub(ipc->rhs);	/* perform it once */
			replaced = TRUE;
			break;
		}
		needs_advance = n++;
	}
	if (!replaced)
		return(FALSE);			/* command fails */

	if (ipc->flags.global)			/* if global flag enabled */
		do {				/* cycle through possibles */
			if (match(ipc->u.lhs, 1)) {	/* found another */
				dosub(ipc->rhs);	/* so substitute */
			}
			else			/* otherwise, */
				break;		/* we're done */
		} while (*loc2);
	return(TRUE);				/* we succeeded */
}
/* generate substituted right-hand side (of s command)
rhsbuf: where to put the result */
static void dosub(char *rhsbuf) /* uses linebuf, genbuf, spend */
{
char *lp, *sp, *rp;
int c;
/* copy linebuf to genbuf up to location 1 */
lp = linebuf; sp = genbuf;
while (lp < loc1) *sp++ = *lp++;
for (rp = rhsbuf; (c = *rp++); )
{
if (c & 0200 && (c & 0177) == '0')
{
sp = place(sp, loc1, loc2);
continue;
}
else if (c & 0200 && (c &= 0177) >= '1' && c < MAXTAGS+'1')
{
sp = place(sp, brastart[c-'1'], bracend[c-'1']);
continue;
}
*sp++ = c & 0177;
if (sp >= genbuf + MAXBUF)
fprintf(stderr, LTLMSG);
}
lp = loc2;
loc2 = sp - genbuf + linebuf;
while ((*sp++ = *lp++))
if (sp >= genbuf + MAXBUF)
fprintf(stderr, LTLMSG);
lp = linebuf; sp = genbuf;
while ((*lp++ = *sp++));
spend = lp-1;
}
/* place chars at *al1...*(al1 - 1) at asp... in genbuf[] */
static char *place(char* asp, char* al1, char* al2) /* uses genbuf */
{
while (al1 < al2)
{
*asp++ = *al1++;
if (asp >= genbuf + MAXBUF)
fprintf(stderr, LTLMSG);
}
return(asp);
}
/* list the pattern space in visually unambiguous form *p1... to fp
   p1: the source; chars from p1 up to (not including) spend are listed
   fp: output stream to write to

   Printable chars are written as they are.  Anything else is written as
   a backslash followed by b, t, n, r or e for BS, TAB, NL, CR and ESC,
   or by the byte value as two hex digits.  A newline ends the listing.

   Each byte is handled as unsigned char: isprint() is only defined for
   such values (and EOF), and a negative char would otherwise be printed
   sign-extended by %02x. */
static void listto(char *p1, FILE *fp)
{
	unsigned char ch;

	for (; p1 < spend; p1++)
	{
		ch = (unsigned char)*p1;
		if (isprint(ch))
			putc(ch, fp);		/* pass it through */
		else
		{
			putc('\\', fp);		/* emit a backslash */
			switch(ch)
			{
			case '\b':	putc('b', fp); break;	/* BS */
			case '\t':	putc('t', fp); break;	/* TAB */
			case '\n':	putc('n', fp); break;	/* NL */
			case '\r':	putc('r', fp); break;	/* CR */
			case '\033':	putc('e', fp); break;	/* ESC */
			default:	fprintf(fp, "%02x", (unsigned int)ch);
			}
		}
	}
	putc('\n', fp);
}
/* write a hex dump expansion of *p1... to fp
   p1: source; bytes from p1 up to (not including) spend are dumped
   fp: output

   Every byte is written as two hex digits, followed by the hex code of a
   newline and then a real newline.  Bytes are converted through unsigned
   char so that a negative char is not printed sign-extended. */
static void dumpto(char *p1, FILE *fp)
{
	for (; p1 < spend; p1++)
		fprintf(fp, "%02x", (unsigned int)(unsigned char)*p1);
	fprintf(fp, "%02x", (unsigned int)'\n');
	putc('\n', fp);
}
/* Warn on stderr that text was cut to MAXBUF characters.
   h: non-zero if the hold space was truncated, zero if the current line
   At most one warning is issued per input line number. */
static void truncated(int h)
{
	static long reported = 0L;	/* line number of the last warning */

	if (lnum != reported)
	{
		reported = lnum;
		fputs("sed: ", stderr);
		if (h)
			fputs("hold space", stderr);
		else
			fprintf(stderr, "line %ld", lnum);
		fprintf(stderr, " truncated to %d characters\n", MAXBUF);
	}
}
/* execute compiled command pointed at by ipc
   ipc: the command to run on the current pattern space (linebuf..spend)

   Results are reported to execute() through file-scope flags: `delete`
   ends the cycle without output, `jump` asks for a branch through
   ipc->u.link, `pending` remembers an n/N that ran out of input.  The
   hold space and the "last s succeeded" flag live here as statics.

   Bounds handled here:
   - the a/r queue keeps one slot for its NULL terminator; a command that
     does not fit is reported and dropped
   - the hold space has room for MAXBUF chars plus the NUL that the H
     truncation check relies on
   - y only translates bytes below 0x80, the size of the table built by
     ycomp(); other bytes are left as they are */
static void command(sedcmd *ipc)
{
	static int	didsub;			/* true if last s succeeded */
	static char	holdsp[MAXHOLD + 1];	/* the hold space */
	static char	*hspend = holdsp;	/* hold space end pointer */
	register char	*p1, *p2;
	char		*execp;

	needs_advance = 0;
	switch(ipc->command)
	{
	case ACMD:		/* append */
		if (aptr >= appends + MAXAPPENDS - 1)
		{
			fprintf(stderr,
				"sed: too many appends after line %ld\n",
				lnum);
			break;
		}
		*aptr++ = ipc;
		*aptr = 0;
		break;

	case CCMD:		/* change pattern space */
		delete = TRUE;
		if (!ipc->flags.inrange || lastline)
			printf("%s\n", ipc->u.lhs);
		break;

	case DCMD:		/* delete pattern space */
		delete++;
		break;

	case CDCMD:		/* delete first line of pattern space */
		p1 = p2 = linebuf;
		while(*p1 != '\n')
			if ((delete = (*p1++ == 0)))	/* no newline: delete all */
				return;
		p1++;
		while((*p2++ = *p1++)) continue;	/* shift the rest down */
		spend = p2-1;
		jump++;
		break;

	case EQCMD:		/* show current line number */
		fprintf(stdout, "%ld\n", lnum);
		break;

	case GCMD:		/* copy hold space to pattern space */
		p1 = linebuf;	p2 = holdsp;	while((*p1++ = *p2++));
		spend = p1-1;
		break;

	case CGCMD:		/* append hold space to pattern space */
		*spend++ = '\n';
		p1 = spend;	p2 = holdsp;
		do {
			if (p1 > linebuf + MAXBUF) {
				truncated(FALSE);
				p1[-1] = 0;
				break;
			}
		} while((*p1++ = *p2++));
		spend = p1-1;
		break;

	case HCMD:		/* copy pattern space to hold space */
		p1 = holdsp;	p2 = linebuf;	while((*p1++ = *p2++));
		hspend = p1-1;
		break;

	case CHCMD:		/* append pattern space to hold space */
		*hspend++ = '\n';
		p1 = hspend;	p2 = linebuf;
		do {
			if (p1 > holdsp + MAXBUF) {
				truncated(TRUE);
				p1[-1] = 0;
				break;
			}
		} while((*p1++ = *p2++));
		hspend = p1-1;
		break;

	case ICMD:		/* insert text */
		printf("%s\n", ipc->u.lhs);
		break;

	case BCMD:		/* branch to label */
		jump = TRUE;
		break;

	case LCMD:		/* list text */
		listto(linebuf, (ipc->fout != NULL)?ipc->fout:stdout); break;

	case CLCMD:		/* dump text */
		dumpto(linebuf, (ipc->fout != NULL)?ipc->fout:stdout); break;

	case NCMD:		/* read next line into pattern space */
		if (!nflag)
			puts(linebuf);	/* flush out the current line */
		if (aptr > appends)
			readout();	/* do pending a, r commands */
		if ((execp = getline(linebuf, MAXBUF+1)) == BAD)
		{
			pending = ipc;
			delete = TRUE;
			break;
		}
		spend = execp;
		break;

	case CNCMD:		/* append next line to pattern space */
		if (aptr > appends)
			readout();
		*spend++ = '\n';
		if ((execp = getline(spend,
				     linebuf + MAXBUF+1 - spend)) == BAD)
		{
			pending = ipc;
			delete = TRUE;
			break;
		}
		spend = execp;
		break;

	case PCMD:		/* print pattern space */
		puts(linebuf);
		break;

	case CPCMD:		/* print one line from pattern space */
	cpcom:			/* so s command can jump here */
		for(p1 = linebuf; *p1 != '\n' && *p1 != '\0'; )
			putc(*p1++, stdout);
		putc('\n', stdout);
		break;

	case QCMD:		/* quit the stream editor */
		if (!nflag)
			puts(linebuf);	/* flush out the current line */
		if (aptr > appends)
			readout();	/* do any pending a and r commands */
		exit(0);

	case RCMD:		/* read a file into the stream */
		if (aptr >= appends + MAXAPPENDS - 1)
		{
			fprintf(stderr,
				"sed: too many reads after line %ld\n",
				lnum);
			break;
		}
		*aptr++ = ipc;
		*aptr = 0;
		break;

	case SCMD:		/* substitute RE */
		didsub = substitute(ipc);
		if (ipc->flags.print && didsub)
		{
			if (ipc->flags.print == TRUE)
				puts(linebuf);
			else
				goto cpcom;
		}
		if (didsub && ipc->fout)
			fprintf(ipc->fout, "%s\n", linebuf);
		break;

	case TCMD:		/* branch on last s successful */
	case CTCMD:		/* branch on last s failed */
		if (didsub == (ipc->command == CTCMD))
			break;	/* condition for this command not met */
		didsub = FALSE;
		jump = TRUE;	/* set up to jump to assoc'd label */
		break;

	case CWCMD:		/* write one line from pattern space */
		for(p1 = linebuf; *p1 != '\n' && *p1 != '\0'; )
			putc(*p1++, ipc->fout);
		putc('\n', ipc->fout);
		break;

	case WCMD:		/* write pattern space to file */
		fprintf(ipc->fout, "%s\n", linebuf);
		break;

	case XCMD:		/* exchange pattern and hold spaces */
		p1 = linebuf;	p2 = genbuf;	while((*p2++ = *p1++)) continue;
		p1 = holdsp;	p2 = linebuf;	while((*p2++ = *p1++)) continue;
		spend = p2 - 1;
		p1 = genbuf;	p2 = holdsp;	while((*p2++ = *p1++)) continue;
		hspend = p2 - 1;
		break;

	case YCMD:		/* transliterate through the table at u.lhs */
		p2 = ipc->u.lhs;
		for (p1 = linebuf; *p1; p1++)
			if (((unsigned char)*p1 & 0x80) == 0)
				*p1 = p2[(unsigned char)*p1];
		break;

	default:		/* nothing to do for other codes */
		break;
	}
}
/* Read the next input line from stdin into buf, without its newline.
   buf: where to send the input
   max: max chars to read (passed to fgets as the buffer size)
   returns: pointer to the NUL that ends the stored line, or BAD when
            fgets() delivers nothing more

   Side effects: lnum is incremented, line_with_newline records whether
   the text read ended in '\n', and - only when a `$` address is in use
   (last_line_used) - one char is peeked to detect end of input; lastline
   is set if that peek hits EOF and eargc is 0. */
static char *getline(char *buf, int max)
{
	char	*end;
	int	c;

	if (fgets(buf, max, stdin) == NULL)
		return(BAD);

	lnum++;				/* note that we got another line */

	/* locate the newline, or the NUL if the line had none */
	for (end = buf; *end != 0 && *end != '\n'; end++)
		continue;
	line_with_newline = (*end == '\n');
	*end = 0;			/* strip a possible '\n' */

	/* detect last line - but only if the address was used in a command */
	if (last_line_used)
	{
		c = fgetc(stdin);
		if (c != EOF)
			ungetc(c, stdin);
		else if (eargc == 0)	/* no more input files either */
			lastline = TRUE;
	}
	return(end);			/* ptr to terminating null */
}
/* Emit everything queued by a and r commands, then empty the queue.
   The queue is the NULL-terminated list in appends[].  For an `a` entry
   its text is printed followed by a newline; for any other entry u.lhs
   is taken as a file name and the file is copied to stdout.  A file that
   cannot be opened is skipped without a message.
   The list is walked from appends[0] upward, so no pointer in front of
   the array is ever formed. */
static void readout(void)
{
	register int	t;	/* hold input char or EOF */
	FILE		*fi;	/* ptr to file to be read */

	for (aptr = appends; *aptr; aptr++)
	{
		if ((*aptr)->command == ACMD)		/* process "a" cmd */
			printf("%s\n", (*aptr)->u.lhs);
		else					/* process "r" cmd */
		{
			if ((fi = fopen((*aptr)->u.lhs, "r")) == NULL)
				continue;
			while((t = getc(fi)) != EOF)
				putc(t, stdout);
			fclose(fi);
		}
	}
	aptr = appends;		/* reset the append ptr */
	*aptr = 0;
}
/* sedexec.c ends here */