2013-12-06 12:04:52 +01:00
|
|
|
/* $NetBSD: ex_subst.c,v 1.3 2013/11/25 22:43:46 christos Exp $ */
|
2013-01-22 12:03:53 +01:00
|
|
|
/*-
|
|
|
|
* Copyright (c) 1992, 1993, 1994
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
* Copyright (c) 1992, 1993, 1994, 1995, 1996
|
|
|
|
* Keith Bostic. All rights reserved.
|
|
|
|
*
|
|
|
|
* See the LICENSE file for redistribution information.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "config.h"
|
|
|
|
|
|
|
|
#ifndef lint
|
2013-12-06 12:04:52 +01:00
|
|
|
static const char sccsid[] = "Id: ex_subst.c,v 10.50 2002/02/09 21:18:23 skimo Exp (Berkeley) Date: 2002/02/09 21:18:23 ";
|
2013-01-22 12:03:53 +01:00
|
|
|
#endif /* not lint */
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/queue.h>
|
|
|
|
#include <sys/time.h>
|
|
|
|
|
|
|
|
#include <bitstring.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#include "../common/common.h"
|
|
|
|
#include "../vi/vi.h"
|
|
|
|
|
|
|
|
#define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */
|
|
|
|
#define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */
|
|
|
|
|
|
|
|
static int re_conv __P((SCR *, CHAR_T **, size_t *, int *));
|
|
|
|
static int re_cscope_conv __P((SCR *, CHAR_T **, size_t *, int *));
|
|
|
|
static int re_sub __P((SCR *,
|
|
|
|
CHAR_T *, CHAR_T **, size_t *, size_t *, regmatch_t [10]));
|
|
|
|
static int re_tag_conv __P((SCR *, CHAR_T **, size_t *, int *));
|
|
|
|
static int s __P((SCR *, EXCMD *, CHAR_T *, regex_t *, u_int));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ex_s --
|
|
|
|
* [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
|
|
|
|
*
|
|
|
|
* Substitute on lines matching a pattern.
|
|
|
|
*
|
|
|
|
* PUBLIC: int ex_s __P((SCR *, EXCMD *));
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
ex_s(SCR *sp, EXCMD *cmdp)
|
|
|
|
{
|
|
|
|
regex_t *re;
|
|
|
|
size_t blen, len;
|
|
|
|
u_int flags;
|
|
|
|
ARG_CHAR_T delim;
|
|
|
|
CHAR_T *bp, *p, *ptrn, *rep, *t;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Skip leading white space.
|
|
|
|
*
|
|
|
|
* !!!
|
|
|
|
* Historic vi allowed any non-alphanumeric to serve as the
|
|
|
|
* substitution command delimiter.
|
|
|
|
*
|
|
|
|
* !!!
|
|
|
|
* If the arguments are empty, it's the same as &, i.e. we
|
|
|
|
* repeat the last substitution.
|
|
|
|
*/
|
|
|
|
if (cmdp->argc == 0)
|
|
|
|
goto subagain;
|
|
|
|
for (p = cmdp->argv[0]->bp,
|
|
|
|
len = cmdp->argv[0]->len; len > 0; --len, ++p) {
|
|
|
|
if (!ISBLANK((UCHAR_T)*p))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (len == 0)
|
|
|
|
subagain: return (ex_subagain(sp, cmdp));
|
|
|
|
|
|
|
|
delim = (UCHAR_T)*p++;
|
|
|
|
if (ISALNUM(delim) || delim == '\\')
|
|
|
|
return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* The full-blown substitute command reset the remembered
|
|
|
|
* state of the 'c' and 'g' suffices.
|
|
|
|
*/
|
|
|
|
sp->c_suffix = sp->g_suffix = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the pattern string, toss escaping characters.
|
|
|
|
*
|
|
|
|
* !!!
|
|
|
|
* Historic vi accepted any of the following forms:
|
|
|
|
*
|
|
|
|
* :s/abc/def/ change "abc" to "def"
|
|
|
|
* :s/abc/def change "abc" to "def"
|
|
|
|
* :s/abc/ delete "abc"
|
|
|
|
* :s/abc delete "abc"
|
|
|
|
*
|
|
|
|
* QUOTING NOTE:
|
|
|
|
*
|
|
|
|
* Only toss an escaping character if it escapes a delimiter.
|
|
|
|
* This means that "s/A/\\\\f" replaces "A" with "\\f". It
|
|
|
|
* would be nice to be more regular, i.e. for each layer of
|
|
|
|
* escaping a single escaping character is removed, but that's
|
|
|
|
* not how the historic vi worked.
|
|
|
|
*/
|
|
|
|
for (ptrn = t = p;;) {
|
|
|
|
if (p[0] == '\0' || p[0] == delim) {
|
|
|
|
if (p[0] == delim)
|
|
|
|
++p;
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* Nul terminate the pattern string -- it's passed
|
|
|
|
* to regcomp which doesn't understand anything else.
|
|
|
|
*/
|
|
|
|
*t = '\0';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (p[0] == '\\') {
|
|
|
|
if (p[1] == delim)
|
|
|
|
++p;
|
|
|
|
else if (p[1] == '\\')
|
|
|
|
*t++ = *p++;
|
|
|
|
}
|
|
|
|
*t++ = *p++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the pattern string is empty, use the last RE (not just the
|
|
|
|
* last substitution RE).
|
|
|
|
*/
|
|
|
|
if (*ptrn == '\0') {
|
|
|
|
if (sp->re == NULL) {
|
|
|
|
ex_emsg(sp, NULL, EXM_NOPREVRE);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Re-compile the RE if necessary. */
|
|
|
|
if (!F_ISSET(sp, SC_RE_SEARCH) &&
|
|
|
|
re_compile(sp, sp->re, sp->re_len,
|
|
|
|
NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
|
|
|
|
return (1);
|
|
|
|
flags = 0;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* Compile the RE. Historic practice is that substitutes set
|
|
|
|
* the search direction as well as both substitute and search
|
|
|
|
* RE's. We compile the RE twice, as we don't want to bother
|
|
|
|
* ref counting the pattern string and (opaque) structure.
|
|
|
|
*/
|
|
|
|
if (re_compile(sp, ptrn, t - ptrn, &sp->re,
|
|
|
|
&sp->re_len, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
|
|
|
|
return (1);
|
|
|
|
if (re_compile(sp, ptrn, t - ptrn, &sp->subre,
|
|
|
|
&sp->subre_len, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
|
|
|
|
return (1);
|
|
|
|
|
|
|
|
flags = SUB_FIRST;
|
|
|
|
sp->searchdir = FORWARD;
|
|
|
|
}
|
|
|
|
re = &sp->re_c;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the replacement string.
|
|
|
|
*
|
|
|
|
* The special character & (\& if O_MAGIC not set) matches the
|
|
|
|
* entire RE. No handling of & is required here, it's done by
|
|
|
|
* re_sub().
|
|
|
|
*
|
|
|
|
* The special character ~ (\~ if O_MAGIC not set) inserts the
|
|
|
|
* previous replacement string into this replacement string.
|
|
|
|
* Count ~'s to figure out how much space we need. We could
|
|
|
|
* special case nonexistent last patterns or whether or not
|
|
|
|
* O_MAGIC is set, but it's probably not worth the effort.
|
|
|
|
*
|
|
|
|
* QUOTING NOTE:
|
|
|
|
*
|
|
|
|
* Only toss an escaping character if it escapes a delimiter or
|
|
|
|
* if O_MAGIC is set and it escapes a tilde.
|
|
|
|
*
|
|
|
|
* !!!
|
|
|
|
* If the entire replacement pattern is "%", then use the last
|
|
|
|
* replacement pattern. This semantic was added to vi in System
|
|
|
|
* V and then percolated elsewhere, presumably around the time
|
|
|
|
* that it was added to their version of ed(1).
|
|
|
|
*/
|
|
|
|
if (p[0] == L('\0') || p[0] == delim) {
|
|
|
|
if (p[0] == delim)
|
|
|
|
++p;
|
|
|
|
if (sp->repl != NULL)
|
|
|
|
free(sp->repl);
|
|
|
|
sp->repl = NULL;
|
|
|
|
sp->repl_len = 0;
|
|
|
|
} else if (p[0] == L('%') && (p[1] == L('\0') || p[1] == delim))
|
|
|
|
p += p[1] == delim ? 2 : 1;
|
|
|
|
else {
|
|
|
|
for (rep = p, len = 0;
|
|
|
|
p[0] != L('\0') && p[0] != delim; ++p, ++len)
|
|
|
|
if (p[0] == L('~'))
|
|
|
|
len += sp->repl_len;
|
|
|
|
GET_SPACE_RETW(sp, bp, blen, len);
|
|
|
|
for (t = bp, len = 0, p = rep;;) {
|
|
|
|
if (p[0] == L('\0') || p[0] == delim) {
|
|
|
|
if (p[0] == delim)
|
|
|
|
++p;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (p[0] == L('\\')) {
|
|
|
|
if (p[1] == delim)
|
|
|
|
++p;
|
|
|
|
else if (p[1] == L('\\')) {
|
|
|
|
*t++ = *p++;
|
|
|
|
++len;
|
|
|
|
} else if (p[1] == L('~')) {
|
|
|
|
++p;
|
|
|
|
if (!O_ISSET(sp, O_MAGIC))
|
|
|
|
goto tilde;
|
|
|
|
}
|
|
|
|
} else if (p[0] == L('~') && O_ISSET(sp, O_MAGIC)) {
|
|
|
|
tilde: ++p;
|
|
|
|
MEMCPYW(t, sp->repl, sp->repl_len);
|
|
|
|
t += sp->repl_len;
|
|
|
|
len += sp->repl_len;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
*t++ = *p++;
|
|
|
|
++len;
|
|
|
|
}
|
|
|
|
if ((sp->repl_len = len) != 0) {
|
|
|
|
if (sp->repl != NULL)
|
|
|
|
free(sp->repl);
|
|
|
|
if ((sp->repl = malloc(len * sizeof(CHAR_T))) == NULL) {
|
|
|
|
msgq(sp, M_SYSERR, NULL);
|
|
|
|
FREE_SPACEW(sp, bp, blen);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
MEMCPYW(sp->repl, bp, len);
|
|
|
|
}
|
|
|
|
FREE_SPACEW(sp, bp, blen);
|
|
|
|
}
|
|
|
|
return (s(sp, cmdp, p, re, flags));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ex_subagain --
|
|
|
|
* [line [,line]] & [cgr] [count] [#lp]]
|
|
|
|
*
|
|
|
|
* Substitute using the last substitute RE and replacement pattern.
|
|
|
|
*
|
|
|
|
* PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
ex_subagain(SCR *sp, EXCMD *cmdp)
|
|
|
|
{
|
|
|
|
if (sp->subre == NULL) {
|
|
|
|
ex_emsg(sp, NULL, EXM_NOPREVRE);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
if (!F_ISSET(sp, SC_RE_SUBST) &&
|
|
|
|
re_compile(sp, sp->subre, sp->subre_len,
|
|
|
|
NULL, NULL, &sp->subre_c, SEARCH_CSUBST | SEARCH_MSG))
|
|
|
|
return (1);
|
|
|
|
return (s(sp,
|
|
|
|
cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ex_subtilde --
|
|
|
|
* [line [,line]] ~ [cgr] [count] [#lp]]
|
|
|
|
*
|
|
|
|
* Substitute using the last RE and last substitute replacement pattern.
|
|
|
|
*
|
|
|
|
* PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
ex_subtilde(SCR *sp, EXCMD *cmdp)
|
|
|
|
{
|
|
|
|
if (sp->re == NULL) {
|
|
|
|
ex_emsg(sp, NULL, EXM_NOPREVRE);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, sp->re,
|
|
|
|
sp->re_len, NULL, NULL, &sp->re_c, SEARCH_CSEARCH | SEARCH_MSG))
|
|
|
|
return (1);
|
|
|
|
return (s(sp,
|
|
|
|
cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* s --
|
|
|
|
* Do the substitution. This stuff is *really* tricky. There are lots of
|
|
|
|
* special cases, and general nastiness. Don't mess with it unless you're
|
|
|
|
* pretty confident.
|
|
|
|
*
|
|
|
|
* The nasty part of the substitution is what happens when the replacement
|
|
|
|
* string contains newlines. It's a bit tricky -- consider the information
|
|
|
|
* that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is
|
|
|
|
* to build a set of newline offsets which we use to break the line up later,
|
|
|
|
* when the replacement is done. Don't change it unless you're *damned*
|
|
|
|
* confident.
|
|
|
|
*/
|
|
|
|
/*
 * NEEDNEWLINE --
 *	Make sure the sp->newl array has room for one more entry, growing
 *	it by 25 entries whenever newl_cnt has caught up with newl_len.
 *	If the reallocation fails, newl_len is reset to 0 and the
 *	*enclosing function* returns 1.
 *
 *	This is a bare brace block rather than do { } while (0) so that
 *	existing uses keep compiling whether or not they are followed by
 *	a semicolon.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOC(sp, (sp)->newl, size_t *,			\
		    (sp)->newl_len * sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			return (1);					\
		}							\
	}								\
}
|
|
|
|
|
|
|
|
/*
 * BUILD --
 *	Append len characters starting at l to the build buffer.
 *
 *	Uses the caller's local variables lb (buffer), lbclen (characters
 *	currently in the buffer) and lblen (buffer capacity), growing the
 *	buffer when the new data would not fit.  If the reallocation fails,
 *	lbclen is reset to 0 and the *enclosing function* returns 1.
 *
 *	len is evaluated more than once, so it must not have side effects.
 *	This stays a bare brace block because s() invokes it without a
 *	trailing semicolon, including as the body of an unbraced if.
 */
#define	BUILD(sp, l, len) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	MEMCPYW(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}
|
|
|
|
|
|
|
|
/*
 * NEEDSP --
 *	Make sure the build buffer can take len more characters.
 *
 *	Uses the caller's local variables lb, lbclen and lblen in the same
 *	way as BUILD.  When the buffer has to grow, pnt is reset to
 *	lb + lbclen in the new buffer; it is left untouched otherwise.
 *	If the reallocation fails, lbclen is reset to 0 and the
 *	*enclosing function* returns 1.
 *
 *	len is evaluated more than once, so it must not have side effects.
 *	This is a bare brace block rather than do { } while (0) so that
 *	existing uses keep compiling whether or not they are followed by
 *	a semicolon.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lbclen + (len) > lblen) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, CHAR_T *, lblen * sizeof(CHAR_T));	\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		(pnt) = lb + lbclen;					\
	}								\
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
s(SCR *sp, EXCMD *cmdp, CHAR_T *st, regex_t *re, u_int flags)
|
|
|
|
{
|
|
|
|
EVENT ev;
|
|
|
|
MARK from, to;
|
|
|
|
TEXTH tiq;
|
|
|
|
db_recno_t elno, lno, slno;
|
|
|
|
u_long ul;
|
|
|
|
regmatch_t match[10];
|
|
|
|
size_t blen, cnt, last, lbclen, lblen, len, llen;
|
|
|
|
size_t offset, saved_offset, scno;
|
2013-12-06 12:04:52 +01:00
|
|
|
int lflag, nflag, pflag, rflag;
|
2013-01-22 12:03:53 +01:00
|
|
|
int didsub, do_eol_match, eflags, empty_ok, eval;
|
|
|
|
int linechanged, matched, quit, rval;
|
|
|
|
CHAR_T *lb, *bp;
|
|
|
|
enum nresult nret;
|
|
|
|
|
|
|
|
NEEDFILE(sp, cmdp);
|
|
|
|
|
|
|
|
slno = sp->lno;
|
|
|
|
scno = sp->cno;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* Historically, the 'g' and 'c' suffices were always toggled as flags,
|
|
|
|
* so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was
|
|
|
|
* not set, they were initialized to 0 for all substitute commands. If
|
|
|
|
* O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
|
|
|
|
* specified substitute/replacement patterns (see ex_s()).
|
|
|
|
*/
|
|
|
|
if (!O_ISSET(sp, O_EDCOMPATIBLE))
|
|
|
|
sp->c_suffix = sp->g_suffix = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
|
|
|
|
* it only displayed the last change. I'd disallow them, but they are
|
|
|
|
* useful in combination with the [v]global commands. In the current
|
|
|
|
* model the problem is combining them with the 'c' flag -- the screen
|
|
|
|
* would have to flip back and forth between the confirm screen and the
|
|
|
|
* ex print screen, which would be pretty awful. We do display all
|
|
|
|
* changes, though, for what that's worth.
|
|
|
|
*
|
|
|
|
* !!!
|
|
|
|
* Historic vi was fairly strict about the order of "options", the
|
|
|
|
* count, and "flags". I'm somewhat fuzzy on the difference between
|
|
|
|
* options and flags, anyway, so this is a simpler approach, and we
|
|
|
|
* just take it them in whatever order the user gives them. (The ex
|
|
|
|
* usage statement doesn't reflect this.)
|
|
|
|
*/
|
2013-12-06 12:04:52 +01:00
|
|
|
lflag = nflag = pflag = rflag = 0;
|
2013-01-22 12:03:53 +01:00
|
|
|
if (st == NULL)
|
|
|
|
goto noargs;
|
|
|
|
for (lno = OOBLNO; *st != '\0'; ++st)
|
|
|
|
switch (*st) {
|
|
|
|
case ' ':
|
|
|
|
case '\t':
|
|
|
|
continue;
|
|
|
|
case '+':
|
|
|
|
++cmdp->flagoff;
|
|
|
|
break;
|
|
|
|
case '-':
|
|
|
|
--cmdp->flagoff;
|
|
|
|
break;
|
|
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
|
|
if (lno != OOBLNO)
|
|
|
|
goto usage;
|
|
|
|
errno = 0;
|
|
|
|
nret = nget_uslong(sp, &ul, st, &st, 10);
|
|
|
|
lno = ul;
|
|
|
|
if (*st == '\0') /* Loop increment correction. */
|
|
|
|
--st;
|
|
|
|
if (nret != NUM_OK) {
|
|
|
|
if (nret == NUM_OVER)
|
|
|
|
msgq(sp, M_ERR, "153|Count overflow");
|
|
|
|
else if (nret == NUM_UNDER)
|
|
|
|
msgq(sp, M_ERR, "154|Count underflow");
|
|
|
|
else
|
|
|
|
msgq(sp, M_SYSERR, NULL);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* In historic vi, the count was inclusive from the
|
|
|
|
* second address.
|
|
|
|
*/
|
|
|
|
cmdp->addr1.lno = cmdp->addr2.lno;
|
|
|
|
cmdp->addr2.lno += lno - 1;
|
|
|
|
if (!db_exist(sp, cmdp->addr2.lno) &&
|
|
|
|
db_last(sp, &cmdp->addr2.lno))
|
|
|
|
return (1);
|
|
|
|
break;
|
|
|
|
case '#':
|
|
|
|
nflag = 1;
|
|
|
|
break;
|
|
|
|
case 'c':
|
|
|
|
sp->c_suffix = !sp->c_suffix;
|
|
|
|
|
|
|
|
/* Ex text structure initialization. */
|
|
|
|
if (F_ISSET(sp, SC_EX)) {
|
|
|
|
memset(&tiq, 0, sizeof(TEXTH));
|
2013-12-06 12:04:52 +01:00
|
|
|
TAILQ_INIT(&tiq);
|
2013-01-22 12:03:53 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'g':
|
|
|
|
sp->g_suffix = !sp->g_suffix;
|
|
|
|
break;
|
|
|
|
case 'l':
|
|
|
|
lflag = 1;
|
|
|
|
break;
|
|
|
|
case 'p':
|
|
|
|
pflag = 1;
|
|
|
|
break;
|
|
|
|
case 'r':
|
|
|
|
if (LF_ISSET(SUB_FIRST)) {
|
|
|
|
msgq(sp, M_ERR,
|
|
|
|
"155|Regular expression specified; r flag meaningless");
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
if (!F_ISSET(sp, SC_RE_SEARCH)) {
|
|
|
|
ex_emsg(sp, NULL, EXM_NOPREVRE);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
rflag = 1;
|
|
|
|
re = &sp->re_c;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
goto usage;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*st != '\0' || (!rflag && LF_ISSET(SUB_MUSTSETR))) {
|
|
|
|
usage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
noargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
|
|
|
|
msgq(sp, M_ERR,
|
|
|
|
"156|The #, l and p flags may not be combined with the c flag in vi mode");
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bp: if interactive, line cache
|
|
|
|
* blen: if interactive, line cache length
|
|
|
|
* lb: build buffer pointer.
|
|
|
|
* lbclen: current length of built buffer.
|
|
|
|
* lblen; length of build buffer.
|
|
|
|
*/
|
|
|
|
bp = lb = NULL;
|
|
|
|
blen = lbclen = lblen = 0;
|
|
|
|
|
|
|
|
/* For each line... */
|
|
|
|
lno = cmdp->addr1.lno == 0 ? 1 : cmdp->addr1.lno;
|
|
|
|
for (matched = quit = 0,
|
|
|
|
elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
|
|
|
|
|
|
|
|
/* Someone's unhappy, time to stop. */
|
|
|
|
if (INTERRUPTED(sp))
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* Get the line. */
|
|
|
|
if (db_get(sp, lno, DBG_FATAL, &st, &llen))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make a local copy if doing confirmation -- when calling
|
|
|
|
* the confirm routine we're likely to lose the cached copy.
|
|
|
|
*/
|
|
|
|
if (sp->c_suffix) {
|
|
|
|
if (bp == NULL) {
|
|
|
|
GET_SPACE_RETW(sp, bp, blen, llen);
|
|
|
|
} else
|
|
|
|
ADD_SPACE_RETW(sp, bp, blen, llen);
|
|
|
|
MEMCPYW(bp, st, llen);
|
|
|
|
st = bp;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Start searching from the beginning. */
|
|
|
|
offset = 0;
|
|
|
|
len = llen;
|
|
|
|
|
|
|
|
/* Reset the build buffer offset. */
|
|
|
|
lbclen = 0;
|
|
|
|
|
|
|
|
/* Reset empty match flag. */
|
|
|
|
empty_ok = 1;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We don't want to have to do a setline if the line didn't
|
|
|
|
* change -- keep track of whether or not this line changed.
|
|
|
|
* If doing confirmations, don't want to keep setting the
|
|
|
|
* line if change is refused -- keep track of substitutions.
|
|
|
|
*/
|
|
|
|
didsub = linechanged = 0;
|
|
|
|
|
|
|
|
/* New line, do an EOL match. */
|
|
|
|
do_eol_match = 1;
|
|
|
|
|
|
|
|
/* It's not nul terminated, but we pretend it is. */
|
|
|
|
eflags = REG_STARTEND;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The search area is from st + offset to the EOL.
|
|
|
|
*
|
|
|
|
* Generally, match[0].rm_so is the offset of the start
|
|
|
|
* of the match from the start of the search, and offset
|
|
|
|
* is the offset of the start of the last search.
|
|
|
|
*/
|
|
|
|
nextmatch: match[0].rm_so = 0;
|
|
|
|
match[0].rm_eo = len;
|
|
|
|
|
|
|
|
/* Get the next match. */
|
|
|
|
eval = regexec(re, st + offset, 10, match, eflags);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* There wasn't a match or if there was an error, deal with
|
|
|
|
* it. If there was a previous match in this line, resolve
|
|
|
|
* the changes into the database. Otherwise, just move on.
|
|
|
|
*/
|
|
|
|
if (eval == REG_NOMATCH)
|
|
|
|
goto endmatch;
|
|
|
|
if (eval != 0) {
|
|
|
|
re_error(sp, eval, re);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
matched = 1;
|
|
|
|
|
|
|
|
/* Only the first search can match an anchored expression. */
|
|
|
|
eflags |= REG_NOTBOL;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* It's possible to match 0-length strings -- for example, the
|
|
|
|
* command s;a*;X;, when matched against the string "aabb" will
|
|
|
|
* result in "XbXbX", i.e. the matches are "aa", the space
|
|
|
|
* between the b's and the space between the b's and the end of
|
|
|
|
* the string. There is a similar space between the beginning
|
|
|
|
* of the string and the a's. The rule that we use (because vi
|
|
|
|
* historically used it) is that any 0-length match, occurring
|
|
|
|
* immediately after a match, is ignored. Otherwise, the above
|
|
|
|
* example would have resulted in "XXbXbX". Another example is
|
|
|
|
* incorrectly using " *" to replace groups of spaces with one
|
|
|
|
* space.
|
|
|
|
*
|
|
|
|
* The way we do this is that if we just had a successful match,
|
|
|
|
* the starting offset does not skip characters, and the match
|
|
|
|
* is empty, ignore the match and move forward. If there's no
|
|
|
|
* more characters in the string, we were attempting to match
|
|
|
|
* after the last character, so quit.
|
|
|
|
*/
|
|
|
|
if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
|
|
|
|
empty_ok = 1;
|
|
|
|
if (len == 0)
|
|
|
|
goto endmatch;
|
|
|
|
BUILD(sp, st + offset, 1)
|
|
|
|
++offset;
|
|
|
|
--len;
|
|
|
|
goto nextmatch;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Confirm change. */
|
|
|
|
if (sp->c_suffix) {
|
|
|
|
/*
|
|
|
|
* Set the cursor position for confirmation. Note,
|
|
|
|
* if we matched on a '$', the cursor may be past
|
|
|
|
* the end of line.
|
|
|
|
*/
|
|
|
|
from.lno = to.lno = lno;
|
|
|
|
from.cno = match[0].rm_so + offset;
|
|
|
|
to.cno = match[0].rm_eo + offset;
|
|
|
|
/*
|
|
|
|
* Both ex and vi have to correct for a change before
|
|
|
|
* the first character in the line.
|
|
|
|
*/
|
|
|
|
if (llen == 0)
|
|
|
|
from.cno = to.cno = 0;
|
|
|
|
if (F_ISSET(sp, SC_VI)) {
|
|
|
|
/*
|
|
|
|
* Only vi has to correct for a change after
|
|
|
|
* the last character in the line.
|
|
|
|
*
|
|
|
|
* XXX
|
|
|
|
* It would be nice to change the vi code so
|
|
|
|
* that we could display a cursor past EOL.
|
|
|
|
*/
|
|
|
|
if (to.cno >= llen)
|
|
|
|
to.cno = llen - 1;
|
|
|
|
if (from.cno >= llen)
|
|
|
|
from.cno = llen - 1;
|
|
|
|
|
|
|
|
sp->lno = from.lno;
|
|
|
|
sp->cno = from.cno;
|
|
|
|
if (vs_refresh(sp, 1))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
vs_update(sp, msg_cat(sp,
|
|
|
|
"169|Confirm change? [n]", NULL), NULL);
|
|
|
|
|
|
|
|
if (v_event_get(sp, &ev, 0, 0))
|
|
|
|
goto err;
|
|
|
|
switch (ev.e_event) {
|
|
|
|
case E_CHARACTER:
|
|
|
|
break;
|
|
|
|
case E_EOF:
|
|
|
|
case E_ERR:
|
|
|
|
case E_INTERRUPT:
|
|
|
|
goto lquit;
|
|
|
|
default:
|
|
|
|
v_event_err(sp, &ev);
|
|
|
|
goto lquit;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (ex_print(sp, cmdp, &from, &to, 0) ||
|
|
|
|
ex_scprint(sp, &from, &to))
|
|
|
|
goto lquit;
|
|
|
|
if (ex_txt(sp, &tiq, 0, TXT_CR))
|
|
|
|
goto err;
|
2013-12-06 12:04:52 +01:00
|
|
|
ev.e_c = TAILQ_FIRST(&tiq)->lb[0];
|
2013-01-22 12:03:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
switch (ev.e_c) {
|
|
|
|
case CH_YES:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
case CH_NO:
|
|
|
|
didsub = 0;
|
|
|
|
BUILD(sp, st + offset, match[0].rm_eo);
|
|
|
|
goto skip;
|
|
|
|
case CH_QUIT:
|
|
|
|
/* Set the quit/interrupted flags. */
|
|
|
|
lquit: quit = 1;
|
|
|
|
F_SET(sp->gp, G_INTERRUPTED);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Resolve any changes, then return to (and
|
|
|
|
* exit from) the main loop.
|
|
|
|
*/
|
|
|
|
goto endmatch;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set the cursor to the last position changed, converting
|
|
|
|
* from 1-based to 0-based.
|
|
|
|
*/
|
|
|
|
sp->lno = lno;
|
|
|
|
sp->cno = match[0].rm_so;
|
|
|
|
|
|
|
|
/* Copy the bytes before the match into the build buffer. */
|
|
|
|
BUILD(sp, st + offset, match[0].rm_so);
|
|
|
|
|
|
|
|
/* Substitute the matching bytes. */
|
|
|
|
didsub = 1;
|
|
|
|
if (re_sub(sp, st + offset, &lb, &lbclen, &lblen, match))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
/* Set the change flag so we know this line was modified. */
|
|
|
|
linechanged = 1;
|
|
|
|
|
|
|
|
/* Move past the matched bytes. */
|
|
|
|
skip: offset += match[0].rm_eo;
|
|
|
|
len -= match[0].rm_eo;
|
|
|
|
|
|
|
|
/* A match cannot be followed by an empty pattern. */
|
|
|
|
empty_ok = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If doing a global change with confirmation, we have to
|
|
|
|
* update the screen. The basic idea is to store the line
|
|
|
|
* so the screen update routines can find it, and restart.
|
|
|
|
*/
|
|
|
|
if (didsub && sp->c_suffix && sp->g_suffix) {
|
|
|
|
/*
|
|
|
|
* The new search offset will be the end of the
|
|
|
|
* modified line.
|
|
|
|
*/
|
|
|
|
saved_offset = lbclen;
|
|
|
|
|
|
|
|
/* Copy the rest of the line. */
|
|
|
|
if (len)
|
|
|
|
BUILD(sp, st + offset, len)
|
|
|
|
|
|
|
|
/* Set the new offset. */
|
|
|
|
offset = saved_offset;
|
|
|
|
|
|
|
|
/* Store inserted lines, adjusting the build buffer. */
|
|
|
|
last = 0;
|
|
|
|
if (sp->newl_cnt) {
|
|
|
|
for (cnt = 0;
|
|
|
|
cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
|
|
|
|
if (db_insert(sp, lno,
|
|
|
|
lb + last, sp->newl[cnt] - last))
|
|
|
|
goto err;
|
|
|
|
last = sp->newl[cnt] + 1;
|
|
|
|
++sp->rptlines[L_ADDED];
|
|
|
|
}
|
|
|
|
lbclen -= last;
|
|
|
|
offset -= last;
|
|
|
|
sp->newl_cnt = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Store and retrieve the line. */
|
|
|
|
if (db_set(sp, lno, lb + last, lbclen))
|
|
|
|
goto err;
|
|
|
|
if (db_get(sp, lno, DBG_FATAL, &st, &llen))
|
|
|
|
goto err;
|
|
|
|
ADD_SPACE_RETW(sp, bp, blen, llen)
|
|
|
|
MEMCPYW(bp, st, llen);
|
|
|
|
st = bp;
|
|
|
|
len = llen - offset;
|
|
|
|
|
|
|
|
/* Restart the build. */
|
|
|
|
lbclen = 0;
|
|
|
|
BUILD(sp, st, offset);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we haven't already done the after-the-string
|
|
|
|
* match, do one. Set REG_NOTEOL so the '$' pattern
|
|
|
|
* only matches once.
|
|
|
|
*/
|
|
|
|
if (!do_eol_match)
|
|
|
|
goto endmatch;
|
|
|
|
if (offset == len) {
|
|
|
|
do_eol_match = 0;
|
|
|
|
eflags |= REG_NOTEOL;
|
|
|
|
}
|
|
|
|
goto nextmatch;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If it's a global:
|
|
|
|
*
|
|
|
|
* If at the end of the string, do a test for the after
|
|
|
|
* the string match. Set REG_NOTEOL so the '$' pattern
|
|
|
|
* only matches once.
|
|
|
|
*/
|
|
|
|
if (sp->g_suffix && do_eol_match) {
|
|
|
|
if (len == 0) {
|
|
|
|
do_eol_match = 0;
|
|
|
|
eflags |= REG_NOTEOL;
|
|
|
|
}
|
|
|
|
goto nextmatch;
|
|
|
|
}
|
|
|
|
|
|
|
|
endmatch: if (!linechanged)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* Copy any remaining bytes into the build buffer. */
|
|
|
|
if (len)
|
|
|
|
BUILD(sp, st + offset, len)
|
|
|
|
|
|
|
|
/* Store inserted lines, adjusting the build buffer. */
|
|
|
|
last = 0;
|
|
|
|
if (sp->newl_cnt) {
|
|
|
|
for (cnt = 0;
|
|
|
|
cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
|
|
|
|
if (db_insert(sp,
|
|
|
|
lno, lb + last, sp->newl[cnt] - last))
|
|
|
|
goto err;
|
|
|
|
last = sp->newl[cnt] + 1;
|
|
|
|
++sp->rptlines[L_ADDED];
|
|
|
|
}
|
|
|
|
lbclen -= last;
|
|
|
|
sp->newl_cnt = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Store the changed line. */
|
|
|
|
if (db_set(sp, lno, lb + last, lbclen))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
/* Update changed line counter. */
|
|
|
|
if (sp->rptlchange != lno) {
|
|
|
|
sp->rptlchange = lno;
|
|
|
|
++sp->rptlines[L_CHANGED];
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* Display as necessary. Historic practice is to only
|
|
|
|
* display the last line of a line split into multiple
|
|
|
|
* lines.
|
|
|
|
*/
|
|
|
|
if (lflag || nflag || pflag) {
|
|
|
|
from.lno = to.lno = lno;
|
|
|
|
from.cno = to.cno = 0;
|
|
|
|
if (lflag)
|
|
|
|
(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
|
|
|
|
if (nflag)
|
|
|
|
(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
|
|
|
|
if (pflag)
|
|
|
|
(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* !!!
|
|
|
|
* Historically, vi attempted to leave the cursor at the same place if
|
|
|
|
* the substitution was done at the current cursor position. Otherwise
|
|
|
|
* it moved it to the first non-blank of the last line changed. There
|
|
|
|
* were some problems: for example, :s/$/foo/ with the cursor on the
|
|
|
|
* last character of the line left the cursor on the last character, or
|
|
|
|
* the & command with multiple occurrences of the matching string in the
|
|
|
|
* line usually left the cursor in a fairly random position.
|
|
|
|
*
|
|
|
|
* We try to do the same thing, with the exception that if the user is
|
|
|
|
* doing substitution with confirmation, we move to the last line about
|
|
|
|
* which the user was consulted, as opposed to the last line that they
|
|
|
|
* actually changed. This prevents a screen flash if the user doesn't
|
|
|
|
* change many of the possible lines.
|
|
|
|
*/
|
|
|
|
if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
|
|
|
|
sp->cno = 0;
|
|
|
|
(void)nonblank(sp, sp->lno, &sp->cno);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If not in a global command, and nothing matched, say so.
|
|
|
|
* Else, if none of the lines displayed, put something up.
|
|
|
|
*/
|
|
|
|
rval = 0;
|
|
|
|
if (!matched) {
|
|
|
|
if (!F_ISSET(sp, SC_EX_GLOBAL)) {
|
|
|
|
msgq(sp, M_ERR, "157|No match found");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
} else if (!lflag && !nflag && !pflag)
|
|
|
|
F_SET(cmdp, E_AUTOPRINT);
|
|
|
|
|
|
|
|
if (0) {
|
|
|
|
err: rval = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bp != NULL)
|
|
|
|
FREE_SPACEW(sp, bp, blen);
|
|
|
|
if (lb != NULL)
|
|
|
|
free(lb);
|
|
|
|
return (rval);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* re_compile --
|
|
|
|
* Compile the RE.
|
|
|
|
*
|
|
|
|
* PUBLIC: int re_compile __P((SCR *,
|
|
|
|
* PUBLIC: CHAR_T *, size_t, CHAR_T **, size_t *, regex_t *, u_int));
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
re_compile(SCR *sp, CHAR_T *ptrn, size_t plen, CHAR_T **ptrnp, size_t *lenp, regex_t *rep, u_int flags)
|
|
|
|
{
|
|
|
|
size_t len;
|
|
|
|
int reflags, replaced, rval;
|
|
|
|
CHAR_T *p;
|
|
|
|
|
|
|
|
/* Set RE flags. */
|
|
|
|
reflags = 0;
|
|
|
|
if (LF_ISSET(SEARCH_EXTEND))
|
|
|
|
reflags |= REG_EXTENDED;
|
|
|
|
if (LF_ISSET(SEARCH_IC))
|
|
|
|
reflags |= REG_ICASE;
|
|
|
|
if (LF_ISSET(SEARCH_LITERAL))
|
|
|
|
reflags |= REG_NOSPEC;
|
|
|
|
if (!LF_ISSET(SEARCH_NOOPT | SEARCH_CSCOPE | SEARCH_TAG)) {
|
|
|
|
if (O_ISSET(sp, O_EXTENDED))
|
|
|
|
reflags |= REG_EXTENDED;
|
|
|
|
if (O_ISSET(sp, O_IGNORECASE))
|
|
|
|
reflags |= REG_ICASE;
|
|
|
|
if (O_ISSET(sp, O_ICLOWER))
|
|
|
|
goto iclower;
|
|
|
|
}
|
|
|
|
if (LF_ISSET(SEARCH_ICL)) {
|
|
|
|
iclower: for (p = ptrn, len = plen; len > 0; ++p, --len)
|
|
|
|
if (ISUPPER((UCHAR_T)*p))
|
|
|
|
break;
|
|
|
|
if (len == 0)
|
|
|
|
reflags |= REG_ICASE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we're replacing a saved value, clear the old one. */
|
|
|
|
if (LF_ISSET(SEARCH_CSEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
|
|
|
|
regfree(&sp->re_c);
|
|
|
|
F_CLR(sp, SC_RE_SEARCH);
|
|
|
|
}
|
|
|
|
if (LF_ISSET(SEARCH_CSUBST) && F_ISSET(sp, SC_RE_SUBST)) {
|
|
|
|
regfree(&sp->subre_c);
|
|
|
|
F_CLR(sp, SC_RE_SUBST);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we're saving the string, it's a pattern we haven't seen before,
|
|
|
|
* so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for
|
|
|
|
* later recompilation. Free any previously saved value.
|
|
|
|
*/
|
|
|
|
if (ptrnp != NULL) {
|
|
|
|
replaced = 0;
|
|
|
|
if (LF_ISSET(SEARCH_CSCOPE)) {
|
|
|
|
if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
|
|
|
|
return (1);
|
|
|
|
/*
|
|
|
|
* XXX
|
|
|
|
* Currently, the match-any-<blank> expression used in
|
|
|
|
* re_cscope_conv() requires extended RE's. This may
|
|
|
|
* not be right or safe.
|
|
|
|
*/
|
|
|
|
reflags |= REG_EXTENDED;
|
|
|
|
} else if (LF_ISSET(SEARCH_TAG)) {
|
|
|
|
if (re_tag_conv(sp, &ptrn, &plen, &replaced))
|
|
|
|
return (1);
|
|
|
|
} else if (!LF_ISSET(SEARCH_LITERAL))
|
|
|
|
if (re_conv(sp, &ptrn, &plen, &replaced))
|
|
|
|
return (1);
|
|
|
|
|
|
|
|
/* Discard previous pattern. */
|
|
|
|
if (*ptrnp != NULL) {
|
|
|
|
free(*ptrnp);
|
|
|
|
*ptrnp = NULL;
|
|
|
|
}
|
|
|
|
if (lenp != NULL)
|
|
|
|
*lenp = plen;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copy the string into allocated memory.
|
|
|
|
*
|
|
|
|
* XXX
|
|
|
|
* Regcomp isn't 8-bit clean, so the pattern is nul-terminated
|
|
|
|
* for now. There's just no other solution.
|
|
|
|
*/
|
|
|
|
MALLOC(sp, *ptrnp, CHAR_T *, (plen + 1) * sizeof(CHAR_T));
|
|
|
|
if (*ptrnp != NULL) {
|
|
|
|
MEMCPYW(*ptrnp, ptrn, plen);
|
|
|
|
(*ptrnp)[plen] = '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Free up conversion-routine-allocated memory. */
|
|
|
|
if (replaced)
|
|
|
|
FREE_SPACEW(sp, ptrn, 0);
|
|
|
|
|
|
|
|
if (*ptrnp == NULL)
|
|
|
|
return (1);
|
|
|
|
|
|
|
|
ptrn = *ptrnp;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX
|
|
|
|
* Regcomp isn't 8-bit clean, so we just lost if the pattern
|
|
|
|
* contained a nul. Bummer!
|
|
|
|
*/
|
|
|
|
if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
|
|
|
|
if (LF_ISSET(SEARCH_MSG))
|
|
|
|
re_error(sp, rval, rep);
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (LF_ISSET(SEARCH_CSEARCH))
|
|
|
|
F_SET(sp, SC_RE_SEARCH);
|
|
|
|
if (LF_ISSET(SEARCH_CSUBST))
|
|
|
|
F_SET(sp, SC_RE_SUBST);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* re_conv --
|
|
|
|
* Convert vi's regular expressions into something that
* the POSIX 1003.2 RE functions can handle.
|
|
|
|
*
|
|
|
|
* There are three conversions we make to make vi's RE's (specifically
|
|
|
|
* the global, search, and substitute patterns) work with POSIX RE's.
|
|
|
|
*
|
|
|
|
* 1: If O_MAGIC is not set, strip backslashes from the magic character
|
|
|
|
* set (.[*~) that have them, and add them to the ones that don't.
|
|
|
|
* 2: If O_MAGIC is not set, the string "\~" is replaced with the text
|
|
|
|
* from the last substitute command's replacement string. If O_MAGIC
|
|
|
|
* is set, it's the string "~".
|
|
|
|
* 3: The pattern \<ptrn\> does "word" searches, convert it to use the
|
|
|
|
* new RE escapes.
|
|
|
|
*
|
|
|
|
* !!!/XXX
|
|
|
|
* This doesn't exactly match the historic behavior of vi because we do
|
|
|
|
* the ~ substitution before calling the RE engine, so magic characters
|
|
|
|
* in the replacement string will be expanded by the RE engine, and they
|
|
|
|
* weren't historically. It's a bug.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
re_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
|
|
|
|
{
|
|
|
|
size_t blen, len, needlen;
|
|
|
|
int magic;
|
|
|
|
CHAR_T *bp, *p, *t;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First pass through, we figure out how much space we'll need.
|
|
|
|
* We do it in two passes, on the grounds that most of the time
|
|
|
|
* the user is doing a search and won't have magic characters.
|
|
|
|
* That way we can skip most of the memory allocation and copies.
|
|
|
|
*/
|
|
|
|
magic = 0;
|
|
|
|
for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
|
|
|
|
switch (*p) {
|
|
|
|
case '\\':
|
|
|
|
if (len > 1) {
|
|
|
|
--len;
|
|
|
|
switch (*++p) {
|
|
|
|
case '<':
|
|
|
|
magic = 1;
|
|
|
|
needlen += RE_WSTART_LEN + 1;
|
|
|
|
break;
|
|
|
|
case '>':
|
|
|
|
magic = 1;
|
|
|
|
needlen += RE_WSTOP_LEN + 1;
|
|
|
|
break;
|
|
|
|
case '~':
|
|
|
|
if (!O_ISSET(sp, O_MAGIC)) {
|
|
|
|
magic = 1;
|
|
|
|
needlen += sp->repl_len;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '.':
|
|
|
|
case '[':
|
|
|
|
case '*':
|
|
|
|
if (!O_ISSET(sp, O_MAGIC)) {
|
|
|
|
magic = 1;
|
|
|
|
needlen += 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
needlen += 2;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
needlen += 1;
|
|
|
|
break;
|
|
|
|
case '~':
|
|
|
|
if (O_ISSET(sp, O_MAGIC)) {
|
|
|
|
magic = 1;
|
|
|
|
needlen += sp->repl_len;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '.':
|
|
|
|
case '[':
|
|
|
|
case '*':
|
|
|
|
if (!O_ISSET(sp, O_MAGIC)) {
|
|
|
|
magic = 1;
|
|
|
|
needlen += 2;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
needlen += 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!magic) {
|
|
|
|
*replacedp = 0;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Get enough memory to hold the final pattern. */
|
|
|
|
*replacedp = 1;
|
|
|
|
GET_SPACE_RETW(sp, bp, blen, needlen);
|
|
|
|
|
|
|
|
for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
|
|
|
|
switch (*p) {
|
|
|
|
case '\\':
|
|
|
|
if (len > 1) {
|
|
|
|
--len;
|
|
|
|
switch (*++p) {
|
|
|
|
case '<':
|
|
|
|
MEMCPY(t,
|
|
|
|
RE_WSTART, RE_WSTART_LEN);
|
|
|
|
t += RE_WSTART_LEN;
|
|
|
|
break;
|
|
|
|
case '>':
|
|
|
|
MEMCPY(t,
|
|
|
|
RE_WSTOP, RE_WSTOP_LEN);
|
|
|
|
t += RE_WSTOP_LEN;
|
|
|
|
break;
|
|
|
|
case '~':
|
|
|
|
if (O_ISSET(sp, O_MAGIC))
|
|
|
|
*t++ = '~';
|
|
|
|
else {
|
|
|
|
MEMCPYW(t,
|
|
|
|
sp->repl, sp->repl_len);
|
|
|
|
t += sp->repl_len;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '.':
|
|
|
|
case '[':
|
|
|
|
case '*':
|
|
|
|
if (O_ISSET(sp, O_MAGIC))
|
|
|
|
*t++ = '\\';
|
|
|
|
*t++ = *p;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
*t++ = '\\';
|
|
|
|
*t++ = *p;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
*t++ = '\\';
|
|
|
|
break;
|
|
|
|
case '~':
|
|
|
|
if (O_ISSET(sp, O_MAGIC)) {
|
|
|
|
MEMCPYW(t, sp->repl, sp->repl_len);
|
|
|
|
t += sp->repl_len;
|
|
|
|
} else
|
|
|
|
*t++ = '~';
|
|
|
|
break;
|
|
|
|
case '.':
|
|
|
|
case '[':
|
|
|
|
case '*':
|
|
|
|
if (!O_ISSET(sp, O_MAGIC))
|
|
|
|
*t++ = '\\';
|
|
|
|
*t++ = *p;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
*t++ = *p;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
*ptrnp = bp;
|
|
|
|
*plenp = t - bp;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* re_tag_conv --
|
|
|
|
* Convert a tags search path into something that the POSIX
|
|
|
|
* 1003.2 RE functions can handle.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
re_tag_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
|
|
|
|
{
|
|
|
|
size_t blen, len;
|
|
|
|
int lastdollar;
|
|
|
|
CHAR_T *bp, *p, *t;
|
|
|
|
|
|
|
|
len = *plenp;
|
|
|
|
|
|
|
|
/* Max memory usage is 2 times the length of the string. */
|
|
|
|
*replacedp = 1;
|
|
|
|
GET_SPACE_RETW(sp, bp, blen, len * 2);
|
|
|
|
|
|
|
|
p = *ptrnp;
|
|
|
|
t = bp;
|
|
|
|
|
|
|
|
/* If the last character is a '/' or '?', we just strip it. */
|
|
|
|
if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
|
|
|
|
--len;
|
|
|
|
|
|
|
|
/* If the next-to-last or last character is a '$', it's magic. */
|
|
|
|
if (len > 0 && p[len - 1] == '$') {
|
|
|
|
--len;
|
|
|
|
lastdollar = 1;
|
|
|
|
} else
|
|
|
|
lastdollar = 0;
|
|
|
|
|
|
|
|
/* If the first character is a '/' or '?', we just strip it. */
|
|
|
|
if (len > 0 && (p[0] == '/' || p[0] == '?')) {
|
|
|
|
++p;
|
|
|
|
--len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If the first or second character is a '^', it's magic. */
|
|
|
|
if (p[0] == '^') {
|
|
|
|
*t++ = *p++;
|
|
|
|
--len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Escape every other magic character we can find, meanwhile stripping
|
|
|
|
* the backslashes ctags inserts when escaping the search delimiter
|
|
|
|
* characters.
|
|
|
|
*/
|
|
|
|
for (; len > 0; --len) {
|
|
|
|
if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
|
|
|
|
++p;
|
|
|
|
--len;
|
|
|
|
} else if (strchr("^.[]$*", p[0]))
|
|
|
|
*t++ = '\\';
|
|
|
|
*t++ = *p++;
|
|
|
|
}
|
|
|
|
if (lastdollar)
|
|
|
|
*t++ = '$';
|
|
|
|
|
|
|
|
*ptrnp = bp;
|
|
|
|
*plenp = t - bp;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* re_cscope_conv --
|
|
|
|
* Convert a cscope search path into something that the POSIX
|
|
|
|
* 1003.2 RE functions can handle.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
re_cscope_conv(SCR *sp, CHAR_T **ptrnp, size_t *plenp, int *replacedp)
|
|
|
|
{
|
|
|
|
size_t blen, len, nspaces;
|
|
|
|
CHAR_T *bp, *t;
|
|
|
|
CHAR_T *p;
|
|
|
|
const CHAR_T *wp;
|
|
|
|
size_t wlen;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Each space in the source line printed by cscope represents an
|
|
|
|
* arbitrary sequence of spaces, tabs, and comments.
|
|
|
|
*/
|
|
|
|
#define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*"
|
|
|
|
#define CSCOPE_LEN sizeof(CSCOPE_RE_SPACE) - 1
|
|
|
|
CHAR2INT(sp, CSCOPE_RE_SPACE, CSCOPE_LEN, wp, wlen);
|
|
|
|
for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
|
|
|
|
if (*p == ' ')
|
|
|
|
++nspaces;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocate plenty of space:
|
|
|
|
* the string, plus potential escaping characters;
|
|
|
|
* nspaces + 2 copies of CSCOPE_RE_SPACE;
|
|
|
|
* ^, $, nul terminator characters.
|
|
|
|
*/
|
|
|
|
*replacedp = 1;
|
|
|
|
len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
|
|
|
|
GET_SPACE_RETW(sp, bp, blen, len);
|
|
|
|
|
|
|
|
p = *ptrnp;
|
|
|
|
t = bp;
|
|
|
|
|
|
|
|
*t++ = '^';
|
|
|
|
MEMCPYW(t, wp, wlen);
|
|
|
|
t += wlen;
|
|
|
|
|
|
|
|
for (len = *plenp; len > 0; ++p, --len)
|
|
|
|
if (*p == ' ') {
|
|
|
|
MEMCPYW(t, wp, wlen);
|
|
|
|
t += wlen;
|
|
|
|
} else {
|
|
|
|
if (strchr("\\^.[]$*+?()|{}", *p))
|
|
|
|
*t++ = '\\';
|
|
|
|
*t++ = *p;
|
|
|
|
}
|
|
|
|
|
|
|
|
MEMCPYW(t, wp, wlen);
|
|
|
|
t += wlen;
|
|
|
|
*t++ = '$';
|
|
|
|
|
|
|
|
*ptrnp = bp;
|
|
|
|
*plenp = t - bp;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* re_error --
|
|
|
|
* Report a regular expression error.
|
|
|
|
*
|
|
|
|
* PUBLIC: void re_error __P((SCR *, int, regex_t *));
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
re_error(SCR *sp, int errcode, regex_t *preg)
|
|
|
|
{
|
|
|
|
size_t sz;
|
|
|
|
char *oe;
|
|
|
|
|
|
|
|
sz = regerror(errcode, preg, NULL, 0);
|
|
|
|
if ((oe = malloc(sz)) == NULL)
|
|
|
|
msgq(sp, M_SYSERR, NULL);
|
|
|
|
else {
|
|
|
|
(void)regerror(errcode, preg, oe, sz);
|
|
|
|
msgq(sp, M_ERR, "RE error: %s", oe);
|
|
|
|
free(oe);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* re_sub --
|
|
|
|
* Do the substitution for a regular expression.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
re_sub(SCR *sp, CHAR_T *ip, CHAR_T **lbp, size_t *lbclenp, size_t *lblenp, regmatch_t *match)
|
|
|
|
|
|
|
|
/* Input line. */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
enum { C_NOT_SET, C_LOWER, C_ONE_LOWER, C_ONE_UPPER, C_UPPER } conv;
|
|
|
|
size_t lbclen, lblen; /* Local copies. */
|
|
|
|
size_t mlen; /* Match length. */
|
|
|
|
size_t rpl; /* Remaining replacement length. */
|
|
|
|
CHAR_T *rp; /* Replacement pointer. */
|
|
|
|
int ch;
|
|
|
|
int no; /* Match replacement offset. */
|
|
|
|
CHAR_T *p, *t; /* Buffer pointers. */
|
|
|
|
CHAR_T *lb; /* Local copies. */
|
|
|
|
|
|
|
|
lb = *lbp; /* Get local copies. */
|
|
|
|
lbclen = *lbclenp;
|
|
|
|
lblen = *lblenp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* QUOTING NOTE:
|
|
|
|
*
|
|
|
|
* There are some special sequences that vi provides in the
|
|
|
|
* replacement patterns.
|
|
|
|
* & string the RE matched (\& if nomagic set)
|
|
|
|
* \# n-th regular subexpression
|
|
|
|
* \E end \U, \L conversion
|
|
|
|
* \e end \U, \L conversion
|
|
|
|
* \l convert the next character to lower-case
|
|
|
|
* \L convert to lower-case, until \E, \e, or end of replacement
|
|
|
|
* \u convert the next character to upper-case
|
|
|
|
* \U convert to upper-case, until \E, \e, or end of replacement
|
|
|
|
*
|
|
|
|
* Otherwise, since this is the lowest level of replacement, discard
|
|
|
|
* all escaping characters. This (hopefully) matches historic practice.
|
|
|
|
*/
|
|
|
|
#define OUTCH(ch, nltrans) { \
|
|
|
|
ARG_CHAR_T __ch = (ch); \
|
|
|
|
e_key_t __value = KEY_VAL(sp, __ch); \
|
|
|
|
if (nltrans && (__value == K_CR || __value == K_NL)) { \
|
|
|
|
NEEDNEWLINE(sp); \
|
|
|
|
sp->newl[sp->newl_cnt++] = lbclen; \
|
|
|
|
} else if (conv != C_NOT_SET) { \
|
|
|
|
switch (conv) { \
|
|
|
|
case C_ONE_LOWER: \
|
|
|
|
conv = C_NOT_SET; \
|
|
|
|
/* FALLTHROUGH */ \
|
|
|
|
case C_LOWER: \
|
|
|
|
if (ISUPPER(__ch)) \
|
|
|
|
__ch = TOLOWER(__ch); \
|
|
|
|
break; \
|
|
|
|
case C_ONE_UPPER: \
|
|
|
|
conv = C_NOT_SET; \
|
|
|
|
/* FALLTHROUGH */ \
|
|
|
|
case C_UPPER: \
|
|
|
|
if (ISLOWER(__ch)) \
|
|
|
|
__ch = TOUPPER(__ch); \
|
|
|
|
break; \
|
|
|
|
default: \
|
|
|
|
abort(); \
|
|
|
|
} \
|
|
|
|
} \
|
|
|
|
NEEDSP(sp, 1, p); \
|
|
|
|
*p++ = __ch; \
|
|
|
|
++lbclen; \
|
|
|
|
}
|
|
|
|
conv = C_NOT_SET;
|
|
|
|
for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
|
|
|
|
switch (ch = *rp++) {
|
|
|
|
case '&':
|
|
|
|
if (O_ISSET(sp, O_MAGIC)) {
|
|
|
|
no = 0;
|
|
|
|
goto subzero;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '\\':
|
|
|
|
if (rpl == 0)
|
|
|
|
break;
|
|
|
|
--rpl;
|
|
|
|
switch (ch = *rp) {
|
|
|
|
case '&':
|
|
|
|
++rp;
|
|
|
|
if (!O_ISSET(sp, O_MAGIC)) {
|
|
|
|
no = 0;
|
|
|
|
goto subzero;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
|
|
no = *rp++ - '0';
|
|
|
|
subzero: if (match[no].rm_so == -1 ||
|
|
|
|
match[no].rm_eo == -1)
|
|
|
|
break;
|
|
|
|
mlen = match[no].rm_eo - match[no].rm_so;
|
|
|
|
for (t = ip + match[no].rm_so; mlen--; ++t)
|
|
|
|
OUTCH((UCHAR_T)*t, 0);
|
|
|
|
continue;
|
|
|
|
case 'e':
|
|
|
|
case 'E':
|
|
|
|
++rp;
|
|
|
|
conv = C_NOT_SET;
|
|
|
|
continue;
|
|
|
|
case 'l':
|
|
|
|
++rp;
|
|
|
|
conv = C_ONE_LOWER;
|
|
|
|
continue;
|
|
|
|
case 'L':
|
|
|
|
++rp;
|
|
|
|
conv = C_LOWER;
|
|
|
|
continue;
|
|
|
|
case 'u':
|
|
|
|
++rp;
|
|
|
|
conv = C_ONE_UPPER;
|
|
|
|
continue;
|
|
|
|
case 'U':
|
|
|
|
++rp;
|
|
|
|
conv = C_UPPER;
|
|
|
|
continue;
|
|
|
|
default:
|
|
|
|
++rp;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
OUTCH(ch, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
*lbp = lb; /* Update caller's information. */
|
|
|
|
*lbclenp = lbclen;
|
|
|
|
*lblenp = lblen;
|
|
|
|
return (0);
|
|
|
|
}
|