minix/external/bsd/flex/dist/parse.y

1092 lines
23 KiB
Text
Raw Permalink Normal View History

/* $NetBSD: parse.y,v 1.3 2013/04/06 14:27:52 christos Exp $ */
2012-06-06 16:10:32 +02:00
2005-07-11 14:54:10 +02:00
/* parse.y - parser for flex input */
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
2012-06-06 16:10:32 +02:00
%token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
%token OPT_TABLES
2005-07-11 14:54:10 +02:00
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
2012-06-06 16:10:32 +02:00
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
%left CCL_OP_DIFF CCL_OP_UNION
/*
*POSIX and AT&T lex place the
* precedence of the repeat operator, {}, below that of concatenation.
* Thus, ab{3} is ababab. Most other POSIX utilities use an Extended
* Regular Expression (ERE) precedence that has the repeat operator
* higher than concatenation. This causes ab{3} to yield abbb.
2005-07-11 14:54:10 +02:00
*
2012-06-06 16:10:32 +02:00
* In order to support the POSIX and AT&T precedence and the flex
* precedence we define two token sets for the begin and end tokens of
* the repeat operator, '{' and '}'. The lexical scanner chooses
* which tokens to return based on whether posix_compat or lex_compat
* are specified. Specifying either posix_compat or lex_compat will
* cause flex to parse scanner files as per the AT&T and
* POSIX-mandated behavior.
2005-07-11 14:54:10 +02:00
*/
2012-06-06 16:10:32 +02:00
%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
2005-07-11 14:54:10 +02:00
2012-06-06 16:10:32 +02:00
%{
/* Copyright (c) 1990 The Regents of the University of California. */
/* All rights reserved. */
/* This code is derived from software contributed to Berkeley by */
/* Vern Paxson. */
/* The United States Government has rights in this work pursuant */
/* to contract no. DE-AC03-76SF00098 between the United States */
/* Department of Energy and the University of California. */
/* This file is part of flex. */
/* Redistribution and use in source and binary forms, with or without */
/* modification, are permitted provided that the following conditions */
/* are met: */
/* 1. Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* 2. Redistributions in binary form must reproduce the above copyright */
/* notice, this list of conditions and the following disclaimer in the */
/* documentation and/or other materials provided with the distribution. */
/* Neither the name of the University nor the names of its contributors */
/* may be used to endorse or promote products derived from this software */
/* without specific prior written permission. */
/* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/* PURPOSE. */
2005-07-11 14:54:10 +02:00
#include "flexdef.h"
2012-06-06 16:10:32 +02:00
#include "tables.h"
2005-07-11 14:54:10 +02:00
2012-06-06 16:10:32 +02:00
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
2005-07-11 14:54:10 +02:00
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
int *scon_stk;
int scon_stk_ptr;
static int madeany = false; /* whether we've made the '.' character class */
2012-06-06 16:10:32 +02:00
static int ccldot, cclany;
2005-07-11 14:54:10 +02:00
int previous_continued_action; /* whether the previous rule's action was '|' */
2012-06-06 16:10:32 +02:00
#define format_warn3(fmt, a1, a2) \
do{ \
char fw3_msg[MAXLINE];\
snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
lwarn( fw3_msg );\
}while(0)
2005-07-11 14:54:10 +02:00
/* Expand a POSIX character class expression. */
#define CCL_EXPR(func) \
2012-06-06 16:10:32 +02:00
do{ \
2005-07-11 14:54:10 +02:00
int c; \
for ( c = 0; c < csize; ++c ) \
if ( isascii(c) && func(c) ) \
ccladd( currccl, c ); \
2012-06-06 16:10:32 +02:00
}while(0)
/* negated class */
#define CCL_NEG_EXPR(func) \
do{ \
int c; \
for ( c = 0; c < csize; ++c ) \
if ( !func(c) ) \
ccladd( currccl, c ); \
}while(0)
2005-07-11 14:54:10 +02:00
/* While POSIX defines isblank(), it's not ANSI C. */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
/* On some over-ambitious machines, such as DEC Alpha's, the default
* token type is "long" instead of "int"; this leads to problems with
* declaring yylval in flexdef.h. But so far, all the yacc's I've seen
* wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
* following should ensure that the default token type is "int".
*/
#define YYSTYPE int
%}
%%
goal : initlex sect1 sect1end sect2 initforrule
{ /* add default rule */
int def_rule;
pat = cclinit();
cclnegate( pat );
def_rule = mkstate( -pat );
/* Remember the number of the default rule so we
* don't generate "can't match" warnings for it.
*/
default_rule = num_rules;
2012-06-06 16:10:32 +02:00
finish_rule( def_rule, false, 0, 0, 0);
2005-07-11 14:54:10 +02:00
for ( i = 1; i <= lastsc; ++i )
scset[i] = mkbranch( scset[i], def_rule );
if ( spprdflt )
add_action(
"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
else
add_action( "ECHO" );
add_action( ";\n\tYY_BREAK\n" );
}
;
initlex :
{ /* initialize for processing rules */
/* Create default DFA start condition. */
scinstal( "INITIAL", false );
}
;
sect1 : sect1 startconddecl namelist1
| sect1 options
|
| error
2012-06-06 16:10:32 +02:00
{ synerr( _("unknown error processing section 1") ); }
2005-07-11 14:54:10 +02:00
;
sect1end : SECTEND
{
check_options();
scon_stk = allocate_integer_array( lastsc + 1 );
scon_stk_ptr = 0;
}
;
startconddecl : SCDECL
{ xcluflg = false; }
| XSCDECL
{ xcluflg = true; }
;
namelist1 : namelist1 NAME
{ scinstal( nmstr, xcluflg ); }
| NAME
{ scinstal( nmstr, xcluflg ); }
| error
2012-06-06 16:10:32 +02:00
{ synerr( _("bad start condition list") ); }
2005-07-11 14:54:10 +02:00
;
options : OPTION_OP optionlist
;
optionlist : optionlist option
|
;
option : OPT_OUTFILE '=' NAME
{
outfilename = copy_string( nmstr );
did_outfilename = 1;
}
2012-06-06 16:10:32 +02:00
| OPT_EXTRA_TYPE '=' NAME
{ extra_type = copy_string( nmstr ); }
2005-07-11 14:54:10 +02:00
| OPT_PREFIX '=' NAME
{ prefix = copy_string( nmstr ); }
| OPT_YYCLASS '=' NAME
{ yyclass = copy_string( nmstr ); }
2012-06-06 16:10:32 +02:00
| OPT_HEADER '=' NAME
{ headerfilename = copy_string( nmstr ); }
| OPT_TABLES '=' NAME
{ tablesext = true; tablesfilename = copy_string( nmstr ); }
2005-07-11 14:54:10 +02:00
;
sect2 : sect2 scon initforrule flexrule '\n'
{ scon_stk_ptr = $2; }
| sect2 scon '{' sect2 '}'
{ scon_stk_ptr = $2; }
|
;
initforrule :
{
/* Initialize for a parse of one rule. */
trlcontxt = variable_trail_rule = varlength = false;
trailcnt = headcnt = rulelen = 0;
current_state_type = STATE_NORMAL;
previous_continued_action = continued_action;
in_rule = true;
new_rule();
}
;
flexrule : '^' rule
{
pat = $2;
finish_rule( pat, variable_trail_rule,
2012-06-06 16:10:32 +02:00
headcnt, trailcnt , previous_continued_action);
2005-07-11 14:54:10 +02:00
if ( scon_stk_ptr > 0 )
{
for ( i = 1; i <= scon_stk_ptr; ++i )
scbol[scon_stk[i]] =
mkbranch( scbol[scon_stk[i]],
pat );
}
else
{
/* Add to all non-exclusive start conditions,
* including the default (0) start condition.
*/
for ( i = 1; i <= lastsc; ++i )
if ( ! scxclu[i] )
scbol[i] = mkbranch( scbol[i],
pat );
}
if ( ! bol_needed )
{
bol_needed = true;
if ( performance_report > 1 )
pinpoint_message(
"'^' operator results in sub-optimal performance" );
}
}
| rule
{
pat = $1;
finish_rule( pat, variable_trail_rule,
2012-06-06 16:10:32 +02:00
headcnt, trailcnt , previous_continued_action);
2005-07-11 14:54:10 +02:00
if ( scon_stk_ptr > 0 )
{
for ( i = 1; i <= scon_stk_ptr; ++i )
scset[scon_stk[i]] =
mkbranch( scset[scon_stk[i]],
pat );
}
else
{
for ( i = 1; i <= lastsc; ++i )
if ( ! scxclu[i] )
scset[i] =
mkbranch( scset[i],
pat );
}
}
| EOF_OP
{
if ( scon_stk_ptr > 0 )
build_eof_action();
else
{
/* This EOF applies to all start conditions
* which don't already have EOF actions.
*/
for ( i = 1; i <= lastsc; ++i )
if ( ! sceof[i] )
scon_stk[++scon_stk_ptr] = i;
if ( scon_stk_ptr == 0 )
2012-06-06 16:10:32 +02:00
lwarn(
2005-07-11 14:54:10 +02:00
"all start conditions already have <<EOF>> rules" );
else
build_eof_action();
}
}
| error
2012-06-06 16:10:32 +02:00
{ synerr( _("unrecognized rule") ); }
2005-07-11 14:54:10 +02:00
;
scon_stk_ptr :
{ $$ = scon_stk_ptr; }
;
scon : '<' scon_stk_ptr namelist2 '>'
{ $$ = $2; }
| '<' '*' '>'
{
$$ = scon_stk_ptr;
for ( i = 1; i <= lastsc; ++i )
{
int j;
for ( j = 1; j <= scon_stk_ptr; ++j )
if ( scon_stk[j] == i )
break;
if ( j > scon_stk_ptr )
scon_stk[++scon_stk_ptr] = i;
}
}
|
{ $$ = scon_stk_ptr; }
;
namelist2 : namelist2 ',' sconname
| sconname
| error
2012-06-06 16:10:32 +02:00
{ synerr( _("bad start condition list") ); }
2005-07-11 14:54:10 +02:00
;
sconname : NAME
{
if ( (scnum = sclookup( nmstr )) == 0 )
format_pinpoint_message(
"undeclared start condition %s",
nmstr );
else
{
for ( i = 1; i <= scon_stk_ptr; ++i )
if ( scon_stk[i] == scnum )
{
format_warn(
"<%s> specified twice",
scname[scnum] );
break;
}
if ( i > scon_stk_ptr )
scon_stk[++scon_stk_ptr] = scnum;
}
}
;
rule : re2 re
{
if ( transchar[lastst[$2]] != SYM_EPSILON )
/* Provide final transition \now/ so it
* will be marked as a trailing context
* state.
*/
$2 = link_machines( $2,
mkstate( SYM_EPSILON ) );
mark_beginning_as_normal( $2 );
current_state_type = STATE_NORMAL;
if ( previous_continued_action )
{
/* We need to treat this as variable trailing
* context so that the backup does not happen
* in the action but before the action switch
* statement. If the backup happens in the
* action, then the rules "falling into" this
* one's action will *also* do the backup,
* erroneously.
*/
if ( ! varlength || headcnt != 0 )
2012-06-06 16:10:32 +02:00
lwarn(
2005-07-11 14:54:10 +02:00
"trailing context made variable due to preceding '|' action" );
/* Mark as variable. */
varlength = true;
headcnt = 0;
2012-06-06 16:10:32 +02:00
2005-07-11 14:54:10 +02:00
}
if ( lex_compat || (varlength && headcnt == 0) )
{ /* variable trailing context rule */
/* Mark the first part of the rule as the
* accepting "head" part of a trailing
* context rule.
*
* By the way, we didn't do this at the
* beginning of this production because back
* then current_state_type was set up for a
* trail rule, and add_accept() can create
* a new state ...
*/
add_accept( $1,
num_rules | YY_TRAILING_HEAD_MASK );
variable_trail_rule = true;
}
else
trailcnt = rulelen;
$$ = link_machines( $1, $2 );
}
| re2 re '$'
2012-06-06 16:10:32 +02:00
{ synerr( _("trailing context used twice") ); }
2005-07-11 14:54:10 +02:00
| re '$'
{
headcnt = 0;
trailcnt = 1;
rulelen = 1;
varlength = false;
current_state_type = STATE_TRAILING_CONTEXT;
if ( trlcontxt )
{
2012-06-06 16:10:32 +02:00
synerr( _("trailing context used twice") );
2005-07-11 14:54:10 +02:00
$$ = mkstate( SYM_EPSILON );
}
else if ( previous_continued_action )
{
/* See the comment in the rule for "re2 re"
* above.
*/
2012-06-06 16:10:32 +02:00
lwarn(
2005-07-11 14:54:10 +02:00
"trailing context made variable due to preceding '|' action" );
varlength = true;
}
if ( lex_compat || varlength )
{
/* Again, see the comment in the rule for
* "re2 re" above.
*/
add_accept( $1,
num_rules | YY_TRAILING_HEAD_MASK );
variable_trail_rule = true;
}
trlcontxt = true;
eps = mkstate( SYM_EPSILON );
$$ = link_machines( $1,
link_machines( eps, mkstate( '\n' ) ) );
}
| re
{
$$ = $1;
if ( trlcontxt )
{
if ( lex_compat || (varlength && headcnt == 0) )
/* Both head and trail are
* variable-length.
*/
variable_trail_rule = true;
else
trailcnt = rulelen;
}
}
;
re : re '|' series
{
varlength = true;
$$ = mkor( $1, $3 );
}
| series
{ $$ = $1; }
;
re2 : re '/'
{
/* This rule is written separately so the
* reduction will occur before the trailing
* series is parsed.
*/
if ( trlcontxt )
2012-06-06 16:10:32 +02:00
synerr( _("trailing context used twice") );
2005-07-11 14:54:10 +02:00
else
trlcontxt = true;
if ( varlength )
/* We hope the trailing context is
* fixed-length.
*/
varlength = false;
else
headcnt = rulelen;
rulelen = 0;
current_state_type = STATE_TRAILING_CONTEXT;
$$ = $1;
}
;
series : series singleton
{
/* This is where concatenation of adjacent patterns
* gets done.
*/
$$ = link_machines( $1, $2 );
}
| singleton
{ $$ = $1; }
2012-06-06 16:10:32 +02:00
| series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
{
varlength = true;
if ( $3 > $5 || $3 < 0 )
{
synerr( _("bad iteration values") );
$$ = $1;
}
else
{
if ( $3 == 0 )
{
if ( $5 <= 0 )
{
synerr(
_("bad iteration values") );
$$ = $1;
}
else
$$ = mkopt(
mkrep( $1, 1, $5 ) );
}
else
$$ = mkrep( $1, $3, $5 );
}
}
| series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
{
varlength = true;
if ( $3 <= 0 )
{
synerr( _("iteration value must be positive") );
$$ = $1;
}
else
$$ = mkrep( $1, $3, INFINITE_REPEAT );
}
| series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
{
/* The series could be something like "(foo)",
* in which case we have no idea what its length
* is, so we punt here.
*/
varlength = true;
if ( $3 <= 0 )
{
synerr( _("iteration value must be positive")
);
$$ = $1;
}
else
$$ = link_machines( $1,
copysingl( $1, $3 - 1 ) );
}
2005-07-11 14:54:10 +02:00
;
singleton : singleton '*'
{
varlength = true;
$$ = mkclos( $1 );
}
| singleton '+'
{
varlength = true;
$$ = mkposcl( $1 );
}
| singleton '?'
{
varlength = true;
$$ = mkopt( $1 );
}
2012-06-06 16:10:32 +02:00
| singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
2005-07-11 14:54:10 +02:00
{
varlength = true;
if ( $3 > $5 || $3 < 0 )
{
2012-06-06 16:10:32 +02:00
synerr( _("bad iteration values") );
2005-07-11 14:54:10 +02:00
$$ = $1;
}
else
{
if ( $3 == 0 )
{
if ( $5 <= 0 )
{
synerr(
2012-06-06 16:10:32 +02:00
_("bad iteration values") );
2005-07-11 14:54:10 +02:00
$$ = $1;
}
else
$$ = mkopt(
mkrep( $1, 1, $5 ) );
}
else
$$ = mkrep( $1, $3, $5 );
}
}
2012-06-06 16:10:32 +02:00
| singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
2005-07-11 14:54:10 +02:00
{
varlength = true;
if ( $3 <= 0 )
{
2012-06-06 16:10:32 +02:00
synerr( _("iteration value must be positive") );
2005-07-11 14:54:10 +02:00
$$ = $1;
}
else
2012-06-06 16:10:32 +02:00
$$ = mkrep( $1, $3, INFINITE_REPEAT );
2005-07-11 14:54:10 +02:00
}
2012-06-06 16:10:32 +02:00
| singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
2005-07-11 14:54:10 +02:00
{
/* The singleton could be something like "(foo)",
* in which case we have no idea what its length
* is, so we punt here.
*/
varlength = true;
if ( $3 <= 0 )
{
2012-06-06 16:10:32 +02:00
synerr( _("iteration value must be positive") );
2005-07-11 14:54:10 +02:00
$$ = $1;
}
else
$$ = link_machines( $1,
copysingl( $1, $3 - 1 ) );
}
| '.'
{
if ( ! madeany )
{
/* Create the '.' character class. */
2012-06-06 16:10:32 +02:00
ccldot = cclinit();
ccladd( ccldot, '\n' );
cclnegate( ccldot );
2005-07-11 14:54:10 +02:00
2012-06-06 16:10:32 +02:00
if ( useecs )
mkeccl( ccltbl + cclmap[ccldot],
ccllen[ccldot], nextecm,
ecgroup, csize, csize );
/* Create the (?s:'.') character class. */
cclany = cclinit();
cclnegate( cclany );
if ( useecs )
mkeccl( ccltbl + cclmap[cclany],
ccllen[cclany], nextecm,
ecgroup, csize, csize );
2005-07-11 14:54:10 +02:00
madeany = true;
}
++rulelen;
2012-06-06 16:10:32 +02:00
if (sf_dot_all())
$$ = mkstate( -cclany );
else
$$ = mkstate( -ccldot );
2005-07-11 14:54:10 +02:00
}
| fullccl
{
/* Sort characters for fast searching.
2005-07-11 14:54:10 +02:00
*/
qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
2005-07-11 14:54:10 +02:00
if ( useecs )
mkeccl( ccltbl + cclmap[$1], ccllen[$1],
nextecm, ecgroup, csize, csize );
++rulelen;
2012-06-06 16:10:32 +02:00
if (ccl_has_nl[$1])
rule_has_nl[num_rules] = true;
2005-07-11 14:54:10 +02:00
$$ = mkstate( -$1 );
}
| PREVCCL
{
++rulelen;
2012-06-06 16:10:32 +02:00
if (ccl_has_nl[$1])
rule_has_nl[num_rules] = true;
2005-07-11 14:54:10 +02:00
$$ = mkstate( -$1 );
}
| '"' string '"'
{ $$ = $2; }
| '(' re ')'
{ $$ = $2; }
| CHAR
{
++rulelen;
2012-06-06 16:10:32 +02:00
if ($1 == nlch)
rule_has_nl[num_rules] = true;
2005-07-11 14:54:10 +02:00
2012-06-06 16:10:32 +02:00
if (sf_case_ins() && has_case($1))
/* create an alternation, as in (a|A) */
$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
else
$$ = mkstate( $1 );
2005-07-11 14:54:10 +02:00
}
;
2012-06-06 16:10:32 +02:00
fullccl:
fullccl CCL_OP_DIFF braceccl { $$ = ccl_set_diff ($1, $3); }
| fullccl CCL_OP_UNION braceccl { $$ = ccl_set_union ($1, $3); }
| braceccl
;
2005-07-11 14:54:10 +02:00
2012-06-06 16:10:32 +02:00
braceccl:
'[' ccl ']' { $$ = $2; }
2005-07-11 14:54:10 +02:00
| '[' '^' ccl ']'
{
cclnegate( $3 );
$$ = $3;
}
;
ccl : ccl CHAR '-' CHAR
{
2012-06-06 16:10:32 +02:00
if (sf_case_ins())
{
/* If one end of the range has case and the other
* does not, or the cases are different, then we're not
* sure what range the user is trying to express.
* Examples: [@-z] or [S-t]
*/
if (has_case ($2) != has_case ($4)
|| (has_case ($2) && (b_islower ($2) != b_islower ($4)))
|| (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
format_warn3 (
_("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
$2, $4);
/* If the range spans uppercase characters but not
* lowercase (or vice-versa), then should we automatically
* include lowercase characters in the range?
* Example: [@-_] spans [a-z] but not [A-Z]
*/
else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
format_warn3 (
_("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
$2, $4);
}
2005-07-11 14:54:10 +02:00
if ( $2 > $4 )
2012-06-06 16:10:32 +02:00
synerr( _("negative range in character class") );
2005-07-11 14:54:10 +02:00
else
{
for ( i = $2; i <= $4; ++i )
ccladd( $1, i );
/* Keep track if this ccl is staying in
* alphabetical order.
*/
cclsorted = cclsorted && ($2 > lastchar);
lastchar = $4;
2012-06-06 16:10:32 +02:00
/* Do it again for upper/lowercase */
if (sf_case_ins() && has_case($2) && has_case($4)){
$2 = reverse_case ($2);
$4 = reverse_case ($4);
for ( i = $2; i <= $4; ++i )
ccladd( $1, i );
cclsorted = cclsorted && ($2 > lastchar);
lastchar = $4;
}
2005-07-11 14:54:10 +02:00
}
$$ = $1;
}
| ccl CHAR
{
ccladd( $1, $2 );
cclsorted = cclsorted && ($2 > lastchar);
lastchar = $2;
2012-06-06 16:10:32 +02:00
/* Do it again for upper/lowercase */
if (sf_case_ins() && has_case($2)){
$2 = reverse_case ($2);
ccladd ($1, $2);
cclsorted = cclsorted && ($2 > lastchar);
lastchar = $2;
}
2005-07-11 14:54:10 +02:00
$$ = $1;
}
| ccl ccl_expr
{
/* Too hard to properly maintain cclsorted. */
cclsorted = false;
$$ = $1;
}
|
{
cclsorted = true;
lastchar = 0;
currccl = $$ = cclinit();
}
;
2012-06-06 16:10:32 +02:00
ccl_expr:
CCE_ALNUM { CCL_EXPR(isalnum); }
| CCE_ALPHA { CCL_EXPR(isalpha); }
| CCE_BLANK { CCL_EXPR(IS_BLANK); }
| CCE_CNTRL { CCL_EXPR(iscntrl); }
| CCE_DIGIT { CCL_EXPR(isdigit); }
| CCE_GRAPH { CCL_EXPR(isgraph); }
| CCE_LOWER {
CCL_EXPR(islower);
if (sf_case_ins())
CCL_EXPR(isupper);
}
| CCE_PRINT { CCL_EXPR(isprint); }
| CCE_PUNCT { CCL_EXPR(ispunct); }
| CCE_SPACE { CCL_EXPR(isspace); }
| CCE_XDIGIT { CCL_EXPR(isxdigit); }
2005-07-11 14:54:10 +02:00
| CCE_UPPER {
2012-06-06 16:10:32 +02:00
CCL_EXPR(isupper);
if (sf_case_ins())
CCL_EXPR(islower);
}
| CCE_NEG_ALNUM { CCL_NEG_EXPR(isalnum); }
| CCE_NEG_ALPHA { CCL_NEG_EXPR(isalpha); }
| CCE_NEG_BLANK { CCL_NEG_EXPR(IS_BLANK); }
| CCE_NEG_CNTRL { CCL_NEG_EXPR(iscntrl); }
| CCE_NEG_DIGIT { CCL_NEG_EXPR(isdigit); }
| CCE_NEG_GRAPH { CCL_NEG_EXPR(isgraph); }
| CCE_NEG_PRINT { CCL_NEG_EXPR(isprint); }
| CCE_NEG_PUNCT { CCL_NEG_EXPR(ispunct); }
| CCE_NEG_SPACE { CCL_NEG_EXPR(isspace); }
| CCE_NEG_XDIGIT { CCL_NEG_EXPR(isxdigit); }
| CCE_NEG_LOWER {
if ( sf_case_ins() )
lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
2005-07-11 14:54:10 +02:00
else
2012-06-06 16:10:32 +02:00
CCL_NEG_EXPR(islower);
}
| CCE_NEG_UPPER {
if ( sf_case_ins() )
lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
else
CCL_NEG_EXPR(isupper);
2005-07-11 14:54:10 +02:00
}
;
string : string CHAR
{
2012-06-06 16:10:32 +02:00
if ( $2 == nlch )
rule_has_nl[num_rules] = true;
2005-07-11 14:54:10 +02:00
++rulelen;
2012-06-06 16:10:32 +02:00
if (sf_case_ins() && has_case($2))
$$ = mkor (mkstate($2), mkstate(reverse_case($2)));
else
$$ = mkstate ($2);
$$ = link_machines( $1, $$);
2005-07-11 14:54:10 +02:00
}
|
{ $$ = mkstate( SYM_EPSILON ); }
;
%%
/* build_eof_action - build the "<<EOF>>" action for the active start
* conditions
*/
void build_eof_action()
{
register int i;
char action_text[MAXLINE];
for ( i = 1; i <= scon_stk_ptr; ++i )
{
if ( sceof[scon_stk[i]] )
format_pinpoint_message(
"multiple <<EOF>> rules for start condition %s",
scname[scon_stk[i]] );
else
{
sceof[scon_stk[i]] = true;
if (previous_continued_action /* && previous action was regular */)
add_action("YY_RULE_SETUP\n");
2012-06-06 16:10:32 +02:00
snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
2005-07-11 14:54:10 +02:00
scname[scon_stk[i]] );
add_action( action_text );
}
}
line_directive_out( (FILE *) 0, 1 );
/* This isn't a normal rule after all - don't count it as
* such, so we don't have any holes in the rule numbering
* (which make generating "rule can never match" warnings
* more difficult.
*/
--num_rules;
++num_eof_rules;
}
/* format_synerr - write out formatted syntax error */
void format_synerr( msg, arg )
2012-06-06 16:10:32 +02:00
const char *msg, arg[];
2005-07-11 14:54:10 +02:00
{
char errmsg[MAXLINE];
2012-06-06 16:10:32 +02:00
(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
2005-07-11 14:54:10 +02:00
synerr( errmsg );
}
/* synerr - report a syntax error */
void synerr( str )
2012-06-06 16:10:32 +02:00
const char *str;
2005-07-11 14:54:10 +02:00
{
syntaxerror = true;
pinpoint_message( str );
}
/* format_warn - write out formatted warning */
void format_warn( msg, arg )
2012-06-06 16:10:32 +02:00
const char *msg, arg[];
2005-07-11 14:54:10 +02:00
{
char warn_msg[MAXLINE];
2012-06-06 16:10:32 +02:00
snprintf( warn_msg, sizeof(warn_msg), msg, arg );
lwarn( warn_msg );
2005-07-11 14:54:10 +02:00
}
2012-06-06 16:10:32 +02:00
/* lwarn - report a warning, unless -w was given */
2005-07-11 14:54:10 +02:00
2012-06-06 16:10:32 +02:00
void lwarn( str )
const char *str;
2005-07-11 14:54:10 +02:00
{
line_warning( str, linenum );
}
/* format_pinpoint_message - write out a message formatted with one string,
* pinpointing its location
*/
void format_pinpoint_message( msg, arg )
2012-06-06 16:10:32 +02:00
const char *msg, arg[];
2005-07-11 14:54:10 +02:00
{
char errmsg[MAXLINE];
2012-06-06 16:10:32 +02:00
snprintf( errmsg, sizeof(errmsg), msg, arg );
2005-07-11 14:54:10 +02:00
pinpoint_message( errmsg );
}
/* pinpoint_message - write out a message, pinpointing its location */
void pinpoint_message( str )
2012-06-06 16:10:32 +02:00
const char *str;
2005-07-11 14:54:10 +02:00
{
line_pinpoint( str, linenum );
}
/* line_warning - report a warning at a given line, unless -w was given */
void line_warning( str, line )
2012-06-06 16:10:32 +02:00
const char *str;
2005-07-11 14:54:10 +02:00
int line;
{
char warning[MAXLINE];
if ( ! nowarn )
{
2012-06-06 16:10:32 +02:00
snprintf( warning, sizeof(warning), "warning, %s", str );
2005-07-11 14:54:10 +02:00
line_pinpoint( warning, line );
}
}
/* line_pinpoint - write out a message, pinpointing it at the given line */
void line_pinpoint( str, line )
2012-06-06 16:10:32 +02:00
const char *str;
2005-07-11 14:54:10 +02:00
int line;
{
2012-06-06 16:10:32 +02:00
fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
2005-07-11 14:54:10 +02:00
}
/* yyerror - eat up an error message from the parser;
* currently, messages are ignore
*/
void yyerror( msg )
2012-06-06 16:10:32 +02:00
const char *msg;
2005-07-11 14:54:10 +02:00
{
}