218 lines
4.7 KiB
Python
218 lines
4.7 KiB
Python
|
# parser for Unix yacc-based grammars
|
||
|
#
|
||
|
# Author: David Beazley (dave@dabeaz.com)
|
||
|
# Date : October 2, 2006
|
||
|
|
||
|
import ylex
|
||
|
# Expose the lexer module's token-name list under the module-level name
# `tokens` (the name PLY's yacc looks up when building a parser).
tokens = ylex.tokens
|
||
|
|
||
|
from ply import *
|
||
|
|
||
|
# Token names gathered by p_definition_token while the definitions section
# is parsed; printed by p_defsection as the generated `tokens = ...` line.
tokenlist = []

# Precedence entries such as ('left', 'PLUS', 'MINUS'), gathered by
# p_definition_token and printed by p_defsection as `precedence = ...`.
preclist = []

# When false, print_code() returns without emitting anything.
emit_code = 1
|
||
|
|
||
|
def p_yacc(p):
    '''yacc : defsection rulesection'''
    # Top-level production: a definitions section followed by a rules
    # section.  No action here; the sub-productions do the printing.
    # (The docstring is the grammar rule PLY reads, so it is not prose.)
|
||
|
|
||
|
def p_defsection(p):
    '''defsection : definitions SECTION
                  | SECTION'''
    # Runs once the definitions section has been reduced: flags the lexer
    # and prints the collected token and precedence tables, followed by the
    # banner that opens the generated rules.
    #
    # NOTE(review): `lastsection` is consumed by the lexer (ylex), which is
    # not visible here -- presumably it marks the next SECTION token as the
    # end of the rules section; confirm there.
    p.lexer.lastsection = 1

    # Single-argument print(...) calls give the same output under the
    # Python 2 print statement and the Python 3 print function.  The double
    # space after '=' reproduces what `print "tokens = ", x` emitted.
    print("tokens =  %s" % repr(tokenlist))
    print("")
    print("precedence =  %s" % repr(preclist))
    print("")
    print("# -------------- RULES ----------------")
    print("")
|
||
|
|
||
|
def p_rulesection(p):
    '''rulesection : rules SECTION'''
    # Closes the generated rules with a banner, then passes the value of the
    # closing SECTION token to print_code() at indent 0.
    #
    # NOTE(review): presumably the lexer stores the trailing user-code
    # section in that token's value -- confirm in ylex.
    #
    # print(...) with one argument behaves identically on Python 2 and 3.
    print("# -------------- RULES END ----------------")
    print_code(p[2], 0)
|
||
|
|
||
|
def p_definitions(p):
    '''definitions : definitions definition
                   | definition'''
    # One or more definitions (left-recursive list).  No value is built;
    # each individual definition production does its own work.
|
||
|
|
||
|
def p_definition_literal(p):
    '''definition : LITERAL'''
    # A LITERAL token in the definitions section: pass its text to
    # print_code() with no indentation.
    literal_text = p[1]
    print_code(literal_text, 0)
|
||
|
|
||
|
def p_definition_start(p):
    '''definition : START ID'''
    # Emits the start-symbol assignment for the generated module, e.g.
    #     start = 'expr'
    # print(...) with a single argument behaves identically on Python 2
    # and Python 3.
    print("start = '%s'" % p[2])
|
||
|
|
||
|
def p_definition_token(p):
    '''definition : toktype opttype idlist optsemi '''
    # Records the names from a %token / %left / %right / %nonassoc line.
    #
    # p[1] is the declaration keyword as produced by p_toktype and p[3] is
    # the list of names built by p_idlist.
    names = p[3]

    for name in names:
        # Quoted literals ('+' or "+") are not named tokens.  A name may be
        # declared more than once (e.g. %token PLUS and later %left PLUS);
        # keep only its first occurrence so the emitted `tokens` list has
        # no duplicates.
        if name[0] not in "'\"" and name not in tokenlist:
            tokenlist.append(name)

    # Associativity keywords also contribute one precedence entry holding
    # every name on the line (literals included); %token contributes none.
    assoc_by_keyword = {
        '%left': 'left',
        '%right': 'right',
        '%nonassoc': 'nonassoc',
    }
    assoc = assoc_by_keyword.get(p[1])
    if assoc is not None:
        preclist.append((assoc,) + tuple(names))
|
||
|
|
||
|
def p_toktype(p):
    '''toktype : TOKEN
               | LEFT
               | RIGHT
               | NONASSOC'''
    # Pass the keyword token's value straight through as the result.
    keyword = p[1]
    p[0] = keyword
|
||
|
|
||
|
def p_opttype(p):
    '''opttype : '<' ID '>'
               | empty'''
    # Optional <type> tag on a token declaration.  Matched and discarded;
    # no value is produced.
|
||
|
|
||
|
def p_idlist(p):
    '''idlist : idlist optcomma tokenid
              | tokenid'''
    # Builds the list of declared names.
    if len(p) != 2:
        # Recursive case: extend the existing list in place and reuse it.
        names = p[1]
        names.append(p[3])
        p[0] = names
        return
    # Base case: a single tokenid starts a new list.
    p[0] = [p[1]]
|
||
|
|
||
|
def p_tokenid(p):
    '''tokenid : ID
               | ID NUMBER
               | QLITERAL
               | QLITERAL NUMBER'''
    # The result is the identifier or quoted literal; an optional trailing
    # NUMBER is matched but dropped.
    name = p[1]
    p[0] = name
|
||
|
|
||
|
def p_optsemi(p):
    '''optsemi : ';'
               | empty'''
    # Optional terminating semicolon; matched and ignored.
|
||
|
|
||
|
def p_optcomma(p):
    '''optcomma : ','
                | empty'''
    # Optional comma between list items; matched and ignored.
|
||
|
|
||
|
def p_definition_type(p):
    '''definition : TYPE '<' ID '>' namelist optsemi'''
    # %type declarations are recognised so the input parses, then ignored:
    # nothing is recorded or printed for them.
|
||
|
|
||
|
def p_namelist(p):
    '''namelist : namelist optcomma ID
                | ID'''
    # List of names, optionally comma-separated.  No value is built.
|
||
|
|
||
|
def p_definition_union(p):
    '''definition : UNION CODE optsemi'''
    # %union declarations are recognised so the input parses, then ignored:
    # nothing is recorded or printed for them.
|
||
|
|
||
|
def p_rules(p):
    '''rules : rules rule
             | rule'''
    # Prints a Python (PLY) skeleton for the rule that was just reduced.
    #
    # A rule value is (name, alternatives); each alternative is a list of
    # item strings (see the p_rule* productions).  Every alternative
    # becomes a function
    #     def p_<name>_<n>(p):
    #         '''<name> : <symbols>'''
    # A trailing code block is printed through print_code() at indent 4.
    # A code block that is not last is an embedded action: it is replaced
    # in the production by a generated symbol _embed<k>_<name>, and printed
    # afterwards as its own empty production.
    #
    # All printing uses single-argument print(...), which gives the same
    # output on Python 2 and Python 3.
    if len(p) == 2:
        rule = p[1]
    else:
        rule = p[2]

    rulename = rule[0]
    alternatives = rule[1]

    # (generated symbol, code text) pairs for embedded actions; its length
    # doubles as the running counter used to number them.
    embedded = []

    for index, alternative in enumerate(alternatives):
        print("def p_%s_%d(p):" % (rulename, index + 1))

        prod = []
        prodcode = ""
        last = len(alternative) - 1
        for position, item in enumerate(alternative):
            if item[0] != '{':
                # Ordinary grammar symbol.
                prod.append(item)
            elif position == last:
                # Trailing action: emitted below, not part of the production.
                prodcode = item
            else:
                # Embedded (mid-rule) action.
                embed_name = "_embed%d_%s" % (len(embedded), rulename)
                prod.append(embed_name)
                embedded.append((embed_name, item))

        # Indented 4 spaces to line up with the print_code(..., 4) output.
        print("    '''%s : %s'''" % (rulename, " ".join(prod)))
        print_code(prodcode, 4)
        print("")

    for embed_name, code in embedded:
        print("def p_%s(p):" % embed_name)
        print("    '''%s : '''" % embed_name)
        print_code(code, 4)
        print("")
|
||
|
|
||
|
def p_rule(p):
    '''rule : ID ':' rulelist ';' '''
    # A rule with exactly one alternative: (name, [alternative]).
    name = p[1]
    alternatives = [p[3]]
    p[0] = (name, alternatives)
|
||
|
|
||
|
def p_rule2(p):
    '''rule : ID ':' rulelist morerules ';' '''
    # First alternative followed by '|' alternatives: put the first one at
    # the front of the morerules list (modified in place) and return
    # (name, alternatives).
    alternatives = p[4]
    alternatives[:0] = [p[3]]
    p[0] = (p[1], alternatives)
|
||
|
|
||
|
def p_rule_empty(p):
    '''rule : ID ':' ';' '''
    # A rule whose only alternative is empty: (name, [[]]).
    only_empty = [[]]
    p[0] = (p[1], only_empty)
|
||
|
|
||
|
def p_rule_empty2(p):
    '''rule : ID ':' morerules ';' '''
    # The first alternative is empty and more follow after '|': put an
    # empty alternative at the front of the morerules list (modified in
    # place) and return (name, alternatives).
    alternatives = p[3]
    alternatives[:0] = [[]]
    p[0] = (p[1], alternatives)
|
||
|
|
||
|
def p_morerules(p):
    '''morerules : morerules '|' rulelist
                 | '|' rulelist
                 | '|' '''
    # Builds the list of alternatives that follow the first one.
    size = len(p)
    if size == 3:
        # '|' rulelist  -> start a list holding that alternative
        alternatives = [p[2]]
    elif size == 2:
        # a lone '|'    -> a single empty alternative
        alternatives = [[]]
    else:
        # morerules '|' rulelist -> extend the existing list in place
        alternatives = p[1]
        alternatives.append(p[3])
    p[0] = alternatives
|
||
|
|
||
|
def p_rulelist(p):
    '''rulelist : rulelist ruleitem
                | ruleitem'''
    # Builds the list of items that make up one alternative.
    if len(p) != 2:
        # Recursive case: extend the existing list in place and reuse it.
        items = p[1]
        items.append(p[2])
        p[0] = items
        return
    # Base case: a single item starts a new list.
    p[0] = [p[1]]
|
||
|
|
||
|
def p_ruleitem(p):
    '''ruleitem : ID
                | QLITERAL
                | CODE
                | PREC'''
    # A single element of an alternative; the token value is passed through
    # unchanged.
    item = p[1]
    p[0] = item
|
||
|
|
||
|
def p_empty(p):
    '''empty : '''
    # The empty production, used by the optional parts of the grammar
    # (opttype, optsemi, optcomma).  Produces no value.
|
||
|
|
||
|
def p_error(p):
    # Syntax-error hook called by the parser.
    #
    # Previously errors were dropped silently, so a malformed grammar gave
    # truncated output with no explanation.  The problem is now reported on
    # stderr, which keeps the generated code on stdout clean.  The function
    # still returns None, so the parser's own recovery is unchanged.
    #
    # p is the offending token, or None when input ended unexpectedly.
    # Attributes are read defensively with getattr().
    import sys

    if p is None:
        sys.stderr.write("yply: syntax error at end of input\n")
        return
    sys.stderr.write(
        "yply: syntax error at line %s near %r (token %s)\n"
        % (getattr(p, 'lineno', '?'),
           getattr(p, 'value', None),
           getattr(p, 'type', '?'))
    )
|
||
|
|
||
|
# Build the parser from the p_* functions defined in this module; this runs
# at import time.  debug=0 turns off PLY's debugging output.
# NOTE(review): `yacc` is not imported by name in this file -- it is expected
# to come from `from ply import *`; confirm.
yacc.yacc(debug=0)
|
||
|
|
||
|
def print_code(code, indent):
    # Prints `code` as Python comments, one line of output per line of
    # input, each prefixed by `indent` spaces and "# ".
    #
    # Does nothing when the module-level flag emit_code is false.
    # print(...) with a single argument behaves identically on Python 2
    # and Python 3.
    if not emit_code:
        return
    padding = " " * indent
    for line in code.splitlines():
        print("%s# %s" % (padding, line))
|
||
|
|