isa_parser: Turn the ISA Parser into a subclass of Grammar.

This is to prepare for future cleanup where we allow SCons to create a
separate grammar class for each ISA.
This commit is contained in:
Nathan Binkert 2009-09-23 18:28:29 -07:00
parent bae6a4a4d9
commit baca1f0566

View file

@ -34,8 +34,12 @@ import traceback
# get type names # get type names
from types import * from types import *
from ply import lex from m5.util.grammar import Grammar
from ply import yacc
class ISAParser(Grammar):
def __init__(self, *args, **kwargs):
super(ISAParser, self).__init__(*args, **kwargs)
self.templateMap = {}
##################################################################### #####################################################################
# #
@ -117,13 +121,13 @@ reserved_map = { }
for r in reserved: for r in reserved:
reserved_map[r.lower()] = r reserved_map[r.lower()] = r
def t_ID(t): def t_ID(self, t):
r'[A-Za-z_]\w*' r'[A-Za-z_]\w*'
t.type = reserved_map.get(t.value,'ID') t.type = self.reserved_map.get(t.value, 'ID')
return t return t
# Integer literal # Integer literal
def t_INTLIT(t): def t_INTLIT(self, t):
r'(0x[\da-fA-F]+)|\d+' r'(0x[\da-fA-F]+)|\d+'
try: try:
t.value = int(t.value,0) t.value = int(t.value,0)
@ -134,7 +138,7 @@ def t_INTLIT(t):
# String literal. Note that these use only single quotes, and # String literal. Note that these use only single quotes, and
# can span multiple lines. # can span multiple lines.
def t_STRLIT(t): def t_STRLIT(self, t):
r"(?m)'([^'])+'" r"(?m)'([^'])+'"
# strip off quotes # strip off quotes
t.value = t.value[1:-1] t.value = t.value[1:-1]
@ -144,24 +148,24 @@ def t_STRLIT(t):
# "Code literal"... like a string literal, but delimiters are # "Code literal"... like a string literal, but delimiters are
# '{{' and '}}' so they get formatted nicely under emacs c-mode # '{{' and '}}' so they get formatted nicely under emacs c-mode
def t_CODELIT(t): def t_CODELIT(self, t):
r"(?m)\{\{([^\}]|}(?!\}))+\}\}" r"(?m)\{\{([^\}]|}(?!\}))+\}\}"
# strip off {{ & }} # strip off {{ & }}
t.value = t.value[2:-2] t.value = t.value[2:-2]
t.lexer.lineno += t.value.count('\n') t.lexer.lineno += t.value.count('\n')
return t return t
def t_CPPDIRECTIVE(t): def t_CPPDIRECTIVE(self, t):
r'^\#[^\#].*\n' r'^\#[^\#].*\n'
t.lexer.lineno += t.value.count('\n') t.lexer.lineno += t.value.count('\n')
return t return t
def t_NEWFILE(t): def t_NEWFILE(self, t):
r'^\#\#newfile\s+"[\w/.-]*"' r'^\#\#newfile\s+"[\w/.-]*"'
fileNameStack.push((t.value[11:-1], t.lexer.lineno)) fileNameStack.push((t.value[11:-1], t.lexer.lineno))
t.lexer.lineno = 0 t.lexer.lineno = 0
def t_ENDFILE(t): def t_ENDFILE(self, t):
r'^\#\#endfile' r'^\#\#endfile'
(old_filename, t.lexer.lineno) = fileNameStack.pop() (old_filename, t.lexer.lineno) = fileNameStack.pop()
@ -171,48 +175,46 @@ def t_ENDFILE(t):
# #
# Newlines # Newlines
def t_NEWLINE(t): def t_NEWLINE(self, t):
r'\n+' r'\n+'
t.lexer.lineno += t.value.count('\n') t.lexer.lineno += t.value.count('\n')
# Comments # Comments
def t_comment(t): def t_comment(self, t):
r'//.*' r'//.*'
# Completely ignored characters # Completely ignored characters
t_ignore = ' \t\x0c' t_ignore = ' \t\x0c'
# Error handler # Error handler
def t_error(t): def t_error(self, t):
error(t.lexer.lineno, "illegal character '%s'" % t.value[0]) error(t.lexer.lineno, "illegal character '%s'" % t.value[0])
t.skip(1) t.skip(1)
# Build the lexer
lexer = lex.lex()
##################################################################### #####################################################################
# #
# Parser # Parser
# #
# Every function whose name starts with 'p_' defines a grammar rule. # Every function whose name starts with 'p_' defines a grammar
# The rule is encoded in the function's doc string, while the # rule. The rule is encoded in the function's doc string, while
# function body provides the action taken when the rule is matched. # the function body provides the action taken when the rule is
# The argument to each function is a list of the values of the # matched. The argument to each function is a list of the values
# rule's symbols: t[0] for the LHS, and t[1..n] for the symbols # of the rule's symbols: t[0] for the LHS, and t[1..n] for the
# on the RHS. For tokens, the value is copied from the t.value # symbols on the RHS. For tokens, the value is copied from the
# attribute provided by the lexer. For non-terminals, the value # t.value attribute provided by the lexer. For non-terminals, the
# is assigned by the producing rule; i.e., the job of the grammar # value is assigned by the producing rule; i.e., the job of the
# rule function is to set the value for the non-terminal on the LHS # grammar rule function is to set the value for the non-terminal
# (by assigning to t[0]). # on the LHS (by assigning to t[0]).
##################################################################### #####################################################################
# The LHS of the first grammar rule is used as the start symbol # The LHS of the first grammar rule is used as the start symbol
# (in this case, 'specification'). Note that this rule enforces # (in this case, 'specification'). Note that this rule enforces
# that there will be exactly one namespace declaration, with 0 or more # that there will be exactly one namespace declaration, with 0 or
# global defs/decls before and after it. The defs & decls before # more global defs/decls before and after it. The defs & decls
# the namespace decl will be outside the namespace; those after # before the namespace decl will be outside the namespace; those
# will be inside. The decoder function is always inside the namespace. # after will be inside. The decoder function is always inside the
def p_specification(t): # namespace.
def p_specification(self, t):
'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block' 'specification : opt_defs_and_outputs name_decl opt_defs_and_outputs decode_block'
global_code = t[1] global_code = t[1]
isa_name = t[2] isa_name = t[2]
@ -224,36 +226,37 @@ StaticInstPtr
{ {
using namespace %(namespace)s; using namespace %(namespace)s;
''' % vars(), '}') ''' % vars(), '}')
# both the latter output blocks and the decode block are in the namespace # both the latter output blocks and the decode block are in
# the namespace
namespace_code = t[3] + t[4] namespace_code = t[3] + t[4]
# pass it all back to the caller of yacc.parse() # pass it all back to the caller of yacc.parse()
t[0] = (isa_name, namespace, global_code, namespace_code) t[0] = (isa_name, namespace, global_code, namespace_code)
# ISA name declaration looks like "namespace <foo>;" # ISA name declaration looks like "namespace <foo>;"
def p_name_decl(t): def p_name_decl(self, t):
'name_decl : NAMESPACE ID SEMI' 'name_decl : NAMESPACE ID SEMI'
t[0] = t[2] t[0] = t[2]
# 'opt_defs_and_outputs' is a possibly empty sequence of # 'opt_defs_and_outputs' is a possibly empty sequence of
# def and/or output statements. # def and/or output statements.
def p_opt_defs_and_outputs_0(t): def p_opt_defs_and_outputs_0(self, t):
'opt_defs_and_outputs : empty' 'opt_defs_and_outputs : empty'
t[0] = GenCode() t[0] = GenCode()
def p_opt_defs_and_outputs_1(t): def p_opt_defs_and_outputs_1(self, t):
'opt_defs_and_outputs : defs_and_outputs' 'opt_defs_and_outputs : defs_and_outputs'
t[0] = t[1] t[0] = t[1]
def p_defs_and_outputs_0(t): def p_defs_and_outputs_0(self, t):
'defs_and_outputs : def_or_output' 'defs_and_outputs : def_or_output'
t[0] = t[1] t[0] = t[1]
def p_defs_and_outputs_1(t): def p_defs_and_outputs_1(self, t):
'defs_and_outputs : defs_and_outputs def_or_output' 'defs_and_outputs : defs_and_outputs def_or_output'
t[0] = t[1] + t[2] t[0] = t[1] + t[2]
# The list of possible definition/output statements. # The list of possible definition/output statements.
def p_def_or_output(t): def p_def_or_output(self, t):
'''def_or_output : def_format '''def_or_output : def_format
| def_bitfield | def_bitfield
| def_bitfield_struct | def_bitfield_struct
@ -269,40 +272,34 @@ def p_def_or_output(t):
# Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied # Output blocks 'output <foo> {{...}}' (C++ code blocks) are copied
# directly to the appropriate output section. # directly to the appropriate output section.
# Massage output block by substituting in template definitions and
# Protect any non-dict-substitution '%'s in a format string # bit operators. We handle '%'s embedded in the string that don't
# (i.e. those not followed by '(') # indicate template substitutions (or CPU-specific symbols, which
def protect_non_subst_percents(s): # get handled in GenCode) by doubling them first so that the
return re.sub(r'%(?!\()', '%%', s) # format operation will reduce them back to single '%'s.
def process_output(self, s):
# Massage output block by substituting in template definitions and bit
# operators. We handle '%'s embedded in the string that don't
# indicate template substitutions (or CPU-specific symbols, which get
# handled in GenCode) by doubling them first so that the format
# operation will reduce them back to single '%'s.
def process_output(s):
s = protect_non_subst_percents(s) s = protect_non_subst_percents(s)
# protects cpu-specific symbols too # protects cpu-specific symbols too
s = protect_cpu_symbols(s) s = protect_cpu_symbols(s)
return substBitOps(s % templateMap) return substBitOps(s % self.templateMap)
def p_output_header(t): def p_output_header(self, t):
'output_header : OUTPUT HEADER CODELIT SEMI' 'output_header : OUTPUT HEADER CODELIT SEMI'
t[0] = GenCode(header_output = process_output(t[3])) t[0] = GenCode(header_output = self.process_output(t[3]))
def p_output_decoder(t): def p_output_decoder(self, t):
'output_decoder : OUTPUT DECODER CODELIT SEMI' 'output_decoder : OUTPUT DECODER CODELIT SEMI'
t[0] = GenCode(decoder_output = process_output(t[3])) t[0] = GenCode(decoder_output = self.process_output(t[3]))
def p_output_exec(t): def p_output_exec(self, t):
'output_exec : OUTPUT EXEC CODELIT SEMI' 'output_exec : OUTPUT EXEC CODELIT SEMI'
t[0] = GenCode(exec_output = process_output(t[3])) t[0] = GenCode(exec_output = self.process_output(t[3]))
# global let blocks 'let {{...}}' (Python code blocks) are executed # global let blocks 'let {{...}}' (Python code blocks) are
# directly when seen. Note that these execute in a special variable # executed directly when seen. Note that these execute in a
# context 'exportContext' to prevent the code from polluting this # special variable context 'exportContext' to prevent the code
# script's namespace. # from polluting this script's namespace.
def p_global_let(t): def p_global_let(self, t):
'global_let : LET CODELIT SEMI' 'global_let : LET CODELIT SEMI'
updateExportContext() updateExportContext()
exportContext["header_output"] = '' exportContext["header_output"] = ''
@ -319,9 +316,9 @@ def p_global_let(t):
exec_output = exportContext["exec_output"], exec_output = exportContext["exec_output"],
decode_block = exportContext["decode_block"]) decode_block = exportContext["decode_block"])
# Define the mapping from operand type extensions to C++ types and bit # Define the mapping from operand type extensions to C++ types and
# widths (stored in operandTypeMap). # bit widths (stored in operandTypeMap).
def p_def_operand_types(t): def p_def_operand_types(self, t):
'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI' 'def_operand_types : DEF OPERAND_TYPES CODELIT SEMI'
try: try:
userDict = eval('{' + t[3] + '}') userDict = eval('{' + t[3] + '}')
@ -331,9 +328,9 @@ def p_def_operand_types(t):
buildOperandTypeMap(userDict, t.lexer.lineno) buildOperandTypeMap(userDict, t.lexer.lineno)
t[0] = GenCode() # contributes nothing to the output C++ file t[0] = GenCode() # contributes nothing to the output C++ file
# Define the mapping from operand names to operand classes and other # Define the mapping from operand names to operand classes and
# traits. Stored in operandNameMap. # other traits. Stored in operandNameMap.
def p_def_operands(t): def p_def_operands(self, t):
'def_operands : DEF OPERANDS CODELIT SEMI' 'def_operands : DEF OPERANDS CODELIT SEMI'
if not globals().has_key('operandTypeMap'): if not globals().has_key('operandTypeMap'):
error(t.lexer.lineno, error(t.lexer.lineno,
@ -349,7 +346,7 @@ def p_def_operands(t):
# A bitfield definition looks like: # A bitfield definition looks like:
# 'def [signed] bitfield <ID> [<first>:<last>]' # 'def [signed] bitfield <ID> [<first>:<last>]'
# This generates a preprocessor macro in the output file. # This generates a preprocessor macro in the output file.
def p_def_bitfield_0(t): def p_def_bitfield_0(self, t):
'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI' 'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT COLON INTLIT GREATER SEMI'
expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8]) expr = 'bits(machInst, %2d, %2d)' % (t[6], t[8])
if (t[2] == 'signed'): if (t[2] == 'signed'):
@ -358,7 +355,7 @@ def p_def_bitfield_0(t):
t[0] = GenCode(header_output = hash_define) t[0] = GenCode(header_output = hash_define)
# alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]' # alternate form for single bit: 'def [signed] bitfield <ID> [<bit>]'
def p_def_bitfield_1(t): def p_def_bitfield_1(self, t):
'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI' 'def_bitfield : DEF opt_signed BITFIELD ID LESS INTLIT GREATER SEMI'
expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6]) expr = 'bits(machInst, %2d, %2d)' % (t[6], t[6])
if (t[2] == 'signed'): if (t[2] == 'signed'):
@ -367,103 +364,101 @@ def p_def_bitfield_1(t):
t[0] = GenCode(header_output = hash_define) t[0] = GenCode(header_output = hash_define)
# alternate form for structure member: 'def bitfield <ID> <ID>' # alternate form for structure member: 'def bitfield <ID> <ID>'
def p_def_bitfield_struct(t): def p_def_bitfield_struct(self, t):
'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI' 'def_bitfield_struct : DEF opt_signed BITFIELD ID id_with_dot SEMI'
if (t[2] != ''): if (t[2] != ''):
error(t.lexer.lineno, 'error: structure bitfields are always unsigned.') error(t.lexer.lineno,
'error: structure bitfields are always unsigned.')
expr = 'machInst.%s' % t[5] expr = 'machInst.%s' % t[5]
hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr) hash_define = '#undef %s\n#define %s\t%s\n' % (t[4], t[4], expr)
t[0] = GenCode(header_output = hash_define) t[0] = GenCode(header_output = hash_define)
def p_id_with_dot_0(t): def p_id_with_dot_0(self, t):
'id_with_dot : ID' 'id_with_dot : ID'
t[0] = t[1] t[0] = t[1]
def p_id_with_dot_1(t): def p_id_with_dot_1(self, t):
'id_with_dot : ID DOT id_with_dot' 'id_with_dot : ID DOT id_with_dot'
t[0] = t[1] + t[2] + t[3] t[0] = t[1] + t[2] + t[3]
def p_opt_signed_0(t): def p_opt_signed_0(self, t):
'opt_signed : SIGNED' 'opt_signed : SIGNED'
t[0] = t[1] t[0] = t[1]
def p_opt_signed_1(t): def p_opt_signed_1(self, t):
'opt_signed : empty' 'opt_signed : empty'
t[0] = '' t[0] = ''
# Global map variable to hold templates def p_def_template(self, t):
templateMap = {}
def p_def_template(t):
'def_template : DEF TEMPLATE ID CODELIT SEMI' 'def_template : DEF TEMPLATE ID CODELIT SEMI'
templateMap[t[3]] = Template(t[4]) self.templateMap[t[3]] = Template(t[4])
t[0] = GenCode() t[0] = GenCode()
# An instruction format definition looks like # An instruction format definition looks like
# "def format <fmt>(<params>) {{...}};" # "def format <fmt>(<params>) {{...}};"
def p_def_format(t): def p_def_format(self, t):
'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI' 'def_format : DEF FORMAT ID LPAREN param_list RPAREN CODELIT SEMI'
(id, params, code) = (t[3], t[5], t[7]) (id, params, code) = (t[3], t[5], t[7])
defFormat(id, params, code, t.lexer.lineno) defFormat(id, params, code, t.lexer.lineno)
t[0] = GenCode() t[0] = GenCode()
# The formal parameter list for an instruction format is a possibly # The formal parameter list for an instruction format is a
# empty list of comma-separated parameters. Positional (standard, # possibly empty list of comma-separated parameters. Positional
# non-keyword) parameters must come first, followed by keyword # (standard, non-keyword) parameters must come first, followed by
# parameters, followed by a '*foo' parameter that gets excess # keyword parameters, followed by a '*foo' parameter that gets
# positional arguments (as in Python). Each of these three parameter # excess positional arguments (as in Python). Each of these three
# categories is optional. # parameter categories is optional.
# #
# Note that we do not support the '**foo' parameter for collecting # Note that we do not support the '**foo' parameter for collecting
# otherwise undefined keyword args. Otherwise the parameter list is # otherwise undefined keyword args. Otherwise the parameter list
# (I believe) identical to what is supported in Python. # is (I believe) identical to what is supported in Python.
# #
# The param list generates a tuple, where the first element is a list of # The param list generates a tuple, where the first element is a
# the positional params and the second element is a dict containing the # list of the positional params and the second element is a dict
# keyword params. # containing the keyword params.
def p_param_list_0(t): def p_param_list_0(self, t):
'param_list : positional_param_list COMMA nonpositional_param_list' 'param_list : positional_param_list COMMA nonpositional_param_list'
t[0] = t[1] + t[3] t[0] = t[1] + t[3]
def p_param_list_1(t): def p_param_list_1(self, t):
'''param_list : positional_param_list '''param_list : positional_param_list
| nonpositional_param_list''' | nonpositional_param_list'''
t[0] = t[1] t[0] = t[1]
def p_positional_param_list_0(t): def p_positional_param_list_0(self, t):
'positional_param_list : empty' 'positional_param_list : empty'
t[0] = [] t[0] = []
def p_positional_param_list_1(t): def p_positional_param_list_1(self, t):
'positional_param_list : ID' 'positional_param_list : ID'
t[0] = [t[1]] t[0] = [t[1]]
def p_positional_param_list_2(t): def p_positional_param_list_2(self, t):
'positional_param_list : positional_param_list COMMA ID' 'positional_param_list : positional_param_list COMMA ID'
t[0] = t[1] + [t[3]] t[0] = t[1] + [t[3]]
def p_nonpositional_param_list_0(t): def p_nonpositional_param_list_0(self, t):
'nonpositional_param_list : keyword_param_list COMMA excess_args_param' 'nonpositional_param_list : keyword_param_list COMMA excess_args_param'
t[0] = t[1] + t[3] t[0] = t[1] + t[3]
def p_nonpositional_param_list_1(t): def p_nonpositional_param_list_1(self, t):
'''nonpositional_param_list : keyword_param_list '''nonpositional_param_list : keyword_param_list
| excess_args_param''' | excess_args_param'''
t[0] = t[1] t[0] = t[1]
def p_keyword_param_list_0(t): def p_keyword_param_list_0(self, t):
'keyword_param_list : keyword_param' 'keyword_param_list : keyword_param'
t[0] = [t[1]] t[0] = [t[1]]
def p_keyword_param_list_1(t): def p_keyword_param_list_1(self, t):
'keyword_param_list : keyword_param_list COMMA keyword_param' 'keyword_param_list : keyword_param_list COMMA keyword_param'
t[0] = t[1] + [t[3]] t[0] = t[1] + [t[3]]
def p_keyword_param(t): def p_keyword_param(self, t):
'keyword_param : ID EQUALS expr' 'keyword_param : ID EQUALS expr'
t[0] = t[1] + ' = ' + t[3].__repr__() t[0] = t[1] + ' = ' + t[3].__repr__()
def p_excess_args_param(t): def p_excess_args_param(self, t):
'excess_args_param : ASTERISK ID' 'excess_args_param : ASTERISK ID'
# Just concatenate them: '*ID'. Wrap in list to be consistent # Just concatenate them: '*ID'. Wrap in list to be consistent
# with positional_param_list and keyword_param_list. # with positional_param_list and keyword_param_list.
@ -476,7 +471,7 @@ def p_excess_args_param(t):
# A decode block looks like: # A decode block looks like:
# decode <field1> [, <field2>]* [default <inst>] { ... } # decode <field1> [, <field2>]* [default <inst>] { ... }
# #
def p_decode_block(t): def p_decode_block(self, t):
'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE' 'decode_block : DECODE ID opt_default LBRACE decode_stmt_list RBRACE'
default_defaults = defaultStack.pop() default_defaults = defaultStack.pop()
codeObj = t[5] codeObj = t[5]
@ -487,18 +482,19 @@ def p_decode_block(t):
codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n') codeObj.wrap_decode_block('switch (%s) {\n' % t[2], '}\n')
t[0] = codeObj t[0] = codeObj
# The opt_default statement serves only to push the "default defaults" # The opt_default statement serves only to push the "default
# onto defaultStack. This value will be used by nested decode blocks, # defaults" onto defaultStack. This value will be used by nested
# and used and popped off when the current decode_block is processed # decode blocks, and used and popped off when the current
# (in p_decode_block() above). # decode_block is processed (in p_decode_block() above).
def p_opt_default_0(t): def p_opt_default_0(self, t):
'opt_default : empty' 'opt_default : empty'
# no default specified: reuse the one currently at the top of the stack # no default specified: reuse the one currently at the top of
# the stack
defaultStack.push(defaultStack.top()) defaultStack.push(defaultStack.top())
# no meaningful value returned # no meaningful value returned
t[0] = None t[0] = None
def p_opt_default_1(t): def p_opt_default_1(self, t):
'opt_default : DEFAULT inst' 'opt_default : DEFAULT inst'
# push the new default # push the new default
codeObj = t[2] codeObj = t[2]
@ -507,11 +503,11 @@ def p_opt_default_1(t):
# no meaningful value returned # no meaningful value returned
t[0] = None t[0] = None
def p_decode_stmt_list_0(t): def p_decode_stmt_list_0(self, t):
'decode_stmt_list : decode_stmt' 'decode_stmt_list : decode_stmt'
t[0] = t[1] t[0] = t[1]
def p_decode_stmt_list_1(t): def p_decode_stmt_list_1(self, t):
'decode_stmt_list : decode_stmt decode_stmt_list' 'decode_stmt_list : decode_stmt decode_stmt_list'
if (t[1].has_decode_default and t[2].has_decode_default): if (t[1].has_decode_default and t[2].has_decode_default):
error(t.lexer.lineno, 'Two default cases in decode block') error(t.lexer.lineno, 'Two default cases in decode block')
@ -527,43 +523,46 @@ def p_decode_stmt_list_1(t):
# 4. C preprocessor directives. # 4. C preprocessor directives.
# Preprocessor directives found in a decode statement list are passed # Preprocessor directives found in a decode statement list are
# through to the output, replicated to all of the output code # passed through to the output, replicated to all of the output
# streams. This works well for ifdefs, so we can ifdef out both the # code streams. This works well for ifdefs, so we can ifdef out
# declarations and the decode cases generated by an instruction # both the declarations and the decode cases generated by an
# definition. Handling them as part of the grammar makes it easy to # instruction definition. Handling them as part of the grammar
# keep them in the right place with respect to the code generated by # makes it easy to keep them in the right place with respect to
# the other statements. # the code generated by the other statements.
def p_decode_stmt_cpp(t): def p_decode_stmt_cpp(self, t):
'decode_stmt : CPPDIRECTIVE' 'decode_stmt : CPPDIRECTIVE'
t[0] = GenCode(t[1], t[1], t[1], t[1]) t[0] = GenCode(t[1], t[1], t[1], t[1])
# A format block 'format <foo> { ... }' sets the default instruction # A format block 'format <foo> { ... }' sets the default
# format used to handle instruction definitions inside the block. # instruction format used to handle instruction definitions inside
# This format can be overridden by using an explicit format on the # the block. This format can be overridden by using an explicit
# instruction definition or with a nested format block. # format on the instruction definition or with a nested format
def p_decode_stmt_format(t): # block.
def p_decode_stmt_format(self, t):
'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE' 'decode_stmt : FORMAT push_format_id LBRACE decode_stmt_list RBRACE'
# The format will be pushed on the stack when 'push_format_id' is # The format will be pushed on the stack when 'push_format_id'
# processed (see below). Once the parser has recognized the full # is processed (see below). Once the parser has recognized
# production (though the right brace), we're done with the format, # the full production (though the right brace), we're done
# so now we can pop it. # with the format, so now we can pop it.
formatStack.pop() formatStack.pop()
t[0] = t[4] t[0] = t[4]
# This rule exists so we can set the current format (& push the stack) # This rule exists so we can set the current format (& push the
# when we recognize the format name part of the format block. # stack) when we recognize the format name part of the format
def p_push_format_id(t): # block.
def p_push_format_id(self, t):
'push_format_id : ID' 'push_format_id : ID'
try: try:
formatStack.push(formatMap[t[1]]) formatStack.push(formatMap[t[1]])
t[0] = ('', '// format %s' % t[1]) t[0] = ('', '// format %s' % t[1])
except KeyError: except KeyError:
error(t.lexer.lineno, 'instruction format "%s" not defined.' % t[1]) error(t.lexer.lineno,
'instruction format "%s" not defined.' % t[1])
# Nested decode block: if the value of the current field matches the # Nested decode block: if the value of the current field matches
# specified constant, do a nested decode on some other field. # the specified constant, do a nested decode on some other field.
def p_decode_stmt_decode(t): def p_decode_stmt_decode(self, t):
'decode_stmt : case_label COLON decode_block' 'decode_stmt : case_label COLON decode_block'
label = t[1] label = t[1]
codeObj = t[3] codeObj = t[3]
@ -574,7 +573,7 @@ def p_decode_stmt_decode(t):
t[0] = codeObj t[0] = codeObj
# Instruction definition (finally!). # Instruction definition (finally!).
def p_decode_stmt_inst(t): def p_decode_stmt_inst(self, t):
'decode_stmt : case_label COLON inst SEMI' 'decode_stmt : case_label COLON inst SEMI'
label = t[1] label = t[1]
codeObj = t[3] codeObj = t[3]
@ -582,32 +581,33 @@ def p_decode_stmt_inst(t):
codeObj.has_decode_default = (label == 'default') codeObj.has_decode_default = (label == 'default')
t[0] = codeObj t[0] = codeObj
# The case label is either a list of one or more constants or 'default' # The case label is either a list of one or more constants or
def p_case_label_0(t): # 'default'
def p_case_label_0(self, t):
'case_label : intlit_list' 'case_label : intlit_list'
t[0] = ': '.join(map(lambda a: 'case %#x' % a, t[1])) t[0] = ': '.join(map(lambda a: 'case %#x' % a, t[1]))
def p_case_label_1(t): def p_case_label_1(self, t):
'case_label : DEFAULT' 'case_label : DEFAULT'
t[0] = 'default' t[0] = 'default'
# #
# The constant list for a decode case label must be non-empty, but may have # The constant list for a decode case label must be non-empty, but
# one or more comma-separated integer literals in it. # may have one or more comma-separated integer literals in it.
# #
def p_intlit_list_0(t): def p_intlit_list_0(self, t):
'intlit_list : INTLIT' 'intlit_list : INTLIT'
t[0] = [t[1]] t[0] = [t[1]]
def p_intlit_list_1(t): def p_intlit_list_1(self, t):
'intlit_list : intlit_list COMMA INTLIT' 'intlit_list : intlit_list COMMA INTLIT'
t[0] = t[1] t[0] = t[1]
t[0].append(t[3]) t[0].append(t[3])
# Define an instruction using the current instruction format (specified # Define an instruction using the current instruction format
# by an enclosing format block). # (specified by an enclosing format block).
# "<mnemonic>(<args>)" # "<mnemonic>(<args>)"
def p_inst_0(t): def p_inst_0(self, t):
'inst : ID LPAREN arg_list RPAREN' 'inst : ID LPAREN arg_list RPAREN'
# Pass the ID and arg list to the current format class to deal with. # Pass the ID and arg list to the current format class to deal with.
currentFormat = formatStack.top() currentFormat = formatStack.top()
@ -621,110 +621,112 @@ def p_inst_0(t):
# Define an instruction using an explicitly specified format: # Define an instruction using an explicitly specified format:
# "<fmt>::<mnemonic>(<args>)" # "<fmt>::<mnemonic>(<args>)"
def p_inst_1(t): def p_inst_1(self, t):
'inst : ID DBLCOLON ID LPAREN arg_list RPAREN' 'inst : ID DBLCOLON ID LPAREN arg_list RPAREN'
try: try:
format = formatMap[t[1]] format = formatMap[t[1]]
except KeyError: except KeyError:
error(t.lexer.lineno, 'instruction format "%s" not defined.' % t[1]) error(t.lexer.lineno,
'instruction format "%s" not defined.' % t[1])
codeObj = format.defineInst(t[3], t[5], t.lexer.lineno) codeObj = format.defineInst(t[3], t[5], t.lexer.lineno)
comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5]) comment = '\n// %s::%s(%s)\n' % (t[1], t[3], t[5])
codeObj.prepend_all(comment) codeObj.prepend_all(comment)
t[0] = codeObj t[0] = codeObj
# The arg list generates a tuple, where the first element is a list of # The arg list generates a tuple, where the first element is a
# the positional args and the second element is a dict containing the # list of the positional args and the second element is a dict
# keyword args. # containing the keyword args.
def p_arg_list_0(t): def p_arg_list_0(self, t):
'arg_list : positional_arg_list COMMA keyword_arg_list' 'arg_list : positional_arg_list COMMA keyword_arg_list'
t[0] = ( t[1], t[3] ) t[0] = ( t[1], t[3] )
def p_arg_list_1(t): def p_arg_list_1(self, t):
'arg_list : positional_arg_list' 'arg_list : positional_arg_list'
t[0] = ( t[1], {} ) t[0] = ( t[1], {} )
def p_arg_list_2(t): def p_arg_list_2(self, t):
'arg_list : keyword_arg_list' 'arg_list : keyword_arg_list'
t[0] = ( [], t[1] ) t[0] = ( [], t[1] )
def p_positional_arg_list_0(t): def p_positional_arg_list_0(self, t):
'positional_arg_list : empty' 'positional_arg_list : empty'
t[0] = [] t[0] = []
def p_positional_arg_list_1(t): def p_positional_arg_list_1(self, t):
'positional_arg_list : expr' 'positional_arg_list : expr'
t[0] = [t[1]] t[0] = [t[1]]
def p_positional_arg_list_2(t): def p_positional_arg_list_2(self, t):
'positional_arg_list : positional_arg_list COMMA expr' 'positional_arg_list : positional_arg_list COMMA expr'
t[0] = t[1] + [t[3]] t[0] = t[1] + [t[3]]
def p_keyword_arg_list_0(t): def p_keyword_arg_list_0(self, t):
'keyword_arg_list : keyword_arg' 'keyword_arg_list : keyword_arg'
t[0] = t[1] t[0] = t[1]
def p_keyword_arg_list_1(t): def p_keyword_arg_list_1(self, t):
'keyword_arg_list : keyword_arg_list COMMA keyword_arg' 'keyword_arg_list : keyword_arg_list COMMA keyword_arg'
t[0] = t[1] t[0] = t[1]
t[0].update(t[3]) t[0].update(t[3])
def p_keyword_arg(t): def p_keyword_arg(self, t):
'keyword_arg : ID EQUALS expr' 'keyword_arg : ID EQUALS expr'
t[0] = { t[1] : t[3] } t[0] = { t[1] : t[3] }
# #
# Basic expressions. These constitute the argument values of # Basic expressions. These constitute the argument values of
# "function calls" (i.e. instruction definitions in the decode block) # "function calls" (i.e. instruction definitions in the decode
# and default values for formal parameters of format functions. # block) and default values for formal parameters of format
# functions.
# #
# Right now, these are either strings, integers, or (recursively) # Right now, these are either strings, integers, or (recursively)
# lists of exprs (using Python square-bracket list syntax). Note that # lists of exprs (using Python square-bracket list syntax). Note
# bare identifiers are trated as string constants here (since # that bare identifiers are treated as string constants here (since
# isn't really a variable namespace to refer to). # there isn't really a variable namespace to refer to).
# #
def p_expr_0(t): def p_expr_0(self, t):
'''expr : ID '''expr : ID
| INTLIT | INTLIT
| STRLIT | STRLIT
| CODELIT''' | CODELIT'''
t[0] = t[1] t[0] = t[1]
def p_expr_1(t): def p_expr_1(self, t):
'''expr : LBRACKET list_expr RBRACKET''' '''expr : LBRACKET list_expr RBRACKET'''
t[0] = t[2] t[0] = t[2]
def p_list_expr_0(t): def p_list_expr_0(self, t):
'list_expr : expr' 'list_expr : expr'
t[0] = [t[1]] t[0] = [t[1]]
def p_list_expr_1(t): def p_list_expr_1(self, t):
'list_expr : list_expr COMMA expr' 'list_expr : list_expr COMMA expr'
t[0] = t[1] + [t[3]] t[0] = t[1] + [t[3]]
def p_list_expr_2(t): def p_list_expr_2(self, t):
'list_expr : empty' 'list_expr : empty'
t[0] = [] t[0] = []
# #
# Empty production... use in other rules for readability. # Empty production... use in other rules for readability.
# #
def p_empty(t): def p_empty(self, t):
'empty :' 'empty :'
pass pass
# Parse error handler. Note that the argument here is the offending # Parse error handler. Note that the argument here is the
# *token*, not a grammar symbol (hence the need to use t.value) # offending *token*, not a grammar symbol (hence the need to use
def p_error(t): # t.value)
def p_error(self, t):
if t: if t:
error(t.lexer.lineno, "syntax error at '%s'" % t.value) error(t.lexer.lineno, "syntax error at '%s'" % t.value)
else: else:
error(0, "unknown syntax error", True) error(0, "unknown syntax error", True)
# END OF GRAMMAR RULES # END OF GRAMMAR RULES
#
# Now build the parser.
parser = yacc.yacc()
# Now build the parser.
parser = ISAParser()
##################################################################### #####################################################################
# #
@ -761,6 +763,11 @@ def expand_cpu_symbols_to_string(template):
def protect_cpu_symbols(template): def protect_cpu_symbols(template):
return re.sub(r'%(?=\(CPU_)', '%%', template) return re.sub(r'%(?=\(CPU_)', '%%', template)
# Protect any non-dict-substitution '%'s in a format string
# (i.e. those not followed by '(')
def protect_non_subst_percents(s):
return re.sub(r'%(?!\()', '%%', s)
############### ###############
# GenCode class # GenCode class
# #
@ -834,7 +841,7 @@ exportContext = {}
def updateExportContext(): def updateExportContext():
exportContext.update(exportDict(*exportContextSymbols)) exportContext.update(exportDict(*exportContextSymbols))
exportContext.update(templateMap) exportContext.update(parser.templateMap)
def exportDict(*symNames): def exportDict(*symNames):
return dict([(s, eval(s)) for s in symNames]) return dict([(s, eval(s)) for s in symNames])
@ -1044,7 +1051,7 @@ class Template:
# Build a dict ('myDict') to use for the template substitution. # Build a dict ('myDict') to use for the template substitution.
# Start with the template namespace. Make a copy since we're # Start with the template namespace. Make a copy since we're
# going to modify it. # going to modify it.
myDict = templateMap.copy() myDict = parser.templateMap.copy()
if isinstance(d, InstObjParams): if isinstance(d, InstObjParams):
# If we're dealing with an InstObjParams object, we need # If we're dealing with an InstObjParams object, we need
@ -1970,8 +1977,7 @@ def parse_isa_desc(isa_desc_file, output_dir):
fileNameStack.push((isa_desc_file, 0)) fileNameStack.push((isa_desc_file, 0))
# Parse it. # Parse it.
(isa_name, namespace, global_code, namespace_code) = \ (isa_name, namespace, global_code, namespace_code) = parser.parse(isa_desc)
parser.parse(isa_desc, lexer=lexer)
# grab the last three path components of isa_desc_file to put in # grab the last three path components of isa_desc_file to put in
# the output # the output