504 lines
14 KiB
Python
504 lines
14 KiB
Python
# Copyright (c) 2003-2005 The Regents of The University of Michigan
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met: redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer;
|
|
# redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution;
|
|
# neither the name of the copyright holders nor the names of its
|
|
# contributors may be used to endorse or promote products derived from
|
|
# this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
# Authors: Gabe Black
|
|
|
|
import os
|
|
import sys
|
|
import re
|
|
import string
|
|
import traceback
|
|
# get type names
|
|
from types import *
|
|
|
|
# Prepend the directory where the PLY lex & yacc modules are found
|
|
# to the search path.
|
|
sys.path[0:0] = [os.environ['M5_PLY']]
|
|
|
|
from ply import lex
|
|
from ply import yacc
|
|
|
|
##########################################################################
|
|
#
|
|
# Base classes for use outside of the assembler
|
|
#
|
|
##########################################################################
|
|
|
|
class Micro_Container(object):
    """Named, ordered collection of microops.

    Attributes (all but ``name`` start out empty):
        name: the identifier passed to the constructor.
        microops: list of microop objects in the order they were added.
        directives: dict that handle_statement() consults to find the
            callable for a directive name.
        micro_classes: dict; nothing in this file reads or writes it.
        labels: dict that handle_statement() fills with label text mapped
            to the labelled microop.
    """

    def __init__(self, name):
        self.name = name
        self.microops = []
        self.labels = {}
        self.directives = {}
        self.micro_classes = {}

    def add_microop(self, mnemonic, microop):
        """Append *microop*; *mnemonic* is accepted but not used here."""
        self.microops.append(microop)

    def __str__(self):
        """Return the name on one line followed by one line per microop."""
        pieces = ["%s:\n" % self.name]
        pieces.extend(" %s\n" % op for op in self.microops)
        return "".join(pieces)
|
|
|
|
class Combinational_Macroop(Micro_Container):
    """Micro_Container subclass that adds no state or behaviour of its own."""
|
|
|
|
class Rom_Macroop(object):
    """Record of a macroop name together with its target."""

    def __init__(self, name, target):
        self.target = target
        self.name = name

    def __str__(self):
        """Return ``"<name>: <target>"`` terminated by a newline."""
        fields = (self.name, self.target)
        return "%s: %s\n" % fields
|
|
|
|
class Rom(Micro_Container):
    """Micro_Container that additionally keeps a dict of extern labels.

    handle_statement() stores a microop in ``externs`` under its label text
    when that label's ``is_extern`` flag is set.
    """

    def __init__(self, name):
        # Let the base class set up name, microops, directives and labels.
        super(Rom, self).__init__(name)
        self.externs = {}
|
|
|
|
##########################################################################
|
|
#
|
|
# Support classes
|
|
#
|
|
##########################################################################
|
|
|
|
class Label(object):
    """A label attached to a microop.

    The grammar actions p_label_0/p_label_1 assign ``is_extern`` and ``text``
    and handle_statement() reads those two attributes, so they are given
    defaults here.  ``extern`` and ``name`` are the names the constructor
    originally initialised; they are kept so existing users of those
    attributes continue to work.
    """

    def __init__(self):
        # Attributes actually used by the parser and handle_statement().
        self.is_extern = False
        self.text = ""
        # Original attribute names, retained for backward compatibility.
        self.extern = False
        self.name = ""
|
|
|
|
class Block(object):
    """Holder for the list of statements found between a pair of braces."""

    def __init__(self):
        # Starts empty; p_block replaces it with the parsed statement list.
        self.statements = list()
|
|
|
|
class Statement(object):
    """Base class for parsed statements.

    The two ``is_*`` flags are what handle_statement() tests to decide how a
    statement is processed; subclasses switch on the one that applies.
    ``params`` holds the raw parameter text (empty when there is none).
    """

    def __init__(self):
        self.params = ""
        self.is_directive = False
        self.is_microop = False
|
|
|
|
class Microop(Statement):
    """Statement that names a microop by mnemonic, with optional labels."""

    def __init__(self):
        super(Microop, self).__init__()
        # Mark this statement as a microop for handle_statement().
        self.is_microop = True
        self.labels = []
        self.mnemonic = ""
|
|
|
|
class Directive(Statement):
    """Statement that invokes an assembler directive by name."""

    def __init__(self):
        super(Directive, self).__init__()
        # Mark this statement as a directive for handle_statement().
        self.is_directive = True
        self.name = ""
|
|
|
|
##########################################################################
|
|
#
|
|
# Functions that handle common tasks
|
|
#
|
|
##########################################################################
|
|
|
|
def print_error(message):
    """Print *message* prefixed with ``***`` and set off by blank lines.

    Written as a single parenthesised print so the same source is valid under
    both Python 2 (print statement) and Python 3 (print function); the
    original bare ``print`` statements are a syntax error on Python 3.
    """
    # Leading "\n" gives the blank line before; the trailing "\n" plus the
    # newline print itself appends gives the blank line after.
    print("\n*** %s\n" % message)
|
|
|
|
def handle_statement(parser, container, statement):
    """Apply one parsed statement to *container*.

    A microop statement is turned into an object by calling the class
    registered under its mnemonic in ``parser.microops``; the object is
    recorded under each of the statement's labels and then passed to
    ``container.add_microop``.  A directive statement calls the function
    registered under its name in ``container.directives``.

    In both cases the statement's parameter text is spliced into a call
    expression and evaluated with ``eval`` using ``parser.symbols`` as the
    local namespace.
    NOTE(review): this executes arbitrary expressions taken from the assembly
    text, so the microcode source must be trusted.

    Raises:
        Exception: the mnemonic or directive name is not registered, or the
            statement is neither a microop nor a directive.
        Any exception raised while building or adding the microop, or while
        running the directive, is re-raised after a diagnostic is printed.
    """
    if statement.is_microop:
        _add_microop_statement(parser, container, statement)
    elif statement.is_directive:
        _run_directive_statement(parser, container, statement)
    else:
        # The original three-argument raise passed the statement in the
        # traceback slot, which itself fails with a TypeError; fold the
        # statement into the message instead.
        raise Exception("Didn't recognize the type of statement: %r" %
                        (statement,))


def _add_microop_statement(parser, container, statement):
    """Build the microop described by *statement* and add it to *container*."""
    if statement.mnemonic not in parser.microops:
        raise Exception("Unrecognized mnemonic: %s" % statement.mnemonic)
    # Publish the class in the symbol table under a reserved name so the
    # evaluated expression can call it with the raw parameter text.
    parser.symbols["__microopClassFromInsideTheAssembler"] = \
        parser.microops[statement.mnemonic]
    try:
        microop = eval('__microopClassFromInsideTheAssembler(%s)' %
                       statement.params, {}, parser.symbols)
    except:
        # Deliberately broad: this only adds context before re-raising.
        print_error("Error creating microop object with mnemonic %s." %
                    statement.mnemonic)
        raise
    try:
        for label in statement.labels:
            container.labels[label.text] = microop
            if label.is_extern:
                container.externs[label.text] = microop
        container.add_microop(statement.mnemonic, microop)
    except:
        # Deliberately broad: this only adds context before re-raising.
        print_error("Error adding microop.")
        raise


def _run_directive_statement(parser, container, statement):
    """Call the directive named by *statement* with its parameter text."""
    if statement.name not in container.directives:
        raise Exception("Unrecognized directive: %s" % statement.name)
    parser.symbols["__directiveFunctionFromInsideTheAssembler"] = \
        container.directives[statement.name]
    try:
        eval('__directiveFunctionFromInsideTheAssembler(%s)' %
             statement.params, {}, parser.symbols)
    except:
        # Deliberately broad: this only adds context before re-raising.
        print_error("Error executing directive.")
        print(container.directives)
        raise
|
|
|
|
##########################################################################
|
|
#
|
|
# Lexer specification
|
|
#
|
|
##########################################################################
|
|
|
|
# Error handler. Just call exit. Output formatted to work under
|
|
# Emacs compile-mode. Optional 'print_traceback' arg, if set to True,
|
|
# prints a Python stack backtrace too (can be handy when trying to
|
|
# debug the parser itself).
|
|
def error(lineno, string, print_traceback = False):
    """Terminate via sys.exit with a ``"<line>: <message>"`` style string.

    Args:
        lineno: line number to report; 0 means no line prefix is emitted
            (the message then starts with a single space).
        string: the message text.
        print_traceback: when true, print the current exception traceback
            before exiting.
    """
    if print_traceback:
        traceback.print_exc()
    # Default to no prefix and only fill it in for a non-zero line number.
    line_str = ""
    if lineno != 0:
        line_str = "%d:" % lineno
    sys.exit("%s %s" % (line_str, string))
|
|
|
|
# Keywords; their token type is the upper-case spelling of the word.
reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')

# Every token type the lexer can return: the keywords plus the rest.
tokens = reserved + (
    'ID',                       # identifier
    'PARAMS',                   # arguments for microops and directives
    'LPAREN', 'RPAREN',
    'LBRACE', 'RBRACE',
    'COLON', 'SEMI', 'DOT',
    'NEWLINE',
)

# New lines are ignored at the top level, but they end statements in the
# assembler
states = (
    ('asm', 'exclusive'),
    ('params', 'exclusive'),
)

# Lower-case keyword spelling -> token type, used by the ID rules.
reserved_map = { }
for r in reserved:
    reserved_map[r.lower()] = r
|
|
|
|
# Ignore comments
|
|
def t_ANY_COMMENT(t):
    r'\#[^\n]*'
    # The docstring above is the rule's regular expression.  It matches from
    # '#' up to, but not including, the end of the line.  The original
    # pattern also required a following newline via a lookahead, which made a
    # comment on a final line without a trailing newline fail to match.
    # Nothing is returned, so the matched text produces no token.
|
|
|
|
def t_ANY_MULTILINECOMMENT(t):
    r'/\*([^/]|((?<!\*)/))*\*/'
    # A block comment may span several lines; count them so that line numbers
    # reported after the comment stay correct.  The count is applied to the
    # lexer's counter (the one PLY documents for line tracking) as well as to
    # the token, matching how the other rules in this file use t.lineno.
    # Nothing is returned, so the comment produces no token.
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
|
|
|
|
# A colon marks the end of a label. It should follow an ID which will
|
|
# put the lexer in the "params" state. Seeing the colon will put it back
|
|
# in the "asm" state since it knows it saw a label and not a mnemonic.
|
|
def t_params_COLON(t):
    r':'
    # The preceding ID turned out to be a label, so leave the "params" state
    # and resume lexing ordinary assembly.
    lexer = t.lexer
    lexer.begin('asm')
    return t
|
|
|
|
# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon.
|
|
# Matches a backslash-escaped newline, semicolon or backslash; group 1 is the
# escaped character itself.  Compiled once instead of on every token.
_UNESCAPE_PARAMS_RE = re.compile(r'\\([\n;\\])')


def t_params_PARAMS(t):
    r'([^\n;\\]|(\\[\n;\\]))+'
    # Escaped newlines are part of the match, so account for them in the line
    # count before they are unescaped.  The count is applied to the lexer's
    # counter (the one PLY documents for line tracking) as well as to the
    # token, which is what the original updated.
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
    # Drop the backslash from each escape, keeping the escaped character.
    t.value = _UNESCAPE_PARAMS_RE.sub(r'\1', t.value)
    # The parameter text is complete; go back to lexing assembly.
    t.lexer.begin('asm')
    return t
|
|
|
|
# An "ID" in the micro assembler is either a label, directive, or mnemonic
|
|
# If it's either a directive or a mnemonic, it will be optionally followed by
|
|
# parameters. If it's a label, the following colon will make the lexer stop
|
|
# looking for parameters.
|
|
def t_asm_ID(t):
    r'[A-Za-z_]\w*'
    # Keywords get their own token type; anything else is a plain ID.
    kind = reserved_map.get(t.value, 'ID')
    t.type = kind
    # If the ID is really "extern", we shouldn't start looking for parameters
    # yet. The real ID, the label itself, is coming up.
    if kind != 'EXTERN':
        t.lexer.begin('params')
    return t
|
|
|
|
# If there is a label and you're -not- in the assembler (which would be caught
|
|
# above), don't start looking for parameters.
|
|
def t_ANY_ID(t):
    r'[A-Za-z_]\w*'
    # Classify as a keyword when the text is one, otherwise as a plain ID.
    # Unlike t_asm_ID, the lexer state is left alone.
    kind = reserved_map.get(t.value, 'ID')
    t.type = kind
    return t
|
|
|
|
# Braces enter and exit micro assembly
|
|
def t_INITIAL_LBRACE(t):
    r'\{'
    # An opening brace at the top level starts a block of micro assembly.
    lexer = t.lexer
    lexer.begin('asm')
    return t
|
|
|
|
def t_asm_RBRACE(t):
    r'\}'
    # A closing brace ends the micro assembly block; return to the top level.
    lexer = t.lexer
    lexer.begin('INITIAL')
    return t
|
|
|
|
# At the top level, keep track of newlines only for line counting.
|
|
def t_INITIAL_NEWLINE(t):
    r'\n+'
    # Top-level newlines only advance the line count.  The count is applied
    # to the lexer's counter (the one PLY documents for line tracking) as
    # well as to the token, which is all the original updated.  Nothing is
    # returned, so no token is produced.
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
|
|
|
|
# In the micro assembler, do line counting but also return a token. The
|
|
# token is needed by the parser to detect the end of a statement.
|
|
def t_asm_NEWLINE(t):
    r'\n+'
    # Advance the line count on the lexer's counter (the one PLY documents
    # for line tracking) as well as on the token, which is all the original
    # updated.
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
    # The token is returned because the parser uses it to end a statement.
    return t
|
|
|
|
# A newline or semi colon when looking for params signals that the statement
|
|
# is over and the lexer should go back to looking for regular assembly.
|
|
def t_params_NEWLINE(t):
    r'\n+'
    # Advance the line count on the lexer's counter (the one PLY documents
    # for line tracking) as well as on the token, which is all the original
    # updated.
    newlines = t.value.count('\n')
    t.lineno += newlines
    t.lexer.lineno += newlines
    # The statement is over; go back to lexing ordinary assembly.
    t.lexer.begin('asm')
    return t
|
|
|
|
def t_params_SEMI(t):
    r';'
    # A semicolon ends the statement; go back to lexing ordinary assembly.
    lexer = t.lexer
    lexer.begin('asm')
    return t
|
|
|
|
# Basic regular expressions to pick out simple tokens
|
|
# Characters skipped between tokens in every state: space, tab, form feed.
t_ANY_ignore = ' \t\x0c'

# Single-character punctuation, recognised in every lexer state.
t_ANY_DOT    = r'\.'
t_ANY_SEMI   = r';'
t_ANY_LPAREN = r'\('
t_ANY_RPAREN = r'\)'
|
|
|
|
def t_ANY_error(t):
    # Report the first character that could not be matched.
    bad_char = t.value[0]
    error(t.lineno, "illegal character '%s'" % bad_char)
    # error() finishes with sys.exit, so this is reached only if that
    # changes.
    t.skip(1)
|
|
|
|
##########################################################################
|
|
#
|
|
# Parser specification
|
|
#
|
|
##########################################################################
|
|
|
|
# Start symbol for a file which may have more than one macroop or rom
|
|
# specification.
|
|
def p_file(t):
    'file : opt_rom_or_macros'
    # Grammar-only rule: the docstring is the production and there is no
    # semantic action, so nothing is assigned to t[0].
|
|
|
|
def p_opt_rom_or_macros_0(t):
    'opt_rom_or_macros : '
    # Empty alternative: a file may contain no rom blocks or macroops at all.
    # No semantic action.
|
|
|
|
def p_opt_rom_or_macros_1(t):
    'opt_rom_or_macros : rom_or_macros'
    # Non-empty alternative of the optional list.  No semantic action.
|
|
|
|
def p_rom_or_macros_0(t):
    'rom_or_macros : rom_or_macro'
    # Base case of the list: a single rom block or macroop definition.
    # No semantic action.
|
|
|
|
def p_rom_or_macros_1(t):
    'rom_or_macros : rom_or_macros rom_or_macro'
    # Recursive case of the list: one more item after those already seen.
    # No semantic action.
|
|
|
|
def p_rom_or_macro_0(t):
    '''rom_or_macro : rom_block
                    | macroop_def'''
    # Grammar-only rule: either kind of top-level item is accepted and there
    # is no semantic action.
|
|
|
|
# Defines a section of microcode that should go in the current ROM
|
|
def p_rom_block(t):
    'rom_block : DEF ROM block SEMI'
    # Feed every statement of the block into the ROM object the assembler was
    # constructed with, and make that object the value of the rule.
    #
    # Raises TypeError when no ROM object was supplied.  The raise uses the
    # call form, which is valid on both Python 2 and Python 3; the original
    # comma form is Python 2 only.
    rom = t.parser.rom
    if not rom:
        print_error("Rom block found, but no Rom object specified.")
        raise TypeError("Rom block found, but no Rom object was specified.")
    for statement in t[3].statements:
        handle_statement(t.parser, rom, statement)
    t[0] = rom
|
|
|
|
# Defines a macroop that jumps to an external label in the ROM
|
|
def p_macroop_def_0(t):
    'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
    # Build a ROM-based macroop from its name (t[3]) and target (t[5]) using
    # the class supplied to the assembler, and register it under its name.
    #
    # Raises TypeError when no ROM macroop class was supplied.  The raise
    # uses the call form, which is valid on both Python 2 and Python 3; the
    # original comma form is Python 2 only.
    make_macroop = t.parser.rom_macroop_type
    if not make_macroop:
        print_error("ROM based macroop found, but no ROM macroop class was specified.")
        raise TypeError("ROM based macroop found, but no ROM macroop class was specified.")
    name, target = t[3], t[5]
    t.parser.macroops[name] = make_macroop(name, target)
|
|
|
|
|
|
# Defines a macroop that is combinationally generated
|
|
def p_macroop_def_1(t):
    'macroop_def : DEF MACROOP ID block SEMI'
    # Create a macroop object named t[3], run every statement of the block
    # against it, then register it under its name.
    parser = t.parser
    name = t[3]
    try:
        macroop = parser.macro_type(name)
    except TypeError:
        # Add context, then let the original exception propagate.
        print_error("Error creating macroop object.")
        raise
    for stmt in t[4].statements:
        handle_statement(parser, macroop, stmt)
    parser.macroops[name] = macroop
|
|
|
|
# A block of statements
|
|
def p_block(t):
    'block : LBRACE statements RBRACE'
    # Wrap the statement list found between the braces in a Block object.
    result = Block()
    result.statements = t[2]
    t[0] = result
|
|
|
|
def p_statements_0(t):
    'statements : statement'
    # Start the list; an empty statement (falsy value) contributes nothing.
    collected = []
    if t[1]:
        collected.append(t[1])
    t[0] = collected
|
|
|
|
def p_statements_1(t):
    'statements : statements statement'
    # Extend the existing list in place; empty statements are skipped.
    collected, latest = t[1], t[2]
    if latest:
        collected.append(latest)
    t[0] = collected
|
|
|
|
def p_statement(t):
    'statement : content_of_statement end_of_statement'
    # The terminator carries no information; pass the content through.
    content = t[1]
    t[0] = content
|
|
|
|
# A statement can be a microop or an assembler directive
|
|
def p_content_of_statement_0(t):
    '''content_of_statement : microop
                            | directive'''
    # Whichever alternative matched, its value becomes the rule's value.
    matched = t[1]
    t[0] = matched
|
|
|
|
# Ignore empty statements
|
|
def p_content_of_statement_1(t):
    'content_of_statement : '
    # Empty statement: nothing is assigned to t[0], which the statements
    # rules treat as "skip this one".
    return None
|
|
|
|
# Statements are ended by newlines or a semi colon
|
|
def p_end_of_statement(t):
    '''end_of_statement : NEWLINE
                        | SEMI'''
    # Either terminator is accepted; there is no value to produce.
    return None
|
|
|
|
# Different flavors of microop to avoid shift/reduce errors
|
|
def p_microop_0(t):
    'microop : labels ID'
    # Labelled microop without parameters.
    op = Microop()
    op.labels, op.mnemonic = t[1], t[2]
    t[0] = op
|
|
|
|
def p_microop_1(t):
    'microop : ID'
    # Bare microop: no labels and no parameters.
    op = Microop()
    op.mnemonic = t[1]
    t[0] = op
|
|
|
|
def p_microop_2(t):
    'microop : labels ID PARAMS'
    # Labelled microop with parameters.
    op = Microop()
    op.labels, op.mnemonic, op.params = t[1], t[2], t[3]
    t[0] = op
|
|
|
|
def p_microop_3(t):
    'microop : ID PARAMS'
    # Unlabelled microop with parameters.
    op = Microop()
    op.mnemonic, op.params = t[1], t[2]
    t[0] = op
|
|
|
|
# Labels in the microcode
|
|
def p_labels_0(t):
    'labels : label'
    # Start a new list holding the first label.
    first = t[1]
    t[0] = [first]
|
|
|
|
def p_labels_1(t):
    'labels : labels label'
    # Grow the existing list in place and pass it on.
    so_far = t[1]
    so_far.append(t[2])
    t[0] = so_far
|
|
|
|
# labels on lines by themselves are attached to the following instruction.
|
|
def p_labels_2(t):
    'labels : labels NEWLINE'
    # A newline after labels is swallowed so the labels stay pending for the
    # instruction that follows.
    pending = t[1]
    t[0] = pending
|
|
|
|
def p_label_0(t):
    'label : ID COLON'
    # Ordinary (non-extern) label named by the ID.
    lbl = Label()
    lbl.text = t[1]
    lbl.is_extern = False
    t[0] = lbl
|
|
|
|
def p_label_1(t):
    'label : EXTERN ID COLON'
    # Label declared with the extern keyword; the name is the second symbol.
    lbl = Label()
    lbl.text = t[2]
    lbl.is_extern = True
    t[0] = lbl
|
|
|
|
# Directives for the macroop
|
|
def p_directive_0(t):
    'directive : DOT ID'
    # Directive without parameters.
    stmt = Directive()
    stmt.name = t[2]
    t[0] = stmt
|
|
|
|
def p_directive_1(t):
    'directive : DOT ID PARAMS'
    # Directive with parameters.
    stmt = Directive()
    stmt.name, stmt.params = t[2], t[3]
    t[0] = stmt
|
|
|
|
# Parse error handler. Note that the argument here is the offending
|
|
# *token*, not a grammar symbol (hence the need to use t.value)
|
|
def p_error(t):
    # Without an offending token there is no line number or text to report,
    # so fall back to a generic message and request a traceback.
    if not t:
        error(0, "unknown syntax error", True)
    else:
        error(t.lineno, "syntax error at '%s'" % t.value)
|
|
|
|
class MicroAssembler(object):
    """Front end that owns a lexer/parser pair and the state they share.

    Args:
        macro_type: callable invoked with a macroop name to create the
            object for a combinational macroop definition.
        microops: mapping from mnemonic to the class called to create that
            microop.
        rom: object that ROM blocks add their microops to, or None.
        rom_macroop_type: callable invoked with (name, target) for ROM-based
            macroop definitions, or None.

    ``symbols`` is the namespace in which statement parameters are
    evaluated; the same dict is exposed on the assembler and on the parser.
    """

    def __init__(self, macro_type, microops,
            rom = None, rom_macroop_type = None):
        self.lexer = lex.lex()
        parser = yacc.yacc()
        # The grammar actions read their configuration off the parser object.
        parser.macro_type = macro_type
        parser.rom_macroop_type = rom_macroop_type
        parser.rom = rom
        parser.microops = microops
        parser.macroops = {}
        parser.symbols = {}
        self.parser = parser
        self.symbols = parser.symbols

    def assemble(self, asm):
        """Parse *asm* and return the macroops it defined, keyed by name.

        The parser's macroop table is replaced by a fresh dict afterwards,
        so each call returns only what that call produced.
        """
        parser = self.parser
        parser.parse(asm, lexer=self.lexer)
        collected, parser.macroops = parser.macroops, {}
        return collected
|