X86: Don't read in dest regs if all bits are replaced.

In x86, 32- and 64-bit writes to registers overwrite all bits of the
destination register whenever those registers appear to be 32 or 64 bits wide.
This change removes the false dependencies that arise in these cases, where
the previous value of a register doesn't need to be read in order to write a
new value. New versions of most microops are created with a "Big" suffix;
these simply overwrite their destination, and the right version to use is
selected during microop allocation based on the selected data size.

This does not change the performance of the O3 CPU model significantly. I
assume this is because there are other false dependencies from the condition
code bits in the flags register.
This commit is contained in:
Gabe Black 2011-02-13 17:44:24 -08:00
parent 399e095510
commit 4e1adf85f7
3 changed files with 266 additions and 108 deletions

View file

@ -301,6 +301,46 @@ let {{
"dataSize" : self.dataSize, "addressSize" : self.addressSize, "dataSize" : self.dataSize, "addressSize" : self.addressSize,
"memFlags" : self.memFlags} "memFlags" : self.memFlags}
return allocator return allocator
class BigLdStOp(X86Microop):
    """Load/store microop description with a size-selected C++ class.

    The allocator text produced by getAllocator() is a C++ conditional
    expression: when the data size expression is >= 4 it instantiates
    the "<className>Big" class, otherwise the plain "<className>" class.
    Both branches receive the same constructor arguments.
    """

    def __init__(self, data, segment, addr, disp,
            dataSize, addressSize, baseFlags, atCPL0, prefetch):
        """Store the operands and assemble the memory flags expression.

        addr must unpack into exactly three items, taken in the order
        (scale, index, base).  The flag text starts from baseFlags and
        has further C++ terms appended depending on atCPL0 and prefetch;
        the address-size term is always appended last.
        """
        self.data = data
        self.segment = segment
        self.scale, self.index, self.base = addr
        self.disp = disp
        self.dataSize = dataSize
        self.addressSize = addressSize

        # Accumulate the flag expression locally, then publish it.
        flagText = baseFlags
        if atCPL0:
            flagText += " | (CPL0FlagBit << FlagShift)"
        if prefetch:
            flagText += " | Request::PREFETCH"
        flagText += " | (machInst.legacy.addr ? " + \
                    "(AddrSizeFlagBit << FlagShift) : 0)"
        self.memFlags = flagText

    def getAllocator(self, microFlags):
        """Return the C++ expression text that allocates this microop.

        microFlags is passed through self.microFlagsText() to obtain the
        text substituted for the constructor's flags argument.
        """
        # Values substituted into the template below, keyed by the
        # placeholder names the template uses.
        fields = {
            "class_name" : self.className,
            "flags" : self.microFlagsText(microFlags),
            "scale" : self.scale,
            "index" : self.index,
            "base" : self.base,
            "disp" : self.disp,
            "segment" : self.segment,
            "data" : self.data,
            "dataSize" : self.dataSize,
            "addressSize" : self.addressSize,
            "memFlags" : self.memFlags,
        }
        # Both arms are cast to StaticInstPtr so the conditional
        # expression has a single type.
        template = '''
(%(dataSize)s >= 4) ?
(StaticInstPtr)(new %(class_name)sBig(machInst,
macrocodeBlock, %(flags)s, %(scale)s, %(index)s,
%(base)s, %(disp)s, %(segment)s, %(data)s,
%(dataSize)s, %(addressSize)s, %(memFlags)s)) :
(StaticInstPtr)(new %(class_name)s(machInst,
macrocodeBlock, %(flags)s, %(scale)s, %(index)s,
%(base)s, %(disp)s, %(segment)s, %(data)s,
%(dataSize)s, %(addressSize)s, %(memFlags)s))
'''
        return template % fields
}}; }};
let {{ let {{
@ -315,7 +355,8 @@ let {{
EA = bits(SegBase + scale * Index + Base + disp, addressSize * 8 - 1, 0); EA = bits(SegBase + scale * Index + Base + disp, addressSize * 8 - 1, 0);
''' '''
def defineMicroLoadOp(mnemonic, code, mem_flags="0"): def defineMicroLoadOp(mnemonic, code, bigCode='',
mem_flags="0", big=True):
global header_output global header_output
global decoder_output global decoder_output
global exec_output global exec_output
@ -324,16 +365,22 @@ let {{
name = mnemonic.lower() name = mnemonic.lower()
# Build up the all register version of this micro op # Build up the all register version of this micro op
iop = InstObjParams(name, Name, 'X86ISA::LdStOp', iops = [InstObjParams(name, Name, 'X86ISA::LdStOp',
{"code": code, {"code": code, "ea_code": calculateEA})]
"ea_code": calculateEA}) if big:
iops += [InstObjParams(name, Name + "Big", 'X86ISA::LdStOp',
{"code": bigCode, "ea_code": calculateEA})]
for iop in iops:
header_output += MicroLdStOpDeclare.subst(iop) header_output += MicroLdStOpDeclare.subst(iop)
decoder_output += MicroLdStOpConstructor.subst(iop) decoder_output += MicroLdStOpConstructor.subst(iop)
exec_output += MicroLoadExecute.subst(iop) exec_output += MicroLoadExecute.subst(iop)
exec_output += MicroLoadInitiateAcc.subst(iop) exec_output += MicroLoadInitiateAcc.subst(iop)
exec_output += MicroLoadCompleteAcc.subst(iop) exec_output += MicroLoadCompleteAcc.subst(iop)
class LoadOp(LdStOp): base = LdStOp
if big:
base = BigLdStOp
class LoadOp(base):
def __init__(self, data, segment, addr, disp = 0, def __init__(self, data, segment, addr, disp = 0,
dataSize="env.dataSize", dataSize="env.dataSize",
addressSize="env.addressSize", addressSize="env.addressSize",
@ -346,12 +393,15 @@ let {{
microopClasses[name] = LoadOp microopClasses[name] = LoadOp
defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);') defineMicroLoadOp('Ld', 'Data = merge(Data, Mem, dataSize);',
'Data = Mem & mask(dataSize * 8);')
defineMicroLoadOp('Ldst', 'Data = merge(Data, Mem, dataSize);', defineMicroLoadOp('Ldst', 'Data = merge(Data, Mem, dataSize);',
'Data = Mem & mask(dataSize * 8);',
'(StoreCheck << FlagShift)') '(StoreCheck << FlagShift)')
defineMicroLoadOp('Ldstl', 'Data = merge(Data, Mem, dataSize);', defineMicroLoadOp('Ldstl', 'Data = merge(Data, Mem, dataSize);',
'Data = Mem & mask(dataSize * 8);',
'(StoreCheck << FlagShift) | Request::LOCKED') '(StoreCheck << FlagShift) | Request::LOCKED')
defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;') defineMicroLoadOp('Ldfp', 'FpData.uqw = Mem;', big = False)
def defineMicroStoreOp(mnemonic, code, \ def defineMicroStoreOp(mnemonic, code, \
postCode="", completeCode="", mem_flags="0"): postCode="", completeCode="", mem_flags="0"):

View file

@ -114,8 +114,16 @@ let {{
self.dataSize = dataSize self.dataSize = dataSize
def getAllocator(self, microFlags): def getAllocator(self, microFlags):
allocator = '''new %(class_name)s(machInst, macrocodeBlock, allocString = '''
%(flags)s, %(dest)s, %(imm)s, %(dataSize)s)''' % { (%(dataSize)s >= 4) ?
(StaticInstPtr)(new %(class_name)sBig(machInst,
macrocodeBlock, %(flags)s, %(dest)s, %(imm)s,
%(dataSize)s)) :
(StaticInstPtr)(new %(class_name)s(machInst,
macrocodeBlock, %(flags)s, %(dest)s, %(imm)s,
%(dataSize)s))
'''
allocator = allocString % {
"class_name" : self.className, "class_name" : self.className,
"mnemonic" : self.mnemonic, "mnemonic" : self.mnemonic,
"flags" : self.microFlagsText(microFlags), "flags" : self.microFlagsText(microFlags),
@ -152,8 +160,11 @@ let {{
let {{ let {{
# Build up the all register version of this micro op # Build up the all register version of this micro op
iop = InstObjParams("limm", "Limm", 'X86MicroopBase', iops = [InstObjParams("limm", "Limm", 'X86MicroopBase',
{"code" : "DestReg = merge(DestReg, imm, dataSize);"}) {"code" : "DestReg = merge(DestReg, imm, dataSize);"}),
InstObjParams("limm", "LimmBig", 'X86MicroopBase',
{"code" : "DestReg = imm & mask(dataSize * 8);"})]
for iop in iops:
header_output += MicroLimmOpDeclare.subst(iop) header_output += MicroLimmOpDeclare.subst(iop)
decoder_output += MicroLimmOpConstructor.subst(iop) decoder_output += MicroLimmOpConstructor.subst(iop)
decoder_output += MicroLimmOpDisassembly.subst(iop) decoder_output += MicroLimmOpDisassembly.subst(iop)

View file

@ -224,8 +224,8 @@ let {{
MicroRegOpExecute) MicroRegOpExecute)
class RegOpMeta(type): class RegOpMeta(type):
def buildCppClasses(self, name, Name, suffix, \ def buildCppClasses(self, name, Name, suffix, code, big_code, \
code, flag_code, cond_check, else_code, cond_control_flag_init): flag_code, cond_check, else_code, cond_control_flag_init):
# Globals to stick the output in # Globals to stick the output in
global header_output global header_output
@ -235,11 +235,13 @@ let {{
# Stick all the code together so it can be searched at once # Stick all the code together so it can be searched at once
allCode = "|".join((code, flag_code, cond_check, else_code, allCode = "|".join((code, flag_code, cond_check, else_code,
cond_control_flag_init)) cond_control_flag_init))
allBigCode = "|".join((big_code, flag_code, cond_check, else_code,
cond_control_flag_init))
# If op2 is used anywhere, make register and immediate versions # If op2 is used anywhere, make register and immediate versions
# of this code. # of this code.
matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?") matcher = re.compile("(?<!\\w)(?P<prefix>s?)op2(?P<typeQual>\\.\\w+)?")
match = matcher.search(allCode) match = matcher.search(allCode + allBigCode)
if match: if match:
typeQual = "" typeQual = ""
if match.group("typeQual"): if match.group("typeQual"):
@ -247,6 +249,7 @@ let {{
src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual) src2_name = "%spsrc2%s" % (match.group("prefix"), typeQual)
self.buildCppClasses(name, Name, suffix, self.buildCppClasses(name, Name, suffix,
matcher.sub(src2_name, code), matcher.sub(src2_name, code),
matcher.sub(src2_name, big_code),
matcher.sub(src2_name, flag_code), matcher.sub(src2_name, flag_code),
matcher.sub(src2_name, cond_check), matcher.sub(src2_name, cond_check),
matcher.sub(src2_name, else_code), matcher.sub(src2_name, else_code),
@ -254,6 +257,7 @@ let {{
imm_name = "%simm8" % match.group("prefix") imm_name = "%simm8" % match.group("prefix")
self.buildCppClasses(name + "i", Name, suffix + "Imm", self.buildCppClasses(name + "i", Name, suffix + "Imm",
matcher.sub(imm_name, code), matcher.sub(imm_name, code),
matcher.sub(imm_name, big_code),
matcher.sub(imm_name, flag_code), matcher.sub(imm_name, flag_code),
matcher.sub(imm_name, cond_check), matcher.sub(imm_name, cond_check),
matcher.sub(imm_name, else_code), matcher.sub(imm_name, else_code),
@ -264,27 +268,32 @@ let {{
# a version without it and fix up this version to use it. # a version without it and fix up this version to use it.
if flag_code != "" or cond_check != "true": if flag_code != "" or cond_check != "true":
self.buildCppClasses(name, Name, suffix, self.buildCppClasses(name, Name, suffix,
code, "", "true", else_code, "") code, big_code, "", "true", else_code, "")
suffix = "Flags" + suffix suffix = "Flags" + suffix
# If psrc1 or psrc2 is used, we need to actually insert code to # If psrc1 or psrc2 is used, we need to actually insert code to
# compute it. # compute it.
matcher = re.compile("(?<!\w)psrc1(?!\w)") for (big, all) in ((False, allCode), (True, allBigCode)):
if matcher.search(allCode): prefix = ""
code = "uint64_t psrc1 = pick(SrcReg1, 0, dataSize);" + code for (rex, decl) in (
matcher = re.compile("(?<!\w)psrc2(?!\w)") ("(?<!\w)psrc1(?!\w)",
if matcher.search(allCode): "uint64_t psrc1 = pick(SrcReg1, 0, dataSize);"),
code = "uint64_t psrc2 = pick(SrcReg2, 1, dataSize);" + code ("(?<!\w)psrc2(?!\w)",
# Also make available versions which do sign extension "uint64_t psrc2 = pick(SrcReg2, 1, dataSize);"),
matcher = re.compile("(?<!\w)spsrc1(?!\w)") ("(?<!\w)spsrc1(?!\w)",
if matcher.search(allCode): "int64_t spsrc1 = signedPick(SrcReg1, 0, dataSize);"),
code = "int64_t spsrc1 = signedPick(SrcReg1, 0, dataSize);" + code ("(?<!\w)spsrc2(?!\w)",
matcher = re.compile("(?<!\w)spsrc2(?!\w)") "int64_t spsrc2 = signedPick(SrcReg2, 1, dataSize);"),
if matcher.search(allCode): ("(?<!\w)simm8(?!\w)",
code = "int64_t spsrc2 = signedPick(SrcReg2, 1, dataSize);" + code "int8_t simm8 = imm8;")):
matcher = re.compile("(?<!\w)simm8(?!\w)") matcher = re.compile(rex)
if matcher.search(allCode): if matcher.search(all):
code = "int8_t simm8 = imm8;" + code prefix += decl + "\n"
if big:
if big_code != "":
big_code = prefix + big_code
else:
code = prefix + code
base = "X86ISA::RegOp" base = "X86ISA::RegOp"
@ -297,14 +306,23 @@ let {{
templates = immTemplates templates = immTemplates
# Get everything ready for the substitution # Get everything ready for the substitution
iop = InstObjParams(name, Name + suffix, base, iops = [InstObjParams(name, Name + suffix, base,
{"code" : code, {"code" : code,
"flag_code" : flag_code, "flag_code" : flag_code,
"cond_check" : cond_check, "cond_check" : cond_check,
"else_code" : else_code, "else_code" : else_code,
"cond_control_flag_init": cond_control_flag_init}) "cond_control_flag_init" : cond_control_flag_init})]
if big_code != "":
iops += [InstObjParams(name, Name + suffix + "Big", base,
{"code" : big_code,
"flag_code" : flag_code,
"cond_check" : cond_check,
"else_code" : else_code,
"cond_control_flag_init" :
cond_control_flag_init})]
# Generate the actual code (finally!) # Generate the actual code (finally!)
for iop in iops:
header_output += templates[0].subst(iop) header_output += templates[0].subst(iop)
decoder_output += templates[1].subst(iop) decoder_output += templates[1].subst(iop)
exec_output += templates[2].subst(iop) exec_output += templates[2].subst(iop)
@ -322,14 +340,16 @@ let {{
cls.className = Name cls.className = Name
cls.base_mnemonic = name cls.base_mnemonic = name
code = cls.code code = cls.code
big_code = cls.big_code
flag_code = cls.flag_code flag_code = cls.flag_code
cond_check = cls.cond_check cond_check = cls.cond_check
else_code = cls.else_code else_code = cls.else_code
cond_control_flag_init = cls.cond_control_flag_init cond_control_flag_init = cls.cond_control_flag_init
# Set up the C++ classes # Set up the C++ classes
mcls.buildCppClasses(cls, name, Name, "", code, flag_code, mcls.buildCppClasses(cls, name, Name, "", code, big_code,
cond_check, else_code, cond_control_flag_init) flag_code, cond_check, else_code,
cond_control_flag_init)
# Hook into the microassembler dict # Hook into the microassembler dict
global microopClasses global microopClasses
@ -352,6 +372,7 @@ let {{
abstract = True abstract = True
# Default template parameter values # Default template parameter values
big_code = ""
flag_code = "" flag_code = ""
cond_check = "true" cond_check = "true"
else_code = ";" else_code = ";"
@ -372,6 +393,28 @@ let {{
self.className += "Flags" self.className += "Flags"
def getAllocator(self, microFlags): def getAllocator(self, microFlags):
if self.big_code != "":
className = self.className
if self.mnemonic == self.base_mnemonic + 'i':
className += "Imm"
allocString = '''
(%(dataSize)s >= 4) ?
(StaticInstPtr)(new %(class_name)sBig(machInst,
macrocodeBlock, %(flags)s, %(src1)s, %(op2)s,
%(dest)s, %(dataSize)s, %(ext)s)) :
(StaticInstPtr)(new %(class_name)s(machInst,
macrocodeBlock, %(flags)s, %(src1)s, %(op2)s,
%(dest)s, %(dataSize)s, %(ext)s))
'''
allocator = allocString % {
"class_name" : className,
"flags" : self.microFlagsText(microFlags),
"src1" : self.src1, "op2" : self.op2,
"dest" : self.dest,
"dataSize" : self.dataSize,
"ext" : self.ext}
return allocator
else:
className = self.className className = self.className
if self.mnemonic == self.base_mnemonic + 'i': if self.mnemonic == self.base_mnemonic + 'i':
className += "Imm" className += "Imm"
@ -429,30 +472,43 @@ let {{
class Add(FlagRegOp): class Add(FlagRegOp):
code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);' code = 'DestReg = merge(DestReg, psrc1 + op2, dataSize);'
big_code = 'DestReg = (psrc1 + op2) & mask(dataSize * 8);'
class Or(LogicRegOp): class Or(LogicRegOp):
code = 'DestReg = merge(DestReg, psrc1 | op2, dataSize);' code = 'DestReg = merge(DestReg, psrc1 | op2, dataSize);'
big_code = 'DestReg = (psrc1 | op2) & mask(dataSize * 8);'
class Adc(FlagRegOp): class Adc(FlagRegOp):
code = ''' code = '''
CCFlagBits flags = ccFlagBits; CCFlagBits flags = ccFlagBits;
DestReg = merge(DestReg, psrc1 + op2 + flags.cf, dataSize); DestReg = merge(DestReg, psrc1 + op2 + flags.cf, dataSize);
''' '''
big_code = '''
CCFlagBits flags = ccFlagBits;
DestReg = (psrc1 + op2 + flags.cf) & mask(dataSize * 8);
'''
class Sbb(SubRegOp): class Sbb(SubRegOp):
code = ''' code = '''
CCFlagBits flags = ccFlagBits; CCFlagBits flags = ccFlagBits;
DestReg = merge(DestReg, psrc1 - op2 - flags.cf, dataSize); DestReg = merge(DestReg, psrc1 - op2 - flags.cf, dataSize);
''' '''
big_code = '''
CCFlagBits flags = ccFlagBits;
DestReg = (psrc1 - op2 - flags.cf) & mask(dataSize * 8);
'''
class And(LogicRegOp): class And(LogicRegOp):
code = 'DestReg = merge(DestReg, psrc1 & op2, dataSize)' code = 'DestReg = merge(DestReg, psrc1 & op2, dataSize)'
big_code = 'DestReg = (psrc1 & op2) & mask(dataSize * 8)'
class Sub(SubRegOp): class Sub(SubRegOp):
code = 'DestReg = merge(DestReg, psrc1 - op2, dataSize)' code = 'DestReg = merge(DestReg, psrc1 - op2, dataSize)'
big_code = 'DestReg = (psrc1 - op2) & mask(dataSize * 8)'
class Xor(LogicRegOp): class Xor(LogicRegOp):
code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)' code = 'DestReg = merge(DestReg, psrc1 ^ op2, dataSize)'
big_code = 'DestReg = (psrc1 ^ op2) & mask(dataSize * 8)'
class Mul1s(WrRegOp): class Mul1s(WrRegOp):
code = ''' code = '''
@ -505,6 +561,7 @@ let {{
class Mulel(RdRegOp): class Mulel(RdRegOp):
code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);' code = 'DestReg = merge(SrcReg1, ProdLow, dataSize);'
big_code = 'DestReg = ProdLow & mask(dataSize * 8);'
class Muleh(RdRegOp): class Muleh(RdRegOp):
def __init__(self, dest, src1=None, flags=None, dataSize="env.dataSize"): def __init__(self, dest, src1=None, flags=None, dataSize="env.dataSize"):
@ -513,6 +570,7 @@ let {{
super(RdRegOp, self).__init__(dest, src1, \ super(RdRegOp, self).__init__(dest, src1, \
"InstRegIndex(NUM_INTREGS)", flags, dataSize) "InstRegIndex(NUM_INTREGS)", flags, dataSize)
code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);' code = 'DestReg = merge(SrcReg1, ProdHi, dataSize);'
big_code = 'DestReg = ProdHi & mask(dataSize * 8);'
# One or two bit divide # One or two bit divide
class Div1(WrRegOp): class Div1(WrRegOp):
@ -540,7 +598,7 @@ let {{
# Step divide # Step divide
class Div2(RegOp): class Div2(RegOp):
code = ''' divCode = '''
uint64_t dividend = Remainder; uint64_t dividend = Remainder;
uint64_t divisor = Divisor; uint64_t divisor = Divisor;
uint64_t quotient = Quotient; uint64_t quotient = Quotient;
@ -587,11 +645,13 @@ let {{
} }
} }
//Keep track of how many bits there are still to pull in. //Keep track of how many bits there are still to pull in.
DestReg = merge(DestReg, remaining, dataSize); %s
//Record the final results //Record the final results
Remainder = remainder; Remainder = remainder;
Quotient = quotient; Quotient = quotient;
''' '''
code = divCode % "DestReg = merge(DestReg, remaining, dataSize);"
big_code = divCode % "DestReg = remaining & mask(dataSize * 8);"
flag_code = ''' flag_code = '''
if (remaining == 0) if (remaining == 0)
ccFlagBits = ccFlagBits | (ext & EZFBit); ccFlagBits = ccFlagBits | (ext & EZFBit);
@ -601,9 +661,11 @@ let {{
class Divq(RdRegOp): class Divq(RdRegOp):
code = 'DestReg = merge(SrcReg1, Quotient, dataSize);' code = 'DestReg = merge(SrcReg1, Quotient, dataSize);'
big_code = 'DestReg = Quotient & mask(dataSize * 8);'
class Divr(RdRegOp): class Divr(RdRegOp):
code = 'DestReg = merge(SrcReg1, Remainder, dataSize);' code = 'DestReg = merge(SrcReg1, Remainder, dataSize);'
big_code = 'DestReg = Remainder & mask(dataSize * 8);'
class Mov(CondRegOp): class Mov(CondRegOp):
code = 'DestReg = merge(SrcReg1, op2, dataSize)' code = 'DestReg = merge(SrcReg1, op2, dataSize)'
@ -616,6 +678,10 @@ let {{
uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
DestReg = merge(DestReg, psrc1 << shiftAmt, dataSize); DestReg = merge(DestReg, psrc1 << shiftAmt, dataSize);
''' '''
big_code = '''
uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
DestReg = (psrc1 << shiftAmt) & mask(dataSize * 8);
'''
flag_code = ''' flag_code = '''
// If the shift amount is zero, no flags should be modified. // If the shift amount is zero, no flags should be modified.
if (shiftAmt) { if (shiftAmt) {
@ -641,14 +707,19 @@ let {{
''' '''
class Srl(RegOp): class Srl(RegOp):
# Because what happens to the bits shift -in- on a right shift
# is not defined in the C/C++ standard, we have to mask them out
# to be sure they're zero.
code = ''' code = '''
uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
// Because what happens to the bits shift -in- on a right shift
// is not defined in the C/C++ standard, we have to mask them out
// to be sure they're zero.
uint64_t logicalMask = mask(dataSize * 8 - shiftAmt); uint64_t logicalMask = mask(dataSize * 8 - shiftAmt);
DestReg = merge(DestReg, (psrc1 >> shiftAmt) & logicalMask, dataSize); DestReg = merge(DestReg, (psrc1 >> shiftAmt) & logicalMask, dataSize);
''' '''
big_code = '''
uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
uint64_t logicalMask = mask(dataSize * 8 - shiftAmt);
DestReg = (psrc1 >> shiftAmt) & logicalMask;
'''
flag_code = ''' flag_code = '''
// If the shift amount is zero, no flags should be modified. // If the shift amount is zero, no flags should be modified.
if (shiftAmt) { if (shiftAmt) {
@ -671,15 +742,21 @@ let {{
''' '''
class Sra(RegOp): class Sra(RegOp):
# Because what happens to the bits shift -in- on a right shift
# is not defined in the C/C++ standard, we have to sign extend
# them manually to be sure.
code = ''' code = '''
uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
// Because what happens to the bits shift -in- on a right shift
// is not defined in the C/C++ standard, we have to sign extend
// them manually to be sure.
uint64_t arithMask = (shiftAmt == 0) ? 0 : uint64_t arithMask = (shiftAmt == 0) ? 0 :
-bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt); -bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt);
DestReg = merge(DestReg, (psrc1 >> shiftAmt) | arithMask, dataSize); DestReg = merge(DestReg, (psrc1 >> shiftAmt) | arithMask, dataSize);
''' '''
big_code = '''
uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
uint64_t arithMask = (shiftAmt == 0) ? 0 :
-bits(psrc1, dataSize * 8 - 1) << (dataSize * 8 - shiftAmt);
DestReg = ((psrc1 >> shiftAmt) | arithMask) & mask(dataSize * 8);
'''
flag_code = ''' flag_code = '''
// If the shift amount is zero, no flags should be modified. // If the shift amount is zero, no flags should be modified.
if (shiftAmt) { if (shiftAmt) {
@ -704,13 +781,11 @@ let {{
uint8_t shiftAmt = uint8_t shiftAmt =
(op2 & ((dataSize == 8) ? mask(6) : mask(5))); (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
uint8_t realShiftAmt = shiftAmt % (dataSize * 8); uint8_t realShiftAmt = shiftAmt % (dataSize * 8);
if(realShiftAmt) if (realShiftAmt) {
{
uint64_t top = psrc1 << (dataSize * 8 - realShiftAmt); uint64_t top = psrc1 << (dataSize * 8 - realShiftAmt);
uint64_t bottom = bits(psrc1, dataSize * 8, realShiftAmt); uint64_t bottom = bits(psrc1, dataSize * 8, realShiftAmt);
DestReg = merge(DestReg, top | bottom, dataSize); DestReg = merge(DestReg, top | bottom, dataSize);
} } else
else
DestReg = merge(DestReg, DestReg, dataSize); DestReg = merge(DestReg, DestReg, dataSize);
''' '''
flag_code = ''' flag_code = '''
@ -739,16 +814,14 @@ let {{
uint8_t shiftAmt = uint8_t shiftAmt =
(op2 & ((dataSize == 8) ? mask(6) : mask(5))); (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1); uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1);
if(realShiftAmt) if (realShiftAmt) {
{
CCFlagBits flags = ccFlagBits; CCFlagBits flags = ccFlagBits;
uint64_t top = flags.cf << (dataSize * 8 - realShiftAmt); uint64_t top = flags.cf << (dataSize * 8 - realShiftAmt);
if (realShiftAmt > 1) if (realShiftAmt > 1)
top |= psrc1 << (dataSize * 8 - realShiftAmt + 1); top |= psrc1 << (dataSize * 8 - realShiftAmt + 1);
uint64_t bottom = bits(psrc1, dataSize * 8 - 1, realShiftAmt); uint64_t bottom = bits(psrc1, dataSize * 8 - 1, realShiftAmt);
DestReg = merge(DestReg, top | bottom, dataSize); DestReg = merge(DestReg, top | bottom, dataSize);
} } else
else
DestReg = merge(DestReg, DestReg, dataSize); DestReg = merge(DestReg, DestReg, dataSize);
''' '''
flag_code = ''' flag_code = '''
@ -780,14 +853,12 @@ let {{
uint8_t shiftAmt = uint8_t shiftAmt =
(op2 & ((dataSize == 8) ? mask(6) : mask(5))); (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
uint8_t realShiftAmt = shiftAmt % (dataSize * 8); uint8_t realShiftAmt = shiftAmt % (dataSize * 8);
if(realShiftAmt) if (realShiftAmt) {
{
uint64_t top = psrc1 << realShiftAmt; uint64_t top = psrc1 << realShiftAmt;
uint64_t bottom = uint64_t bottom =
bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt); bits(psrc1, dataSize * 8 - 1, dataSize * 8 - realShiftAmt);
DestReg = merge(DestReg, top | bottom, dataSize); DestReg = merge(DestReg, top | bottom, dataSize);
} } else
else
DestReg = merge(DestReg, DestReg, dataSize); DestReg = merge(DestReg, DestReg, dataSize);
''' '''
flag_code = ''' flag_code = '''
@ -816,8 +887,7 @@ let {{
uint8_t shiftAmt = uint8_t shiftAmt =
(op2 & ((dataSize == 8) ? mask(6) : mask(5))); (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1); uint8_t realShiftAmt = shiftAmt % (dataSize * 8 + 1);
if(realShiftAmt) if (realShiftAmt) {
{
CCFlagBits flags = ccFlagBits; CCFlagBits flags = ccFlagBits;
uint64_t top = psrc1 << realShiftAmt; uint64_t top = psrc1 << realShiftAmt;
uint64_t bottom = flags.cf << (realShiftAmt - 1); uint64_t bottom = flags.cf << (realShiftAmt - 1);
@ -826,8 +896,7 @@ let {{
bits(psrc1, dataSize * 8 - 1, bits(psrc1, dataSize * 8 - 1,
dataSize * 8 - realShiftAmt + 1); dataSize * 8 - realShiftAmt + 1);
DestReg = merge(DestReg, top | bottom, dataSize); DestReg = merge(DestReg, top | bottom, dataSize);
} } else
else
DestReg = merge(DestReg, DestReg, dataSize); DestReg = merge(DestReg, DestReg, dataSize);
''' '''
flag_code = ''' flag_code = '''
@ -853,10 +922,10 @@ let {{
''' '''
class Sld(RegOp): class Sld(RegOp):
code = ''' sldCode = '''
uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
uint8_t dataBits = dataSize * 8; uint8_t dataBits = dataSize * 8;
uint8_t realShiftAmt = shiftAmt % (2 * dataBits); uint8_t realShiftAmt = shiftAmt %% (2 * dataBits);
uint64_t result; uint64_t result;
if (realShiftAmt == 0) { if (realShiftAmt == 0) {
result = psrc1; result = psrc1;
@ -867,8 +936,10 @@ let {{
result = (DoubleBits << (realShiftAmt - dataBits)) | result = (DoubleBits << (realShiftAmt - dataBits)) |
(psrc1 >> (2 * dataBits - realShiftAmt)); (psrc1 >> (2 * dataBits - realShiftAmt));
} }
DestReg = merge(DestReg, result, dataSize); %s
''' '''
code = sldCode % "DestReg = merge(DestReg, result, dataSize);"
big_code = sldCode % "DestReg = result & mask(dataSize * 8);"
flag_code = ''' flag_code = '''
// If the shift amount is zero, no flags should be modified. // If the shift amount is zero, no flags should be modified.
if (shiftAmt) { if (shiftAmt) {
@ -899,10 +970,10 @@ let {{
''' '''
class Srd(RegOp): class Srd(RegOp):
code = ''' srdCode = '''
uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5))); uint8_t shiftAmt = (op2 & ((dataSize == 8) ? mask(6) : mask(5)));
uint8_t dataBits = dataSize * 8; uint8_t dataBits = dataSize * 8;
uint8_t realShiftAmt = shiftAmt % (2 * dataBits); uint8_t realShiftAmt = shiftAmt %% (2 * dataBits);
uint64_t result; uint64_t result;
if (realShiftAmt == 0) { if (realShiftAmt == 0) {
result = psrc1; result = psrc1;
@ -919,8 +990,10 @@ let {{
logicalMask) | logicalMask) |
(psrc1 << (2 * dataBits - realShiftAmt)); (psrc1 << (2 * dataBits - realShiftAmt));
} }
DestReg = merge(DestReg, result, dataSize); %s
''' '''
code = srdCode % "DestReg = merge(DestReg, result, dataSize);"
big_code = srdCode % "DestReg = result & mask(dataSize * 8);"
flag_code = ''' flag_code = '''
// If the shift amount is zero, no flags should be modified. // If the shift amount is zero, no flags should be modified.
if (shiftAmt) { if (shiftAmt) {
@ -986,6 +1059,12 @@ let {{
ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) :
(ccFlagBits & ~EZFBit); (ccFlagBits & ~EZFBit);
''' '''
big_code = '''
int flag = bits(ccFlagBits, imm8);
DestReg = flag & mask(dataSize * 8);
ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) :
(ccFlagBits & ~EZFBit);
'''
def __init__(self, dest, imm, flags=None, \ def __init__(self, dest, imm, flags=None, \
dataSize="env.dataSize"): dataSize="env.dataSize"):
super(Ruflag, self).__init__(dest, \ super(Ruflag, self).__init__(dest, \
@ -1000,6 +1079,14 @@ let {{
ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) : ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) :
(ccFlagBits & ~EZFBit); (ccFlagBits & ~EZFBit);
''' '''
big_code = '''
MiscReg flagMask = 0x3F7FDD5;
MiscReg flags = (nccFlagBits | ccFlagBits) & flagMask;
int flag = bits(flags, imm8);
DestReg = flag & mask(dataSize * 8);
ccFlagBits = (flag == 0) ? (ccFlagBits | EZFBit) :
(ccFlagBits & ~EZFBit);
'''
def __init__(self, dest, imm, flags=None, \ def __init__(self, dest, imm, flags=None, \
dataSize="env.dataSize"): dataSize="env.dataSize"):
super(Rflag, self).__init__(dest, \ super(Rflag, self).__init__(dest, \
@ -1015,6 +1102,15 @@ let {{
val = sign_bit ? (val | ~maskVal) : (val & maskVal); val = sign_bit ? (val | ~maskVal) : (val & maskVal);
DestReg = merge(DestReg, val, dataSize); DestReg = merge(DestReg, val, dataSize);
''' '''
big_code = '''
IntReg val = psrc1;
// Mask the bit position so that it wraps.
int bitPos = op2 & (dataSize * 8 - 1);
int sign_bit = bits(val, bitPos, bitPos);
uint64_t maskVal = mask(bitPos+1);
val = sign_bit ? (val | ~maskVal) : (val & maskVal);
DestReg = val & mask(dataSize * 8);
'''
flag_code = ''' flag_code = '''
if (!sign_bit) if (!sign_bit)
ccFlagBits = ccFlagBits & ccFlagBits = ccFlagBits &
@ -1026,12 +1122,13 @@ let {{
class Zext(RegOp): class Zext(RegOp):
code = 'DestReg = merge(DestReg, bits(psrc1, op2, 0), dataSize);' code = 'DestReg = merge(DestReg, bits(psrc1, op2, 0), dataSize);'
big_code = 'DestReg = bits(psrc1, op2, 0) & mask(dataSize * 8);'
class Rddr(RegOp): class Rddr(RegOp):
def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):
super(Rddr, self).__init__(dest, \ super(Rddr, self).__init__(dest, \
src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize) src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize)
code = ''' rdrCode = '''
CR4 cr4 = CR4Op; CR4 cr4 = CR4Op;
DR7 dr7 = DR7Op; DR7 dr7 = DR7Op;
if ((cr4.de == 1 && (src1 == 4 || src1 == 5)) || src1 >= 8) { if ((cr4.de == 1 && (src1 == 4 || src1 == 5)) || src1 >= 8) {
@ -1039,9 +1136,11 @@ let {{
} else if (dr7.gd) { } else if (dr7.gd) {
fault = new DebugException(); fault = new DebugException();
} else { } else {
DestReg = merge(DestReg, DebugSrc1, dataSize); %s
} }
''' '''
code = rdrCode % "DestReg = merge(DestReg, DebugSrc1, dataSize);"
big_code = rdrCode % "DestReg = DebugSrc1 & mask(dataSize * 8);"
class Wrdr(RegOp): class Wrdr(RegOp):
def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):
@ -1066,13 +1165,15 @@ let {{
def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):
super(Rdcr, self).__init__(dest, \ super(Rdcr, self).__init__(dest, \
src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize) src1, "InstRegIndex(NUM_INTREGS)", flags, dataSize)
code = ''' rdcrCode = '''
if (src1 == 1 || (src1 > 4 && src1 < 8) || (src1 > 8)) { if (src1 == 1 || (src1 > 4 && src1 < 8) || (src1 > 8)) {
fault = new InvalidOpcode(); fault = new InvalidOpcode();
} else { } else {
DestReg = merge(DestReg, ControlSrc1, dataSize); %s
} }
''' '''
code = rdcrCode % "DestReg = merge(DestReg, ControlSrc1, dataSize);"
big_code = rdcrCode % "DestReg = ControlSrc1 & mask(dataSize * 8);"
class Wrcr(RegOp): class Wrcr(RegOp):
def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):
@ -1154,24 +1255,20 @@ let {{
''' '''
class Rdbase(SegOp): class Rdbase(SegOp):
code = ''' code = 'DestReg = merge(DestReg, SegBaseSrc1, dataSize);'
DestReg = merge(DestReg, SegBaseSrc1, dataSize); big_code = 'DestReg = SegBaseSrc1 & mask(dataSize * 8);'
'''
class Rdlimit(SegOp): class Rdlimit(SegOp):
code = ''' code = 'DestReg = merge(DestReg, SegLimitSrc1, dataSize);'
DestReg = merge(DestReg, SegLimitSrc1, dataSize); big_code = 'DestReg = SegLimitSrc1 & mask(dataSize * 8);'
'''
class RdAttr(SegOp): class RdAttr(SegOp):
code = ''' code = 'DestReg = merge(DestReg, SegAttrSrc1, dataSize);'
DestReg = merge(DestReg, SegAttrSrc1, dataSize); big_code = 'DestReg = SegAttrSrc1 & mask(dataSize * 8);'
'''
class Rdsel(SegOp): class Rdsel(SegOp):
code = ''' code = 'DestReg = merge(DestReg, SegSelSrc1, dataSize);'
DestReg = merge(DestReg, SegSelSrc1, dataSize); big_code = 'DestReg = SegSelSrc1 & mask(dataSize * 8);'
'''
class Rdval(RegOp): class Rdval(RegOp):
def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"): def __init__(self, dest, src1, flags=None, dataSize="env.dataSize"):