gem5/src/arch/x86/isa/microops/mediaop.isa
Tony Gutierrez 0799600686 x86: fix issue with casting in Cvtf2i
UBSAN flags this operation because it detects that arg is being cast directly
to an unsigned type, argBits. this patch fixes this by first casting the
value to a signed int type, then reintrepreting the raw bits of the signed
int into argBits.
2016-11-21 15:35:56 -05:00

1551 lines
54 KiB
Plaintext

// Copyright (c) 2009 The Regents of The University of Michigan
// Copyright (c) 2015 Advanced Micro Devices, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black
def template MediaOpExecute {{
Fault %(class_name)s::execute(CPU_EXEC_CONTEXT *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
%(code)s;
//Write the resulting state to the execution context
if(fault == NoFault)
{
%(op_wb)s;
}
return fault;
}
}};
def template MediaOpRegDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst _machInst,
const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
%(BasicExecDeclare)s
};
}};
def template MediaOpImmDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst _machInst,
const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
%(BasicExecDeclare)s
};
}};
def template MediaOpRegConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
%(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
_src1, _src2, _dest, _srcSize, _destSize, _ext,
%(op_class)s)
{
%(constructor)s;
}
}};
def template MediaOpImmConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
%(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
_src1, _imm8, _dest, _srcSize, _destSize, _ext,
%(op_class)s)
{
%(constructor)s;
}
}};
let {{
# Make these empty strings so that concatenating onto
# them will always work.
header_output = ""
decoder_output = ""
exec_output = ""
immTemplates = (
MediaOpImmDeclare,
MediaOpImmConstructor,
MediaOpExecute)
regTemplates = (
MediaOpRegDeclare,
MediaOpRegConstructor,
MediaOpExecute)
class MediaOpMeta(type):
def buildCppClasses(self, name, Name, suffix, code):
# Globals to stick the output in
global header_output
global decoder_output
global exec_output
# If op2 is used anywhere, make register and immediate versions
# of this code.
matcher = re.compile(r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")
match = matcher.search(code)
if match:
typeQual = ""
if match.group("typeQual"):
typeQual = match.group("typeQual")
src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
self.buildCppClasses(name, Name, suffix,
matcher.sub(src2_name, code))
self.buildCppClasses(name + "i", Name, suffix + "Imm",
matcher.sub("imm8", code))
return
base = "X86ISA::MediaOp"
# If imm8 shows up in the code, use the immediate templates, if
# not, hopefully the register ones will be correct.
matcher = re.compile("(?<!\w)imm8(?!\w)")
if matcher.search(code):
base += "Imm"
templates = immTemplates
else:
base += "Reg"
templates = regTemplates
# Get everything ready for the substitution
iop = InstObjParams(name, Name + suffix, base, {"code" : code})
# Generate the actual code (finally!)
header_output += templates[0].subst(iop)
decoder_output += templates[1].subst(iop)
exec_output += templates[2].subst(iop)
def __new__(mcls, Name, bases, dict):
abstract = False
name = Name.lower()
if "abstract" in dict:
abstract = dict['abstract']
del dict['abstract']
cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
if not abstract:
cls.className = Name
cls.base_mnemonic = name
code = cls.code
# Set up the C++ classes
mcls.buildCppClasses(cls, name, Name, "", code)
# Hook into the microassembler dict
global microopClasses
microopClasses[name] = cls
# If op2 is used anywhere, make register and immediate versions
# of this code.
matcher = re.compile(r"op2(?P<typeQual>_[^\W_]+)?")
if matcher.search(code):
microopClasses[name + 'i'] = cls
return cls
class MediaOp(X86Microop):
__metaclass__ = MediaOpMeta
# This class itself doesn't act as a microop
abstract = True
def __init__(self, dest, src1, op2,
size = None, destSize = None, srcSize = None, ext = None):
self.dest = dest
self.src1 = src1
self.op2 = op2
if size is not None:
self.srcSize = size
self.destSize = size
if srcSize is not None:
self.srcSize = srcSize
if destSize is not None:
self.destSize = destSize
if self.srcSize is None:
raise Exception, "Source size not set."
if self.destSize is None:
raise Exception, "Dest size not set."
if ext is None:
self.ext = 0
else:
self.ext = ext
def getAllocator(self, microFlags):
className = self.className
if self.mnemonic == self.base_mnemonic + 'i':
className += "Imm"
allocator = '''new %(class_name)s(machInst, macrocodeBlock,
%(flags)s, %(src1)s, %(op2)s, %(dest)s,
%(srcSize)s, %(destSize)s, %(ext)s)''' % {
"class_name" : className,
"flags" : self.microFlagsText(microFlags),
"src1" : self.src1, "op2" : self.op2,
"dest" : self.dest,
"srcSize" : self.srcSize,
"destSize" : self.destSize,
"ext" : self.ext}
return allocator
class Mov2int(MediaOp):
def __init__(self, dest, src1, src2 = 0, \
size = None, destSize = None, srcSize = None, ext = None):
super(Mov2int, self).__init__(dest, src1,\
src2, size, destSize, srcSize, ext)
code = '''
int items = sizeof(FloatRegBits) / srcSize;
int offset = imm8;
if (bits(src1, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
uint64_t fpSrcReg1 =
bits(FpSrcReg1_uqw,
(offset + 1) * srcSize * 8 - 1,
(offset + 0) * srcSize * 8);
DestReg = merge(0, fpSrcReg1, destSize);
} else {
DestReg = DestReg;
}
'''
class Mov2fp(MediaOp):
def __init__(self, dest, src1, src2 = 0, \
size = None, destSize = None, srcSize = None, ext = None):
super(Mov2fp, self).__init__(dest, src1,\
src2, size, destSize, srcSize, ext)
code = '''
int items = sizeof(FloatRegBits) / destSize;
int offset = imm8;
if (bits(dest, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
FpDestReg_uqw =
insertBits(FpDestReg_uqw,
(offset + 1) * destSize * 8 - 1,
(offset + 0) * destSize * 8, srcReg1);
} else {
FpDestReg_uqw = FpDestReg_uqw;
}
'''
class Movsign(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Movsign, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
int items = sizeof(FloatRegBits) / srcSize;
uint64_t result = 0;
int offset = (ext & 0x1) ? items : 0;
for (int i = 0; i < items; i++) {
uint64_t picked =
bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
result = insertBits(result, i + offset, i + offset, picked);
}
DestReg = DestReg | result;
'''
class Maskmov(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
if (bits(FpSrcReg2_uqw, hiIndex))
result = insertBits(result, hiIndex, loIndex, arg1Bits);
}
FpDestReg_uqw = result;
'''
class shuffle(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = sizeof(FloatRegBits) / size;
int options;
int optionBits;
if (size == 8) {
options = 2;
optionBits = 1;
} else {
options = 4;
optionBits = 2;
}
uint64_t result = 0;
uint8_t sel = ext;
for (int i = 0; i < items; i++) {
uint64_t resBits;
uint8_t lsel = sel & mask(optionBits);
if (lsel * size >= sizeof(FloatRegBits)) {
lsel -= options / 2;
resBits = bits(FpSrcReg2_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
} else {
resBits = bits(FpSrcReg1_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
}
sel >>= optionBits;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Unpack(MediaOp):
code = '''
assert(srcSize == destSize);
int size = destSize;
int items = (sizeof(FloatRegBits) / size) / 2;
int offset = ext ? items : 0;
uint64_t result = 0;
for (int i = 0; i < items; i++) {
uint64_t pickedLow =
bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
(i + offset) * 8 * size);
result = insertBits(result,
(2 * i + 1) * 8 * size - 1,
(2 * i + 0) * 8 * size,
pickedLow);
uint64_t pickedHigh =
bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
(i + offset) * 8 * size);
result = insertBits(result,
(2 * i + 2) * 8 * size - 1,
(2 * i + 1) * 8 * size,
pickedHigh);
}
FpDestReg_uqw = result;
'''
class Pack(MediaOp):
code = '''
assert(srcSize == destSize * 2);
int items = (sizeof(FloatRegBits) / destSize);
int destBits = destSize * 8;
int srcBits = srcSize * 8;
uint64_t result = 0;
int i;
for (i = 0; i < items / 2; i++) {
uint64_t picked =
bits(FpSrcReg1_uqw, (i + 1) * srcBits - 1,
(i + 0) * srcBits);
unsigned signBit = bits(picked, srcBits - 1);
uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
// Handle saturation.
if (signBit) {
if (overflow != mask(destBits - srcBits + 1)) {
if (signedOp())
picked = (ULL(1) << (destBits - 1));
else
picked = 0;
}
} else {
if (overflow != 0) {
if (signedOp())
picked = mask(destBits - 1);
else
picked = mask(destBits);
}
}
result = insertBits(result,
(i + 1) * destBits - 1,
(i + 0) * destBits,
picked);
}
for (;i < items; i++) {
uint64_t picked =
bits(FpSrcReg2_uqw, (i - items + 1) * srcBits - 1,
(i - items + 0) * srcBits);
unsigned signBit = bits(picked, srcBits - 1);
uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
// Handle saturation.
if (signBit) {
if (overflow != mask(destBits - srcBits + 1)) {
if (signedOp())
picked = (ULL(1) << (destBits - 1));
else
picked = 0;
}
} else {
if (overflow != 0) {
if (signedOp())
picked = mask(destBits - 1);
else
picked = mask(destBits);
}
}
result = insertBits(result,
(i + 1) * destBits - 1,
(i + 0) * destBits,
picked);
}
FpDestReg_uqw = result;
'''
class Mxor(MediaOp):
def __init__(self, dest, src1, src2):
super(Mxor, self).__init__(dest, src1, src2, 1)
code = '''
FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
'''
class Mor(MediaOp):
def __init__(self, dest, src1, src2):
super(Mor, self).__init__(dest, src1, src2, 1)
code = '''
FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
'''
class Mand(MediaOp):
def __init__(self, dest, src1, src2):
super(Mand, self).__init__(dest, src1, src2, 1)
code = '''
FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
'''
class Mandn(MediaOp):
def __init__(self, dest, src1, src2):
super(Mandn, self).__init__(dest, src1, src2, 1)
code = '''
FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
'''
class Mminf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 < arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
class Mmaxf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 > arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
class Mmini(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
uint64_t resBits;
if (signedOp()) {
if (arg1 < arg2) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
} else {
if (arg1Bits < arg2Bits) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mmaxi(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
uint64_t resBits;
if (signedOp()) {
if (arg1 > arg2) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
} else {
if (arg1Bits > arg2Bits) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msqrt(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Msqrt, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = argBits;
fi.f = sqrt(fi.f);
argBits = fi.i;
} else {
doubleInt di;
di.i = argBits;
di.d = sqrt(di.d);
argBits = di.i;
}
result = insertBits(result, hiIndex, loIndex, argBits);
}
FpDestReg_uqw = result;
'''
# compute approximate reciprocal --- single-precision only
class Mrcp(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Mrcp, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
assert(srcSize == 4); // ISA defines single-precision only
assert(srcSize == destSize);
const int size = 4;
const int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
floatInt fi;
fi.i = argBits;
// This is more accuracy than HW provides, but oh well
fi.f = 1.0 / fi.f;
argBits = fi.i;
result = insertBits(result, hiIndex, loIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Maddf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f + arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d + arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msubf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f - arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d - arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mmulf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f * arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d * arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mdivf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f / arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d / arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Maddi(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits + arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (ULL(1) << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
resBits = mask(sizeBits);
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msubi(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits - arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = !bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (ULL(1) << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (arg2Bits > arg1Bits) {
resBits = 0;
} else if (!findCarry(sizeBits, resBits,
arg1Bits, ~arg2Bits)) {
resBits = mask(sizeBits);
}
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mmuli(MediaOp):
code = '''
int srcBits = srcSize * 8;
int destBits = destSize * 8;
assert(destBits <= 64);
assert(destSize >= srcSize);
int items = numItems(destSize);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int offset = 0;
if (ext & 16) {
if (ext & 32)
offset = i * (destBits - srcBits);
else
offset = i * (destBits - srcBits) + srcBits;
}
int srcHiIndex = (i + 1) * srcBits - 1 + offset;
int srcLoIndex = (i + 0) * srcBits + offset;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
uint64_t resBits;
if (signedOp()) {
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
resBits = (uint64_t)(arg1 * arg2);
} else {
resBits = arg1Bits * arg2Bits;
}
if (ext & 0x4)
resBits += (ULL(1) << (destBits - 1));
if (multHi())
resBits >>= destBits;
int destHiIndex = (i + 1) * destBits - 1;
int destLoIndex = (i + 0) * destBits;
result = insertBits(result, destHiIndex, destLoIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mavg(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msad(MediaOp):
code = '''
int srcBits = srcSize * 8;
int items = sizeof(FloatRegBits) / srcSize;
uint64_t sum = 0;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * srcBits - 1;
int loIndex = (i + 0) * srcBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t resBits = arg1Bits - arg2Bits;
if (resBits < 0)
resBits = -resBits;
sum += resBits;
}
FpDestReg_uqw = sum & mask(destSize * 8);
'''
class Msrl(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt) &
mask(sizeBits - shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msra(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
if (bits(arg1Bits, sizeBits - 1))
resBits = mask(sizeBits);
else
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt);
resBits = resBits |
(0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msll(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits << shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Cvtf2i(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Cvtf2i, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
double arg;
if (srcSize == 4) {
floatInt fi;
fi.i = argBits;
arg = fi.f;
} else {
doubleInt di;
di.i = argBits;
arg = di.d;
}
if (ext & 0x4) {
if (arg >= 0)
arg += 0.5;
else
arg -= 0.5;
}
if (destSize == 4) {
int32_t i_arg = (int32_t)arg;
argBits = *((uint32_t*)&i_arg);
} else {
int64_t i_arg = (int64_t)arg;
argBits = *((uint64_t*)&i_arg);
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Cvti2f(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Cvti2f, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
int64_t sArg = argBits |
(0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
double arg = sArg;
if (destSize == 4) {
floatInt fi;
fi.f = arg;
argBits = fi.i;
} else {
doubleInt di;
di.d = arg;
argBits = di.i;
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Cvtf2f(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Cvtf2f, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
double arg;
if (srcSize == 4) {
floatInt fi;
fi.i = argBits;
arg = fi.f;
} else {
doubleInt di;
di.i = argBits;
arg = di.d;
}
if (destSize == 4) {
floatInt fi;
fi.f = arg;
argBits = fi.i;
} else {
doubleInt di;
di.d = arg;
argBits = di.i;
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Mcmpi2r(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
uint64_t resBits = 0;
if (((ext & 0x2) == 0 && arg1 == arg2) ||
((ext & 0x2) == 0x2 && arg1 > arg2))
resBits = mask(sizeBits);
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mcmpf2r(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
double arg1, arg2;
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
uint64_t resBits = 0;
bool nanop = std::isnan(arg1) || std::isnan(arg2);
switch (ext & mask(3)) {
case 0:
if (arg1 == arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 1:
if (arg1 < arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 2:
if (arg1 <= arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 3:
if (nanop)
resBits = mask(sizeBits);
break;
case 4:
if (arg1 != arg2 || nanop)
resBits = mask(sizeBits);
break;
case 5:
if (!(arg1 < arg2) || nanop)
resBits = mask(sizeBits);
break;
case 6:
if (!(arg1 <= arg2) || nanop)
resBits = mask(sizeBits);
break;
case 7:
if (!nanop)
resBits = mask(sizeBits);
break;
};
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mcmpf2rf(MediaOp):
def __init__(self, src1, src2,\
size = None, destSize = None, srcSize = None, ext = None):
super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
src2, size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
assert(srcSize == 4 || srcSize == 8);
int size = srcSize;
int sizeBits = size * 8;
double arg1, arg2;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
// ZF PF CF
// Unordered 1 1 1
// Greater than 0 0 0
// Less than 0 0 1
// Equal 1 0 0
// OF = SF = AF = 0
ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
cfofBits = cfofBits & ~(OFBit | CFBit);
if (std::isnan(arg1) || std::isnan(arg2)) {
ccFlagBits = ccFlagBits | (ZFBit | PFBit);
cfofBits = cfofBits | CFBit;
}
else if(arg1 < arg2)
cfofBits = cfofBits | CFBit;
else if(arg1 == arg2)
ccFlagBits = ccFlagBits | ZFBit;
'''
class Emms(MediaOp):
def __init__(self):
super(Emms, self).__init__('InstRegIndex(MISCREG_FTW)',
'InstRegIndex(0)', 'InstRegIndex(0)', 2)
code = 'FTW = 0xFFFF;'
}};