gem5/src/arch/x86/isa/microops/mediaop.isa

1503 lines
52 KiB
Text
Raw Normal View History

/// Copyright (c) 2009 The Regents of The University of Michigan
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black
def template MediaOpExecute {{
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
%(code)s;
//Write the resulting state to the execution context
if(fault == NoFault)
{
%(op_wb)s;
}
return fault;
}
}};
def template MediaOpRegDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst _machInst,
const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
%(BasicExecDeclare)s
};
}};
def template MediaOpImmDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst _machInst,
const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
%(BasicExecDeclare)s
};
}};
def template MediaOpRegConstructor {{
inline %(class_name)s::%(class_name)s(
ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
%(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
_src1, _src2, _dest, _srcSize, _destSize, _ext,
%(op_class)s)
{
%(constructor)s;
}
}};
def template MediaOpImmConstructor {{
inline %(class_name)s::%(class_name)s(
ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
%(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
_src1, _imm8, _dest, _srcSize, _destSize, _ext,
%(op_class)s)
{
%(constructor)s;
}
}};
let {{
# Make these empty strings so that concatenating onto
# them will always work.
header_output = ""
decoder_output = ""
exec_output = ""
immTemplates = (
MediaOpImmDeclare,
MediaOpImmConstructor,
MediaOpExecute)
regTemplates = (
MediaOpRegDeclare,
MediaOpRegConstructor,
MediaOpExecute)
class MediaOpMeta(type):
def buildCppClasses(self, name, Name, suffix, code):
# Globals to stick the output in
global header_output
global decoder_output
global exec_output
# If op2 is used anywhere, make register and immediate versions
# of this code.
matcher = re.compile(r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")
match = matcher.search(code)
if match:
typeQual = ""
if match.group("typeQual"):
typeQual = match.group("typeQual")
src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
self.buildCppClasses(name, Name, suffix,
matcher.sub(src2_name, code))
self.buildCppClasses(name + "i", Name, suffix + "Imm",
matcher.sub("imm8", code))
return
base = "X86ISA::MediaOp"
# If imm8 shows up in the code, use the immediate templates, if
# not, hopefully the register ones will be correct.
matcher = re.compile("(?<!\w)imm8(?!\w)")
if matcher.search(code):
base += "Imm"
templates = immTemplates
else:
base += "Reg"
templates = regTemplates
# Get everything ready for the substitution
iop = InstObjParams(name, Name + suffix, base, {"code" : code})
# Generate the actual code (finally!)
header_output += templates[0].subst(iop)
decoder_output += templates[1].subst(iop)
exec_output += templates[2].subst(iop)
def __new__(mcls, Name, bases, dict):
abstract = False
name = Name.lower()
if "abstract" in dict:
abstract = dict['abstract']
del dict['abstract']
cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
if not abstract:
cls.className = Name
cls.base_mnemonic = name
code = cls.code
# Set up the C++ classes
mcls.buildCppClasses(cls, name, Name, "", code)
# Hook into the microassembler dict
global microopClasses
microopClasses[name] = cls
# If op2 is used anywhere, make register and immediate versions
# of this code.
matcher = re.compile(r"op2(?P<typeQual>_[^\W_]+)?")
if matcher.search(code):
microopClasses[name + 'i'] = cls
return cls
class MediaOp(X86Microop):
__metaclass__ = MediaOpMeta
# This class itself doesn't act as a microop
abstract = True
def __init__(self, dest, src1, op2,
size = None, destSize = None, srcSize = None, ext = None):
self.dest = dest
self.src1 = src1
self.op2 = op2
if size is not None:
self.srcSize = size
self.destSize = size
if srcSize is not None:
self.srcSize = srcSize
if destSize is not None:
self.destSize = destSize
if self.srcSize is None:
raise Exception, "Source size not set."
if self.destSize is None:
raise Exception, "Dest size not set."
if ext is None:
self.ext = 0
else:
self.ext = ext
def getAllocator(self, microFlags):
className = self.className
if self.mnemonic == self.base_mnemonic + 'i':
className += "Imm"
allocator = '''new %(class_name)s(machInst, macrocodeBlock,
%(flags)s, %(src1)s, %(op2)s, %(dest)s,
%(srcSize)s, %(destSize)s, %(ext)s)''' % {
"class_name" : className,
"flags" : self.microFlagsText(microFlags),
"src1" : self.src1, "op2" : self.op2,
"dest" : self.dest,
"srcSize" : self.srcSize,
"destSize" : self.destSize,
"ext" : self.ext}
return allocator
class Mov2int(MediaOp):
def __init__(self, dest, src1, src2 = 0, \
size = None, destSize = None, srcSize = None, ext = None):
super(Mov2int, self).__init__(dest, src1,\
src2, size, destSize, srcSize, ext)
code = '''
int items = sizeof(FloatRegBits) / srcSize;
int offset = imm8;
if (bits(src1, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
uint64_t fpSrcReg1 =
bits(FpSrcReg1_uqw,
(offset + 1) * srcSize * 8 - 1,
(offset + 0) * srcSize * 8);
DestReg = merge(0, fpSrcReg1, destSize);
} else {
DestReg = DestReg;
}
'''
class Mov2fp(MediaOp):
def __init__(self, dest, src1, src2 = 0, \
size = None, destSize = None, srcSize = None, ext = None):
super(Mov2fp, self).__init__(dest, src1,\
src2, size, destSize, srcSize, ext)
code = '''
int items = sizeof(FloatRegBits) / destSize;
int offset = imm8;
if (bits(dest, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
FpDestReg_uqw =
insertBits(FpDestReg_uqw,
(offset + 1) * destSize * 8 - 1,
(offset + 0) * destSize * 8, srcReg1);
} else {
FpDestReg_uqw = FpDestReg_uqw;
}
'''
2009-08-18 03:15:39 +02:00
class Movsign(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Movsign, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
int items = sizeof(FloatRegBits) / srcSize;
uint64_t result = 0;
int offset = (ext & 0x1) ? items : 0;
for (int i = 0; i < items; i++) {
uint64_t picked =
bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
result = insertBits(result, i + offset, i + offset, picked);
}
DestReg = DestReg | result;
'''
2009-08-18 05:22:56 +02:00
class Maskmov(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
2009-08-18 05:22:56 +02:00
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
if (bits(FpSrcReg2_uqw, hiIndex))
2009-08-18 05:22:56 +02:00
result = insertBits(result, hiIndex, loIndex, arg1Bits);
}
FpDestReg_uqw = result;
2009-08-18 05:22:56 +02:00
'''
class shuffle(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = sizeof(FloatRegBits) / size;
int options;
int optionBits;
if (size == 8) {
options = 2;
optionBits = 1;
} else {
options = 4;
optionBits = 2;
}
uint64_t result = 0;
uint8_t sel = ext;
for (int i = 0; i < items; i++) {
uint64_t resBits;
uint8_t lsel = sel & mask(optionBits);
if (lsel * size >= sizeof(FloatRegBits)) {
lsel -= options / 2;
resBits = bits(FpSrcReg2_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
} else {
resBits = bits(FpSrcReg1_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
}
sel >>= optionBits;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
2009-08-18 03:15:39 +02:00
class Unpack(MediaOp):
code = '''
assert(srcSize == destSize);
int size = destSize;
int items = (sizeof(FloatRegBits) / size) / 2;
int offset = ext ? items : 0;
2009-08-18 03:15:39 +02:00
uint64_t result = 0;
for (int i = 0; i < items; i++) {
uint64_t pickedLow =
bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
2009-08-18 03:15:39 +02:00
(i + offset) * 8 * size);
result = insertBits(result,
(2 * i + 1) * 8 * size - 1,
(2 * i + 0) * 8 * size,
pickedLow);
uint64_t pickedHigh =
bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
2009-08-18 03:15:39 +02:00
(i + offset) * 8 * size);
result = insertBits(result,
(2 * i + 2) * 8 * size - 1,
(2 * i + 1) * 8 * size,
pickedHigh);
}
FpDestReg_uqw = result;
2009-08-18 03:15:39 +02:00
'''
2009-08-18 03:22:33 +02:00
2009-08-18 03:27:54 +02:00
class Pack(MediaOp):
code = '''
assert(srcSize == destSize * 2);
int items = (sizeof(FloatRegBits) / destSize);
int destBits = destSize * 8;
int srcBits = srcSize * 8;
uint64_t result = 0;
int i;
for (i = 0; i < items / 2; i++) {
uint64_t picked =
bits(FpSrcReg1_uqw, (i + 1) * srcBits - 1,
2009-08-18 03:27:54 +02:00
(i + 0) * srcBits);
unsigned signBit = bits(picked, srcBits - 1);
uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
// Handle saturation.
if (signBit) {
if (overflow != mask(destBits - srcBits + 1)) {
if (signedOp())
picked = (ULL(1) << (destBits - 1));
2009-08-18 03:27:54 +02:00
else
picked = 0;
}
} else {
if (overflow != 0) {
if (signedOp())
2009-08-18 03:27:54 +02:00
picked = mask(destBits - 1);
else
picked = mask(destBits);
}
}
result = insertBits(result,
(i + 1) * destBits - 1,
(i + 0) * destBits,
picked);
}
for (;i < items; i++) {
uint64_t picked =
bits(FpSrcReg2_uqw, (i - items + 1) * srcBits - 1,
2009-08-18 03:27:54 +02:00
(i - items + 0) * srcBits);
unsigned signBit = bits(picked, srcBits - 1);
uint64_t overflow = bits(picked, srcBits - 1, destBits - 1);
// Handle saturation.
if (signBit) {
if (overflow != mask(destBits - srcBits + 1)) {
if (signedOp())
picked = (ULL(1) << (destBits - 1));
2009-08-18 03:27:54 +02:00
else
picked = 0;
}
} else {
if (overflow != 0) {
if (signedOp())
2009-08-18 03:27:54 +02:00
picked = mask(destBits - 1);
else
picked = mask(destBits);
}
}
result = insertBits(result,
(i + 1) * destBits - 1,
(i + 0) * destBits,
picked);
}
FpDestReg_uqw = result;
2009-08-18 03:27:54 +02:00
'''
2009-08-18 03:22:33 +02:00
class Mxor(MediaOp):
def __init__(self, dest, src1, src2):
super(Mxor, self).__init__(dest, src1, src2, 1)
code = '''
FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
2009-08-18 03:22:33 +02:00
'''
2009-08-18 03:23:30 +02:00
class Mor(MediaOp):
def __init__(self, dest, src1, src2):
super(Mor, self).__init__(dest, src1, src2, 1)
code = '''
FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
2009-08-18 03:23:30 +02:00
'''
class Mand(MediaOp):
def __init__(self, dest, src1, src2):
super(Mand, self).__init__(dest, src1, src2, 1)
code = '''
FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
'''
class Mandn(MediaOp):
def __init__(self, dest, src1, src2):
super(Mandn, self).__init__(dest, src1, src2, 1)
code = '''
FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
'''
class Mminf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 < arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
class Mmaxf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 > arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
2009-08-18 05:04:02 +02:00
class Mmini(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
2009-08-18 05:04:02 +02:00
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
2009-08-18 05:04:02 +02:00
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
2009-08-18 05:04:02 +02:00
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
2009-08-18 05:04:02 +02:00
uint64_t resBits;
if (signedOp()) {
2009-08-18 05:04:02 +02:00
if (arg1 < arg2) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
} else {
if (arg1Bits < arg2Bits) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mmaxi(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
uint64_t resBits;
if (signedOp()) {
if (arg1 > arg2) {
resBits = arg1Bits;
} else {
resBits = arg2Bits;
}
} else {
if (arg1Bits > arg2Bits) {
resBits = arg1Bits;
} else {
2009-08-18 05:04:02 +02:00
resBits = arg2Bits;
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
2009-08-18 05:04:02 +02:00
'''
class Msqrt(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Msqrt, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = argBits;
fi.f = sqrt(fi.f);
argBits = fi.i;
} else {
doubleInt di;
di.i = argBits;
di.d = sqrt(di.d);
argBits = di.i;
}
result = insertBits(result, hiIndex, loIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Maddf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f + arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d + arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msubf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f - arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d - arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mmulf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f * arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d * arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mdivf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f / arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d / arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Maddi(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits + arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (ULL(1) << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
resBits = mask(sizeBits);
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msubi(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits - arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = !bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (ULL(1) << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (arg2Bits > arg1Bits) {
resBits = 0;
} else if (!findCarry(sizeBits, resBits,
arg1Bits, ~arg2Bits)) {
resBits = mask(sizeBits);
}
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mmuli(MediaOp):
code = '''
int srcBits = srcSize * 8;
int destBits = destSize * 8;
assert(destBits <= 64);
assert(destSize >= srcSize);
int items = numItems(destSize);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int offset = 0;
if (ext & 16) {
if (ext & 32)
offset = i * (destBits - srcBits);
else
offset = i * (destBits - srcBits) + srcBits;
}
int srcHiIndex = (i + 1) * srcBits - 1 + offset;
int srcLoIndex = (i + 0) * srcBits + offset;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
uint64_t resBits;
if (signedOp()) {
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
resBits = (uint64_t)(arg1 * arg2);
} else {
resBits = arg1Bits * arg2Bits;
}
if (ext & 0x4)
resBits += (ULL(1) << (destBits - 1));
if (multHi())
resBits >>= destBits;
int destHiIndex = (i + 1) * destBits - 1;
int destLoIndex = (i + 0) * destBits;
result = insertBits(result, destHiIndex, destLoIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mavg(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msad(MediaOp):
code = '''
int srcBits = srcSize * 8;
int items = sizeof(FloatRegBits) / srcSize;
uint64_t sum = 0;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * srcBits - 1;
int loIndex = (i + 0) * srcBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t resBits = arg1Bits - arg2Bits;
if (resBits < 0)
resBits = -resBits;
sum += resBits;
}
FpDestReg_uqw = sum & mask(destSize * 8);
'''
class Msrl(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt) &
mask(sizeBits - shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msra(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
if (bits(arg1Bits, sizeBits - 1))
resBits = mask(sizeBits);
else
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt);
resBits = resBits |
(0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Msll(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits << shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Cvtf2i(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Cvtf2i, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
double arg;
if (srcSize == 4) {
floatInt fi;
fi.i = argBits;
arg = fi.f;
} else {
doubleInt di;
di.i = argBits;
arg = di.d;
}
if (ext & 0x4) {
if (arg >= 0)
arg += 0.5;
else
arg -= 0.5;
}
if (destSize == 4) {
argBits = (uint32_t)arg;
} else {
argBits = (uint64_t)arg;
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Cvti2f(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Cvti2f, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
int64_t sArg = argBits |
(0 - (argBits & (ULL(1) << (srcSizeBits - 1))));
double arg = sArg;
if (destSize == 4) {
floatInt fi;
fi.f = arg;
argBits = fi.i;
} else {
doubleInt di;
di.d = arg;
argBits = di.i;
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Cvtf2f(MediaOp):
def __init__(self, dest, src, \
size = None, destSize = None, srcSize = None, ext = None):
super(Cvtf2f, self).__init__(dest, src,\
"InstRegIndex(0)", size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
double arg;
if (srcSize == 4) {
floatInt fi;
fi.i = argBits;
arg = fi.f;
} else {
doubleInt di;
di.i = argBits;
arg = di.d;
}
if (destSize == 4) {
floatInt fi;
fi.f = arg;
argBits = fi.i;
} else {
doubleInt di;
di.d = arg;
argBits = di.i;
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Mcmpi2r(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
uint64_t resBits = 0;
if (((ext & 0x2) == 0 && arg1 == arg2) ||
((ext & 0x2) == 0x2 && arg1 > arg2))
resBits = mask(sizeBits);
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mcmpf2r(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
double arg1, arg2;
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
uint64_t resBits = 0;
clang/gcc: Fix compilation issues with clang 3.0 and gcc 4.6 This patch addresses a number of minor issues that cause problems when compiling with clang >= 3.0 and gcc >= 4.6. Most importantly, it avoids using the deprecated ext/hash_map and instead uses unordered_map (and similarly so for the hash_set). To make use of the new STL containers, g++ and clang has to be invoked with "-std=c++0x", and this is now added for all gcc versions >= 4.6, and for clang >= 3.0. For gcc >= 4.3 and <= 4.5 and clang <= 3.0 we use the tr1 unordered_map to avoid the deprecation warning. The addition of c++0x in turn causes a few problems, as the compiler is more stringent and adds a number of new warnings. Below, the most important issues are enumerated: 1) the use of namespaces is more strict, e.g. for isnan, and all headers opening the entire namespace std are now fixed. 2) another other issue caused by the more stringent compiler is the narrowing of the embedded python, which used to be a char array, and is now unsigned char since there were values larger than 128. 3) a particularly odd issue that arose with the new c++0x behaviour is found in range.hh, where the operator< causes gcc to complain about the template type parsing (the "<" is interpreted as the beginning of a template argument), and the problem seems to be related to the begin/end members introduced for the range-type iteration, which is a new feature in c++11. As a minor update, this patch also fixes the build flags for the clang debug target that used to be shared with gcc and incorrectly use "-ggdb".
2012-04-14 11:43:31 +02:00
bool nanop = std::isnan(arg1) || std::isnan(arg2);
switch (ext & mask(3)) {
case 0:
if (arg1 == arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 1:
if (arg1 < arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 2:
if (arg1 <= arg2 && !nanop)
resBits = mask(sizeBits);
break;
case 3:
if (nanop)
resBits = mask(sizeBits);
break;
case 4:
if (arg1 != arg2 || nanop)
resBits = mask(sizeBits);
break;
case 5:
if (!(arg1 < arg2) || nanop)
resBits = mask(sizeBits);
break;
case 6:
if (!(arg1 <= arg2) || nanop)
resBits = mask(sizeBits);
break;
case 7:
if (!nanop)
resBits = mask(sizeBits);
break;
};
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Mcmpf2rf(MediaOp):
def __init__(self, src1, src2,\
size = None, destSize = None, srcSize = None, ext = None):
super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,\
src2, size, destSize, srcSize, ext)
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
assert(srcSize == 4 || srcSize == 8);
int size = srcSize;
int sizeBits = size * 8;
double arg1, arg2;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, sizeBits - 1, 0);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, sizeBits - 1, 0);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
// ZF PF CF
// Unordered 1 1 1
// Greater than 0 0 0
// Less than 0 0 1
// Equal 1 0 0
// OF = SF = AF = 0
ccFlagBits = ccFlagBits & ~(OFBit | SFBit | AFBit |
ZFBit | PFBit | CFBit);
clang/gcc: Fix compilation issues with clang 3.0 and gcc 4.6 This patch addresses a number of minor issues that cause problems when compiling with clang >= 3.0 and gcc >= 4.6. Most importantly, it avoids using the deprecated ext/hash_map and instead uses unordered_map (and similarly so for the hash_set). To make use of the new STL containers, g++ and clang has to be invoked with "-std=c++0x", and this is now added for all gcc versions >= 4.6, and for clang >= 3.0. For gcc >= 4.3 and <= 4.5 and clang <= 3.0 we use the tr1 unordered_map to avoid the deprecation warning. The addition of c++0x in turn causes a few problems, as the compiler is more stringent and adds a number of new warnings. Below, the most important issues are enumerated: 1) the use of namespaces is more strict, e.g. for isnan, and all headers opening the entire namespace std are now fixed. 2) another other issue caused by the more stringent compiler is the narrowing of the embedded python, which used to be a char array, and is now unsigned char since there were values larger than 128. 3) a particularly odd issue that arose with the new c++0x behaviour is found in range.hh, where the operator< causes gcc to complain about the template type parsing (the "<" is interpreted as the beginning of a template argument), and the problem seems to be related to the begin/end members introduced for the range-type iteration, which is a new feature in c++11. As a minor update, this patch also fixes the build flags for the clang debug target that used to be shared with gcc and incorrectly use "-ggdb".
2012-04-14 11:43:31 +02:00
if (std::isnan(arg1) || std::isnan(arg2))
ccFlagBits = ccFlagBits | (ZFBit | PFBit | CFBit);
else if(arg1 < arg2)
ccFlagBits = ccFlagBits | CFBit;
else if(arg1 == arg2)
ccFlagBits = ccFlagBits | ZFBit;
'''
}};