// gem5/src/arch/x86/isa/microops/mediaop.isa

// Copyright (c) 2009 The Regents of The University of Michigan
// Copyright (c) 2015 Advanced Micro Devices, Inc.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black
// Shared execute() body for the media microops. It expands the operand
// declaration and read substitutions, then the microop's own code, and
// expands the operand write-back substitution only while "fault" is still
// NoFault.
def template MediaOpExecute {{
    Fault %(class_name)s::execute(CPU_EXEC_CONTEXT *xc,
            Trace::InstRecord *traceData) const
    {
        Fault fault = NoFault;

        %(op_decl)s;
        %(op_rd)s;

        %(code)s;

        //Write the resulting state to the execution context
        if(fault == NoFault)
        {
            %(op_wb)s;
        }
        return fault;
    }
}};
// Class declaration template for the register form of a media microop.
// The constructor takes two source register indices and a destination
// register index, followed by the source/destination sizes and the "ext"
// value; the execute declaration comes from %(BasicExecDeclare)s.
def template MediaOpRegDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst _machInst,
const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
%(BasicExecDeclare)s
};
}};
// Class declaration template for the immediate form of a media microop.
// It mirrors the register form except that the second source is passed as
// a uint16_t named _imm8 instead of a register index.
def template MediaOpImmDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst _machInst,
const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext);
%(BasicExecDeclare)s
};
}};
// Constructor definition template for the register form. All arguments are
// forwarded to the base class together with the mnemonic and op class
// substitutions; the body is whatever %(constructor)s expands to.
def template MediaOpRegConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
%(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
_src1, _src2, _dest, _srcSize, _destSize, _ext,
%(op_class)s)
{
%(constructor)s;
}
}};
// Constructor definition template for the immediate form. Identical in
// shape to the register form, but forwards _imm8 to the base class in the
// position of the second source register.
def template MediaOpImmConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, uint16_t _imm8, InstRegIndex _dest,
uint8_t _srcSize, uint8_t _destSize, uint16_t _ext) :
%(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
_src1, _imm8, _dest, _srcSize, _destSize, _ext,
%(op_class)s)
{
%(constructor)s;
}
}};
let {{
# Every output section starts out as an empty string so that later code
# can append to it unconditionally.
header_output = decoder_output = exec_output = ""

# Template triples, indexed as (declaration, constructor, execute).
immTemplates = (MediaOpImmDeclare, MediaOpImmConstructor, MediaOpExecute)
regTemplates = (MediaOpRegDeclare, MediaOpRegConstructor, MediaOpExecute)
class MediaOpMeta(type):
    # Metaclass for media microops. For every class that is not marked
    # "abstract" it generates the C++ class text from the class's "code"
    # string and registers the class in the microassembler's dictionary.

    # Matches a use of the generic second operand ("op2" or "sop2", with an
    # optional type qualifier such as "_uqw") that is not the tail of a
    # longer identifier. Compiled once and shared by both methods so they
    # can never disagree about whether a microop has an immediate form.
    _op2Matcher = re.compile(
            r"(?<!\w)(?P<prefix>s?)op2(?P<typeQual>_[^\W_]+)?")
    # Matches the immediate operand as a whole word.
    _imm8Matcher = re.compile(r"(?<!\w)imm8(?!\w)")

    def buildCppClasses(self, name, Name, suffix, code):
        # Appends the declaration, constructor and execute text for one
        # microop to the three output globals.
        #   name   - microop mnemonic (lower case)
        #   Name   - base name of the C++ class
        #   suffix - appended to Name to form the C++ class name
        #   code   - C++ body of the microop

        # Globals to stick the output in
        global header_output
        global decoder_output
        global exec_output

        # If op2 is used anywhere, make register and immediate versions
        # of this code.
        matcher = MediaOpMeta._op2Matcher
        match = matcher.search(code)
        if match:
            typeQual = match.group("typeQual") or ""
            src2_name = "%sFpSrcReg2%s" % (match.group("prefix"), typeQual)
            self.buildCppClasses(name, Name, suffix,
                    matcher.sub(src2_name, code))
            self.buildCppClasses(name + "i", Name, suffix + "Imm",
                    matcher.sub("imm8", code))
            return

        base = "X86ISA::MediaOp"

        # If imm8 shows up in the code, use the immediate templates, if
        # not, hopefully the register ones will be correct.
        if MediaOpMeta._imm8Matcher.search(code):
            base += "Imm"
            templates = immTemplates
        else:
            base += "Reg"
            templates = regTemplates

        # Get everything ready for the substitution
        iop = InstObjParams(name, Name + suffix, base, {"code" : code})

        # Generate the actual code (finally!)
        header_output += templates[0].subst(iop)
        decoder_output += templates[1].subst(iop)
        exec_output += templates[2].subst(iop)

    def __new__(mcls, Name, bases, dict):
        # Creates the Python class. "abstract" is consumed here and never
        # becomes a class attribute.
        abstract = False
        name = Name.lower()
        if "abstract" in dict:
            abstract = dict['abstract']
            del dict['abstract']

        cls = super(MediaOpMeta, mcls).__new__(mcls, Name, bases, dict)
        if not abstract:
            cls.className = Name
            cls.base_mnemonic = name
            code = cls.code
            # Set up the C++ classes
            mcls.buildCppClasses(cls, name, Name, "", code)

            # Hook into the microassembler dict
            global microopClasses
            microopClasses[name] = cls

            # Register the immediate mnemonic under exactly the condition
            # buildCppClasses uses to generate the "Imm" C++ class.
            if mcls._op2Matcher.search(code):
                microopClasses[name + 'i'] = cls
        return cls
class MediaOp(X86Microop):
    # Common Python-side base of the media microops. It records the
    # operands and sizes given in microcode and produces the C++ "new"
    # expression that instantiates the matching generated class.
    __metaclass__ = MediaOpMeta
    # This class itself doesn't act as a microop
    abstract = True

    def __init__(self, dest, src1, op2,
            size = None, destSize = None, srcSize = None, ext = None):
        # dest, src1, op2 - operand text pasted into the allocator string
        # size            - sets both the source and destination size
        # srcSize/destSize - override the corresponding size individually
        # ext             - extension value; defaults to 0
        # Raises Exception if either size ends up unset.
        self.dest = dest
        self.src1 = src1
        self.op2 = op2
        # Start from "unset" so that the checks below report a missing
        # size themselves rather than depending on the attribute already
        # existing.
        self.srcSize = None
        self.destSize = None
        if size is not None:
            self.srcSize = size
            self.destSize = size
        if srcSize is not None:
            self.srcSize = srcSize
        if destSize is not None:
            self.destSize = destSize
        if self.srcSize is None:
            raise Exception("Source size not set.")
        if self.destSize is None:
            raise Exception("Dest size not set.")
        if ext is None:
            self.ext = 0
        else:
            self.ext = ext

    def getAllocator(self, microFlags):
        # Returns the C++ expression that allocates this microop. When the
        # microop was invoked through its "i" mnemonic, the "Imm" variant
        # of the class is named instead.
        className = self.className
        if self.mnemonic == self.base_mnemonic + 'i':
            className += "Imm"
        allocator = '''new %(class_name)s(machInst, macrocodeBlock,
                %(flags)s, %(src1)s, %(op2)s, %(dest)s,
                %(srcSize)s, %(destSize)s, %(ext)s)''' % {
            "class_name" : className,
            "flags" : self.microFlagsText(microFlags),
            "src1" : self.src1, "op2" : self.op2,
            "dest" : self.dest,
            "srcSize" : self.srcSize,
            "destSize" : self.destSize,
            "ext" : self.ext}
        return allocator
class Mov2int(MediaOp):
    # Moves one srcSize-byte item of a floating point register into an
    # integer register. The item is chosen by the immediate (src2, default
    # 0); when bit 0 of both src1 and ext is set, the item index is lowered
    # by the number of items per register first. An out-of-range index
    # leaves the destination as it was.
    def __init__(self, dest, src1, src2 = 0,
            size = None, destSize = None, srcSize = None, ext = None):
        super(Mov2int, self).__init__(dest, src1, op2 = src2, size = size,
                destSize = destSize, srcSize = srcSize, ext = ext)
    code = '''
int items = sizeof(FloatRegBits) / srcSize;
int offset = imm8;
if (bits(src1, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
uint64_t fpSrcReg1 =
bits(FpSrcReg1_uqw,
(offset + 1) * srcSize * 8 - 1,
(offset + 0) * srcSize * 8);
DestReg = merge(0, fpSrcReg1, destSize);
} else {
DestReg = DestReg;
}
'''
class Mov2fp(MediaOp):
    # Inserts an integer register value into one destSize-byte item of a
    # floating point register. The item is chosen by the immediate (src2,
    # default 0); when bit 0 of both dest and ext is set, the item index is
    # lowered by the number of items per register first. An out-of-range
    # index leaves the destination as it was.
    def __init__(self, dest, src1, src2 = 0,
            size = None, destSize = None, srcSize = None, ext = None):
        super(Mov2fp, self).__init__(dest, src1, op2 = src2, size = size,
                destSize = destSize, srcSize = srcSize, ext = ext)
    code = '''
int items = sizeof(FloatRegBits) / destSize;
int offset = imm8;
if (bits(dest, 0) && (ext & 0x1))
offset -= items;
if (offset >= 0 && offset < items) {
uint64_t srcReg1 = pick(SrcReg1, 0, srcSize);
FpDestReg_uqw =
insertBits(FpDestReg_uqw,
(offset + 1) * destSize * 8 - 1,
(offset + 0) * destSize * 8, srcReg1);
} else {
FpDestReg_uqw = FpDestReg_uqw;
}
'''
class Movsign(MediaOp):
    # Gathers the top bit of every srcSize-byte item of the source into
    # consecutive bits of an integer register and ORs them into the
    # destination. When bit 0 of ext is set the bits are placed "items"
    # positions higher. The second operand is fixed to InstRegIndex(0).
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        super(Movsign, self).__init__(dest, src, op2 = "InstRegIndex(0)",
                size = size, destSize = destSize, srcSize = srcSize,
                ext = ext)
    code = '''
int items = sizeof(FloatRegBits) / srcSize;
uint64_t result = 0;
int offset = (ext & 0x1) ? items : 0;
for (int i = 0; i < items; i++) {
uint64_t picked =
bits(FpSrcReg1_uqw, (i + 1) * 8 * srcSize - 1);
result = insertBits(result, i + offset, i + offset, picked);
}
DestReg = DestReg | result;
'''
class Maskmov(MediaOp):
    # Masked move: an item of the first source replaces the matching item
    # of the destination only when the top bit of the matching item of the
    # second source is set. Other destination items are left unchanged.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg_uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
            if (bits(FpSrcReg2_uqw, hiIndex))
                result = insertBits(result, hiIndex, loIndex, arg1Bits);
        }
        FpDestReg_uqw = result;
    '''
# Builds the destination item by item from the two sources. For every
# destination item the next field of ext (1 bit wide when size == 8,
# otherwise 2 bits wide) selects a source item. A selector whose byte
# offset (lsel * size) reaches sizeof(FloatRegBits) is reduced by
# options / 2 and reads from the second source; smaller selectors read
# from the first source.
class shuffle(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = sizeof(FloatRegBits) / size;
int options;
int optionBits;
if (size == 8) {
options = 2;
optionBits = 1;
} else {
options = 4;
optionBits = 2;
}
uint64_t result = 0;
uint8_t sel = ext;
for (int i = 0; i < items; i++) {
uint64_t resBits;
uint8_t lsel = sel & mask(optionBits);
if (lsel * size >= sizeof(FloatRegBits)) {
lsel -= options / 2;
resBits = bits(FpSrcReg2_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
} else {
resBits = bits(FpSrcReg1_uqw,
(lsel + 1) * sizeBits - 1,
(lsel + 0) * sizeBits);
}
sel >>= optionBits;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Unpack(MediaOp):
    # Interleaves items of the two sources: result item 2*i comes from the
    # first source and item 2*i + 1 from the second. A non-zero ext takes
    # the items from the upper half of each source instead of the lower.
    code = '''
        assert(srcSize == destSize);
        int size = destSize;
        int items = (sizeof(FloatRegBits) / size) / 2;
        int offset = ext ? items : 0;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            uint64_t pickedLow =
                bits(FpSrcReg1_uqw, (i + offset + 1) * 8 * size - 1,
                                    (i + offset) * 8 * size);
            result = insertBits(result,
                                (2 * i + 1) * 8 * size - 1,
                                (2 * i + 0) * 8 * size,
                                pickedLow);
            uint64_t pickedHigh =
                bits(FpSrcReg2_uqw, (i + offset + 1) * 8 * size - 1,
                                    (i + offset) * 8 * size);
            result = insertBits(result,
                                (2 * i + 2) * 8 * size - 1,
                                (2 * i + 1) * 8 * size,
                                pickedHigh);
        }
        FpDestReg_uqw = result;
    '''
class Pack(MediaOp):
    # Narrows signed srcSize-byte items to destSize-byte items (srcSize
    # must be twice destSize) with saturation. The low half of the result
    # is packed from the first source, the high half from the second.
    # signedOp() selects signed saturation; otherwise negative inputs
    # become 0 and inputs too large for the destination become all ones.
    #
    # Fixes relative to the earlier version of this code:
    #  - the second source was indexed with (i - items), which yields
    #    negative bit positions; the item index is now (i - items / 2);
    #  - the sign-extension check used mask(destBits - srcBits + 1), a
    #    negative width; the compared field is srcBits - destBits + 1 bits;
    #  - unsigned saturation examined the destination's top bit as well,
    #    so values with that bit set were clamped although they fit.
    code = '''
        assert(srcSize == destSize * 2);
        int items = (sizeof(FloatRegBits) / destSize);
        int destBits = destSize * 8;
        int srcBits = srcSize * 8;
        uint64_t result = 0;
        for (int i = 0; i < items; i++) {
            uint64_t picked;
            if (i < items / 2) {
                picked = bits(FpSrcReg1_uqw, (i + 1) * srcBits - 1,
                                             (i + 0) * srcBits);
            } else {
                int j = i - items / 2;
                picked = bits(FpSrcReg2_uqw, (j + 1) * srcBits - 1,
                                             (j + 0) * srcBits);
            }
            unsigned signBit = bits(picked, srcBits - 1);

            // Handle saturation.
            if (signedOp()) {
                // The value fits when every bit from the narrow sign
                // position upward is a copy of the sign bit.
                uint64_t upper = bits(picked, srcBits - 1, destBits - 1);
                if (signBit) {
                    if (upper != mask(srcBits - destBits + 1))
                        picked = (ULL(1) << (destBits - 1));
                } else {
                    if (upper != 0)
                        picked = mask(destBits - 1);
                }
            } else {
                if (signBit) {
                    picked = 0;
                } else if (bits(picked, srcBits - 1, destBits) != 0) {
                    picked = mask(destBits);
                }
            }
            result = insertBits(result,
                                (i + 1) * destBits - 1,
                                (i + 0) * destBits,
                                picked);
        }
        FpDestReg_uqw = result;
    '''
class Mxor(MediaOp):
    # Bitwise XOR of the two sources. The size is fixed to 1 because the
    # operation does not depend on how the register is divided into items.
    def __init__(self, dest, src1, src2):
        super(Mxor, self).__init__(dest, src1, src2, 1)
    code = '''
        FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;
    '''
class Mor(MediaOp):
    # Bitwise OR of the two sources. The size is fixed to 1 because the
    # operation does not depend on how the register is divided into items.
    def __init__(self, dest, src1, src2):
        super(Mor, self).__init__(dest, src1, src2, 1)
    code = '''
        FpDestReg_uqw = FpSrcReg1_uqw | FpSrcReg2_uqw;
    '''
class Mand(MediaOp):
    # Bitwise AND of the two sources; always constructed with size 1.
    def __init__(self, dest, src1, src2):
        super(Mand, self).__init__(dest, src1, src2, size = 1)
    code = '''
FpDestReg_uqw = FpSrcReg1_uqw & FpSrcReg2_uqw;
'''
class Mandn(MediaOp):
    # Bitwise AND of the complement of the first source with the second
    # source; always constructed with size 1.
    def __init__(self, dest, src1, src2):
        super(Mandn, self).__init__(dest, src1, src2, size = 1)
    code = '''
FpDestReg_uqw = ~FpSrcReg1_uqw & FpSrcReg2_uqw;
'''
# Per-item floating point minimum. Items are 4 bytes (read as float) or
# 8 bytes (read as double). The first source's bits are kept only when
# arg1 < arg2 holds; in every other case, including when the comparison is
# false because a value is a NaN, the second source's bits are used.
class Mminf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 < arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
# Per-item floating point maximum. Items are 4 bytes (read as float) or
# 8 bytes (read as double). The first source's bits are kept only when
# arg1 > arg2 holds; in every other case, including when the comparison is
# false because a value is a NaN, the second source's bits are used.
class Mmaxf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
double arg1, arg2;
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = arg1Bits;
arg1 = fi.f;
fi.i = arg2Bits;
arg2 = fi.f;
} else {
doubleInt di;
di.i = arg1Bits;
arg1 = di.d;
di.i = arg2Bits;
arg2 = di.d;
}
if (arg1 > arg2) {
result = insertBits(result, hiIndex, loIndex, arg1Bits);
} else {
result = insertBits(result, hiIndex, loIndex, arg2Bits);
}
}
FpDestReg_uqw = result;
'''
class Mmini(MediaOp):
    # Per-item integer minimum. When signedOp() is true the items are
    # compared after sign extension to 64 bits, otherwise their raw bits
    # are compared as unsigned values. On a tie the second source is used.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg_uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t resBits;

            if (signedOp()) {
                if (arg1 < arg2) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            } else {
                if (arg1Bits < arg2Bits) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            }
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg_uqw = result;
    '''
class Mmaxi(MediaOp):
    # Per-item integer maximum. When signedOp() is true the items are
    # compared after sign extension to 64 bits, otherwise their raw bits
    # are compared as unsigned values. On a tie the second source is used.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg_uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t resBits;

            if (signedOp()) {
                if (arg1 > arg2) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            } else {
                if (arg1Bits > arg2Bits) {
                    resBits = arg1Bits;
                } else {
                    resBits = arg2Bits;
                }
            }
            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg_uqw = result;
    '''
class Msqrt(MediaOp):
    # Per-item floating point square root of the single source. Items are
    # 4 bytes (float) or 8 bytes (double). The second operand is fixed to
    # InstRegIndex(0).
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        super(Msqrt, self).__init__(dest, src, op2 = "InstRegIndex(0)",
                size = size, destSize = destSize, srcSize = srcSize,
                ext = ext)
    code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
if (size == 4) {
floatInt fi;
fi.i = argBits;
fi.f = sqrt(fi.f);
argBits = fi.i;
} else {
doubleInt di;
di.i = argBits;
di.d = sqrt(di.d);
argBits = di.i;
}
result = insertBits(result, hiIndex, loIndex, argBits);
}
FpDestReg_uqw = result;
'''
# Per-item reciprocal, single precision only (the code asserts a 4-byte
# item size). The value is computed as an exact 1.0 / x division, which the
# code itself notes is more accurate than hardware. The second operand is
# fixed to InstRegIndex(0).
class Mrcp(MediaOp):
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        super(Mrcp, self).__init__(dest, src, op2 = "InstRegIndex(0)",
                size = size, destSize = destSize, srcSize = srcSize,
                ext = ext)
    code = '''
union floatInt
{
float f;
uint32_t i;
};
assert(srcSize == 4); // ISA defines single-precision only
assert(srcSize == destSize);
const int size = 4;
const int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
floatInt fi;
fi.i = argBits;
// This is more accuracy than HW provides, but oh well
fi.f = 1.0 / fi.f;
argBits = fi.i;
result = insertBits(result, hiIndex, loIndex, argBits);
}
FpDestReg_uqw = result;
'''
# Per-item floating point addition. Each 4-byte item is added as float and
# each 8-byte item as double; the raw result bits are inserted into the
# matching item of the destination.
class Maddf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f + arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d + arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Per-item floating point subtraction (first source minus second). Each
# 4-byte item is handled as float and each 8-byte item as double; the raw
# result bits are inserted into the matching item of the destination.
class Msubf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f - arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d - arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Per-item floating point multiplication. Each 4-byte item is multiplied
# as float and each 8-byte item as double; the raw result bits are
# inserted into the matching item of the destination.
class Mmulf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f * arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d * arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Per-item floating point division (first source divided by second). Each
# 4-byte item is handled as float and each 8-byte item as double; the raw
# result bits are inserted into the matching item of the destination.
class Mdivf(MediaOp):
code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
assert(srcSize == 4 || srcSize == 8);
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits;
if (size == 4) {
floatInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.f = arg1.f / arg2.f;
resBits = res.i;
} else {
doubleInt arg1, arg2, res;
arg1.i = arg1Bits;
arg2.i = arg2Bits;
res.d = arg1.d / arg2.d;
resBits = res.i;
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Per-item integer addition. With bit 1 of ext (0x2) set the result
# saturates: for signedOp() a sum whose sign differs from two equal-signed
# inputs is clamped to the most negative or most positive value; otherwise
# a carry reported by findCarry() clamps the item to all ones. Without
# that bit the sum simply wraps when inserted into the item.
class Maddi(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits + arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (ULL(1) << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (findCarry(sizeBits, resBits, arg1Bits, arg2Bits))
resBits = mask(sizeBits);
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Per-item integer subtraction (first source minus second). With bit 1 of
# ext (0x2) set the result saturates. For signedOp() the sign of the
# second input is inverted and the same overflow test as for addition is
# applied. Otherwise the item becomes 0 when the second input is larger
# than the first, and all ones when findCarry() on the first input and the
# complemented second input reports no carry.
# NOTE(review): it is not obvious from this file when that last branch can
# trigger once the second input is known not to exceed the first - confirm
# against findCarry()'s definition.
class Msubi(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = arg1Bits - arg2Bits;
if (ext & 0x2) {
if (signedOp()) {
int arg1Sign = bits(arg1Bits, sizeBits - 1);
int arg2Sign = !bits(arg2Bits, sizeBits - 1);
int resSign = bits(resBits, sizeBits - 1);
if ((arg1Sign == arg2Sign) && (arg1Sign != resSign)) {
if (resSign == 0)
resBits = (ULL(1) << (sizeBits - 1));
else
resBits = mask(sizeBits - 1);
}
} else {
if (arg2Bits > arg1Bits) {
resBits = 0;
} else if (!findCarry(sizeBits, resBits,
arg1Bits, ~arg2Bits)) {
resBits = mask(sizeBits);
}
}
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Per-item integer multiplication of srcSize-byte items into destSize-byte
# result items (destSize >= srcSize, at most 64 bits).
#  - ext & 16 spreads the source items out so that one source item is read
#    per destination item; ext & 32 then selects the lower rather than the
#    upper source item of each destination-sized slot.
#  - signedOp() sign extends both inputs before multiplying.
#  - ext & 0x4 adds 1 << (destBits - 1) to the product.
#  - multHi() shifts the product right by destBits before it is stored.
class Mmuli(MediaOp):
code = '''
int srcBits = srcSize * 8;
int destBits = destSize * 8;
assert(destBits <= 64);
assert(destSize >= srcSize);
int items = numItems(destSize);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int offset = 0;
if (ext & 16) {
if (ext & 32)
offset = i * (destBits - srcBits);
else
offset = i * (destBits - srcBits) + srcBits;
}
int srcHiIndex = (i + 1) * srcBits - 1 + offset;
int srcLoIndex = (i + 0) * srcBits + offset;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, srcHiIndex, srcLoIndex);
uint64_t resBits;
if (signedOp()) {
int64_t arg1 = arg1Bits |
(0 - (arg1Bits & (ULL(1) << (srcBits - 1))));
int64_t arg2 = arg2Bits |
(0 - (arg2Bits & (ULL(1) << (srcBits - 1))));
resBits = (uint64_t)(arg1 * arg2);
} else {
resBits = arg1Bits * arg2Bits;
}
if (ext & 0x4)
resBits += (ULL(1) << (destBits - 1));
if (multHi())
resBits >>= destBits;
int destHiIndex = (i + 1) * destBits - 1;
int destLoIndex = (i + 0) * destBits;
result = insertBits(result, destHiIndex, destLoIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Per-item rounded average of the two sources, computed on the unsigned
# item values as (a + b + 1) / 2.
class Mavg(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
uint64_t resBits = (arg1Bits + arg2Bits + 1) / 2;
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Sum of absolute differences: accumulates |a - b| over all srcSize-byte
# items of the two sources and writes the sum, truncated to destSize * 8
# bits, as the whole destination value.
class Msad(MediaOp):
code = '''
int srcBits = srcSize * 8;
int items = sizeof(FloatRegBits) / srcSize;
uint64_t sum = 0;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * srcBits - 1;
int loIndex = (i + 0) * srcBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
int64_t resBits = arg1Bits - arg2Bits;
if (resBits < 0)
resBits = -resBits;
sum += resBits;
}
FpDestReg_uqw = sum & mask(destSize * 8);
'''
# Per-item logical right shift of the first source. The shift amount is the
# generic second operand, so MediaOpMeta generates both a register and an
# immediate ("msrli") version. A shift amount of sizeBits or more yields 0.
class Msrl(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt) &
mask(sizeBits - shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Per-item arithmetic right shift of the first source. The shift amount is
# the generic second operand, so MediaOpMeta generates both a register and
# an immediate ("msrai") version. A shift amount of sizeBits or more fills
# the item with copies of its sign bit; smaller amounts shift and then sign
# extend from the position the sign bit moved to.
class Msra(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
if (bits(arg1Bits, sizeBits - 1))
resBits = mask(sizeBits);
else
resBits = 0;
} else {
resBits = (arg1Bits >> shiftAmt);
resBits = resBits |
(0 - (resBits & (ULL(1) << (sizeBits - 1 - shiftAmt))));
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
# Per-item left shift of the first source. The shift amount is the generic
# second operand, so MediaOpMeta generates both a register and an immediate
# ("mslli") version. A shift amount of sizeBits or more yields 0; bits
# shifted past the item's width are dropped when the item is inserted.
class Msll(MediaOp):
code = '''
assert(srcSize == destSize);
int size = srcSize;
int sizeBits = size * 8;
int items = numItems(size);
uint64_t shiftAmt = op2_uqw;
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int hiIndex = (i + 1) * sizeBits - 1;
int loIndex = (i + 0) * sizeBits;
uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
uint64_t resBits;
if (shiftAmt >= sizeBits) {
resBits = 0;
} else {
resBits = (arg1Bits << shiftAmt);
}
result = insertBits(result, hiIndex, loIndex, resBits);
}
FpDestReg_uqw = result;
'''
class Cvtf2i(MediaOp):
    # Converts floating point items (4 or 8 bytes) of the single source to
    # signed integer items (4 or 8 bytes). When the two sizes differ, bit 1
    # of ext (0x2) moves the narrower side to the upper part of its
    # register. Bit 2 of ext (0x4) adds 0.5 to non-negative values and
    # subtracts 0.5 from negative ones before the truncating cast. The
    # second operand is fixed to InstRegIndex(0).
    def __init__(self, dest, src,
            size = None, destSize = None, srcSize = None, ext = None):
        super(Cvtf2i, self).__init__(dest, src, op2 = "InstRegIndex(0)",
                size = size, destSize = destSize, srcSize = srcSize,
                ext = ext)
    code = '''
union floatInt
{
float f;
uint32_t i;
};
union doubleInt
{
double d;
uint64_t i;
};
assert(destSize == 4 || destSize == 8);
assert(srcSize == 4 || srcSize == 8);
int srcSizeBits = srcSize * 8;
int destSizeBits = destSize * 8;
int items;
int srcStart = 0;
int destStart = 0;
if (srcSize == 2 * destSize) {
items = numItems(srcSize);
if (ext & 0x2)
destStart = destSizeBits * items;
} else if (destSize == 2 * srcSize) {
items = numItems(destSize);
if (ext & 0x2)
srcStart = srcSizeBits * items;
} else {
items = numItems(destSize);
}
uint64_t result = FpDestReg_uqw;
for (int i = 0; i < items; i++) {
int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);
double arg;
if (srcSize == 4) {
floatInt fi;
fi.i = argBits;
arg = fi.f;
} else {
doubleInt di;
di.i = argBits;
arg = di.d;
}
if (ext & 0x4) {
if (arg >= 0)
arg += 0.5;
else
arg -= 0.5;
}
if (destSize == 4) {
int32_t i_arg = (int32_t)arg;
argBits = *((uint32_t*)&i_arg);
} else {
int64_t i_arg = (int64_t)arg;
argBits = *((uint64_t*)&i_arg);
}
int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
int destLoIndex = destStart + (i + 0) * destSizeBits;
result = insertBits(result, destHiIndex, destLoIndex, argBits);
}
FpDestReg_uqw = result;
'''
class Cvti2f(MediaOp):
    # Packed signed integer to floating point conversion. Only one source
    # is taken from the caller; a fixed placeholder register index is
    # passed to the base class for the remaining source slot.
    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Cvti2f, self).__init__(dest, src,
                "InstRegIndex(0)", size, destSize, srcSize, ext)

    code = '''
        // Helpers for obtaining the raw bits of a floating point value.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int srcSizeBits = srcSize * 8;
        int destSizeBits = destSize * 8;

        // Work out how many lanes to convert and where they start. When
        // the two widths differ, bit 1 of ext offsets the narrower side
        // past the lanes that would be used without it.
        int items;
        int srcStart = 0;
        int destStart = 0;
        if (srcSize == 2 * destSize) {
            items = numItems(srcSize);
            if (ext & 0x2)
                destStart = destSizeBits * items;
        } else if (destSize == 2 * srcSize) {
            items = numItems(destSize);
            if (ext & 0x2)
                srcStart = srcSizeBits * items;
        } else {
            items = numItems(destSize);
        }

        uint64_t result = FpDestReg_uqw;
        for (int i = 0; i < items; i++) {
            int srcHiIndex = srcStart + (i + 1) * srcSizeBits - 1;
            int srcLoIndex = srcStart + (i + 0) * srcSizeBits;
            uint64_t argBits = bits(FpSrcReg1_uqw, srcHiIndex, srcLoIndex);

            // Sign extend the lane to a full 64 bit signed integer.
            int64_t sArg = argBits |
                (0 - (argBits & (ULL(1) << (srcSizeBits - 1))));

            // Convert directly from the integer to the destination type.
            // Going through a double first would round a 64 bit source
            // twice on its way to single precision and could produce a
            // different result than a single correctly rounded conversion.
            if (destSize == 4) {
                floatInt fi;
                fi.f = static_cast<float>(sArg);
                argBits = fi.i;
            } else {
                doubleInt di;
                di.d = static_cast<double>(sArg);
                argBits = di.i;
            }

            int destHiIndex = destStart + (i + 1) * destSizeBits - 1;
            int destLoIndex = destStart + (i + 0) * destSizeBits;
            result = insertBits(result, destHiIndex, destLoIndex, argBits);
        }
        FpDestReg_uqw = result;
    '''
class Cvtf2f(MediaOp):
    # Packed conversion between single and double precision. Only one
    # source is taken from the caller; a fixed placeholder register index
    # is passed to the base class for the remaining source slot.
    def __init__(self, dest, src,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Cvtf2f, self).__init__(dest, src,
                "InstRegIndex(0)", size, destSize, srcSize, ext)

    code = '''
        // Helpers for moving between raw lane bits and floating point.
        union f32Cast
        {
            float val;
            uint32_t raw;
        };
        union f64Cast
        {
            double val;
            uint64_t raw;
        };

        assert(destSize == 4 || destSize == 8);
        assert(srcSize == 4 || srcSize == 8);
        int inBits = srcSize * 8;
        int outBits = destSize * 8;

        // Work out how many lanes to convert and where they start. When
        // the two widths differ, bit 1 of ext offsets the narrower side
        // past the lanes that would be used without it.
        int count;
        int inBase = 0;
        int outBase = 0;
        if (srcSize == 2 * destSize) {
            count = numItems(srcSize);
            if (ext & 0x2)
                outBase = outBits * count;
        } else if (destSize == 2 * srcSize) {
            count = numItems(destSize);
            if (ext & 0x2)
                inBase = inBits * count;
        } else {
            count = numItems(destSize);
        }

        uint64_t packed = FpDestReg_uqw;
        for (int n = 0; n < count; ++n) {
            int inLo = inBase + n * inBits;
            int inHi = inLo + inBits - 1;
            uint64_t raw = bits(FpSrcReg1_uqw, inHi, inLo);

            // Decode the lane using the source precision.
            double val;
            if (srcSize != 4) {
                f64Cast pun;
                pun.raw = raw;
                val = pun.val;
            } else {
                f32Cast pun;
                pun.raw = raw;
                val = pun.val;
            }

            // Encode it again using the destination precision.
            if (destSize != 4) {
                f64Cast pun;
                pun.val = val;
                raw = pun.raw;
            } else {
                f32Cast pun;
                pun.val = val;
                raw = pun.raw;
            }

            int outLo = outBase + n * outBits;
            int outHi = outLo + outBits - 1;
            packed = insertBits(packed, outHi, outLo, raw);
        }
        FpDestReg_uqw = packed;
    '''
class Mcmpi2r(MediaOp):
    # Packed signed integer compare. Each destination lane becomes all
    # ones when the comparison holds for that lane and all zeros when it
    # does not.
    code = '''
        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg_uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;

            // Sign extend both lanes to 64 bits so they compare as signed.
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
            int64_t arg1 = arg1Bits |
                (0 - (arg1Bits & (ULL(1) << (sizeBits - 1))));
            uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);
            int64_t arg2 = arg2Bits |
                (0 - (arg2Bits & (ULL(1) << (sizeBits - 1))));

            // Bit 1 of ext picks the test: clear means equal, set means
            // the first source is greater than the second.
            uint64_t resBits = 0;
            if (((ext & 0x2) == 0 && arg1 == arg2) ||
                ((ext & 0x2) == 0x2 && arg1 > arg2))
                resBits = mask(sizeBits);

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg_uqw = result;
    '''
class Mcmpf2r(MediaOp):
    # Packed floating point compare. Each destination lane becomes all
    # ones when the predicate selected by the low three bits of ext holds
    # for that lane and all zeros when it does not.
    code = '''
        // Helpers for reinterpreting raw lane bits as floating point.
        union floatInt
        {
            float f;
            uint32_t i;
        };
        union doubleInt
        {
            double d;
            uint64_t i;
        };

        assert(srcSize == destSize);
        int size = srcSize;
        int sizeBits = size * 8;
        int items = numItems(size);
        uint64_t result = FpDestReg_uqw;

        for (int i = 0; i < items; i++) {
            int hiIndex = (i + 1) * sizeBits - 1;
            int loIndex = (i + 0) * sizeBits;
            uint64_t arg1Bits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
            uint64_t arg2Bits = bits(FpSrcReg2_uqw, hiIndex, loIndex);

            // Decode both lanes as floats or doubles.
            double arg1, arg2;
            if (size == 4) {
                floatInt fi;
                fi.i = arg1Bits;
                arg1 = fi.f;
                fi.i = arg2Bits;
                arg2 = fi.f;
            } else {
                doubleInt di;
                di.i = arg1Bits;
                arg1 = di.d;
                di.i = arg2Bits;
                arg2 = di.d;
            }

            // Predicates 0 to 3 are: equal, less than, less than or equal,
            // unordered. Predicates 4 to 7 are their negations, so they
            // also hold (apart from 7, ordered) when either input is NaN.
            uint64_t resBits = 0;
            bool nanop = std::isnan(arg1) || std::isnan(arg2);
            switch (ext & mask(3)) {
              case 0:
                if (arg1 == arg2 && !nanop)
                    resBits = mask(sizeBits);
                break;
              case 1:
                if (arg1 < arg2 && !nanop)
                    resBits = mask(sizeBits);
                break;
              case 2:
                if (arg1 <= arg2 && !nanop)
                    resBits = mask(sizeBits);
                break;
              case 3:
                if (nanop)
                    resBits = mask(sizeBits);
                break;
              case 4:
                if (arg1 != arg2 || nanop)
                    resBits = mask(sizeBits);
                break;
              case 5:
                if (!(arg1 < arg2) || nanop)
                    resBits = mask(sizeBits);
                break;
              case 6:
                if (!(arg1 <= arg2) || nanop)
                    resBits = mask(sizeBits);
                break;
              case 7:
                if (!nanop)
                    resBits = mask(sizeBits);
                break;
            }

            result = insertBits(result, hiIndex, loIndex, resBits);
        }
        FpDestReg_uqw = result;
    '''
class Mcmpf2rf(MediaOp):
    # Scalar floating point compare that reports its outcome through the
    # condition flags. It takes two sources and no destination, so a fixed
    # placeholder register index is passed to the base class as the
    # destination.
    def __init__(self, src1, src2,
                 size = None, destSize = None, srcSize = None, ext = None):
        super(Mcmpf2rf, self).__init__("InstRegIndex(0)", src1,
                src2, size, destSize, srcSize, ext)

    code = '''
        // Helpers for reinterpreting raw bits as floating point.
        union f32Cast
        {
            float val;
            uint32_t raw;
        };
        union f64Cast
        {
            double val;
            uint64_t raw;
        };

        assert(srcSize == destSize);
        assert(srcSize == 4 || srcSize == 8);
        int width = srcSize;
        int widthBits = width * 8;

        // Only the lowest element of each source takes part.
        uint64_t lhsRaw = bits(FpSrcReg1_uqw, widthBits - 1, 0);
        uint64_t rhsRaw = bits(FpSrcReg2_uqw, widthBits - 1, 0);
        double lhs, rhs;
        if (width != 4) {
            f64Cast pun;
            pun.raw = lhsRaw;
            lhs = pun.val;
            pun.raw = rhsRaw;
            rhs = pun.val;
        } else {
            f32Cast pun;
            pun.raw = lhsRaw;
            lhs = pun.val;
            pun.raw = rhsRaw;
            rhs = pun.val;
        }

        // Resulting flags:
        //
        //                  ZF PF CF
        //   Unordered       1  1  1
        //   Greater than    0  0  0
        //   Less than       0  0  1
        //   Equal           1  0  0
        //
        // OF, SF and AF always end up clear.
        bool unordered = std::isnan(lhs) || std::isnan(rhs);

        // Start with every affected flag cleared, then raise what applies.
        ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
        cfofBits = cfofBits & ~(OFBit | CFBit);
        if (unordered) {
            ccFlagBits = ccFlagBits | (ZFBit | PFBit);
            cfofBits = cfofBits | CFBit;
        } else if (lhs < rhs) {
            cfofBits = cfofBits | CFBit;
        } else if (lhs == rhs) {
            ccFlagBits = ccFlagBits | ZFBit;
        }
    '''
class Emms(MediaOp):
    # Stores the constant 0xFFFF into FTW. The microop takes no arguments:
    # the destination is fixed to the FTW misc register, both sources are
    # placeholder register indices, and the trailing 2 lands in the slot
    # the sibling microops use for the operand size.
    def __init__(self):
        super(Emms, self).__init__('InstRegIndex(MISCREG_FTW)',
                'InstRegIndex(0)', 'InstRegIndex(0)', 2)

    code = 'FTW = 0xFFFF;'
}};