gem5/src/arch/x86/isa/microops/fpop.isa

457 lines
16 KiB
Plaintext
Raw Normal View History

// Copyright (c) 2007 The Hewlett-Packard Development Company
// Copyright (c) 2012-2013 Mark D. Hill and David A. Wood
// Copyright (c) 2015 Advanced Micro Devices, Inc.
//
// All rights reserved.
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder. You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black
// Nilay Vaish
//////////////////////////////////////////////////////////////////////////
//
// FpOp Microop templates
//
//////////////////////////////////////////////////////////////////////////
def template MicroFpOpExecute {{
arch: teach ISA parser how to split code across files This patch encompasses several interrelated and interdependent changes to the ISA generation step. The end goal is to reduce the size of the generated compilation units for instruction execution and decoding so that batch compilation can proceed with all CPUs active without exhausting physical memory. The ISA parser (src/arch/isa_parser.py) has been improved so that it can accept 'split [output_type];' directives at the top level of the grammar and 'split(output_type)' python calls within 'exec {{ ... }}' blocks. This has the effect of "splitting" the files into smaller compilation units. I use air-quotes around "splitting" because the files themselves are not split, but preprocessing directives are inserted to have the same effect. Architecturally, the ISA parser has had some changes in how it works. In general, it emits code sooner. It doesn't generate per-CPU files, and instead defers to the C preprocessor to create the duplicate copies for each CPU type. Likewise there are more files emitted and the C preprocessor does more substitution that used to be done by the ISA parser. Finally, the build system (SCons) needs to be able to cope with a dynamic list of source files coming out of the ISA parser. The changes to the SCons{cript,truct} files support this. In broad strokes, the targets requested on the command line are hidden from SCons until all the build dependencies are determined, otherwise it would try, realize it can't reach the goal, and terminate in failure. Since build steps (i.e. running the ISA parser) must be taken to determine the file list, several new build stages have been inserted at the very start of the build. First, the build dependencies from the ISA parser will be emitted to arch/$ISA/generated/inc.d, which is then read by a new SCons builder to finalize the dependencies. (Once inc.d exists, the ISA parser will not need to be run to complete this step.) Once the dependencies are known, the 'Environments' are made by the makeEnv() function. This function used to be called before the build began but now happens during the build. It is easy to see that this step is quite slow; this is a known issue and it's important to realize that it was already slow, but there was no obvious cause to attribute it to since nothing was displayed to the terminal. Since new steps that used to be performed serially are now in a potentially-parallel build phase, the pathname handling in the SCons scripts has been tightened up to deal with chdir() race conditions. In general, pathnames are computed earlier and more likely to be stored, passed around, and processed as absolute paths rather than relative paths. In the end, some of these issues had to be fixed by inserting serializing dependencies in the build. Minor note: For the null ISA, we just provide a dummy inc.d so SCons is never compelled to try to generate it. While it seems slightly wrong to have anything in src/arch/*/generated (i.e. a non-generated 'generated' file), it's by far the simplest solution.
2014-05-10 00:58:47 +02:00
Fault %(class_name)s::execute(CPU_EXEC_CONTEXT *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
DPRINTF(X86, "The data size is %d\n", dataSize);
%(op_decl)s;
%(op_rd)s;
if(%(cond_check)s)
{
%(code)s;
%(flag_code)s;
%(tag_code)s;
%(top_code)s;
}
else
{
%(else_code)s;
}
//Write the resulting state to the execution context
if(fault == NoFault)
{
%(op_wb)s;
}
return fault;
}
}};
def template MicroFpOpDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst _machInst,
const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _dataSize, int8_t _spm);
%(BasicExecDeclare)s
};
}};
def template MicroFpOpConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
uint8_t _dataSize, int8_t _spm) :
%(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
_src1, _src2, _dest, _dataSize, _spm,
%(op_class)s)
{
%(constructor)s;
}
}};
let {{
# Make these empty strings so that concatenating onto
# them will always work.
header_output = ""
decoder_output = ""
exec_output = ""
class FpOpMeta(type):
def buildCppClasses(self, name, Name, suffix, \
code, flag_code, cond_check, else_code, op_class):
# Globals to stick the output in
global header_output
global decoder_output
global exec_output
# Stick all the code together so it can be searched at once
allCode = "|".join((code, flag_code, cond_check, else_code))
# If there's something optional to do with flags, generate
# a version without it and fix up this version to use it.
if flag_code is not "" or cond_check is not "true":
self.buildCppClasses(name, Name, suffix,
code, "", "true", else_code, op_class)
suffix = "Flags" + suffix
base = "X86ISA::FpOp"
# Get everything ready for the substitution
iop_tag = InstObjParams(name, Name + suffix + "TopTag", base,
{"code" : code,
"flag_code" : flag_code,
"cond_check" : cond_check,
"else_code" : else_code,
"tag_code" : "FTW = genX87Tags(FTW, TOP, spm);",
"top_code" : "TOP = (TOP + spm + 8) % 8;",
"op_class" : op_class})
iop_top = InstObjParams(name, Name + suffix + "Top", base,
{"code" : code,
"flag_code" : flag_code,
"cond_check" : cond_check,
"else_code" : else_code,
"tag_code" : ";",
"top_code" : "TOP = (TOP + spm + 8) % 8;",
"op_class" : op_class})
iop = InstObjParams(name, Name + suffix, base,
{"code" : code,
"flag_code" : flag_code,
"cond_check" : cond_check,
"else_code" : else_code,
"tag_code" : ";",
"top_code" : ";",
"op_class" : op_class})
# Generate the actual code (finally!)
header_output += MicroFpOpDeclare.subst(iop_tag)
decoder_output += MicroFpOpConstructor.subst(iop_tag)
exec_output += MicroFpOpExecute.subst(iop_tag)
header_output += MicroFpOpDeclare.subst(iop_top)
decoder_output += MicroFpOpConstructor.subst(iop_top)
exec_output += MicroFpOpExecute.subst(iop_top)
header_output += MicroFpOpDeclare.subst(iop)
decoder_output += MicroFpOpConstructor.subst(iop)
exec_output += MicroFpOpExecute.subst(iop)
def __new__(mcls, Name, bases, dict):
abstract = False
name = Name.lower()
if "abstract" in dict:
abstract = dict['abstract']
del dict['abstract']
cls = super(FpOpMeta, mcls).__new__(mcls, Name, bases, dict)
if not abstract:
cls.className = Name
cls.mnemonic = name
code = cls.code
flag_code = cls.flag_code
cond_check = cls.cond_check
else_code = cls.else_code
op_class = cls.op_class
# Set up the C++ classes
mcls.buildCppClasses(cls, name, Name, "",
code, flag_code, cond_check, else_code, op_class)
# Hook into the microassembler dict
global microopClasses
microopClasses[name] = cls
return cls
class FpUnaryOp(X86Microop):
__metaclass__ = FpOpMeta
# This class itself doesn't act as a microop
abstract = True
# Default template parameter values
flag_code = ""
cond_check = "true"
else_code = ";"
op_class = "FloatAddOp"
def __init__(self, dest, src1, spm=0, \
SetStatus=False, UpdateFTW=True, dataSize="env.dataSize"):
self.dest = dest
self.src1 = src1
self.src2 = "InstRegIndex(0)"
self.spm = spm
self.dataSize = dataSize
if SetStatus:
self.className += "Flags"
if spm:
self.className += "Top"
if spm and UpdateFTW:
self.className += "Tag"
def getAllocator(self, microFlags):
return '''new %(class_name)s(machInst, macrocodeBlock,
%(flags)s, %(src1)s, %(src2)s, %(dest)s,
%(dataSize)s, %(spm)d)''' % {
"class_name" : self.className,
"flags" : self.microFlagsText(microFlags),
"src1" : self.src1, "src2" : self.src2,
"dest" : self.dest,
"dataSize" : self.dataSize,
"spm" : self.spm}
class FpBinaryOp(X86Microop):
__metaclass__ = FpOpMeta
# This class itself doesn't act as a microop
abstract = True
# Default template parameter values
flag_code = ""
cond_check = "true"
else_code = ";"
op_class = "FloatAddOp"
def __init__(self, dest, src1, src2, spm=0, \
SetStatus=False, UpdateFTW=True, dataSize="env.dataSize"):
self.dest = dest
self.src1 = src1
self.src2 = src2
self.spm = spm
self.dataSize = dataSize
if SetStatus:
self.className += "Flags"
if spm:
self.className += "Top"
if spm and UpdateFTW:
self.className += "Tag"
def getAllocator(self, microFlags):
return '''new %(class_name)s(machInst, macrocodeBlock,
%(flags)s, %(src1)s, %(src2)s, %(dest)s,
%(dataSize)s, %(spm)d)''' % {
"class_name" : self.className,
"flags" : self.microFlagsText(microFlags),
"src1" : self.src1, "src2" : self.src2,
"dest" : self.dest,
"dataSize" : self.dataSize,
"spm" : self.spm}
class Movfp(FpUnaryOp):
code = 'FpDestReg_uqw = FpSrcReg1_uqw;'
else_code = 'FpDestReg_uqw = FpDestReg_uqw;'
cond_check = "checkCondition(ccFlagBits | cfofBits | dfBit | \
ecfBit | ezfBit, src2)"
op_class = 'IntAluOp'
class Xorfp(FpBinaryOp):
code = 'FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;'
class Sqrtfp(FpBinaryOp):
code = 'FpDestReg = sqrt(FpSrcReg2);'
op_class = 'FloatSqrtOp'
class Cosfp(FpUnaryOp):
code = 'FpDestReg = cos(FpSrcReg1);'
op_class = 'FloatSqrtOp'
class Sinfp(FpUnaryOp):
code = 'FpDestReg = sin(FpSrcReg1);'
op_class = 'FloatSqrtOp'
class Tanfp(FpUnaryOp):
code = 'FpDestReg = tan(FpSrcReg1);'
op_class = 'FloatSqrtOp'
# Conversion microops
class ConvOp(FpBinaryOp):
abstract = True
op_class = 'FloatCvtOp'
def __init__(self, dest, src1, **kwargs):
super(ConvOp, self).__init__(dest, src1, \
"InstRegIndex(FLOATREG_MICROFP0)", \
**kwargs)
# These probably shouldn't look at the ExtMachInst directly to figure
# out what size to use and should instead delegate that to the macroop's
# constructor. That would be more efficient, and it would make the
# microops a little more modular.
class cvtf_i2d(ConvOp):
code = '''
X86IntReg intReg = SSrcReg1;
if (REX_W)
FpDestReg = intReg.SR;
else
FpDestReg = intReg.SE;
'''
class cvtf_i2d_hi(ConvOp):
code = 'FpDestReg = bits(SSrcReg1, 63, 32);'
class cvtf_d2i(ConvOp):
code = '''
int64_t intSrcReg1 = static_cast<int64_t>(FpSrcReg1);
if (REX_W)
SDestReg = intSrcReg1;
else
SDestReg = merge(SDestReg, intSrcReg1, 4);
'''
# Convert two integers registers representing an 80-bit floating
# point number to an x87 register.
class cvtint_fp80(FpBinaryOp):
code = '''
uint8_t bits[10];
*(uint64_t *)(bits + 0) = SSrcReg1;
*(uint16_t *)(bits + 8) = (uint16_t)SSrcReg2;
FpDestReg = loadFloat80(bits);
'''
# Convert an x87 register (double) into extended precision and
# extract the highest 64 bits.
class cvtfp80h_int(ConvOp):
code = '''
char bits[10];
storeFloat80(bits, FpSrcReg1);
SDestReg = *(uint64_t *)(bits + 0);
'''
# Convert an x87 register (double) into extended precision and
# extract the lowest 16 bits.
class cvtfp80l_int(ConvOp):
code = '''
char bits[10];
storeFloat80(bits, FpSrcReg1);
SDestReg = *(uint16_t *)(bits + 8);
'''
# These need to consider size at some point. They'll always use doubles
# for the moment.
class addfp(FpBinaryOp):
code = 'FpDestReg = FpSrcReg1 + FpSrcReg2;'
class mulfp(FpBinaryOp):
code = 'FpDestReg = FpSrcReg1 * FpSrcReg2;'
op_class = 'FloatMultOp'
class divfp(FpBinaryOp):
code = 'FpDestReg = FpSrcReg1 / FpSrcReg2;'
op_class = 'FloatDivOp'
class subfp(FpBinaryOp):
code = 'FpDestReg = FpSrcReg1 - FpSrcReg2;'
class Yl2xFp(FpBinaryOp):
code = '''
FpDestReg = FpSrcReg2 * (log(FpSrcReg1) / log(2));
'''
op_class = 'FloatSqrtOp'
class PremFp(FpBinaryOp):
code = '''
MiscReg new_fsw(FSW);
int src1_exp;
int src2_exp;
std::frexp(FpSrcReg1, &src1_exp);
std::frexp(FpSrcReg2, &src2_exp);
const int d(src2_exp - src1_exp);
if (d < 64) {
const int64_t q(std::trunc(FpSrcReg2 / FpSrcReg1));
FpDestReg = FpSrcReg2 - FpSrcReg1 * q;
new_fsw &= ~(CC0Bit | CC1Bit | CC2Bit | CC2Bit);
new_fsw |= (q & 0x1) ? CC1Bit : 0;
new_fsw |= (q & 0x2) ? CC3Bit : 0;
new_fsw |= (q & 0x4) ? CC0Bit : 0;
} else {
const int n(42);
const int64_t qq(std::trunc(
FpSrcReg2 / std::ldexp(FpSrcReg1, d - n)));
FpDestReg = FpSrcReg2 - std::ldexp(FpSrcReg1 * qq, d - n);
new_fsw |= CC2Bit;
}
DPRINTF(X86, "src1: %lf, src2: %lf, dest: %lf, FSW: 0x%x\\n",
FpSrcReg1, FpSrcReg2, FpDestReg, new_fsw);
'''
op_class = 'FloatDivOp'
flag_code = 'FSW = new_fsw;'
class Compfp(FpBinaryOp):
def __init__(self, src1, src2, spm=0, setStatus=False, updateFTW=True, \
dataSize="env.dataSize"):
super(Compfp, self).__init__("InstRegIndex(FLOATREG_MICROFP0)", \
src1, src2, spm, setStatus, updateFTW, dataSize)
# This class sets the condition codes in rflags according to the
# rules for comparing floating point.
code = '''
// ZF PF CF
// Unordered 1 1 1
// Greater than 0 0 0
// Less than 0 0 1
// Equal 1 0 0
// OF = SF = AF = 0
ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
cfofBits = cfofBits & ~(OFBit | CFBit);
if (std::isnan(FpSrcReg1) || std::isnan(FpSrcReg2)) {
ccFlagBits = ccFlagBits | (ZFBit | PFBit);
cfofBits = cfofBits | CFBit;
}
else if(FpSrcReg1 < FpSrcReg2)
cfofBits = cfofBits | CFBit;
else if(FpSrcReg1 == FpSrcReg2)
ccFlagBits = ccFlagBits | ZFBit;
'''
op_class = 'FloatCmpOp'
2013-01-15 14:43:19 +01:00
class absfp(FpUnaryOp):
code = 'FpDestReg = fabs(FpSrcReg1);'
flag_code = 'FSW = FSW & (~CC1Bit);'
2013-01-15 14:43:19 +01:00
class chsfp(FpUnaryOp):
code = 'FpDestReg = (-1) * (FpSrcReg1);'
flag_code = 'FSW = FSW & (~CC1Bit);'
class Pop87(FpUnaryOp):
def __init__(self, spm=1, UpdateFTW=True):
super(Pop87, self).__init__( \
"InstRegIndex(FLOATREG_MICROFP0)", \
"InstRegIndex(FLOATREG_MICROFP0)", \
spm=spm, SetStatus=False, UpdateFTW=UpdateFTW)
code = ''
op_class = 'IntAluOp'
}};