gem5/src/arch/x86/isa/microops/fpop.isa

// Copyright (c) 2007 The Hewlett-Packard Development Company
// Copyright (c) 2012-2013 Mark D. Hill and David A. Wood
// Copyright (c) 2015 Advanced Micro Devices, Inc.
//
// All rights reserved.
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder.  You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black
//          Nilay Vaish

//////////////////////////////////////////////////////////////////////////
//
// FpOp Microop templates
//
//////////////////////////////////////////////////////////////////////////

def template MicroFpOpExecute {{
        Fault %(class_name)s::execute(CPU_EXEC_CONTEXT *xc,
                Trace::InstRecord *traceData) const
        {
            Fault fault = NoFault;

            DPRINTF(X86, "The data size is %d\n", dataSize);
            %(op_decl)s;
            %(op_rd)s;

            if(%(cond_check)s)
            {
                %(code)s;
                %(flag_code)s;
                %(tag_code)s;
                %(top_code)s;
            }
            else
            {
                %(else_code)s;
            }

            //Write the resulting state to the execution context
            if(fault == NoFault)
            {
                %(op_wb)s;
            }
            return fault;
        }
}};

def template MicroFpOpDeclare {{
    class %(class_name)s : public %(base_class)s
    {
      public:
        %(class_name)s(ExtMachInst _machInst,
                const char * instMnem, uint64_t setFlags,
                InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
                uint8_t _dataSize, int8_t _spm);

        %(BasicExecDeclare)s
    };
}};

def template MicroFpOpConstructor {{
    %(class_name)s::%(class_name)s(
            ExtMachInst machInst, const char * instMnem, uint64_t setFlags,
            InstRegIndex _src1, InstRegIndex _src2, InstRegIndex _dest,
            uint8_t _dataSize, int8_t _spm) :
        %(base_class)s(machInst, "%(mnemonic)s", instMnem, setFlags,
                _src1, _src2, _dest, _dataSize, _spm,
                %(op_class)s)
    {
        %(constructor)s;
    }
}};

let {{
    # Make these empty strings so that concatenating onto
    # them will always work.
    header_output = ""
    decoder_output = ""
    exec_output = ""

    class FpOpMeta(type):
        def buildCppClasses(self, name, Name, suffix, \
                code, flag_code, cond_check, else_code, op_class):

            # Globals to stick the output in
            global header_output
            global decoder_output
            global exec_output

            # Stick all the code together so it can be searched at once
            allCode = "|".join((code, flag_code, cond_check, else_code))

            # If there's something optional to do with flags, generate
            # a version without it and fix up this version to use it.
            if flag_code is not "" or cond_check is not "true":
                self.buildCppClasses(name, Name, suffix,
                        code, "", "true", else_code, op_class)
                suffix = "Flags" + suffix

            base = "X86ISA::FpOp"

            # Get everything ready for the substitution
            iop_tag = InstObjParams(name, Name + suffix + "TopTag", base,
                    {"code" : code,
                     "flag_code" : flag_code,
                     "cond_check" : cond_check,
                     "else_code" : else_code,
                     "tag_code" : "FTW = genX87Tags(FTW, TOP, spm);",
                     "top_code" : "TOP = (TOP + spm + 8) % 8;",
                     "op_class" : op_class})
            iop_top = InstObjParams(name, Name + suffix + "Top", base,
                    {"code" : code,
                     "flag_code" : flag_code,
                     "cond_check" : cond_check,
                     "else_code" : else_code,
                     "tag_code" : ";",
                     "top_code" : "TOP = (TOP + spm + 8) % 8;",
                     "op_class" : op_class})
            iop = InstObjParams(name, Name + suffix, base,
                    {"code" : code,
                     "flag_code" : flag_code,
                     "cond_check" : cond_check,
                     "else_code" : else_code,
                     "tag_code" : ";",
                     "top_code" : ";",
                     "op_class" : op_class})

            # Generate the actual code (finally!)
            header_output += MicroFpOpDeclare.subst(iop_tag)
            decoder_output += MicroFpOpConstructor.subst(iop_tag)
            exec_output += MicroFpOpExecute.subst(iop_tag)
            header_output += MicroFpOpDeclare.subst(iop_top)
            decoder_output += MicroFpOpConstructor.subst(iop_top)
            exec_output += MicroFpOpExecute.subst(iop_top)
            header_output += MicroFpOpDeclare.subst(iop)
            decoder_output += MicroFpOpConstructor.subst(iop)
            exec_output += MicroFpOpExecute.subst(iop)


        def __new__(mcls, Name, bases, dict):
            abstract = False
            name = Name.lower()
            if "abstract" in dict:
                abstract = dict['abstract']
                del dict['abstract']

            cls = super(FpOpMeta, mcls).__new__(mcls, Name, bases, dict)
            if not abstract:
                cls.className = Name
                cls.mnemonic = name
                code = cls.code
                flag_code = cls.flag_code
                cond_check = cls.cond_check
                else_code = cls.else_code
                op_class = cls.op_class

                # Set up the C++ classes
                mcls.buildCppClasses(cls, name, Name, "",
                        code, flag_code, cond_check, else_code, op_class)

                # Hook into the microassembler dict
                global microopClasses
                microopClasses[name] = cls

            return cls

    class FpUnaryOp(X86Microop):
        __metaclass__ = FpOpMeta
        # This class itself doesn't act as a microop
        abstract = True

        # Default template parameter values
        flag_code = ""
        cond_check = "true"
        else_code = ";"
        op_class = "FloatAddOp"

        def __init__(self, dest, src1, spm=0, \
                SetStatus=False, UpdateFTW=True, dataSize="env.dataSize"):
            self.dest = dest
            self.src1 = src1
            self.src2 = "InstRegIndex(0)"
            self.spm = spm
            self.dataSize = dataSize
            if SetStatus:
                self.className += "Flags"
            if spm:
                self.className += "Top"
            if spm and UpdateFTW:
                self.className += "Tag"

        def getAllocator(self, microFlags):
            return '''new %(class_name)s(machInst, macrocodeBlock,
                    %(flags)s, %(src1)s, %(src2)s, %(dest)s,
                    %(dataSize)s, %(spm)d)''' % {
                "class_name" : self.className,
                "flags" : self.microFlagsText(microFlags),
                "src1" : self.src1, "src2" : self.src2,
                "dest" : self.dest,
                "dataSize" : self.dataSize,
                "spm" : self.spm}

    class FpBinaryOp(X86Microop):
        __metaclass__ = FpOpMeta
        # This class itself doesn't act as a microop
        abstract = True

        # Default template parameter values
        flag_code = ""
        cond_check = "true"
        else_code = ";"
        op_class = "FloatAddOp"

        def __init__(self, dest, src1, src2, spm=0, \
                SetStatus=False, UpdateFTW=True, dataSize="env.dataSize"):
            self.dest = dest
            self.src1 = src1
            self.src2 = src2
            self.spm = spm
            self.dataSize = dataSize
            if SetStatus:
                self.className += "Flags"
            if spm:
                self.className += "Top"
            if spm and UpdateFTW:
                self.className += "Tag"

        def getAllocator(self, microFlags):
            return '''new %(class_name)s(machInst, macrocodeBlock,
                    %(flags)s, %(src1)s, %(src2)s, %(dest)s,
                    %(dataSize)s, %(spm)d)''' % {
                "class_name" : self.className,
                "flags" : self.microFlagsText(microFlags),
                "src1" : self.src1, "src2" : self.src2,
                "dest" : self.dest,
                "dataSize" : self.dataSize,
                "spm" : self.spm}

    class Movfp(FpUnaryOp):
        code = 'FpDestReg_uqw = FpSrcReg1_uqw;'
        else_code = 'FpDestReg_uqw = FpDestReg_uqw;'
        cond_check = "checkCondition(ccFlagBits | cfofBits | dfBit | \
                                     ecfBit | ezfBit, src2)"
        op_class = 'IntAluOp'

    class Xorfp(FpBinaryOp):
        code = 'FpDestReg_uqw = FpSrcReg1_uqw ^ FpSrcReg2_uqw;'

    class Sqrtfp(FpBinaryOp):
        code = 'FpDestReg = sqrt(FpSrcReg2);'
        op_class = 'FloatSqrtOp'

    class Cosfp(FpUnaryOp):
        code = 'FpDestReg = cos(FpSrcReg1);'
        op_class = 'FloatSqrtOp'

    class Sinfp(FpUnaryOp):
        code = 'FpDestReg = sin(FpSrcReg1);'
        op_class = 'FloatSqrtOp'

    class Tanfp(FpUnaryOp):
        code = 'FpDestReg = tan(FpSrcReg1);'
        op_class = 'FloatSqrtOp'


    # Conversion microops
    class ConvOp(FpBinaryOp):
        abstract = True
        op_class = 'FloatCvtOp'
        def __init__(self, dest, src1, **kwargs):
            super(ConvOp, self).__init__(dest, src1, \
                    "InstRegIndex(FLOATREG_MICROFP0)", \
                    **kwargs)

    # These probably shouldn't look at the ExtMachInst directly to figure
    # out what size to use and should instead delegate that to the macroop's
    # constructor. That would be more efficient, and it would make the
    # microops a little more modular.
    class cvtf_i2d(ConvOp):
        code = '''
            X86IntReg intReg = SSrcReg1;
            if (REX_W)
                FpDestReg = intReg.SR;
            else
                FpDestReg = intReg.SE;
            '''

    class cvtf_i2d_hi(ConvOp):
        code = 'FpDestReg = bits(SSrcReg1, 63, 32);'

    class cvtf_d2i(ConvOp):
        code = '''
            int64_t intSrcReg1 = static_cast<int64_t>(FpSrcReg1);
            if (REX_W)
                SDestReg = intSrcReg1;
            else
                SDestReg = merge(SDestReg, intSrcReg1, 4);
            '''

    # Convert two integers registers representing an 80-bit floating
    # point number to an x87 register.
    class cvtint_fp80(FpBinaryOp):
        code = '''
            uint8_t bits[10];
            *(uint64_t *)(bits + 0) = SSrcReg1;
            *(uint16_t *)(bits + 8) = (uint16_t)SSrcReg2;
            FpDestReg = loadFloat80(bits);
            '''

    # Convert an x87 register (double) into extended precision and
    # extract the highest 64 bits.
    class cvtfp80h_int(ConvOp):
        code = '''
            char bits[10];
            storeFloat80(bits, FpSrcReg1);
            SDestReg = *(uint64_t *)(bits + 0);
            '''

    # Convert an x87 register (double) into extended precision and
    # extract the lowest 16 bits.
    class cvtfp80l_int(ConvOp):
        code = '''
            char bits[10];
            storeFloat80(bits, FpSrcReg1);
            SDestReg = *(uint16_t *)(bits + 8);
            '''

    # These need to consider size at some point. They'll always use doubles
    # for the moment.
    class addfp(FpBinaryOp):
        code = 'FpDestReg = FpSrcReg1 + FpSrcReg2;'

    class mulfp(FpBinaryOp):
        code = 'FpDestReg = FpSrcReg1 * FpSrcReg2;'
        op_class = 'FloatMultOp'

    class divfp(FpBinaryOp):
        code = 'FpDestReg = FpSrcReg1 / FpSrcReg2;'
        op_class = 'FloatDivOp'

    class subfp(FpBinaryOp):
        code = 'FpDestReg = FpSrcReg1 - FpSrcReg2;'

    class Yl2xFp(FpBinaryOp):
        code = '''
            FpDestReg = FpSrcReg2 * (log(FpSrcReg1) / log(2));
        '''
        op_class = 'FloatSqrtOp'

    class PremFp(FpBinaryOp):
        code = '''
            MiscReg new_fsw(FSW);
            int src1_exp;
            int src2_exp;
            std::frexp(FpSrcReg1, &src1_exp);
            std::frexp(FpSrcReg2, &src2_exp);

            const int d(src2_exp - src1_exp);
            if (d < 64) {
                const int64_t q(std::trunc(FpSrcReg2 / FpSrcReg1));
                FpDestReg = FpSrcReg2 - FpSrcReg1 * q;
                new_fsw &= ~(CC0Bit | CC1Bit | CC2Bit | CC2Bit);
                new_fsw |= (q & 0x1) ? CC1Bit : 0;
                new_fsw |= (q & 0x2) ? CC3Bit : 0;
                new_fsw |= (q & 0x4) ? CC0Bit : 0;
            } else {
                const int n(42);
                const int64_t qq(std::trunc(
                    FpSrcReg2 / std::ldexp(FpSrcReg1, d - n)));
                FpDestReg = FpSrcReg2 - std::ldexp(FpSrcReg1 * qq, d - n);
                new_fsw |= CC2Bit;
            }
            DPRINTF(X86, "src1: %lf, src2: %lf, dest: %lf, FSW: 0x%x\\n",
                    FpSrcReg1, FpSrcReg2, FpDestReg, new_fsw);
        '''
        op_class = 'FloatDivOp'

        flag_code = 'FSW = new_fsw;'

    class Compfp(FpBinaryOp):
        def __init__(self, src1, src2, spm=0, setStatus=False, updateFTW=True, \
                dataSize="env.dataSize"):
            super(Compfp, self).__init__("InstRegIndex(FLOATREG_MICROFP0)", \
                    src1, src2, spm, setStatus, updateFTW, dataSize)
        # This class sets the condition codes in rflags according to the
        # rules for comparing floating point.
        code = '''
            //               ZF PF CF
            // Unordered      1  1  1
            // Greater than   0  0  0
            // Less than      0  0  1
            // Equal          1  0  0
            //           OF = SF = AF = 0
            ccFlagBits = ccFlagBits & ~(SFBit | AFBit | ZFBit | PFBit);
            cfofBits = cfofBits & ~(OFBit | CFBit);

            if (std::isnan(FpSrcReg1) || std::isnan(FpSrcReg2)) {
                ccFlagBits = ccFlagBits | (ZFBit | PFBit);
                cfofBits = cfofBits | CFBit;
            }
            else if(FpSrcReg1 < FpSrcReg2)
                cfofBits = cfofBits | CFBit;
            else if(FpSrcReg1 == FpSrcReg2)
                ccFlagBits = ccFlagBits | ZFBit;
        '''
        op_class = 'FloatCmpOp'

    class absfp(FpUnaryOp):
        code = 'FpDestReg = fabs(FpSrcReg1);'
        flag_code = 'FSW = FSW & (~CC1Bit);'

    class chsfp(FpUnaryOp):
        code = 'FpDestReg = (-1) * (FpSrcReg1);'
        flag_code = 'FSW = FSW & (~CC1Bit);'

    class Pop87(FpUnaryOp):
        def __init__(self, spm=1, UpdateFTW=True):
            super(Pop87, self).__init__(               \
                    "InstRegIndex(FLOATREG_MICROFP0)", \
                    "InstRegIndex(FLOATREG_MICROFP0)", \
                    spm=spm, SetStatus=False, UpdateFTW=UpdateFTW)

        code = ''
        op_class = 'IntAluOp'
}};