x86: implement rcpps and rcpss SSE insts
These are the single-precision approximate reciprocal operations: rcpps is the packed (vector) version and rcpss is the scalar version. This code was basically developed by copying the code for sqrtps and sqrtss. The mrcp micro-op was simplified relative to msqrt since there are no double-precision versions of this operation.
This commit is contained in:
parent
57b9f53afa
commit
a2c875c746
3 changed files with 77 additions and 5 deletions
|
@ -463,7 +463,7 @@
|
|||
0x0: MOVMSKPS(Gd,VRo);
|
||||
0x1: SQRTPS(Vo,Wo);
|
||||
0x2: WarnUnimpl::rqsrtps_Vo_Wo();
|
||||
0x3: WarnUnimpl::rcpps_Vo_Wo();
|
||||
0x3: RCPPS(Vo,Wo);
|
||||
0x4: ANDPS(Vo,Wo);
|
||||
0x5: ANDNPS(Vo,Wo);
|
||||
0x6: ORPS(Vo,Wo);
|
||||
|
@ -473,7 +473,7 @@
|
|||
0x4: decode OPCODE_OP_BOTTOM3 {
|
||||
0x1: SQRTSS(Vd,Wd);
|
||||
0x2: WarnUnimpl::rsqrtss_Vd_Wd();
|
||||
0x3: WarnUnimpl::rcpss_Vd_Wd();
|
||||
0x3: RCPSS(Vd,Wd);
|
||||
default: UD2();
|
||||
}
|
||||
// operand size (0x66)
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
# Copyright (c) 2007 The Hewlett-Packard Development Company
|
||||
# Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
|
@ -34,8 +36,41 @@
|
|||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Gabe Black
|
||||
# Steve Reinhardt
|
||||
|
||||
# Microcode for the SSE approximate-reciprocal instructions.
#
# Six macroops are defined, three operand forms for each instruction:
#   *_XMM_XMM  - source is a register; mrcp is applied to it directly.
#   *_XMM_M    - source is loaded with ldfp using sib addressing into a
#                ufp temporary, then mrcp is applied to the temporary.
#   *_XMM_P    - same as _M, but preceded by "rdip t7" and using riprel
#                addressing.
#
# RCPSS issues a single mrcp with ext=Scalar that writes xmml only.
# RCPPS issues two mrcp micro-ops with ext=0, one writing xmml and one
# writing xmmh; its memory forms load the two sources from DISPLACEMENT
# and DISPLACEMENT + 8.
# NOTE(review): the RCPSS memory forms load with dataSize=8 although mrcp
# is given size=4 -- confirm this matches the intended access width.
microcode = '''
|
||||
# RCPPS
|
||||
# RCPSS
|
||||
def macroop RCPSS_XMM_XMM {
|
||||
mrcp xmml, xmmlm, size=4, ext=Scalar
|
||||
};
|
||||
|
||||
def macroop RCPSS_XMM_M {
|
||||
ldfp ufp1, seg, sib, disp, dataSize=8
|
||||
mrcp xmml, ufp1, size=4, ext=Scalar
|
||||
};
|
||||
|
||||
def macroop RCPSS_XMM_P {
|
||||
rdip t7
|
||||
ldfp ufp1, seg, riprel, disp, dataSize=8
|
||||
mrcp xmml, ufp1, size=4, ext=Scalar
|
||||
};
|
||||
|
||||
def macroop RCPPS_XMM_XMM {
|
||||
mrcp xmml, xmmlm, size=4, ext=0
|
||||
mrcp xmmh, xmmhm, size=4, ext=0
|
||||
};
|
||||
|
||||
def macroop RCPPS_XMM_M {
|
||||
ldfp ufp1, seg, sib, "DISPLACEMENT", dataSize=8
|
||||
ldfp ufp2, seg, sib, "DISPLACEMENT + 8", dataSize=8
|
||||
mrcp xmml, ufp1, size=4, ext=0
|
||||
mrcp xmmh, ufp2, size=4, ext=0
|
||||
};
|
||||
|
||||
def macroop RCPPS_XMM_P {
|
||||
rdip t7
|
||||
ldfp ufp1, seg, riprel, "DISPLACEMENT", dataSize=8
|
||||
ldfp ufp2, seg, riprel, "DISPLACEMENT + 8", dataSize=8
|
||||
mrcp xmml, ufp1, size=4, ext=0
|
||||
mrcp xmmh, ufp2, size=4, ext=0
|
||||
};
|
||||
'''
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
/// Copyright (c) 2009 The Regents of The University of Michigan
|
||||
// Copyright (c) 2009 The Regents of The University of Michigan
|
||||
// Copyright (c) 2015 Advanced Micro Devices, Inc.
|
||||
//
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
|
@ -691,6 +693,41 @@ let {{
|
|||
FpDestReg_uqw = result;
|
||||
'''
|
||||
|
||||
# Mrcp: media micro-op that computes the reciprocal of each
# single-precision element of its source (single-precision only).
#
# The embedded code asserts that srcSize is 4 and equals destSize, then
# walks numItems(4) 32-bit elements. For each element it extracts the
# bits from FpSrcReg1_uqw, reinterprets them as a float through a union,
# computes 1.0 / value, and inserts the resulting bits at the same
# position in a copy of FpDestReg_uqw. The copy is written back to
# FpDestReg_uqw after the loop, so destination bits outside the processed
# elements keep their previous value.
#
# As the inline comment notes, the result is a full-precision division
# rather than the reduced-accuracy approximation real hardware returns.
|
||||
class Mrcp(MediaOp):
|
||||
# Forward dest and src to MediaOp; the literal "InstRegIndex(0)" is
# passed in the argument slot after src.
# NOTE(review): presumably a placeholder for an unused second source
# operand -- confirm against MediaOp.__init__.
def __init__(self, dest, src, \
|
||||
size = None, destSize = None, srcSize = None, ext = None):
|
||||
super(Mrcp, self).__init__(dest, src,\
|
||||
"InstRegIndex(0)", size, destSize, srcSize, ext)
|
||||
# Per-element reciprocal implementation (see the header comment above).
code = '''
|
||||
union floatInt
|
||||
{
|
||||
float f;
|
||||
uint32_t i;
|
||||
};
|
||||
|
||||
assert(srcSize == 4); // ISA defines single-precision only
|
||||
assert(srcSize == destSize);
|
||||
const int size = 4;
|
||||
const int sizeBits = size * 8;
|
||||
int items = numItems(size);
|
||||
uint64_t result = FpDestReg_uqw;
|
||||
|
||||
for (int i = 0; i < items; i++) {
|
||||
int hiIndex = (i + 1) * sizeBits - 1;
|
||||
int loIndex = (i + 0) * sizeBits;
|
||||
uint64_t argBits = bits(FpSrcReg1_uqw, hiIndex, loIndex);
|
||||
|
||||
floatInt fi;
|
||||
fi.i = argBits;
|
||||
// This is more accuracy than HW provides, but oh well
|
||||
fi.f = 1.0 / fi.f;
|
||||
argBits = fi.i;
|
||||
result = insertBits(result, hiIndex, loIndex, argBits);
|
||||
}
|
||||
FpDestReg_uqw = result;
|
||||
'''
|
||||
|
||||
class Maddf(MediaOp):
|
||||
code = '''
|
||||
union floatInt
|
||||
|
|
Loading…
Reference in a new issue