x86 ISA: Implement the SSE3 HADDPS instruction.

Shuffle the 32-bit values into position, then add them in parallel.
This commit is contained in:
Marc Orr 2012-05-19 04:32:25 -07:00
parent 250c40799d
commit 16a559c9c6
2 changed files with 37 additions and 2 deletions

View file

@ -669,7 +669,7 @@
}
// repne (0xF2)
0x8: decode OPCODE_OP_BOTTOM3 {
0x4: WarnUnimpl::haddps_Vo_Wo();
0x4: HADDPS(Vo,Wo);
0x5: WarnUnimpl::hsubps_Vo_Wo();
default: UD2();
}

View file

@ -36,7 +36,42 @@
# Authors: Gabe Black
microcode = '''
# HADDPS
def macroop HADDPS_XMM_XMM {
    # Register/register form: shuffle the 32-bit values into position in
    # scratch registers, then add them in parallel.
    #
    # All four shuffles are issued before the first write to xmml/xmmh, so
    # no shuffle can observe a partially updated destination.
    #
    # NOTE(review): ext appears to hold one selector per 32-bit result
    # element (0 and 2 in one pattern, 1 and 3 in the other) -- confirm the
    # exact encoding against the shuffle microop definition.

    # Second-operand halves (xmmlm/xmmhm), one scratch per selector pattern.
    shuffle ufp3, xmmlm, xmmhm, ext=((0 << 0) | (2 << 2)), size=4
    shuffle ufp4, xmmlm, xmmhm, ext=((1 << 0) | (3 << 2)), size=4

    # Destination halves (xmml/xmmh), same two selector patterns.
    shuffle ufp1, xmml, xmmh, ext=((0 << 0) | (2 << 2)), size=4
    shuffle ufp2, xmml, xmmh, ext=((1 << 0) | (3 << 2)), size=4

    # Packed 32-bit adds: the low half of the result comes from the
    # destination's own elements, the high half from the second operand's.
    maddf xmml, ufp1, ufp2, size=4
    maddf xmmh, ufp3, ufp4, size=4
};
def macroop HADDPS_XMM_M {
    # Register/memory form (sib addressing): the second operand is read
    # from memory as two 8-byte pieces, at disp and at DISPLACEMENT+8.
    ldfp ufp1, seg, sib, disp, dataSize=8
    ldfp ufp2, seg, sib, "DISPLACEMENT+8", dataSize=8

    # Low half of the result: rearrange the destination's own halves and
    # add them as packed 32-bit values. Both shuffles read xmml/xmmh before
    # the maddf overwrites xmml.
    # NOTE(review): ext appears to hold one selector per 32-bit result
    # element -- confirm against the shuffle microop definition.
    shuffle ufp3, xmml, xmmh, ext=((0 << 0) | (2 << 2)), size=4
    shuffle ufp4, xmml, xmmh, ext=((1 << 0) | (3 << 2)), size=4
    maddf xmml, ufp3, ufp4, size=4

    # High half of the result: same rearrangement applied to the loaded
    # data. These shuffles read only ufp1/ufp2, so they do not depend on
    # the xmml write above.
    shuffle ufp5, ufp1, ufp2, ext=((0 << 0) | (2 << 2)), size=4
    shuffle ufp6, ufp1, ufp2, ext=((1 << 0) | (3 << 2)), size=4
    maddf xmmh, ufp5, ufp6, size=4
};
def macroop HADDPS_XMM_P {
    # Register/memory form using riprel addressing.
    # NOTE(review): t7 is presumably the base that riprel addressing
    # consumes -- confirm against the addressing-mode definitions.
    rdip t7

    # Read the second operand from memory as two 8-byte pieces, at disp
    # and at DISPLACEMENT+8.
    ldfp ufp1, seg, riprel, disp, dataSize=8
    ldfp ufp2, seg, riprel, "DISPLACEMENT+8", dataSize=8

    # Low half of the result: rearrange the destination's own halves and
    # add them as packed 32-bit values. Both shuffles read xmml/xmmh before
    # the maddf overwrites xmml.
    # NOTE(review): ext appears to hold one selector per 32-bit result
    # element -- confirm against the shuffle microop definition.
    shuffle ufp3, xmml, xmmh, ext=((0 << 0) | (2 << 2)), size=4
    shuffle ufp4, xmml, xmmh, ext=((1 << 0) | (3 << 2)), size=4
    maddf xmml, ufp3, ufp4, size=4

    # High half of the result: same rearrangement applied to the loaded
    # data. These shuffles read only ufp1/ufp2, so they do not depend on
    # the xmml write above.
    shuffle ufp5, ufp1, ufp2, ext=((0 << 0) | (2 << 2)), size=4
    shuffle ufp6, ufp1, ufp2, ext=((1 << 0) | (3 << 2)), size=4
    maddf xmmh, ufp5, ufp6, size=4
};
def macroop HADDPD_XMM_XMM {
maddf ufp1, xmmh , xmml, size=8, ext=Scalar