x86: decode instructions with vex prefix

This patch updates the x86 decoder so that it can decode instructions with a
VEX prefix. It also updates the ISA with opcodes from VEX opcode maps 1, 2 and
3. Note that none of the instructions have been implemented yet. The
implementations will be provided in due course.
This commit is contained in:
Nilay Vaish 2015-07-17 11:31:22 -05:00
parent fc5bf6713f
commit 0ef3dcc27b
9 changed files with 1710 additions and 7 deletions

View file

@ -48,6 +48,8 @@ Decoder::doResetState()
emi.rex = 0;
emi.legacy = 0;
emi.vex = 0;
emi.opcode.type = BadOpcode;
emi.opcode.op = 0;
@ -93,6 +95,19 @@ Decoder::process()
case PrefixState:
state = doPrefixState(nextByte);
break;
case TwoByteVexState:
state = doTwoByteVexState(nextByte);
break;
case ThreeByteVexFirstState:
state = doThreeByteVexFirstState(nextByte);
break;
case ThreeByteVexSecondState:
state = doThreeByteVexSecondState(nextByte);
break;
case OneByteOpcodeState:
state = doOneByteOpcodeState(nextByte);
break;
@ -206,15 +221,68 @@ Decoder::doPrefixState(uint8_t nextByte)
DPRINTF(Decoder, "Found Rex prefix %#x.\n", nextByte);
emi.rex = nextByte;
break;
case Vex2Prefix:
DPRINTF(Decoder, "Found VEX two-byte prefix %#x.\n", nextByte);
emi.vex.zero = nextByte;
nextState = TwoByteVexState;
break;
case Vex3Prefix:
DPRINTF(Decoder, "Found VEX three-byte prefix %#x.\n", nextByte);
emi.vex.zero = nextByte;
nextState = ThreeByteVexFirstState;
break;
case 0:
nextState = OneByteOpcodeState;
break;
default:
panic("Unrecognized prefix %#x\n", nextByte);
}
return nextState;
}
// Consume the single payload byte of a two-byte VEX prefix and expand it into
// the three-byte layout kept in emi.vex, so later stages only deal with one
// representation. Decoding continues with the opcode byte.
Decoder::State
Decoder::doTwoByteVexState(uint8_t nextByte)
{
    // This state is only entered after the 0xc5 escape byte was recorded.
    assert(emi.vex.zero == 0xc5);
    consumeByte();

    // Overlay the payload byte on the two-byte layout to pick its fields apart.
    TwoByteVex shortForm = 0;
    shortForm.first = nextByte;

    // Fields the two-byte form carries itself are copied across...
    emi.vex.first.r = shortForm.first.r;
    emi.vex.second.vvvv = shortForm.first.vvvv;
    emi.vex.second.l = shortForm.first.l;
    emi.vex.second.pp = shortForm.first.pp;

    // ...while the ones it has no room for are given fixed values.
    emi.vex.first.x = 1;
    emi.vex.first.b = 1;
    emi.vex.first.map_select = 1;
    emi.vex.second.w = 0;

    emi.opcode.type = Vex;
    return OneByteOpcodeState;
}
// Consume the first payload byte of a three-byte VEX prefix and store it in
// emi.vex.first. The second payload byte is handled by the next state.
Decoder::State
Decoder::doThreeByteVexFirstState(uint8_t nextByte)
{
    // Same sanity check as the two-byte handler performs for 0xc5: this state
    // must only be reached after the 0xc4 escape byte has been recorded.
    assert(emi.vex.zero == 0xc4);
    consumeByte();
    emi.vex.first = nextByte;
    return ThreeByteVexSecondState;
}
// Consume the second (last) payload byte of a three-byte VEX prefix, tag the
// instruction as VEX encoded and move on to the opcode byte.
Decoder::State
Decoder::doThreeByteVexSecondState(uint8_t nextByte)
{
    consumeByte();

    // The prefix is complete once this byte is stored.
    emi.opcode.type = Vex;
    emi.vex.second = nextByte;

    return OneByteOpcodeState;
}
// Load the first opcode byte. Determine if there are more opcode bytes, and
// if not, what immediate and/or ModRM is needed.
Decoder::State
@ -222,7 +290,13 @@ Decoder::doOneByteOpcodeState(uint8_t nextByte)
{
State nextState = ErrorState;
consumeByte();
if (nextByte == 0x0f) {
if (emi.vex.zero != 0) {
DPRINTF(Decoder, "Found VEX opcode %#x.\n", nextByte);
emi.opcode.op = nextByte;
const uint8_t opcode_map = emi.vex.first.map_select;
nextState = processExtendedOpcode(ImmediateTypeVex[opcode_map]);
} else if (nextByte == 0x0f) {
nextState = TwoByteOpcodeState;
DPRINTF(Decoder, "Found opcode escape byte %#x.\n", nextByte);
} else {
@ -346,6 +420,54 @@ Decoder::processOpcode(ByteTable &immTable, ByteTable &modrmTable,
return nextState;
}
// Finish processing an opcode that was introduced by a VEX prefix: derive the
// operand, address and stack sizes, look up the immediate size in immTable
// (indexed by the opcode byte) and pick the next decoder state.
Decoder::State
Decoder::processExtendedOpcode(ByteTable &immTable)
{
    // Effective operand size, as a log2 byte count. VEX.W takes precedence,
    // then pp == 1 selects the alternate size, otherwise the default applies.
    // This can be overridden to a fixed value at the decoder level.
    int opSizeLog2 = defOp;
    if (emi.vex.second.w)
        opSizeLog2 = 3; // 64 bit operand size
    else if (emi.vex.second.pp == 1)
        opSizeLog2 = altOp;
    emi.opSize = 1 << opSizeLog2;

    // Effective address size; also overridable at the decoder level.
    const int addrSizeLog2 = emi.legacy.addr ? altAddr : defAddr;
    emi.addrSize = 1 << addrSizeLog2;

    // Effective stack width; also overridable at the decoder level.
    emi.stackSize = 1 << stack;

    // Work out how big an immediate to retrieve, based on the opcode.
    const uint8_t opcode = emi.opcode.op;
    const bool sawVexEscape = emi.vex.zero == 0xc5 || emi.vex.zero == 0xc4;
    if (sawVexEscape) {
        // Assume 64-bit mode;
        immediateSize = SizeTypeToSize[2][immTable[opcode]];
    }

    // Every opcode except 0x77 goes on to fetch a ModRM byte.
    // NOTE(review): 0x77 is presumably VZEROUPPER/VZEROALL, which has no
    // ModRM; the check does not look at the opcode map -- confirm.
    if (opcode != 0x77)
        return ModRMState;

    instDone = true;
    return ResetState;
}
//Get the ModRM byte and determine what displacement, if any, there is.
//Also determine whether or not to get the SIB byte, displacement, or
//immediate next.
@ -353,8 +475,7 @@ Decoder::State
Decoder::doModRMState(uint8_t nextByte)
{
State nextState = ErrorState;
ModRM modRM;
modRM = nextByte;
ModRM modRM = nextByte;
DPRINTF(Decoder, "Found modrm byte %#x.\n", nextByte);
if (defOp == 1) {
//figure out 16 bit displacement size

View file

@ -64,6 +64,7 @@ class Decoder
static ByteTable ImmediateTypeTwoByte;
static ByteTable ImmediateTypeThreeByte0F38;
static ByteTable ImmediateTypeThreeByte0F3A;
static ByteTable ImmediateTypeVex[10];
protected:
struct InstBytes
@ -175,6 +176,9 @@ class Decoder
ResetState,
FromCacheState,
PrefixState,
TwoByteVexState,
ThreeByteVexFirstState,
ThreeByteVexSecondState,
OneByteOpcodeState,
TwoByteOpcodeState,
ThreeByte0F38OpcodeState,
@ -193,6 +197,9 @@ class Decoder
State doResetState();
State doFromCacheState();
State doPrefixState(uint8_t);
State doTwoByteVexState(uint8_t);
State doThreeByteVexFirstState(uint8_t);
State doThreeByteVexSecondState(uint8_t);
State doOneByteOpcodeState(uint8_t);
State doTwoByteOpcodeState(uint8_t);
State doThreeByte0F38OpcodeState(uint8_t);
@ -205,6 +212,8 @@ class Decoder
//Process the actual opcode found earlier, using the supplied tables.
State processOpcode(ByteTable &immTable, ByteTable &modrmTable,
bool addrSizedImm = false);
// Process the opcode found with VEX / XOP prefix.
State processExtendedOpcode(ByteTable &immTable);
protected:
/// Caching for decoded instruction objects.

View file

@ -55,6 +55,8 @@ namespace X86ISA
const uint8_t RE = Rep;
const uint8_t RN = Repne;
const uint8_t RX = RexPrefix;
const uint8_t V2 = Vex2Prefix;
const uint8_t V3 = Vex3Prefix;
//This table identifies whether a byte is a prefix, and if it is,
//which prefix it is.
@ -73,7 +75,7 @@ namespace X86ISA
/* 9*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
/* A*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
/* B*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
/* C*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
/* C*/ 0 , 0 , 0 , 0 , V3, V2, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
/* D*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
/* E*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0,
/* F*/ LO, 0 , RN, RE, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
@ -282,4 +284,74 @@ namespace X86ISA
/* E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
/* F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
};
// Immediate-type tables for VEX encoded opcodes, one 256-entry table per
// opcode map. The decoder indexes this array directly with the prefix's
// map_select value, so entry N must hold the table for opcode map N. Entry 0
// and the entries after map 3 are left empty (all zero, i.e. no immediate).
const Decoder::ByteTable Decoder::ImmediateTypeVex[10] =
{
    // Opcode map 0 has no table; this slot keeps the maps below aligned with
    // their map_select values.
    {},
    // Table for opcode map 1
    {
        //LSB
        // MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
        /*  0 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  1 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  2 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  6 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  7 */ BY, BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  8 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  A */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  C */ 0 , 0 , BY, 0 , BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  D */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
    },
    // Table for opcode map 2
    {
        //LSB
        // MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
        /*  0 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  1 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  2 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  4 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  6 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  7 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  8 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  A */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  C */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  D */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
    },
    // Table for opcode map 3
    {
        //LSB
        // MSB   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | A | B | C | D | E | F
        /*  0 */ 0 , 0 , 0 , 0 , BY, BY, BY, 0 , BY, BY, BY, BY, BY, BY, BY, BY,
        /*  1 */ 0 , 0 , 0 , 0 , BY, BY, BY, BY, BY, BY, 0 , 0 , 0 , BY, 0 , 0 ,
        /*  2 */ BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  3 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  4 */ BY, BY, BY, 0 , BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  5 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  6 */ BY, BY, BY, BY, 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  7 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  8 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  9 */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  A */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  B */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  C */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  D */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , BY,
        /*  E */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
        /*  F */ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0
    },
    // Opcode maps 4 through 9: no tables yet.
    {}, {}, {}, {}, {}, {}
};
}

View file

@ -87,3 +87,12 @@ def bitfield STACKSIZE stackSize;
def bitfield MODE mode;
def bitfield MODE_MODE mode.mode;
def bitfield MODE_SUBMODE mode.submode;
def bitfield VEX_R vex.first.r;
def bitfield VEX_X vex.first.x;
def bitfield VEX_B vex.first.b;
def bitfield VEX_MAP vex.first.map_select;
def bitfield VEX_W vex.second.w;
def bitfield VEX_VVVV vex.second.vvvv;
def bitfield VEX_L vex.second.l;
def bitfield VEX_PP vex.second.pp;

View file

@ -49,6 +49,7 @@ decode LEGACY_LOCK default Unknown::unknown()
##include "two_byte_opcodes.isa"
##include "three_byte_0f38_opcodes.isa"
##include "three_byte_0f3a_opcodes.isa"
##include "vex_opcodes.isa"
}
//Lock prefix
##include "locked_opcodes.isa"

File diff suppressed because it is too large Load diff

View file

@ -71,7 +71,8 @@ namespace X86ISA
const ExtMachInst NoopMachInst M5_VAR_USED = {
0x0, // No legacy prefixes.
0x0, // No rex prefix.
{ OneByteOpcode, 0x90 }, // One opcode byte, 0x90.
0x0, // No two / three byte escape sequence
{ OneByteOpcode, 0x90 }, // One opcode byte, 0x90.
0x0, 0x0, // No modrm or sib.
0, 0, // No immediate or displacement.
8, 8, 8, // All sizes are 8.

View file

@ -41,6 +41,7 @@ paramOut(CheckpointOut &cp, const string &name, ExtMachInst const &machInst)
// Prefixes
paramOut(cp, name + ".legacy", (uint8_t)machInst.legacy);
paramOut(cp, name + ".rex", (uint8_t)machInst.rex);
paramOut(cp, name + ".vex", (uint32_t)machInst.vex);
// Opcode
paramOut(cp, name + ".opcode.type", (uint8_t)machInst.opcode.type);
@ -75,6 +76,10 @@ paramIn(CheckpointIn &cp, const string &name, ExtMachInst &machInst)
paramIn(cp, name + ".rex", temp8);
machInst.rex = temp8;
uint32_t temp32;
paramIn(cp, name + ".vex", temp32);
machInst.vex = temp32;
// Opcode
paramIn(cp, name + ".opcode.type", temp8);
machInst.opcode.type = (OpcodeType)temp8;

View file

@ -67,7 +67,10 @@ namespace X86ISA
AddressSizeOverride,
Lock,
Rep,
Repne
Repne,
Vex2Prefix,
Vex3Prefix,
XopPrefix,
};
BitUnion8(LegacyPrefixVector)
@ -104,12 +107,55 @@ namespace X86ISA
Bitfield<0> b;
EndBitUnion(Rex)
// Packed image of a VEX prefix as kept in ExtMachInst::vex. The decoder stores
// the escape byte in `zero` and the two payload bytes of the three-byte form in
// `first` and `second`; a two-byte prefix is expanded into this same layout.
BitUnion(uint32_t, ThreeByteVex)
// Escape byte that introduced the prefix
Bitfield<7,0> zero;
// First payload byte
SubBitUnion(first, 15, 8)
// Inverted one-bit extension of ModRM reg field
Bitfield<15> r;
// Inverted one-bit extension of SIB index field
Bitfield<14> x;
// Inverted one-bit extension, r/m field or SIB base field
Bitfield<13> b;
// Opcode map select
Bitfield<12, 8> map_select;
EndSubBitUnion(first)
// Second payload byte
SubBitUnion(second, 23, 16)
// Default operand size override for a general purpose register to
// 64-bit size in 64-bit mode; operand configuration specifier for
// certain YMM/XMM-based operations.
Bitfield<23> w;
// Source or destination register selector, in ones' complement
// format
Bitfield<22, 19> vvvv;
// Vector length specifier
Bitfield<18> l;
// Implied 66, F2, or F3 opcode extension
Bitfield<17, 16> pp;
EndSubBitUnion(second)
EndBitUnion(ThreeByteVex)
// Layout of the two-byte VEX form: the escape byte followed by a single
// payload byte. The decoder uses it as a scratch overlay to split that payload
// byte into fields before copying them into the ThreeByteVex layout.
BitUnion16(TwoByteVex)
// Escape byte that introduced the prefix
Bitfield<7,0> zero;
// The single payload byte
SubBitUnion(first, 15, 8)
// Inverted one-bit extension of ModRM reg field
Bitfield<15> r;
// Source or destination register selector, in ones' complement
// format
Bitfield<14, 11> vvvv;
// Vector length specifier
Bitfield<10> l;
// Implied 66, F2, or F3 opcode extension
Bitfield<9, 8> pp;
EndSubBitUnion(first)
EndBitUnion(TwoByteVex)
enum OpcodeType {
BadOpcode,
OneByteOpcode,
TwoByteOpcode,
ThreeByte0F38Opcode,
ThreeByte0F3AOpcode
ThreeByte0F3AOpcode,
Vex,
};
static inline const char *
@ -126,6 +172,8 @@ namespace X86ISA
return "three byte 0f38";
case ThreeByte0F3AOpcode:
return "three byte 0f3a";
case Vex:
return "vex";
default:
return "unrecognized!";
}
@ -160,6 +208,10 @@ namespace X86ISA
//Prefixes
LegacyPrefixVector legacy;
Rex rex;
// We use the following field for encoding both two byte and three byte
// escape sequences
ThreeByteVex vex;
//This holds all of the bytes of the opcode
struct
{
@ -191,11 +243,13 @@ namespace X86ISA
operator << (std::ostream & os, const ExtMachInst & emi)
{
ccprintf(os, "\n{\n\tleg = %#x,\n\trex = %#x,\n\t"
"vex/xop = %#x,\n\t"
"op = {\n\t\ttype = %s,\n\t\top = %#x,\n\t\t},\n\t"
"modRM = %#x,\n\tsib = %#x,\n\t"
"immediate = %#x,\n\tdisplacement = %#x\n\t"
"dispSize = %d}\n",
(uint8_t)emi.legacy, (uint8_t)emi.rex,
(uint32_t)emi.vex,
opcodeTypeToStr(emi.opcode.type), (uint8_t)emi.opcode.op,
(uint8_t)emi.modRM, (uint8_t)emi.sib,
emi.immediate, emi.displacement, emi.dispSize);