ARM: Implement all ARM SIMD instructions.

This commit is contained in:
Gabe Black 2010-08-25 19:10:42 -05:00
parent f4f6b31df1
commit 6368edb281
18 changed files with 7076 additions and 555 deletions

View file

@ -137,6 +137,647 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
}
}
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Macroop for the NEON VLDn (multiple structures) loads: move "regs"
    // double registers from [rn] with one or two wide load microops,
    // optionally update the base register, and — when each structure has
    // more than one element — deinterleave the loaded data into the
    // architectural destination registers.
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    // At most 32 bytes are moved, using one or two load microops.
    numMicroops = (regs > 2) ? 2 : 1;
    // rm == 15 encodes "no writeback" for these instructions.
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);
    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // When deinterleaving, load into temporary microop registers first
    // (starting at NumFloatArchRegs); otherwise load straight into the
    // destination registers.
    RegIndex rMid = deinterleave ? NumFloatArchRegs : vd * 2;

    // Only the first load microop enforces the requested alignment; the
    // second just needs a valid translation.
    uint32_t noAlign = TLB::MustBeOne;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        panic("Unrecognized number of registers %d.\n", regs);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Register-offset writeback: rn += rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm);
        } else {
            // rm == 13 selects the fixed post-increment form:
            // rn += number of bytes transferred (8 per register).
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        // Scatter the temporaries into the destinations, "inc" apart.
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                // Two structures' worth of data: deinterleave each half.
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            panic("Bad number of elements to deinterleave %d.\n", elems);
        }
    }
    assert(uopIdx == numMicroops);

    // All but the last microop may be squashed/replayed without retiring
    // the instruction; the last one marks completion.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Macroop for the NEON VLDn "single structure" loads: one load microop
    // pulls the data into temporary microop registers, then one unpack
    // microop per structure moves it into the destination lane(s).
    // "all" selects the load-to-all-lanes form; otherwise only "lane" is
    // written.
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);       // Bytes per element.
    unsigned loadSize = eBytes * elems;  // Total bytes transferred.
    // Number of microop registers the loaded data spans; only used by the
    // sanity-check asserts.
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);
    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;                // The load itself.
    bool wb = (rm != 15);           // rm == 15 encodes "no writeback".
    if (wb) numMicroops++;
    numMicroops += (regs / elems);  // One unpack microop per structure.
    microOps = new StaticInstPtr[numMicroops];

    // First temporary (non-architectural) FP microop register.
    RegIndex ufp0 = NumFloatArchRegs;

    unsigned uopIdx = 0;
    // Pick the load microop by total size; where several element widths
    // give the same total, dispatch on eBytes to get the element type
    // right.
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Report the value actually switched on (previously printed
        // "regs" by mistake).
        panic("Unrecognized load size %d.\n", loadSize);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Register-offset writeback: rn += rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm);
        } else {
            // rm == 13 selects the fixed post-increment form:
            // rn += number of bytes accessed.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    // Unpack the loaded element(s) into the destination register lane(s),
    // "inc" registers apart.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 1:
        // VLD1 to all lanes may spread over two registers (regs == 2).
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                panic("Bad size %d.\n", size);
                break;
            }
        }
        break;
      default:
        panic("Bad number of elements to unpack %d.\n", elems);
    }
    assert(uopIdx == numMicroops);

    // All but the last microop delay commit; the last marks completion.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Macroop for the NEON VSTn (multiple structures) stores: the mirror
    // image of VldMultOp. When each structure has more than one element,
    // first interleave the source registers into temporary microop
    // registers, then store them with one or two wide store microops and
    // optionally update the base register.
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    // At most 32 bytes are moved, using one or two store microops.
    numMicroops = (regs > 2) ? 2 : 1;
    // rm == 15 encodes "no writeback" for these instructions.
    bool wb = (rm != 15);
    bool interleave = (elems > 1);
    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Only the first store microop enforces the requested alignment.
    uint32_t noAlign = TLB::MustBeOne;

    // When interleaving, stage the data in temporary microop registers
    // (starting at NumFloatArchRegs); otherwise store straight from the
    // source registers.
    RegIndex rMid = interleave ? NumFloatArchRegs : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        // Gather the sources (spaced "inc" apart) into the temporaries.
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                // Two structures' worth of data: interleave each half.
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            panic("Bad number of elements to interleave %d.\n", elems);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        panic("Unrecognized number of registers %d.\n", regs);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Register-offset writeback: rn += rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm);
        } else {
            // rm == 13 selects the fixed post-increment form:
            // rn += number of bytes transferred (8 per register).
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    assert(uopIdx == numMicroops);

    // All but the last microop delay commit; the last marks completion.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    // Macroop for the NEON VSTn "single structure" stores: pack the
    // selected lane of each source register into temporary microop
    // registers, store them with one store microop, and optionally update
    // the base register. There is no store-to-all-lanes form, hence the
    // assert below.
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);        // Bytes per element.
    unsigned storeSize = eBytes * elems;  // Total bytes transferred.
    // Number of microop registers the stored data spans; only used by the
    // sanity-check asserts.
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);
    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;                // The store itself.
    bool wb = (rm != 15);           // rm == 15 encodes "no writeback".
    if (wb) numMicroops++;
    numMicroops += (regs / elems);  // One pack microop per structure.
    microOps = new StaticInstPtr[numMicroops];

    // First temporary (non-architectural) FP microop register.
    RegIndex ufp0 = NumFloatArchRegs;

    unsigned uopIdx = 0;
    // Pack the lane(s) out of the source registers (spaced "inc" apart)
    // into the temporaries.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 1:
        // NOTE(review): with assert(!all) above, the (all && regs == 2)
        // arm can never fire; kept for symmetry with VldSingleOp.
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                panic("Bad size %d.\n", size);
                break;
            }
        }
        break;
      default:
        panic("Bad number of elements to pack %d.\n", elems);
    }
    // Pick the store microop by total size; where several element widths
    // give the same total, dispatch on eBytes to get the element type
    // right.
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Report the value actually switched on (previously printed
        // "regs" by mistake).
        panic("Unrecognized store size %d.\n", storeSize);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            // Register-offset writeback: rn += rm.
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm);
        } else {
            // rm == 13 selects the fixed post-increment form:
            // rn += number of bytes accessed.
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    assert(uopIdx == numMicroops);

    // All but the last microop delay commit; the last marks completion.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
OpClass __opClass, IntRegIndex rn,
RegIndex vd, bool single, bool up,
@ -169,17 +810,25 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
bool tempUp = up;
for (int j = 0; j < count; j++) {
if (load) {
microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
tempUp, addr);
if (!single)
microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn, tempUp,
addr + (up ? 4 : -4));
if (single) {
microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
tempUp, addr);
} else {
microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
tempUp, addr);
microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
addr + (up ? 4 : -4));
}
} else {
microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
tempUp, addr);
if (!single)
microOps[i++] = new MicroStrFpUop(machInst, vd++, rn, tempUp,
addr + (up ? 4 : -4));
if (single) {
microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
tempUp, addr);
} else {
microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
tempUp, addr);
microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
addr + (up ? 4 : -4));
}
}
if (!tempUp) {
addr -= (single ? 4 : 8);
@ -216,7 +865,7 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
}
std::string
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
printMnemonic(ss);
@ -228,6 +877,19 @@ MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
return ss.str();
}
std::string
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    // Disassembles as: <mnemonic> ura, urb, urc
    std::stringstream ss;
    printMnemonic(ss);
    const RegIndex regList[] = { ura, urb, urc };
    for (int i = 0; i < 3; ++i) {
        if (i > 0)
            ss << ", ";
        printReg(ss, regList[i]);
    }
    return ss.str();
}
std::string
MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{

View file

@ -79,17 +79,67 @@ class MicroOp : public PredOp
}
};
/**
* Microops for Neon loads/stores
*/
class MicroNeonMemOp : public MicroOp
{
  protected:
    // Data register moved to/from memory and the integer register that
    // supplies the base address.
    RegIndex dest, ura;
    // Immediate byte offset added to the base address (callers pass 0 or
    // 16 for the second half of a wide transfer).
    uint32_t imm;
    // TLB request flags for the access; derived classes presumably add
    // alignment requirements on top of MustBeOne — TODO confirm.
    unsigned memAccessFlags;

    MicroNeonMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                   RegIndex _dest, RegIndex _ura, uint32_t _imm)
        : MicroOp(mnem, machInst, __opClass),
          dest(_dest), ura(_ura), imm(_imm),
          memAccessFlags(TLB::MustBeOne)
    {
    }
};
/**
* Microops for Neon load/store (de)interleaving
*/
class MicroNeonMixOp : public MicroOp
{
  protected:
    // Destination and source registers of the (de)interleave/pack/unpack
    // operation.
    RegIndex dest, op1;
    // Register stride between consecutive structures; call sites pass
    // "inc * 2" (microop-register units).
    uint32_t step;

    MicroNeonMixOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                   RegIndex _dest, RegIndex _op1, uint32_t _step)
        : MicroOp(mnem, machInst, __opClass),
          dest(_dest), op1(_op1), step(_step)
    {
    }
};
// Mix microop that operates on a single vector lane (the to/from-one-lane
// forms of the element loads/stores).
class MicroNeonMixLaneOp : public MicroNeonMixOp
{
  protected:
    // Index of the lane being read or written.
    unsigned lane;

    MicroNeonMixLaneOp(const char *mnem, ExtMachInst machInst,
                       OpClass __opClass, RegIndex _dest, RegIndex _op1,
                       uint32_t _step, unsigned _lane)
        : MicroNeonMixOp(mnem, machInst, __opClass, _dest, _op1, _step),
          lane(_lane)
    {
    }
};
/**
* Microops of the form IntRegA = IntRegB op Imm
*/
class MicroIntOp : public MicroOp
class MicroIntImmOp : public MicroOp
{
protected:
RegIndex ura, urb;
uint8_t imm;
MicroIntOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
RegIndex _ura, RegIndex _urb, uint8_t _imm)
MicroIntImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
RegIndex _ura, RegIndex _urb, uint8_t _imm)
: MicroOp(mnem, machInst, __opClass),
ura(_ura), urb(_urb), imm(_imm)
{
@ -98,10 +148,28 @@ class MicroIntOp : public MicroOp
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
/**
* Microops of the form IntRegA = IntRegB op IntRegC
*/
class MicroIntOp : public MicroOp
{
  protected:
    // ura = urb <op> urc; all three are integer register indices.
    RegIndex ura, urb, urc;

    MicroIntOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
               RegIndex _ura, RegIndex _urb, RegIndex _urc)
        : MicroOp(mnem, machInst, __opClass),
          ura(_ura), urb(_urb), urc(_urc)
    {
    }

    // Renders "<mnemonic> ura, urb, urc".
    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
/**
* Memory microops which use IntReg + Imm addressing
*/
class MicroMemOp : public MicroIntOp
class MicroMemOp : public MicroIntImmOp
{
protected:
bool up;
@ -109,7 +177,7 @@ class MicroMemOp : public MicroIntOp
MicroMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
RegIndex _ura, RegIndex _urb, bool _up, uint8_t _imm)
: MicroIntOp(mnem, machInst, __opClass, _ura, _urb, _imm),
: MicroIntImmOp(mnem, machInst, __opClass, _ura, _urb, _imm),
up(_up), memAccessFlags(TLB::MustBeOne | TLB::AlignWord)
{
}
@ -128,6 +196,46 @@ class MacroMemOp : public PredMacroOp
bool writeback, bool load, uint32_t reglist);
};
/**
 * Macroops for the microcoded NEON VLDn (load element/structure)
 * instructions.
 */
// Macroop for the NEON VLDn (multiple structures) load instructions.
class VldMultOp : public PredMacroOp
{
  protected:
    VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
              unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
              unsigned inc, uint32_t size, uint32_t align, RegIndex rm);
};
// Macroop for the NEON VLDn single-structure loads; "all" selects the
// load-to-all-lanes form, otherwise only "lane" is written.
class VldSingleOp : public PredMacroOp
{
  protected:
    VldSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                bool all, unsigned elems, RegIndex rn, RegIndex vd,
                unsigned regs, unsigned inc, uint32_t size,
                uint32_t align, RegIndex rm, unsigned lane);
};
/**
 * Macroops for the microcoded NEON VSTn (store element/structure)
 * instructions.
 */
// Macroop for the NEON VSTn (multiple structures) store instructions.
class VstMultOp : public PredMacroOp
{
  protected:
    // Parameter renamed "width" -> "elems" to match the definition in
    // macromem.cc (it is the number of elements per structure).
    VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
              unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
              unsigned inc, uint32_t size, uint32_t align, RegIndex rm);
};
// Macroop for the NEON VSTn single-structure stores; the definition
// asserts !all, so only the from-one-lane form is supported.
class VstSingleOp : public PredMacroOp
{
  protected:
    VstSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                bool all, unsigned elems, RegIndex rn, RegIndex vd,
                unsigned regs, unsigned inc, uint32_t size,
                uint32_t align, RegIndex rm, unsigned lane);
};
/**
* Base class for microcoded floating point memory instructions.
*/

View file

@ -118,24 +118,26 @@ simd_modified_imm(bool op, uint8_t cmode, uint8_t data)
break;
case 0xe:
if (op) {
bigData = 0;
for (int i = 7; i >= 0; i--) {
if (bits(data, i)) {
bigData |= (ULL(0xFF) << (i * 8));
}
}
} else {
bigData = (bigData << 0) | (bigData << 8) |
(bigData << 16) | (bigData << 24) |
(bigData << 32) | (bigData << 40) |
(bigData << 48) | (bigData << 56);
} else {
bigData = 0;
for (int i = 7; i >= 0; i--) {
if (bits(data, i)) {
bigData |= (0xFF << (i * 8));
}
}
}
break;
case 0xf:
if (!op) {
uint64_t bVal = bits(bigData, 6) ? (0x1F) : (0x20);
bigData = (bits(bigData, 5, 0) << 19) |
(bVal << 25) | (bits(bigData, 7) << 31);
bigData |= (bigData << 32);
break;
}
// Fall through
default:

View file

@ -251,6 +251,28 @@ class ArmStaticInst : public StaticInst
}
}
// Endianness-convert a value of type T viewed as an array of elements of
// type E: each E-sized element is individually converted between guest
// byte order and the requested big/little order, while the element order
// within T is preserved. The enclosing htog()/gtoh() pair converts the
// container as a whole into and back out of guest order around the
// per-element swaps.
// NOTE(review): relies on union type punning (written through tVal, read
// through eVals), which this codebase uses by convention.
template<class T, class E>
static inline T
cSwap(T val, bool big)
{
    // Number of E-sized elements packed into T.
    const unsigned count = sizeof(T) / sizeof(E);
    union {
        T tVal;
        E eVals[count];
    } conv;
    conv.tVal = htog(val);
    if (big) {
        for (unsigned i = 0; i < count; i++) {
            conv.eVals[i] = gtobe(conv.eVals[i]);
        }
    } else {
        for (unsigned i = 0; i < count; i++) {
            conv.eVals[i] = gtole(conv.eVals[i]);
        }
    }
    return gtoh(conv.tVal);
}
// Perform an interworking branch.
template<class XC>
static inline void

View file

@ -91,6 +91,20 @@ FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
return ss.str();
}
std::string
FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
    // Disassembles as: <mnemonic> dest, op1, op2, #imm
    std::stringstream ss;
    printMnemonic(ss);
    const int fpRegs[] = {
        dest + FP_Base_DepTag,
        op1 + FP_Base_DepTag,
        op2 + FP_Base_DepTag
    };
    for (int i = 0; i < 3; ++i) {
        if (i > 0)
            ss << ", ";
        printReg(ss, fpRegs[i]);
    }
    ccprintf(ss, ", #%d", imm);
    return ss.str();
}
namespace ArmISA
{
@ -117,7 +131,7 @@ prepFpState(uint32_t rMode)
}
void
finishVfp(FPSCR &fpscr, VfpSavedState state)
finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush)
{
int exceptions = fetestexcept(FeAllExceptions);
bool underflow = false;
@ -134,7 +148,7 @@ finishVfp(FPSCR &fpscr, VfpSavedState state)
underflow = true;
fpscr.ufc = 1;
}
if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
if ((exceptions & FeInexact) && !(underflow && flush)) {
fpscr.ixc = 1;
}
fesetround(state);
@ -142,7 +156,7 @@ finishVfp(FPSCR &fpscr, VfpSavedState state)
template <class fpType>
fpType
fixDest(FPSCR fpscr, fpType val, fpType op1)
fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
{
int fpClass = std::fpclassify(val);
fpType junk = 0.0;
@ -150,12 +164,12 @@ fixDest(FPSCR fpscr, fpType val, fpType op1)
const bool single = (sizeof(val) == sizeof(float));
const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
const bool nan = std::isnan(op1);
if (!nan || (fpscr.dn == 1)) {
if (!nan || defaultNan) {
val = bitsToFp(qnan, junk);
} else if (nan) {
val = bitsToFp(fpToBits(op1) | qnan, junk);
}
} else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
} else if (fpClass == FP_SUBNORMAL && flush == 1) {
// Turn val into a zero with the correct sign;
uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
val = bitsToFp(fpToBits(val) & bitMask, junk);
@ -166,13 +180,13 @@ fixDest(FPSCR fpscr, fpType val, fpType op1)
}
template
float fixDest<float>(FPSCR fpscr, float val, float op1);
float fixDest<float>(bool flush, bool defaultNan, float val, float op1);
template
double fixDest<double>(FPSCR fpscr, double val, double op1);
double fixDest<double>(bool flush, bool defaultNan, double val, double op1);
template <class fpType>
fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
fixDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
{
int fpClass = std::fpclassify(val);
fpType junk = 0.0;
@ -183,7 +197,7 @@ fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
const bool nan2 = std::isnan(op2);
const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
if ((!nan1 && !nan2) || defaultNan) {
val = bitsToFp(qnan, junk);
} else if (signal1) {
val = bitsToFp(fpToBits(op1) | qnan, junk);
@ -194,7 +208,7 @@ fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
} else if (nan2) {
val = op2;
}
} else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
} else if (fpClass == FP_SUBNORMAL && flush) {
// Turn val into a zero with the correct sign;
uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
val = bitsToFp(fpToBits(val) & bitMask, junk);
@ -205,15 +219,17 @@ fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
}
template
float fixDest<float>(FPSCR fpscr, float val, float op1, float op2);
float fixDest<float>(bool flush, bool defaultNan,
float val, float op1, float op2);
template
double fixDest<double>(FPSCR fpscr, double val, double op1, double op2);
double fixDest<double>(bool flush, bool defaultNan,
double val, double op1, double op2);
template <class fpType>
fpType
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
{
fpType mid = fixDest(fpscr, val, op1, op2);
fpType mid = fixDest(flush, defaultNan, val, op1, op2);
const bool single = (sizeof(fpType) == sizeof(float));
const fpType junk = 0.0;
if ((single && (val == bitsToFp(0x00800000, junk) ||
@ -228,7 +244,7 @@ fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
temp = op1 / op2;
if (flushToZero(temp)) {
feraiseexcept(FeUnderflow);
if (fpscr.fz) {
if (flush) {
feclearexcept(FeInexact);
mid = temp;
}
@ -239,9 +255,11 @@ fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
}
template
float fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2);
float fixDivDest<float>(bool flush, bool defaultNan,
float val, float op1, float op2);
template
double fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2);
double fixDivDest<double>(bool flush, bool defaultNan,
double val, double op1, double op2);
float
fixFpDFpSDest(FPSCR fpscr, double val)
@ -255,7 +273,7 @@ fixFpDFpSDest(FPSCR fpscr, double val)
(bits(valBits, 63) << 31);
op1 = bitsToFp(op1Bits, junk);
}
float mid = fixDest(fpscr, (float)val, op1);
float mid = fixDest(fpscr.fz, fpscr.dn, (float)val, op1);
if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
(FeUnderflow | FeInexact)) {
feclearexcept(FeInexact);
@ -291,7 +309,7 @@ fixFpSFpDDest(FPSCR fpscr, float val)
((uint64_t)bits(valBits, 31) << 63);
op1 = bitsToFp(op1Bits, junk);
}
double mid = fixDest(fpscr, (double)val, op1);
double mid = fixDest(fpscr.fz, fpscr.dn, (double)val, op1);
if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
mid == bitsToFp(ULL(0x8010000000000000), junk)) {
__asm__ __volatile__("" : "=m" (val) : "m" (val));
@ -311,11 +329,10 @@ fixFpSFpDDest(FPSCR fpscr, float val)
return mid;
}
float
vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
uint16_t
vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
uint32_t rMode, bool ahp, float op)
{
float junk = 0.0;
uint32_t destBits = fpToBits(dest);
uint32_t opBits = fpToBits(op);
// Extract the operand.
bool neg = bits(opBits, 31);
@ -331,11 +348,11 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
// Signalling nan.
fpscr.ioc = 1;
}
if (fpscr.ahp) {
if (ahp) {
mantissa = 0;
exponent = 0;
fpscr.ioc = 1;
} else if (fpscr.dn) {
} else if (defaultNan) {
mantissa = (1 << 9);
exponent = 0x1f;
neg = false;
@ -346,7 +363,7 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
} else {
// Infinities.
exponent = 0x1F;
if (fpscr.ahp) {
if (ahp) {
fpscr.ioc = 1;
mantissa = 0x3ff;
} else {
@ -364,14 +381,14 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
// Denormalized.
// If flush to zero is on, this shouldn't happen.
assert(fpscr.fz == 0);
assert(!flush);
// Check for underflow
if (inexact || fpscr.ufe)
fpscr.ufc = 1;
// Handle rounding.
unsigned mode = fpscr.rMode;
unsigned mode = rMode;
if ((mode == VfpRoundUpward && !neg && extra) ||
(mode == VfpRoundDown && neg && extra) ||
(mode == VfpRoundNearest &&
@ -416,7 +433,7 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
}
// Handle rounding.
unsigned mode = fpscr.rMode;
unsigned mode = rMode;
bool nonZero = topOne || !restZeros;
if ((mode == VfpRoundUpward && !neg && nonZero) ||
(mode == VfpRoundDown && neg && nonZero) ||
@ -432,7 +449,7 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
}
// Deal with overflow
if (fpscr.ahp) {
if (ahp) {
if (exponent >= 0x20) {
exponent = 0x1f;
mantissa = 0x3ff;
@ -468,27 +485,17 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
replaceBits(result, 14, 10, exponent);
if (neg)
result |= (1 << 15);
if (top)
replaceBits(destBits, 31, 16, result);
else
replaceBits(destBits, 15, 0, result);
return bitsToFp(destBits, junk);
return result;
}
float
vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
{
float junk = 0.0;
uint32_t opBits = fpToBits(op);
// Extract the operand.
if (top)
opBits = bits(opBits, 31, 16);
else
opBits = bits(opBits, 15, 0);
// Extract the bitfields.
bool neg = bits(opBits, 15);
uint32_t exponent = bits(opBits, 14, 10);
uint32_t mantissa = bits(opBits, 9, 0);
bool neg = bits(op, 15);
uint32_t exponent = bits(op, 14, 10);
uint32_t mantissa = bits(op, 9, 0);
// Do the conversion.
if (exponent == 0) {
if (mantissa != 0) {
@ -500,7 +507,7 @@ vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
}
}
mantissa = mantissa << (23 - 10);
} else if (exponent == 0x1f && !fpscr.ahp) {
} else if (exponent == 0x1f && !ahp) {
// Infinities and nans.
exponent = 0xff;
if (mantissa != 0) {
@ -511,7 +518,7 @@ vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
fpscr.ioc = 1;
mantissa |= (1 << 22);
}
if (fpscr.dn) {
if (defaultNan) {
mantissa &= ~mask(22);
neg = false;
}
@ -624,7 +631,8 @@ vfpFpSToFixed(float val, bool isSigned, bool half,
}
float
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
vfpUFixedToFpS(bool flush, bool defaultNan,
uint32_t val, bool half, uint8_t imm)
{
fesetround(FeRoundNearest);
if (half)
@ -633,11 +641,12 @@ vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
return fixDivDest(fpscr, val / scale, (float)val, scale);
return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
}
float
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
vfpSFixedToFpS(bool flush, bool defaultNan,
int32_t val, bool half, uint8_t imm)
{
fesetround(FeRoundNearest);
if (half)
@ -646,7 +655,7 @@ vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
return fixDivDest(fpscr, val / scale, (float)val, scale);
return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
}
uint64_t
@ -743,7 +752,8 @@ vfpFpDToFixed(double val, bool isSigned, bool half,
}
double
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
vfpUFixedToFpD(bool flush, bool defaultNan,
uint32_t val, bool half, uint8_t imm)
{
fesetround(FeRoundNearest);
if (half)
@ -752,11 +762,12 @@ vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
return fixDivDest(fpscr, val / scale, (double)val, scale);
return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
}
double
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
vfpSFixedToFpD(bool flush, bool defaultNan,
int32_t val, bool half, uint8_t imm)
{
fesetround(FeRoundNearest);
if (half)
@ -765,14 +776,211 @@ vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
return fixDivDest(fpscr, val / scale, (double)val, scale);
return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
}
// This function implements a magic formula taken from the architecture
// reference manual. It was originally called recip_sqrt_estimate.
static double
recipSqrtEstimate(double a)
{
    // The operand is quantized with either 9 or 8 fractional bits,
    // depending on which half of the input range it falls in.
    const double quantum = (a < 0.5) ? 512.0 : 256.0;
    const int64_t fixed = (int64_t)(a * quantum);
    // Take the reciprocal square root at the midpoint of the bucket...
    const double estimate = 1.0 / sqrt(((double)fixed + 0.5) / quantum);
    // ...and round the result to 8 fractional bits.
    const int64_t rounded = (int64_t)(256.0 * estimate + 0.5);
    return (double)rounded / 256.0;
}
// This function is only intended for use in Neon instructions because
// it ignores certain bits in the FPSCR.
//
// Computes the reciprocal square root estimate of op (VRSQRTE), handling
// the IEEE special cases explicitly before delegating to the table based
// recipSqrtEstimate magic formula for normal positive values.
float
fprSqrtEstimate(FPSCR &fpscr, float op)
{
    // Canonical single precision quiet NaN bit pattern.
    const uint32_t qnan = 0x7fc00000;
    float junk = 0.0;
    int fpClass = std::fpclassify(op);
    if (fpClass == FP_NAN) {
        // A signalling NaN (quiet bit clear) raises invalid operation.
        if ((fpToBits(op) & qnan) != qnan)
            fpscr.ioc = 1;
        // Any NaN input produces the default quiet NaN.
        return bitsToFp(qnan, junk);
    } else if (fpClass == FP_ZERO) {
        // 1/sqrt(0) diverges: flag divide by zero.
        fpscr.dzc = 1;
        // Return infinity with the same sign as the operand.
        return bitsToFp((std::signbit(op) << 31) |
                        (0xFF << 23) | (0 << 0), junk);
    } else if (std::signbit(op)) {
        // The square root of a negative number is undefined.
        // Set invalid op bit.
        fpscr.ioc = 1;
        return bitsToFp(qnan, junk);
    } else if (fpClass == FP_INFINITE) {
        // 1/sqrt(inf) == +0.
        return 0.0;
    } else {
        // Rescale the operand into [0.25, 1.0) as a double: keep the
        // significand and force the exponent field to 0x3fd (2^-2) or
        // 0x3fe (2^-1) based on bit 23, the low bit of the original
        // exponent, so the exponent's parity is accounted for.
        uint64_t opBits = fpToBits(op);
        double scaled;
        if (bits(opBits, 23)) {
            scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
                              (ULL(0x3fd) << 52) | (bits(opBits, 31) << 63),
                              (double)0.0);
        } else {
            scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
                              (ULL(0x3fe) << 52) | (bits(opBits, 31) << 63),
                              (double)0.0);
        }
        // Result exponent comes from the architecture's magic formula
        // (originally recip_sqrt_estimate in the ARM ARM).
        uint64_t resultExp = (380 - bits(opBits, 30, 23)) / 2;
        uint64_t estimate = fpToBits(recipSqrtEstimate(scaled));
        // Reassemble: sign bit of the estimate, the computed exponent,
        // and the top mantissa bits of the estimate.
        return bitsToFp((bits(estimate, 63) << 31) |
                        (bits(resultExp, 7, 0) << 23) |
                        (bits(estimate, 51, 29) << 0), junk);
    }
}
uint32_t
unsignedRSqrtEstimate(uint32_t op)
{
    // Operands with both top bits clear are out of range;
    // saturate to all ones.
    if (bits(op, 31, 30) == 0)
        return -1;

    // Reinterpret the fixed point operand as a double, placing as many
    // operand bits as will fit into the mantissa; the exponent (0x3fd
    // or 0x3fe) keeps the scaled value in [0.25, 1.0).
    double scaled;
    if (bits(op, 31)) {
        scaled = bitsToFp((ULL(0) << 63) |
                          (ULL(0x3fe) << 52) |
                          (bits((uint64_t)op, 30, 0) << 21) |
                          (0 << 0), (double)0.0);
    } else {
        scaled = bitsToFp((ULL(0) << 63) |
                          (ULL(0x3fd) << 52) |
                          (bits((uint64_t)op, 29, 0) << 22) |
                          (0 << 0), (double)0.0);
    }

    // Pack the top mantissa bits of the estimate behind an implied one.
    const uint64_t estBits = fpToBits(recipSqrtEstimate(scaled));
    return (1 << 31) | bits(estBits, 51, 21);
}
// This function implements a magic formula taken from the architecture
// reference manual. It was originally called recip_estimate.
static double
recipEstimate(double a)
{
    // Quantize the operand to 9 fractional bits, take the reciprocal at
    // the midpoint of that bucket, then round to 8 fractional bits.
    const int64_t fixed = (int64_t)(a * 512.0);
    const double estimate = 1.0 / (((double)fixed + 0.5) / 512.0);
    const int64_t rounded = (int64_t)(256.0 * estimate + 0.5);
    return (double)rounded / 256.0;
}
// This function is only intended for use in Neon instructions because
// it ignores certain bits in the FPSCR.
//
// Computes the reciprocal estimate of op (VRECPE), handling the IEEE
// special cases explicitly before delegating to the table based
// recipEstimate magic formula for in-range values.
float
fpRecipEstimate(FPSCR &fpscr, float op)
{
    // Canonical single precision quiet NaN bit pattern.
    const uint32_t qnan = 0x7fc00000;
    float junk = 0.0;
    int fpClass = std::fpclassify(op);
    if (fpClass == FP_NAN) {
        // A signalling NaN (quiet bit clear) raises invalid operation.
        if ((fpToBits(op) & qnan) != qnan)
            fpscr.ioc = 1;
        // Any NaN input produces the default quiet NaN.
        return bitsToFp(qnan, junk);
    } else if (fpClass == FP_INFINITE) {
        // 1/inf == zero, carrying the operand's sign.
        return bitsToFp(std::signbit(op) << 31, junk);
    } else if (fpClass == FP_ZERO) {
        // 1/0 diverges: flag divide by zero.
        fpscr.dzc = 1;
        // Return infinity with the same sign as the operand.
        return bitsToFp((std::signbit(op) << 31) |
                        (0xFF << 23) | (0 << 0), junk);
    } else if (fabs(op) >= pow(2.0, 126)) {
        // The reciprocal would underflow; flag it and return signed zero.
        fpscr.ufc = 1;
        return bitsToFp(std::signbit(op) << 31, junk);
    } else {
        // Rescale the magnitude into [0.5, 1.0) as a double by keeping
        // the significand and forcing the exponent field to 0x3fe (2^-1),
        // then apply the estimate table.
        uint64_t opBits = fpToBits(op);
        double scaled;
        scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
                          (ULL(0x3fe) << 52) | (ULL(0) << 63),
                          (double)0.0);
        // Result exponent comes from the architecture's magic formula
        // (originally recip_estimate in the ARM ARM).
        uint64_t resultExp = 253 - bits(opBits, 30, 23);
        uint64_t estimate = fpToBits(recipEstimate(scaled));
        // Reassemble: original sign, the computed exponent, and the top
        // mantissa bits of the estimate.
        return bitsToFp((bits(opBits, 31) << 31) |
                        (bits(resultExp, 7, 0) << 23) |
                        (bits(estimate, 51, 29) << 0), junk);
    }
}
uint32_t
unsignedRecipEstimate(uint32_t op)
{
if (bits(op, 31) == 0) {
return -1;
} else {
double dpOp;
dpOp = bitsToFp((ULL(0) << 63) |
(ULL(0x3fe) << 52) |
(bits((uint64_t)op, 30, 0) << 21) |
(0 << 0), (double)0.0);
uint64_t estimate = fpToBits(recipEstimate(dpOp));
return (1 << 31) | bits(estimate, 51, 21);
}
}
// Shared NaN handling for two-operand floating point operations. If
// either operand is a NaN, computes the propagated/default result in
// dest and sets done; otherwise clears done so the caller performs the
// real operation.
template <class fpType>
fpType
FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
                  fpType op1, fpType op2) const
{
    // Assume a NaN is present; cleared below if both operands are numbers.
    done = true;
    fpType junk = 0.0;
    fpType dest = 0.0;
    const bool single = (sizeof(fpType) == sizeof(float));
    // Quiet NaN bit pattern for the operand width.
    const uint64_t qnan =
        single ? 0x7fc00000 : ULL(0x7ff8000000000000);
    const bool nan1 = std::isnan(op1);
    const bool nan2 = std::isnan(op2);
    // A signalling NaN has the quiet bit (top mantissa bit) clear.
    const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
    const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
    if (nan1 || nan2) {
        if (defaultNan) {
            // Default NaN mode: any NaN input yields the canonical NaN.
            dest = bitsToFp(qnan, junk);
        } else if (signal1) {
            // Propagate op1's payload, quieted. Signalling NaNs take
            // precedence over quiet ones, op1 over op2.
            dest = bitsToFp(fpToBits(op1) | qnan, junk);
        } else if (signal2) {
            dest = bitsToFp(fpToBits(op2) | qnan, junk);
        } else if (nan1) {
            dest = op1;
        } else if (nan2) {
            dest = op2;
        }
        // Any signalling NaN raises the invalid operation flag.
        if (signal1 || signal2) {
            fpscr.ioc = 1;
        }
    } else {
        // No NaNs; the caller should perform the actual operation.
        done = false;
    }
    return dest;
}
template
float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
float op1, float op2) const;
template
double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
double op1, double op2) const;
template <class fpType>
fpType
FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
fpType (*func)(fpType, fpType),
bool flush, uint32_t rMode) const
bool flush, bool defaultNan, uint32_t rMode) const
{
const bool single = (sizeof(fpType) == sizeof(float));
fpType junk = 0.0;
@ -795,7 +1003,7 @@ FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
const bool nan2 = std::isnan(op2);
const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
if ((!nan1 && !nan2) || (defaultNan == 1)) {
dest = bitsToFp(qnan, junk);
} else if (signal1) {
dest = bitsToFp(fpToBits(op1) | qnan, junk);
@ -828,18 +1036,18 @@ FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
dest = temp;
}
}
finishVfp(fpscr, state);
finishVfp(fpscr, state, flush);
return dest;
}
template
float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
float (*func)(float, float),
bool flush, uint32_t rMode) const;
bool flush, bool defaultNan, uint32_t rMode) const;
template
double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
double (*func)(double, double),
bool flush, uint32_t rMode) const;
bool flush, bool defaultNan, uint32_t rMode) const;
template <class fpType>
fpType
@ -890,7 +1098,7 @@ FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
dest = temp;
}
}
finishVfp(fpscr, state);
finishVfp(fpscr, state, flush);
return dest;
}

View file

@ -192,10 +192,20 @@ bitsToFp(uint64_t bits, double junk)
return val.fp;
}
// Returns true if val is a signalling NaN: a NaN whose quiet bit (the
// top mantissa bit) is clear.
template <class fpType>
static bool
isSnan(fpType val)
{
    if (!std::isnan(val))
        return false;
    // Quiet NaN pattern for the operand width (exponent + quiet bit).
    const uint64_t qnan = (sizeof(fpType) == sizeof(float)) ?
        0x7fc00000 : ULL(0x7ff8000000000000);
    return (fpToBits(val) & qnan) != qnan;
}
typedef int VfpSavedState;
VfpSavedState prepFpState(uint32_t rMode);
void finishVfp(FPSCR &fpscr, VfpSavedState state);
void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush);
template <class fpType>
fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
@ -209,8 +219,9 @@ fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
float fixFpDFpSDest(FPSCR fpscr, double val);
double fixFpSFpDDest(FPSCR fpscr, float val);
float vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top);
float vcvtFpHFpS(FPSCR &fpscr, float op, bool top);
uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
uint32_t rMode, bool ahp, float op);
float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
static inline double
makeDouble(uint32_t low, uint32_t high)
@ -233,13 +244,23 @@ highFromDouble(double val)
uint64_t vfpFpSToFixed(float val, bool isSigned, bool half,
uint8_t imm, bool rzero = true);
float vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm);
float vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm);
float vfpUFixedToFpS(bool flush, bool defaultNan,
uint32_t val, bool half, uint8_t imm);
float vfpSFixedToFpS(bool flush, bool defaultNan,
int32_t val, bool half, uint8_t imm);
uint64_t vfpFpDToFixed(double val, bool isSigned, bool half,
uint8_t imm, bool rzero = true);
double vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm);
double vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm);
double vfpUFixedToFpD(bool flush, bool defaultNan,
uint32_t val, bool half, uint8_t imm);
double vfpSFixedToFpD(bool flush, bool defaultNan,
int32_t val, bool half, uint8_t imm);
float fprSqrtEstimate(FPSCR &fpscr, float op);
uint32_t unsignedRSqrtEstimate(uint32_t op);
float fpRecipEstimate(FPSCR &fpscr, float op);
uint32_t unsignedRecipEstimate(uint32_t op);
class VfpMacroOp : public PredMacroOp
{
@ -312,6 +333,66 @@ fpMulD(double a, double b)
return a * b;
}
// Single precision maximum. fmaxf may return either zero when given
// +0 and -0, but the maximum of +0 and -0 must be +0 regardless of
// operand order, so both orderings are handled explicitly. (The
// original code only handled a == +0, b == -0.)
static inline float
fpMaxS(float a, float b)
{
    // Handle comparisons of +0 and -0.
    if (!std::signbit(a) && std::signbit(b))
        return a;
    if (std::signbit(a) && !std::signbit(b))
        return b;
    return fmaxf(a, b);
}
// Single precision minimum. fminf may return either zero when given
// +0 and -0, but the minimum of +0 and -0 must be -0 regardless of
// operand order, so both orderings are handled explicitly. (The
// original code only handled a == -0, b == +0.)
static inline float
fpMinS(float a, float b)
{
    // Handle comparisons of +0 and -0.
    if (std::signbit(a) && !std::signbit(b))
        return a;
    if (!std::signbit(a) && std::signbit(b))
        return b;
    return fminf(a, b);
}
// VRSQRTS step function: (3 - a*b) / 2, with the special cases the
// architecture calls out handled explicitly.
static inline float
fpRSqrtsS(float a, float b)
{
    const int classA = std::fpclassify(a);
    const int classB = std::fpclassify(b);
    // Zero times infinity (in either order) yields exactly 1.5.
    if ((classA == FP_ZERO && classB == FP_INFINITE) ||
        (classA == FP_INFINITE && classB == FP_ZERO)) {
        return 1.5;
    }
    const float prod = a * b;
    if (std::fpclassify(prod) == FP_SUBNORMAL) {
        // The product underflowed; flag it and return the special value.
        feraiseexcept(FeUnderflow);
        return 1.5;
    }
    return (3.0 - prod) / 2.0;
}
// VRECPS step function: 2 - a*b, with the special cases the
// architecture calls out handled explicitly.
static inline float
fpRecpsS(float a, float b)
{
    const int classA = std::fpclassify(a);
    const int classB = std::fpclassify(b);
    // Zero times infinity (in either order) yields exactly 2.0.
    if ((classA == FP_ZERO && classB == FP_INFINITE) ||
        (classA == FP_INFINITE && classB == FP_ZERO)) {
        return 2.0;
    }
    const float prod = a * b;
    if (std::fpclassify(prod) == FP_SUBNORMAL) {
        // The product underflowed; flag it and return the special value.
        feraiseexcept(FeUnderflow);
        return 2.0;
    }
    return 2.0 - prod;
}
class FpOp : public PredOp
{
protected:
@ -362,11 +443,16 @@ class FpOp : public PredOp
return fpToBits(val) >> 32;
}
template <class fpType>
fpType
processNans(FPSCR &fpscr, bool &done, bool defaultNan,
fpType op1, fpType op2) const;
template <class fpType>
fpType
binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
fpType (*func)(fpType, fpType),
bool flush, uint32_t rMode) const;
bool flush, bool defaultNan, uint32_t rMode) const;
template <class fpType>
fpType
@ -445,6 +531,27 @@ class FpRegRegRegOp : public FpOp
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
// Base class for FP/Neon instructions with a destination register, two
// source registers, and an immediate operand.
class FpRegRegRegImmOp : public FpOp
{
  protected:
    IntRegIndex dest;   // Destination register index.
    IntRegIndex op1;    // First source register index.
    IntRegIndex op2;    // Second source register index.
    uint64_t imm;       // Immediate operand.

    FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst,
                     OpClass __opClass, IntRegIndex _dest,
                     IntRegIndex _op1, IntRegIndex _op2,
                     uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
        FpOp(mnem, _machInst, __opClass),
        dest(_dest), op1(_op1), op2(_op2), imm(_imm)
    {
        // Record whether this instruction is a VFP microop in the flags.
        setVfpMicroFlags(mode, flags);
    }

    std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
};
}
#endif //__ARCH_ARM_INSTS_VFP_HH__

View file

@ -88,7 +88,7 @@ decode BIGTHUMB {
0xf: McrMrc15::mcrMrc15();
}
}
0x3: WarnUnimpl::Advanced_SIMD();
0x3: ThumbNeonData::ThumbNeonData();
default: decode LTCOPROC {
0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStre();
0xf: decode HTOPCODE_9_4 {

File diff suppressed because it is too large Load diff

View file

@ -282,7 +282,7 @@ let {{
exec_output += PredOpExecute.subst(vmovRegQIop);
vmovCoreRegBCode = '''
FpDest.uw = insertBits(FpDest.uw, imm * 8, imm * 8 + 7, Op1.ub);
FpDest.uw = insertBits(FpDest.uw, imm * 8 + 7, imm * 8, Op1.ub);
'''
vmovCoreRegBIop = InstObjParams("vmov", "VmovCoreRegB", "FpRegRegImmOp",
{ "code": vmovCoreRegBCode,
@ -292,7 +292,7 @@ let {{
exec_output += PredOpExecute.subst(vmovCoreRegBIop);
vmovCoreRegHCode = '''
FpDest.uw = insertBits(FpDest.uw, imm * 16, imm * 16 + 15, Op1.uh);
FpDest.uw = insertBits(FpDest.uw, imm * 16 + 15, imm * 16, Op1.uh);
'''
vmovCoreRegHIop = InstObjParams("vmov", "VmovCoreRegH", "FpRegRegImmOp",
{ "code": vmovCoreRegHCode,
@ -312,7 +312,8 @@ let {{
exec_output += PredOpExecute.subst(vmovCoreRegWIop);
vmovRegCoreUBCode = '''
Dest = bits(FpOp1.uw, imm * 8, imm * 8 + 7);
assert(imm < 4);
Dest = bits(FpOp1.uw, imm * 8 + 7, imm * 8);
'''
vmovRegCoreUBIop = InstObjParams("vmov", "VmovRegCoreUB", "FpRegRegImmOp",
{ "code": vmovRegCoreUBCode,
@ -322,7 +323,8 @@ let {{
exec_output += PredOpExecute.subst(vmovRegCoreUBIop);
vmovRegCoreUHCode = '''
Dest = bits(FpOp1.uw, imm * 16, imm * 16 + 15);
assert(imm < 2);
Dest = bits(FpOp1.uw, imm * 16 + 15, imm * 16);
'''
vmovRegCoreUHIop = InstObjParams("vmov", "VmovRegCoreUH", "FpRegRegImmOp",
{ "code": vmovRegCoreUHCode,
@ -332,7 +334,8 @@ let {{
exec_output += PredOpExecute.subst(vmovRegCoreUHIop);
vmovRegCoreSBCode = '''
Dest = sext<8>(bits(FpOp1.uw, imm * 8, imm * 8 + 7));
assert(imm < 4);
Dest = sext<8>(bits(FpOp1.uw, imm * 8 + 7, imm * 8));
'''
vmovRegCoreSBIop = InstObjParams("vmov", "VmovRegCoreSB", "FpRegRegImmOp",
{ "code": vmovRegCoreSBCode,
@ -342,7 +345,8 @@ let {{
exec_output += PredOpExecute.subst(vmovRegCoreSBIop);
vmovRegCoreSHCode = '''
Dest = sext<16>(bits(FpOp1.uw, imm * 16, imm * 16 + 15));
assert(imm < 2);
Dest = sext<16>(bits(FpOp1.uw, imm * 16 + 15, imm * 16));
'''
vmovRegCoreSHIop = InstObjParams("vmov", "VmovRegCoreSH", "FpRegRegImmOp",
{ "code": vmovRegCoreSHCode,
@ -396,7 +400,7 @@ let {{
Fpscr = fpscr;
'''
singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \
"%(func)s, fpscr.fz, fpscr.rMode)"
"%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)"
doubleCode = '''
FPSCR fpscr = Fpscr;
@ -408,7 +412,7 @@ let {{
doubleBinOp = '''
binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
%(func)s, fpscr.fz, fpscr.rMode);
%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
'''
doubleUnaryOp = '''
unaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw), %(func)s,
@ -499,8 +503,9 @@ let {{
vmlaSCode = '''
FPSCR fpscr = Fpscr;
float mid = binaryOp(fpscr, FpOp1, FpOp2,
fpMulS, fpscr.fz, fpscr.rMode);
FpDest = binaryOp(fpscr, FpDest, mid, fpAddS, fpscr.fz, fpscr.rMode);
fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
FpDest = binaryOp(fpscr, FpDest, mid, fpAddS,
fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vmlaSIop = InstObjParams("vmlas", "VmlaS", "FpRegRegRegOp",
@ -514,9 +519,10 @@ let {{
FPSCR fpscr = Fpscr;
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
fpMulD, fpscr.fz, fpscr.rMode);
fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw),
mid, fpAddD, fpscr.fz, fpscr.rMode);
mid, fpAddD, fpscr.fz,
fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@ -531,8 +537,9 @@ let {{
vmlsSCode = '''
FPSCR fpscr = Fpscr;
float mid = binaryOp(fpscr, FpOp1, FpOp2,
fpMulS, fpscr.fz, fpscr.rMode);
FpDest = binaryOp(fpscr, FpDest, -mid, fpAddS, fpscr.fz, fpscr.rMode);
fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
FpDest = binaryOp(fpscr, FpDest, -mid, fpAddS,
fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vmlsSIop = InstObjParams("vmlss", "VmlsS", "FpRegRegRegOp",
@ -546,9 +553,10 @@ let {{
FPSCR fpscr = Fpscr;
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
fpMulD, fpscr.fz, fpscr.rMode);
fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw),
-mid, fpAddD, fpscr.fz, fpscr.rMode);
-mid, fpAddD, fpscr.fz,
fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@ -563,8 +571,9 @@ let {{
vnmlaSCode = '''
FPSCR fpscr = Fpscr;
float mid = binaryOp(fpscr, FpOp1, FpOp2,
fpMulS, fpscr.fz, fpscr.rMode);
FpDest = binaryOp(fpscr, -FpDest, -mid, fpAddS, fpscr.fz, fpscr.rMode);
fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
FpDest = binaryOp(fpscr, -FpDest, -mid, fpAddS,
fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vnmlaSIop = InstObjParams("vnmlas", "VnmlaS", "FpRegRegRegOp",
@ -578,9 +587,10 @@ let {{
FPSCR fpscr = Fpscr;
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
fpMulD, fpscr.fz, fpscr.rMode);
fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw),
-mid, fpAddD, fpscr.fz, fpscr.rMode);
-mid, fpAddD, fpscr.fz,
fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@ -595,8 +605,9 @@ let {{
vnmlsSCode = '''
FPSCR fpscr = Fpscr;
float mid = binaryOp(fpscr, FpOp1, FpOp2,
fpMulS, fpscr.fz, fpscr.rMode);
FpDest = binaryOp(fpscr, -FpDest, mid, fpAddS, fpscr.fz, fpscr.rMode);
fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
FpDest = binaryOp(fpscr, -FpDest, mid, fpAddS,
fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vnmlsSIop = InstObjParams("vnmlss", "VnmlsS", "FpRegRegRegOp",
@ -610,9 +621,10 @@ let {{
FPSCR fpscr = Fpscr;
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
fpMulD, fpscr.fz, fpscr.rMode);
fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw),
mid, fpAddD, fpscr.fz, fpscr.rMode);
mid, fpAddD, fpscr.fz,
fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@ -626,7 +638,8 @@ let {{
vnmulSCode = '''
FPSCR fpscr = Fpscr;
FpDest = -binaryOp(fpscr, FpOp1, FpOp2, fpMulS, fpscr.fz, fpscr.rMode);
FpDest = -binaryOp(fpscr, FpOp1, FpOp2, fpMulS,
fpscr.fz, fpscr.dn, fpscr.rMode);
Fpscr = fpscr;
'''
vnmulSIop = InstObjParams("vnmuls", "VnmulS", "FpRegRegRegOp",
@ -640,7 +653,8 @@ let {{
FPSCR fpscr = Fpscr;
double dest = -binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
dbl(FpOp2P0.uw, FpOp2P1.uw),
fpMulD, fpscr.fz, fpscr.rMode);
fpMulD, fpscr.fz, fpscr.dn,
fpscr.rMode);
Fpscr = fpscr;
FpDestP0.uw = dblLow(dest);
FpDestP1.uw = dblHi(dest);
@ -665,7 +679,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw));
FpDest = FpOp1.uw;
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtUIntFpSIop = InstObjParams("vcvt", "VcvtUIntFpS", "FpRegRegOp",
@ -681,7 +695,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1P0.uw) : "m" (FpOp1P0.uw));
double cDest = (uint64_t)FpOp1P0.uw;
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@ -699,7 +713,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw));
FpDest = FpOp1.sw;
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtSIntFpSIop = InstObjParams("vcvt", "VcvtSIntFpS", "FpRegRegOp",
@ -715,7 +729,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1P0.sw) : "m" (FpOp1P0.sw));
double cDest = FpOp1P0.sw;
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@ -734,7 +748,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, 0, false);
__asm__ __volatile__("" :: "m" (FpDest.uw));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpUIntSRIop = InstObjParams("vcvt", "VcvtFpUIntSR", "FpRegRegOp",
@ -752,7 +766,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t result = vfpFpDToFixed(cOp1, false, false, 0, false);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
'''
@ -770,7 +784,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, 0, false);
__asm__ __volatile__("" :: "m" (FpDest.sw));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSIntSRIop = InstObjParams("vcvtr", "VcvtFpSIntSR", "FpRegRegOp",
@ -788,7 +802,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
'''
@ -807,7 +821,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, 0);
__asm__ __volatile__("" :: "m" (FpDest.uw));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpUIntSIop = InstObjParams("vcvt", "VcvtFpUIntS", "FpRegRegOp",
@ -826,7 +840,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t result = vfpFpDToFixed(cOp1, false, false, 0);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
'''
@ -845,7 +859,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, 0);
__asm__ __volatile__("" :: "m" (FpDest.sw));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSIntSIop = InstObjParams("vcvt", "VcvtFpSIntS", "FpRegRegOp",
@ -864,7 +878,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
int64_t result = vfpFpDToFixed(cOp1, true, false, 0);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
'''
@ -882,7 +896,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
double cDest = fixFpSFpDDest(Fpscr, FpOp1);
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@ -902,7 +916,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
FpDest = fixFpDFpSDest(Fpscr, cOp1);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpDFpSIop = InstObjParams("vcvt", "VcvtFpDFpS", "FpRegRegOp",
@ -917,9 +931,10 @@ let {{
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest = vcvtFpHFpS(fpscr, FpOp1, true);
FpDest = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp,
bits(fpToBits(FpOp1), 31, 16));
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpHTFpSIop = InstObjParams("vcvtt", "VcvtFpHTFpS", "FpRegRegOp",
@ -933,9 +948,10 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest = vcvtFpHFpS(fpscr, FpOp1, false);
FpDest = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp,
bits(fpToBits(FpOp1), 15, 0));
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpHBFpSIop = InstObjParams("vcvtb", "VcvtFpHBFpS", "FpRegRegOp",
@ -949,11 +965,13 @@ let {{
FPSCR fpscr = Fpscr;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
: "m" (FpOp1), "m" (FpDest));
FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, true);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest.uw)
: "m" (FpOp1), "m" (FpDest.uw));
FpDest.uw = insertBits(FpDest.uw, 31, 16,
vcvtFpSFpH(fpscr, fpscr.fz, fpscr.dn,
fpscr.rMode, fpscr.ahp, FpOp1));
__asm__ __volatile__("" :: "m" (FpDest.uw));
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSFpHTIop = InstObjParams("vcvtt", "VcvtFpSFpHT", "FpRegRegOp",
@ -967,11 +985,13 @@ let {{
FPSCR fpscr = Fpscr;
vfpFlushToZero(fpscr, FpOp1);
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
: "m" (FpOp1), "m" (FpDest));
FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, false);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest.uw)
: "m" (FpOp1), "m" (FpDest.uw));
FpDest.uw = insertBits(FpDest.uw, 15, 0,
vcvtFpSFpH(fpscr, fpscr.fz, fpscr.dn,
fpscr.rMode, fpscr.ahp, FpOp1));
__asm__ __volatile__("" :: "m" (FpDest.uw));
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSFpHBIop = InstObjParams("vcvtb", "VcvtFpSFpHB", "FpRegRegOp",
@ -1201,7 +1221,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm);
__asm__ __volatile__("" :: "m" (FpDest.sw));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSFixedSIop = InstObjParams("vcvt", "VcvtFpSFixedS", "FpRegRegImmOp",
@ -1219,7 +1239,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm);
__asm__ __volatile__("" :: "m" (mid));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = mid;
FpDestP1.uw = mid >> 32;
@ -1238,7 +1258,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm);
__asm__ __volatile__("" :: "m" (FpDest.uw));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpUFixedSIop = InstObjParams("vcvt", "VcvtFpUFixedS", "FpRegRegImmOp",
@ -1256,7 +1276,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm);
__asm__ __volatile__("" :: "m" (mid));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = mid;
FpDestP1.uw = mid >> 32;
@ -1272,9 +1292,9 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw));
FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sw, false, imm);
FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.sw, false, imm);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtSFixedFpSIop = InstObjParams("vcvt", "VcvtSFixedFpS", "FpRegRegImmOp",
@ -1289,9 +1309,9 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
double cDest = vfpSFixedToFpD(Fpscr, mid, false, imm);
double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm);
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@ -1307,9 +1327,9 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw));
FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uw, false, imm);
FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.uw, false, imm);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtUFixedFpSIop = InstObjParams("vcvt", "VcvtUFixedFpS", "FpRegRegImmOp",
@ -1324,9 +1344,9 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
double cDest = vfpUFixedToFpD(Fpscr, mid, false, imm);
double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm);
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@ -1345,7 +1365,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm);
__asm__ __volatile__("" :: "m" (FpDest.sh));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpSHFixedSIop = InstObjParams("vcvt", "VcvtFpSHFixedS",
@ -1364,7 +1384,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t result = vfpFpDToFixed(cOp1, true, true, imm);
__asm__ __volatile__("" :: "m" (result));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = result;
FpDestP1.uw = result >> 32;
@ -1384,7 +1404,7 @@ let {{
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm);
__asm__ __volatile__("" :: "m" (FpDest.uh));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtFpUHFixedSIop = InstObjParams("vcvt", "VcvtFpUHFixedS",
@ -1403,7 +1423,7 @@ let {{
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm);
__asm__ __volatile__("" :: "m" (mid));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = mid;
FpDestP1.uw = mid >> 32;
@ -1420,9 +1440,9 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1.sh) : "m" (FpOp1.sh));
FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sh, true, imm);
FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.sh, true, imm);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtSHFixedFpSIop = InstObjParams("vcvt", "VcvtSHFixedFpS",
@ -1438,9 +1458,9 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
double cDest = vfpSFixedToFpD(Fpscr, mid, true, imm);
double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm);
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);
@ -1457,9 +1477,9 @@ let {{
FPSCR fpscr = Fpscr;
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (FpOp1.uh) : "m" (FpOp1.uh));
FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uh, true, imm);
FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.uh, true, imm);
__asm__ __volatile__("" :: "m" (FpDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
'''
vcvtUHFixedFpSIop = InstObjParams("vcvt", "VcvtUHFixedFpS",
@ -1475,9 +1495,9 @@ let {{
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepFpState(fpscr.rMode);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
double cDest = vfpUFixedToFpD(Fpscr, mid, true, imm);
double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm);
__asm__ __volatile__("" :: "m" (cDest));
finishVfp(fpscr, state);
finishVfp(fpscr, state, fpscr.fz);
Fpscr = fpscr;
FpDestP0.uw = dblLow(cDest);
FpDestP1.uw = dblHi(cDest);

View file

@ -70,5 +70,8 @@
//Divide
##include "div.isa"
//FP (VFP and Neon)
//VFP
##include "fp.isa"
//Neon
##include "neon.isa"

View file

@ -57,11 +57,34 @@ let {{
microLdrFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
microLdrFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrFpUop',
'MicroMemOp',
{'memacc_code': microLdrFpUopCode,
'ea_code': 'EA = Rb + (up ? imm : -imm);',
'predicate_test': predicateTest},
['IsMicroop'])
'MicroMemOp',
{'memacc_code': microLdrFpUopCode,
'ea_code':
'EA = Rb + (up ? imm : -imm);',
'predicate_test': predicateTest},
['IsMicroop'])
microLdrDBFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
microLdrDBFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDBFpUop',
'MicroMemOp',
{'memacc_code': microLdrFpUopCode,
'ea_code': '''
EA = Rb + (up ? imm : -imm) +
(((CPSR)Cpsr).e ? 4 : 0);
''',
'predicate_test': predicateTest},
['IsMicroop'])
microLdrDTFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
microLdrDTFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDTFpUop',
'MicroMemOp',
{'memacc_code': microLdrFpUopCode,
'ea_code': '''
EA = Rb + (up ? imm : -imm) -
(((CPSR)Cpsr).e ? 4 : 0);
''',
'predicate_test': predicateTest},
['IsMicroop'])
microLdrRetUopCode = '''
CPSR cpsr = Cpsr;
@ -98,10 +121,36 @@ let {{
'predicate_test': predicateTest},
['IsMicroop'])
microStrDBFpUopCode = "Mem = cSwap(Fa.uw, ((CPSR)Cpsr).e);"
microStrDBFpUopIop = InstObjParams('strfp_uop', 'MicroStrDBFpUop',
'MicroMemOp',
{'memacc_code': microStrFpUopCode,
'postacc_code': "",
'ea_code': '''
EA = Rb + (up ? imm : -imm) +
(((CPSR)Cpsr).e ? 4 : 0);
''',
'predicate_test': predicateTest},
['IsMicroop'])
microStrDTFpUopCode = "Mem = cSwap(Fa.uw, ((CPSR)Cpsr).e);"
microStrDTFpUopIop = InstObjParams('strfp_uop', 'MicroStrDTFpUop',
'MicroMemOp',
{'memacc_code': microStrFpUopCode,
'postacc_code': "",
'ea_code': '''
EA = Rb + (up ? imm : -imm) -
(((CPSR)Cpsr).e ? 4 : 0);
''',
'predicate_test': predicateTest},
['IsMicroop'])
header_output = decoder_output = exec_output = ''
loadIops = (microLdrUopIop, microLdrFpUopIop, microLdrRetUopIop)
storeIops = (microStrUopIop, microStrFpUopIop)
loadIops = (microLdrUopIop, microLdrRetUopIop,
microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop)
storeIops = (microStrUopIop, microStrFpUopIop,
microStrDBFpUopIop, microStrDTFpUopIop)
for iop in loadIops + storeIops:
header_output += MicroMemDeclare.subst(iop)
decoder_output += MicroMemConstructor.subst(iop)
@ -115,6 +164,403 @@ let {{
StoreCompleteAcc.subst(iop)
}};
let {{
exec_output = header_output = ''
eaCode = 'EA = Ra + imm;'
for size in (1, 2, 3, 4, 6, 8, 12, 16):
# Set up the memory access.
regs = (size + 3) // 4
subst = { "size" : size, "regs" : regs }
memDecl = '''
union MemUnion {
uint8_t bytes[%(size)d];
Element elements[%(size)d / sizeof(Element)];
uint32_t floatRegBits[%(regs)d];
};
''' % subst
# Do endian conversion for all the elements.
convCode = '''
const unsigned eCount = sizeof(memUnion.elements) /
sizeof(memUnion.elements[0]);
if (((CPSR)Cpsr).e) {
for (unsigned i = 0; i < eCount; i++) {
memUnion.elements[i] = gtobe(memUnion.elements[i]);
}
} else {
for (unsigned i = 0; i < eCount; i++) {
memUnion.elements[i] = gtole(memUnion.elements[i]);
}
}
'''
# Offload everything into registers
regSetCode = ''
for reg in range(regs):
mask = ''
if reg == regs - 1:
mask = ' & mask(%d)' % (32 - 8 * (regs * 4 - size))
regSetCode += '''
FpDestP%(reg)d.uw = gtoh(memUnion.floatRegBits[%(reg)d])%(mask)s;
''' % { "reg" : reg, "mask" : mask }
# Pull everything in from registers
regGetCode = ''
for reg in range(regs):
regGetCode += '''
memUnion.floatRegBits[%(reg)d] = htog(FpDestP%(reg)d.uw);
''' % { "reg" : reg }
loadMemAccCode = convCode + regSetCode
storeMemAccCode = regGetCode + convCode
loadIop = InstObjParams('ldrneon%(size)d_uop' % subst,
'MicroLdrNeon%(size)dUop' % subst,
'MicroNeonMemOp',
{ 'mem_decl' : memDecl,
'size' : size,
'memacc_code' : loadMemAccCode,
'ea_code' : eaCode,
'predicate_test' : predicateTest },
[ 'IsMicroop', 'IsMemRef', 'IsLoad' ])
storeIop = InstObjParams('strneon%(size)d_uop' % subst,
'MicroStrNeon%(size)dUop' % subst,
'MicroNeonMemOp',
{ 'mem_decl' : memDecl,
'size' : size,
'memacc_code' : storeMemAccCode,
'ea_code' : eaCode,
'predicate_test' : predicateTest },
[ 'IsMicroop', 'IsMemRef', 'IsStore' ])
exec_output += NeonLoadExecute.subst(loadIop) + \
NeonLoadInitiateAcc.subst(loadIop) + \
NeonLoadCompleteAcc.subst(loadIop) + \
NeonStoreExecute.subst(storeIop) + \
NeonStoreInitiateAcc.subst(storeIop) + \
NeonStoreCompleteAcc.subst(storeIop)
header_output += MicroNeonMemDeclare.subst(loadIop) + \
MicroNeonMemDeclare.subst(storeIop)
}};
let {{
exec_output = ''
for eSize, type in (1, 'uint8_t'), \
(2, 'uint16_t'), \
(4, 'uint32_t'), \
(8, 'uint64_t'):
size = eSize
# An instruction handles no more than 16 bytes and no more than
# 4 elements, or the number of elements needed to fill 8 or 16 bytes.
sizes = set((16, 8))
for count in 1, 2, 3, 4:
size = count * eSize
if size <= 16:
sizes.add(size)
for size in sizes:
substDict = {
"class_name" : "MicroLdrNeon%dUop" % size,
"targs" : type
}
exec_output += MicroNeonMemExecDeclare.subst(substDict)
substDict["class_name"] = "MicroStrNeon%dUop" % size
exec_output += MicroNeonMemExecDeclare.subst(substDict)
size += eSize
}};
////////////////////////////////////////////////////////////////////
//
// Neon (de)interlacing microops
//
let {{
header_output = exec_output = ''
for dRegs in (2, 3, 4):
loadConv = ''
unloadConv = ''
for dReg in range(dRegs):
loadConv += '''
conv1.cRegs[%(sReg0)d] = htog(FpOp1P%(sReg0)d.uw);
conv1.cRegs[%(sReg1)d] = htog(FpOp1P%(sReg1)d.uw);
''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
unloadConv += '''
FpDestS%(dReg)dP0.uw = gtoh(conv2.cRegs[2 * %(dReg)d + 0]);
FpDestS%(dReg)dP1.uw = gtoh(conv2.cRegs[2 * %(dReg)d + 1]);
''' % { "dReg" : dReg }
microDeintNeonCode = '''
const unsigned dRegs = %(dRegs)d;
const unsigned regs = 2 * dRegs;
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
sizeof(Element);
union convStruct {
FloatRegBits cRegs[regs];
Element elements[dRegs * perDReg];
} conv1, conv2;
%(loadConv)s
unsigned srcElem = 0;
for (unsigned destOffset = 0;
destOffset < perDReg; destOffset++) {
for (unsigned dReg = 0; dReg < dRegs; dReg++) {
conv2.elements[dReg * perDReg + destOffset] =
conv1.elements[srcElem++];
}
}
%(unloadConv)s
''' % { "dRegs" : dRegs,
"loadConv" : loadConv,
"unloadConv" : unloadConv }
microDeintNeonIop = \
InstObjParams('deintneon%duop' % (dRegs * 2),
'MicroDeintNeon%dUop' % (dRegs * 2),
'MicroNeonMixOp',
{ 'predicate_test': predicateTest,
'code' : microDeintNeonCode },
['IsMicroop'])
header_output += MicroNeonMixDeclare.subst(microDeintNeonIop)
exec_output += MicroNeonMixExecute.subst(microDeintNeonIop)
loadConv = ''
unloadConv = ''
for dReg in range(dRegs):
loadConv += '''
conv1.cRegs[2 * %(dReg)d + 0] = htog(FpOp1S%(dReg)dP0.uw);
conv1.cRegs[2 * %(dReg)d + 1] = htog(FpOp1S%(dReg)dP1.uw);
''' % { "dReg" : dReg }
unloadConv += '''
FpDestP%(sReg0)d.uw = gtoh(conv2.cRegs[%(sReg0)d]);
FpDestP%(sReg1)d.uw = gtoh(conv2.cRegs[%(sReg1)d]);
''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
microInterNeonCode = '''
const unsigned dRegs = %(dRegs)d;
const unsigned regs = 2 * dRegs;
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
sizeof(Element);
union convStruct {
FloatRegBits cRegs[regs];
Element elements[dRegs * perDReg];
} conv1, conv2;
%(loadConv)s
unsigned destElem = 0;
for (unsigned srcOffset = 0;
srcOffset < perDReg; srcOffset++) {
for (unsigned dReg = 0; dReg < dRegs; dReg++) {
conv2.elements[destElem++] =
conv1.elements[dReg * perDReg + srcOffset];
}
}
%(unloadConv)s
''' % { "dRegs" : dRegs,
"loadConv" : loadConv,
"unloadConv" : unloadConv }
microInterNeonIop = \
InstObjParams('interneon%duop' % (dRegs * 2),
'MicroInterNeon%dUop' % (dRegs * 2),
'MicroNeonMixOp',
{ 'predicate_test': predicateTest,
'code' : microInterNeonCode },
['IsMicroop'])
header_output += MicroNeonMixDeclare.subst(microInterNeonIop)
exec_output += MicroNeonMixExecute.subst(microInterNeonIop)
}};
let {{
exec_output = ''
for type in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
for dRegs in (2, 3, 4):
Name = "MicroDeintNeon%dUop" % (dRegs * 2)
substDict = { "class_name" : Name, "targs" : type }
exec_output += MicroNeonExecDeclare.subst(substDict)
Name = "MicroInterNeon%dUop" % (dRegs * 2)
substDict = { "class_name" : Name, "targs" : type }
exec_output += MicroNeonExecDeclare.subst(substDict)
}};
////////////////////////////////////////////////////////////////////
//
// Neon microops to pack/unpack a single lane
//
let {{
header_output = exec_output = ''
for sRegs in 1, 2:
baseLoadRegs = ''
for reg in range(sRegs):
baseLoadRegs += '''
sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d.uw);
sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d.uw);
''' % { "reg0" : (2 * reg + 0),
"reg1" : (2 * reg + 1) }
for dRegs in range(sRegs, 5):
unloadRegs = ''
loadRegs = baseLoadRegs
for reg in range(dRegs):
loadRegs += '''
destRegs[%(reg)d].fRegs[0] = htog(FpDestS%(reg)dP0.uw);
destRegs[%(reg)d].fRegs[1] = htog(FpDestS%(reg)dP1.uw);
''' % { "reg" : reg }
unloadRegs += '''
FpDestS%(reg)dP0.uw = gtoh(destRegs[%(reg)d].fRegs[0]);
FpDestS%(reg)dP1.uw = gtoh(destRegs[%(reg)d].fRegs[1]);
''' % { "reg" : reg }
microUnpackNeonCode = '''
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
sizeof(Element);
union SourceRegs {
FloatRegBits fRegs[2 * %(sRegs)d];
Element elements[%(sRegs)d * perDReg];
} sourceRegs;
union DestReg {
FloatRegBits fRegs[2];
Element elements[perDReg];
} destRegs[%(dRegs)d];
%(loadRegs)s
for (unsigned i = 0; i < %(dRegs)d; i++) {
destRegs[i].elements[lane] = sourceRegs.elements[i];
}
%(unloadRegs)s
''' % { "sRegs" : sRegs, "dRegs" : dRegs,
"loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
microUnpackNeonIop = \
InstObjParams('unpackneon%dto%duop' % (sRegs * 2, dRegs * 2),
'MicroUnpackNeon%dto%dUop' %
(sRegs * 2, dRegs * 2),
'MicroNeonMixLaneOp',
{ 'predicate_test': predicateTest,
'code' : microUnpackNeonCode },
['IsMicroop'])
header_output += MicroNeonMixLaneDeclare.subst(microUnpackNeonIop)
exec_output += MicroNeonMixExecute.subst(microUnpackNeonIop)
for sRegs in 1, 2:
loadRegs = ''
for reg in range(sRegs):
loadRegs += '''
sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d.uw);
sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d.uw);
''' % { "reg0" : (2 * reg + 0),
"reg1" : (2 * reg + 1) }
for dRegs in range(sRegs, 5):
unloadRegs = ''
for reg in range(dRegs):
unloadRegs += '''
FpDestS%(reg)dP0.uw = gtoh(destRegs[%(reg)d].fRegs[0]);
FpDestS%(reg)dP1.uw = gtoh(destRegs[%(reg)d].fRegs[1]);
''' % { "reg" : reg }
microUnpackAllNeonCode = '''
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
sizeof(Element);
union SourceRegs {
FloatRegBits fRegs[2 * %(sRegs)d];
Element elements[%(sRegs)d * perDReg];
} sourceRegs;
union DestReg {
FloatRegBits fRegs[2];
Element elements[perDReg];
} destRegs[%(dRegs)d];
%(loadRegs)s
for (unsigned i = 0; i < %(dRegs)d; i++) {
for (unsigned j = 0; j < perDReg; j++)
destRegs[i].elements[j] = sourceRegs.elements[i];
}
%(unloadRegs)s
''' % { "sRegs" : sRegs, "dRegs" : dRegs,
"loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
microUnpackAllNeonIop = \
InstObjParams('unpackallneon%dto%duop' % (sRegs * 2, dRegs * 2),
'MicroUnpackAllNeon%dto%dUop' %
(sRegs * 2, dRegs * 2),
'MicroNeonMixOp',
{ 'predicate_test': predicateTest,
'code' : microUnpackAllNeonCode },
['IsMicroop'])
header_output += MicroNeonMixDeclare.subst(microUnpackAllNeonIop)
exec_output += MicroNeonMixExecute.subst(microUnpackAllNeonIop)
for dRegs in 1, 2:
unloadRegs = ''
for reg in range(dRegs):
unloadRegs += '''
FpDestP%(reg0)d.uw = gtoh(destRegs.fRegs[%(reg0)d]);
FpDestP%(reg1)d.uw = gtoh(destRegs.fRegs[%(reg1)d]);
''' % { "reg0" : (2 * reg + 0),
"reg1" : (2 * reg + 1) }
for sRegs in range(dRegs, 5):
loadRegs = ''
for reg in range(sRegs):
loadRegs += '''
sourceRegs[%(reg)d].fRegs[0] = htog(FpOp1S%(reg)dP0.uw);
sourceRegs[%(reg)d].fRegs[1] = htog(FpOp1S%(reg)dP1.uw);
''' % { "reg" : reg }
microPackNeonCode = '''
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
sizeof(Element);
union SourceReg {
FloatRegBits fRegs[2];
Element elements[perDReg];
} sourceRegs[%(sRegs)d];
union DestRegs {
FloatRegBits fRegs[2 * %(dRegs)d];
Element elements[%(dRegs)d * perDReg];
} destRegs;
%(loadRegs)s
for (unsigned i = 0; i < %(sRegs)d; i++) {
destRegs.elements[i] = sourceRegs[i].elements[lane];
}
%(unloadRegs)s
''' % { "sRegs" : sRegs, "dRegs" : dRegs,
"loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
microPackNeonIop = \
InstObjParams('packneon%dto%duop' % (sRegs * 2, dRegs * 2),
'MicroPackNeon%dto%dUop' %
(sRegs * 2, dRegs * 2),
'MicroNeonMixLaneOp',
{ 'predicate_test': predicateTest,
'code' : microPackNeonCode },
['IsMicroop'])
header_output += MicroNeonMixLaneDeclare.subst(microPackNeonIop)
exec_output += MicroNeonMixExecute.subst(microPackNeonIop)
}};
let {{
exec_output = ''
for type in ('uint8_t', 'uint16_t', 'uint32_t'):
for sRegs in 1, 2:
for dRegs in range(sRegs, 5):
for format in ("MicroUnpackNeon%(sRegs)dto%(dRegs)dUop",
"MicroUnpackAllNeon%(sRegs)dto%(dRegs)dUop",
"MicroPackNeon%(dRegs)dto%(sRegs)dUop"):
Name = format % { "sRegs" : sRegs * 2,
"dRegs" : dRegs * 2 }
substDict = { "class_name" : Name, "targs" : type }
exec_output += MicroNeonExecDeclare.subst(substDict)
}};
////////////////////////////////////////////////////////////////////
//
// Integer = Integer op Immediate microops
@ -122,23 +568,32 @@ let {{
let {{
microAddiUopIop = InstObjParams('addi_uop', 'MicroAddiUop',
'MicroIntOp',
'MicroIntImmOp',
{'code': 'Ra = Rb + imm;',
'predicate_test': predicateTest},
['IsMicroop'])
microAddUopIop = InstObjParams('add_uop', 'MicroAddUop',
'MicroIntOp',
{'code': 'Ra = Rb + Rc;',
'predicate_test': predicateTest},
['IsMicroop'])
microSubiUopIop = InstObjParams('subi_uop', 'MicroSubiUop',
'MicroIntOp',
'MicroIntImmOp',
{'code': 'Ra = Rb - imm;',
'predicate_test': predicateTest},
['IsMicroop'])
header_output = MicroIntDeclare.subst(microAddiUopIop) + \
MicroIntDeclare.subst(microSubiUopIop)
decoder_output = MicroIntConstructor.subst(microAddiUopIop) + \
MicroIntConstructor.subst(microSubiUopIop)
header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \
MicroIntImmDeclare.subst(microSubiUopIop) + \
MicroIntDeclare.subst(microAddUopIop)
decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \
MicroIntImmConstructor.subst(microSubiUopIop) + \
MicroIntConstructor.subst(microAddUopIop)
exec_output = PredOpExecute.subst(microAddiUopIop) + \
PredOpExecute.subst(microSubiUopIop)
PredOpExecute.subst(microSubiUopIop) + \
PredOpExecute.subst(microAddUopIop)
}};
let {{
@ -146,6 +601,22 @@ let {{
header_output = MacroMemDeclare.subst(iop)
decoder_output = MacroMemConstructor.subst(iop)
iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", [])
header_output += VMemMultDeclare.subst(iop)
decoder_output += VMemMultConstructor.subst(iop)
iop = InstObjParams("vldsingle", "VldSingle", 'VldSingleOp', "", [])
header_output += VMemSingleDeclare.subst(iop)
decoder_output += VMemSingleConstructor.subst(iop)
iop = InstObjParams("vstmult", "VstMult", 'VstMultOp', "", [])
header_output += VMemMultDeclare.subst(iop)
decoder_output += VMemMultConstructor.subst(iop)
iop = InstObjParams("vstsingle", "VstSingle", 'VstSingleOp', "", [])
header_output += VMemSingleDeclare.subst(iop)
decoder_output += VMemSingleConstructor.subst(iop)
vfpIop = InstObjParams("vldmstm", "VLdmStm", 'MacroVFPMemOp', "", [])
header_output += MacroVFPMemDeclare.subst(vfpIop)
decoder_output += MacroVFPMemConstructor.subst(vfpIop)

File diff suppressed because it is too large Load diff

View file

@ -47,6 +47,7 @@ def operand_types {{
'sw' : ('signed int', 32),
'uw' : ('unsigned int', 32),
'ud' : ('unsigned int', 64),
'tud' : ('twin64 int', 64),
'sf' : ('float', 32),
'df' : ('float', 64)
}};
@ -96,6 +97,18 @@ def operands {{
'FpDestP1': ('FloatReg', 'sf', '(dest + 1)', 'IsFloating', 2),
'FpDestP2': ('FloatReg', 'sf', '(dest + 2)', 'IsFloating', 2),
'FpDestP3': ('FloatReg', 'sf', '(dest + 3)', 'IsFloating', 2),
'FpDestP4': ('FloatReg', 'sf', '(dest + 4)', 'IsFloating', 2),
'FpDestP5': ('FloatReg', 'sf', '(dest + 5)', 'IsFloating', 2),
'FpDestP6': ('FloatReg', 'sf', '(dest + 6)', 'IsFloating', 2),
'FpDestP7': ('FloatReg', 'sf', '(dest + 7)', 'IsFloating', 2),
'FpDestS0P0': ('FloatReg', 'sf', '(dest + step * 0 + 0)', 'IsFloating', 2),
'FpDestS0P1': ('FloatReg', 'sf', '(dest + step * 0 + 1)', 'IsFloating', 2),
'FpDestS1P0': ('FloatReg', 'sf', '(dest + step * 1 + 0)', 'IsFloating', 2),
'FpDestS1P1': ('FloatReg', 'sf', '(dest + step * 1 + 1)', 'IsFloating', 2),
'FpDestS2P0': ('FloatReg', 'sf', '(dest + step * 2 + 0)', 'IsFloating', 2),
'FpDestS2P1': ('FloatReg', 'sf', '(dest + step * 2 + 1)', 'IsFloating', 2),
'FpDestS3P0': ('FloatReg', 'sf', '(dest + step * 3 + 0)', 'IsFloating', 2),
'FpDestS3P1': ('FloatReg', 'sf', '(dest + step * 3 + 1)', 'IsFloating', 2),
'Result': ('IntReg', 'uw', 'result', 'IsInteger', 2,
maybePCRead, maybePCWrite),
'Dest2': ('IntReg', 'uw', 'dest2', 'IsInteger', 2,
@ -124,6 +137,18 @@ def operands {{
'FpOp1P1': ('FloatReg', 'sf', '(op1 + 1)', 'IsFloating', 2),
'FpOp1P2': ('FloatReg', 'sf', '(op1 + 2)', 'IsFloating', 2),
'FpOp1P3': ('FloatReg', 'sf', '(op1 + 3)', 'IsFloating', 2),
'FpOp1P4': ('FloatReg', 'sf', '(op1 + 4)', 'IsFloating', 2),
'FpOp1P5': ('FloatReg', 'sf', '(op1 + 5)', 'IsFloating', 2),
'FpOp1P6': ('FloatReg', 'sf', '(op1 + 6)', 'IsFloating', 2),
'FpOp1P7': ('FloatReg', 'sf', '(op1 + 7)', 'IsFloating', 2),
'FpOp1S0P0': ('FloatReg', 'sf', '(op1 + step * 0 + 0)', 'IsFloating', 2),
'FpOp1S0P1': ('FloatReg', 'sf', '(op1 + step * 0 + 1)', 'IsFloating', 2),
'FpOp1S1P0': ('FloatReg', 'sf', '(op1 + step * 1 + 0)', 'IsFloating', 2),
'FpOp1S1P1': ('FloatReg', 'sf', '(op1 + step * 1 + 1)', 'IsFloating', 2),
'FpOp1S2P0': ('FloatReg', 'sf', '(op1 + step * 2 + 0)', 'IsFloating', 2),
'FpOp1S2P1': ('FloatReg', 'sf', '(op1 + step * 2 + 1)', 'IsFloating', 2),
'FpOp1S3P0': ('FloatReg', 'sf', '(op1 + step * 3 + 0)', 'IsFloating', 2),
'FpOp1S3P1': ('FloatReg', 'sf', '(op1 + step * 3 + 1)', 'IsFloating', 2),
'MiscOp1': ('ControlReg', 'uw', 'op1', (None, None, 'IsControl'), 2),
'Op2': ('IntReg', 'uw', 'op2', 'IsInteger', 2,
maybePCRead, maybePCWrite),
@ -164,6 +189,7 @@ def operands {{
maybePCRead, maybeIWPCWrite),
'Fa' : ('FloatReg', 'sf', 'ura', 'IsFloating', 2),
'Rb' : ('IntReg', 'uw', 'urb', 'IsInteger', 2, maybePCRead, maybePCWrite),
'Rc' : ('IntReg', 'uw', 'urc', 'IsInteger', 2, maybePCRead, maybePCWrite),
#General Purpose Floating Point Reg Operands
'Fd': ('FloatReg', 'df', 'FD', 'IsFloating', 2),

View file

@ -74,10 +74,152 @@ def template MicroMemConstructor {{
////////////////////////////////////////////////////////////////////
//
// Integer = Integer op Immediate microops
// Neon load/store microops
//
def template MicroNeonMemDeclare {{
template <class Element>
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst, RegIndex _dest,
RegIndex _ura, uint32_t _imm, unsigned extraMemFlags)
: %(base_class)s("%(mnemonic)s", machInst,
%(op_class)s, _dest, _ura, _imm)
{
memAccessFlags |= extraMemFlags;
%(constructor)s;
}
%(BasicExecDeclare)s
%(InitiateAccDeclare)s
%(CompleteAccDeclare)s
};
}};
////////////////////////////////////////////////////////////////////
//
// Integer = Integer op Integer microops
//
def template MicroIntDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst,
RegIndex _ura, RegIndex _urb, RegIndex _urc);
%(BasicExecDeclare)s
};
}};
def template MicroIntConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst,
RegIndex _ura,
RegIndex _urb,
RegIndex _urc)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
_ura, _urb, _urc)
{
%(constructor)s;
}
}};
def template MicroNeonMemExecDeclare {{
template
Fault %(class_name)s<%(targs)s>::execute(
%(CPU_exec_context)s *, Trace::InstRecord *) const;
template
Fault %(class_name)s<%(targs)s>::initiateAcc(
%(CPU_exec_context)s *, Trace::InstRecord *) const;
template
Fault %(class_name)s<%(targs)s>::completeAcc(PacketPtr,
%(CPU_exec_context)s *, Trace::InstRecord *) const;
}};
def template MicroNeonExecDeclare {{
template
Fault %(class_name)s<%(targs)s>::execute(
%(CPU_exec_context)s *, Trace::InstRecord *) const;
}};
////////////////////////////////////////////////////////////////////
//
// Neon (de)interlacing microops
//
def template MicroNeonMixDeclare {{
template <class Element>
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1,
uint8_t _step) :
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
_dest, _op1, _step)
{
%(constructor)s;
}
%(BasicExecDeclare)s
};
}};
def template MicroNeonMixExecute {{
template <class Element>
Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
uint64_t resTemp = 0;
resTemp = resTemp;
%(op_decl)s;
%(op_rd)s;
if (%(predicate_test)s)
{
%(code)s;
if (fault == NoFault)
{
%(op_wb)s;
}
}
if (fault == NoFault && machInst.itstateMask != 0) {
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
}
return fault;
}
}};
////////////////////////////////////////////////////////////////////
//
// Neon (un)packing microops using a particular lane
//
def template MicroNeonMixLaneDeclare {{
template <class Element>
class %(class_name)s : public %(base_class)s
{
public:
%(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1,
uint8_t _step, unsigned _lane) :
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
_dest, _op1, _step, _lane)
{
%(constructor)s;
}
%(BasicExecDeclare)s
};
}};
////////////////////////////////////////////////////////////////////
//
// Integer = Integer op Immediate microops
//
def template MicroIntImmDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
@ -88,7 +230,7 @@ def template MicroIntDeclare {{
};
}};
def template MicroIntConstructor {{
def template MicroIntImmConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst,
RegIndex _ura,
RegIndex _urb,
@ -132,6 +274,52 @@ def template MacroMemConstructor {{
}};
def template VMemMultDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
// Constructor
%(class_name)s(ExtMachInst machInst, unsigned width,
RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
uint32_t size, uint32_t align, RegIndex rm);
%(BasicExecPanic)s
};
}};
def template VMemMultConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst, unsigned width,
RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
uint32_t size, uint32_t align, RegIndex rm)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, width,
rn, vd, regs, inc, size, align, rm)
{
%(constructor)s;
}
}};
def template VMemSingleDeclare {{
class %(class_name)s : public %(base_class)s
{
public:
// Constructor
%(class_name)s(ExtMachInst machInst, bool all, unsigned width,
RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
uint32_t size, uint32_t align, RegIndex rm, unsigned lane = 0);
%(BasicExecPanic)s
};
}};
def template VMemSingleConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst machInst, bool all, unsigned width,
RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
uint32_t size, uint32_t align, RegIndex rm, unsigned lane)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, all, width,
rn, vd, regs, inc, size, align, rm, lane)
{
%(constructor)s;
}
}};
def template MacroVFPMemDeclare {{
/**
* Static instructions class for a store multiple instruction

View file

@ -180,6 +180,42 @@ def template LoadExecute {{
}
}};
def template NeonLoadExecute {{
template <class Element>
Fault %(class_name)s<Element>::execute(
%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
%(op_decl)s;
%(mem_decl)s;
%(op_rd)s;
%(ea_code)s;
MemUnion memUnion;
uint8_t *dataPtr = memUnion.bytes;
if (%(predicate_test)s)
{
if (fault == NoFault) {
fault = xc->readBytes(EA, dataPtr, %(size)d, memAccessFlags);
%(memacc_code)s;
}
if (fault == NoFault) {
%(op_wb)s;
}
}
if (fault == NoFault && machInst.itstateMask != 0) {
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
}
return fault;
}
}};
def template StoreExecute {{
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
@ -217,6 +253,46 @@ def template StoreExecute {{
}
}};
def template NeonStoreExecute {{
template <class Element>
Fault %(class_name)s<Element>::execute(
%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
%(op_decl)s;
%(mem_decl)s;
%(op_rd)s;
%(ea_code)s;
MemUnion memUnion;
uint8_t *dataPtr = memUnion.bytes;
if (%(predicate_test)s)
{
if (fault == NoFault) {
%(memacc_code)s;
}
if (fault == NoFault) {
fault = xc->writeBytes(dataPtr, %(size)d, EA,
memAccessFlags, NULL);
}
if (fault == NoFault) {
%(op_wb)s;
}
}
if (fault == NoFault && machInst.itstateMask != 0) {
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
}
return fault;
}
}};
def template StoreExExecute {{
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
@ -336,6 +412,45 @@ def template StoreInitiateAcc {{
}
}};
def template NeonStoreInitiateAcc {{
template <class Element>
Fault %(class_name)s<Element>::initiateAcc(
%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
%(op_decl)s;
%(mem_decl)s;
%(op_rd)s;
%(ea_code)s;
if (%(predicate_test)s)
{
MemUnion memUnion;
if (fault == NoFault) {
%(memacc_code)s;
}
if (fault == NoFault) {
fault = xc->writeBytes(memUnion.bytes, %(size)d, EA,
memAccessFlags, NULL);
}
// Need to write back any potential address register update
if (fault == NoFault) {
%(op_wb)s;
}
}
if (fault == NoFault && machInst.itstateMask != 0) {
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
}
return fault;
}
}};
def template LoadInitiateAcc {{
Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
@ -363,6 +478,31 @@ def template LoadInitiateAcc {{
}
}};
def template NeonLoadInitiateAcc {{
template <class Element>
Fault %(class_name)s<Element>::initiateAcc(
%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
%(op_src_decl)s;
%(op_rd)s;
%(ea_code)s;
if (%(predicate_test)s)
{
if (fault == NoFault) {
fault = xc->readBytes(EA, NULL, %(size)d, memAccessFlags);
}
} else if (fault == NoFault && machInst.itstateMask != 0) {
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
}
return fault;
}
}};
def template LoadCompleteAcc {{
Fault %(class_name)s::completeAcc(PacketPtr pkt,
%(CPU_exec_context)s *xc,
@ -395,6 +535,40 @@ def template LoadCompleteAcc {{
}
}};
def template NeonLoadCompleteAcc {{
template <class Element>
Fault %(class_name)s<Element>::completeAcc(
PacketPtr pkt, %(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
%(mem_decl)s;
%(op_decl)s;
%(op_rd)s;
if (%(predicate_test)s)
{
// ARM instructions will not have a pkt if the predicate is false
MemUnion &memUnion = *(MemUnion *)pkt->getPtr<uint8_t>();
if (fault == NoFault) {
%(memacc_code)s;
}
if (fault == NoFault) {
%(op_wb)s;
}
}
if (fault == NoFault && machInst.itstateMask != 0) {
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
}
return fault;
}
}};
def template StoreCompleteAcc {{
Fault %(class_name)s::completeAcc(PacketPtr pkt,
%(CPU_exec_context)s *xc,
@ -420,6 +594,32 @@ def template StoreCompleteAcc {{
}
}};
def template NeonStoreCompleteAcc {{
template <class Element>
Fault %(class_name)s<Element>::completeAcc(
PacketPtr pkt, %(CPU_exec_context)s *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
if (%(predicate_test)s)
{
if (fault == NoFault) {
%(op_wb)s;
}
}
if (fault == NoFault && machInst.itstateMask != 0) {
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
}
return fault;
}
}};
def template StoreExCompleteAcc {{
Fault %(class_name)s::completeAcc(PacketPtr pkt,
%(CPU_exec_context)s *xc,

View file

@ -0,0 +1,227 @@
// -*- mode:c++ -*-
// Copyright (c) 2010 ARM Limited
// All rights reserved
//
// The license below extends only to copyright in the software and shall
// not be construed as granting a license to any other intellectual
// property including but not limited to intellectual property relating
// to a hardware implementation of the functionality of the software
// licensed hereunder. You may use the software subject to the license
// terms below provided that you ensure that this notice is replicated
// unmodified and in its entirety in all distributions of the software,
// modified or unmodified, in source code or in binary form.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Authors: Gabe Black
def template NeonRegRegRegOpDeclare {{
template <class _Element>
class %(class_name)s : public %(base_class)s
{
protected:
typedef _Element Element;
public:
// Constructor
%(class_name)s(ExtMachInst machInst,
IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
_dest, _op1, _op2)
{
%(constructor)s;
}
%(BasicExecDeclare)s
};
}};
def template NeonRegRegRegImmOpDeclare {{
template <class _Element>
class %(class_name)s : public %(base_class)s
{
protected:
typedef _Element Element;
public:
// Constructor
%(class_name)s(ExtMachInst machInst,
IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
uint64_t _imm)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
_dest, _op1, _op2, _imm)
{
%(constructor)s;
}
%(BasicExecDeclare)s
};
}};
def template NeonRegRegImmOpDeclare {{
template <class _Element>
class %(class_name)s : public %(base_class)s
{
protected:
typedef _Element Element;
public:
// Constructor
%(class_name)s(ExtMachInst machInst,
IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
_dest, _op1, _imm)
{
%(constructor)s;
}
%(BasicExecDeclare)s
};
}};
def template NeonRegImmOpDeclare {{
template <class _Element>
class %(class_name)s : public %(base_class)s
{
protected:
typedef _Element Element;
public:
// Constructor
%(class_name)s(ExtMachInst machInst, IntRegIndex _dest, uint64_t _imm)
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _imm)
{
%(constructor)s;
}
%(BasicExecDeclare)s
};
}};
// Declaration template for a Neon instruction with one destination and one
// source register. Templated on _Element, the SIMD lane type.
def template NeonRegRegOpDeclare {{
template <class _Element>
class %(class_name)s : public %(base_class)s
{
  protected:
    // Lane type this instantiation operates on.
    typedef _Element Element;
  public:
    // Constructor
    %(class_name)s(ExtMachInst machInst,
                   IntRegIndex _dest, IntRegIndex _op1)
        : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
                         _dest, _op1)
    {
        %(constructor)s;
    }

    // Declares the execute() method; the body comes from an Execute template.
    %(BasicExecDeclare)s
};
}};
// Explicit instantiation of a Neon instruction's execute() method for one
// concrete element type (substituted in as %(targs)s), so the definition is
// emitted even though it lives in a template.
def template NeonExecDeclare {{
template
Fault %(class_name)s<%(targs)s>::execute(
        %(CPU_exec_context)s *, Trace::InstRecord *) const;
}};
// Execute template for Neon instructions whose source and destination
// elements are the same size. Provides the RegVect union so the substituted
// per-instruction %(code)s can view the operand registers either as raw
// FloatRegBits words or as an array of SIMD lanes.
def template NeonEqualRegExecute {{
template <class Element>
Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,
        Trace::InstRecord *traceData) const
{
    Fault fault = NoFault;
    %(op_decl)s;
    %(op_rd)s;

    // Number of FP registers each vector operand occupies, and how many
    // elements of this size fit in them.
    const unsigned rCount = %(r_count)d;
    const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element);

    // Overlay: the same storage seen as raw register words or as lanes.
    union RegVect {
        FloatRegBits regs[rCount];
        Element elements[eCount];
    };

    // Only perform the operation (and writeback) if the predicate passes.
    if (%(predicate_test)s)
    {
        %(code)s;
        if (fault == NoFault)
        {
            %(op_wb)s;
        }
    }

    // Advance the IT-block state (MISCREG_ITSTATE) after a successfully
    // executed instruction inside a Thumb IT block.
    if (fault == NoFault && machInst.itstateMask != 0) {
        xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
    }

    return fault;
}
}};
// Overload set mapping an element type to the next wider integer type of the
// same signedness. Declarations only: they appear to be used purely for
// their return type (via typeof in the Execute templates below), so no
// definitions should be needed -- confirm no call is ever emitted.
output header {{
    uint16_t nextBiggerType(uint8_t);
    uint32_t nextBiggerType(uint16_t);
    uint64_t nextBiggerType(uint32_t);
    int16_t nextBiggerType(int8_t);
    int32_t nextBiggerType(int16_t);
    int64_t nextBiggerType(int32_t);
}};
// Execute template for widening/narrowing Neon instructions, where one
// operand uses elements twice the width of the other. BigElement is derived
// from Element through the nextBiggerType overloads; note typeof is a GCC
// extension (this predates C++11 decltype).
def template NeonUnequalRegExecute {{
template <class Element>
Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,
        Trace::InstRecord *traceData) const
{
    typedef typeof(nextBiggerType((Element)0)) BigElement;
    Fault fault = NoFault;
    %(op_decl)s;
    %(op_rd)s;

    // Number of FP registers a narrow vector operand occupies, and how many
    // narrow elements fit in them.
    const unsigned rCount = %(r_count)d;
    const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element);

    // Narrow-width view: raw register words, narrow lanes, or half as many
    // wide lanes over the same storage.
    union RegVect {
        FloatRegBits regs[rCount];
        Element elements[eCount];
        BigElement bigElements[eCount / 2];
    };

    // Double-width view: twice the registers, eCount wide lanes.
    union BigRegVect {
        FloatRegBits regs[2 * rCount];
        BigElement elements[eCount];
    };

    // Only perform the operation (and writeback) if the predicate passes.
    if (%(predicate_test)s)
    {
        %(code)s;
        if (fault == NoFault)
        {
            %(op_wb)s;
        }
    }

    // Advance the IT-block state (MISCREG_ITSTATE) after a successfully
    // executed instruction inside a Thumb IT block.
    if (fault == NoFault && machInst.itstateMask != 0) {
        xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
    }

    return fault;
}
}};

View file

@ -60,3 +60,6 @@
//Templates for VFP instructions
##include "vfp.isa"
//Templates for Neon instructions
##include "neon.isa"

View file

@ -65,20 +65,22 @@ class TLB : public BaseTLB
{
public:
// Per-request flags for ARM TLB lookups. The low bits encode the required
// alignment as (size - 1); the commit widened the field from 3 to 5 bits so
// quad-word (16-byte) and oct-word (32-byte) Neon accesses can be expressed,
// shifting the flag bits above it accordingly.
//
// NOTE(review): the extracted diff showed both old and new enumerator
// values interleaved (e.g. AlignmentMask as 0x7 and 0x1f); only the
// post-change values are kept here, since duplicate enumerators would not
// compile.
enum ArmFlags {
    // Mask covering all alignment encodings below.
    AlignmentMask = 0x1f,

    AlignByte = 0x0,
    AlignHalfWord = 0x1,
    AlignWord = 0x3,
    AlignDoubleWord = 0x7,
    AlignQuadWord = 0xf,
    AlignOctWord = 0x1f,

    // The access may be unaligned regardless of the alignment bits.
    AllowUnaligned = 0x20,
    // Priv code operating as if it wasn't
    UserMode = 0x40,
    // Because zero otherwise looks like a valid setting and may be used
    // accidentally, this bit must be non-zero to show it was used on
    // purpose.
    MustBeOne = 0x80
};
protected:
typedef std::multimap<Addr, int> PageTable;