ARM: Implement all ARM SIMD instructions.
This commit is contained in:
parent
f4f6b31df1
commit
6368edb281
|
@ -137,6 +137,647 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Assemble the microop sequence for a Neon multiple structure load.
 *
 * The sequence is: one or two memory load microops, an optional base
 * register writeback, and (when elems > 1) one deinterleave microop per
 * group of "elems" registers.
 *
 * @param elems Number of interleaved elements per structure; 1 means the
 *              data is loaded straight into the destination registers.
 * @param rn    Register used as the base of the memory microops and
 *              updated by the writeback microop.
 * @param vd    First destination register; it is doubled to form the
 *              register index handed to the microops.
 * @param regs  Number of registers transferred (1 to 4).
 * @param inc   Register stride, doubled before being passed on.
 * @param size  Element size selector forwarded to the microop factories.
 * @param align Alignment flags applied to the first memory microop only.
 * @param rm    15 disables writeback, 13 requests writeback by the
 *              transfer size (regs * 8), anything else is added to rn.
 */
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    const bool writeback = (rm != 15);
    const bool needsDeint = (elems > 1);

    // A single load microop moves at most two registers worth of data.
    numMicroops = (regs > 2) ? 2 : 1;
    if (writeback)
        numMicroops++;
    if (needsDeint)
        numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // When deinterleaving, the loads land in the registers just past the
    // architectural float registers and are shuffled into place afterwards.
    RegIndex loadDest = vd * 2;
    if (needsDeint)
        loadDest = NumFloatArchRegs;

    // Only the first access carries the instruction's alignment flags.
    const uint32_t noAlign = TLB::MustBeOne;

    unsigned next = 0;
    switch (regs) {
      case 1:
        microOps[next++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, loadDest, rn, 0, align);
        break;
      case 2:
        microOps[next++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, loadDest, rn, 0, align);
        break;
      case 3:
        microOps[next++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, loadDest, rn, 0, align);
        microOps[next++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, loadDest + 4, rn, 16, noAlign);
        break;
      case 4:
        microOps[next++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, loadDest, rn, 0, align);
        microOps[next++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, loadDest + 4, rn, 16, noAlign);
        break;
      default:
        panic("Unrecognized number of registers %d.\n", regs);
    }

    if (writeback) {
        // rm == 15 was excluded above, so only 13 selects the immediate form.
        if (rm == 13) {
            microOps[next++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        } else {
            microOps[next++] =
                new MicroAddUop(machInst, rn, rn, rm);
        }
    }

    if (needsDeint) {
        switch (elems) {
          case 2:
            assert(regs == 4 || regs == 2);
            microOps[next++] = newNeonMixInst<MicroDeintNeon4Uop>(
                    size, machInst, vd * 2, loadDest, inc * 2);
            // Four registers of two-element data need a second pass.
            if (regs == 4) {
                microOps[next++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, loadDest + 4, inc * 2);
            }
            break;
          case 3:
            assert(regs == 3);
            microOps[next++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, loadDest, inc * 2);
            break;
          case 4:
            assert(regs == 4);
            microOps[next++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, loadDest, inc * 2);
            break;
          default:
            panic("Bad number of elements to deinterleave %d.\n", elems);
        }
    }
    assert(next == numMicroops);

    // Every microop but the final one is marked delayed-commit.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uop = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uop);
        uop->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
|
||||
|
||||
/**
 * Assemble the microop sequence for a Neon single structure load (either
 * to one lane or replicated to all lanes).
 *
 * The sequence is: one memory load microop into the registers just past
 * the architectural float registers, an optional base register
 * writeback, then one unpack microop per group of "elems" registers.
 *
 * @param all   Select the "unpack to all lanes" microops instead of the
 *              single-lane ones.
 * @param elems Number of elements in the structure (1 to 4).
 * @param rn    Register used as the base of the load and updated by the
 *              writeback microop.
 * @param vd    First destination register; doubled to form the register
 *              index handed to the unpack microops.
 * @param regs  Number of destination registers (1 to 4).
 * @param inc   Register stride, doubled before being passed on.
 * @param size  log2 of the element size in bytes (0, 1 or 2).
 * @param align Alignment flags for the load microop.
 * @param rm    15 disables writeback, 13 requests writeback by the number
 *              of bytes loaded, anything else is added to rn.
 * @param lane  Destination lane for the single-lane unpack microops.
 */
VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    unsigned loadRegs M5_VAR_USED = (loadSize + sizeof(FloatRegBits) - 1) /
                                    sizeof(FloatRegBits);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch destination for the raw load, unpacked into vd afterwards.
    RegIndex ufp0 = NumFloatArchRegs;

    unsigned uopIdx = 0;
    // Pick the load microop by total bytes, and by element width where
    // more than one element width produces the same total.
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          default:
            // Without this the load microop would be silently omitted.
            panic("Bad element size %d for load size %d.\n",
                  eBytes, loadSize);
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          default:
            // Without this the load microop would be silently omitted.
            panic("Bad element size %d for load size %d.\n",
                  eBytes, loadSize);
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Report the value actually switched on.
        panic("Unrecognized load size %d.\n", loadSize);
    }

    if (wb) {
        // rm == 15 was excluded above, so only 13 selects the immediate form.
        if (rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }

    // Unpack the loaded bytes from the scratch registers into vd.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        // One unpack per destination register, all reading the same data.
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                panic("Bad size %d.\n", size);
                break;
            }
        }
        break;
      default:
        panic("Bad number of elements to unpack %d.\n", elems);
    }
    assert(uopIdx == numMicroops);

    // Every microop but the final one is marked delayed-commit.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
|
||||
|
||||
/**
 * Assemble the microop sequence for a Neon multiple structure store.
 *
 * The sequence is: (when elems > 1) one interleave microop per group of
 * "elems" registers, one or two memory store microops, and an optional
 * base register writeback.
 *
 * @param elems Number of interleaved elements per structure; 1 means the
 *              source registers are stored as they are.
 * @param rn    Register used as the base of the memory microops and
 *              updated by the writeback microop.
 * @param vd    First source register; it is doubled to form the register
 *              index handed to the microops.
 * @param regs  Number of registers transferred (1 to 4).
 * @param inc   Register stride, doubled before being passed on.
 * @param size  Element size selector forwarded to the microop factories.
 * @param align Alignment flags applied to the first memory microop only.
 * @param rm    15 disables writeback, 13 requests writeback by the
 *              transfer size (regs * 8), anything else is added to rn.
 */
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    const bool writeback = (rm != 15);
    const bool needsInter = (elems > 1);

    // A single store microop moves at most two registers worth of data.
    numMicroops = (regs > 2) ? 2 : 1;
    if (writeback)
        numMicroops++;
    if (needsInter)
        numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Only the first access carries the instruction's alignment flags.
    const uint32_t noAlign = TLB::MustBeOne;

    // When interleaving, the shuffled data is staged in the registers just
    // past the architectural float registers and stored from there.
    RegIndex storeSrc = vd * 2;
    if (needsInter)
        storeSrc = NumFloatArchRegs;

    unsigned next = 0;
    if (needsInter) {
        switch (elems) {
          case 2:
            assert(regs == 4 || regs == 2);
            microOps[next++] = newNeonMixInst<MicroInterNeon4Uop>(
                    size, machInst, storeSrc, vd * 2, inc * 2);
            // Four registers of two-element data need a second pass.
            if (regs == 4) {
                microOps[next++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, storeSrc + 4, vd * 2 + 2, inc * 2);
            }
            break;
          case 3:
            assert(regs == 3);
            microOps[next++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, storeSrc, vd * 2, inc * 2);
            break;
          case 4:
            assert(regs == 4);
            microOps[next++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, storeSrc, vd * 2, inc * 2);
            break;
          default:
            panic("Bad number of elements to interleave %d.\n", elems);
        }
    }

    switch (regs) {
      case 1:
        microOps[next++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, storeSrc, rn, 0, align);
        break;
      case 2:
        microOps[next++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, storeSrc, rn, 0, align);
        break;
      case 3:
        microOps[next++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, storeSrc, rn, 0, align);
        microOps[next++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, storeSrc + 4, rn, 16, noAlign);
        break;
      case 4:
        microOps[next++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, storeSrc, rn, 0, align);
        microOps[next++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, storeSrc + 4, rn, 16, noAlign);
        break;
      default:
        panic("Unrecognized number of registers %d.\n", regs);
    }

    if (writeback) {
        // rm == 15 was excluded above, so only 13 selects the immediate form.
        if (rm == 13) {
            microOps[next++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        } else {
            microOps[next++] =
                new MicroAddUop(machInst, rn, rn, rm);
        }
    }
    assert(next == numMicroops);

    // Every microop but the final one is marked delayed-commit.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp *uop = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uop);
        uop->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
|
||||
|
||||
/**
 * Assemble the microop sequence for a Neon single structure store (from
 * one lane).
 *
 * The sequence is: one pack microop per group of "elems" registers that
 * gathers the lane into the registers just past the architectural float
 * registers, one memory store microop, and an optional base register
 * writeback.
 *
 * @param all   Must be false; there is no "all lanes" store.
 * @param elems Number of elements in the structure (1 to 4).
 * @param rn    Register used as the base of the store and updated by the
 *              writeback microop.
 * @param vd    First source register; doubled to form the register index
 *              handed to the pack microops.
 * @param regs  Number of source registers (1 to 4).
 * @param inc   Register stride, doubled before being passed on.
 * @param size  log2 of the element size in bytes (0, 1 or 2).
 * @param align Alignment flags for the store microop.
 * @param rm    15 disables writeback, 13 requests writeback by the number
 *              of bytes stored, anything else is added to rn.
 * @param lane  Source lane for the pack microops.
 */
VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    unsigned storeRegs M5_VAR_USED = (storeSize + sizeof(FloatRegBits) - 1) /
                                     sizeof(FloatRegBits);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    // Scratch registers that receive the packed data before it is stored.
    RegIndex ufp0 = NumFloatArchRegs;

    unsigned uopIdx = 0;
    // Pack the selected lane of each source register into the scratch area.
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            panic("Bad size %d.\n", size);
            break;
        }
        break;
      case 1:
        // "all" was asserted false above, so this only admits regs == 1.
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                panic("Bad size %d.\n", size);
                break;
            }
        }
        break;
      default:
        panic("Bad number of elements to pack %d.\n", elems);
    }

    // Pick the store microop by total bytes, and by element width where
    // more than one element width produces the same total.
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          default:
            // Without this the store microop would be silently omitted.
            panic("Bad element size %d for store size %d.\n",
                  eBytes, storeSize);
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          default:
            // Without this the store microop would be silently omitted.
            panic("Bad element size %d for store size %d.\n",
                  eBytes, storeSize);
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Report the value actually switched on.
        panic("Unrecognized store size %d.\n", storeSize);
    }

    if (wb) {
        // rm == 15 was excluded above, so only 13 selects the immediate form.
        if (rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    assert(uopIdx == numMicroops);

    // Every microop but the final one is marked delayed-commit.
    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[numMicroops - 1]->setLastMicroop();
}
|
||||
|
||||
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
|
||||
OpClass __opClass, IntRegIndex rn,
|
||||
RegIndex vd, bool single, bool up,
|
||||
|
@ -169,17 +810,25 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
|
|||
bool tempUp = up;
|
||||
for (int j = 0; j < count; j++) {
|
||||
if (load) {
|
||||
microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
|
||||
tempUp, addr);
|
||||
if (!single)
|
||||
microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn, tempUp,
|
||||
addr + (up ? 4 : -4));
|
||||
if (single) {
|
||||
microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
|
||||
tempUp, addr);
|
||||
} else {
|
||||
microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
|
||||
tempUp, addr);
|
||||
microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
|
||||
addr + (up ? 4 : -4));
|
||||
}
|
||||
} else {
|
||||
microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
|
||||
tempUp, addr);
|
||||
if (!single)
|
||||
microOps[i++] = new MicroStrFpUop(machInst, vd++, rn, tempUp,
|
||||
addr + (up ? 4 : -4));
|
||||
if (single) {
|
||||
microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
|
||||
tempUp, addr);
|
||||
} else {
|
||||
microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
|
||||
tempUp, addr);
|
||||
microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
|
||||
addr + (up ? 4 : -4));
|
||||
}
|
||||
}
|
||||
if (!tempUp) {
|
||||
addr -= (single ? 4 : 8);
|
||||
|
@ -216,7 +865,7 @@ MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
|
|||
}
|
||||
|
||||
std::string
|
||||
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
|
||||
MicroIntImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
|
||||
{
|
||||
std::stringstream ss;
|
||||
printMnemonic(ss);
|
||||
|
@ -228,6 +877,19 @@ MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
std::string
|
||||
MicroIntOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
|
||||
{
|
||||
std::stringstream ss;
|
||||
printMnemonic(ss);
|
||||
printReg(ss, ura);
|
||||
ss << ", ";
|
||||
printReg(ss, urb);
|
||||
ss << ", ";
|
||||
printReg(ss, urc);
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
std::string
|
||||
MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
|
||||
{
|
||||
|
|
|
@ -79,17 +79,67 @@ class MicroOp : public PredOp
|
|||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Microops for Neon loads/stores
|
||||
*/
|
||||
class MicroNeonMemOp : public MicroOp
|
||||
{
|
||||
protected:
|
||||
RegIndex dest, ura;
|
||||
uint32_t imm;
|
||||
unsigned memAccessFlags;
|
||||
|
||||
MicroNeonMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
RegIndex _dest, RegIndex _ura, uint32_t _imm)
|
||||
: MicroOp(mnem, machInst, __opClass),
|
||||
dest(_dest), ura(_ura), imm(_imm),
|
||||
memAccessFlags(TLB::MustBeOne)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Microops for Neon load/store (de)interleaving
|
||||
*/
|
||||
class MicroNeonMixOp : public MicroOp
|
||||
{
|
||||
protected:
|
||||
RegIndex dest, op1;
|
||||
uint32_t step;
|
||||
|
||||
MicroNeonMixOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
RegIndex _dest, RegIndex _op1, uint32_t _step)
|
||||
: MicroOp(mnem, machInst, __opClass),
|
||||
dest(_dest), op1(_op1), step(_step)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
class MicroNeonMixLaneOp : public MicroNeonMixOp
|
||||
{
|
||||
protected:
|
||||
unsigned lane;
|
||||
|
||||
MicroNeonMixLaneOp(const char *mnem, ExtMachInst machInst,
|
||||
OpClass __opClass, RegIndex _dest, RegIndex _op1,
|
||||
uint32_t _step, unsigned _lane)
|
||||
: MicroNeonMixOp(mnem, machInst, __opClass, _dest, _op1, _step),
|
||||
lane(_lane)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Microops of the form IntRegA = IntRegB op Imm
|
||||
*/
|
||||
class MicroIntOp : public MicroOp
|
||||
class MicroIntImmOp : public MicroOp
|
||||
{
|
||||
protected:
|
||||
RegIndex ura, urb;
|
||||
uint8_t imm;
|
||||
|
||||
MicroIntOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
RegIndex _ura, RegIndex _urb, uint8_t _imm)
|
||||
MicroIntImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
RegIndex _ura, RegIndex _urb, uint8_t _imm)
|
||||
: MicroOp(mnem, machInst, __opClass),
|
||||
ura(_ura), urb(_urb), imm(_imm)
|
||||
{
|
||||
|
@ -98,10 +148,28 @@ class MicroIntOp : public MicroOp
|
|||
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
|
||||
};
|
||||
|
||||
/**
|
||||
* Microops of the form IntRegA = IntRegB op IntRegC
|
||||
*/
|
||||
class MicroIntOp : public MicroOp
|
||||
{
|
||||
protected:
|
||||
RegIndex ura, urb, urc;
|
||||
|
||||
MicroIntOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
RegIndex _ura, RegIndex _urb, RegIndex _urc)
|
||||
: MicroOp(mnem, machInst, __opClass),
|
||||
ura(_ura), urb(_urb), urc(_urc)
|
||||
{
|
||||
}
|
||||
|
||||
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
|
||||
};
|
||||
|
||||
/**
|
||||
* Memory microops which use IntReg + Imm addressing
|
||||
*/
|
||||
class MicroMemOp : public MicroIntOp
|
||||
class MicroMemOp : public MicroIntImmOp
|
||||
{
|
||||
protected:
|
||||
bool up;
|
||||
|
@ -109,7 +177,7 @@ class MicroMemOp : public MicroIntOp
|
|||
|
||||
MicroMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
RegIndex _ura, RegIndex _urb, bool _up, uint8_t _imm)
|
||||
: MicroIntOp(mnem, machInst, __opClass, _ura, _urb, _imm),
|
||||
: MicroIntImmOp(mnem, machInst, __opClass, _ura, _urb, _imm),
|
||||
up(_up), memAccessFlags(TLB::MustBeOne | TLB::AlignWord)
|
||||
{
|
||||
}
|
||||
|
@ -128,6 +196,46 @@ class MacroMemOp : public PredMacroOp
|
|||
bool writeback, bool load, uint32_t reglist);
|
||||
};
|
||||
|
||||
/**
|
||||
* Base classes for microcoded integer memory instructions.
|
||||
*/
|
||||
class VldMultOp : public PredMacroOp
|
||||
{
|
||||
protected:
|
||||
VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
|
||||
unsigned inc, uint32_t size, uint32_t align, RegIndex rm);
|
||||
};
|
||||
|
||||
class VldSingleOp : public PredMacroOp
|
||||
{
|
||||
protected:
|
||||
VldSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
bool all, unsigned elems, RegIndex rn, RegIndex vd,
|
||||
unsigned regs, unsigned inc, uint32_t size,
|
||||
uint32_t align, RegIndex rm, unsigned lane);
|
||||
};
|
||||
|
||||
/**
|
||||
* Base class for microcoded integer memory instructions.
|
||||
*/
|
||||
class VstMultOp : public PredMacroOp
|
||||
{
|
||||
protected:
|
||||
VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
unsigned width, RegIndex rn, RegIndex vd, unsigned regs,
|
||||
unsigned inc, uint32_t size, uint32_t align, RegIndex rm);
|
||||
};
|
||||
|
||||
class VstSingleOp : public PredMacroOp
|
||||
{
|
||||
protected:
|
||||
VstSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
|
||||
bool all, unsigned elems, RegIndex rn, RegIndex vd,
|
||||
unsigned regs, unsigned inc, uint32_t size,
|
||||
uint32_t align, RegIndex rm, unsigned lane);
|
||||
};
|
||||
|
||||
/**
|
||||
* Base class for microcoded floating point memory instructions.
|
||||
*/
|
||||
|
|
|
@ -118,24 +118,26 @@ simd_modified_imm(bool op, uint8_t cmode, uint8_t data)
|
|||
break;
|
||||
case 0xe:
|
||||
if (op) {
|
||||
bigData = 0;
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
if (bits(data, i)) {
|
||||
bigData |= (ULL(0xFF) << (i * 8));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bigData = (bigData << 0) | (bigData << 8) |
|
||||
(bigData << 16) | (bigData << 24) |
|
||||
(bigData << 32) | (bigData << 40) |
|
||||
(bigData << 48) | (bigData << 56);
|
||||
} else {
|
||||
bigData = 0;
|
||||
for (int i = 7; i >= 0; i--) {
|
||||
if (bits(data, i)) {
|
||||
bigData |= (0xFF << (i * 8));
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 0xf:
|
||||
if (!op) {
|
||||
uint64_t bVal = bits(bigData, 6) ? (0x1F) : (0x20);
|
||||
bigData = (bits(bigData, 5, 0) << 19) |
|
||||
(bVal << 25) | (bits(bigData, 7) << 31);
|
||||
bigData |= (bigData << 32);
|
||||
break;
|
||||
}
|
||||
// Fall through
|
||||
default:
|
||||
|
|
|
@ -251,6 +251,28 @@ class ArmStaticInst : public StaticInst
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Byte swap a value of type T one E-sized element at a time.
 *
 * The value is passed through htog(), each of its sizeof(T) / sizeof(E)
 * elements is then passed through gtobe() when "big" is set or gtole()
 * otherwise, and the reassembled value is passed through gtoh().
 *
 * @param val Value to convert.
 * @param big Selects gtobe() (true) or gtole() (false) per element.
 * @return The converted value.
 */
template<class T, class E>
static inline T
cSwap(T val, bool big)
{
    const unsigned elemCount = sizeof(T) / sizeof(E);

    // View the same storage either as one T or as an array of E.
    union {
        T whole;
        E parts[elemCount];
    } view;

    view.whole = htog(val);
    for (unsigned idx = 0; idx < elemCount; idx++) {
        view.parts[idx] = big ? gtobe(view.parts[idx])
                              : gtole(view.parts[idx]);
    }
    return gtoh(view.whole);
}
|
||||
|
||||
// Perform an interworking branch.
|
||||
template<class XC>
|
||||
static inline void
|
||||
|
|
|
@ -91,6 +91,20 @@ FpRegRegRegOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
|
|||
return ss.str();
|
||||
}
|
||||
|
||||
std::string
|
||||
FpRegRegRegImmOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const
|
||||
{
|
||||
std::stringstream ss;
|
||||
printMnemonic(ss);
|
||||
printReg(ss, dest + FP_Base_DepTag);
|
||||
ss << ", ";
|
||||
printReg(ss, op1 + FP_Base_DepTag);
|
||||
ss << ", ";
|
||||
printReg(ss, op2 + FP_Base_DepTag);
|
||||
ccprintf(ss, ", #%d", imm);
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
namespace ArmISA
|
||||
{
|
||||
|
||||
|
@ -117,7 +131,7 @@ prepFpState(uint32_t rMode)
|
|||
}
|
||||
|
||||
void
|
||||
finishVfp(FPSCR &fpscr, VfpSavedState state)
|
||||
finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush)
|
||||
{
|
||||
int exceptions = fetestexcept(FeAllExceptions);
|
||||
bool underflow = false;
|
||||
|
@ -134,7 +148,7 @@ finishVfp(FPSCR &fpscr, VfpSavedState state)
|
|||
underflow = true;
|
||||
fpscr.ufc = 1;
|
||||
}
|
||||
if ((exceptions & FeInexact) && !(underflow && fpscr.fz)) {
|
||||
if ((exceptions & FeInexact) && !(underflow && flush)) {
|
||||
fpscr.ixc = 1;
|
||||
}
|
||||
fesetround(state);
|
||||
|
@ -142,7 +156,7 @@ finishVfp(FPSCR &fpscr, VfpSavedState state)
|
|||
|
||||
template <class fpType>
|
||||
fpType
|
||||
fixDest(FPSCR fpscr, fpType val, fpType op1)
|
||||
fixDest(bool flush, bool defaultNan, fpType val, fpType op1)
|
||||
{
|
||||
int fpClass = std::fpclassify(val);
|
||||
fpType junk = 0.0;
|
||||
|
@ -150,12 +164,12 @@ fixDest(FPSCR fpscr, fpType val, fpType op1)
|
|||
const bool single = (sizeof(val) == sizeof(float));
|
||||
const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
|
||||
const bool nan = std::isnan(op1);
|
||||
if (!nan || (fpscr.dn == 1)) {
|
||||
if (!nan || defaultNan) {
|
||||
val = bitsToFp(qnan, junk);
|
||||
} else if (nan) {
|
||||
val = bitsToFp(fpToBits(op1) | qnan, junk);
|
||||
}
|
||||
} else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
|
||||
} else if (fpClass == FP_SUBNORMAL && flush == 1) {
|
||||
// Turn val into a zero with the correct sign;
|
||||
uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
|
||||
val = bitsToFp(fpToBits(val) & bitMask, junk);
|
||||
|
@ -166,13 +180,13 @@ fixDest(FPSCR fpscr, fpType val, fpType op1)
|
|||
}
|
||||
|
||||
template
|
||||
float fixDest<float>(FPSCR fpscr, float val, float op1);
|
||||
float fixDest<float>(bool flush, bool defaultNan, float val, float op1);
|
||||
template
|
||||
double fixDest<double>(FPSCR fpscr, double val, double op1);
|
||||
double fixDest<double>(bool flush, bool defaultNan, double val, double op1);
|
||||
|
||||
template <class fpType>
|
||||
fpType
|
||||
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
|
||||
fixDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
|
||||
{
|
||||
int fpClass = std::fpclassify(val);
|
||||
fpType junk = 0.0;
|
||||
|
@ -183,7 +197,7 @@ fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
|
|||
const bool nan2 = std::isnan(op2);
|
||||
const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
|
||||
const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
|
||||
if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
|
||||
if ((!nan1 && !nan2) || defaultNan) {
|
||||
val = bitsToFp(qnan, junk);
|
||||
} else if (signal1) {
|
||||
val = bitsToFp(fpToBits(op1) | qnan, junk);
|
||||
|
@ -194,7 +208,7 @@ fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
|
|||
} else if (nan2) {
|
||||
val = op2;
|
||||
}
|
||||
} else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
|
||||
} else if (fpClass == FP_SUBNORMAL && flush) {
|
||||
// Turn val into a zero with the correct sign;
|
||||
uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
|
||||
val = bitsToFp(fpToBits(val) & bitMask, junk);
|
||||
|
@ -205,15 +219,17 @@ fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
|
|||
}
|
||||
|
||||
template
|
||||
float fixDest<float>(FPSCR fpscr, float val, float op1, float op2);
|
||||
float fixDest<float>(bool flush, bool defaultNan,
|
||||
float val, float op1, float op2);
|
||||
template
|
||||
double fixDest<double>(FPSCR fpscr, double val, double op1, double op2);
|
||||
double fixDest<double>(bool flush, bool defaultNan,
|
||||
double val, double op1, double op2);
|
||||
|
||||
template <class fpType>
|
||||
fpType
|
||||
fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
|
||||
fixDivDest(bool flush, bool defaultNan, fpType val, fpType op1, fpType op2)
|
||||
{
|
||||
fpType mid = fixDest(fpscr, val, op1, op2);
|
||||
fpType mid = fixDest(flush, defaultNan, val, op1, op2);
|
||||
const bool single = (sizeof(fpType) == sizeof(float));
|
||||
const fpType junk = 0.0;
|
||||
if ((single && (val == bitsToFp(0x00800000, junk) ||
|
||||
|
@ -228,7 +244,7 @@ fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
|
|||
temp = op1 / op2;
|
||||
if (flushToZero(temp)) {
|
||||
feraiseexcept(FeUnderflow);
|
||||
if (fpscr.fz) {
|
||||
if (flush) {
|
||||
feclearexcept(FeInexact);
|
||||
mid = temp;
|
||||
}
|
||||
|
@ -239,9 +255,11 @@ fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
|
|||
}
|
||||
|
||||
template
|
||||
float fixDivDest<float>(FPSCR fpscr, float val, float op1, float op2);
|
||||
float fixDivDest<float>(bool flush, bool defaultNan,
|
||||
float val, float op1, float op2);
|
||||
template
|
||||
double fixDivDest<double>(FPSCR fpscr, double val, double op1, double op2);
|
||||
double fixDivDest<double>(bool flush, bool defaultNan,
|
||||
double val, double op1, double op2);
|
||||
|
||||
float
|
||||
fixFpDFpSDest(FPSCR fpscr, double val)
|
||||
|
@ -255,7 +273,7 @@ fixFpDFpSDest(FPSCR fpscr, double val)
|
|||
(bits(valBits, 63) << 31);
|
||||
op1 = bitsToFp(op1Bits, junk);
|
||||
}
|
||||
float mid = fixDest(fpscr, (float)val, op1);
|
||||
float mid = fixDest(fpscr.fz, fpscr.dn, (float)val, op1);
|
||||
if (fpscr.fz && fetestexcept(FeUnderflow | FeInexact) ==
|
||||
(FeUnderflow | FeInexact)) {
|
||||
feclearexcept(FeInexact);
|
||||
|
@ -291,7 +309,7 @@ fixFpSFpDDest(FPSCR fpscr, float val)
|
|||
((uint64_t)bits(valBits, 31) << 63);
|
||||
op1 = bitsToFp(op1Bits, junk);
|
||||
}
|
||||
double mid = fixDest(fpscr, (double)val, op1);
|
||||
double mid = fixDest(fpscr.fz, fpscr.dn, (double)val, op1);
|
||||
if (mid == bitsToFp(ULL(0x0010000000000000), junk) ||
|
||||
mid == bitsToFp(ULL(0x8010000000000000), junk)) {
|
||||
__asm__ __volatile__("" : "=m" (val) : "m" (val));
|
||||
|
@ -311,11 +329,10 @@ fixFpSFpDDest(FPSCR fpscr, float val)
|
|||
return mid;
|
||||
}
|
||||
|
||||
float
|
||||
vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
|
||||
uint16_t
|
||||
vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
|
||||
uint32_t rMode, bool ahp, float op)
|
||||
{
|
||||
float junk = 0.0;
|
||||
uint32_t destBits = fpToBits(dest);
|
||||
uint32_t opBits = fpToBits(op);
|
||||
// Extract the operand.
|
||||
bool neg = bits(opBits, 31);
|
||||
|
@ -331,11 +348,11 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
|
|||
// Signalling nan.
|
||||
fpscr.ioc = 1;
|
||||
}
|
||||
if (fpscr.ahp) {
|
||||
if (ahp) {
|
||||
mantissa = 0;
|
||||
exponent = 0;
|
||||
fpscr.ioc = 1;
|
||||
} else if (fpscr.dn) {
|
||||
} else if (defaultNan) {
|
||||
mantissa = (1 << 9);
|
||||
exponent = 0x1f;
|
||||
neg = false;
|
||||
|
@ -346,7 +363,7 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
|
|||
} else {
|
||||
// Infinities.
|
||||
exponent = 0x1F;
|
||||
if (fpscr.ahp) {
|
||||
if (ahp) {
|
||||
fpscr.ioc = 1;
|
||||
mantissa = 0x3ff;
|
||||
} else {
|
||||
|
@ -364,14 +381,14 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
|
|||
// Denormalized.
|
||||
|
||||
// If flush to zero is on, this shouldn't happen.
|
||||
assert(fpscr.fz == 0);
|
||||
assert(!flush);
|
||||
|
||||
// Check for underflow
|
||||
if (inexact || fpscr.ufe)
|
||||
fpscr.ufc = 1;
|
||||
|
||||
// Handle rounding.
|
||||
unsigned mode = fpscr.rMode;
|
||||
unsigned mode = rMode;
|
||||
if ((mode == VfpRoundUpward && !neg && extra) ||
|
||||
(mode == VfpRoundDown && neg && extra) ||
|
||||
(mode == VfpRoundNearest &&
|
||||
|
@ -416,7 +433,7 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
|
|||
}
|
||||
|
||||
// Handle rounding.
|
||||
unsigned mode = fpscr.rMode;
|
||||
unsigned mode = rMode;
|
||||
bool nonZero = topOne || !restZeros;
|
||||
if ((mode == VfpRoundUpward && !neg && nonZero) ||
|
||||
(mode == VfpRoundDown && neg && nonZero) ||
|
||||
|
@ -432,7 +449,7 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
|
|||
}
|
||||
|
||||
// Deal with overflow
|
||||
if (fpscr.ahp) {
|
||||
if (ahp) {
|
||||
if (exponent >= 0x20) {
|
||||
exponent = 0x1f;
|
||||
mantissa = 0x3ff;
|
||||
|
@ -468,27 +485,17 @@ vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top)
|
|||
replaceBits(result, 14, 10, exponent);
|
||||
if (neg)
|
||||
result |= (1 << 15);
|
||||
if (top)
|
||||
replaceBits(destBits, 31, 16, result);
|
||||
else
|
||||
replaceBits(destBits, 15, 0, result);
|
||||
return bitsToFp(destBits, junk);
|
||||
return result;
|
||||
}
|
||||
|
||||
float
|
||||
vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
|
||||
vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op)
|
||||
{
|
||||
float junk = 0.0;
|
||||
uint32_t opBits = fpToBits(op);
|
||||
// Extract the operand.
|
||||
if (top)
|
||||
opBits = bits(opBits, 31, 16);
|
||||
else
|
||||
opBits = bits(opBits, 15, 0);
|
||||
// Extract the bitfields.
|
||||
bool neg = bits(opBits, 15);
|
||||
uint32_t exponent = bits(opBits, 14, 10);
|
||||
uint32_t mantissa = bits(opBits, 9, 0);
|
||||
bool neg = bits(op, 15);
|
||||
uint32_t exponent = bits(op, 14, 10);
|
||||
uint32_t mantissa = bits(op, 9, 0);
|
||||
// Do the conversion.
|
||||
if (exponent == 0) {
|
||||
if (mantissa != 0) {
|
||||
|
@ -500,7 +507,7 @@ vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
|
|||
}
|
||||
}
|
||||
mantissa = mantissa << (23 - 10);
|
||||
} else if (exponent == 0x1f && !fpscr.ahp) {
|
||||
} else if (exponent == 0x1f && !ahp) {
|
||||
// Infinities and nans.
|
||||
exponent = 0xff;
|
||||
if (mantissa != 0) {
|
||||
|
@ -511,7 +518,7 @@ vcvtFpHFpS(FPSCR &fpscr, float op, bool top)
|
|||
fpscr.ioc = 1;
|
||||
mantissa |= (1 << 22);
|
||||
}
|
||||
if (fpscr.dn) {
|
||||
if (defaultNan) {
|
||||
mantissa &= ~mask(22);
|
||||
neg = false;
|
||||
}
|
||||
|
@ -624,7 +631,8 @@ vfpFpSToFixed(float val, bool isSigned, bool half,
|
|||
}
|
||||
|
||||
float
|
||||
vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
|
||||
vfpUFixedToFpS(bool flush, bool defaultNan,
|
||||
uint32_t val, bool half, uint8_t imm)
|
||||
{
|
||||
fesetround(FeRoundNearest);
|
||||
if (half)
|
||||
|
@ -633,11 +641,12 @@ vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
|
|||
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
|
||||
feclearexcept(FeAllExceptions);
|
||||
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
|
||||
return fixDivDest(fpscr, val / scale, (float)val, scale);
|
||||
return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
|
||||
}
|
||||
|
||||
float
|
||||
vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
|
||||
vfpSFixedToFpS(bool flush, bool defaultNan,
|
||||
int32_t val, bool half, uint8_t imm)
|
||||
{
|
||||
fesetround(FeRoundNearest);
|
||||
if (half)
|
||||
|
@ -646,7 +655,7 @@ vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
|
|||
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
|
||||
feclearexcept(FeAllExceptions);
|
||||
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
|
||||
return fixDivDest(fpscr, val / scale, (float)val, scale);
|
||||
return fixDivDest(flush, defaultNan, val / scale, (float)val, scale);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
|
@ -743,7 +752,8 @@ vfpFpDToFixed(double val, bool isSigned, bool half,
|
|||
}
|
||||
|
||||
double
|
||||
vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
|
||||
vfpUFixedToFpD(bool flush, bool defaultNan,
|
||||
uint32_t val, bool half, uint8_t imm)
|
||||
{
|
||||
fesetround(FeRoundNearest);
|
||||
if (half)
|
||||
|
@ -752,11 +762,12 @@ vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
|
|||
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
|
||||
feclearexcept(FeAllExceptions);
|
||||
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
|
||||
return fixDivDest(fpscr, val / scale, (double)val, scale);
|
||||
return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
|
||||
}
|
||||
|
||||
double
|
||||
vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
|
||||
vfpSFixedToFpD(bool flush, bool defaultNan,
|
||||
int32_t val, bool half, uint8_t imm)
|
||||
{
|
||||
fesetround(FeRoundNearest);
|
||||
if (half)
|
||||
|
@ -765,14 +776,211 @@ vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
|
|||
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
|
||||
feclearexcept(FeAllExceptions);
|
||||
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
|
||||
return fixDivDest(fpscr, val / scale, (double)val, scale);
|
||||
return fixDivDest(flush, defaultNan, val / scale, (double)val, scale);
|
||||
}
|
||||
|
||||
// Reciprocal square root estimate on a double precision operand. This is a
// transcription of the formula the architecture reference manual calls
// recip_sqrt_estimate.
//
// The operand is truncated onto a grid (steps of 1/512 when a < 0.5, steps
// of 1/256 otherwise), the reciprocal square root of the centre of that grid
// cell is computed, and the result is rounded to a multiple of 1/256.
// NOTE(review): presumably callers only pass operands already scaled into
// [0.25, 1.0); nothing in this function checks that.
static double
recipSqrtEstimate(double a)
{
    // Grid resolution depends on which half of the range the operand is in.
    const double steps = (a < 0.5) ? 512.0 : 256.0;

    // Truncate onto the grid, then evaluate at the middle of the cell.
    const int64_t cell = (int64_t)(a * steps);
    const double recipRoot = 1.0 / sqrt(((double)cell + 0.5) / steps);

    // Round to the nearest 1/256 by adding one half and truncating.
    const int64_t rounded = (int64_t)(256.0 * recipRoot + 0.5);
    return (double)rounded / 256.0;
}
|
||||
|
||||
// This function is only intended for use in Neon instructions because
|
||||
// it ignores certain bits in the FPSCR.
|
||||
float
|
||||
fprSqrtEstimate(FPSCR &fpscr, float op)
|
||||
{
|
||||
const uint32_t qnan = 0x7fc00000;
|
||||
float junk = 0.0;
|
||||
int fpClass = std::fpclassify(op);
|
||||
if (fpClass == FP_NAN) {
|
||||
if ((fpToBits(op) & qnan) != qnan)
|
||||
fpscr.ioc = 1;
|
||||
return bitsToFp(qnan, junk);
|
||||
} else if (fpClass == FP_ZERO) {
|
||||
fpscr.dzc = 1;
|
||||
// Return infinity with the same sign as the operand.
|
||||
return bitsToFp((std::signbit(op) << 31) |
|
||||
(0xFF << 23) | (0 << 0), junk);
|
||||
} else if (std::signbit(op)) {
|
||||
// Set invalid op bit.
|
||||
fpscr.ioc = 1;
|
||||
return bitsToFp(qnan, junk);
|
||||
} else if (fpClass == FP_INFINITE) {
|
||||
return 0.0;
|
||||
} else {
|
||||
uint64_t opBits = fpToBits(op);
|
||||
double scaled;
|
||||
if (bits(opBits, 23)) {
|
||||
scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
|
||||
(ULL(0x3fd) << 52) | (bits(opBits, 31) << 63),
|
||||
(double)0.0);
|
||||
} else {
|
||||
scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
|
||||
(ULL(0x3fe) << 52) | (bits(opBits, 31) << 63),
|
||||
(double)0.0);
|
||||
}
|
||||
uint64_t resultExp = (380 - bits(opBits, 30, 23)) / 2;
|
||||
|
||||
uint64_t estimate = fpToBits(recipSqrtEstimate(scaled));
|
||||
|
||||
return bitsToFp((bits(estimate, 63) << 31) |
|
||||
(bits(resultExp, 7, 0) << 23) |
|
||||
(bits(estimate, 51, 29) << 0), junk);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
unsignedRSqrtEstimate(uint32_t op)
|
||||
{
|
||||
if (bits(op, 31, 30) == 0) {
|
||||
return -1;
|
||||
} else {
|
||||
double dpOp;
|
||||
if (bits(op, 31)) {
|
||||
dpOp = bitsToFp((ULL(0) << 63) |
|
||||
(ULL(0x3fe) << 52) |
|
||||
(bits((uint64_t)op, 30, 0) << 21) |
|
||||
(0 << 0), (double)0.0);
|
||||
} else {
|
||||
dpOp = bitsToFp((ULL(0) << 63) |
|
||||
(ULL(0x3fd) << 52) |
|
||||
(bits((uint64_t)op, 29, 0) << 22) |
|
||||
(0 << 0), (double)0.0);
|
||||
}
|
||||
uint64_t estimate = fpToBits(recipSqrtEstimate(dpOp));
|
||||
return (1 << 31) | bits(estimate, 51, 21);
|
||||
}
|
||||
}
|
||||
|
||||
// Reciprocal estimate on a double precision operand. This is a
// transcription of the formula the architecture reference manual calls
// recip_estimate.
//
// The operand is truncated onto a grid with steps of 1/512, the reciprocal
// of the centre of that grid cell is computed, and the result is rounded to
// a multiple of 1/256.
static double
recipEstimate(double a)
{
    // Truncate onto the 1/512 grid and take the middle of the cell.
    const int64_t cell = (int64_t)(a * 512.0);
    const double cellCentre = ((double)cell + 0.5) / 512.0;

    const double recip = 1.0 / cellCentre;

    // Round to the nearest 1/256 by adding one half and truncating.
    const int64_t rounded = (int64_t)(256.0 * recip + 0.5);
    return (double)rounded / 256.0;
}
|
||||
|
||||
// This function is only intended for use in Neon instructions because
|
||||
// it ignores certain bits in the FPSCR.
|
||||
float
|
||||
fpRecipEstimate(FPSCR &fpscr, float op)
|
||||
{
|
||||
const uint32_t qnan = 0x7fc00000;
|
||||
float junk = 0.0;
|
||||
int fpClass = std::fpclassify(op);
|
||||
if (fpClass == FP_NAN) {
|
||||
if ((fpToBits(op) & qnan) != qnan)
|
||||
fpscr.ioc = 1;
|
||||
return bitsToFp(qnan, junk);
|
||||
} else if (fpClass == FP_INFINITE) {
|
||||
return bitsToFp(std::signbit(op) << 31, junk);
|
||||
} else if (fpClass == FP_ZERO) {
|
||||
fpscr.dzc = 1;
|
||||
// Return infinity with the same sign as the operand.
|
||||
return bitsToFp((std::signbit(op) << 31) |
|
||||
(0xFF << 23) | (0 << 0), junk);
|
||||
} else if (fabs(op) >= pow(2.0, 126)) {
|
||||
fpscr.ufc = 1;
|
||||
return bitsToFp(std::signbit(op) << 31, junk);
|
||||
} else {
|
||||
uint64_t opBits = fpToBits(op);
|
||||
double scaled;
|
||||
scaled = bitsToFp((0 << 0) | (bits(opBits, 22, 0) << 29) |
|
||||
(ULL(0x3fe) << 52) | (ULL(0) << 63),
|
||||
(double)0.0);
|
||||
uint64_t resultExp = 253 - bits(opBits, 30, 23);
|
||||
|
||||
uint64_t estimate = fpToBits(recipEstimate(scaled));
|
||||
|
||||
return bitsToFp((bits(opBits, 31) << 31) |
|
||||
(bits(resultExp, 7, 0) << 23) |
|
||||
(bits(estimate, 51, 29) << 0), junk);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
unsignedRecipEstimate(uint32_t op)
|
||||
{
|
||||
if (bits(op, 31) == 0) {
|
||||
return -1;
|
||||
} else {
|
||||
double dpOp;
|
||||
dpOp = bitsToFp((ULL(0) << 63) |
|
||||
(ULL(0x3fe) << 52) |
|
||||
(bits((uint64_t)op, 30, 0) << 21) |
|
||||
(0 << 0), (double)0.0);
|
||||
uint64_t estimate = fpToBits(recipEstimate(dpOp));
|
||||
return (1 << 31) | bits(estimate, 51, 21);
|
||||
}
|
||||
}
|
||||
|
||||
template <class fpType>
|
||||
fpType
|
||||
FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
|
||||
fpType op1, fpType op2) const
|
||||
{
|
||||
done = true;
|
||||
fpType junk = 0.0;
|
||||
fpType dest = 0.0;
|
||||
const bool single = (sizeof(fpType) == sizeof(float));
|
||||
const uint64_t qnan =
|
||||
single ? 0x7fc00000 : ULL(0x7ff8000000000000);
|
||||
const bool nan1 = std::isnan(op1);
|
||||
const bool nan2 = std::isnan(op2);
|
||||
const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
|
||||
const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
|
||||
if (nan1 || nan2) {
|
||||
if (defaultNan) {
|
||||
dest = bitsToFp(qnan, junk);
|
||||
} else if (signal1) {
|
||||
dest = bitsToFp(fpToBits(op1) | qnan, junk);
|
||||
} else if (signal2) {
|
||||
dest = bitsToFp(fpToBits(op2) | qnan, junk);
|
||||
} else if (nan1) {
|
||||
dest = op1;
|
||||
} else if (nan2) {
|
||||
dest = op2;
|
||||
}
|
||||
if (signal1 || signal2) {
|
||||
fpscr.ioc = 1;
|
||||
}
|
||||
} else {
|
||||
done = false;
|
||||
}
|
||||
return dest;
|
||||
}
|
||||
|
||||
template
|
||||
float FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
|
||||
float op1, float op2) const;
|
||||
template
|
||||
double FpOp::processNans(FPSCR &fpscr, bool &done, bool defaultNan,
|
||||
double op1, double op2) const;
|
||||
|
||||
template <class fpType>
|
||||
fpType
|
||||
FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
|
||||
fpType (*func)(fpType, fpType),
|
||||
bool flush, uint32_t rMode) const
|
||||
bool flush, bool defaultNan, uint32_t rMode) const
|
||||
{
|
||||
const bool single = (sizeof(fpType) == sizeof(float));
|
||||
fpType junk = 0.0;
|
||||
|
@ -795,7 +1003,7 @@ FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
|
|||
const bool nan2 = std::isnan(op2);
|
||||
const bool signal1 = nan1 && ((fpToBits(op1) & qnan) != qnan);
|
||||
const bool signal2 = nan2 && ((fpToBits(op2) & qnan) != qnan);
|
||||
if ((!nan1 && !nan2) || (fpscr.dn == 1)) {
|
||||
if ((!nan1 && !nan2) || (defaultNan == 1)) {
|
||||
dest = bitsToFp(qnan, junk);
|
||||
} else if (signal1) {
|
||||
dest = bitsToFp(fpToBits(op1) | qnan, junk);
|
||||
|
@ -828,18 +1036,18 @@ FpOp::binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
|
|||
dest = temp;
|
||||
}
|
||||
}
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, flush);
|
||||
return dest;
|
||||
}
|
||||
|
||||
template
|
||||
float FpOp::binaryOp(FPSCR &fpscr, float op1, float op2,
|
||||
float (*func)(float, float),
|
||||
bool flush, uint32_t rMode) const;
|
||||
bool flush, bool defaultNan, uint32_t rMode) const;
|
||||
template
|
||||
double FpOp::binaryOp(FPSCR &fpscr, double op1, double op2,
|
||||
double (*func)(double, double),
|
||||
bool flush, uint32_t rMode) const;
|
||||
bool flush, bool defaultNan, uint32_t rMode) const;
|
||||
|
||||
template <class fpType>
|
||||
fpType
|
||||
|
@ -890,7 +1098,7 @@ FpOp::unaryOp(FPSCR &fpscr, fpType op1, fpType (*func)(fpType),
|
|||
dest = temp;
|
||||
}
|
||||
}
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, flush);
|
||||
return dest;
|
||||
}
|
||||
|
||||
|
|
|
@ -192,10 +192,20 @@ bitsToFp(uint64_t bits, double junk)
|
|||
return val.fp;
|
||||
}
|
||||
|
||||
template <class fpType>
|
||||
static bool
|
||||
isSnan(fpType val)
|
||||
{
|
||||
const bool single = (sizeof(fpType) == sizeof(float));
|
||||
const uint64_t qnan =
|
||||
single ? 0x7fc00000 : ULL(0x7ff8000000000000);
|
||||
return std::isnan(val) && ((fpToBits(val) & qnan) != qnan);
|
||||
}
|
||||
|
||||
typedef int VfpSavedState;
|
||||
|
||||
VfpSavedState prepFpState(uint32_t rMode);
|
||||
void finishVfp(FPSCR &fpscr, VfpSavedState state);
|
||||
void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush);
|
||||
|
||||
template <class fpType>
|
||||
fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
|
||||
|
@ -209,8 +219,9 @@ fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
|
|||
float fixFpDFpSDest(FPSCR fpscr, double val);
|
||||
double fixFpSFpDDest(FPSCR fpscr, float val);
|
||||
|
||||
float vcvtFpSFpH(FPSCR &fpscr, float op, float dest, bool top);
|
||||
float vcvtFpHFpS(FPSCR &fpscr, float op, bool top);
|
||||
uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
|
||||
uint32_t rMode, bool ahp, float op);
|
||||
float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
|
||||
|
||||
static inline double
|
||||
makeDouble(uint32_t low, uint32_t high)
|
||||
|
@ -233,13 +244,23 @@ highFromDouble(double val)
|
|||
|
||||
uint64_t vfpFpSToFixed(float val, bool isSigned, bool half,
|
||||
uint8_t imm, bool rzero = true);
|
||||
float vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm);
|
||||
float vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm);
|
||||
float vfpUFixedToFpS(bool flush, bool defaultNan,
|
||||
uint32_t val, bool half, uint8_t imm);
|
||||
float vfpSFixedToFpS(bool flush, bool defaultNan,
|
||||
int32_t val, bool half, uint8_t imm);
|
||||
|
||||
uint64_t vfpFpDToFixed(double val, bool isSigned, bool half,
|
||||
uint8_t imm, bool rzero = true);
|
||||
double vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm);
|
||||
double vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm);
|
||||
double vfpUFixedToFpD(bool flush, bool defaultNan,
|
||||
uint32_t val, bool half, uint8_t imm);
|
||||
double vfpSFixedToFpD(bool flush, bool defaultNan,
|
||||
int32_t val, bool half, uint8_t imm);
|
||||
|
||||
float fprSqrtEstimate(FPSCR &fpscr, float op);
|
||||
uint32_t unsignedRSqrtEstimate(uint32_t op);
|
||||
|
||||
float fpRecipEstimate(FPSCR &fpscr, float op);
|
||||
uint32_t unsignedRecipEstimate(uint32_t op);
|
||||
|
||||
class VfpMacroOp : public PredMacroOp
|
||||
{
|
||||
|
@ -312,6 +333,66 @@ fpMulD(double a, double b)
|
|||
return a * b;
|
||||
}
|
||||
|
||||
// Single precision maximum that is strict about the sign of zero.
//
// Returns the larger of a and b, treating +0 as greater than -0.
static inline float
fpMaxS(float a, float b)
{
    // Handle comparisons of +0 and -0. fmaxf() is allowed to return either
    // operand when both are zero, so the answer is picked by hand here: the
    // result is +0 unless both operands are -0. This covers both operand
    // orders, not just (+0, -0).
    if (a == 0.0f && b == 0.0f)
        return std::signbit(a) ? b : a;

    // A value without the sign bit always wins over one that has it.
    if (!std::signbit(a) && std::signbit(b))
        return a;

    return fmaxf(a, b);
}
|
||||
|
||||
// Single precision minimum that is strict about the sign of zero.
//
// Returns the smaller of a and b, treating -0 as less than +0.
static inline float
fpMinS(float a, float b)
{
    // Handle comparisons of +0 and -0. fminf() is allowed to return either
    // operand when both are zero, so the answer is picked by hand here: the
    // result is -0 unless both operands are +0. This covers both operand
    // orders, not just (-0, +0).
    if (a == 0.0f && b == 0.0f)
        return std::signbit(a) ? a : b;

    // A value with the sign bit always loses to one without it.
    if (std::signbit(a) && !std::signbit(b))
        return a;

    return fminf(a, b);
}
|
||||
|
||||
static inline float
|
||||
fpRSqrtsS(float a, float b)
|
||||
{
|
||||
int fpClassA = std::fpclassify(a);
|
||||
int fpClassB = std::fpclassify(b);
|
||||
float aXb;
|
||||
int fpClassAxB;
|
||||
|
||||
if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
|
||||
(fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
|
||||
return 1.5;
|
||||
}
|
||||
aXb = a*b;
|
||||
fpClassAxB = std::fpclassify(aXb);
|
||||
if(fpClassAxB == FP_SUBNORMAL) {
|
||||
feraiseexcept(FeUnderflow);
|
||||
return 1.5;
|
||||
}
|
||||
return (3.0 - (a * b)) / 2.0;
|
||||
}
|
||||
|
||||
static inline float
|
||||
fpRecpsS(float a, float b)
|
||||
{
|
||||
int fpClassA = std::fpclassify(a);
|
||||
int fpClassB = std::fpclassify(b);
|
||||
float aXb;
|
||||
int fpClassAxB;
|
||||
|
||||
if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
|
||||
(fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
|
||||
return 2.0;
|
||||
}
|
||||
aXb = a*b;
|
||||
fpClassAxB = std::fpclassify(aXb);
|
||||
if(fpClassAxB == FP_SUBNORMAL) {
|
||||
feraiseexcept(FeUnderflow);
|
||||
return 2.0;
|
||||
}
|
||||
return 2.0 - (a * b);
|
||||
}
|
||||
|
||||
class FpOp : public PredOp
|
||||
{
|
||||
protected:
|
||||
|
@ -362,11 +443,16 @@ class FpOp : public PredOp
|
|||
return fpToBits(val) >> 32;
|
||||
}
|
||||
|
||||
template <class fpType>
|
||||
fpType
|
||||
processNans(FPSCR &fpscr, bool &done, bool defaultNan,
|
||||
fpType op1, fpType op2) const;
|
||||
|
||||
template <class fpType>
|
||||
fpType
|
||||
binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
|
||||
fpType (*func)(fpType, fpType),
|
||||
bool flush, uint32_t rMode) const;
|
||||
bool flush, bool defaultNan, uint32_t rMode) const;
|
||||
|
||||
template <class fpType>
|
||||
fpType
|
||||
|
@ -445,6 +531,27 @@ class FpRegRegRegOp : public FpOp
|
|||
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
|
||||
};
|
||||
|
||||
class FpRegRegRegImmOp : public FpOp
|
||||
{
|
||||
protected:
|
||||
IntRegIndex dest;
|
||||
IntRegIndex op1;
|
||||
IntRegIndex op2;
|
||||
uint64_t imm;
|
||||
|
||||
FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst,
|
||||
OpClass __opClass, IntRegIndex _dest,
|
||||
IntRegIndex _op1, IntRegIndex _op2,
|
||||
uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
|
||||
FpOp(mnem, _machInst, __opClass),
|
||||
dest(_dest), op1(_op1), op2(_op2), imm(_imm)
|
||||
{
|
||||
setVfpMicroFlags(mode, flags);
|
||||
}
|
||||
|
||||
std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif //__ARCH_ARM_INSTS_VFP_HH__
|
||||
|
|
|
@ -88,7 +88,7 @@ decode BIGTHUMB {
|
|||
0xf: McrMrc15::mcrMrc15();
|
||||
}
|
||||
}
|
||||
0x3: WarnUnimpl::Advanced_SIMD();
|
||||
0x3: ThumbNeonData::ThumbNeonData();
|
||||
default: decode LTCOPROC {
|
||||
0xa, 0xb: ExtensionRegLoadStore::extensionRegLoadStre();
|
||||
0xf: decode HTOPCODE_9_4 {
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -282,7 +282,7 @@ let {{
|
|||
exec_output += PredOpExecute.subst(vmovRegQIop);
|
||||
|
||||
vmovCoreRegBCode = '''
|
||||
FpDest.uw = insertBits(FpDest.uw, imm * 8, imm * 8 + 7, Op1.ub);
|
||||
FpDest.uw = insertBits(FpDest.uw, imm * 8 + 7, imm * 8, Op1.ub);
|
||||
'''
|
||||
vmovCoreRegBIop = InstObjParams("vmov", "VmovCoreRegB", "FpRegRegImmOp",
|
||||
{ "code": vmovCoreRegBCode,
|
||||
|
@ -292,7 +292,7 @@ let {{
|
|||
exec_output += PredOpExecute.subst(vmovCoreRegBIop);
|
||||
|
||||
vmovCoreRegHCode = '''
|
||||
FpDest.uw = insertBits(FpDest.uw, imm * 16, imm * 16 + 15, Op1.uh);
|
||||
FpDest.uw = insertBits(FpDest.uw, imm * 16 + 15, imm * 16, Op1.uh);
|
||||
'''
|
||||
vmovCoreRegHIop = InstObjParams("vmov", "VmovCoreRegH", "FpRegRegImmOp",
|
||||
{ "code": vmovCoreRegHCode,
|
||||
|
@ -312,7 +312,8 @@ let {{
|
|||
exec_output += PredOpExecute.subst(vmovCoreRegWIop);
|
||||
|
||||
vmovRegCoreUBCode = '''
|
||||
Dest = bits(FpOp1.uw, imm * 8, imm * 8 + 7);
|
||||
assert(imm < 4);
|
||||
Dest = bits(FpOp1.uw, imm * 8 + 7, imm * 8);
|
||||
'''
|
||||
vmovRegCoreUBIop = InstObjParams("vmov", "VmovRegCoreUB", "FpRegRegImmOp",
|
||||
{ "code": vmovRegCoreUBCode,
|
||||
|
@ -322,7 +323,8 @@ let {{
|
|||
exec_output += PredOpExecute.subst(vmovRegCoreUBIop);
|
||||
|
||||
vmovRegCoreUHCode = '''
|
||||
Dest = bits(FpOp1.uw, imm * 16, imm * 16 + 15);
|
||||
assert(imm < 2);
|
||||
Dest = bits(FpOp1.uw, imm * 16 + 15, imm * 16);
|
||||
'''
|
||||
vmovRegCoreUHIop = InstObjParams("vmov", "VmovRegCoreUH", "FpRegRegImmOp",
|
||||
{ "code": vmovRegCoreUHCode,
|
||||
|
@ -332,7 +334,8 @@ let {{
|
|||
exec_output += PredOpExecute.subst(vmovRegCoreUHIop);
|
||||
|
||||
vmovRegCoreSBCode = '''
|
||||
Dest = sext<8>(bits(FpOp1.uw, imm * 8, imm * 8 + 7));
|
||||
assert(imm < 4);
|
||||
Dest = sext<8>(bits(FpOp1.uw, imm * 8 + 7, imm * 8));
|
||||
'''
|
||||
vmovRegCoreSBIop = InstObjParams("vmov", "VmovRegCoreSB", "FpRegRegImmOp",
|
||||
{ "code": vmovRegCoreSBCode,
|
||||
|
@ -342,7 +345,8 @@ let {{
|
|||
exec_output += PredOpExecute.subst(vmovRegCoreSBIop);
|
||||
|
||||
vmovRegCoreSHCode = '''
|
||||
Dest = sext<16>(bits(FpOp1.uw, imm * 16, imm * 16 + 15));
|
||||
assert(imm < 2);
|
||||
Dest = sext<16>(bits(FpOp1.uw, imm * 16 + 15, imm * 16));
|
||||
'''
|
||||
vmovRegCoreSHIop = InstObjParams("vmov", "VmovRegCoreSH", "FpRegRegImmOp",
|
||||
{ "code": vmovRegCoreSHCode,
|
||||
|
@ -396,7 +400,7 @@ let {{
|
|||
Fpscr = fpscr;
|
||||
'''
|
||||
singleBinOp = "binaryOp(fpscr, FpOp1, FpOp2," + \
|
||||
"%(func)s, fpscr.fz, fpscr.rMode)"
|
||||
"%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode)"
|
||||
singleUnaryOp = "unaryOp(fpscr, FpOp1, %(func)s, fpscr.fz, fpscr.rMode)"
|
||||
doubleCode = '''
|
||||
FPSCR fpscr = Fpscr;
|
||||
|
@ -408,7 +412,7 @@ let {{
|
|||
doubleBinOp = '''
|
||||
binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
|
||||
dbl(FpOp2P0.uw, FpOp2P1.uw),
|
||||
%(func)s, fpscr.fz, fpscr.rMode);
|
||||
%(func)s, fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
'''
|
||||
doubleUnaryOp = '''
|
||||
unaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw), %(func)s,
|
||||
|
@ -499,8 +503,9 @@ let {{
|
|||
vmlaSCode = '''
|
||||
FPSCR fpscr = Fpscr;
|
||||
float mid = binaryOp(fpscr, FpOp1, FpOp2,
|
||||
fpMulS, fpscr.fz, fpscr.rMode);
|
||||
FpDest = binaryOp(fpscr, FpDest, mid, fpAddS, fpscr.fz, fpscr.rMode);
|
||||
fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
FpDest = binaryOp(fpscr, FpDest, mid, fpAddS,
|
||||
fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vmlaSIop = InstObjParams("vmlas", "VmlaS", "FpRegRegRegOp",
|
||||
|
@ -514,9 +519,10 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
|
||||
dbl(FpOp2P0.uw, FpOp2P1.uw),
|
||||
fpMulD, fpscr.fz, fpscr.rMode);
|
||||
fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw),
|
||||
mid, fpAddD, fpscr.fz, fpscr.rMode);
|
||||
mid, fpAddD, fpscr.fz,
|
||||
fpscr.dn, fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(dest);
|
||||
FpDestP1.uw = dblHi(dest);
|
||||
|
@ -531,8 +537,9 @@ let {{
|
|||
vmlsSCode = '''
|
||||
FPSCR fpscr = Fpscr;
|
||||
float mid = binaryOp(fpscr, FpOp1, FpOp2,
|
||||
fpMulS, fpscr.fz, fpscr.rMode);
|
||||
FpDest = binaryOp(fpscr, FpDest, -mid, fpAddS, fpscr.fz, fpscr.rMode);
|
||||
fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
FpDest = binaryOp(fpscr, FpDest, -mid, fpAddS,
|
||||
fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vmlsSIop = InstObjParams("vmlss", "VmlsS", "FpRegRegRegOp",
|
||||
|
@ -546,9 +553,10 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
|
||||
dbl(FpOp2P0.uw, FpOp2P1.uw),
|
||||
fpMulD, fpscr.fz, fpscr.rMode);
|
||||
fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
double dest = binaryOp(fpscr, dbl(FpDestP0.uw, FpDestP1.uw),
|
||||
-mid, fpAddD, fpscr.fz, fpscr.rMode);
|
||||
-mid, fpAddD, fpscr.fz,
|
||||
fpscr.dn, fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(dest);
|
||||
FpDestP1.uw = dblHi(dest);
|
||||
|
@ -563,8 +571,9 @@ let {{
|
|||
vnmlaSCode = '''
|
||||
FPSCR fpscr = Fpscr;
|
||||
float mid = binaryOp(fpscr, FpOp1, FpOp2,
|
||||
fpMulS, fpscr.fz, fpscr.rMode);
|
||||
FpDest = binaryOp(fpscr, -FpDest, -mid, fpAddS, fpscr.fz, fpscr.rMode);
|
||||
fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
FpDest = binaryOp(fpscr, -FpDest, -mid, fpAddS,
|
||||
fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vnmlaSIop = InstObjParams("vnmlas", "VnmlaS", "FpRegRegRegOp",
|
||||
|
@ -578,9 +587,10 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
|
||||
dbl(FpOp2P0.uw, FpOp2P1.uw),
|
||||
fpMulD, fpscr.fz, fpscr.rMode);
|
||||
fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw),
|
||||
-mid, fpAddD, fpscr.fz, fpscr.rMode);
|
||||
-mid, fpAddD, fpscr.fz,
|
||||
fpscr.dn, fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(dest);
|
||||
FpDestP1.uw = dblHi(dest);
|
||||
|
@ -595,8 +605,9 @@ let {{
|
|||
vnmlsSCode = '''
|
||||
FPSCR fpscr = Fpscr;
|
||||
float mid = binaryOp(fpscr, FpOp1, FpOp2,
|
||||
fpMulS, fpscr.fz, fpscr.rMode);
|
||||
FpDest = binaryOp(fpscr, -FpDest, mid, fpAddS, fpscr.fz, fpscr.rMode);
|
||||
fpMulS, fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
FpDest = binaryOp(fpscr, -FpDest, mid, fpAddS,
|
||||
fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vnmlsSIop = InstObjParams("vnmlss", "VnmlsS", "FpRegRegRegOp",
|
||||
|
@ -610,9 +621,10 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
double mid = binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
|
||||
dbl(FpOp2P0.uw, FpOp2P1.uw),
|
||||
fpMulD, fpscr.fz, fpscr.rMode);
|
||||
fpMulD, fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
double dest = binaryOp(fpscr, -dbl(FpDestP0.uw, FpDestP1.uw),
|
||||
mid, fpAddD, fpscr.fz, fpscr.rMode);
|
||||
mid, fpAddD, fpscr.fz,
|
||||
fpscr.dn, fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(dest);
|
||||
FpDestP1.uw = dblHi(dest);
|
||||
|
@ -626,7 +638,8 @@ let {{
|
|||
|
||||
vnmulSCode = '''
|
||||
FPSCR fpscr = Fpscr;
|
||||
FpDest = -binaryOp(fpscr, FpOp1, FpOp2, fpMulS, fpscr.fz, fpscr.rMode);
|
||||
FpDest = -binaryOp(fpscr, FpOp1, FpOp2, fpMulS,
|
||||
fpscr.fz, fpscr.dn, fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vnmulSIop = InstObjParams("vnmuls", "VnmulS", "FpRegRegRegOp",
|
||||
|
@ -640,7 +653,8 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
double dest = -binaryOp(fpscr, dbl(FpOp1P0.uw, FpOp1P1.uw),
|
||||
dbl(FpOp2P0.uw, FpOp2P1.uw),
|
||||
fpMulD, fpscr.fz, fpscr.rMode);
|
||||
fpMulD, fpscr.fz, fpscr.dn,
|
||||
fpscr.rMode);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(dest);
|
||||
FpDestP1.uw = dblHi(dest);
|
||||
|
@ -665,7 +679,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw));
|
||||
FpDest = FpOp1.uw;
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtUIntFpSIop = InstObjParams("vcvt", "VcvtUIntFpS", "FpRegRegOp",
|
||||
|
@ -681,7 +695,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1P0.uw) : "m" (FpOp1P0.uw));
|
||||
double cDest = (uint64_t)FpOp1P0.uw;
|
||||
__asm__ __volatile__("" :: "m" (cDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(cDest);
|
||||
FpDestP1.uw = dblHi(cDest);
|
||||
|
@ -699,7 +713,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw));
|
||||
FpDest = FpOp1.sw;
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtSIntFpSIop = InstObjParams("vcvt", "VcvtSIntFpS", "FpRegRegOp",
|
||||
|
@ -715,7 +729,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1P0.sw) : "m" (FpOp1P0.sw));
|
||||
double cDest = FpOp1P0.sw;
|
||||
__asm__ __volatile__("" :: "m" (cDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(cDest);
|
||||
FpDestP1.uw = dblHi(cDest);
|
||||
|
@ -734,7 +748,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, 0, false);
|
||||
__asm__ __volatile__("" :: "m" (FpDest.uw));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpUIntSRIop = InstObjParams("vcvt", "VcvtFpUIntSR", "FpRegRegOp",
|
||||
|
@ -752,7 +766,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
|
||||
uint64_t result = vfpFpDToFixed(cOp1, false, false, 0, false);
|
||||
__asm__ __volatile__("" :: "m" (result));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = result;
|
||||
'''
|
||||
|
@ -770,7 +784,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, 0, false);
|
||||
__asm__ __volatile__("" :: "m" (FpDest.sw));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpSIntSRIop = InstObjParams("vcvtr", "VcvtFpSIntSR", "FpRegRegOp",
|
||||
|
@ -788,7 +802,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
|
||||
int64_t result = vfpFpDToFixed(cOp1, true, false, 0, false);
|
||||
__asm__ __volatile__("" :: "m" (result));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = result;
|
||||
'''
|
||||
|
@ -807,7 +821,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, 0);
|
||||
__asm__ __volatile__("" :: "m" (FpDest.uw));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpUIntSIop = InstObjParams("vcvt", "VcvtFpUIntS", "FpRegRegOp",
|
||||
|
@ -826,7 +840,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
|
||||
uint64_t result = vfpFpDToFixed(cOp1, false, false, 0);
|
||||
__asm__ __volatile__("" :: "m" (result));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = result;
|
||||
'''
|
||||
|
@ -845,7 +859,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, 0);
|
||||
__asm__ __volatile__("" :: "m" (FpDest.sw));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpSIntSIop = InstObjParams("vcvt", "VcvtFpSIntS", "FpRegRegOp",
|
||||
|
@ -864,7 +878,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
|
||||
int64_t result = vfpFpDToFixed(cOp1, true, false, 0);
|
||||
__asm__ __volatile__("" :: "m" (result));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = result;
|
||||
'''
|
||||
|
@ -882,7 +896,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
double cDest = fixFpSFpDDest(Fpscr, FpOp1);
|
||||
__asm__ __volatile__("" :: "m" (cDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(cDest);
|
||||
FpDestP1.uw = dblHi(cDest);
|
||||
|
@ -902,7 +916,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
|
||||
FpDest = fixFpDFpSDest(Fpscr, cOp1);
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpDFpSIop = InstObjParams("vcvt", "VcvtFpDFpS", "FpRegRegOp",
|
||||
|
@ -917,9 +931,10 @@ let {{
|
|||
vfpFlushToZero(fpscr, FpOp1);
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest = vcvtFpHFpS(fpscr, FpOp1, true);
|
||||
FpDest = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp,
|
||||
bits(fpToBits(FpOp1), 31, 16));
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpHTFpSIop = InstObjParams("vcvtt", "VcvtFpHTFpS", "FpRegRegOp",
|
||||
|
@ -933,9 +948,10 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest = vcvtFpHFpS(fpscr, FpOp1, false);
|
||||
FpDest = vcvtFpHFpS(fpscr, fpscr.dn, fpscr.ahp,
|
||||
bits(fpToBits(FpOp1), 15, 0));
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpHBFpSIop = InstObjParams("vcvtb", "VcvtFpHBFpS", "FpRegRegOp",
|
||||
|
@ -949,11 +965,13 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
vfpFlushToZero(fpscr, FpOp1);
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
|
||||
: "m" (FpOp1), "m" (FpDest));
|
||||
FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, true);
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest.uw)
|
||||
: "m" (FpOp1), "m" (FpDest.uw));
|
||||
FpDest.uw = insertBits(FpDest.uw, 31, 16,,
|
||||
vcvtFpSFpH(fpscr, fpscr.fz, fpscr.dn,
|
||||
fpscr.rMode, fpscr.ahp, FpOp1));
|
||||
__asm__ __volatile__("" :: "m" (FpDest.uw));
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpSFpHTIop = InstObjParams("vcvtt", "VcvtFpSFpHT", "FpRegRegOp",
|
||||
|
@ -967,11 +985,13 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
vfpFlushToZero(fpscr, FpOp1);
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest)
|
||||
: "m" (FpOp1), "m" (FpDest));
|
||||
FpDest = vcvtFpSFpH(fpscr, FpOp1, FpDest, false);
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1), "=m" (FpDest.uw)
|
||||
: "m" (FpOp1), "m" (FpDest.uw));
|
||||
FpDest.uw = insertBits(FpDest.uw, 15, 0,
|
||||
vcvtFpSFpH(fpscr, fpscr.fz, fpscr.dn,
|
||||
fpscr.rMode, fpscr.ahp, FpOp1));
|
||||
__asm__ __volatile__("" :: "m" (FpDest.uw));
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpSFpHBIop = InstObjParams("vcvtb", "VcvtFpSFpHB", "FpRegRegOp",
|
||||
|
@ -1201,7 +1221,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm);
|
||||
__asm__ __volatile__("" :: "m" (FpDest.sw));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpSFixedSIop = InstObjParams("vcvt", "VcvtFpSFixedS", "FpRegRegImmOp",
|
||||
|
@ -1219,7 +1239,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
|
||||
uint64_t mid = vfpFpDToFixed(cOp1, true, false, imm);
|
||||
__asm__ __volatile__("" :: "m" (mid));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = mid;
|
||||
FpDestP1.uw = mid >> 32;
|
||||
|
@ -1238,7 +1258,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm);
|
||||
__asm__ __volatile__("" :: "m" (FpDest.uw));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpUFixedSIop = InstObjParams("vcvt", "VcvtFpUFixedS", "FpRegRegImmOp",
|
||||
|
@ -1256,7 +1276,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
|
||||
uint64_t mid = vfpFpDToFixed(cOp1, false, false, imm);
|
||||
__asm__ __volatile__("" :: "m" (mid));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = mid;
|
||||
FpDestP1.uw = mid >> 32;
|
||||
|
@ -1272,9 +1292,9 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw));
|
||||
FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sw, false, imm);
|
||||
FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.sw, false, imm);
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtSFixedFpSIop = InstObjParams("vcvt", "VcvtSFixedFpS", "FpRegRegImmOp",
|
||||
|
@ -1289,9 +1309,9 @@ let {{
|
|||
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
|
||||
double cDest = vfpSFixedToFpD(Fpscr, mid, false, imm);
|
||||
double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm);
|
||||
__asm__ __volatile__("" :: "m" (cDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(cDest);
|
||||
FpDestP1.uw = dblHi(cDest);
|
||||
|
@ -1307,9 +1327,9 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw));
|
||||
FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uw, false, imm);
|
||||
FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.uw, false, imm);
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtUFixedFpSIop = InstObjParams("vcvt", "VcvtUFixedFpS", "FpRegRegImmOp",
|
||||
|
@ -1324,9 +1344,9 @@ let {{
|
|||
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
|
||||
double cDest = vfpUFixedToFpD(Fpscr, mid, false, imm);
|
||||
double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, false, imm);
|
||||
__asm__ __volatile__("" :: "m" (cDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(cDest);
|
||||
FpDestP1.uw = dblHi(cDest);
|
||||
|
@ -1345,7 +1365,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm);
|
||||
__asm__ __volatile__("" :: "m" (FpDest.sh));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpSHFixedSIop = InstObjParams("vcvt", "VcvtFpSHFixedS",
|
||||
|
@ -1364,7 +1384,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
|
||||
uint64_t result = vfpFpDToFixed(cOp1, true, true, imm);
|
||||
__asm__ __volatile__("" :: "m" (result));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = result;
|
||||
FpDestP1.uw = result >> 32;
|
||||
|
@ -1384,7 +1404,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
|
||||
FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm);
|
||||
__asm__ __volatile__("" :: "m" (FpDest.uh));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtFpUHFixedSIop = InstObjParams("vcvt", "VcvtFpUHFixedS",
|
||||
|
@ -1403,7 +1423,7 @@ let {{
|
|||
__asm__ __volatile__("" : "=m" (cOp1) : "m" (cOp1));
|
||||
uint64_t mid = vfpFpDToFixed(cOp1, false, true, imm);
|
||||
__asm__ __volatile__("" :: "m" (mid));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = mid;
|
||||
FpDestP1.uw = mid >> 32;
|
||||
|
@ -1420,9 +1440,9 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1.sh) : "m" (FpOp1.sh));
|
||||
FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sh, true, imm);
|
||||
FpDest = vfpSFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.sh, true, imm);
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtSHFixedFpSIop = InstObjParams("vcvt", "VcvtSHFixedFpS",
|
||||
|
@ -1438,9 +1458,9 @@ let {{
|
|||
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
|
||||
double cDest = vfpSFixedToFpD(Fpscr, mid, true, imm);
|
||||
double cDest = vfpSFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm);
|
||||
__asm__ __volatile__("" :: "m" (cDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(cDest);
|
||||
FpDestP1.uw = dblHi(cDest);
|
||||
|
@ -1457,9 +1477,9 @@ let {{
|
|||
FPSCR fpscr = Fpscr;
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (FpOp1.uh) : "m" (FpOp1.uh));
|
||||
FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uh, true, imm);
|
||||
FpDest = vfpUFixedToFpS(fpscr.fz, fpscr.dn, FpOp1.uh, true, imm);
|
||||
__asm__ __volatile__("" :: "m" (FpDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
'''
|
||||
vcvtUHFixedFpSIop = InstObjParams("vcvt", "VcvtUHFixedFpS",
|
||||
|
@ -1475,9 +1495,9 @@ let {{
|
|||
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
|
||||
VfpSavedState state = prepFpState(fpscr.rMode);
|
||||
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
|
||||
double cDest = vfpUFixedToFpD(Fpscr, mid, true, imm);
|
||||
double cDest = vfpUFixedToFpD(fpscr.fz, fpscr.dn, mid, true, imm);
|
||||
__asm__ __volatile__("" :: "m" (cDest));
|
||||
finishVfp(fpscr, state);
|
||||
finishVfp(fpscr, state, fpscr.fz);
|
||||
Fpscr = fpscr;
|
||||
FpDestP0.uw = dblLow(cDest);
|
||||
FpDestP1.uw = dblHi(cDest);
|
||||
|
|
|
@ -70,5 +70,8 @@
|
|||
//Divide
|
||||
##include "div.isa"
|
||||
|
||||
//FP (VFP and Neon)
|
||||
//VFP
|
||||
##include "fp.isa"
|
||||
|
||||
//Neon
|
||||
##include "neon.isa"
|
||||
|
|
|
@ -57,11 +57,34 @@ let {{
|
|||
|
||||
microLdrFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
|
||||
microLdrFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrFpUop',
|
||||
'MicroMemOp',
|
||||
{'memacc_code': microLdrFpUopCode,
|
||||
'ea_code': 'EA = Rb + (up ? imm : -imm);',
|
||||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
'MicroMemOp',
|
||||
{'memacc_code': microLdrFpUopCode,
|
||||
'ea_code':
|
||||
'EA = Rb + (up ? imm : -imm);',
|
||||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
|
||||
microLdrDBFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
|
||||
microLdrDBFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDBFpUop',
|
||||
'MicroMemOp',
|
||||
{'memacc_code': microLdrFpUopCode,
|
||||
'ea_code': '''
|
||||
EA = Rb + (up ? imm : -imm) +
|
||||
(((CPSR)Cpsr).e ? 4 : 0);
|
||||
''',
|
||||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
|
||||
microLdrDTFpUopCode = "Fa.uw = cSwap(Mem.uw, ((CPSR)Cpsr).e);"
|
||||
microLdrDTFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrDTFpUop',
|
||||
'MicroMemOp',
|
||||
{'memacc_code': microLdrFpUopCode,
|
||||
'ea_code': '''
|
||||
EA = Rb + (up ? imm : -imm) -
|
||||
(((CPSR)Cpsr).e ? 4 : 0);
|
||||
''',
|
||||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
|
||||
microLdrRetUopCode = '''
|
||||
CPSR cpsr = Cpsr;
|
||||
|
@ -98,10 +121,36 @@ let {{
|
|||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
|
||||
microStrDBFpUopCode = "Mem = cSwap(Fa.uw, ((CPSR)Cpsr).e);"
|
||||
microStrDBFpUopIop = InstObjParams('strfp_uop', 'MicroStrDBFpUop',
|
||||
'MicroMemOp',
|
||||
{'memacc_code': microStrFpUopCode,
|
||||
'postacc_code': "",
|
||||
'ea_code': '''
|
||||
EA = Rb + (up ? imm : -imm) +
|
||||
(((CPSR)Cpsr).e ? 4 : 0);
|
||||
''',
|
||||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
|
||||
microStrDTFpUopCode = "Mem = cSwap(Fa.uw, ((CPSR)Cpsr).e);"
|
||||
microStrDTFpUopIop = InstObjParams('strfp_uop', 'MicroStrDTFpUop',
|
||||
'MicroMemOp',
|
||||
{'memacc_code': microStrFpUopCode,
|
||||
'postacc_code': "",
|
||||
'ea_code': '''
|
||||
EA = Rb + (up ? imm : -imm) -
|
||||
(((CPSR)Cpsr).e ? 4 : 0);
|
||||
''',
|
||||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
|
||||
header_output = decoder_output = exec_output = ''
|
||||
|
||||
loadIops = (microLdrUopIop, microLdrFpUopIop, microLdrRetUopIop)
|
||||
storeIops = (microStrUopIop, microStrFpUopIop)
|
||||
loadIops = (microLdrUopIop, microLdrRetUopIop,
|
||||
microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop)
|
||||
storeIops = (microStrUopIop, microStrFpUopIop,
|
||||
microStrDBFpUopIop, microStrDTFpUopIop)
|
||||
for iop in loadIops + storeIops:
|
||||
header_output += MicroMemDeclare.subst(iop)
|
||||
decoder_output += MicroMemConstructor.subst(iop)
|
||||
|
@ -115,6 +164,403 @@ let {{
|
|||
StoreCompleteAcc.subst(iop)
|
||||
}};
|
||||
|
||||
let {{
|
||||
exec_output = header_output = ''
|
||||
|
||||
eaCode = 'EA = Ra + imm;'
|
||||
|
||||
for size in (1, 2, 3, 4, 6, 8, 12, 16):
|
||||
# Set up the memory access.
|
||||
regs = (size + 3) // 4
|
||||
subst = { "size" : size, "regs" : regs }
|
||||
memDecl = '''
|
||||
union MemUnion {
|
||||
uint8_t bytes[%(size)d];
|
||||
Element elements[%(size)d / sizeof(Element)];
|
||||
uint32_t floatRegBits[%(regs)d];
|
||||
};
|
||||
''' % subst
|
||||
|
||||
# Do endian conversion for all the elements.
|
||||
convCode = '''
|
||||
const unsigned eCount = sizeof(memUnion.elements) /
|
||||
sizeof(memUnion.elements[0]);
|
||||
if (((CPSR)Cpsr).e) {
|
||||
for (unsigned i = 0; i < eCount; i++) {
|
||||
memUnion.elements[i] = gtobe(memUnion.elements[i]);
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i < eCount; i++) {
|
||||
memUnion.elements[i] = gtole(memUnion.elements[i]);
|
||||
}
|
||||
}
|
||||
'''
|
||||
|
||||
# Offload everything into registers
|
||||
regSetCode = ''
|
||||
for reg in range(regs):
|
||||
mask = ''
|
||||
if reg == regs - 1:
|
||||
mask = ' & mask(%d)' % (32 - 8 * (regs * 4 - size))
|
||||
regSetCode += '''
|
||||
FpDestP%(reg)d.uw = gtoh(memUnion.floatRegBits[%(reg)d])%(mask)s;
|
||||
''' % { "reg" : reg, "mask" : mask }
|
||||
|
||||
# Pull everything in from registers
|
||||
regGetCode = ''
|
||||
for reg in range(regs):
|
||||
regGetCode += '''
|
||||
memUnion.floatRegBits[%(reg)d] = htog(FpDestP%(reg)d.uw);
|
||||
''' % { "reg" : reg }
|
||||
|
||||
loadMemAccCode = convCode + regSetCode
|
||||
storeMemAccCode = regGetCode + convCode
|
||||
|
||||
loadIop = InstObjParams('ldrneon%(size)d_uop' % subst,
|
||||
'MicroLdrNeon%(size)dUop' % subst,
|
||||
'MicroNeonMemOp',
|
||||
{ 'mem_decl' : memDecl,
|
||||
'size' : size,
|
||||
'memacc_code' : loadMemAccCode,
|
||||
'ea_code' : eaCode,
|
||||
'predicate_test' : predicateTest },
|
||||
[ 'IsMicroop', 'IsMemRef', 'IsLoad' ])
|
||||
storeIop = InstObjParams('strneon%(size)d_uop' % subst,
|
||||
'MicroStrNeon%(size)dUop' % subst,
|
||||
'MicroNeonMemOp',
|
||||
{ 'mem_decl' : memDecl,
|
||||
'size' : size,
|
||||
'memacc_code' : storeMemAccCode,
|
||||
'ea_code' : eaCode,
|
||||
'predicate_test' : predicateTest },
|
||||
[ 'IsMicroop', 'IsMemRef', 'IsStore' ])
|
||||
|
||||
exec_output += NeonLoadExecute.subst(loadIop) + \
|
||||
NeonLoadInitiateAcc.subst(loadIop) + \
|
||||
NeonLoadCompleteAcc.subst(loadIop) + \
|
||||
NeonStoreExecute.subst(storeIop) + \
|
||||
NeonStoreInitiateAcc.subst(storeIop) + \
|
||||
NeonStoreCompleteAcc.subst(storeIop)
|
||||
header_output += MicroNeonMemDeclare.subst(loadIop) + \
|
||||
MicroNeonMemDeclare.subst(storeIop)
|
||||
}};
|
||||
|
||||
let {{
|
||||
exec_output = ''
|
||||
for eSize, type in (1, 'uint8_t'), \
|
||||
(2, 'uint16_t'), \
|
||||
(4, 'uint32_t'), \
|
||||
(8, 'uint64_t'):
|
||||
size = eSize
|
||||
# An instruction handles no more than 16 bytes and no more than
|
||||
# 4 elements, or the number of elements needed to fill 8 or 16 bytes.
|
||||
sizes = set((16, 8))
|
||||
for count in 1, 2, 3, 4:
|
||||
size = count * eSize
|
||||
if size <= 16:
|
||||
sizes.add(size)
|
||||
for size in sizes:
|
||||
substDict = {
|
||||
"class_name" : "MicroLdrNeon%dUop" % size,
|
||||
"targs" : type
|
||||
}
|
||||
exec_output += MicroNeonMemExecDeclare.subst(substDict)
|
||||
substDict["class_name"] = "MicroStrNeon%dUop" % size
|
||||
exec_output += MicroNeonMemExecDeclare.subst(substDict)
|
||||
size += eSize
|
||||
}};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Neon (de)interlacing microops
|
||||
//
|
||||
|
||||
let {{
|
||||
header_output = exec_output = ''
|
||||
for dRegs in (2, 3, 4):
|
||||
loadConv = ''
|
||||
unloadConv = ''
|
||||
for dReg in range(dRegs):
|
||||
loadConv += '''
|
||||
conv1.cRegs[%(sReg0)d] = htog(FpOp1P%(sReg0)d.uw);
|
||||
conv1.cRegs[%(sReg1)d] = htog(FpOp1P%(sReg1)d.uw);
|
||||
''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
|
||||
unloadConv += '''
|
||||
FpDestS%(dReg)dP0.uw = gtoh(conv2.cRegs[2 * %(dReg)d + 0]);
|
||||
FpDestS%(dReg)dP1.uw = gtoh(conv2.cRegs[2 * %(dReg)d + 1]);
|
||||
''' % { "dReg" : dReg }
|
||||
microDeintNeonCode = '''
|
||||
const unsigned dRegs = %(dRegs)d;
|
||||
const unsigned regs = 2 * dRegs;
|
||||
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
|
||||
sizeof(Element);
|
||||
union convStruct {
|
||||
FloatRegBits cRegs[regs];
|
||||
Element elements[dRegs * perDReg];
|
||||
} conv1, conv2;
|
||||
|
||||
%(loadConv)s
|
||||
|
||||
unsigned srcElem = 0;
|
||||
for (unsigned destOffset = 0;
|
||||
destOffset < perDReg; destOffset++) {
|
||||
for (unsigned dReg = 0; dReg < dRegs; dReg++) {
|
||||
conv2.elements[dReg * perDReg + destOffset] =
|
||||
conv1.elements[srcElem++];
|
||||
}
|
||||
}
|
||||
|
||||
%(unloadConv)s
|
||||
''' % { "dRegs" : dRegs,
|
||||
"loadConv" : loadConv,
|
||||
"unloadConv" : unloadConv }
|
||||
microDeintNeonIop = \
|
||||
InstObjParams('deintneon%duop' % (dRegs * 2),
|
||||
'MicroDeintNeon%dUop' % (dRegs * 2),
|
||||
'MicroNeonMixOp',
|
||||
{ 'predicate_test': predicateTest,
|
||||
'code' : microDeintNeonCode },
|
||||
['IsMicroop'])
|
||||
header_output += MicroNeonMixDeclare.subst(microDeintNeonIop)
|
||||
exec_output += MicroNeonMixExecute.subst(microDeintNeonIop)
|
||||
|
||||
loadConv = ''
|
||||
unloadConv = ''
|
||||
for dReg in range(dRegs):
|
||||
loadConv += '''
|
||||
conv1.cRegs[2 * %(dReg)d + 0] = htog(FpOp1S%(dReg)dP0.uw);
|
||||
conv1.cRegs[2 * %(dReg)d + 1] = htog(FpOp1S%(dReg)dP1.uw);
|
||||
''' % { "dReg" : dReg }
|
||||
unloadConv += '''
|
||||
FpDestP%(sReg0)d.uw = gtoh(conv2.cRegs[%(sReg0)d]);
|
||||
FpDestP%(sReg1)d.uw = gtoh(conv2.cRegs[%(sReg1)d]);
|
||||
''' % { "sReg0" : (dReg * 2), "sReg1" : (dReg * 2 + 1) }
|
||||
microInterNeonCode = '''
|
||||
const unsigned dRegs = %(dRegs)d;
|
||||
const unsigned regs = 2 * dRegs;
|
||||
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
|
||||
sizeof(Element);
|
||||
union convStruct {
|
||||
FloatRegBits cRegs[regs];
|
||||
Element elements[dRegs * perDReg];
|
||||
} conv1, conv2;
|
||||
|
||||
%(loadConv)s
|
||||
|
||||
unsigned destElem = 0;
|
||||
for (unsigned srcOffset = 0;
|
||||
srcOffset < perDReg; srcOffset++) {
|
||||
for (unsigned dReg = 0; dReg < dRegs; dReg++) {
|
||||
conv2.elements[destElem++] =
|
||||
conv1.elements[dReg * perDReg + srcOffset];
|
||||
}
|
||||
}
|
||||
|
||||
%(unloadConv)s
|
||||
''' % { "dRegs" : dRegs,
|
||||
"loadConv" : loadConv,
|
||||
"unloadConv" : unloadConv }
|
||||
microInterNeonIop = \
|
||||
InstObjParams('interneon%duop' % (dRegs * 2),
|
||||
'MicroInterNeon%dUop' % (dRegs * 2),
|
||||
'MicroNeonMixOp',
|
||||
{ 'predicate_test': predicateTest,
|
||||
'code' : microInterNeonCode },
|
||||
['IsMicroop'])
|
||||
header_output += MicroNeonMixDeclare.subst(microInterNeonIop)
|
||||
exec_output += MicroNeonMixExecute.subst(microInterNeonIop)
|
||||
}};
|
||||
|
||||
let {{
|
||||
# Emit one MicroNeonExecDeclare substitution per (element type, register
# count) pair for both the deinterleave and the interleave microops.
exec_output = ''
|
||||
for type in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
|
||||
for dRegs in (2, 3, 4):
|
||||
# The class names carry dRegs * 2, matching the names built with
# 'MicroDeintNeon%dUop' % (dRegs * 2) where the microops are defined.
Name = "MicroDeintNeon%dUop" % (dRegs * 2)
|
||||
substDict = { "class_name" : Name, "targs" : type }
|
||||
exec_output += MicroNeonExecDeclare.subst(substDict)
|
||||
# Same declaration for the interleave counterpart.
Name = "MicroInterNeon%dUop" % (dRegs * 2)
|
||||
substDict = { "class_name" : Name, "targs" : type }
|
||||
exec_output += MicroNeonExecDeclare.subst(substDict)
|
||||
}};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Neon microops to pack/unpack a single lane
|
||||
//
|
||||
|
||||
let {{
|
||||
header_output = exec_output = ''
|
||||
for sRegs in 1, 2:
|
||||
baseLoadRegs = ''
|
||||
for reg in range(sRegs):
|
||||
baseLoadRegs += '''
|
||||
sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d.uw);
|
||||
sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d.uw);
|
||||
''' % { "reg0" : (2 * reg + 0),
|
||||
"reg1" : (2 * reg + 1) }
|
||||
for dRegs in range(sRegs, 5):
|
||||
unloadRegs = ''
|
||||
loadRegs = baseLoadRegs
|
||||
for reg in range(dRegs):
|
||||
loadRegs += '''
|
||||
destRegs[%(reg)d].fRegs[0] = htog(FpDestS%(reg)dP0.uw);
|
||||
destRegs[%(reg)d].fRegs[1] = htog(FpDestS%(reg)dP1.uw);
|
||||
''' % { "reg" : reg }
|
||||
unloadRegs += '''
|
||||
FpDestS%(reg)dP0.uw = gtoh(destRegs[%(reg)d].fRegs[0]);
|
||||
FpDestS%(reg)dP1.uw = gtoh(destRegs[%(reg)d].fRegs[1]);
|
||||
''' % { "reg" : reg }
|
||||
microUnpackNeonCode = '''
|
||||
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
|
||||
sizeof(Element);
|
||||
|
||||
union SourceRegs {
|
||||
FloatRegBits fRegs[2 * %(sRegs)d];
|
||||
Element elements[%(sRegs)d * perDReg];
|
||||
} sourceRegs;
|
||||
|
||||
union DestReg {
|
||||
FloatRegBits fRegs[2];
|
||||
Element elements[perDReg];
|
||||
} destRegs[%(dRegs)d];
|
||||
|
||||
%(loadRegs)s
|
||||
|
||||
for (unsigned i = 0; i < %(dRegs)d; i++) {
|
||||
destRegs[i].elements[lane] = sourceRegs.elements[i];
|
||||
}
|
||||
|
||||
%(unloadRegs)s
|
||||
''' % { "sRegs" : sRegs, "dRegs" : dRegs,
|
||||
"loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
|
||||
|
||||
microUnpackNeonIop = \
|
||||
InstObjParams('unpackneon%dto%duop' % (sRegs * 2, dRegs * 2),
|
||||
'MicroUnpackNeon%dto%dUop' %
|
||||
(sRegs * 2, dRegs * 2),
|
||||
'MicroNeonMixLaneOp',
|
||||
{ 'predicate_test': predicateTest,
|
||||
'code' : microUnpackNeonCode },
|
||||
['IsMicroop'])
|
||||
header_output += MicroNeonMixLaneDeclare.subst(microUnpackNeonIop)
|
||||
exec_output += MicroNeonMixExecute.subst(microUnpackNeonIop)
|
||||
|
||||
for sRegs in 1, 2:
|
||||
loadRegs = ''
|
||||
for reg in range(sRegs):
|
||||
loadRegs += '''
|
||||
sourceRegs.fRegs[%(reg0)d] = htog(FpOp1P%(reg0)d.uw);
|
||||
sourceRegs.fRegs[%(reg1)d] = htog(FpOp1P%(reg1)d.uw);
|
||||
''' % { "reg0" : (2 * reg + 0),
|
||||
"reg1" : (2 * reg + 1) }
|
||||
for dRegs in range(sRegs, 5):
|
||||
unloadRegs = ''
|
||||
for reg in range(dRegs):
|
||||
unloadRegs += '''
|
||||
FpDestS%(reg)dP0.uw = gtoh(destRegs[%(reg)d].fRegs[0]);
|
||||
FpDestS%(reg)dP1.uw = gtoh(destRegs[%(reg)d].fRegs[1]);
|
||||
''' % { "reg" : reg }
|
||||
microUnpackAllNeonCode = '''
|
||||
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
|
||||
sizeof(Element);
|
||||
|
||||
union SourceRegs {
|
||||
FloatRegBits fRegs[2 * %(sRegs)d];
|
||||
Element elements[%(sRegs)d * perDReg];
|
||||
} sourceRegs;
|
||||
|
||||
union DestReg {
|
||||
FloatRegBits fRegs[2];
|
||||
Element elements[perDReg];
|
||||
} destRegs[%(dRegs)d];
|
||||
|
||||
%(loadRegs)s
|
||||
|
||||
for (unsigned i = 0; i < %(dRegs)d; i++) {
|
||||
for (unsigned j = 0; j < perDReg; j++)
|
||||
destRegs[i].elements[j] = sourceRegs.elements[i];
|
||||
}
|
||||
|
||||
%(unloadRegs)s
|
||||
''' % { "sRegs" : sRegs, "dRegs" : dRegs,
|
||||
"loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
|
||||
|
||||
microUnpackAllNeonIop = \
|
||||
InstObjParams('unpackallneon%dto%duop' % (sRegs * 2, dRegs * 2),
|
||||
'MicroUnpackAllNeon%dto%dUop' %
|
||||
(sRegs * 2, dRegs * 2),
|
||||
'MicroNeonMixOp',
|
||||
{ 'predicate_test': predicateTest,
|
||||
'code' : microUnpackAllNeonCode },
|
||||
['IsMicroop'])
|
||||
header_output += MicroNeonMixDeclare.subst(microUnpackAllNeonIop)
|
||||
exec_output += MicroNeonMixExecute.subst(microUnpackAllNeonIop)
|
||||
|
||||
for dRegs in 1, 2:
|
||||
unloadRegs = ''
|
||||
for reg in range(dRegs):
|
||||
unloadRegs += '''
|
||||
FpDestP%(reg0)d.uw = gtoh(destRegs.fRegs[%(reg0)d]);
|
||||
FpDestP%(reg1)d.uw = gtoh(destRegs.fRegs[%(reg1)d]);
|
||||
''' % { "reg0" : (2 * reg + 0),
|
||||
"reg1" : (2 * reg + 1) }
|
||||
for sRegs in range(dRegs, 5):
|
||||
loadRegs = ''
|
||||
for reg in range(sRegs):
|
||||
loadRegs += '''
|
||||
sourceRegs[%(reg)d].fRegs[0] = htog(FpOp1S%(reg)dP0.uw);
|
||||
sourceRegs[%(reg)d].fRegs[1] = htog(FpOp1S%(reg)dP1.uw);
|
||||
''' % { "reg" : reg }
|
||||
microPackNeonCode = '''
|
||||
const unsigned perDReg = (2 * sizeof(FloatRegBits)) /
|
||||
sizeof(Element);
|
||||
|
||||
union SourceReg {
|
||||
FloatRegBits fRegs[2];
|
||||
Element elements[perDReg];
|
||||
} sourceRegs[%(sRegs)d];
|
||||
|
||||
union DestRegs {
|
||||
FloatRegBits fRegs[2 * %(dRegs)d];
|
||||
Element elements[%(dRegs)d * perDReg];
|
||||
} destRegs;
|
||||
|
||||
%(loadRegs)s
|
||||
|
||||
for (unsigned i = 0; i < %(sRegs)d; i++) {
|
||||
destRegs.elements[i] = sourceRegs[i].elements[lane];
|
||||
}
|
||||
|
||||
%(unloadRegs)s
|
||||
''' % { "sRegs" : sRegs, "dRegs" : dRegs,
|
||||
"loadRegs" : loadRegs, "unloadRegs" : unloadRegs }
|
||||
|
||||
microPackNeonIop = \
|
||||
InstObjParams('packneon%dto%duop' % (sRegs * 2, dRegs * 2),
|
||||
'MicroPackNeon%dto%dUop' %
|
||||
(sRegs * 2, dRegs * 2),
|
||||
'MicroNeonMixLaneOp',
|
||||
{ 'predicate_test': predicateTest,
|
||||
'code' : microPackNeonCode },
|
||||
['IsMicroop'])
|
||||
header_output += MicroNeonMixLaneDeclare.subst(microPackNeonIop)
|
||||
exec_output += MicroNeonMixExecute.subst(microPackNeonIop)
|
||||
}};
|
||||
|
||||
let {{
|
||||
# Emit explicit instantiations of execute() (via MicroNeonExecDeclare) for
# every unpack, unpack-all and pack microop class, once per unsigned element
# type (8, 16 and 32 bits).
exec_output = ''
|
||||
for type in ('uint8_t', 'uint16_t', 'uint32_t'):
|
||||
for sRegs in 1, 2:
|
||||
for dRegs in range(sRegs, 5):
|
||||
for format in ("MicroUnpackNeon%(sRegs)dto%(dRegs)dUop",
|
||||
"MicroUnpackAllNeon%(sRegs)dto%(dRegs)dUop",
|
||||
"MicroPackNeon%(dRegs)dto%(sRegs)dUop"):
|
||||
# Class names carry the register counts doubled; the pack form lists
# the larger count (dRegs here) first.
Name = format % { "sRegs" : sRegs * 2,
|
||||
"dRegs" : dRegs * 2 }
|
||||
substDict = { "class_name" : Name, "targs" : type }
|
||||
exec_output += MicroNeonExecDeclare.subst(substDict)
|
||||
}};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Integer = Integer op Immediate microops
|
||||
|
@ -122,23 +568,32 @@ let {{
|
|||
|
||||
let {{
|
||||
# addi_uop: Ra = Rb + imm.
microAddiUopIop = InstObjParams('addi_uop', 'MicroAddiUop',
|
||||
'MicroIntOp',
|
||||
'MicroIntImmOp',
|
||||
{'code': 'Ra = Rb + imm;',
|
||||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
|
||||
# add_uop: Ra = Rb + Rc (register-register form).
microAddUopIop = InstObjParams('add_uop', 'MicroAddUop',
|
||||
'MicroIntOp',
|
||||
{'code': 'Ra = Rb + Rc;',
|
||||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
|
||||
# subi_uop: Ra = Rb - imm.
microSubiUopIop = InstObjParams('subi_uop', 'MicroSubiUop',
|
||||
'MicroIntOp',
|
||||
'MicroIntImmOp',
|
||||
{'code': 'Ra = Rb - imm;',
|
||||
'predicate_test': predicateTest},
|
||||
['IsMicroop'])
|
||||
|
||||
# Emit declarations, constructors and execute() bodies for the microops above.
# The immediate forms go through the MicroIntImm* templates and the
# register-register add through the MicroInt* templates.
header_output = MicroIntDeclare.subst(microAddiUopIop) + \
|
||||
MicroIntDeclare.subst(microSubiUopIop)
|
||||
decoder_output = MicroIntConstructor.subst(microAddiUopIop) + \
|
||||
MicroIntConstructor.subst(microSubiUopIop)
|
||||
header_output = MicroIntImmDeclare.subst(microAddiUopIop) + \
|
||||
MicroIntImmDeclare.subst(microSubiUopIop) + \
|
||||
MicroIntDeclare.subst(microAddUopIop)
|
||||
decoder_output = MicroIntImmConstructor.subst(microAddiUopIop) + \
|
||||
MicroIntImmConstructor.subst(microSubiUopIop) + \
|
||||
MicroIntConstructor.subst(microAddUopIop)
|
||||
exec_output = PredOpExecute.subst(microAddiUopIop) + \
|
||||
PredOpExecute.subst(microSubiUopIop)
|
||||
PredOpExecute.subst(microSubiUopIop) + \
|
||||
PredOpExecute.subst(microAddUopIop)
|
||||
}};
|
||||
|
||||
let {{
|
||||
|
@ -146,6 +601,22 @@ let {{
|
|||
header_output = MacroMemDeclare.subst(iop)
|
||||
decoder_output = MacroMemConstructor.subst(iop)
|
||||
|
||||
iop = InstObjParams("vldmult", "VldMult", 'VldMultOp', "", [])
|
||||
header_output += VMemMultDeclare.subst(iop)
|
||||
decoder_output += VMemMultConstructor.subst(iop)
|
||||
|
||||
iop = InstObjParams("vldsingle", "VldSingle", 'VldSingleOp', "", [])
|
||||
header_output += VMemSingleDeclare.subst(iop)
|
||||
decoder_output += VMemSingleConstructor.subst(iop)
|
||||
|
||||
iop = InstObjParams("vstmult", "VstMult", 'VstMultOp', "", [])
|
||||
header_output += VMemMultDeclare.subst(iop)
|
||||
decoder_output += VMemMultConstructor.subst(iop)
|
||||
|
||||
iop = InstObjParams("vstsingle", "VstSingle", 'VstSingleOp', "", [])
|
||||
header_output += VMemSingleDeclare.subst(iop)
|
||||
decoder_output += VMemSingleConstructor.subst(iop)
|
||||
|
||||
vfpIop = InstObjParams("vldmstm", "VLdmStm", 'MacroVFPMemOp', "", [])
|
||||
header_output += MacroVFPMemDeclare.subst(vfpIop)
|
||||
decoder_output += MacroVFPMemConstructor.subst(vfpIop)
|
||||
|
|
3343
src/arch/arm/isa/insts/neon.isa
Normal file
3343
src/arch/arm/isa/insts/neon.isa
Normal file
File diff suppressed because it is too large
Load diff
|
@ -47,6 +47,7 @@ def operand_types {{
|
|||
'sw' : ('signed int', 32),
|
||||
'uw' : ('unsigned int', 32),
|
||||
'ud' : ('unsigned int', 64),
|
||||
'tud' : ('twin64 int', 64),
|
||||
'sf' : ('float', 32),
|
||||
'df' : ('float', 64)
|
||||
}};
|
||||
|
@ -96,6 +97,18 @@ def operands {{
|
|||
'FpDestP1': ('FloatReg', 'sf', '(dest + 1)', 'IsFloating', 2),
|
||||
'FpDestP2': ('FloatReg', 'sf', '(dest + 2)', 'IsFloating', 2),
|
||||
'FpDestP3': ('FloatReg', 'sf', '(dest + 3)', 'IsFloating', 2),
|
||||
'FpDestP4': ('FloatReg', 'sf', '(dest + 4)', 'IsFloating', 2),
|
||||
'FpDestP5': ('FloatReg', 'sf', '(dest + 5)', 'IsFloating', 2),
|
||||
'FpDestP6': ('FloatReg', 'sf', '(dest + 6)', 'IsFloating', 2),
|
||||
'FpDestP7': ('FloatReg', 'sf', '(dest + 7)', 'IsFloating', 2),
|
||||
'FpDestS0P0': ('FloatReg', 'sf', '(dest + step * 0 + 0)', 'IsFloating', 2),
|
||||
'FpDestS0P1': ('FloatReg', 'sf', '(dest + step * 0 + 1)', 'IsFloating', 2),
|
||||
'FpDestS1P0': ('FloatReg', 'sf', '(dest + step * 1 + 0)', 'IsFloating', 2),
|
||||
'FpDestS1P1': ('FloatReg', 'sf', '(dest + step * 1 + 1)', 'IsFloating', 2),
|
||||
'FpDestS2P0': ('FloatReg', 'sf', '(dest + step * 2 + 0)', 'IsFloating', 2),
|
||||
'FpDestS2P1': ('FloatReg', 'sf', '(dest + step * 2 + 1)', 'IsFloating', 2),
|
||||
'FpDestS3P0': ('FloatReg', 'sf', '(dest + step * 3 + 0)', 'IsFloating', 2),
|
||||
'FpDestS3P1': ('FloatReg', 'sf', '(dest + step * 3 + 1)', 'IsFloating', 2),
|
||||
'Result': ('IntReg', 'uw', 'result', 'IsInteger', 2,
|
||||
maybePCRead, maybePCWrite),
|
||||
'Dest2': ('IntReg', 'uw', 'dest2', 'IsInteger', 2,
|
||||
|
@ -124,6 +137,18 @@ def operands {{
|
|||
'FpOp1P1': ('FloatReg', 'sf', '(op1 + 1)', 'IsFloating', 2),
|
||||
'FpOp1P2': ('FloatReg', 'sf', '(op1 + 2)', 'IsFloating', 2),
|
||||
'FpOp1P3': ('FloatReg', 'sf', '(op1 + 3)', 'IsFloating', 2),
|
||||
'FpOp1P4': ('FloatReg', 'sf', '(op1 + 4)', 'IsFloating', 2),
|
||||
'FpOp1P5': ('FloatReg', 'sf', '(op1 + 5)', 'IsFloating', 2),
|
||||
'FpOp1P6': ('FloatReg', 'sf', '(op1 + 6)', 'IsFloating', 2),
|
||||
'FpOp1P7': ('FloatReg', 'sf', '(op1 + 7)', 'IsFloating', 2),
|
||||
'FpOp1S0P0': ('FloatReg', 'sf', '(op1 + step * 0 + 0)', 'IsFloating', 2),
|
||||
'FpOp1S0P1': ('FloatReg', 'sf', '(op1 + step * 0 + 1)', 'IsFloating', 2),
|
||||
'FpOp1S1P0': ('FloatReg', 'sf', '(op1 + step * 1 + 0)', 'IsFloating', 2),
|
||||
'FpOp1S1P1': ('FloatReg', 'sf', '(op1 + step * 1 + 1)', 'IsFloating', 2),
|
||||
'FpOp1S2P0': ('FloatReg', 'sf', '(op1 + step * 2 + 0)', 'IsFloating', 2),
|
||||
'FpOp1S2P1': ('FloatReg', 'sf', '(op1 + step * 2 + 1)', 'IsFloating', 2),
|
||||
'FpOp1S3P0': ('FloatReg', 'sf', '(op1 + step * 3 + 0)', 'IsFloating', 2),
|
||||
'FpOp1S3P1': ('FloatReg', 'sf', '(op1 + step * 3 + 1)', 'IsFloating', 2),
|
||||
'MiscOp1': ('ControlReg', 'uw', 'op1', (None, None, 'IsControl'), 2),
|
||||
'Op2': ('IntReg', 'uw', 'op2', 'IsInteger', 2,
|
||||
maybePCRead, maybePCWrite),
|
||||
|
@ -164,6 +189,7 @@ def operands {{
|
|||
maybePCRead, maybeIWPCWrite),
|
||||
'Fa' : ('FloatReg', 'sf', 'ura', 'IsFloating', 2),
|
||||
'Rb' : ('IntReg', 'uw', 'urb', 'IsInteger', 2, maybePCRead, maybePCWrite),
|
||||
'Rc' : ('IntReg', 'uw', 'urc', 'IsInteger', 2, maybePCRead, maybePCWrite),
|
||||
|
||||
#General Purpose Floating Point Reg Operands
|
||||
'Fd': ('FloatReg', 'df', 'FD', 'IsFloating', 2),
|
||||
|
|
|
@ -74,10 +74,152 @@ def template MicroMemConstructor {{
|
|||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Integer = Integer op Immediate microops
|
||||
// Neon load/store microops
|
||||
//
|
||||
|
||||
// Declares a Neon memory microop class, templated on the element type.
// The constructor forwards _dest, _ura and _imm to the base class and ORs the
// caller-supplied extraMemFlags into memAccessFlags. execute(), initiateAcc()
// and completeAcc() are declared through the standard substitution hooks.
def template MicroNeonMemDeclare {{
|
||||
template <class Element>
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
public:
|
||||
%(class_name)s(ExtMachInst machInst, RegIndex _dest,
|
||||
RegIndex _ura, uint32_t _imm, unsigned extraMemFlags)
|
||||
: %(base_class)s("%(mnemonic)s", machInst,
|
||||
%(op_class)s, _dest, _ura, _imm)
|
||||
{
|
||||
memAccessFlags |= extraMemFlags;
|
||||
%(constructor)s;
|
||||
}
|
||||
|
||||
%(BasicExecDeclare)s
|
||||
%(InitiateAccDeclare)s
|
||||
%(CompleteAccDeclare)s
|
||||
};
|
||||
}};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Integer = Integer op Integer microops
|
||||
//
|
||||
|
||||
// Declares an integer microop class whose constructor takes three register
// indices (_ura, _urb, _urc). Only the constructor prototype is emitted here;
// the matching definition comes from MicroIntConstructor.
def template MicroIntDeclare {{
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
public:
|
||||
%(class_name)s(ExtMachInst machInst,
|
||||
RegIndex _ura, RegIndex _urb, RegIndex _urc);
|
||||
%(BasicExecDeclare)s
|
||||
};
|
||||
}};
|
||||
|
||||
// Out-of-line constructor for classes declared with MicroIntDeclare: passes
// the mnemonic, op class and the three register indices to the base class,
// then runs the generated %(constructor)s code.
def template MicroIntConstructor {{
|
||||
%(class_name)s::%(class_name)s(ExtMachInst machInst,
|
||||
RegIndex _ura,
|
||||
RegIndex _urb,
|
||||
RegIndex _urc)
|
||||
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
|
||||
_ura, _urb, _urc)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
}};
|
||||
|
||||
// Explicit template instantiations of execute(), initiateAcc() and
// completeAcc() for %(class_name)s with the element type(s) in %(targs)s.
def template MicroNeonMemExecDeclare {{
|
||||
template
|
||||
Fault %(class_name)s<%(targs)s>::execute(
|
||||
%(CPU_exec_context)s *, Trace::InstRecord *) const;
|
||||
template
|
||||
Fault %(class_name)s<%(targs)s>::initiateAcc(
|
||||
%(CPU_exec_context)s *, Trace::InstRecord *) const;
|
||||
template
|
||||
Fault %(class_name)s<%(targs)s>::completeAcc(PacketPtr,
|
||||
%(CPU_exec_context)s *, Trace::InstRecord *) const;
|
||||
}};
|
||||
|
||||
// Explicit template instantiation of execute() only, for %(class_name)s with
// the element type(s) in %(targs)s.
def template MicroNeonExecDeclare {{
|
||||
template
|
||||
Fault %(class_name)s<%(targs)s>::execute(
|
||||
%(CPU_exec_context)s *, Trace::InstRecord *) const;
|
||||
}};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Neon (de)interlacing microops
|
||||
//
|
||||
|
||||
// Declares a Neon (de)interlacing microop class, templated on the element
// type. The constructor forwards the destination register, the first source
// register and the register step to the base class.
def template MicroNeonMixDeclare {{
|
||||
template <class Element>
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
public:
|
||||
%(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1,
|
||||
uint8_t _step) :
|
||||
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
|
||||
_dest, _op1, _step)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
|
||||
%(BasicExecDeclare)s
|
||||
};
|
||||
}};
|
||||
|
||||
// execute() body shared by the Neon mix (pack/unpack) microops. Operands are
// declared and read unconditionally; %(code)s and the operand write-back run
// only when the predicate test passes. Returns the resulting fault.
def template MicroNeonMixExecute {{
|
||||
template <class Element>
|
||||
Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
{
|
||||
Fault fault = NoFault;
|
||||
// resTemp is zero-initialised and then self-assigned; presumably this only
// keeps the compiler quiet when %(code)s never uses it.
uint64_t resTemp = 0;
|
||||
resTemp = resTemp;
|
||||
%(op_decl)s;
|
||||
%(op_rd)s;
|
||||
|
||||
if (%(predicate_test)s)
|
||||
{
|
||||
%(code)s;
|
||||
if (fault == NoFault)
|
||||
{
|
||||
%(op_wb)s;
|
||||
}
|
||||
}
|
||||
|
||||
// Store the instruction's newItstate into ITSTATE whenever it carries a
// non-zero itstateMask and no fault occurred (independent of the predicate).
if (fault == NoFault && machInst.itstateMask != 0) {
|
||||
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
|
||||
}
|
||||
|
||||
return fault;
|
||||
}
|
||||
}};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Neon (un)packing microops using a particular lane
|
||||
//
|
||||
|
||||
// Same shape as MicroNeonMixDeclare, but the constructor takes an additional
// _lane argument and forwards it to the base class.
def template MicroNeonMixLaneDeclare {{
|
||||
template <class Element>
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
public:
|
||||
%(class_name)s(ExtMachInst machInst, RegIndex _dest, RegIndex _op1,
|
||||
uint8_t _step, unsigned _lane) :
|
||||
%(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
|
||||
_dest, _op1, _step, _lane)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
|
||||
%(BasicExecDeclare)s
|
||||
};
|
||||
}};
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Integer = Integer op Immediate microops
|
||||
//
|
||||
|
||||
def template MicroIntImmDeclare {{
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
public:
|
||||
|
@ -88,7 +230,7 @@ def template MicroIntDeclare {{
|
|||
};
|
||||
}};
|
||||
|
||||
def template MicroIntConstructor {{
|
||||
def template MicroIntImmConstructor {{
|
||||
%(class_name)s::%(class_name)s(ExtMachInst machInst,
|
||||
RegIndex _ura,
|
||||
RegIndex _urb,
|
||||
|
@ -132,6 +274,52 @@ def template MacroMemConstructor {{
|
|||
|
||||
}};
|
||||
|
||||
// Declares a macro-op class for the multiple-structure vector load/store
// instructions (used for VldMult and VstMult). Only the constructor prototype
// is emitted; execute() comes from %(BasicExecPanic)s.
def template VMemMultDeclare {{
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
public:
|
||||
// Constructor
|
||||
%(class_name)s(ExtMachInst machInst, unsigned width,
|
||||
RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
|
||||
uint32_t size, uint32_t align, RegIndex rm);
|
||||
%(BasicExecPanic)s
|
||||
};
|
||||
}};
|
||||
|
||||
// Constructor definition for classes declared with VMemMultDeclare: forwards
// every argument unchanged to the base class, then runs %(constructor)s.
def template VMemMultConstructor {{
|
||||
%(class_name)s::%(class_name)s(ExtMachInst machInst, unsigned width,
|
||||
RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
|
||||
uint32_t size, uint32_t align, RegIndex rm)
|
||||
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, width,
|
||||
rn, vd, regs, inc, size, align, rm)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
}};
|
||||
|
||||
// Declares a macro-op class for the single-structure vector load/store
// instructions (used for VldSingle and VstSingle). Compared with
// VMemMultDeclare the constructor adds a leading `all` flag and a trailing
// `lane` argument that defaults to 0.
def template VMemSingleDeclare {{
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
public:
|
||||
// Constructor
|
||||
%(class_name)s(ExtMachInst machInst, bool all, unsigned width,
|
||||
RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
|
||||
uint32_t size, uint32_t align, RegIndex rm, unsigned lane = 0);
|
||||
%(BasicExecPanic)s
|
||||
};
|
||||
}};
|
||||
|
||||
// Constructor definition for classes declared with VMemSingleDeclare: forwards
// every argument (including `all` and `lane`) unchanged to the base class,
// then runs %(constructor)s. The default for `lane` lives on the declaration.
def template VMemSingleConstructor {{
|
||||
%(class_name)s::%(class_name)s(ExtMachInst machInst, bool all, unsigned width,
|
||||
RegIndex rn, RegIndex vd, unsigned regs, unsigned inc,
|
||||
uint32_t size, uint32_t align, RegIndex rm, unsigned lane)
|
||||
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, all, width,
|
||||
rn, vd, regs, inc, size, align, rm, lane)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
}};
|
||||
|
||||
def template MacroVFPMemDeclare {{
|
||||
/**
|
||||
* Static instructions class for a store multiple instruction
|
||||
|
|
|
@ -180,6 +180,42 @@ def template LoadExecute {{
|
|||
}
|
||||
}};
|
||||
|
||||
// Atomic-mode execute() for Neon load microops, templated on the element type.
// Reads %(size)d bytes at EA into a local MemUnion, runs %(memacc_code)s to
// move the data into the operands, then writes the operands back.
def template NeonLoadExecute {{
|
||||
template <class Element>
|
||||
Fault %(class_name)s<Element>::execute(
|
||||
%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
|
||||
{
|
||||
Addr EA;
|
||||
Fault fault = NoFault;
|
||||
|
||||
%(op_decl)s;
|
||||
%(mem_decl)s;
|
||||
%(op_rd)s;
|
||||
%(ea_code)s;
|
||||
|
||||
// Staging buffer for the loaded bytes; dataPtr aliases its byte view.
MemUnion memUnion;
|
||||
uint8_t *dataPtr = memUnion.bytes;
|
||||
|
||||
if (%(predicate_test)s)
|
||||
{
|
||||
if (fault == NoFault) {
|
||||
fault = xc->readBytes(EA, dataPtr, %(size)d, memAccessFlags);
|
||||
// NOTE(review): memacc_code runs even when readBytes just returned a
// fault; only the operand write-back below is guarded by the result.
%(memacc_code)s;
|
||||
}
|
||||
|
||||
if (fault == NoFault) {
|
||||
%(op_wb)s;
|
||||
}
|
||||
}
|
||||
|
||||
if (fault == NoFault && machInst.itstateMask != 0) {
|
||||
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
|
||||
}
|
||||
|
||||
return fault;
|
||||
}
|
||||
}};
|
||||
|
||||
def template StoreExecute {{
|
||||
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
|
@ -217,6 +253,46 @@ def template StoreExecute {{
|
|||
}
|
||||
}};
|
||||
|
||||
// Atomic-mode execute() for Neon store microops, templated on the element
// type. %(memacc_code)s fills a local MemUnion from the operands, then
// %(size)d bytes are written to EA and the operands are written back.
def template NeonStoreExecute {{
|
||||
template <class Element>
|
||||
Fault %(class_name)s<Element>::execute(
|
||||
%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
|
||||
{
|
||||
Addr EA;
|
||||
Fault fault = NoFault;
|
||||
|
||||
%(op_decl)s;
|
||||
%(mem_decl)s;
|
||||
%(op_rd)s;
|
||||
%(ea_code)s;
|
||||
|
||||
// Staging buffer for the bytes to store; dataPtr aliases its byte view.
MemUnion memUnion;
|
||||
uint8_t *dataPtr = memUnion.bytes;
|
||||
|
||||
if (%(predicate_test)s)
|
||||
{
|
||||
if (fault == NoFault) {
|
||||
%(memacc_code)s;
|
||||
}
|
||||
|
||||
if (fault == NoFault) {
|
||||
fault = xc->writeBytes(dataPtr, %(size)d, EA,
|
||||
memAccessFlags, NULL);
|
||||
}
|
||||
|
||||
// Operand write-back happens only if the store itself did not fault.
if (fault == NoFault) {
|
||||
%(op_wb)s;
|
||||
}
|
||||
}
|
||||
|
||||
if (fault == NoFault && machInst.itstateMask != 0) {
|
||||
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
|
||||
}
|
||||
|
||||
return fault;
|
||||
}
|
||||
}};
|
||||
|
||||
def template StoreExExecute {{
|
||||
Fault %(class_name)s::execute(%(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
|
@ -336,6 +412,45 @@ def template StoreInitiateAcc {{
|
|||
}
|
||||
}};
|
||||
|
||||
// Timing-mode initiateAcc() for Neon store microops. Performs the same steps
// as the atomic store path (fill the MemUnion, writeBytes, write-back), with
// the MemUnion scoped to the predicated branch.
def template NeonStoreInitiateAcc {{
|
||||
template <class Element>
|
||||
Fault %(class_name)s<Element>::initiateAcc(
|
||||
%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
|
||||
{
|
||||
Addr EA;
|
||||
Fault fault = NoFault;
|
||||
|
||||
%(op_decl)s;
|
||||
%(mem_decl)s;
|
||||
%(op_rd)s;
|
||||
%(ea_code)s;
|
||||
|
||||
if (%(predicate_test)s)
|
||||
{
|
||||
MemUnion memUnion;
|
||||
if (fault == NoFault) {
|
||||
%(memacc_code)s;
|
||||
}
|
||||
|
||||
if (fault == NoFault) {
|
||||
fault = xc->writeBytes(memUnion.bytes, %(size)d, EA,
|
||||
memAccessFlags, NULL);
|
||||
}
|
||||
|
||||
// Need to write back any potential address register update
|
||||
if (fault == NoFault) {
|
||||
%(op_wb)s;
|
||||
}
|
||||
}
|
||||
|
||||
if (fault == NoFault && machInst.itstateMask != 0) {
|
||||
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
|
||||
}
|
||||
|
||||
return fault;
|
||||
}
|
||||
}};
|
||||
|
||||
def template LoadInitiateAcc {{
|
||||
Fault %(class_name)s::initiateAcc(%(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
|
@ -363,6 +478,31 @@ def template LoadInitiateAcc {{
|
|||
}
|
||||
}};
|
||||
|
||||
// Timing-mode initiateAcc() for Neon load microops. Only source operands are
// declared (%(op_src_decl)s); the read of %(size)d bytes at EA is issued here
// and no operands are written.
def template NeonLoadInitiateAcc {{
|
||||
template <class Element>
|
||||
Fault %(class_name)s<Element>::initiateAcc(
|
||||
%(CPU_exec_context)s *xc, Trace::InstRecord *traceData) const
|
||||
{
|
||||
Addr EA;
|
||||
Fault fault = NoFault;
|
||||
|
||||
%(op_src_decl)s;
|
||||
%(op_rd)s;
|
||||
%(ea_code)s;
|
||||
|
||||
if (%(predicate_test)s)
|
||||
{
|
||||
if (fault == NoFault) {
|
||||
// No destination buffer is supplied (NULL); presumably the data is
// picked up from the packet in completeAcc() -- confirm.
fault = xc->readBytes(EA, NULL, %(size)d, memAccessFlags);
|
||||
}
|
||||
// ITSTATE is updated here only when the predicate is false; presumably
// completeAcc() handles it for the predicated-true case -- confirm.
} else if (fault == NoFault && machInst.itstateMask != 0) {
|
||||
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
|
||||
}
|
||||
|
||||
return fault;
|
||||
}
|
||||
}};
|
||||
|
||||
def template LoadCompleteAcc {{
|
||||
Fault %(class_name)s::completeAcc(PacketPtr pkt,
|
||||
%(CPU_exec_context)s *xc,
|
||||
|
@ -395,6 +535,40 @@ def template LoadCompleteAcc {{
|
|||
}
|
||||
}};
|
||||
|
||||
// Timing-mode completeAcc() for Neon load microops. Reinterprets the packet's
// data as a MemUnion, runs %(memacc_code)s to move it into the operands, and
// writes the operands back.
def template NeonLoadCompleteAcc {{
|
||||
template <class Element>
|
||||
Fault %(class_name)s<Element>::completeAcc(
|
||||
PacketPtr pkt, %(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
{
|
||||
Fault fault = NoFault;
|
||||
|
||||
%(mem_decl)s;
|
||||
%(op_decl)s;
|
||||
%(op_rd)s;
|
||||
|
||||
if (%(predicate_test)s)
|
||||
{
|
||||
// ARM instructions will not have a pkt if the predicate is false
|
||||
// The packet payload is viewed in place as a MemUnion (no copy is made).
MemUnion &memUnion = *(MemUnion *)pkt->getPtr<uint8_t>();
|
||||
|
||||
if (fault == NoFault) {
|
||||
%(memacc_code)s;
|
||||
}
|
||||
|
||||
if (fault == NoFault) {
|
||||
%(op_wb)s;
|
||||
}
|
||||
}
|
||||
|
||||
if (fault == NoFault && machInst.itstateMask != 0) {
|
||||
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
|
||||
}
|
||||
|
||||
return fault;
|
||||
}
|
||||
}};
|
||||
|
||||
def template StoreCompleteAcc {{
|
||||
Fault %(class_name)s::completeAcc(PacketPtr pkt,
|
||||
%(CPU_exec_context)s *xc,
|
||||
|
@ -420,6 +594,32 @@ def template StoreCompleteAcc {{
|
|||
}
|
||||
}};
|
||||
|
||||
// Timing-mode completeAcc() for Neon store microops. The packet is not
// examined; the body only re-reads the operands, performs the operand
// write-back under the predicate, and updates ITSTATE.
def template NeonStoreCompleteAcc {{
|
||||
template <class Element>
|
||||
Fault %(class_name)s<Element>::completeAcc(
|
||||
PacketPtr pkt, %(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
{
|
||||
Fault fault = NoFault;
|
||||
|
||||
%(op_decl)s;
|
||||
%(op_rd)s;
|
||||
|
||||
if (%(predicate_test)s)
|
||||
{
|
||||
if (fault == NoFault) {
|
||||
%(op_wb)s;
|
||||
}
|
||||
}
|
||||
|
||||
if (fault == NoFault && machInst.itstateMask != 0) {
|
||||
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
|
||||
}
|
||||
|
||||
return fault;
|
||||
}
|
||||
}};
|
||||
|
||||
def template StoreExCompleteAcc {{
|
||||
Fault %(class_name)s::completeAcc(PacketPtr pkt,
|
||||
%(CPU_exec_context)s *xc,
|
||||
|
|
227
src/arch/arm/isa/templates/neon.isa
Normal file
227
src/arch/arm/isa/templates/neon.isa
Normal file
|
@ -0,0 +1,227 @@
|
|||
// -*- mode:c++ -*-
|
||||
|
||||
// Copyright (c) 2010 ARM Limited
|
||||
// All rights reserved
|
||||
//
|
||||
// The license below extends only to copyright in the software and shall
|
||||
// not be construed as granting a license to any other intellectual
|
||||
// property including but not limited to intellectual property relating
|
||||
// to a hardware implementation of the functionality of the software
|
||||
// licensed hereunder. You may use the software subject to the license
|
||||
// terms below provided that you ensure that this notice is replicated
|
||||
// unmodified and in its entirety in all distributions of the software,
|
||||
// modified or unmodified, in source code or in binary form.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions are
|
||||
// met: redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer;
|
||||
// redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution;
|
||||
// neither the name of the copyright holders nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Authors: Gabe Black
|
||||
|
||||
// Declares a Neon instruction class with a destination and two source
// registers, templated on the element type. The template parameter is
// re-exported as the protected typedef `Element`.
def template NeonRegRegRegOpDeclare {{
|
||||
template <class _Element>
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
protected:
|
||||
typedef _Element Element;
|
||||
public:
|
||||
// Constructor
|
||||
%(class_name)s(ExtMachInst machInst,
|
||||
IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2)
|
||||
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
|
||||
_dest, _op1, _op2)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
|
||||
%(BasicExecDeclare)s
|
||||
};
|
||||
}};
|
||||
|
||||
// Declares a Neon instruction class with a destination, two source registers
// and a 64-bit immediate, templated on the element type (exposed as the
// protected typedef `Element`).
def template NeonRegRegRegImmOpDeclare {{
|
||||
template <class _Element>
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
protected:
|
||||
typedef _Element Element;
|
||||
public:
|
||||
// Constructor
|
||||
%(class_name)s(ExtMachInst machInst,
|
||||
IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
|
||||
uint64_t _imm)
|
||||
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
|
||||
_dest, _op1, _op2, _imm)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
|
||||
%(BasicExecDeclare)s
|
||||
};
|
||||
}};
|
||||
|
||||
// Declares a Neon instruction class with a destination, one source register
// and a 64-bit immediate, templated on the element type (exposed as the
// protected typedef `Element`).
def template NeonRegRegImmOpDeclare {{
|
||||
template <class _Element>
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
protected:
|
||||
typedef _Element Element;
|
||||
public:
|
||||
// Constructor
|
||||
%(class_name)s(ExtMachInst machInst,
|
||||
IntRegIndex _dest, IntRegIndex _op1, uint64_t _imm)
|
||||
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
|
||||
_dest, _op1, _imm)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
|
||||
%(BasicExecDeclare)s
|
||||
};
|
||||
}};
|
||||
|
||||
// Declares a Neon instruction class with only a destination register and a
// 64-bit immediate, templated on the element type (exposed as the protected
// typedef `Element`).
def template NeonRegImmOpDeclare {{
|
||||
template <class _Element>
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
protected:
|
||||
typedef _Element Element;
|
||||
public:
|
||||
// Constructor
|
||||
%(class_name)s(ExtMachInst machInst, IntRegIndex _dest, uint64_t _imm)
|
||||
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, _dest, _imm)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
|
||||
%(BasicExecDeclare)s
|
||||
};
|
||||
}};
|
||||
|
||||
// Declares a Neon instruction class with a destination and a single source
// register, templated on the element type (exposed as the protected typedef
// `Element`).
def template NeonRegRegOpDeclare {{
|
||||
template <class _Element>
|
||||
class %(class_name)s : public %(base_class)s
|
||||
{
|
||||
protected:
|
||||
typedef _Element Element;
|
||||
public:
|
||||
// Constructor
|
||||
%(class_name)s(ExtMachInst machInst,
|
||||
IntRegIndex _dest, IntRegIndex _op1)
|
||||
: %(base_class)s("%(mnemonic)s", machInst, %(op_class)s,
|
||||
_dest, _op1)
|
||||
{
|
||||
%(constructor)s;
|
||||
}
|
||||
|
||||
%(BasicExecDeclare)s
|
||||
};
|
||||
}};
|
||||
|
||||
// Explicit template instantiation of execute() for a Neon instruction class
// with the element type(s) given in %(targs)s.
def template NeonExecDeclare {{
|
||||
template
|
||||
Fault %(class_name)s<%(targs)s>::execute(
|
||||
%(CPU_exec_context)s *, Trace::InstRecord *) const;
|
||||
}};
|
||||
|
||||
// execute() body for Neon instructions whose operands all span the same
// number of registers (%(r_count)d). Defines the RegVect helper union for
// %(code)s, which runs together with the operand write-back only when the
// predicate test passes.
def template NeonEqualRegExecute {{
|
||||
template <class Element>
|
||||
Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
{
|
||||
Fault fault = NoFault;
|
||||
%(op_decl)s;
|
||||
%(op_rd)s;
|
||||
|
||||
// rCount registers of FloatRegBits hold eCount elements of type Element.
const unsigned rCount = %(r_count)d;
|
||||
const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element);
|
||||
|
||||
// Overlays the raw register words with a per-element view.
union RegVect {
|
||||
FloatRegBits regs[rCount];
|
||||
Element elements[eCount];
|
||||
};
|
||||
|
||||
if (%(predicate_test)s)
|
||||
{
|
||||
%(code)s;
|
||||
if (fault == NoFault)
|
||||
{
|
||||
%(op_wb)s;
|
||||
}
|
||||
}
|
||||
|
||||
if (fault == NoFault && machInst.itstateMask != 0) {
|
||||
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
|
||||
}
|
||||
|
||||
return fault;
|
||||
}
|
||||
}};
|
||||
|
||||
// Overload set mapping each 8/16/32-bit integer type to the integer type of
// twice the width and the same signedness. Only declarations are emitted
// here; NeonUnequalRegExecute uses them inside typeof() to name the wider
// element type.
output header {{
|
||||
uint16_t nextBiggerType(uint8_t);
|
||||
uint32_t nextBiggerType(uint16_t);
|
||||
uint64_t nextBiggerType(uint32_t);
|
||||
int16_t nextBiggerType(int8_t);
|
||||
int32_t nextBiggerType(int16_t);
|
||||
int64_t nextBiggerType(int32_t);
|
||||
}};
|
||||
|
||||
// execute() body for Neon instructions that mix normal-width and double-width
// operands. Besides RegVect it defines BigRegVect, which spans twice as many
// registers and holds the same number of double-width elements. %(code)s and
// the operand write-back run only when the predicate test passes.
def template NeonUnequalRegExecute {{
|
||||
template <class Element>
|
||||
Fault %(class_name)s<Element>::execute(%(CPU_exec_context)s *xc,
|
||||
Trace::InstRecord *traceData) const
|
||||
{
|
||||
// BigElement is whatever type the nextBiggerType overload returns for Element.
typedef typeof(nextBiggerType((Element)0)) BigElement;
|
||||
Fault fault = NoFault;
|
||||
%(op_decl)s;
|
||||
%(op_rd)s;
|
||||
|
||||
const unsigned rCount = %(r_count)d;
|
||||
const unsigned eCount = rCount * sizeof(FloatRegBits) / sizeof(Element);
|
||||
|
||||
// Normal-width vector: rCount registers viewed as eCount Elements or as
// eCount / 2 BigElements.
union RegVect {
|
||||
FloatRegBits regs[rCount];
|
||||
Element elements[eCount];
|
||||
BigElement bigElements[eCount / 2];
|
||||
};
|
||||
|
||||
// Double-width vector: 2 * rCount registers viewed as eCount BigElements.
union BigRegVect {
|
||||
FloatRegBits regs[2 * rCount];
|
||||
BigElement elements[eCount];
|
||||
};
|
||||
|
||||
if (%(predicate_test)s)
|
||||
{
|
||||
%(code)s;
|
||||
if (fault == NoFault)
|
||||
{
|
||||
%(op_wb)s;
|
||||
}
|
||||
}
|
||||
|
||||
if (fault == NoFault && machInst.itstateMask != 0) {
|
||||
xc->setMiscReg(MISCREG_ITSTATE, machInst.newItstate);
|
||||
}
|
||||
|
||||
return fault;
|
||||
}
|
||||
}};
|
|
@ -60,3 +60,6 @@
|
|||
|
||||
//Templates for VFP instructions
|
||||
##include "vfp.isa"
|
||||
|
||||
//Templates for Neon instructions
|
||||
##include "neon.isa"
|
||||
|
|
|
@ -65,20 +65,22 @@ class TLB : public BaseTLB
|
|||
{
|
||||
public:
|
||||
// ARM-specific memory request flags. The low bits (selected by AlignmentMask)
// encode an alignment class; the remaining entries are single-bit flags.
enum ArmFlags {
|
||||
AlignmentMask = 0x7,
|
||||
AlignmentMask = 0x1f,
|
||||
|
||||
// Alignment classes; each value is a low-bit mask (size in bytes minus one).
// NOTE(review): presumably these address bits must be zero -- confirm
// against the TLB alignment check.
AlignByte = 0x0,
|
||||
AlignHalfWord = 0x1,
|
||||
AlignWord = 0x3,
|
||||
AlignDoubleWord = 0x7,
|
||||
AlignQuadWord = 0xf,
|
||||
AlignOctWord = 0x1f,
|
||||
|
||||
AllowUnaligned = 0x8,
|
||||
AllowUnaligned = 0x20,
|
||||
// Priv code operating as if it wasn't
|
||||
UserMode = 0x10,
|
||||
UserMode = 0x40,
|
||||
// Because zero otherwise looks like a valid setting and may be used
|
||||
// accidentally, this bit must be non-zero to show it was used on
|
||||
// purpose.
|
||||
MustBeOne = 0x20
|
||||
MustBeOne = 0x80
|
||||
};
|
||||
protected:
|
||||
typedef std::multimap<Addr, int> PageTable;
|
||||
|
|
Loading…
Reference in a new issue