From 8f95144e161ef7bdb264eb572108a98f215785c0 Mon Sep 17 00:00:00 2001 From: Mitch Hayenga Date: Wed, 3 Sep 2014 07:42:52 -0400 Subject: [PATCH] arm: Make memory ops work on 64bit/128-bit quantities Multiple instructions assume only 32-bit load operations are available, this patch increases load sizes to 64-bit or 128-bit for many load pair and load multiple instructions. --- src/arch/arm/insts/macromem.cc | 398 ++++++++++++++---------- src/arch/arm/insts/macromem.hh | 22 +- src/arch/arm/isa/insts/ldr64.isa | 90 +++--- src/arch/arm/isa/insts/macromem.isa | 24 +- src/arch/arm/isa/insts/mem.isa | 4 +- src/arch/arm/isa/templates/macromem.isa | 35 ++- 6 files changed, 360 insertions(+), 213 deletions(-) diff --git a/src/arch/arm/insts/macromem.cc b/src/arch/arm/insts/macromem.cc index 65cd2c3b7..1ea968328 100644 --- a/src/arch/arm/insts/macromem.cc +++ b/src/arch/arm/insts/macromem.cc @@ -61,14 +61,29 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, { uint32_t regs = reglist; uint32_t ones = number_of_ones(reglist); - // Remember that writeback adds a uop or two and the temp register adds one - numMicroops = ones + (writeback ? (load ? 2 : 1) : 0) + 1; + uint32_t mem_ops = ones; - // It's technically legal to do a lot of nothing - if (!ones) + // Copy the base address register if we overwrite it, or if this instruction + // is basically a no-op (we have to do something) + bool copy_base = (bits(reglist, rn) && load) || !ones; + bool force_user = user & !bits(reglist, 15); + bool exception_ret = user & bits(reglist, 15); + bool pc_temp = load && writeback && bits(reglist, 15); + + if (!ones) { numMicroops = 1; + } else if (load) { + numMicroops = ((ones + 1) / 2) + + ((ones % 2 == 0 && exception_ret) ? 1 : 0) + + (copy_base ? 1 : 0) + + (writeback? 1 : 0) + + (pc_temp ? 1 : 0); + } else { + numMicroops = ones + (writeback ? 
1 : 0); + } microOps = new StaticInstPtr[numMicroops]; + uint32_t addr = 0; if (!up) @@ -81,94 +96,129 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, // Add 0 to Rn and stick it in ureg0. // This is equivalent to a move. - *uop = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0); + if (copy_base) + *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0); unsigned reg = 0; - unsigned regIdx = 0; - bool force_user = user & !bits(reglist, 15); - bool exception_ret = user & bits(reglist, 15); + while (mem_ops != 0) { + // Do load operations in pairs if possible + if (load && mem_ops >= 2 && + !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) { + // 64-bit memory operation + // Find 2 set register bits (clear them after finding) + unsigned reg_idx1; + unsigned reg_idx2; - for (int i = 0; i < ones; i++) { - // Find the next register. - while (!bits(regs, reg)) - reg++; - replaceBits(regs, reg, 0); + // Find the first register + while (!bits(regs, reg)) reg++; + replaceBits(regs, reg, 0); + reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg; - regIdx = reg; - if (force_user) { - regIdx = intRegInMode(MODE_USER, regIdx); - } + // Find the second register + while (!bits(regs, reg)) reg++; + replaceBits(regs, reg, 0); + reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg; + + // Load into temp reg if necessary + if (reg_idx2 == INTREG_PC && pc_temp) + reg_idx2 = INTREG_UREG1; + + // Actually load both registers from memory + *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2, + copy_base ? 
INTREG_UREG0 : rn, up, addr); + + if (!writeback && reg_idx2 == INTREG_PC) { + // No writeback if idx==pc, set appropriate flags + (*uop)->setFlag(StaticInst::IsControl); + (*uop)->setFlag(StaticInst::IsIndirectControl); + + if (!(condCode == COND_AL || condCode == COND_UC)) + (*uop)->setFlag(StaticInst::IsCondControl); + else + (*uop)->setFlag(StaticInst::IsUncondControl); + } + + if (up) addr += 8; + else addr -= 8; + mem_ops -= 2; + } else { + // 32-bit memory operation + // Find register for operation + unsigned reg_idx; + while(!bits(regs, reg)) reg++; + replaceBits(regs, reg, 0); + reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg; + + if (load) { + if (writeback && reg_idx == INTREG_PC) { + // If this instruction changes the PC and performs a + // writeback, ensure the pc load/branch is the last uop. + // Load into a temp reg here. + *uop = new MicroLdrUop(machInst, INTREG_UREG1, + copy_base ? INTREG_UREG0 : rn, up, addr); + } else if (reg_idx == INTREG_PC && exception_ret) { + // Special handling for exception return + *uop = new MicroLdrRetUop(machInst, reg_idx, + copy_base ? INTREG_UREG0 : rn, up, addr); + } else { + // standard single load uop + *uop = new MicroLdrUop(machInst, reg_idx, + copy_base ? INTREG_UREG0 : rn, up, addr); + } + + // Loading pc as last operation? Set appropriate flags. + if (!writeback && reg_idx == INTREG_PC) { + (*uop)->setFlag(StaticInst::IsControl); + (*uop)->setFlag(StaticInst::IsIndirectControl); - if (load) { - if (writeback && i == ones - 1) { - // If it's a writeback and this is the last register - // do the load into a temporary register which we'll move - // into the final one later - *++uop = new MicroLdrUop(machInst, INTREG_UREG1, INTREG_UREG0, - up, addr); - } else { - // Otherwise just do it normally - if (reg == INTREG_PC && exception_ret) { - // This must be the exception return form of ldm. 
- *++uop = new MicroLdrRetUop(machInst, regIdx, - INTREG_UREG0, up, addr); if (!(condCode == COND_AL || condCode == COND_UC)) (*uop)->setFlag(StaticInst::IsCondControl); else (*uop)->setFlag(StaticInst::IsUncondControl); - } else { - *++uop = new MicroLdrUop(machInst, regIdx, - INTREG_UREG0, up, addr); - if (reg == INTREG_PC) { - (*uop)->setFlag(StaticInst::IsControl); - if (!(condCode == COND_AL || condCode == COND_UC)) - (*uop)->setFlag(StaticInst::IsCondControl); - else - (*uop)->setFlag(StaticInst::IsUncondControl); - (*uop)->setFlag(StaticInst::IsIndirectControl); - } } + } else { + *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr); } - } else { - *++uop = new MicroStrUop(machInst, regIdx, INTREG_UREG0, up, addr); + + if (up) addr += 4; + else addr -= 4; + --mem_ops; } - if (up) - addr += 4; - else - addr -= 4; + // Load/store micro-op generated, go to next uop + ++uop; } if (writeback && ones) { - // put the register update after we're done all loading + // Perform writeback uop operation if (up) - *++uop = new MicroAddiUop(machInst, rn, rn, ones * 4); + *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4); else - *++uop = new MicroSubiUop(machInst, rn, rn, ones * 4); + *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4); + + // Write PC after address writeback? + if (pc_temp) { + if (exception_ret) { + *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1); + } else { + *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1); + } + (*uop)->setFlag(StaticInst::IsControl); + (*uop)->setFlag(StaticInst::IsIndirectControl); - // If this was a load move the last temporary value into place - // this way we can't take an exception after we update the base - // register. 
- if (load && reg == INTREG_PC && exception_ret) { - *++uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1); if (!(condCode == COND_AL || condCode == COND_UC)) (*uop)->setFlag(StaticInst::IsCondControl); else (*uop)->setFlag(StaticInst::IsUncondControl); - } else if (load) { - *++uop = new MicroUopRegMov(machInst, regIdx, INTREG_UREG1); - if (reg == INTREG_PC) { - (*uop)->setFlag(StaticInst::IsControl); - (*uop)->setFlag(StaticInst::IsCondControl); - (*uop)->setFlag(StaticInst::IsIndirectControl); - // This is created as a RAS POP - if (rn == INTREG_SP) - (*uop)->setFlag(StaticInst::IsReturn); - } + if (rn == INTREG_SP) + (*uop)->setFlag(StaticInst::IsReturn); + + ++uop; } } + --uop; (*uop)->setLastMicroop(); /* Take the control flags from the last microop for the macroop */ @@ -176,16 +226,15 @@ MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst, setFlag(StaticInst::IsControl); if ((*uop)->isCondCtrl()) setFlag(StaticInst::IsCondControl); + if ((*uop)->isUncondCtrl()) + setFlag(StaticInst::IsUncondControl); if ((*uop)->isIndirectCtrl()) setFlag(StaticInst::IsIndirectControl); if ((*uop)->isReturn()) setFlag(StaticInst::IsReturn); - for (StaticInstPtr *curUop = microOps; - !(*curUop)->isLastMicroop(); curUop++) { - MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get()); - assert(uopPtr); - uopPtr->setDelayedCommit(); + for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) { + (*uop)->setDelayedCommit(); } } @@ -196,95 +245,96 @@ PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2) : PredMacroOp(mnem, machInst, __opClass) { + bool post = (mode == AddrMd_PostIndex); bool writeback = (mode != AddrMd_Offset); - numMicroops = 1 + (size / 4) + (writeback ? 1 : 0); + + if (load) { + // Use integer rounding to round up loads of size 4 + numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0); + } else { + numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 
1 : 0); + } microOps = new StaticInstPtr[numMicroops]; StaticInstPtr *uop = microOps; - bool post = (mode == AddrMd_PostIndex); - rn = makeSP(rn); - *uop = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, post ? 0 : imm); + if (!post) { + *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn, + post ? 0 : imm); + } if (fp) { if (size == 16) { if (load) { - *++uop = new MicroLdrQBFpXImmUop(machInst, rt, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); - *++uop = new MicroLdrQTFpXImmUop(machInst, rt, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); - *++uop = new MicroLdrQBFpXImmUop(machInst, rt2, - INTREG_UREG0, 16, noAlloc, exclusive, acrel); - *++uop = new MicroLdrQTFpXImmUop(machInst, rt2, - INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *uop++ = new MicroLdFp16Uop(machInst, rt, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *uop++ = new MicroLdFp16Uop(machInst, rt2, + post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel); } else { - *++uop = new MicroStrQBFpXImmUop(machInst, rt, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); - *++uop = new MicroStrQTFpXImmUop(machInst, rt, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); - *++uop = new MicroStrQBFpXImmUop(machInst, rt2, - INTREG_UREG0, 16, noAlloc, exclusive, acrel); - *++uop = new MicroStrQTFpXImmUop(machInst, rt2, - INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *uop++ = new MicroStrQBFpXImmUop(machInst, rt, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *uop++ = new MicroStrQTFpXImmUop(machInst, rt, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *uop++ = new MicroStrQBFpXImmUop(machInst, rt2, + post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel); + *uop++ = new MicroStrQTFpXImmUop(machInst, rt2, + post ? 
rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel); } } else if (size == 8) { if (load) { - *++uop = new MicroLdrFpXImmUop(machInst, rt, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); - *++uop = new MicroLdrFpXImmUop(machInst, rt2, - INTREG_UREG0, 8, noAlloc, exclusive, acrel); + *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); } else { - *++uop = new MicroStrFpXImmUop(machInst, rt, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); - *++uop = new MicroStrFpXImmUop(machInst, rt2, - INTREG_UREG0, 8, noAlloc, exclusive, acrel); + *uop++ = new MicroStrFpXImmUop(machInst, rt, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *uop++ = new MicroStrFpXImmUop(machInst, rt2, + post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel); } } else if (size == 4) { if (load) { - *++uop = new MicroLdrDFpXImmUop(machInst, rt, rt2, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); } else { - *++uop = new MicroStrDFpXImmUop(machInst, rt, rt2, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); } } } else { if (size == 8) { if (load) { - *++uop = new MicroLdrXImmUop(machInst, rt, INTREG_UREG0, - 0, noAlloc, exclusive, acrel); - *++uop = new MicroLdrXImmUop(machInst, rt2, INTREG_UREG0, - size, noAlloc, exclusive, acrel); + *uop++ = new MicroLdPairUop(machInst, rt, rt2, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); } else { - *++uop = new MicroStrXImmUop(machInst, rt, INTREG_UREG0, + *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); - *++uop = new MicroStrXImmUop(machInst, rt2, INTREG_UREG0, + *uop++ = new MicroStrXImmUop(machInst, rt2, post ? 
rn : INTREG_UREG0, size, noAlloc, exclusive, acrel); } } else if (size == 4) { if (load) { if (signExt) { - *++uop = new MicroLdrDSXImmUop(machInst, rt, rt2, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); } else { - *++uop = new MicroLdrDUXImmUop(machInst, rt, rt2, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); } } else { - *++uop = new MicroStrDXImmUop(machInst, rt, rt2, - INTREG_UREG0, 0, noAlloc, exclusive, acrel); + *uop++ = new MicroStrDXImmUop(machInst, rt, rt2, + post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel); } } } if (writeback) { - *++uop = new MicroAddXiUop(machInst, rn, INTREG_UREG0, + *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0, post ? imm : 0); } - (*uop)->setLastMicroop(); + assert(uop == &microOps[numMicroops]); + (*--uop)->setLastMicroop(); for (StaticInstPtr *curUop = microOps; !(*curUop)->isLastMicroop(); curUop++) { @@ -297,18 +347,19 @@ BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst, IntRegIndex base, int64_t imm) : PredMacroOp(mnem, machInst, __opClass) { - numMicroops = 2; + numMicroops = load ? 
1 : 2; microOps = new StaticInstPtr[numMicroops]; + StaticInstPtr *uop = microOps; + if (load) { - microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); - microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); + *uop = new MicroLdFp16Uop(machInst, dest, base, imm); } else { - microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); - microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + (*uop)->setDelayedCommit(); + *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm); } - microOps[0]->setDelayedCommit(); - microOps[1]->setLastMicroop(); + (*uop)->setLastMicroop(); } BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, @@ -316,21 +367,24 @@ BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst, IntRegIndex base, int64_t imm) : PredMacroOp(mnem, machInst, __opClass) { - numMicroops = 3; + numMicroops = load ? 2 : 3; microOps = new StaticInstPtr[numMicroops]; - if (load) { - microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, 0); - microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, 0); - } else { - microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, 0); - microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, 0); - } - microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + StaticInstPtr *uop = microOps; - microOps[0]->setDelayedCommit(); - microOps[1]->setDelayedCommit(); - microOps[2]->setLastMicroop(); + if (load) { + *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0); + } else { + *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0); + *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0); + } + *uop = new MicroAddXiUop(machInst, base, base, imm); + (*uop)->setLastMicroop(); + + for (StaticInstPtr *curUop = microOps; + !(*curUop)->isLastMicroop(); curUop++) { + (*curUop)->setDelayedCommit(); + } } BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, @@ 
-338,21 +392,24 @@ BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst, IntRegIndex base, int64_t imm) : PredMacroOp(mnem, machInst, __opClass) { - numMicroops = 3; + numMicroops = load ? 2 : 3; microOps = new StaticInstPtr[numMicroops]; - if (load) { - microOps[0] = new MicroLdrQBFpXImmUop(machInst, dest, base, imm); - microOps[1] = new MicroLdrQTFpXImmUop(machInst, dest, base, imm); - } else { - microOps[0] = new MicroStrQBFpXImmUop(machInst, dest, base, imm); - microOps[1] = new MicroStrQTFpXImmUop(machInst, dest, base, imm); - } - microOps[2] = new MicroAddXiUop(machInst, base, base, imm); + StaticInstPtr *uop = microOps; - microOps[0]->setDelayedCommit(); - microOps[1]->setDelayedCommit(); - microOps[2]->setLastMicroop(); + if (load) { + *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm); + } else { + *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm); + *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm); + } + *uop = new MicroAddXiUop(machInst, base, base, imm); + (*uop)->setLastMicroop(); + + for (StaticInstPtr *curUop = microOps; + !(*curUop)->isLastMicroop(); curUop++) { + (*curUop)->setDelayedCommit(); + } } BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, @@ -361,23 +418,23 @@ BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst, ArmExtendType type, int64_t imm) : PredMacroOp(mnem, machInst, __opClass) { - numMicroops = 2; + numMicroops = load ? 
1 : 2; microOps = new StaticInstPtr[numMicroops]; + StaticInstPtr *uop = microOps; + if (load) { - microOps[0] = new MicroLdrQBFpXRegUop(machInst, dest, base, - offset, type, imm); - microOps[1] = new MicroLdrQTFpXRegUop(machInst, dest, base, - offset, type, imm); + *uop = new MicroLdFp16RegUop(machInst, dest, base, + offset, type, imm); } else { - microOps[0] = new MicroStrQBFpXRegUop(machInst, dest, base, - offset, type, imm); - microOps[1] = new MicroStrQTFpXRegUop(machInst, dest, base, - offset, type, imm); + *uop = new MicroStrQBFpXRegUop(machInst, dest, base, + offset, type, imm); + (*uop)->setDelayedCommit(); + *++uop = new MicroStrQTFpXRegUop(machInst, dest, base, + offset, type, imm); } - microOps[0]->setDelayedCommit(); - microOps[1]->setLastMicroop(); + (*uop)->setLastMicroop(); } BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, @@ -385,14 +442,11 @@ BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst, int64_t imm) : PredMacroOp(mnem, machInst, __opClass) { - numMicroops = 2; + numMicroops = 1; microOps = new StaticInstPtr[numMicroops]; - microOps[0] = new MicroLdrQBFpXLitUop(machInst, dest, imm); - microOps[1] = new MicroLdrQTFpXLitUop(machInst, dest, imm); - - microOps[0]->setDelayedCommit(); - microOps[1]->setLastMicroop(); + microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm); + microOps[0]->setLastMicroop(); } VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, @@ -1538,4 +1592,20 @@ MicroMemOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const return ss.str(); } +std::string +MicroMemPairOp::generateDisassembly(Addr pc, const SymbolTable *symtab) const +{ + std::stringstream ss; + printMnemonic(ss); + printReg(ss, dest); + ss << ","; + printReg(ss, dest2); + ss << ", ["; + printReg(ss, urb); + ss << ", "; + ccprintf(ss, "#%d", imm); + ss << "]"; + return ss.str(); +} + } diff --git a/src/arch/arm/insts/macromem.hh b/src/arch/arm/insts/macromem.hh index 
fc8e3e1b7..412337d06 100644 --- a/src/arch/arm/insts/macromem.hh +++ b/src/arch/arm/insts/macromem.hh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2013 ARM Limited + * Copyright (c) 2010-2014 ARM Limited * All rights reserved * * The license below extends only to copyright in the software and shall @@ -395,6 +395,26 @@ class MicroMemOp : public MicroIntImmOp std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; }; +class MicroMemPairOp : public MicroOp +{ + protected: + RegIndex dest, dest2, urb; + bool up; + int32_t imm; + unsigned memAccessFlags; + + MicroMemPairOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, + RegIndex _dreg1, RegIndex _dreg2, RegIndex _base, + bool _up, uint8_t _imm) + : MicroOp(mnem, machInst, __opClass), + dest(_dreg1), dest2(_dreg2), urb(_base), up(_up), imm(_imm), + memAccessFlags(TLB::MustBeOne | TLB::AlignWord) + { + } + + std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const; +}; + /** * Base class for microcoded integer memory instructions. */ diff --git a/src/arch/arm/isa/insts/ldr64.isa b/src/arch/arm/isa/insts/ldr64.isa index 78460f661..eea925e66 100644 --- a/src/arch/arm/isa/insts/ldr64.isa +++ b/src/arch/arm/isa/insts/ldr64.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2011-2013 ARM Limited +// Copyright (c) 2011-2014 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -187,35 +187,32 @@ let {{ AA64FpDestP2_uw = 0; AA64FpDestP3_uw = 0; ''' - elif self.size == 8 or (self.size == 16 and not self.top): + elif self.size == 8: accCode = ''' uint64_t data = cSwap(Mem%s, isBigEndian64(xc->tcBase())); AA64FpDestP0_uw = (uint32_t)data; AA64FpDestP1_uw = (data >> 32); + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; ''' - # Only zero out the other half if this isn't part of a - # pair of 8 byte loads implementing a 16 byte load. 
- if self.size == 8: - accCode += ''' - AA64FpDestP2_uw = 0; - AA64FpDestP3_uw = 0; - ''' - elif self.size == 16 and self.top: + elif self.size == 16: accCode = ''' - uint64_t data = cSwap(Mem%s, - isBigEndian64(xc->tcBase())); - AA64FpDestP2_uw = (uint32_t)data; - AA64FpDestP3_uw = (data >> 32); + Twin64_t data = cSwap(Mem%s, + isBigEndian64(xc->tcBase())); + + + AA64FpDestP0_uw = (uint32_t)data.a; + AA64FpDestP1_uw = (data.a >> 32); + AA64FpDestP2_uw = (uint32_t)data.b; + AA64FpDestP3_uw = (data.b >> 32); ''' elif self.flavor == "widen" or self.size == 8: accCode = "XDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" else: accCode = "WDest = cSwap(Mem%s, isBigEndian64(xc->tcBase()));" - if self.size == 16: - accCode = accCode % buildMemSuffix(self.sign, 8) - else: - accCode = accCode % buildMemSuffix(self.sign, self.size) + + accCode = accCode % buildMemSuffix(self.sign, self.size) self.codeBlobs["memacc_code"] = accCode @@ -231,17 +228,29 @@ let {{ # Code that actually handles the access if self.flavor == "fp": - accCode = ''' - uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase())); - AA64FpDestP0_uw = (uint32_t)data; - AA64FpDestP1_uw = 0; - AA64FpDestP2_uw = 0; - AA64FpDestP3_uw = 0; - AA64FpDest2P0_uw = (data >> 32); - AA64FpDest2P1_uw = 0; - AA64FpDest2P2_uw = 0; - AA64FpDest2P3_uw = 0; - ''' + if self.size == 4: + accCode = ''' + uint64_t data = cSwap(Mem_ud, isBigEndian64(xc->tcBase())); + AA64FpDestP0_uw = (uint32_t)data; + AA64FpDestP1_uw = 0; + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + AA64FpDest2P0_uw = (data >> 32); + AA64FpDest2P1_uw = 0; + AA64FpDest2P2_uw = 0; + AA64FpDest2P3_uw = 0; + ''' + elif self.size == 8: + accCode = ''' + AA64FpDestP0_uw = (uint32_t)Mem_tud.a; + AA64FpDestP1_uw = (uint32_t)(Mem_tud.a >> 32); + AA64FpDestP2_uw = 0; + AA64FpDestP3_uw = 0; + AA64FpDest2P0_uw = (uint32_t)Mem_tud.b; + AA64FpDest2P1_uw = (uint32_t)(Mem_tud.b >> 32); + AA64FpDest2P2_uw = 0; + AA64FpDest2P3_uw = 0; + ''' else: if self.sign: if self.size == 
4: @@ -253,8 +262,8 @@ let {{ ''' elif self.size == 8: accCode = ''' - XDest = sext<64>(Mem_tud.a); - XDest2 = sext<64>(Mem_tud.b); + XDest = Mem_tud.a; + XDest2 = Mem_tud.b; ''' else: if self.size == 4: @@ -416,6 +425,11 @@ let {{ decConstBase = 'LoadStoreLitU64' micro = True + LoadImmDU64("ldp_uop", "MicroLdPairUop", 8).emit() + LoadImmDU64("ldp_fp8_uop", "MicroLdPairFp8Uop", 8, flavor="fp").emit() + LoadImmU64("ldfp16_uop", "MicroLdFp16Uop", 16, flavor="fp").emit() + LoadReg64("ldfp16reg_uop", "MicroLdFp16RegUop", 16, flavor="fp").emit() + LoadImmDouble64("ldaxp", "LDAXPW64", 4, flavor="acexp").emit() LoadImmDouble64("ldaxp", "LDAXPX64", 8, flavor="acexp").emit() LoadImmDouble64("ldxp", "LDXPW64", 4, flavor="exp").emit() @@ -428,18 +442,8 @@ let {{ LoadRegU64("ldrfpxr_uop", "MicroLdrFpXRegUop", 8, flavor="fp").emit() LoadLitU64("ldrfpxl_uop", "MicroLdrFpXLitUop", 8, literal=True, flavor="fp").emit() - LoadImmU64("ldrqbfpxi_uop", "MicroLdrQBFpXImmUop", - 16, flavor="fp", top = False).emit() - LoadRegU64("ldrqbfpxr_uop", "MicroLdrQBFpXRegUop", - 16, flavor="fp", top = False).emit() - LoadLitU64("ldrqbfpxl_uop", "MicroLdrQBFpXLitUop", - 16, literal=True, flavor="fp", top = False).emit() - LoadImmU64("ldrqtfpxi_uop", "MicroLdrQTFpXImmUop", - 16, flavor="fp", top = True).emit() - LoadRegU64("ldrqtfpxr_uop", "MicroLdrQTFpXRegUop", - 16, flavor="fp", top = True).emit() - LoadLitU64("ldrqtfpxl_uop", "MicroLdrQTFpXLitUop", - 16, literal=True, flavor="fp", top = True).emit() + LoadLitU64("ldfp16_lit__uop", "MicroLdFp16LitUop", + 16, literal=True, flavor="fp").emit() LoadImmDU64("ldrduxi_uop", "MicroLdrDUXImmUop", 4, sign=False).emit() LoadImmDU64("ldrdsxi_uop", "MicroLdrDSXImmUop", 4, sign=True).emit() LoadImmDU64("ldrdfpxi_uop", "MicroLdrDFpXImmUop", 4, flavor="fp").emit() diff --git a/src/arch/arm/isa/insts/macromem.isa b/src/arch/arm/isa/insts/macromem.isa index f164595dd..41060ff01 100644 --- a/src/arch/arm/isa/insts/macromem.isa +++ 
b/src/arch/arm/isa/insts/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2013 ARM Limited +// Copyright (c) 2010-2014 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -55,6 +55,18 @@ let {{ 'predicate_test': predicateTest}, ['IsMicroop']) + microLdr2UopCode = ''' + uint64_t data = Mem_ud; + Dest = cSwap((uint32_t) data, ((CPSR)Cpsr).e); + Dest2 = cSwap((uint32_t) (data >> 32), ((CPSR)Cpsr).e); + ''' + microLdr2UopIop = InstObjParams('ldr2_uop', 'MicroLdr2Uop', + 'MicroMemPairOp', + {'memacc_code': microLdr2UopCode, + 'ea_code': 'EA = URb + (up ? imm : -imm);', + 'predicate_test': predicateTest}, + ['IsMicroop']) + microLdrFpUopCode = "Fa_uw = cSwap(Mem_uw, ((CPSR)Cpsr).e);" microLdrFpUopIop = InstObjParams('ldrfp_uop', 'MicroLdrFpUop', 'MicroMemOp', @@ -159,8 +171,8 @@ let {{ header_output = decoder_output = exec_output = '' - loadIops = (microLdrUopIop, microLdrRetUopIop, microLdrFpUopIop, - microLdrDBFpUopIop, microLdrDTFpUopIop) + loadIops = (microLdrUopIop, microLdrRetUopIop, + microLdrFpUopIop, microLdrDBFpUopIop, microLdrDTFpUopIop) storeIops = (microStrUopIop, microStrFpUopIop, microStrDBFpUopIop, microStrDTFpUopIop) for iop in loadIops + storeIops: @@ -174,6 +186,12 @@ let {{ exec_output += StoreExecute.subst(iop) + \ StoreInitiateAcc.subst(iop) + \ StoreCompleteAcc.subst(iop) + + header_output += MicroMemPairDeclare.subst(microLdr2UopIop) + decoder_output += MicroMemPairConstructor.subst(microLdr2UopIop) + exec_output += LoadExecute.subst(microLdr2UopIop) + \ + LoadInitiateAcc.subst(microLdr2UopIop) + \ + LoadCompleteAcc.subst(microLdr2UopIop) }}; let {{ diff --git a/src/arch/arm/isa/insts/mem.isa b/src/arch/arm/isa/insts/mem.isa index aed6bab0d..7323b02c9 100644 --- a/src/arch/arm/isa/insts/mem.isa +++ b/src/arch/arm/isa/insts/mem.isa @@ -193,7 +193,9 @@ let {{ return Name def buildMemSuffix(sign, size): - if size == 8: + if size == 16: + memSuffix = '_tud' + elif 
size == 8: memSuffix = '_ud' elif size == 4: if sign: diff --git a/src/arch/arm/isa/templates/macromem.isa b/src/arch/arm/isa/templates/macromem.isa index 9a6de16cc..b252c91e7 100644 --- a/src/arch/arm/isa/templates/macromem.isa +++ b/src/arch/arm/isa/templates/macromem.isa @@ -1,6 +1,6 @@ // -*- mode:c++ -*- -// Copyright (c) 2010-2013 ARM Limited +// Copyright (c) 2010-2014 ARM Limited // All rights reserved // // The license below extends only to copyright in the software and shall @@ -77,6 +77,39 @@ def template MicroMemConstructor {{ } }}; + +def template MicroMemPairDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + %(class_name)s(ExtMachInst machInst, + RegIndex _dreg1, RegIndex _dreg2, RegIndex _base, + bool _up, uint8_t _imm); + %(BasicExecDeclare)s + %(InitiateAccDeclare)s + %(CompleteAccDeclare)s + }; +}}; + +def template MicroMemPairConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + RegIndex _dreg1, + RegIndex _dreg2, + RegIndex _base, + bool _up, + uint8_t _imm) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _dreg1, _dreg2, _base, _up, _imm) + { + %(constructor)s; + if (!(condCode == COND_AL || condCode == COND_UC)) { + for (int x = 0; x < _numDestRegs; x++) { + _srcRegIdx[_numSrcRegs++] = _destRegIdx[x]; + } + } + } +}}; + //////////////////////////////////////////////////////////////////// // // Neon load/store microops