#include "arch/isa_traits.hh"
#include "base/statistics.hh"
#include "cpu/exec_context.hh"
#include "cpu/exetrace.hh"
#include "cpu/ozone/front_end.hh"
#include "mem/mem_interface.hh"
#include "sim/byte_swap.hh"

using namespace TheISA;

// NOTE(review): this file was reconstructed from a copy in which line breaks
// and every "<...>" template argument list had been lost.  "template
// <class Impl>" / "FrontEnd<Impl>" follow from the uses of "Impl::DynInstPtr"
// below; please confirm them against cpu/ozone/front_end.hh.

/**
 * Builds the front end from the simulation parameters.
 *
 * Initializes the branch predictor, the instruction buffer limits, the fetch
 * width and the free physical register count from @p params, then allocates
 * the memory request and the buffers used to hold a fetched cache line.
 *
 * @param params Parameter object supplying icacheInterface,
 *               maxInstBufferSize, frontEndWidth and numPhysicalRegs (and
 *               pTable when not in full-system mode).
 */
template <class Impl>
FrontEnd<Impl>::FrontEnd(Params *params)
    : branchPred(params),
      cacheCompletionEvent(this),
      icacheInterface(params->icacheInterface),
      instBufferSize(0),
      maxInstBufferSize(params->maxInstBufferSize),
      width(params->frontEndWidth),
      freeRegs(params->numPhysicalRegs),
      numPhysRegs(params->numPhysicalRegs),
      serializeNext(false)
{
    status = Idle;

    // Size of cache block; fall back to 64 bytes when there is no icache.
    cacheBlkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;

    assert(isPowerOf2(cacheBlkSize));

    // Create mask to get rid of offset bits.
    cacheBlkMask = (cacheBlkSize - 1);

    // Setup Memory Request.
    memReq = new MemReq();
    memReq->asid = 0;
    // The request buffer must hold a whole cache block: fetchCacheLine()
    // issues reads of cacheBlkSize bytes and memReq->size bytes are later
    // copied out of this buffer.  A fixed 64-byte buffer would be overrun
    // for larger block sizes.
    memReq->data = new uint8_t[cacheBlkSize];

    // Create space to store a cache line.
    cacheData = new uint8_t[cacheBlkSize];

    fetchCacheLineNextCycle = true;

    cacheBlkValid = false;

#if !FULL_SYSTEM
    pTable = params->pTable;
#endif
}

/**
 * @return The name of this object: the owning CPU's name followed by
 *         ".frontend".
 */
template <class Impl>
std::string
FrontEnd<Impl>::name() const
{
    return cpu->name() + ".frontend";
}

/**
 * Stores the communication time buffer and grabs the wire used to read
 * information coming back from commit.
 *
 * NOTE(review): the template argument of TimeBuffer was missing in the
 * damaged source; CommStruct is assumed here -- confirm against the
 * declaration in front_end.hh.
 *
 * @param _comm Time buffer shared with the rest of the pipeline.
 */
template <class Impl>
void
FrontEnd<Impl>::setCommBuffer(TimeBuffer<CommStruct> *_comm)
{
    comm = _comm;
    // @todo: Hardcoded for now.  Allow this to be set by a latency.
    fromCommit = comm->getWire(-1);
}

/**
 * Records the execution context and attaches it to the fetch memory request.
 *
 * @param xc_ptr Execution context to use for fetches.
 */
template <class Impl>
void
FrontEnd<Impl>::setXC(ExecContext *xc_ptr)
{
    xc = xc_ptr;
    memReq->xc = xc;
}

/**
 * Registers all front-end statistics (names, descriptions, flags and
 * formulas) and then asks the branch predictor to register its own.
 */
template <class Impl>
void
FrontEnd<Impl>::regStats()
{
    icacheStallCycles
        .name(name() + ".icacheStallCycles")
        .desc("Number of cycles fetch is stalled on an Icache miss")
        .prereq(icacheStallCycles);

    fetchedInsts
        .name(name() + ".fetchedInsts")
        .desc("Number of instructions fetch has processed")
        .prereq(fetchedInsts);

    fetchedBranches
        .name(name() + ".fetchedBranches")
        .desc("Number of fetched branches")
        .prereq(fetchedBranches);

    predictedBranches
        .name(name() + ".predictedBranches")
        .desc("Number of branches that fetch has predicted taken")
        .prereq(predictedBranches);

    fetchCycles
        .name(name() + ".fetchCycles")
        .desc("Number of cycles fetch has run and was not squashing or"
              " blocked")
        .prereq(fetchCycles);

    fetchIdleCycles
        .name(name() + ".fetchIdleCycles")
        .desc("Number of cycles fetch was idle")
        .prereq(fetchIdleCycles);

    fetchSquashCycles
        .name(name() + ".fetchSquashCycles")
        .desc("Number of cycles fetch has spent squashing")
        .prereq(fetchSquashCycles);

    fetchBlockedCycles
        .name(name() + ".fetchBlockedCycles")
        .desc("Number of cycles fetch has spent blocked")
        .prereq(fetchBlockedCycles);

    fetchedCacheLines
        .name(name() + ".fetchedCacheLines")
        .desc("Number of cache lines fetched")
        .prereq(fetchedCacheLines);

    fetchNisnDist
        .init(/* base value */ 0,
              /* last value */ width,
              /* bucket size */ 1)
        .name(name() + ".rateDist")
        .desc("Number of instructions fetched each cycle (Total)")
        .flags(Stats::pdf);

    idleRate
        .name(name() + ".idleRate")
        .desc("Percent of cycles fetch was idle")
        .prereq(idleRate);
    idleRate = fetchIdleCycles * 100 / cpu->numCycles;

    branchRate
        .name(name() + ".branchRate")
        .desc("Number of branch fetches per cycle")
        .flags(Stats::total);
    branchRate = fetchedBranches / cpu->numCycles;

    fetchRate
        .name(name() + ".rate")
        .desc("Number of inst fetches per cycle")
        .flags(Stats::total);
    fetchRate = fetchedInsts / cpu->numCycles;

    IFQCount
        .name(name() + ".IFQ:count")
        .desc("cumulative IFQ occupancy")
        ;

    IFQFcount
        .name(name() + ".IFQ:fullCount")
        .desc("cumulative IFQ full count")
        .flags(Stats::total)
        ;

    IFQOccupancy
        .name(name() + ".IFQ:occupancy")
        .desc("avg IFQ occupancy (inst's)")
        ;
    IFQOccupancy = IFQCount / cpu->numCycles;

    IFQLatency
        .name(name() + ".IFQ:latency")
        .desc("avg IFQ occupant latency (cycle's)")
        .flags(Stats::total)
        ;

    IFQFullRate
        .name(name() + ".IFQ:fullRate")
        .desc("fraction of time (cycles) IFQ was full")
        .flags(Stats::total)
        ;
    IFQFullRate = IFQFcount * Stats::constant(100) / cpu->numCycles;

    dispatchCountStat
        .name(name() + ".DIS:count")
        .desc("cumulative count of dispatched insts")
        .flags(Stats::total)
        ;

    dispatchedSerializing
        .name(name() + ".DIS:serializingInsts")
        .desc("count of serializing insts dispatched")
        .flags(Stats::total)
        ;

    dispatchedTempSerializing
        .name(name() + ".DIS:tempSerializingInsts")
        .desc("count of temporary serializing insts dispatched")
        .flags(Stats::total)
        ;

    dispatchSerializeStallCycles
        .name(name() + ".DIS:serializeStallCycles")
        .desc("count of cycles dispatch stalled for serializing inst")
        .flags(Stats::total)
        ;

    dispatchRate
        .name(name() + ".DIS:rate")
        .desc("dispatched insts per cycle")
        .flags(Stats::total)
        ;
    dispatchRate = dispatchCountStat / cpu->numCycles;

    regIntFull
        .name(name() + ".REG:int:full")
        .desc("number of cycles where there were no INT registers")
        ;

    regFpFull
        .name(name() + ".REG:fp:full")
        .desc("number of cycles where there were no FP registers")
        ;

    // Defined last because it is expressed in terms of the occupancy and
    // dispatch-rate formulas set up above.
    IFQLatency = IFQOccupancy / dispatchRate;

    branchPred.regStats();
}

/**
 * Advances the front end by one cycle.
 *
 * Updates the branch predictor with commit information, accounts for
 * instruction-buffer occupancy, resolves the current status (completed or
 * outstanding icache miss, blocked states), fetches a new cache line when
 * one was requested, and then processes up to @c width instructions into the
 * instruction buffer while there is room.
 */
template <class Impl>
void
FrontEnd<Impl>::tick()
{
    // @todo: Maybe I want to just have direct communication...
    if (fromCommit->doneSeqNum) {
        branchPred.update(fromCommit->doneSeqNum, 0);
    }

    IFQCount += instBufferSize;
    IFQFcount += instBufferSize == maxInstBufferSize;

    // Fetch cache line
    if (status == IcacheMissComplete) {
        cacheBlkValid = true;

        // Later assignments take precedence: a rename block overrides a
        // serialize block, which overrides Running.
        status = Running;
        if (barrierInst)
            status = SerializeBlocked;
        if (freeRegs <= 0)
            status = RenameBlocked;
        checkBE();
    } else if (status == IcacheMissStall) {
        DPRINTF(FE, "Still in Icache miss stall.\n");
        icacheStallCycles++;
        return;
    }

    if (status == RenameBlocked || status == SerializeBlocked ||
        status == BEBlocked) {
        // This might cause the front end to run even though it
        // shouldn't, but this should only be a problem for one cycle.
        // Also will cause a one cycle bubble between changing state
        // and restarting.
        DPRINTF(FE, "In blocked status.\n");

        fetchBlockedCycles++;

        if (status == SerializeBlocked) {
            dispatchSerializeStallCycles++;
        }
        updateStatus();
        return;
    } else if (status != IcacheMissComplete) {
        if (fetchCacheLineNextCycle) {
            Fault fault = fetchCacheLine();
            if (fault != NoFault) {
                handleFault(fault);
                return;
            }
            fetchCacheLineNextCycle = false;
        }
        // If miss, stall until it returns.
        if (status == IcacheMissStall) {
            // Tell CPU to not tick me for now.
            return;
        }
    }

    fetchCycles++;

    int num_inst = 0;

    // Otherwise loop and process instructions.
    // One way to hack infinite width is to set width and maxInstBufferSize
    // both really high.  Inelegant, but probably will work.
    while (num_inst < width &&
           instBufferSize < maxInstBufferSize) {
        // Get instruction from cache line.
        DynInstPtr inst = getInstFromCacheline();

        if (!inst) {
            // PC is no longer in the cache line, end fetch.
            // Might want to check this at the end of the cycle so that
            // there's no cycle lost to checking for a new cache line.
            DPRINTF(FE, "Need to get new cache line\n");
            fetchCacheLineNextCycle = true;
            break;
        }

//        if (generalizeFetch) {
        processInst(inst);

        // A serialize-before instruction is held back as the barrier
        // instruction instead of being buffered this cycle.
        if (status == SerializeBlocked) {
            break;
        }

        // Possibly push into a time buffer that estimates the front end
        // latency
        instBuffer.push_back(inst);
        ++instBufferSize;
        ++num_inst;
//        } else {
//            fetch(num_inst);
//            decode(num_inst);
//            rename(num_inst);
//        }

        if (inst->predTaken()) {
            // Start over with tick?
            break;
        } else if (freeRegs <= 0) {
            DPRINTF(FE, "Ran out of free registers to rename to!\n");
            status = RenameBlocked;
            break;
        } else if (serializeNext) {
            break;
        }
    }

    fetchNisnDist.sample(num_inst);
    checkBE();

    DPRINTF(FE, "Num insts processed: %i, Inst Buffer size: %i, Free "
            "Regs %i\n", num_inst, instBufferSize, freeRegs);
}

/**
 * Issues a read for the cache block containing the current PC.
 *
 * Translates the request, and if translation succeeds and an icache is
 * present performs the timing access.  On a miss (when the interface does
 * events) a completion event is attached and the status becomes
 * IcacheMissStall; otherwise the returned data is copied into cacheData.
 *
 * @return The fault produced by translation, or NoFault.
 */
template <class Impl>
Fault
FrontEnd<Impl>::fetchCacheLine()
{
    // Read a cache line, based on the current PC.
#if FULL_SYSTEM
    // Flag to say whether or not address is physical addr.
    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
#else
    unsigned flags = 0;
#endif // FULL_SYSTEM
    Fault fault = NoFault;

    // Align the fetch PC so it's at the start of a cache block.
    Addr fetch_PC = icacheBlockAlignPC(PC);

    DPRINTF(FE, "Fetching cache line starting at %#x.\n", fetch_PC);

    // Setup the memReq to do a read of the first instruction's address.
    // Set the appropriate read size and flags as well.
    memReq->cmd = Read;
    memReq->reset(fetch_PC, cacheBlkSize, flags);

    // Translate the instruction request.
    fault = cpu->translateInstReq(memReq);

    // In the case of faults, the fetch stage may need to stall and wait
    // on what caused the fetch (ITB or Icache miss).
//    assert(fault == NoFault);

    // Now do the timing access to see whether or not the instruction
    // exists within the cache.
    if (icacheInterface && fault == NoFault) {
        memReq->completionEvent = NULL;

        memReq->time = curTick;

        MemAccessResult res = icacheInterface->access(memReq);

        // If the cache missed then schedule an event to wake
        // up this stage once the cache miss completes.
        if (icacheInterface->doEvents() && res != MA_HIT) {
            memReq->completionEvent = new ICacheCompletionEvent(this);

            status = IcacheMissStall;

            cacheBlkValid = false;

            DPRINTF(FE, "Cache miss.\n");
        } else {
            DPRINTF(FE, "Cache hit.\n");

            cacheBlkValid = true;

            memcpy(cacheData, memReq->data, memReq->size);
        }
    }

    // Note that this will set the cache block PC a bit earlier than it should
    // be set.
    cacheBlkPC = fetch_PC;

    ++fetchedCacheLines;

    DPRINTF(FE, "Done fetching cache line.\n");

    return fault;
}

/**
 * Handles barriers, branch prediction and renaming for one fetched
 * instruction, and advances the fetch PC to the predicted target.
 *
 * Returns early, without predicting or renaming, when processBarriers()
 * reports that the instruction must block the front end.
 *
 * @param inst The instruction to process.
 */
template <class Impl>
void
FrontEnd<Impl>::processInst(DynInstPtr &inst)
{
    if (processBarriers(inst)) {
        return;
    }

    Addr inst_PC = inst->readPC();

//    BPredInfo bp_info = branchPred.lookup(inst_PC);
    if (!inst->isControl()) {
        // Non-control instructions simply fall through to the next PC.
        inst->setPredTarg(inst->readNextPC());
    } else {
        fetchedBranches++;
        if (branchPred.predict(inst, inst_PC, inst->threadNumber)) {
            predictedBranches++;
        }
    }

    Addr next_PC = inst->readPredTarg();

    DPRINTF(FE, "[sn:%lli] Predicted and processed inst PC %#x, next PC "
            "%#x\n", inst->seqNum, inst_PC, next_PC);

//    inst->setNextPC(next_PC);
//    inst->setBPredInfo(bp_info);

    // Not sure where I should set this
    PC = next_PC;

    renameInst(inst);
}

/**
 * Applies serialization rules to an instruction.
 *
 * If the previous instruction requested serialization of the next one, the
 * instruction is marked serialize-before.  A not-yet-handled
 * serialize-before instruction becomes the barrier instruction and blocks
 * the front end; a not-yet-handled serialize-after instruction arms
 * serializeNext for the following instruction.
 *
 * @param inst The instruction to examine.
 * @return true if the front end is now blocked on @p inst, false otherwise.
 */
template <class Impl>
bool
FrontEnd<Impl>::processBarriers(DynInstPtr &inst)
{
    if (serializeNext) {
        inst->setSerializeBefore();
        serializeNext = false;
    } else if (!inst->isSerializing()) {
        return false;
    }

    if (inst->isSerializeBefore() && !inst->isSerializeHandled()) {
        DPRINTF(FE, "Serialize before instruction encountered.\n");

        if (!inst->isTempSerializeBefore()) {
            dispatchedSerializing++;
            inst->setSerializeHandled();
        } else {
            dispatchedTempSerializing++;
        }

        // Change status over to SerializeBlocked so that other stages know
        // what this is blocked on.
        status = SerializeBlocked;

        barrierInst = inst;
        return true;
    } else if (inst->isSerializeAfter() && !inst->isSerializeHandled()) {
        DPRINTF(FE, "Serialize after instruction encountered.\n");

        inst->setSerializeHandled();

        dispatchedSerializing++;

        serializeNext = true;
        return false;
    }
    return false;
}

/**
 * Reports a fetch fault to the back end and blocks the front end until the
 * back end has dealt with it.
 *
 * @param fault The fault raised while fetching.
 */
template <class Impl>
void
FrontEnd<Impl>::handleFault(Fault &fault)
{
    DPRINTF(FE, "Fault at fetch, telling commit\n");
    backEnd->fetchFault(fault);
    // We're blocked on the back end until it handles this fault.
    status = BEBlocked;
}

/**
 * Squashes every buffered instruction younger than @p squash_num and
 * redirects fetch to @p next_PC.
 *
 * Also returns the destination registers of squashed instructions to the
 * free pool, restores the rename table from the back end, informs the branch
 * predictor, cancels an outstanding icache miss and drops the barrier
 * instruction if the front end was serialize-blocked.
 *
 * @param squash_num   Sequence number to squash after (exclusive).
 * @param next_PC      PC at which fetch resumes.
 * @param is_branch    Whether the squash was caused by a branch.
 * @param branch_taken Actual direction of that branch (used only when
 *                     @p is_branch is true).
 */
template <class Impl>
void
FrontEnd<Impl>::squash(const InstSeqNum &squash_num, const Addr &next_PC,
                       const bool is_branch, const bool branch_taken)
{
    DPRINTF(FE, "Squashing from [sn:%lli], setting PC to %#x\n",
            squash_num, next_PC);

    // Youngest instructions sit at the back of the buffer.
    while (!instBuffer.empty() &&
           instBuffer.back()->seqNum > squash_num) {
        DynInstPtr inst = instBuffer.back();

        DPRINTF(FE, "Squashing instruction [sn:%lli] PC %#x\n",
                inst->seqNum, inst->readPC());

        inst->clearDependents();

        instBuffer.pop_back();
        --instBufferSize;

        // Fix up branch predictor if necessary.
//        branchPred.undo(inst->getBPredInfo());

        freeRegs += inst->numDestRegs();
    }

    // Copy over rename table from the back end.
    renameTable.copyFrom(backEnd->renameTable);

    PC = next_PC;

    // Update BP with proper information.
    if (is_branch) {
        branchPred.squash(squash_num, next_PC, branch_taken, 0);
    } else {
        branchPred.squash(squash_num, 0);
    }

    // Clear the icache miss if it's outstanding.
    if (status == IcacheMissStall && icacheInterface) {
        DPRINTF(FE, "Squashing outstanding Icache miss.\n");
        icacheInterface->squash(0);
    }

    if (status == SerializeBlocked) {
        assert(barrierInst->seqNum > squash_num);
        barrierInst = NULL;
    }

    // Unless this squash originated from the front end, we're probably
    // in running mode now.
    // Actually might want to make this latency dependent.
    status = Running;
    fetchCacheLineNextCycle = true;
}

/**
 * Removes and returns the oldest instruction in the instruction buffer.
 *
 * @return The instruction at the front of the buffer, or NULL if the buffer
 *         is empty.
 */
template <class Impl>
typename Impl::DynInstPtr
FrontEnd<Impl>::getInst()
{
    if (instBufferSize == 0) {
        return NULL;
    }

    DynInstPtr inst = instBuffer.front();

    instBuffer.pop_front();

    --instBufferSize;
    dispatchCountStat++;

    return inst;
}

/**
 * Called when an icache miss completes.
 *
 * If the front end is no longer waiting on the miss (the fetch was squashed)
 * nothing is done; otherwise the returned data is copied into cacheData and
 * the status becomes IcacheMissComplete.
 */
template <class Impl>
void
FrontEnd<Impl>::processCacheCompletion()
{
    DPRINTF(FE, "Processing cache completion\n");

    // Do something here.
    if (status != IcacheMissStall) {
        DPRINTF(FE, "Previous fetch was squashed.\n");
        return;
    }

    status = IcacheMissComplete;

/*    if (checkStall(tid)) {
        fetchStatus[tid] = Blocked;
    } else {
        fetchStatus[tid] = IcacheMissComplete;
    }
*/
    memcpy(cacheData, memReq->data, memReq->size);

    // Reset the completion event to NULL.
    memReq->completionEvent = NULL;
}

/**
 * Returns registers to the free pool and unblocks renaming if that leaves
 * at least one register available.
 *
 * @param num_freed Number of registers being freed.
 */
template <class Impl>
void
FrontEnd<Impl>::addFreeRegs(int num_freed)
{
    if (status == RenameBlocked && freeRegs + num_freed > 0) {
        status = Running;
    }

    freeRegs += num_freed;

    assert(freeRegs <= numPhysRegs);
}

/**
 * Re-evaluates the serialize and back-end blocking conditions.
 *
 * A serialize block ends once the ROB is empty and the instruction buffer is
 * drained; a back-end block ends once the back end is no longer blocked (or
 * immediately for a decoupled front end).
 *
 * @return true if the status changed, false otherwise.
 */
template <class Impl>
bool
FrontEnd<Impl>::updateStatus()
{
//    bool rename_block = freeRegs <= 0;
    bool serialize_block = !backEnd->robEmpty() || instBufferSize;
    bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
    bool ret_val = false;
/*
    // Should already be handled through addFreeRegs function
    if (status == RenameBlocked && !rename_block) {
        status = Running;
        ret_val = true;
    }
*/

    if (status == SerializeBlocked && !serialize_block) {
        status = SerializeComplete;
        ret_val = true;
    }

    if (status == BEBlocked && !be_block) {
        // Fall back to a pending serialize block if a barrier is waiting.
        if (barrierInst) {
            status = SerializeBlocked;
        } else {
            status = Running;
        }
        ret_val = true;
    }
    return ret_val;
}

/**
 * Moves a Running or Idle front end into BEBlocked when the back end is
 * blocked.  Never blocks when the front end is decoupled.
 */
template <class Impl>
void
FrontEnd<Impl>::checkBE()
{
    bool be_block = cpu->decoupledFrontEnd ? false : backEnd->isBlocked();
    if (be_block) {
        if (status == Running || status == Idle) {
            status = BEBlocked;
        }
    }
}

/**
 * Produces the next instruction to process.
 *
 * If a serialize block has just completed, the held barrier instruction is
 * returned.  Otherwise the instruction at the current PC is decoded from the
 * cached block and wrapped in a new DynInst.
 *
 * @return The next instruction, or NULL if the PC lies outside the cached
 *         block or the block is not valid.
 */
template <class Impl>
typename Impl::DynInstPtr
FrontEnd<Impl>::getInstFromCacheline()
{
    if (status == SerializeComplete) {
        DynInstPtr inst = barrierInst;
        status = Running;
        barrierInst = NULL;
        return inst;
    }

    InstSeqNum inst_seq;
    MachInst inst;
    // @todo: Fix this magic number used here to handle word offset (and
    // getting rid of PAL bit)
    unsigned offset = (PC & cacheBlkMask) & ~3;

    // PC of inst is not in this cache block
    if (PC >= (cacheBlkPC + cacheBlkSize) || PC < cacheBlkPC ||
        !cacheBlkValid) {
//        DPRINTF(OoOCPU, "OoOCPU: PC is not in this cache block\n");
//        DPRINTF(OoOCPU, "OoOCPU: PC: %#x, cacheBlkPC: %#x, cacheBlkValid: %i",
//                PC, cacheBlkPC, cacheBlkValid);
//        panic("Instruction not in cache line or cache line invalid!");
        return NULL;
    }

    //////////////////////////
    // Fetch one instruction
    //////////////////////////

    // Get a sequence number.
    inst_seq = getAndIncrementInstSeq();

    // Make sure this is a valid index.
    assert(offset <= cacheBlkSize - sizeof(MachInst));

    // Get the instruction from the array of the cache line.
    inst = htog(*reinterpret_cast<MachInst *>(&cacheData[offset]));

    ExtMachInst decode_inst = TheISA::makeExtMI(inst, PC);

    // Create a new DynInst from the instruction fetched.
    DynInstPtr instruction = new DynInst(decode_inst, PC, PC+sizeof(MachInst),
                                         inst_seq, cpu);

    instruction->setState(thread);

    DPRINTF(FE, "Instruction [sn:%lli] created, with PC %#x\n%s\n",
            inst_seq, instruction->readPC(),
            instruction->staticInst->disassemble(PC));

    instruction->traceData =
        Trace::getInstRecord(curTick, xc, cpu,
                             instruction->staticInst,
                             instruction->readPC(), 0);

    // Increment stat of fetched instructions.
    ++fetchedInsts;

    return instruction;
}

/**
 * Renames an instruction against the front end's rename table.
 *
 * Each source register is linked to the instruction currently producing it
 * (marked ready if that producer has completed, otherwise @p inst is added
 * to the producer's dependent list).  Each destination register then records
 * its previous producer, is remapped to @p inst, and consumes one free
 * register.
 *
 * @param inst The instruction to rename.
 */
template <class Impl>
void
FrontEnd<Impl>::renameInst(DynInstPtr &inst)
{
    DynInstPtr src_inst = NULL;
    int num_src_regs = inst->numSrcRegs();
    if (num_src_regs == 0) {
        // Nothing to wait on.
        inst->setCanIssue();
    } else {
        for (int i = 0; i < num_src_regs; ++i) {
            src_inst = renameTable[inst->srcRegIdx(i)];

            inst->setSrcInst(src_inst, i);

            DPRINTF(FE, "[sn:%lli]: Src reg %i is inst [sn:%lli]\n",
                    inst->seqNum, (int)inst->srcRegIdx(i), src_inst->seqNum);

            if (src_inst->isCompleted()) {
                DPRINTF(FE, "Reg ready.\n");
                inst->markSrcRegReady(i);
            } else {
                DPRINTF(FE, "Adding to dependent list.\n");
                src_inst->addDependent(inst);
            }
        }
    }

    for (int i = 0; i < inst->numDestRegs(); ++i) {
        RegIndex idx = inst->destRegIdx(i);

        DPRINTF(FE, "Dest reg %i is now inst [sn:%lli], was previously "
                "[sn:%lli]\n",
                (int)inst->destRegIdx(i), inst->seqNum,
                renameTable[idx]->seqNum);

        inst->setPrevDestInst(renameTable[idx], i);

        renameTable[idx] = inst;
        --freeRegs;
    }
}

/**
 * Prints the size of the instruction buffer followed by one record per
 * buffered instruction (PC, thread, sequence number, issued and squashed
 * flags).
 */
template <class Impl>
void
FrontEnd<Impl>::dumpInsts()
{
    cprintf("instBuffer size: %i\n", instBuffer.size());

    InstBuffIt buff_it = instBuffer.begin();

    for (int num = 0; buff_it != instBuffer.end(); num++) {
        cprintf("Instruction:%i\nPC:%#x\n[tid:%i]\n[sn:%lli]\nIssued:%i\n"
                "Squashed:%i\n\n",
                num, (*buff_it)->readPC(), (*buff_it)->threadNumber,
                (*buff_it)->seqNum, (*buff_it)->isIssued(),
                (*buff_it)->isSquashed());
        buff_it++;
    }
}

/**
 * Creates an auto-deleting completion event bound to a front end.
 *
 * @param fe Front end to notify when the event is processed.
 */
template <class Impl>
FrontEnd<Impl>::ICacheCompletionEvent::ICacheCompletionEvent(FrontEnd *fe)
    : Event(&mainEventQueue, Delayed_Writeback_Pri), frontEnd(fe)
{
    this->setFlags(Event::AutoDelete);
}

/** Forwards the completion to the owning front end. */
template <class Impl>
void
FrontEnd<Impl>::ICacheCompletionEvent::process()
{
    frontEnd->processCacheCompletion();
}

/** @return A human-readable description of this event. */
template <class Impl>
const char *
FrontEnd<Impl>::ICacheCompletionEvent::description()
{
    return "ICache completion event";
}