// Todo: Rewrite this.  Add in branch prediction.  Fix up if squashing comes
// from decode; only the correct instructions should be killed.  This will
// probably require changing the CPU's instList functions to take a seqNum
// instead of a dyninst.  With the probe path, should be able to specify the
// size of data to fetch.  Will be able to get the full cache line.

// Remove this later.
#define OPCODE(X) (((X) >> 26) & 0x3f)

#include "cpu/exetrace.hh"
#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
#include "mem/mem_req.hh"
#include "cpu/beta_cpu/fetch.hh"

#include "sim/universe.hh"

template <class Impl>
SimpleFetch<Impl>::CacheCompletionEvent
::CacheCompletionEvent(SimpleFetch *_fetch)
    : Event(&mainEventQueue),
      fetch(_fetch)
{
}

template <class Impl>
void
SimpleFetch<Impl>::CacheCompletionEvent::process()
{
    fetch->processCacheCompletion();
}

template <class Impl>
const char *
SimpleFetch<Impl>::CacheCompletionEvent::description()
{
    return "SimpleFetch cache completion event";
}

template <class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
    : cacheCompletionEvent(this),
      icacheInterface(params.icacheInterface),
      decodeToFetchDelay(params.decodeToFetchDelay),
      renameToFetchDelay(params.renameToFetchDelay),
      iewToFetchDelay(params.iewToFetchDelay),
      commitToFetchDelay(params.commitToFetchDelay),
      fetchWidth(params.fetchWidth),
      inst(0)
{
    // Set status to idle.
    _status = Idle;

    // Create a new memory request.
    memReq = new MemReq();
    // Not sure of this parameter.  I think it should be based on the
    // thread number.
#ifndef FULL_SYSTEM
    memReq->asid = params.asid;
#else
    memReq->asid = 0;
#endif // FULL_SYSTEM
    memReq->data = new uint8_t[64];

    // Size of cache block.
    blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;

    // Create mask to get rid of offset bits.  blkSize is a power of two,
    // so ~(blkSize - 1) clears exactly the block-offset bits.
    cacheBlockMask = ~(blkSize - 1);

    // Get the size of an instruction.
    instSize = sizeof(MachInst);
}

template <class Impl>
void
SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
    cpu = cpu_ptr;

    // This line will be removed eventually.
    memReq->xc = cpu->xcBase();
}

template <class Impl>
void
SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
    DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
    timeBuffer = time_buffer;

    // Create wires to get information from the proper places in the
    // time buffer.
    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
    fromRename = timeBuffer->getWire(-renameToFetchDelay);
    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}

template <class Impl>
void
SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
    fetchQueue = fq_ptr;

    // Create wire to write information to the proper place in the
    // fetch queue.
    toDecode = fetchQueue->getWire(0);
}

template <class Impl>
void
SimpleFetch<Impl>::processCacheCompletion()
{
    DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");

    // Only change the status if it's still waiting on the icache access
    // to return.
    // Can keep track of how many cache accesses go unused due to
    // misspeculation here.
    // How to handle an outstanding miss which gets cancelled due to a
    // squash, and then a new icache miss gets scheduled?
    if (_status == IcacheMissStall)
        _status = IcacheMissComplete;
}

// Note that in SimpleFetch<>, the template parameters will most likely
// have to be provided to the BP and BTB as well.
template <class Impl>
void
SimpleFetch<Impl>::squash(Addr new_PC)
{
    DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);

    cpu->setNextPC(new_PC + instSize);
    cpu->setPC(new_PC);

    _status = Squashing;

    // Clear out the instructions that are no longer valid.
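    // (Worked example, illustrative values only: with 4-byte MachInsts,
    // squashing to new_PC = 0x2000 leaves PC = 0x2000 and nextPC = 0x2004;
    // the younger instructions already handed toward decode are the ones
    // removed by removeInstsNotInROB() below.)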
    // Actually it may be slightly unrealistic to kill instructions that are
    // in flight like that between stages.  Perhaps just have the next
    // stage ignore those instructions, or something.  In the cycle where it's
    // returning from squashing, the other stages can just ignore the inputs
    // for that cycle.

    // Tell the CPU to remove any instructions that aren't currently
    // in the ROB (instructions in flight that were killed).
    cpu->removeInstsNotInROB();
}

template <class Impl>
void
SimpleFetch<Impl>::tick()
{
#if 0
    if (fromCommit->commitInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from commit.\n");

        // In any case, squash.
        squash(fromCommit->commitInfo.nextPC);

        return;
    }

    if (fromDecode->decodeInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from decode.\n");

        // Squash unless we're already squashing?
        squash(fromDecode->decodeInfo.nextPC);

        return;
    }

    if (fromCommit->commitInfo.robSquashing) {
        DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");

        // Continue to squash.
        _status = Squashing;

        return;
    }

    if (fromDecode->decodeInfo.stall ||
        fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
        fromCommit->commitInfo.stall) {
        DPRINTF(Fetch, "Fetch: Stalling stage.\n");
        DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                "Commit: %i\n",
                fromDecode->decodeInfo.stall,
                fromRename->renameInfo.stall,
                fromIEW->iewInfo.stall,
                fromCommit->commitInfo.stall);

        // What to do if we're already in an icache stall?
    }
#endif

    if (_status != Blocked &&
        _status != Squashing &&
        _status != IcacheMissStall) {
        DPRINTF(Fetch, "Fetch: Running stage.\n");

        fetch();
    } else if (_status == Blocked) {
        // If still being told to stall, do nothing.
        if (fromDecode->decodeInfo.stall ||
            fromRename->renameInfo.stall ||
            fromIEW->iewInfo.stall ||
            fromCommit->commitInfo.stall) {
            DPRINTF(Fetch, "Fetch: Stalling stage.\n");
            DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                    "Commit: %i\n",
                    fromDecode->decodeInfo.stall,
                    fromRename->renameInfo.stall,
                    fromIEW->iewInfo.stall,
                    fromCommit->commitInfo.stall);
        } else {
            DPRINTF(Fetch, "Fetch: Done blocking.\n");
            _status = Running;
        }

        if (fromCommit->commitInfo.squash) {
            DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                    "from commit.\n");
            squash(fromCommit->commitInfo.nextPC);
            return;
        } else if (fromDecode->decodeInfo.squash) {
            DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                    "from decode.\n");
            squash(fromDecode->decodeInfo.nextPC);
            return;
        } else if (fromCommit->commitInfo.robSquashing) {
            DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
            _status = Squashing;
            return;
        }
    } else if (_status == Squashing) {
        // If there are no squash signals then change back to running.
        // Note that when a squash starts happening, commitInfo.squash will
        // be high.  But if the squash is still in progress, then only
        // commitInfo.robSquashing will be high.
        if (!fromCommit->commitInfo.squash &&
            !fromCommit->commitInfo.robSquashing) {
            DPRINTF(Fetch, "Fetch: Done squashing.\n");
            _status = Running;
        } else if (fromCommit->commitInfo.squash) {
            // If there's a new squash, then start squashing again.
            squash(fromCommit->commitInfo.nextPC);
        } else {
            // Purely a debugging statement.
            DPRINTF(Fetch, "Fetch: ROB still squashing.\n");
        }
    }
}

template <class Impl>
void
SimpleFetch<Impl>::fetch()
{
    //////////////////////////////////////////
    // Check backwards communication
    //////////////////////////////////////////

    // If branch prediction is incorrect, squash any instructions,
    // update the PC, and do not fetch anything this cycle.
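    // The backwards signals below are checked in priority order: a squash
    // from commit wins over a squash from decode, which wins over an
    // in-progress ROB squash, which wins over a plain stall from any of the
    // downstream stages.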
    // Might want to put all the PC changing stuff in one area.
    // Normally should also check here to see if there is branch
    // misprediction info to update with.
    if (fromCommit->commitInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from commit.\n");

        squash(fromCommit->commitInfo.nextPC);
        return;
    } else if (fromDecode->decodeInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from decode.\n");

        squash(fromDecode->decodeInfo.nextPC);
        return;
    } else if (fromCommit->commitInfo.robSquashing) {
        DPRINTF(Fetch, "Fetch: ROB still squashing.\n");

        _status = Squashing;
        return;
    }

    // If being told to stall, do nothing.
    if (fromDecode->decodeInfo.stall ||
        fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
        fromCommit->commitInfo.stall) {
        DPRINTF(Fetch, "Fetch: Stalling stage.\n");
        DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                "Commit: %i\n",
                fromDecode->decodeInfo.stall,
                fromRename->renameInfo.stall,
                fromIEW->iewInfo.stall,
                fromCommit->commitInfo.stall);

        _status = Blocked;
        return;
    }

    //////////////////////////////////////////
    // Start actual fetch
    //////////////////////////////////////////

    // If nothing else is outstanding, attempt to read instructions.

#ifdef FULL_SYSTEM
    // Flag to say whether or not the address is a physical address.
    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
#else
    unsigned flags = 0;
#endif // FULL_SYSTEM

    // The current PC.
    Addr PC = cpu->readPC();

    // Fault code for memory access.
    Fault fault = No_Fault;

    // If returning from the delay of a cache miss, then update the status
    // to running; otherwise do the cache access.
    if (_status == IcacheMissComplete) {
        DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");

        // Reset the completion event to NULL.
        memReq->completionEvent = NULL;

        _status = Running;
    } else {
        DPRINTF(Fetch, "Fetch: Attempting to translate and read "
                "instruction, starting at PC %08p.\n", PC);

        // Otherwise check if the instruction exists within the cache.
        // If it does, then proceed on to read the instruction and the rest
        // of the instructions in the cache line until either the end of the
        // cache line or a predicted taken branch is encountered.

        // Note that this simply checks if the first instruction exists
        // within the cache, assuming the rest of the cache line also exists
        // within the cache.

        // Set up the memReq to do a read of the first instruction's address.
        // Set the appropriate read size and flags as well.
        memReq->cmd = Read;
        memReq->reset(PC, instSize, flags);

        // Translate the instruction request.
        // Should this function be in the CPU class?  Probably...the ITB/DTB
        // should exist within the CPU.
        fault = cpu->translateInstReq(memReq);

        // In the case of faults, the fetch stage may need to stall and wait
        // on what caused the fault (ITB or Icache miss).

        // If translation was successful, attempt to read the first
        // instruction.
        if (fault == No_Fault) {
            DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
            fault = cpu->mem->read(memReq, inst);
            // This read may change when the mem interface changes.
        }

        // Now do the timing access to see whether or not the instruction
        // exists within the cache.
        if (icacheInterface && fault == No_Fault) {
            DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
            memReq->completionEvent = NULL;

            memReq->time = curTick;

            MemAccessResult result = icacheInterface->access(memReq);

            // If the cache missed (in this model functional and timing
            // memories are different), then schedule an event to wake
            // up this stage once the cache miss completes.
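            // The wake-up path is: the memory system later invokes
            // cacheCompletionEvent.process(), which calls
            // processCacheCompletion() and moves the stage from
            // IcacheMissStall to IcacheMissComplete, so the next call to
            // fetch() resumes from the same PC.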
            if (result != MA_HIT && icacheInterface->doEvents()) {
                memReq->completionEvent = &cacheCompletionEvent;
//                lastIcacheStall = curTick;

                // How does the current model work as far as individual
                // stages scheduling/unscheduling?
                // Perhaps have only the main CPU scheduled/unscheduled,
                // and have it choose what stages to run appropriately.

                DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
                _status = IcacheMissStall;
                return;
            }
        }
    }

    // As far as timing goes, the CPU will need to send an event through
    // the MemReq in order to be woken up once the memory access completes.
    // Probably have a status on a per-thread basis so each thread can
    // block independently and be woken up independently.

    Addr next_PC = 0;
    InstSeqNum inst_seq;

    // If the read of the first instruction was successful, then grab the
    // instructions from the rest of the cache line and put them into the
    // queue heading to decode.
    if (fault == No_Fault) {
        DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");

        // Need to keep track of whether or not a predicted branch
        // ended this fetch block.
        bool predicted_branch = false;

        // Might want to keep track of various stats.
//        numLinesFetched++;

        // Get a sequence number.
        inst_seq = cpu->getAndIncrementInstSeq();

        // Because the first instruction was already fetched, create the
        // DynInst and put it into the queue to decode.
        DynInst *instruction = new DynInst(inst, PC, PC + instSize,
                                           inst_seq, cpu);

        DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
                inst_seq, instruction->readPC());

        DPRINTF(Fetch, "Fetch: Instruction opcode is: %#x\n",
                OPCODE(inst));

        instruction->traceData =
            Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
                                 instruction->staticInst,
                                 instruction->readPC(), 0);

        cpu->addInst(instruction);

        // Write the instruction to the first slot in the queue
        // that heads to decode.
        toDecode->insts[0] = instruction;

        // Now update the PC to fetch the next instruction in the cache
        // line.
        PC = PC + instSize;

        // Obtain the byte offset of the next instruction within the cache
        // line; ~cacheBlockMask keeps only the low order (offset) bits.
        int line_index = PC & ~cacheBlockMask;

        // Take instructions and put them into the queue heading to decode.
        // Then read the next instruction in the cache line.  Continue
        // until either all of the fetch bandwidth is used (not an issue for
        // non-SMT), or the end of the cache line is reached.  Note that
        // this assumes standard cache lines, and not something like a trace
        // cache where lines might not end at cache-line-size-aligned
        // addresses.
        // @todo: Fix the horrible amount of translates/reads that must
        // take place due to reading an entire cache line.  Ideally it
        // should all take place at once, returning an array of binary
        // instructions, which can then be used to get all the instructions
        // needed.  Figure out if I can roll it back into one loop.
        for (int fetched = 1;
             line_index < blkSize && fetched < fetchWidth;
             line_index += instSize, ++fetched) {
            // Reset the mem request to set up the read of the next
            // instruction.
            memReq->reset(PC, instSize, flags);

            // Translate the instruction request.
            fault = cpu->translateInstReq(memReq);

            // Read the instruction.
            if (fault == No_Fault) {
                fault = cpu->mem->read(memReq, inst);
            }

            // Check if there was a fault.
            if (fault != No_Fault) {
                panic("Fetch: Read of instruction faulted when it should "
                      "succeed; most likely exceeding cache line.\n");
            }

            // Get a sequence number.
            inst_seq = cpu->getAndIncrementInstSeq();

            // Create the actual DynInst.  Parameters are:
            // DynInst(instruction, PC, predicted PC, sequence number,
            // CPU pointer).
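            // (E.g., with 4-byte MachInsts, an instruction at PC 0x1000 is
            // created with predicted PC 0x1004 -- illustrative values only.)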
            // Because this simple model has no branch prediction, the
            // predicted PC will simply be PC + sizeof(MachInst).
            // Update this to actually use a branch predictor to predict the
            // target in the future.
            DynInst *instruction = new DynInst(inst, PC, PC + instSize,
                                               inst_seq, cpu);

            DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
                    inst_seq, instruction->readPC());

            DPRINTF(Fetch, "Fetch: Instruction opcode is: %#x\n",
                    OPCODE(inst));

            cpu->addInst(instruction);

            // Write the instruction to the proper slot in the queue
            // that heads to decode.
            toDecode->insts[fetched] = instruction;

            // Might want to keep track of various stats.
//            numInstsFetched++;

            // Now update the PC to fetch the next instruction in the cache
            // line.
            PC = PC + instSize;
        }

        // If no branches were predicted taken, then increment the PC with
        // the fall-through path.  This simple model always predicts not
        // taken.
        if (!predicted_branch) {
            next_PC = PC;
        }
    }

    // Now that fetching is completed, update the PC to signify what the next
    // cycle will be.  Might want to move this to the beginning of this
    // function so that the PC updates at the beginning of everything.
    // Or might want to leave setting the PC to the main CPU, with fetch
    // only changing the nextPC (will require correct determination of
    // the next PC).
    if (fault == No_Fault) {
        DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
        cpu->setPC(next_PC);
        cpu->setNextPC(next_PC + instSize);
    } else {
        // Handle the fault.
        // This stage will not be able to continue until all the ROB
        // slots are empty, at which point the fault can be handled.
        // The only other way it can wake up is if a squash comes along
        // and changes the PC.  Not sure how to handle that case...perhaps
        // have it handled by the upper level CPU class, which peeks into the
        // time buffer and sees if a squash comes along, in which case it
        // changes the status.

        DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");

        _status = Blocked;
#ifdef FULL_SYSTEM
        // The trap will probably need a pointer to the CPU to do accessing.
        // Or an exec context.  --Write ProxyExecContext eventually.
        // Avoid using this for now as the xc really shouldn't be in here.
        cpu->trap(fault);
#else // !FULL_SYSTEM
        fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
#endif // FULL_SYSTEM
    }
}
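
// A minimal sketch (left as a comment, since the concrete policy type lives
// elsewhere) of how this templated stage is typically turned into object
// code: a wrapper translation unit includes this implementation and
// explicitly instantiates it for the concrete Impl policy.  The
// AlphaSimpleImpl name below is an assumption for illustration only, not
// part of the original source.
//
//     template class SimpleFetch<AlphaSimpleImpl>;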