gem5/cpu/beta_cpu/fetch_impl.hh

// Todo: Rewrite this.  Add in branch prediction.  Fix up if squashing comes
// from decode; only the correct instructions should be killed.  This will
// probably require changing the CPU's instList functions to take a seqNum
// instead of a dyninst.  With probe path, should be able to specify
// size of data to fetch.  Will be able to get full cache line.

// Remove this later.
#define OPCODE(X)                       (X >> 26) & 0x3f

#include "cpu/exetrace.hh"
#include "mem/base_mem.hh"
#include "mem/mem_interface.hh"
#include "mem/mem_req.hh"
#include "cpu/beta_cpu/fetch.hh"

#include "sim/universe.hh"

template<class Impl>
SimpleFetch<Impl>::CacheCompletionEvent
::CacheCompletionEvent(SimpleFetch *_fetch)
    : Event(&mainEventQueue),
      fetch(_fetch)
{
}

template<class Impl>
void
SimpleFetch<Impl>::CacheCompletionEvent::process()
{
    fetch->processCacheCompletion();
}

template<class Impl>
const char *
SimpleFetch<Impl>::CacheCompletionEvent::description()
{
    return "SimpleFetch cache completion event";
}

template<class Impl>
SimpleFetch<Impl>::SimpleFetch(Params &params)
    : cacheCompletionEvent(this),
      icacheInterface(params.icacheInterface),
      decodeToFetchDelay(params.decodeToFetchDelay),
      renameToFetchDelay(params.renameToFetchDelay),
      iewToFetchDelay(params.iewToFetchDelay),
      commitToFetchDelay(params.commitToFetchDelay),
      fetchWidth(params.fetchWidth),
      inst(0)
{
    // Set status to idle.
    _status = Idle;

    // Create a new memory request.
    memReq = new MemReq();
    // Not sure of this parameter.  I think it should be based on the
    // thread number.
#ifndef FULL_SYSTEM
    memReq->asid = params.asid;
#else
    memReq->asid = 0;
#endif // FULL_SYSTEM
    memReq->data = new uint8_t[64];

    // Size of cache block.
    blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;

    // Create mask to get rid of offset bits.
    cacheBlockMask = ~((int)log2(blkSize) - 1);

    // Get the size of an instruction.
    instSize = sizeof(MachInst);
}

template<class Impl>
void
SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");
    cpu = cpu_ptr;
    // This line will be removed eventually.
    memReq->xc = cpu->xcBase();
}

template<class Impl>
void
SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)
{
    DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");
    timeBuffer = time_buffer;

    // Create wires to get information from proper places in time buffer.
    fromDecode = timeBuffer->getWire(-decodeToFetchDelay);
    fromRename = timeBuffer->getWire(-renameToFetchDelay);
    fromIEW = timeBuffer->getWire(-iewToFetchDelay);
    fromCommit = timeBuffer->getWire(-commitToFetchDelay);
}

template<class Impl>
void
SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");
    fetchQueue = fq_ptr;

    // Create wire to write information to proper place in fetch queue.
    toDecode = fetchQueue->getWire(0);
}

template<class Impl>
void
SimpleFetch<Impl>::processCacheCompletion()
{
    DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");

    // Only change the status if it's still waiting on the icache access
    // to return.
    // Can keep track of how many cache accesses go unused due to
    // misspeculation here.
    // How to handle an outstanding miss which gets cancelled due to squash,
    // then a new icache miss gets scheduled?
    if (_status == IcacheMissStall)
        _status = IcacheMissComplete;
}

// Note that in the SimpleFetch<>, will most likely have to provide the
// template parameters to BP and BTB.
template<class Impl>
void
SimpleFetch<Impl>::squash(Addr new_PC)
{
    DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);
    cpu->setNextPC(new_PC + instSize);
    cpu->setPC(new_PC);

    _status = Squashing;

    // Clear out the instructions that are no longer valid.
    // Actually maybe slightly unrealistic to kill instructions that are
    // in flight like that between stages.  Perhaps just have next
    // stage ignore those instructions or something.  In the cycle where it's
    // returning from squashing, the other stages can just ignore the inputs
    // for that cycle.

    // Tell the CPU to remove any instructions that aren't currently
    // in the ROB (instructions in flight that were killed).
    cpu->removeInstsNotInROB();
}

template<class Impl>
void
SimpleFetch<Impl>::tick()
{
#if 0
    if (fromCommit->commitInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from commit.\n");

        // In any case, squash.
        squash(fromCommit->commitInfo.nextPC);
        return;
    }

    if (fromDecode->decodeInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from decode.\n");

        // Squash unless we're already squashing?
        squash(fromDecode->decodeInfo.nextPC);
        return;
    }

    if (fromCommit->commitInfo.robSquashing) {
        DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");

        // Continue to squash.
        _status = Squashing;
        return;
    }

    if (fromDecode->decodeInfo.stall ||
        fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
        fromCommit->commitInfo.stall)
    {
        DPRINTF(Fetch, "Fetch: Stalling stage.\n");
        DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                "Commit: %i\n",
                fromDecode->decodeInfo.stall,
                fromRename->renameInfo.stall,
                fromIEW->iewInfo.stall,
                fromCommit->commitInfo.stall);
        // What to do if we're already in an icache stall?
    }
#endif

    if (_status != Blocked &&
        _status != Squashing &&
        _status != IcacheMissStall) {
        DPRINTF(Fetch, "Fetch: Running stage.\n");

        fetch();
    } else if (_status == Blocked) {
        // If still being told to stall, do nothing.
        if (fromDecode->decodeInfo.stall ||
            fromRename->renameInfo.stall ||
            fromIEW->iewInfo.stall ||
            fromCommit->commitInfo.stall)
        {
            DPRINTF(Fetch, "Fetch: Stalling stage.\n");
            DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                    "Commit: %i\n",
                    fromDecode->decodeInfo.stall,
                    fromRename->renameInfo.stall,
                    fromIEW->iewInfo.stall,
                    fromCommit->commitInfo.stall);
        } else {

            DPRINTF(Fetch, "Fetch: Done blocking.\n");
            _status = Running;
        }

        if (fromCommit->commitInfo.squash) {
            DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                    "from commit.\n");
            squash(fromCommit->commitInfo.nextPC);
            return;
        } else if (fromDecode->decodeInfo.squash) {
            DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                    "from decode.\n");
            squash(fromDecode->decodeInfo.nextPC);
            return;
        } else if (fromCommit->commitInfo.robSquashing) {
            DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");
            _status = Squashing;
            return;
        }
    } else if (_status == Squashing) {
        // If there are no squash signals then change back to running.
        // Note that when a squash starts happening, commitInfo.squash will
        // be high.  But if the squash is still in progress, then only
        // commitInfo.robSquashing will be high.
        if (!fromCommit->commitInfo.squash &&
            !fromCommit->commitInfo.robSquashing) {

            DPRINTF(Fetch, "Fetch: Done squashing.\n");
            _status = Running;
        } else if (fromCommit->commitInfo.squash) {
            // If there's a new squash, then start squashing again.
            squash(fromCommit->commitInfo.nextPC);
        } else {
            // Purely a debugging statement.
            DPRINTF(Fetch, "Fetch: ROB still squashing.\n");
        }
    }

}

template<class Impl>
void
SimpleFetch<Impl>::fetch()
{
    //////////////////////////////////////////
    // Check backwards communication
    //////////////////////////////////////////

    // If branch prediction is incorrect, squash any instructions,
    // update PC, and do not fetch anything this cycle.

    // Might want to put all the PC changing stuff in one area.
    // Normally should also check here to see if there is branch
    // misprediction info to update with.
    if (fromCommit->commitInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from commit.\n");
        squash(fromCommit->commitInfo.nextPC);
        return;
    } else if (fromDecode->decodeInfo.squash) {
        DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "
                "from decode.\n");
        squash(fromDecode->decodeInfo.nextPC);
        return;
    } else if (fromCommit->commitInfo.robSquashing) {
        DPRINTF(Fetch, "Fetch: ROB still squashing.\n");
        _status = Squashing;
        return;
    }

    // If being told to stall, do nothing.
    if (fromDecode->decodeInfo.stall ||
        fromRename->renameInfo.stall ||
        fromIEW->iewInfo.stall ||
        fromCommit->commitInfo.stall)
    {
        DPRINTF(Fetch, "Fetch: Stalling stage.\n");
        DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "
                "Commit: %i\n",
                fromDecode->decodeInfo.stall,
                fromRename->renameInfo.stall,
                fromIEW->iewInfo.stall,
                fromCommit->commitInfo.stall);
        _status = Blocked;
        return;
    }

    //////////////////////////////////////////
    // Start actual fetch
    //////////////////////////////////////////

    // If nothing else outstanding, attempt to read instructions.

#ifdef FULL_SYSTEM
    // Flag to say whether or not address is physical addr.
    unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;
#else
    unsigned flags = 0;
#endif // FULL_SYSTEM

    // The current PC.
    Addr PC = cpu->readPC();

    // Fault code for memory access.
    Fault fault = No_Fault;

    // If returning from the delay of a cache miss, then update the status
    // to running, otherwise do the cache access.
    if (_status == IcacheMissComplete) {
        DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");

        // Reset the completion event to NULL.
        memReq->completionEvent = NULL;

        _status = Running;
    } else {
        DPRINTF(Fetch, "Fetch: Attempting to translate and read "
                       "instruction, starting at PC %08p.\n",
                PC);

        // Otherwise check if the instruction exists within the cache.
        // If it does, then proceed on to read the instruction and the rest
        // of the instructions in the cache line until either the end of the
        // cache line or a predicted taken branch is encountered.
        // Note that this simply checks if the first instruction exists
        // within the cache, assuming the rest of the cache line also exists
        // within the cache.

        // Setup the memReq to do a read of the first isntruction's address.
        // Set the appropriate read size and flags as well.
        memReq->cmd = Read;
        memReq->reset(PC, instSize, flags);

        // Translate the instruction request.
        // Should this function be
        // in the CPU class ?  Probably...ITB/DTB should exist within the
        // CPU.

        fault = cpu->translateInstReq(memReq);

        // In the case of faults, the fetch stage may need to stall and wait
        // on what caused the fetch (ITB or Icache miss).

        // If translation was successful, attempt to read the first
        // instruction.
        if (fault == No_Fault) {
            DPRINTF(Fetch, "Fetch: Doing instruction read.\n");
            fault = cpu->mem->read(memReq, inst);
            // This read may change when the mem interface changes.
        }

        // Now do the timing access to see whether or not the instruction
        // exists within the cache.
        if (icacheInterface && fault == No_Fault) {
            DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");
            memReq->completionEvent = NULL;

            memReq->time = curTick;

            MemAccessResult result = icacheInterface->access(memReq);

            // If the cache missed (in this model functional and timing
            // memories are different), then schedule an event to wake
            // up this stage once the cache miss completes.
            if (result != MA_HIT && icacheInterface->doEvents()) {
                memReq->completionEvent = &cacheCompletionEvent;
//        	    lastIcacheStall = curTick;

                // How does current model work as far as individual
                // stages scheduling/unscheduling?
                // Perhaps have only the main CPU scheduled/unscheduled,
                // and have it choose what stages to run appropriately.

                DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");
                _status = IcacheMissStall;
                return;
            }
        }
    }

    // As far as timing goes, the CPU will need to send an event through
    // the MemReq in order to be woken up once the memory access completes.
    // Probably have a status on a per thread basis so each thread can
    // block independently and be woken up independently.

    Addr next_PC = 0;
    InstSeqNum inst_seq;

    // If the read of the first instruction was successful, then grab the
    // instructions from the rest of the cache line and put them into the
    // queue heading to decode.
    if (fault == No_Fault) {
        DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");

        // Need to keep track of whether or not a predicted branch
        // ended this fetch block.
        bool predicted_branch = false;

        // Might want to keep track of various stats.
//        numLinesFetched++;

        // Get a sequence number.
        inst_seq = cpu->getAndIncrementInstSeq();

        // Because the first instruction was already fetched, create the
        // DynInst and put it into the queue to decode.
        DynInst *instruction = new DynInst(inst, PC, PC+instSize, inst_seq,
                                           cpu);
        DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
                instruction, instruction->readPC());
        DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
                OPCODE(inst));

        instruction->traceData =
            Trace::getInstRecord(curTick, cpu->xcBase(), cpu,
                                 instruction->staticInst,
                                 instruction->readPC(), 0);

        cpu->addInst(instruction);

        // Write the instruction to the first slot in the queue
        // that heads to decode.
        toDecode->insts[0] = instruction;

        // Now update the PC to fetch the next instruction in the cache
        // line.
        PC = PC + instSize;

        // Obtain the index into the cache line by getting only the low
        // order bits.
        int line_index = PC & cacheBlockMask;

        // Take instructions and put them into the queue heading to decode.
        // Then read the next instruction in the cache line.  Continue
        // until either all of the fetch bandwidth is used (not an issue for
        // non-SMT), or the end of the cache line is reached.  Note that
        // this assumes standard cachelines, and not something like a trace
        // cache where lines might not end at cache-line size aligned
        // addresses.
        // @todo: Fix the horrible amount of translates/reads that must
        // take place due to reading an entire cacheline.  Ideally it
        // should all take place at once, return an array of binary
        // instructions, which can then be used to get all the instructions
        // needed.  Figure out if I can roll it back into one loop.
        for (int fetched = 1;
             line_index < blkSize && fetched < fetchWidth;
             line_index+=instSize, ++fetched)
        {
            // Reset the mem request to setup the read of the next
            // instruction.
            memReq->reset(PC, instSize, flags);

            // Translate the instruction request.
            fault = cpu->translateInstReq(memReq);

            // Read instruction.
            if (fault == No_Fault) {
                fault = cpu->mem->read(memReq, inst);
            }

            // Check if there was a fault.
            if (fault != No_Fault) {
                panic("Fetch: Read of instruction faulted when it should "
                      "succeed; most likely exceeding cache line.\n");
            }

            // Get a sequence number.
            inst_seq = cpu->getAndIncrementInstSeq();

            // Create the actual DynInst.  Parameters are:
            // DynInst(instruction, PC, predicted PC, CPU pointer).
            // Because this simple model has no branch prediction, the
            // predicted PC will simply be PC+sizeof(MachInst).
            // Update to actually use a branch predictor to predict the
            // target in the future.
            DynInst *instruction = new DynInst(inst, PC, PC+instSize,
                                               inst_seq, cpu);
            DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",
                    instruction, instruction->readPC());
            DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",
                    OPCODE(inst));

            cpu->addInst(instruction);

            // Write the instruction to the proper slot in the queue
            // that heads to decode.
            toDecode->insts[fetched] = instruction;

            // Might want to keep track of various stats.
//             numInstsFetched++;

            // Now update the PC to fetch the next instruction in the cache
            // line.
            PC = PC + instSize;
        }

        // If no branches predicted taken, then increment PC with
        // fall-through path.  This simple model always predicts not
        // taken.
        if (!predicted_branch) {
            next_PC = PC;
        }
    }

    // Now that fetching is completed, update the PC to signify what the next
    // cycle will be.  Might want to move this to the beginning of this
    // function so that the PC updates at the beginning of everything.
    // Or might want to leave setting the PC to the main CPU, with fetch
    // only changing the nextPC (will require correct determination of
    // next PC).
    if (fault == No_Fault) {
        DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);
        cpu->setPC(next_PC);
        cpu->setNextPC(next_PC + instSize);
    } else {
        // Handle the fault.
        // This stage will not be able to continue until all the ROB
        // slots are empty, at which point the fault can be handled.
        // The only other way it can wake up is if a squash comes along
        // and changes the PC.  Not sure how to handle that case...perhaps
        // have it handled by the upper level CPU class which peeks into the
        // time buffer and sees if a squash comes along, in which case it
        // changes the status.

        DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");

        _status = Blocked;
#ifdef FULL_SYSTEM
        // Trap will probably need a pointer to the CPU to do accessing.
        // Or an exec context. --Write ProxyExecContext eventually.
        // Avoid using this for now as the xc really shouldn't be in here.
        cpu->trap(fault);
#else // !FULL_SYSTEM
        fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());
#endif // FULL_SYSTEM
    }
}
Check in of new CPU. This checkin works under non-Fullsystem mode, with no caches. SConscript: Added new CPU files to build. arch/alpha/isa_desc: Changed rduniq and wruniq to be nonspeculative because the uniq register is not renamed. arch/isa_parser.py: Added new CPU exec method. base/statistics.hh: Minor change for namespace conflict. Probably can change back one the new CPU files are cleaned up. base/traceflags.py: Added new CPU trace flags. cpu/static_inst.hh: Changed static inst to use a file that defines the execute functions. --HG-- extra : convert_revision : bd4ce34361308280168324817fc1258dd253e519 2004-08-20 20:54:07 +02:00			`// Todo: Rewrite this. Add in branch prediction. Fix up if squashing comes`
			`// from decode; only the correct instructions should be killed. This will`
			`// probably require changing the CPU's instList functions to take a seqNum`
			`// instead of a dyninst. With probe path, should be able to specify`
			`// size of data to fetch. Will be able to get full cache line.`

			`// Remove this later.`
			`#define OPCODE(X) (X >> 26) & 0x3f`

			`#include "cpu/exetrace.hh"`
			`#include "mem/base_mem.hh"`
			`#include "mem/mem_interface.hh"`
			`#include "mem/mem_req.hh"`
			`#include "cpu/beta_cpu/fetch.hh"`

			`#include "sim/universe.hh"`

			`template<class Impl>`
			`SimpleFetch<Impl>::CacheCompletionEvent`
			`::CacheCompletionEvent(SimpleFetch *_fetch)`
			`: Event(&mainEventQueue),`
			`fetch(_fetch)`
			`{`
			`}`

			`template<class Impl>`
			`void`
			`SimpleFetch<Impl>::CacheCompletionEvent::process()`
			`{`
			`fetch->processCacheCompletion();`
			`}`

			`template<class Impl>`
			`const char *`
			`SimpleFetch<Impl>::CacheCompletionEvent::description()`
			`{`
			`return "SimpleFetch cache completion event";`
			`}`

			`template<class Impl>`
			`SimpleFetch<Impl>::SimpleFetch(Params &params)`
			`: cacheCompletionEvent(this),`
			`icacheInterface(params.icacheInterface),`
			`decodeToFetchDelay(params.decodeToFetchDelay),`
			`renameToFetchDelay(params.renameToFetchDelay),`
			`iewToFetchDelay(params.iewToFetchDelay),`
			`commitToFetchDelay(params.commitToFetchDelay),`
			`fetchWidth(params.fetchWidth),`
			`inst(0)`
			`{`
			`// Set status to idle.`
			`_status = Idle;`

			`// Create a new memory request.`
			`memReq = new MemReq();`
			`// Not sure of this parameter. I think it should be based on the`
			`// thread number.`
			`#ifndef FULL_SYSTEM`
			`memReq->asid = params.asid;`
			`#else`
			`memReq->asid = 0;`
			`#endif // FULL_SYSTEM`
			`memReq->data = new uint8_t[64];`

			`// Size of cache block.`
			`blkSize = icacheInterface ? icacheInterface->getBlockSize() : 64;`

			`// Create mask to get rid of offset bits.`
			`cacheBlockMask = ~((int)log2(blkSize) - 1);`

			`// Get the size of an instruction.`
			`instSize = sizeof(MachInst);`
			`}`

			`template<class Impl>`
			`void`
			`SimpleFetch<Impl>::setCPU(FullCPU *cpu_ptr)`
			`{`
			`DPRINTF(Fetch, "Fetch: Setting the CPU pointer.\n");`
			`cpu = cpu_ptr;`
			`// This line will be removed eventually.`
			`memReq->xc = cpu->xcBase();`
			`}`

			`template<class Impl>`
			`void`
			`SimpleFetch<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer)`
			`{`
			`DPRINTF(Fetch, "Fetch: Setting the time buffer pointer.\n");`
			`timeBuffer = time_buffer;`

			`// Create wires to get information from proper places in time buffer.`
			`fromDecode = timeBuffer->getWire(-decodeToFetchDelay);`
			`fromRename = timeBuffer->getWire(-renameToFetchDelay);`
			`fromIEW = timeBuffer->getWire(-iewToFetchDelay);`
			`fromCommit = timeBuffer->getWire(-commitToFetchDelay);`
			`}`

			`template<class Impl>`
			`void`
			`SimpleFetch<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)`
			`{`
			`DPRINTF(Fetch, "Fetch: Setting the fetch queue pointer.\n");`
			`fetchQueue = fq_ptr;`

			`// Create wire to write information to proper place in fetch queue.`
			`toDecode = fetchQueue->getWire(0);`
			`}`

			`template<class Impl>`
			`void`
			`SimpleFetch<Impl>::processCacheCompletion()`
			`{`
			`DPRINTF(Fetch, "Fetch: Waking up from cache miss.\n");`

			`// Only change the status if it's still waiting on the icache access`
			`// to return.`
			`// Can keep track of how many cache accesses go unused due to`
			`// misspeculation here.`
			`// How to handle an outstanding miss which gets cancelled due to squash,`
			`// then a new icache miss gets scheduled?`
			`if (_status == IcacheMissStall)`
			`_status = IcacheMissComplete;`
			`}`

			`// Note that in the SimpleFetch<>, will most likely have to provide the`
			`// template parameters to BP and BTB.`
			`template<class Impl>`
			`void`
			`SimpleFetch<Impl>::squash(Addr new_PC)`
			`{`
			`DPRINTF(Fetch, "Fetch: Squashing, setting PC to: %#x.\n", new_PC);`
			`cpu->setNextPC(new_PC + instSize);`
			`cpu->setPC(new_PC);`

			`_status = Squashing;`

			`// Clear out the instructions that are no longer valid.`
			`// Actually maybe slightly unrealistic to kill instructions that are`
			`// in flight like that between stages. Perhaps just have next`
			`// stage ignore those instructions or something. In the cycle where it's`
			`// returning from squashing, the other stages can just ignore the inputs`
			`// for that cycle.`

			`// Tell the CPU to remove any instructions that aren't currently`
			`// in the ROB (instructions in flight that were killed).`
			`cpu->removeInstsNotInROB();`
			`}`

			`template<class Impl>`
			`void`
			`SimpleFetch<Impl>::tick()`
			`{`
			`#if 0`
			`if (fromCommit->commitInfo.squash) {`
			`DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "`
			`"from commit.\n");`

			`// In any case, squash.`
			`squash(fromCommit->commitInfo.nextPC);`
			`return;`
			`}`

			`if (fromDecode->decodeInfo.squash) {`
			`DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "`
			`"from decode.\n");`

			`// Squash unless we're already squashing?`
			`squash(fromDecode->decodeInfo.nextPC);`
			`return;`
			`}`

			`if (fromCommit->commitInfo.robSquashing) {`
			`DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");`

			`// Continue to squash.`
			`_status = Squashing;`
			`return;`
			`}`

			`if (fromDecode->decodeInfo.stall \|\|`
			`fromRename->renameInfo.stall \|\|`
			`fromIEW->iewInfo.stall \|\|`
			`fromCommit->commitInfo.stall)`
			`{`
			`DPRINTF(Fetch, "Fetch: Stalling stage.\n");`
			`DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "`
			`"Commit: %i\n",`
			`fromDecode->decodeInfo.stall,`
			`fromRename->renameInfo.stall,`
			`fromIEW->iewInfo.stall,`
			`fromCommit->commitInfo.stall);`
			`// What to do if we're already in an icache stall?`
			`}`
			`#endif`

			`if (_status != Blocked &&`
			`_status != Squashing &&`
			`_status != IcacheMissStall) {`
			`DPRINTF(Fetch, "Fetch: Running stage.\n");`

			`fetch();`
			`} else if (_status == Blocked) {`
			`// If still being told to stall, do nothing.`
			`if (fromDecode->decodeInfo.stall \|\|`
			`fromRename->renameInfo.stall \|\|`
			`fromIEW->iewInfo.stall \|\|`
			`fromCommit->commitInfo.stall)`
			`{`
			`DPRINTF(Fetch, "Fetch: Stalling stage.\n");`
			`DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "`
			`"Commit: %i\n",`
			`fromDecode->decodeInfo.stall,`
			`fromRename->renameInfo.stall,`
			`fromIEW->iewInfo.stall,`
			`fromCommit->commitInfo.stall);`
			`} else {`

			`DPRINTF(Fetch, "Fetch: Done blocking.\n");`
			`_status = Running;`
			`}`

			`if (fromCommit->commitInfo.squash) {`
			`DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "`
			`"from commit.\n");`
			`squash(fromCommit->commitInfo.nextPC);`
			`return;`
			`} else if (fromDecode->decodeInfo.squash) {`
			`DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "`
			`"from decode.\n");`
			`squash(fromDecode->decodeInfo.nextPC);`
			`return;`
			`} else if (fromCommit->commitInfo.robSquashing) {`
			`DPRINTF(Fetch, "Fetch: ROB is still squashing.\n");`
			`_status = Squashing;`
			`return;`
			`}`
			`} else if (_status == Squashing) {`
			`// If there are no squash signals then change back to running.`
			`// Note that when a squash starts happening, commitInfo.squash will`
			`// be high. But if the squash is still in progress, then only`
			`// commitInfo.robSquashing will be high.`
			`if (!fromCommit->commitInfo.squash &&`
			`!fromCommit->commitInfo.robSquashing) {`

			`DPRINTF(Fetch, "Fetch: Done squashing.\n");`
			`_status = Running;`
			`} else if (fromCommit->commitInfo.squash) {`
			`// If there's a new squash, then start squashing again.`
			`squash(fromCommit->commitInfo.nextPC);`
			`} else {`
			`// Purely a debugging statement.`
			`DPRINTF(Fetch, "Fetch: ROB still squashing.\n");`
			`}`
			`}`

			`}`

			`template<class Impl>`
			`void`
			`SimpleFetch<Impl>::fetch()`
			`{`
			`//////////////////////////////////////////`
			`// Check backwards communication`
			`//////////////////////////////////////////`

			`// If branch prediction is incorrect, squash any instructions,`
			`// update PC, and do not fetch anything this cycle.`

			`// Might want to put all the PC changing stuff in one area.`
			`// Normally should also check here to see if there is branch`
			`// misprediction info to update with.`
			`if (fromCommit->commitInfo.squash) {`
			`DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "`
			`"from commit.\n");`
			`squash(fromCommit->commitInfo.nextPC);`
			`return;`
			`} else if (fromDecode->decodeInfo.squash) {`
			`DPRINTF(Fetch, "Fetch: Squashing instructions due to squash "`
			`"from decode.\n");`
			`squash(fromDecode->decodeInfo.nextPC);`
			`return;`
			`} else if (fromCommit->commitInfo.robSquashing) {`
			`DPRINTF(Fetch, "Fetch: ROB still squashing.\n");`
			`_status = Squashing;`
			`return;`
			`}`

			`// If being told to stall, do nothing.`
			`if (fromDecode->decodeInfo.stall \|\|`
			`fromRename->renameInfo.stall \|\|`
			`fromIEW->iewInfo.stall \|\|`
			`fromCommit->commitInfo.stall)`
			`{`
			`DPRINTF(Fetch, "Fetch: Stalling stage.\n");`
			`DPRINTF(Fetch, "Fetch: Statuses: Decode: %i Rename: %i IEW: %i "`
			`"Commit: %i\n",`
			`fromDecode->decodeInfo.stall,`
			`fromRename->renameInfo.stall,`
			`fromIEW->iewInfo.stall,`
			`fromCommit->commitInfo.stall);`
			`_status = Blocked;`
			`return;`
			`}`

			`//////////////////////////////////////////`
			`// Start actual fetch`
			`//////////////////////////////////////////`

			`// If nothing else outstanding, attempt to read instructions.`

			`#ifdef FULL_SYSTEM`
			`// Flag to say whether or not address is physical addr.`
			`unsigned flags = cpu->inPalMode() ? PHYSICAL : 0;`
			`#else`
			`unsigned flags = 0;`
			`#endif // FULL_SYSTEM`

			`// The current PC.`
			`Addr PC = cpu->readPC();`

			`// Fault code for memory access.`
			`Fault fault = No_Fault;`

			`// If returning from the delay of a cache miss, then update the status`
			`// to running, otherwise do the cache access.`
			`if (_status == IcacheMissComplete) {`
			`DPRINTF(Fetch, "Fetch: Icache miss is complete.\n");`

			`// Reset the completion event to NULL.`
			`memReq->completionEvent = NULL;`

			`_status = Running;`
			`} else {`
			`DPRINTF(Fetch, "Fetch: Attempting to translate and read "`
			`"instruction, starting at PC %08p.\n",`
			`PC);`

			`// Otherwise check if the instruction exists within the cache.`
			`// If it does, then proceed on to read the instruction and the rest`
			`// of the instructions in the cache line until either the end of the`
			`// cache line or a predicted taken branch is encountered.`
			`// Note that this simply checks if the first instruction exists`
			`// within the cache, assuming the rest of the cache line also exists`
			`// within the cache.`

			`// Setup the memReq to do a read of the first isntruction's address.`
			`// Set the appropriate read size and flags as well.`
			`memReq->cmd = Read;`
			`memReq->reset(PC, instSize, flags);`

			`// Translate the instruction request.`
			`// Should this function be`
			`// in the CPU class ? Probably...ITB/DTB should exist within the`
			`// CPU.`

			`fault = cpu->translateInstReq(memReq);`

			`// In the case of faults, the fetch stage may need to stall and wait`
			`// on what caused the fetch (ITB or Icache miss).`

			`// If translation was successful, attempt to read the first`
			`// instruction.`
			`if (fault == No_Fault) {`
			`DPRINTF(Fetch, "Fetch: Doing instruction read.\n");`
			`fault = cpu->mem->read(memReq, inst);`
			`// This read may change when the mem interface changes.`
			`}`

			`// Now do the timing access to see whether or not the instruction`
			`// exists within the cache.`
			`if (icacheInterface && fault == No_Fault) {`
			`DPRINTF(Fetch, "Fetch: Doing timing memory access.\n");`
			`memReq->completionEvent = NULL;`

			`memReq->time = curTick;`

			`MemAccessResult result = icacheInterface->access(memReq);`

			`// If the cache missed (in this model functional and timing`
			`// memories are different), then schedule an event to wake`
			`// up this stage once the cache miss completes.`
			`if (result != MA_HIT && icacheInterface->doEvents()) {`
			`memReq->completionEvent = &cacheCompletionEvent;`
			`// lastIcacheStall = curTick;`

			`// How does current model work as far as individual`
			`// stages scheduling/unscheduling?`
			`// Perhaps have only the main CPU scheduled/unscheduled,`
			`// and have it choose what stages to run appropriately.`

			`DPRINTF(Fetch, "Fetch: Stalling due to icache miss.\n");`
			`_status = IcacheMissStall;`
			`return;`
			`}`
			`}`
			`}`

			`// As far as timing goes, the CPU will need to send an event through`
			`// the MemReq in order to be woken up once the memory access completes.`
			`// Probably have a status on a per thread basis so each thread can`
			`// block independently and be woken up independently.`

			`Addr next_PC = 0;`
			`InstSeqNum inst_seq;`

			`// If the read of the first instruction was successful, then grab the`
			`// instructions from the rest of the cache line and put them into the`
			`// queue heading to decode.`
			`if (fault == No_Fault) {`
			`DPRINTF(Fetch, "Fetch: Adding instructions to queue to decode.\n");`

			`// Need to keep track of whether or not a predicted branch`
			`// ended this fetch block.`
			`bool predicted_branch = false;`

			`// Might want to keep track of various stats.`
			`// numLinesFetched++;`

			`// Get a sequence number.`
			`inst_seq = cpu->getAndIncrementInstSeq();`

			`// Because the first instruction was already fetched, create the`
			`// DynInst and put it into the queue to decode.`
			`DynInst *instruction = new DynInst(inst, PC, PC+instSize, inst_seq,`
			`cpu);`
			`DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",`
			`instruction, instruction->readPC());`
			`DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",`
			`OPCODE(inst));`

			`instruction->traceData =`
			`Trace::getInstRecord(curTick, cpu->xcBase(), cpu,`
			`instruction->staticInst,`
			`instruction->readPC(), 0);`

			`cpu->addInst(instruction);`

			`// Write the instruction to the first slot in the queue`
			`// that heads to decode.`
			`toDecode->insts[0] = instruction;`

			`// Now update the PC to fetch the next instruction in the cache`
			`// line.`
			`PC = PC + instSize;`

			`// Obtain the index into the cache line by getting only the low`
			`// order bits.`
			`int line_index = PC & cacheBlockMask;`

			`// Take instructions and put them into the queue heading to decode.`
			`// Then read the next instruction in the cache line. Continue`
			`// until either all of the fetch bandwidth is used (not an issue for`
			`// non-SMT), or the end of the cache line is reached. Note that`
			`// this assumes standard cachelines, and not something like a trace`
			`// cache where lines might not end at cache-line size aligned`
			`// addresses.`
			`// @todo: Fix the horrible amount of translates/reads that must`
			`// take place due to reading an entire cacheline. Ideally it`
			`// should all take place at once, return an array of binary`
			`// instructions, which can then be used to get all the instructions`
			`// needed. Figure out if I can roll it back into one loop.`
			`for (int fetched = 1;`
			`line_index < blkSize && fetched < fetchWidth;`
			`line_index+=instSize, ++fetched)`
			`{`
			`// Reset the mem request to setup the read of the next`
			`// instruction.`
			`memReq->reset(PC, instSize, flags);`

			`// Translate the instruction request.`
			`fault = cpu->translateInstReq(memReq);`

			`// Read instruction.`
			`if (fault == No_Fault) {`
			`fault = cpu->mem->read(memReq, inst);`
			`}`

			`// Check if there was a fault.`
			`if (fault != No_Fault) {`
			`panic("Fetch: Read of instruction faulted when it should "`
			`"succeed; most likely exceeding cache line.\n");`
			`}`

			`// Get a sequence number.`
			`inst_seq = cpu->getAndIncrementInstSeq();`

			`// Create the actual DynInst. Parameters are:`
			`// DynInst(instruction, PC, predicted PC, CPU pointer).`
			`// Because this simple model has no branch prediction, the`
			`// predicted PC will simply be PC+sizeof(MachInst).`
			`// Update to actually use a branch predictor to predict the`
			`// target in the future.`
			`DynInst *instruction = new DynInst(inst, PC, PC+instSize,`
			`inst_seq, cpu);`
			`DPRINTF(Fetch, "Fetch: Instruction %i created, with PC %#x\n",`
			`instruction, instruction->readPC());`
			`DPRINTF(Fetch, "Fetch: Instruction opcode is: %03p\n",`
			`OPCODE(inst));`

			`cpu->addInst(instruction);`

			`// Write the instruction to the proper slot in the queue`
			`// that heads to decode.`
			`toDecode->insts[fetched] = instruction;`

			`// Might want to keep track of various stats.`
			`// numInstsFetched++;`

			`// Now update the PC to fetch the next instruction in the cache`
			`// line.`
			`PC = PC + instSize;`
			`}`

			`// If no branches predicted taken, then increment PC with`
			`// fall-through path. This simple model always predicts not`
			`// taken.`
			`if (!predicted_branch) {`
			`next_PC = PC;`
			`}`
			`}`

			`// Now that fetching is completed, update the PC to signify what the next`
			`// cycle will be. Might want to move this to the beginning of this`
			`// function so that the PC updates at the beginning of everything.`
			`// Or might want to leave setting the PC to the main CPU, with fetch`
			`// only changing the nextPC (will require correct determination of`
			`// next PC).`
			`if (fault == No_Fault) {`
			`DPRINTF(Fetch, "Fetch: Setting PC to %08p.\n", next_PC);`
			`cpu->setPC(next_PC);`
			`cpu->setNextPC(next_PC + instSize);`
			`} else {`
			`// Handle the fault.`
			`// This stage will not be able to continue until all the ROB`
			`// slots are empty, at which point the fault can be handled.`
			`// The only other way it can wake up is if a squash comes along`
			`// and changes the PC. Not sure how to handle that case...perhaps`
			`// have it handled by the upper level CPU class which peeks into the`
			`// time buffer and sees if a squash comes along, in which case it`
			`// changes the status.`

			`DPRINTF(Fetch, "Fetch: Blocked, need to handle the trap.\n");`

			`_status = Blocked;`
			`#ifdef FULL_SYSTEM`
			`// Trap will probably need a pointer to the CPU to do accessing.`
			`// Or an exec context. --Write ProxyExecContext eventually.`
			`// Avoid using this for now as the xc really shouldn't be in here.`
			`cpu->trap(fault);`
			`#else // !FULL_SYSTEM`
			`fatal("fault (%d) detected @ PC %08p", fault, cpu->readPC());`
			`#endif // FULL_SYSTEM`
			`}`
			`}`