Merge ktlim@zamp:/z/ktlim2/clean/m5-o3
into zamp.eecs.umich.edu:/z/ktlim2/clean/newmem-merge src/cpu/checker/o3_cpu_builder.cc: src/cpu/o3/alpha_cpu.hh: src/cpu/o3/alpha_cpu_impl.hh: src/cpu/o3/alpha_dyn_inst_impl.hh: src/cpu/o3/bpred_unit.cc: src/cpu/o3/commit.hh: src/cpu/o3/fetch_impl.hh: src/cpu/o3/lsq_unit.hh: src/cpu/o3/lsq_unit_impl.hh: src/cpu/o3/thread_state.hh: Hand merge. --HG-- rename : cpu/activity.cc => src/cpu/activity.cc rename : cpu/activity.hh => src/cpu/activity.hh rename : cpu/base_dyn_inst.cc => src/cpu/base_dyn_inst.cc rename : cpu/checker/cpu.hh => src/cpu/checker/cpu.hh rename : cpu/checker/cpu_builder.cc => src/cpu/checker/cpu_builder.cc rename : cpu/checker/exec_context.hh => src/cpu/checker/exec_context.hh rename : cpu/checker/o3_cpu_builder.cc => src/cpu/checker/o3_cpu_builder.cc rename : cpu/o3/2bit_local_pred.cc => src/cpu/o3/2bit_local_pred.cc rename : cpu/o3/2bit_local_pred.hh => src/cpu/o3/2bit_local_pred.hh rename : cpu/o3/alpha_cpu.hh => src/cpu/o3/alpha_cpu.hh rename : cpu/o3/alpha_cpu_builder.cc => src/cpu/o3/alpha_cpu_builder.cc rename : cpu/o3/alpha_cpu_impl.hh => src/cpu/o3/alpha_cpu_impl.hh rename : cpu/o3/alpha_dyn_inst.hh => src/cpu/o3/alpha_dyn_inst.hh rename : cpu/o3/alpha_dyn_inst_impl.hh => src/cpu/o3/alpha_dyn_inst_impl.hh rename : cpu/o3/alpha_params.hh => src/cpu/o3/alpha_params.hh rename : cpu/o3/bpred_unit.cc => src/cpu/o3/bpred_unit.cc rename : cpu/o3/bpred_unit.hh => src/cpu/o3/bpred_unit.hh rename : cpu/o3/bpred_unit_impl.hh => src/cpu/o3/bpred_unit_impl.hh rename : cpu/o3/comm.hh => src/cpu/o3/comm.hh rename : cpu/o3/commit.hh => src/cpu/o3/commit.hh rename : cpu/o3/commit_impl.hh => src/cpu/o3/commit_impl.hh rename : cpu/o3/cpu.hh => src/cpu/o3/cpu.hh rename : cpu/o3/cpu_policy.hh => src/cpu/o3/cpu_policy.hh rename : cpu/o3/decode.hh => src/cpu/o3/decode.hh rename : cpu/o3/decode_impl.hh => src/cpu/o3/decode_impl.hh rename : cpu/o3/dep_graph.hh => src/cpu/o3/dep_graph.hh rename : cpu/o3/fetch.hh => src/cpu/o3/fetch.hh rename : cpu/o3/fetch_impl.hh 
=> src/cpu/o3/fetch_impl.hh rename : cpu/o3/fu_pool.cc => src/cpu/o3/fu_pool.cc rename : cpu/o3/fu_pool.hh => src/cpu/o3/fu_pool.hh rename : cpu/o3/iew.hh => src/cpu/o3/iew.hh rename : cpu/o3/iew_impl.hh => src/cpu/o3/iew_impl.hh rename : cpu/o3/inst_queue.hh => src/cpu/o3/inst_queue.hh rename : cpu/o3/inst_queue_impl.hh => src/cpu/o3/inst_queue_impl.hh rename : cpu/o3/lsq.hh => src/cpu/o3/lsq.hh rename : cpu/o3/lsq_unit.hh => src/cpu/o3/lsq_unit.hh rename : cpu/o3/lsq_unit_impl.hh => src/cpu/o3/lsq_unit_impl.hh rename : cpu/o3/mem_dep_unit.hh => src/cpu/o3/mem_dep_unit.hh rename : cpu/o3/mem_dep_unit_impl.hh => src/cpu/o3/mem_dep_unit_impl.hh rename : cpu/o3/rename.hh => src/cpu/o3/rename.hh rename : cpu/o3/rename_impl.hh => src/cpu/o3/rename_impl.hh rename : cpu/o3/rename_map.hh => src/cpu/o3/rename_map.hh rename : cpu/o3/rob.hh => src/cpu/o3/rob.hh rename : cpu/o3/store_set.cc => src/cpu/o3/store_set.cc rename : cpu/o3/store_set.hh => src/cpu/o3/store_set.hh rename : cpu/o3/thread_state.hh => src/cpu/o3/thread_state.hh rename : cpu/o3/tournament_pred.cc => src/cpu/o3/tournament_pred.cc rename : cpu/o3/tournament_pred.hh => src/cpu/o3/tournament_pred.hh rename : cpu/ozone/cpu_builder.cc => src/cpu/ozone/cpu_builder.cc rename : cpu/ozone/ozone_impl.hh => src/cpu/ozone/ozone_impl.hh rename : cpu/ozone/simple_impl.hh => src/cpu/ozone/simple_impl.hh rename : cpu/ozone/simple_params.hh => src/cpu/ozone/simple_params.hh rename : python/m5/objects/AlphaFullCPU.py => src/python/m5/objects/AlphaFullCPU.py rename : python/m5/objects/OzoneCPU.py => src/python/m5/objects/OzoneCPU.py extra : convert_revision : b7be30474dd03dd3970e737a9d0489aeb2ead84f
This commit is contained in:
commit
984c2a4ff6
54 changed files with 1274 additions and 351 deletions
|
@ -1,3 +1,30 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "base/timebuf.hh"
|
#include "base/timebuf.hh"
|
||||||
#include "cpu/activity.hh"
|
#include "cpu/activity.hh"
|
||||||
|
@ -14,6 +41,8 @@ ActivityRecorder::ActivityRecorder(int num_stages, int longest_latency,
|
||||||
void
|
void
|
||||||
ActivityRecorder::activity()
|
ActivityRecorder::activity()
|
||||||
{
|
{
|
||||||
|
// If we've already recorded activity for this cycle, we don't
|
||||||
|
// want to increment the count any more.
|
||||||
if (activityBuffer[0]) {
|
if (activityBuffer[0]) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -28,6 +57,8 @@ ActivityRecorder::activity()
|
||||||
void
|
void
|
||||||
ActivityRecorder::advance()
|
ActivityRecorder::advance()
|
||||||
{
|
{
|
||||||
|
// If there's a 1 in the slot that is about to be erased once the
|
||||||
|
// time buffer advances, then decrement the activityCount.
|
||||||
if (activityBuffer[-longestLatency]) {
|
if (activityBuffer[-longestLatency]) {
|
||||||
--activityCount;
|
--activityCount;
|
||||||
|
|
||||||
|
@ -46,6 +77,7 @@ ActivityRecorder::advance()
|
||||||
void
|
void
|
||||||
ActivityRecorder::activateStage(const int idx)
|
ActivityRecorder::activateStage(const int idx)
|
||||||
{
|
{
|
||||||
|
// Increment the activity count if this stage wasn't already active.
|
||||||
if (!stageActive[idx]) {
|
if (!stageActive[idx]) {
|
||||||
++activityCount;
|
++activityCount;
|
||||||
|
|
||||||
|
@ -62,6 +94,7 @@ ActivityRecorder::activateStage(const int idx)
|
||||||
void
|
void
|
||||||
ActivityRecorder::deactivateStage(const int idx)
|
ActivityRecorder::deactivateStage(const int idx)
|
||||||
{
|
{
|
||||||
|
// Decrement the activity count if this stage was active.
|
||||||
if (stageActive[idx]) {
|
if (stageActive[idx]) {
|
||||||
--activityCount;
|
--activityCount;
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,30 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef __CPU_ACTIVITY_HH__
|
#ifndef __CPU_ACTIVITY_HH__
|
||||||
#define __CPU_ACTIVITY_HH__
|
#define __CPU_ACTIVITY_HH__
|
||||||
|
@ -5,33 +32,61 @@
|
||||||
#include "base/timebuf.hh"
|
#include "base/timebuf.hh"
|
||||||
#include "base/trace.hh"
|
#include "base/trace.hh"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ActivityRecorder helper class that informs the CPU if it can switch
|
||||||
|
* over to being idle or not. It works by having a time buffer as
|
||||||
|
* long as any time buffer in the CPU, and the CPU and all of its
|
||||||
|
* stages inform the ActivityRecorder when they write to any time
|
||||||
|
* buffer. The ActivityRecorder marks a 1 in the "0" slot of the time
|
||||||
|
* buffer any time a stage writes to a time buffer, and it advances
|
||||||
|
* its time buffer at the same time as all other stages. The
|
||||||
|
* ActivityRecorder also records if a stage has activity to do next
|
||||||
|
* cycle. The recorder keeps a count of these two. Thus any time the
|
||||||
|
* count is non-zero, there is either communication still in flight,
|
||||||
|
* or activity that still must be done, meaning that the CPU can not
|
||||||
|
* idle. If count is zero, then the CPU can safely idle as it has no
|
||||||
|
* more outstanding work to do.
|
||||||
|
*/
|
||||||
class ActivityRecorder {
|
class ActivityRecorder {
|
||||||
public:
|
public:
|
||||||
ActivityRecorder(int num_stages, int longest_latency, int count);
|
ActivityRecorder(int num_stages, int longest_latency, int count);
|
||||||
|
|
||||||
/** Records that there is activity this cycle. */
|
/** Records that there is activity this cycle. */
|
||||||
void activity();
|
void activity();
|
||||||
/** Advances the activity buffer, decrementing the activityCount if active
|
|
||||||
* communication just left the time buffer, and descheduling the CPU if
|
/** Advances the activity buffer, decrementing the activityCount
|
||||||
* there is no activity.
|
* if active communication just left the time buffer, and
|
||||||
|
* determining if there is no activity.
|
||||||
*/
|
*/
|
||||||
void advance();
|
void advance();
|
||||||
|
|
||||||
/** Marks a stage as active. */
|
/** Marks a stage as active. */
|
||||||
void activateStage(const int idx);
|
void activateStage(const int idx);
|
||||||
|
|
||||||
/** Deactivates a stage. */
|
/** Deactivates a stage. */
|
||||||
void deactivateStage(const int idx);
|
void deactivateStage(const int idx);
|
||||||
|
|
||||||
|
/** Returns how many things are active within the recorder. */
|
||||||
int getActivityCount() { return activityCount; }
|
int getActivityCount() { return activityCount; }
|
||||||
|
|
||||||
|
/** Sets the count to a starting value. Can be used to disable
|
||||||
|
* the idling option.
|
||||||
|
*/
|
||||||
void setActivityCount(int count)
|
void setActivityCount(int count)
|
||||||
{ activityCount = count; }
|
{ activityCount = count; }
|
||||||
|
|
||||||
|
/** Returns if the CPU should be active. */
|
||||||
bool active() { return activityCount; }
|
bool active() { return activityCount; }
|
||||||
|
|
||||||
|
/** Clears the time buffer and the activity count. */
|
||||||
void reset();
|
void reset();
|
||||||
|
|
||||||
|
/** Debug function to dump the contents of the time buffer. */
|
||||||
void dump();
|
void dump();
|
||||||
|
|
||||||
|
/** Debug function to ensure that the activity count matches the
|
||||||
|
* contents of the time buffer.
|
||||||
|
*/
|
||||||
void validate();
|
void validate();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -45,6 +100,7 @@ class ActivityRecorder {
|
||||||
*/
|
*/
|
||||||
TimeBuffer<bool> activityBuffer;
|
TimeBuffer<bool> activityBuffer;
|
||||||
|
|
||||||
|
/** Longest latency time buffer in the CPU. */
|
||||||
int longestLatency;
|
int longestLatency;
|
||||||
|
|
||||||
/** Tracks how many stages and cycles of time buffer have
|
/** Tracks how many stages and cycles of time buffer have
|
||||||
|
@ -58,6 +114,7 @@ class ActivityRecorder {
|
||||||
*/
|
*/
|
||||||
int activityCount;
|
int activityCount;
|
||||||
|
|
||||||
|
/** Number of stages that can be marked as active or inactive. */
|
||||||
int numStages;
|
int numStages;
|
||||||
|
|
||||||
/** Records which stages are active/inactive. */
|
/** Records which stages are active/inactive. */
|
||||||
|
|
|
@ -168,6 +168,8 @@ BaseDynInst<Impl>::~BaseDynInst()
|
||||||
delete traceData;
|
delete traceData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fault = NoFault;
|
||||||
|
|
||||||
--instcount;
|
--instcount;
|
||||||
|
|
||||||
DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction destroyed. Instcount=%i\n",
|
DPRINTF(DynInst, "DynInst: [sn:%lli] Instruction destroyed. Instcount=%i\n",
|
||||||
|
@ -298,7 +300,7 @@ BaseDynInst<Impl>::copy(Addr dest)
|
||||||
/*
|
/*
|
||||||
uint8_t data[64];
|
uint8_t data[64];
|
||||||
FunctionalMemory *mem = thread->mem;
|
FunctionalMemory *mem = thread->mem;
|
||||||
assert(thread->copySrcPhysAddr || thread->misspeculating());
|
assert(thread->copySrcPhysAddr);
|
||||||
MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64);
|
MemReqPtr req = new MemReq(dest, thread->getXCProxy(), 64);
|
||||||
req->asid = asid;
|
req->asid = asid;
|
||||||
|
|
||||||
|
|
|
@ -66,6 +66,22 @@ class Checkpoint;
|
||||||
class Request;
|
class Request;
|
||||||
class Sampler;
|
class Sampler;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CheckerCPU class. Dynamically verifies instructions as they are
|
||||||
|
* completed by making sure that the instruction and its results match
|
||||||
|
* the independent execution of the benchmark inside the checker. The
|
||||||
|
* checker verifies instructions in order, regardless of the order in
|
||||||
|
* which instructions complete. There are certain results that can
|
||||||
|
* not be verified, specifically the result of a store conditional or
|
||||||
|
* the values of uncached accesses. In these cases, and with
|
||||||
|
* instructions marked as "IsUnverifiable", the checker assumes that
|
||||||
|
* the value from the main CPU's execution is correct and simply
|
||||||
|
* copies that value. It provides a CheckerExecContext (see
|
||||||
|
* checker/exec_context.hh) that provides hooks for updating the
|
||||||
|
* Checker's state through any ExecContext accesses. This allows the
|
||||||
|
* checker to be able to correctly verify instructions, even with
|
||||||
|
* external accesses to the ExecContext that change state.
|
||||||
|
*/
|
||||||
class CheckerCPU : public BaseCPU
|
class CheckerCPU : public BaseCPU
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
|
@ -74,7 +90,6 @@ class CheckerCPU : public BaseCPU
|
||||||
typedef TheISA::FloatRegBits FloatRegBits;
|
typedef TheISA::FloatRegBits FloatRegBits;
|
||||||
typedef TheISA::MiscReg MiscReg;
|
typedef TheISA::MiscReg MiscReg;
|
||||||
public:
|
public:
|
||||||
// main simulation loop (one cycle)
|
|
||||||
virtual void init();
|
virtual void init();
|
||||||
|
|
||||||
struct Params : public BaseCPU::Params
|
struct Params : public BaseCPU::Params
|
||||||
|
@ -329,6 +344,12 @@ class CheckerCPU : public BaseCPU
|
||||||
InstSeqNum youngestSN;
|
InstSeqNum youngestSN;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Templated Checker class. This Checker class is templated on the
|
||||||
|
* DynInstPtr of the instruction type that will be verified. Proper
|
||||||
|
* template instantiations of the Checker must be placed at the bottom
|
||||||
|
* of checker/cpu.cc.
|
||||||
|
*/
|
||||||
template <class DynInstPtr>
|
template <class DynInstPtr>
|
||||||
class Checker : public CheckerCPU
|
class Checker : public CheckerCPU
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,3 +1,30 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
@ -10,6 +37,9 @@
|
||||||
#include "sim/process.hh"
|
#include "sim/process.hh"
|
||||||
#include "sim/sim_object.hh"
|
#include "sim/sim_object.hh"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specific non-templated derived class used for SimObject configuration.
|
||||||
|
*/
|
||||||
class OzoneChecker : public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >
|
class OzoneChecker : public Checker<RefCountingPtr<OzoneDynInst<OzoneImpl> > >
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -38,6 +38,13 @@ namespace Kernel {
|
||||||
class Statistics;
|
class Statistics;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Derived ExecContext class for use with the Checker. The template
|
||||||
|
* parameter is the ExecContext class used by the specific CPU being
|
||||||
|
* verified. This CheckerExecContext is then used by the main CPU in
|
||||||
|
* place of its usual ExecContext class. It handles updating the
|
||||||
|
* checker's state any time state is updated through the ExecContext.
|
||||||
|
*/
|
||||||
template <class XC>
|
template <class XC>
|
||||||
class CheckerExecContext : public ExecContext
|
class CheckerExecContext : public ExecContext
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,3 +1,30 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
@ -11,6 +38,9 @@
|
||||||
|
|
||||||
class MemObject;
|
class MemObject;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specific non-templated derived class used for SimObject configuration.
|
||||||
|
*/
|
||||||
class O3Checker : public Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >
|
class O3Checker : public Checker<RefCountingPtr<AlphaDynInst<AlphaSimpleImpl> > >
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -33,9 +33,9 @@
|
||||||
#include "base/trace.hh"
|
#include "base/trace.hh"
|
||||||
#include "cpu/o3/2bit_local_pred.hh"
|
#include "cpu/o3/2bit_local_pred.hh"
|
||||||
|
|
||||||
DefaultBP::DefaultBP(unsigned _localPredictorSize,
|
LocalBP::LocalBP(unsigned _localPredictorSize,
|
||||||
unsigned _localCtrBits,
|
unsigned _localCtrBits,
|
||||||
unsigned _instShiftAmt)
|
unsigned _instShiftAmt)
|
||||||
: localPredictorSize(_localPredictorSize),
|
: localPredictorSize(_localPredictorSize),
|
||||||
localCtrBits(_localCtrBits),
|
localCtrBits(_localCtrBits),
|
||||||
instShiftAmt(_instShiftAmt)
|
instShiftAmt(_instShiftAmt)
|
||||||
|
@ -71,7 +71,7 @@ DefaultBP::DefaultBP(unsigned _localPredictorSize,
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
DefaultBP::reset()
|
LocalBP::reset()
|
||||||
{
|
{
|
||||||
for (int i = 0; i < localPredictorSets; ++i) {
|
for (int i = 0; i < localPredictorSets; ++i) {
|
||||||
localCtrs[i].reset();
|
localCtrs[i].reset();
|
||||||
|
@ -79,21 +79,21 @@ DefaultBP::reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
DefaultBP::lookup(Addr &branch_addr)
|
LocalBP::lookup(Addr &branch_addr, void * &bp_history)
|
||||||
{
|
{
|
||||||
bool taken;
|
bool taken;
|
||||||
uint8_t local_prediction;
|
uint8_t counter_val;
|
||||||
unsigned local_predictor_idx = getLocalIndex(branch_addr);
|
unsigned local_predictor_idx = getLocalIndex(branch_addr);
|
||||||
|
|
||||||
DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
|
DPRINTF(Fetch, "Branch predictor: Looking up index %#x\n",
|
||||||
local_predictor_idx);
|
local_predictor_idx);
|
||||||
|
|
||||||
local_prediction = localCtrs[local_predictor_idx].read();
|
counter_val = localCtrs[local_predictor_idx].read();
|
||||||
|
|
||||||
DPRINTF(Fetch, "Branch predictor: prediction is %i.\n",
|
DPRINTF(Fetch, "Branch predictor: prediction is %i.\n",
|
||||||
(int)local_prediction);
|
(int)counter_val);
|
||||||
|
|
||||||
taken = getPrediction(local_prediction);
|
taken = getPrediction(counter_val);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
// Speculative update.
|
// Speculative update.
|
||||||
|
@ -110,8 +110,9 @@ DefaultBP::lookup(Addr &branch_addr)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
DefaultBP::update(Addr &branch_addr, bool taken)
|
LocalBP::update(Addr &branch_addr, bool taken, void *bp_history)
|
||||||
{
|
{
|
||||||
|
assert(bp_history == NULL);
|
||||||
unsigned local_predictor_idx;
|
unsigned local_predictor_idx;
|
||||||
|
|
||||||
// Update the local predictor.
|
// Update the local predictor.
|
||||||
|
@ -131,7 +132,7 @@ DefaultBP::update(Addr &branch_addr, bool taken)
|
||||||
|
|
||||||
inline
|
inline
|
||||||
bool
|
bool
|
||||||
DefaultBP::getPrediction(uint8_t &count)
|
LocalBP::getPrediction(uint8_t &count)
|
||||||
{
|
{
|
||||||
// Get the MSB of the count
|
// Get the MSB of the count
|
||||||
return (count >> (localCtrBits - 1));
|
return (count >> (localCtrBits - 1));
|
||||||
|
@ -139,7 +140,7 @@ DefaultBP::getPrediction(uint8_t &count)
|
||||||
|
|
||||||
inline
|
inline
|
||||||
unsigned
|
unsigned
|
||||||
DefaultBP::getLocalIndex(Addr &branch_addr)
|
LocalBP::getLocalIndex(Addr &branch_addr)
|
||||||
{
|
{
|
||||||
return (branch_addr >> instShiftAmt) & indexMask;
|
return (branch_addr >> instShiftAmt) & indexMask;
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,14 @@
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
class DefaultBP
|
/**
|
||||||
|
* Implements a local predictor that uses the PC to index into a table of
|
||||||
|
* counters. Note that any time a pointer to the bp_history is given, it
|
||||||
|
* should be NULL using this predictor because it does not have any branch
|
||||||
|
* predictor state that needs to be recorded or updated; the update can be
|
||||||
|
* determined solely by the branch being taken or not taken.
|
||||||
|
*/
|
||||||
|
class LocalBP
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
/**
|
/**
|
||||||
|
@ -46,28 +53,31 @@ class DefaultBP
|
||||||
* @param localCtrBits Number of bits per counter.
|
* @param localCtrBits Number of bits per counter.
|
||||||
* @param instShiftAmt Offset amount for instructions to ignore alignment.
|
* @param instShiftAmt Offset amount for instructions to ignore alignment.
|
||||||
*/
|
*/
|
||||||
DefaultBP(unsigned localPredictorSize, unsigned localCtrBits,
|
LocalBP(unsigned localPredictorSize, unsigned localCtrBits,
|
||||||
unsigned instShiftAmt);
|
unsigned instShiftAmt);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Looks up the given address in the branch predictor and returns
|
* Looks up the given address in the branch predictor and returns
|
||||||
* a true/false value as to whether it is taken.
|
* a true/false value as to whether it is taken.
|
||||||
* @param branch_addr The address of the branch to look up.
|
* @param branch_addr The address of the branch to look up.
|
||||||
|
* @param bp_history Pointer to any bp history state.
|
||||||
* @return Whether or not the branch is taken.
|
* @return Whether or not the branch is taken.
|
||||||
*/
|
*/
|
||||||
bool lookup(Addr &branch_addr);
|
bool lookup(Addr &branch_addr, void * &bp_history);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates the branch predictor with the actual result of a branch.
|
* Updates the branch predictor with the actual result of a branch.
|
||||||
* @param branch_addr The address of the branch to update.
|
* @param branch_addr The address of the branch to update.
|
||||||
* @param taken Whether or not the branch was taken.
|
* @param taken Whether or not the branch was taken.
|
||||||
*/
|
*/
|
||||||
void update(Addr &branch_addr, bool taken);
|
void update(Addr &branch_addr, bool taken, void *bp_history);
|
||||||
|
|
||||||
|
void squash(void *bp_history)
|
||||||
|
{ assert(bp_history == NULL); }
|
||||||
|
|
||||||
void reset();
|
void reset();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the taken/not taken prediction given the value of the
|
* Returns the taken/not taken prediction given the value of the
|
||||||
* counter.
|
* counter.
|
||||||
|
|
|
@ -43,6 +43,14 @@ namespace Kernel {
|
||||||
|
|
||||||
class TranslatingPort;
|
class TranslatingPort;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* AlphaFullCPU class. Derives from the FullO3CPU class, and
|
||||||
|
* implements all ISA and implementation specific functions of the
|
||||||
|
* CPU. This is the CPU class that is used for the SimObjects, and is
|
||||||
|
* what is given to the DynInsts. Most of its state exists in the
|
||||||
|
* FullO3CPU; the state it has is mainly for ISA specific
|
||||||
|
* functionality.
|
||||||
|
*/
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
class AlphaFullCPU : public FullO3CPU<Impl>
|
class AlphaFullCPU : public FullO3CPU<Impl>
|
||||||
{
|
{
|
||||||
|
@ -62,83 +70,120 @@ class AlphaFullCPU : public FullO3CPU<Impl>
|
||||||
/** Constructs an AlphaFullCPU with the given parameters. */
|
/** Constructs an AlphaFullCPU with the given parameters. */
|
||||||
AlphaFullCPU(Params *params);
|
AlphaFullCPU(Params *params);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Derived ExecContext class for use with the AlphaFullCPU. It
|
||||||
|
* provides the interface for any external objects to access a
|
||||||
|
* single thread's state and some general CPU state. Any time
|
||||||
|
* external objects try to update state through this interface,
|
||||||
|
* the CPU will create an event to squash all in-flight
|
||||||
|
* instructions in order to ensure state is maintained correctly.
|
||||||
|
*/
|
||||||
class AlphaXC : public ExecContext
|
class AlphaXC : public ExecContext
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
/** Pointer to the CPU. */
|
||||||
AlphaFullCPU<Impl> *cpu;
|
AlphaFullCPU<Impl> *cpu;
|
||||||
|
|
||||||
|
/** Pointer to the thread state that this XC corresponds to. */
|
||||||
O3ThreadState<Impl> *thread;
|
O3ThreadState<Impl> *thread;
|
||||||
|
|
||||||
|
/** Returns a pointer to this CPU. */
|
||||||
virtual BaseCPU *getCpuPtr() { return cpu; }
|
virtual BaseCPU *getCpuPtr() { return cpu; }
|
||||||
|
|
||||||
|
/** Sets this CPU's ID. */
|
||||||
virtual void setCpuId(int id) { cpu->cpu_id = id; }
|
virtual void setCpuId(int id) { cpu->cpu_id = id; }
|
||||||
|
|
||||||
|
/** Reads this CPU's ID. */
|
||||||
virtual int readCpuId() { return cpu->cpu_id; }
|
virtual int readCpuId() { return cpu->cpu_id; }
|
||||||
|
|
||||||
virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; }
|
virtual TranslatingPort *getMemPort() { return /*thread->port*/ NULL; }
|
||||||
|
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
|
/** Returns a pointer to the system. */
|
||||||
virtual System *getSystemPtr() { return cpu->system; }
|
virtual System *getSystemPtr() { return cpu->system; }
|
||||||
|
|
||||||
|
/** Returns a pointer to physical memory. */
|
||||||
virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
|
virtual PhysicalMemory *getPhysMemPtr() { return cpu->physmem; }
|
||||||
|
|
||||||
|
/** Returns a pointer to the ITB. */
|
||||||
virtual AlphaITB *getITBPtr() { return cpu->itb; }
|
virtual AlphaITB *getITBPtr() { return cpu->itb; }
|
||||||
|
|
||||||
virtual AlphaDTB * getDTBPtr() { return cpu->dtb; }
|
/** Returns a pointer to the DTB. */
|
||||||
|
virtual AlphaDTB *getDTBPtr() { return cpu->dtb; }
|
||||||
|
|
||||||
|
/** Returns a pointer to this thread's kernel statistics. */
|
||||||
virtual Kernel::Statistics *getKernelStats()
|
virtual Kernel::Statistics *getKernelStats()
|
||||||
{ return thread->kernelStats; }
|
{ return thread->kernelStats; }
|
||||||
#else
|
#else
|
||||||
|
/** Returns a pointer to this thread's process. */
|
||||||
virtual Process *getProcessPtr() { return thread->process; }
|
virtual Process *getProcessPtr() { return thread->process; }
|
||||||
#endif
|
#endif
|
||||||
|
/** Returns this thread's status. */
|
||||||
virtual Status status() const { return thread->status(); }
|
virtual Status status() const { return thread->status(); }
|
||||||
|
|
||||||
|
/** Sets this thread's status. */
|
||||||
virtual void setStatus(Status new_status)
|
virtual void setStatus(Status new_status)
|
||||||
{ thread->setStatus(new_status); }
|
{ thread->setStatus(new_status); }
|
||||||
|
|
||||||
/// Set the status to Active. Optional delay indicates number of
|
/** Set the status to Active. Optional delay indicates number of
|
||||||
/// cycles to wait before beginning execution.
|
* cycles to wait before beginning execution. */
|
||||||
virtual void activate(int delay = 1);
|
virtual void activate(int delay = 1);
|
||||||
|
|
||||||
/// Set the status to Suspended.
|
/** Set the status to Suspended. */
|
||||||
virtual void suspend();
|
virtual void suspend();
|
||||||
|
|
||||||
/// Set the status to Unallocated.
|
/** Set the status to Unallocated. */
|
||||||
virtual void deallocate();
|
virtual void deallocate();
|
||||||
|
|
||||||
/// Set the status to Halted.
|
/** Set the status to Halted. */
|
||||||
virtual void halt();
|
virtual void halt();
|
||||||
|
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
|
/** Dumps the function profiling information.
|
||||||
|
* @todo: Implement.
|
||||||
|
*/
|
||||||
virtual void dumpFuncProfile();
|
virtual void dumpFuncProfile();
|
||||||
#endif
|
#endif
|
||||||
|
/** Takes over execution of a thread from another CPU. */
|
||||||
virtual void takeOverFrom(ExecContext *old_context);
|
virtual void takeOverFrom(ExecContext *old_context);
|
||||||
|
|
||||||
|
/** Registers statistics associated with this XC. */
|
||||||
virtual void regStats(const std::string &name);
|
virtual void regStats(const std::string &name);
|
||||||
|
|
||||||
|
/** Serializes state. */
|
||||||
virtual void serialize(std::ostream &os);
|
virtual void serialize(std::ostream &os);
|
||||||
|
/** Unserializes state. */
|
||||||
virtual void unserialize(Checkpoint *cp, const std::string &section);
|
virtual void unserialize(Checkpoint *cp, const std::string &section);
|
||||||
|
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
|
/** Returns pointer to the quiesce event. */
|
||||||
virtual EndQuiesceEvent *getQuiesceEvent();
|
virtual EndQuiesceEvent *getQuiesceEvent();
|
||||||
|
|
||||||
|
/** Reads the last tick that this thread was activated on. */
|
||||||
virtual Tick readLastActivate();
|
virtual Tick readLastActivate();
|
||||||
|
/** Reads the last tick that this thread was suspended on. */
|
||||||
virtual Tick readLastSuspend();
|
virtual Tick readLastSuspend();
|
||||||
|
|
||||||
|
/** Clears the function profiling information. */
|
||||||
virtual void profileClear();
|
virtual void profileClear();
|
||||||
|
/** Samples the function profiling information. */
|
||||||
virtual void profileSample();
|
virtual void profileSample();
|
||||||
#endif
|
#endif
|
||||||
|
/** Returns this thread's ID number. */
|
||||||
virtual int getThreadNum() { return thread->tid; }
|
virtual int getThreadNum() { return thread->tid; }
|
||||||
|
|
||||||
|
/** Returns the instruction this thread is currently committing.
|
||||||
|
* Only used when an instruction faults.
|
||||||
|
*/
|
||||||
virtual TheISA::MachInst getInst();
|
virtual TheISA::MachInst getInst();
|
||||||
|
|
||||||
|
/** Copies the architectural registers from another XC into this XC. */
|
||||||
virtual void copyArchRegs(ExecContext *xc);
|
virtual void copyArchRegs(ExecContext *xc);
|
||||||
|
|
||||||
|
/** Resets all architectural registers to 0. */
|
||||||
virtual void clearArchRegs();
|
virtual void clearArchRegs();
|
||||||
|
|
||||||
|
/** Reads an integer register. */
|
||||||
virtual uint64_t readIntReg(int reg_idx);
|
virtual uint64_t readIntReg(int reg_idx);
|
||||||
|
|
||||||
virtual FloatReg readFloatReg(int reg_idx, int width);
|
virtual FloatReg readFloatReg(int reg_idx, int width);
|
||||||
|
@ -149,6 +194,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
|
||||||
|
|
||||||
virtual FloatRegBits readFloatRegBits(int reg_idx);
|
virtual FloatRegBits readFloatRegBits(int reg_idx);
|
||||||
|
|
||||||
|
/** Sets an integer register to a value. */
|
||||||
virtual void setIntReg(int reg_idx, uint64_t val);
|
virtual void setIntReg(int reg_idx, uint64_t val);
|
||||||
|
|
||||||
virtual void setFloatReg(int reg_idx, FloatReg val, int width);
|
virtual void setFloatReg(int reg_idx, FloatReg val, int width);
|
||||||
|
@ -159,14 +205,18 @@ class AlphaFullCPU : public FullO3CPU<Impl>
|
||||||
|
|
||||||
virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
|
virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
|
||||||
|
|
||||||
|
/** Reads this thread's PC. */
|
||||||
virtual uint64_t readPC()
|
virtual uint64_t readPC()
|
||||||
{ return cpu->readPC(thread->tid); }
|
{ return cpu->readPC(thread->tid); }
|
||||||
|
|
||||||
|
/** Sets this thread's PC. */
|
||||||
virtual void setPC(uint64_t val);
|
virtual void setPC(uint64_t val);
|
||||||
|
|
||||||
|
/** Reads this thread's next PC. */
|
||||||
virtual uint64_t readNextPC()
|
virtual uint64_t readNextPC()
|
||||||
{ return cpu->readNextPC(thread->tid); }
|
{ return cpu->readNextPC(thread->tid); }
|
||||||
|
|
||||||
|
/** Sets this thread's next PC. */
|
||||||
virtual void setNextPC(uint64_t val);
|
virtual void setNextPC(uint64_t val);
|
||||||
|
|
||||||
virtual uint64_t readNextNPC()
|
virtual uint64_t readNextNPC()
|
||||||
|
@ -178,43 +228,60 @@ class AlphaFullCPU : public FullO3CPU<Impl>
|
||||||
virtual void setNextNPC(uint64_t val)
|
virtual void setNextNPC(uint64_t val)
|
||||||
{ panic("Alpha has no NextNPC!"); }
|
{ panic("Alpha has no NextNPC!"); }
|
||||||
|
|
||||||
|
/** Reads a miscellaneous register. */
|
||||||
virtual MiscReg readMiscReg(int misc_reg)
|
virtual MiscReg readMiscReg(int misc_reg)
|
||||||
{ return cpu->readMiscReg(misc_reg, thread->tid); }
|
{ return cpu->readMiscReg(misc_reg, thread->tid); }
|
||||||
|
|
||||||
|
/** Reads a misc. register, including any side-effects the
|
||||||
|
* read might have as defined by the architecture. */
|
||||||
virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
|
virtual MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
|
||||||
{ return cpu->readMiscRegWithEffect(misc_reg, fault, thread->tid); }
|
{ return cpu->readMiscRegWithEffect(misc_reg, fault, thread->tid); }
|
||||||
|
|
||||||
|
/** Sets a misc. register. */
|
||||||
virtual Fault setMiscReg(int misc_reg, const MiscReg &val);
|
virtual Fault setMiscReg(int misc_reg, const MiscReg &val);
|
||||||
|
|
||||||
|
/** Sets a misc. register, including any side-effects the
|
||||||
|
* write might have as defined by the architecture. */
|
||||||
virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
|
virtual Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val);
|
||||||
|
|
||||||
|
/** Returns the number of consecutive store conditional failures. */
|
||||||
// @todo: Figure out where these store cond failures should go.
|
// @todo: Figure out where these store cond failures should go.
|
||||||
virtual unsigned readStCondFailures()
|
virtual unsigned readStCondFailures()
|
||||||
{ return thread->storeCondFailures; }
|
{ return thread->storeCondFailures; }
|
||||||
|
|
||||||
|
/** Sets the number of consecutive store conditional failures. */
|
||||||
virtual void setStCondFailures(unsigned sc_failures)
|
virtual void setStCondFailures(unsigned sc_failures)
|
||||||
{ thread->storeCondFailures = sc_failures; }
|
{ thread->storeCondFailures = sc_failures; }
|
||||||
|
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
|
/** Returns if the thread is currently in PAL mode, based on
|
||||||
|
* the PC's value. */
|
||||||
virtual bool inPalMode()
|
virtual bool inPalMode()
|
||||||
{ return TheISA::PcPAL(cpu->readPC(thread->tid)); }
|
{ return TheISA::PcPAL(cpu->readPC(thread->tid)); }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Only really makes sense for old CPU model. Lots of code
|
// Only really makes sense for old CPU model. Lots of code
|
||||||
// outside the CPU still checks this function, so it will
|
// outside the CPU still checks this function, so it will
|
||||||
// always return false to keep everything working.
|
// always return false to keep everything working.
|
||||||
|
/** Checks if the thread is misspeculating. Because it is
|
||||||
|
* very difficult to determine if the thread is
|
||||||
|
* misspeculating, this is set as false. */
|
||||||
virtual bool misspeculating() { return false; }
|
virtual bool misspeculating() { return false; }
|
||||||
|
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
|
/** Gets a syscall argument by index. */
|
||||||
virtual IntReg getSyscallArg(int i);
|
virtual IntReg getSyscallArg(int i);
|
||||||
|
|
||||||
|
/** Sets a syscall argument. */
|
||||||
virtual void setSyscallArg(int i, IntReg val);
|
virtual void setSyscallArg(int i, IntReg val);
|
||||||
|
|
||||||
|
/** Sets the syscall return value. */
|
||||||
virtual void setSyscallReturn(SyscallReturn return_value);
|
virtual void setSyscallReturn(SyscallReturn return_value);
|
||||||
|
|
||||||
|
/** Executes a syscall in SE mode. */
|
||||||
virtual void syscall(int64_t callnum)
|
virtual void syscall(int64_t callnum)
|
||||||
{ return cpu->syscall(callnum, thread->tid); }
|
{ return cpu->syscall(callnum, thread->tid); }
|
||||||
|
|
||||||
|
/** Reads the funcExeInst counter. */
|
||||||
virtual Counter readFuncExeInst() { return thread->funcExeInst; }
|
virtual Counter readFuncExeInst() { return thread->funcExeInst; }
|
||||||
#endif
|
#endif
|
||||||
virtual void changeRegFileContext(TheISA::RegFile::ContextParam param,
|
virtual void changeRegFileContext(TheISA::RegFile::ContextParam param,
|
||||||
|
@ -274,19 +341,32 @@ class AlphaFullCPU : public FullO3CPU<Impl>
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
/** Reads a miscellaneous register. */
|
||||||
MiscReg readMiscReg(int misc_reg, unsigned tid);
|
MiscReg readMiscReg(int misc_reg, unsigned tid);
|
||||||
|
|
||||||
|
/** Reads a misc. register, including any side effects the read
|
||||||
|
* might have as defined by the architecture.
|
||||||
|
*/
|
||||||
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid);
|
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault, unsigned tid);
|
||||||
|
|
||||||
|
/** Sets a miscellaneous register. */
|
||||||
Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
|
Fault setMiscReg(int misc_reg, const MiscReg &val, unsigned tid);
|
||||||
|
|
||||||
|
/** Sets a misc. register, including any side effects the write
|
||||||
|
* might have as defined by the architecture.
|
||||||
|
*/
|
||||||
Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
|
Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val, unsigned tid);
|
||||||
|
|
||||||
|
/** Initiates a squash of all in-flight instructions for a given
|
||||||
|
* thread. The source of the squash is an external update of
|
||||||
|
* state through the XC.
|
||||||
|
*/
|
||||||
void squashFromXC(unsigned tid);
|
void squashFromXC(unsigned tid);
|
||||||
|
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
|
/** Posts an interrupt. */
|
||||||
void post_interrupt(int int_num, int index);
|
void post_interrupt(int int_num, int index);
|
||||||
|
/** Reads the interrupt flag. */
|
||||||
int readIntrFlag();
|
int readIntrFlag();
|
||||||
/** Sets the interrupt flags. */
|
/** Sets the interrupt flags. */
|
||||||
void setIntrFlag(int val);
|
void setIntrFlag(int val);
|
||||||
|
@ -312,7 +392,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
|
||||||
/** Executes a syscall.
|
/** Executes a syscall.
|
||||||
* @todo: Determine if this needs to be virtual.
|
* @todo: Determine if this needs to be virtual.
|
||||||
*/
|
*/
|
||||||
void syscall(int64_t callnum, int thread_num);
|
void syscall(int64_t callnum, int tid);
|
||||||
/** Gets a syscall argument. */
|
/** Gets a syscall argument. */
|
||||||
IntReg getSyscallArg(int i, int tid);
|
IntReg getSyscallArg(int i, int tid);
|
||||||
|
|
||||||
|
@ -438,6 +518,7 @@ class AlphaFullCPU : public FullO3CPU<Impl>
|
||||||
|
|
||||||
Addr lockAddr;
|
Addr lockAddr;
|
||||||
|
|
||||||
|
/** Temporary fix for the lock flag, works in the UP case. */
|
||||||
bool lockFlag;
|
bool lockFlag;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -107,6 +107,7 @@ Param<unsigned> squashWidth;
|
||||||
Param<Tick> trapLatency;
|
Param<Tick> trapLatency;
|
||||||
Param<Tick> fetchTrapLatency;
|
Param<Tick> fetchTrapLatency;
|
||||||
|
|
||||||
|
Param<std::string> predType;
|
||||||
Param<unsigned> localPredictorSize;
|
Param<unsigned> localPredictorSize;
|
||||||
Param<unsigned> localCtrBits;
|
Param<unsigned> localCtrBits;
|
||||||
Param<unsigned> localHistoryTableSize;
|
Param<unsigned> localHistoryTableSize;
|
||||||
|
@ -229,6 +230,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivAlphaFullCPU)
|
||||||
INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
|
INIT_PARAM_DFLT(trapLatency, "Number of cycles before the trap is handled", 6),
|
||||||
INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
|
INIT_PARAM_DFLT(fetchTrapLatency, "Number of cycles before the fetch trap is handled", 12),
|
||||||
|
|
||||||
|
INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
|
||||||
INIT_PARAM(localPredictorSize, "Size of local predictor"),
|
INIT_PARAM(localPredictorSize, "Size of local predictor"),
|
||||||
INIT_PARAM(localCtrBits, "Bits per counter"),
|
INIT_PARAM(localCtrBits, "Bits per counter"),
|
||||||
INIT_PARAM(localHistoryTableSize, "Size of local history table"),
|
INIT_PARAM(localHistoryTableSize, "Size of local history table"),
|
||||||
|
@ -359,6 +361,7 @@ CREATE_SIM_OBJECT(DerivAlphaFullCPU)
|
||||||
params->trapLatency = trapLatency;
|
params->trapLatency = trapLatency;
|
||||||
params->fetchTrapLatency = fetchTrapLatency;
|
params->fetchTrapLatency = fetchTrapLatency;
|
||||||
|
|
||||||
|
params->predType = predType;
|
||||||
params->localPredictorSize = localPredictorSize;
|
params->localPredictorSize = localPredictorSize;
|
||||||
params->localCtrBits = localCtrBits;
|
params->localCtrBits = localCtrBits;
|
||||||
params->localHistoryTableSize = localHistoryTableSize;
|
params->localHistoryTableSize = localHistoryTableSize;
|
||||||
|
|
|
@ -60,10 +60,12 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
|
||||||
{
|
{
|
||||||
DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n");
|
DPRINTF(FullCPU, "AlphaFullCPU: Creating AlphaFullCPU object.\n");
|
||||||
|
|
||||||
|
// Setup any thread state.
|
||||||
this->thread.resize(this->numThreads);
|
this->thread.resize(this->numThreads);
|
||||||
|
|
||||||
for (int i = 0; i < this->numThreads; ++i) {
|
for (int i = 0; i < this->numThreads; ++i) {
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
|
// SMT is not supported in FS mode yet.
|
||||||
assert(this->numThreads == 1);
|
assert(this->numThreads == 1);
|
||||||
this->thread[i] = new Thread(this, 0, params->mem);
|
this->thread[i] = new Thread(this, 0, params->mem);
|
||||||
this->thread[i]->setStatus(ExecContext::Suspended);
|
this->thread[i]->setStatus(ExecContext::Suspended);
|
||||||
|
@ -86,29 +88,34 @@ AlphaFullCPU<Impl>::AlphaFullCPU(Params *params)
|
||||||
}
|
}
|
||||||
#endif // !FULL_SYSTEM
|
#endif // !FULL_SYSTEM
|
||||||
|
|
||||||
this->thread[i]->numInst = 0;
|
|
||||||
|
|
||||||
ExecContext *xc_proxy;
|
ExecContext *xc_proxy;
|
||||||
|
|
||||||
AlphaXC *alpha_xc_proxy = new AlphaXC;
|
// Setup the XC that will serve as the interface to the threads/CPU.
|
||||||
|
AlphaXC *alpha_xc = new AlphaXC;
|
||||||
|
|
||||||
|
// If we're using a checker, then the XC should be the
|
||||||
|
// CheckerExecContext.
|
||||||
if (params->checker) {
|
if (params->checker) {
|
||||||
xc_proxy = new CheckerExecContext<AlphaXC>(alpha_xc_proxy, this->checker);
|
xc_proxy = new CheckerExecContext<AlphaXC>(
|
||||||
|
alpha_xc, this->checker);
|
||||||
} else {
|
} else {
|
||||||
xc_proxy = alpha_xc_proxy;
|
xc_proxy = alpha_xc;
|
||||||
}
|
}
|
||||||
|
|
||||||
alpha_xc_proxy->cpu = this;
|
alpha_xc->cpu = this;
|
||||||
alpha_xc_proxy->thread = this->thread[i];
|
alpha_xc->thread = this->thread[i];
|
||||||
|
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
|
// Setup quiesce event.
|
||||||
this->thread[i]->quiesceEvent =
|
this->thread[i]->quiesceEvent =
|
||||||
new EndQuiesceEvent(xc_proxy);
|
new EndQuiesceEvent(xc_proxy);
|
||||||
this->thread[i]->lastActivate = 0;
|
this->thread[i]->lastActivate = 0;
|
||||||
this->thread[i]->lastSuspend = 0;
|
this->thread[i]->lastSuspend = 0;
|
||||||
#endif
|
#endif
|
||||||
|
// Give the thread the XC.
|
||||||
this->thread[i]->xcProxy = xc_proxy;
|
this->thread[i]->xcProxy = xc_proxy;
|
||||||
|
|
||||||
|
// Add the XC to the CPU's list of XC's.
|
||||||
this->execContexts.push_back(xc_proxy);
|
this->execContexts.push_back(xc_proxy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -170,6 +177,7 @@ AlphaFullCPU<Impl>::AlphaXC::takeOverFrom(ExecContext *old_context)
|
||||||
setStatus(old_context->status());
|
setStatus(old_context->status());
|
||||||
copyArchRegs(old_context);
|
copyArchRegs(old_context);
|
||||||
setCpuId(old_context->readCpuId());
|
setCpuId(old_context->readCpuId());
|
||||||
|
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
thread->funcExeInst = old_context->readFuncExeInst();
|
thread->funcExeInst = old_context->readFuncExeInst();
|
||||||
#else
|
#else
|
||||||
|
@ -391,7 +399,6 @@ template <class Impl>
|
||||||
uint64_t
|
uint64_t
|
||||||
AlphaFullCPU<Impl>::AlphaXC::readIntReg(int reg_idx)
|
AlphaFullCPU<Impl>::AlphaXC::readIntReg(int reg_idx)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Reading int register through the XC!\n");
|
|
||||||
return cpu->readArchIntReg(reg_idx, thread->tid);
|
return cpu->readArchIntReg(reg_idx, thread->tid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -399,7 +406,6 @@ template <class Impl>
|
||||||
FloatReg
|
FloatReg
|
||||||
AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx, int width)
|
AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx, int width)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Reading float register through the XC!\n");
|
|
||||||
switch(width) {
|
switch(width) {
|
||||||
case 32:
|
case 32:
|
||||||
return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
|
return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
|
||||||
|
@ -415,7 +421,6 @@ template <class Impl>
|
||||||
FloatReg
|
FloatReg
|
||||||
AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx)
|
AlphaFullCPU<Impl>::AlphaXC::readFloatReg(int reg_idx)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Reading float register through the XC!\n");
|
|
||||||
return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
|
return cpu->readArchFloatRegSingle(reg_idx, thread->tid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -431,7 +436,6 @@ template <class Impl>
|
||||||
FloatRegBits
|
FloatRegBits
|
||||||
AlphaFullCPU<Impl>::AlphaXC::readFloatRegBits(int reg_idx)
|
AlphaFullCPU<Impl>::AlphaXC::readFloatRegBits(int reg_idx)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Reading floatint register through the XC!\n");
|
|
||||||
return cpu->readArchFloatRegInt(reg_idx, thread->tid);
|
return cpu->readArchFloatRegInt(reg_idx, thread->tid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -439,9 +443,9 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
AlphaFullCPU<Impl>::AlphaXC::setIntReg(int reg_idx, uint64_t val)
|
AlphaFullCPU<Impl>::AlphaXC::setIntReg(int reg_idx, uint64_t val)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Setting int register through the XC!\n");
|
|
||||||
cpu->setArchIntReg(reg_idx, val, thread->tid);
|
cpu->setArchIntReg(reg_idx, val, thread->tid);
|
||||||
|
|
||||||
|
// Squash if we're not already in a state update mode.
|
||||||
if (!thread->trapPending && !thread->inSyscall) {
|
if (!thread->trapPending && !thread->inSyscall) {
|
||||||
cpu->squashFromXC(thread->tid);
|
cpu->squashFromXC(thread->tid);
|
||||||
}
|
}
|
||||||
|
@ -451,7 +455,6 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width)
|
AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Setting float register through the XC!\n");
|
|
||||||
switch(width) {
|
switch(width) {
|
||||||
case 32:
|
case 32:
|
||||||
cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
|
cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
|
||||||
|
@ -461,6 +464,7 @@ AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val, int width)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Squash if we're not already in a state update mode.
|
||||||
if (!thread->trapPending && !thread->inSyscall) {
|
if (!thread->trapPending && !thread->inSyscall) {
|
||||||
cpu->squashFromXC(thread->tid);
|
cpu->squashFromXC(thread->tid);
|
||||||
}
|
}
|
||||||
|
@ -470,7 +474,6 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val)
|
AlphaFullCPU<Impl>::AlphaXC::setFloatReg(int reg_idx, FloatReg val)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Setting float register through the XC!\n");
|
|
||||||
cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
|
cpu->setArchFloatRegSingle(reg_idx, val, thread->tid);
|
||||||
|
|
||||||
if (!thread->trapPending && !thread->inSyscall) {
|
if (!thread->trapPending && !thread->inSyscall) {
|
||||||
|
@ -486,6 +489,7 @@ AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val,
|
||||||
DPRINTF(Fault, "Setting floatint register through the XC!\n");
|
DPRINTF(Fault, "Setting floatint register through the XC!\n");
|
||||||
cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
|
cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
|
||||||
|
|
||||||
|
// Squash if we're not already in a state update mode.
|
||||||
if (!thread->trapPending && !thread->inSyscall) {
|
if (!thread->trapPending && !thread->inSyscall) {
|
||||||
cpu->squashFromXC(thread->tid);
|
cpu->squashFromXC(thread->tid);
|
||||||
}
|
}
|
||||||
|
@ -495,9 +499,9 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val)
|
AlphaFullCPU<Impl>::AlphaXC::setFloatRegBits(int reg_idx, FloatRegBits val)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Setting floatint register through the XC!\n");
|
|
||||||
cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
|
cpu->setArchFloatRegInt(reg_idx, val, thread->tid);
|
||||||
|
|
||||||
|
// Squash if we're not already in a state update mode.
|
||||||
if (!thread->trapPending && !thread->inSyscall) {
|
if (!thread->trapPending && !thread->inSyscall) {
|
||||||
cpu->squashFromXC(thread->tid);
|
cpu->squashFromXC(thread->tid);
|
||||||
}
|
}
|
||||||
|
@ -509,6 +513,7 @@ AlphaFullCPU<Impl>::AlphaXC::setPC(uint64_t val)
|
||||||
{
|
{
|
||||||
cpu->setPC(val, thread->tid);
|
cpu->setPC(val, thread->tid);
|
||||||
|
|
||||||
|
// Squash if we're not already in a state update mode.
|
||||||
if (!thread->trapPending && !thread->inSyscall) {
|
if (!thread->trapPending && !thread->inSyscall) {
|
||||||
cpu->squashFromXC(thread->tid);
|
cpu->squashFromXC(thread->tid);
|
||||||
}
|
}
|
||||||
|
@ -520,6 +525,7 @@ AlphaFullCPU<Impl>::AlphaXC::setNextPC(uint64_t val)
|
||||||
{
|
{
|
||||||
cpu->setNextPC(val, thread->tid);
|
cpu->setNextPC(val, thread->tid);
|
||||||
|
|
||||||
|
// Squash if we're not already in a state update mode.
|
||||||
if (!thread->trapPending && !thread->inSyscall) {
|
if (!thread->trapPending && !thread->inSyscall) {
|
||||||
cpu->squashFromXC(thread->tid);
|
cpu->squashFromXC(thread->tid);
|
||||||
}
|
}
|
||||||
|
@ -529,10 +535,9 @@ template <class Impl>
|
||||||
Fault
|
Fault
|
||||||
AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
|
AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Setting misc register through the XC!\n");
|
|
||||||
|
|
||||||
Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->tid);
|
Fault ret_fault = cpu->setMiscReg(misc_reg, val, thread->tid);
|
||||||
|
|
||||||
|
// Squash if we're not already in a state update mode.
|
||||||
if (!thread->trapPending && !thread->inSyscall) {
|
if (!thread->trapPending && !thread->inSyscall) {
|
||||||
cpu->squashFromXC(thread->tid);
|
cpu->squashFromXC(thread->tid);
|
||||||
}
|
}
|
||||||
|
@ -542,12 +547,12 @@ AlphaFullCPU<Impl>::AlphaXC::setMiscReg(int misc_reg, const MiscReg &val)
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
Fault
|
Fault
|
||||||
AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg, const MiscReg &val)
|
AlphaFullCPU<Impl>::AlphaXC::setMiscRegWithEffect(int misc_reg,
|
||||||
|
const MiscReg &val)
|
||||||
{
|
{
|
||||||
DPRINTF(Fault, "Setting misc register through the XC!\n");
|
|
||||||
|
|
||||||
Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->tid);
|
Fault ret_fault = cpu->setMiscRegWithEffect(misc_reg, val, thread->tid);
|
||||||
|
|
||||||
|
// Squash if we're not already in a state update mode.
|
||||||
if (!thread->trapPending && !thread->inSyscall) {
|
if (!thread->trapPending && !thread->inSyscall) {
|
||||||
cpu->squashFromXC(thread->tid);
|
cpu->squashFromXC(thread->tid);
|
||||||
}
|
}
|
||||||
|
@ -628,7 +633,6 @@ AlphaFullCPU<Impl>::post_interrupt(int int_num, int index)
|
||||||
|
|
||||||
if (this->thread[0]->status() == ExecContext::Suspended) {
|
if (this->thread[0]->status() == ExecContext::Suspended) {
|
||||||
DPRINTF(IPI,"Suspended Processor awoke\n");
|
DPRINTF(IPI,"Suspended Processor awoke\n");
|
||||||
// xcProxies[0]->activate();
|
|
||||||
this->execContexts[0]->activate();
|
this->execContexts[0]->activate();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -691,6 +695,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid)
|
AlphaFullCPU<Impl>::trap(Fault fault, unsigned tid)
|
||||||
{
|
{
|
||||||
|
// Pass the thread's XC into the invoke method.
|
||||||
fault->invoke(this->execContexts[tid]);
|
fault->invoke(this->execContexts[tid]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -741,6 +746,7 @@ AlphaFullCPU<Impl>::processInterrupts()
|
||||||
if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) {
|
if (ipl && ipl > this->readMiscReg(IPR_IPLR, 0)) {
|
||||||
this->setMiscReg(IPR_ISR, summary, 0);
|
this->setMiscReg(IPR_ISR, summary, 0);
|
||||||
this->setMiscReg(IPR_INTID, ipl, 0);
|
this->setMiscReg(IPR_INTID, ipl, 0);
|
||||||
|
// Checker needs to know these two registers were updated.
|
||||||
if (this->checker) {
|
if (this->checker) {
|
||||||
this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary);
|
this->checker->cpuXCBase()->setMiscReg(IPR_ISR, summary);
|
||||||
this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl);
|
this->checker->cpuXCBase()->setMiscReg(IPR_INTID, ipl);
|
||||||
|
|
|
@ -93,23 +93,31 @@ class AlphaDynInst : public BaseDynInst<Impl>
|
||||||
void initVars();
|
void initVars();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/** Reads a miscellaneous register. */
|
||||||
MiscReg readMiscReg(int misc_reg)
|
MiscReg readMiscReg(int misc_reg)
|
||||||
{
|
{
|
||||||
return this->cpu->readMiscReg(misc_reg, this->threadNumber);
|
return this->cpu->readMiscReg(misc_reg, this->threadNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Reads a misc. register, including any side-effects the read
|
||||||
|
* might have as defined by the architecture.
|
||||||
|
*/
|
||||||
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
|
MiscReg readMiscRegWithEffect(int misc_reg, Fault &fault)
|
||||||
{
|
{
|
||||||
return this->cpu->readMiscRegWithEffect(misc_reg, fault,
|
return this->cpu->readMiscRegWithEffect(misc_reg, fault,
|
||||||
this->threadNumber);
|
this->threadNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Sets a misc. register. */
|
||||||
Fault setMiscReg(int misc_reg, const MiscReg &val)
|
Fault setMiscReg(int misc_reg, const MiscReg &val)
|
||||||
{
|
{
|
||||||
this->instResult.integer = val;
|
this->instResult.integer = val;
|
||||||
return this->cpu->setMiscReg(misc_reg, val, this->threadNumber);
|
return this->cpu->setMiscReg(misc_reg, val, this->threadNumber);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Sets a misc. register, including any side-effects the write
|
||||||
|
* might have as defined by the architecture.
|
||||||
|
*/
|
||||||
Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
|
Fault setMiscRegWithEffect(int misc_reg, const MiscReg &val)
|
||||||
{
|
{
|
||||||
return this->cpu->setMiscRegWithEffect(misc_reg, val,
|
return this->cpu->setMiscRegWithEffect(misc_reg, val,
|
||||||
|
|
|
@ -66,9 +66,10 @@ template <class Impl>
|
||||||
Fault
|
Fault
|
||||||
AlphaDynInst<Impl>::execute()
|
AlphaDynInst<Impl>::execute()
|
||||||
{
|
{
|
||||||
// @todo: Pretty convoluted way to avoid squashing from happening when using
|
// @todo: Pretty convoluted way to avoid squashing from happening
|
||||||
// the XC during an instruction's execution (specifically for instructions
|
// when using the XC during an instruction's execution
|
||||||
// that have sideeffects that use the XC). Fix this.
|
// (specifically for instructions that have side-effects that use
|
||||||
|
// the XC). Fix this.
|
||||||
bool in_syscall = this->thread->inSyscall;
|
bool in_syscall = this->thread->inSyscall;
|
||||||
this->thread->inSyscall = true;
|
this->thread->inSyscall = true;
|
||||||
|
|
||||||
|
@ -83,9 +84,10 @@ template <class Impl>
|
||||||
Fault
|
Fault
|
||||||
AlphaDynInst<Impl>::initiateAcc()
|
AlphaDynInst<Impl>::initiateAcc()
|
||||||
{
|
{
|
||||||
// @todo: Pretty convoluted way to avoid squashing from happening when using
|
// @todo: Pretty convoluted way to avoid squashing from happening
|
||||||
// the XC during an instruction's execution (specifically for instructions
|
// when using the XC during an instruction's execution
|
||||||
// that have sideeffects that use the XC). Fix this.
|
// (specifically for instructions that have side-effects that use
|
||||||
|
// the XC). Fix this.
|
||||||
bool in_syscall = this->thread->inSyscall;
|
bool in_syscall = this->thread->inSyscall;
|
||||||
this->thread->inSyscall = true;
|
this->thread->inSyscall = true;
|
||||||
|
|
||||||
|
@ -118,9 +120,11 @@ template <class Impl>
|
||||||
Fault
|
Fault
|
||||||
AlphaDynInst<Impl>::hwrei()
|
AlphaDynInst<Impl>::hwrei()
|
||||||
{
|
{
|
||||||
|
// Can only do a hwrei when in pal mode.
|
||||||
if (!this->cpu->inPalMode(this->readPC()))
|
if (!this->cpu->inPalMode(this->readPC()))
|
||||||
return new AlphaISA::UnimplementedOpcodeFault;
|
return new AlphaISA::UnimplementedOpcodeFault;
|
||||||
|
|
||||||
|
// Set the next PC based on the value of the EXC_ADDR IPR.
|
||||||
this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR,
|
this->setNextPC(this->cpu->readMiscReg(AlphaISA::IPR_EXC_ADDR,
|
||||||
this->threadNumber));
|
this->threadNumber));
|
||||||
|
|
||||||
|
|
|
@ -126,8 +126,9 @@ class AlphaSimpleParams : public BaseFullCPU::Params
|
||||||
Tick fetchTrapLatency;
|
Tick fetchTrapLatency;
|
||||||
|
|
||||||
//
|
//
|
||||||
// Branch predictor (BP & BTB)
|
// Branch predictor (BP, BTB, RAS)
|
||||||
//
|
//
|
||||||
|
std::string predType;
|
||||||
unsigned localPredictorSize;
|
unsigned localPredictorSize;
|
||||||
unsigned localCtrBits;
|
unsigned localCtrBits;
|
||||||
unsigned localHistoryTableSize;
|
unsigned localHistoryTableSize;
|
||||||
|
|
|
@ -34,6 +34,6 @@
|
||||||
#include "cpu/ozone/ozone_impl.hh"
|
#include "cpu/ozone/ozone_impl.hh"
|
||||||
//#include "cpu/ozone/simple_impl.hh"
|
//#include "cpu/ozone/simple_impl.hh"
|
||||||
|
|
||||||
template class TwobitBPredUnit<AlphaSimpleImpl>;
|
template class BPredUnit<AlphaSimpleImpl>;
|
||||||
template class TwobitBPredUnit<OzoneImpl>;
|
template class BPredUnit<OzoneImpl>;
|
||||||
//template class TwobitBPredUnit<SimpleImpl>;
|
//template class BPredUnit<SimpleImpl>;
|
||||||
|
|
|
@ -48,16 +48,25 @@
|
||||||
* and the BTB.
|
* and the BTB.
|
||||||
*/
|
*/
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
class TwobitBPredUnit
|
class BPredUnit
|
||||||
{
|
{
|
||||||
public:
|
private:
|
||||||
typedef typename Impl::Params Params;
|
typedef typename Impl::Params Params;
|
||||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||||
|
|
||||||
|
enum PredType {
|
||||||
|
Local,
|
||||||
|
Tournament
|
||||||
|
};
|
||||||
|
|
||||||
|
PredType predictor;
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param params The params object, that has the size of the BP and BTB.
|
* @param params The params object, that has the size of the BP and BTB.
|
||||||
*/
|
*/
|
||||||
TwobitBPredUnit(Params *params);
|
BPredUnit(Params *params);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Registers statistics.
|
* Registers statistics.
|
||||||
|
@ -78,6 +87,9 @@ class TwobitBPredUnit
|
||||||
*/
|
*/
|
||||||
bool predict(DynInstPtr &inst, Addr &PC, unsigned tid);
|
bool predict(DynInstPtr &inst, Addr &PC, unsigned tid);
|
||||||
|
|
||||||
|
// @todo: Rename this function.
|
||||||
|
void BPUncond(void * &bp_history);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tells the branch predictor to commit any updates until the given
|
* Tells the branch predictor to commit any updates until the given
|
||||||
* sequence number.
|
* sequence number.
|
||||||
|
@ -106,13 +118,20 @@ class TwobitBPredUnit
|
||||||
void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
|
void squash(const InstSeqNum &squashed_sn, const Addr &corr_target,
|
||||||
bool actually_taken, unsigned tid);
|
bool actually_taken, unsigned tid);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param bp_history Pointer to the history object. The predictor
|
||||||
|
* will need to update any state and delete the object.
|
||||||
|
*/
|
||||||
|
void BPSquash(void *bp_history);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Looks up a given PC in the BP to see if it is taken or not taken.
|
* Looks up a given PC in the BP to see if it is taken or not taken.
|
||||||
* @param inst_PC The PC to look up.
|
* @param inst_PC The PC to look up.
|
||||||
|
* @param bp_history Pointer that will be set to an object that
|
||||||
|
* has the branch predictor state associated with the lookup.
|
||||||
* @return Whether the branch is taken or not taken.
|
* @return Whether the branch is taken or not taken.
|
||||||
*/
|
*/
|
||||||
bool BPLookup(Addr &inst_PC)
|
bool BPLookup(Addr &inst_PC, void * &bp_history);
|
||||||
{ return BP.lookup(inst_PC); }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Looks up a given PC in the BTB to see if a matching entry exists.
|
* Looks up a given PC in the BTB to see if a matching entry exists.
|
||||||
|
@ -134,10 +153,11 @@ class TwobitBPredUnit
|
||||||
* Updates the BP with taken/not taken information.
|
* Updates the BP with taken/not taken information.
|
||||||
* @param inst_PC The branch's PC that will be updated.
|
* @param inst_PC The branch's PC that will be updated.
|
||||||
* @param taken Whether the branch was taken or not taken.
|
* @param taken Whether the branch was taken or not taken.
|
||||||
|
* @param bp_history Pointer to the branch predictor state that is
|
||||||
|
* associated with the branch lookup that is being updated.
|
||||||
* @todo Make this update flexible enough to handle a global predictor.
|
* @todo Make this update flexible enough to handle a global predictor.
|
||||||
*/
|
*/
|
||||||
void BPUpdate(Addr &inst_PC, bool taken)
|
void BPUpdate(Addr &inst_PC, bool taken, void *bp_history);
|
||||||
{ BP.update(inst_PC, taken); }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates the BTB with the target of a branch.
|
* Updates the BTB with the target of a branch.
|
||||||
|
@ -147,18 +167,20 @@ class TwobitBPredUnit
|
||||||
void BTBUpdate(Addr &inst_PC, Addr &target_PC)
|
void BTBUpdate(Addr &inst_PC, Addr &target_PC)
|
||||||
{ BTB.update(inst_PC, target_PC,0); }
|
{ BTB.update(inst_PC, target_PC,0); }
|
||||||
|
|
||||||
|
void dump();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct PredictorHistory {
|
struct PredictorHistory {
|
||||||
/**
|
/**
|
||||||
* Makes a predictor history struct that contains a sequence number,
|
* Makes a predictor history struct that contains any
|
||||||
* the PC of its instruction, and whether or not it was predicted
|
* information needed to update the predictor, BTB, and RAS.
|
||||||
* taken.
|
|
||||||
*/
|
*/
|
||||||
PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC,
|
PredictorHistory(const InstSeqNum &seq_num, const Addr &inst_PC,
|
||||||
const bool pred_taken, const unsigned _tid)
|
const bool pred_taken, void *bp_history,
|
||||||
: seqNum(seq_num), PC(inst_PC), RASTarget(0), globalHistory(0),
|
const unsigned _tid)
|
||||||
|
: seqNum(seq_num), PC(inst_PC), RASTarget(0),
|
||||||
RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0),
|
RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0),
|
||||||
wasCall(0)
|
wasCall(0), bpHistory(bp_history)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
/** The sequence number for the predictor history entry. */
|
/** The sequence number for the predictor history entry. */
|
||||||
|
@ -170,9 +192,6 @@ class TwobitBPredUnit
|
||||||
/** The RAS target (only valid if a return). */
|
/** The RAS target (only valid if a return). */
|
||||||
Addr RASTarget;
|
Addr RASTarget;
|
||||||
|
|
||||||
/** The global history at the time this entry was created. */
|
|
||||||
unsigned globalHistory;
|
|
||||||
|
|
||||||
/** The RAS index of the instruction (only valid if a call). */
|
/** The RAS index of the instruction (only valid if a call). */
|
||||||
unsigned RASIndex;
|
unsigned RASIndex;
|
||||||
|
|
||||||
|
@ -187,6 +206,12 @@ class TwobitBPredUnit
|
||||||
|
|
||||||
/** Whether or not the instruction was a call. */
|
/** Whether or not the instruction was a call. */
|
||||||
bool wasCall;
|
bool wasCall;
|
||||||
|
|
||||||
|
/** Pointer to the history object passed back from the branch
|
||||||
|
* predictor. It is used to update or restore state of the
|
||||||
|
* branch predictor.
|
||||||
|
*/
|
||||||
|
void *bpHistory;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef std::list<PredictorHistory> History;
|
typedef std::list<PredictorHistory> History;
|
||||||
|
@ -198,8 +223,11 @@ class TwobitBPredUnit
|
||||||
*/
|
*/
|
||||||
History predHist[Impl::MaxThreads];
|
History predHist[Impl::MaxThreads];
|
||||||
|
|
||||||
/** The branch predictor. */
|
/** The local branch predictor. */
|
||||||
DefaultBP BP;
|
LocalBP *localBP;
|
||||||
|
|
||||||
|
/** The tournament branch predictor. */
|
||||||
|
TournamentBP *tournamentBP;
|
||||||
|
|
||||||
/** The BTB. */
|
/** The BTB. */
|
||||||
DefaultBTB BTB;
|
DefaultBTB BTB;
|
||||||
|
|
|
@ -38,21 +38,40 @@
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
TwobitBPredUnit<Impl>::TwobitBPredUnit(Params *params)
|
BPredUnit<Impl>::BPredUnit(Params *params)
|
||||||
: BP(params->localPredictorSize,
|
: BTB(params->BTBEntries,
|
||||||
params->localCtrBits,
|
|
||||||
params->instShiftAmt),
|
|
||||||
BTB(params->BTBEntries,
|
|
||||||
params->BTBTagSize,
|
params->BTBTagSize,
|
||||||
params->instShiftAmt)
|
params->instShiftAmt)
|
||||||
{
|
{
|
||||||
|
// Setup the selected predictor.
|
||||||
|
if (params->predType == "local") {
|
||||||
|
localBP = new LocalBP(params->localPredictorSize,
|
||||||
|
params->localCtrBits,
|
||||||
|
params->instShiftAmt);
|
||||||
|
predictor = Local;
|
||||||
|
} else if (params->predType == "tournament") {
|
||||||
|
tournamentBP = new TournamentBP(params->localPredictorSize,
|
||||||
|
params->localCtrBits,
|
||||||
|
params->localHistoryTableSize,
|
||||||
|
params->localHistoryBits,
|
||||||
|
params->globalPredictorSize,
|
||||||
|
params->globalHistoryBits,
|
||||||
|
params->globalCtrBits,
|
||||||
|
params->choicePredictorSize,
|
||||||
|
params->choiceCtrBits,
|
||||||
|
params->instShiftAmt);
|
||||||
|
predictor = Tournament;
|
||||||
|
} else {
|
||||||
|
fatal("Invalid BP selected!");
|
||||||
|
}
|
||||||
|
|
||||||
for (int i=0; i < Impl::MaxThreads; i++)
|
for (int i=0; i < Impl::MaxThreads; i++)
|
||||||
RAS[i].init(params->RASSize);
|
RAS[i].init(params->RASSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
TwobitBPredUnit<Impl>::regStats()
|
BPredUnit<Impl>::regStats()
|
||||||
{
|
{
|
||||||
lookups
|
lookups
|
||||||
.name(name() + ".BPredUnit.lookups")
|
.name(name() + ".BPredUnit.lookups")
|
||||||
|
@ -98,17 +117,20 @@ TwobitBPredUnit<Impl>::regStats()
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
TwobitBPredUnit<Impl>::switchOut()
|
BPredUnit<Impl>::switchOut()
|
||||||
{
|
{
|
||||||
|
// Clear any state upon switch out.
|
||||||
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||||
predHist[i].clear();
|
squash(0, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
TwobitBPredUnit<Impl>::takeOverFrom()
|
BPredUnit<Impl>::takeOverFrom()
|
||||||
{
|
{
|
||||||
|
// Can reset all predictor state, but it's not necessarily better
|
||||||
|
// than leaving it be.
|
||||||
/*
|
/*
|
||||||
for (int i = 0; i < Impl::MaxThreads; ++i)
|
for (int i = 0; i < Impl::MaxThreads; ++i)
|
||||||
RAS[i].reset();
|
RAS[i].reset();
|
||||||
|
@ -120,11 +142,10 @@ TwobitBPredUnit<Impl>::takeOverFrom()
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
bool
|
bool
|
||||||
TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
|
BPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
|
||||||
{
|
{
|
||||||
// See if branch predictor predicts taken.
|
// See if branch predictor predicts taken.
|
||||||
// If so, get its target addr either from the BTB or the RAS.
|
// If so, get its target addr either from the BTB or the RAS.
|
||||||
// Once that's done, speculatively update the predictor?
|
|
||||||
// Save off record of branch stuff so the RAS can be fixed
|
// Save off record of branch stuff so the RAS can be fixed
|
||||||
// up once it's done.
|
// up once it's done.
|
||||||
|
|
||||||
|
@ -135,20 +156,25 @@ TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
|
||||||
|
|
||||||
++lookups;
|
++lookups;
|
||||||
|
|
||||||
|
void *bp_history = NULL;
|
||||||
|
|
||||||
if (inst->isUncondCtrl()) {
|
if (inst->isUncondCtrl()) {
|
||||||
DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid);
|
DPRINTF(Fetch, "BranchPred: [tid:%i] Unconditional control.\n", tid);
|
||||||
pred_taken = true;
|
pred_taken = true;
|
||||||
|
// Tell the BP there was an unconditional branch.
|
||||||
|
BPUncond(bp_history);
|
||||||
} else {
|
} else {
|
||||||
++condPredicted;
|
++condPredicted;
|
||||||
|
|
||||||
pred_taken = BPLookup(PC);
|
pred_taken = BPLookup(PC, bp_history);
|
||||||
|
|
||||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Branch predictor predicted %i "
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: Branch predictor predicted %i "
|
||||||
"for PC %#x\n",
|
"for PC %#x\n",
|
||||||
tid, pred_taken, inst->readPC());
|
tid, pred_taken, inst->readPC());
|
||||||
}
|
}
|
||||||
|
|
||||||
PredictorHistory predict_record(inst->seqNum, PC, pred_taken, tid);
|
PredictorHistory predict_record(inst->seqNum, PC, pred_taken,
|
||||||
|
bp_history, tid);
|
||||||
|
|
||||||
// Now lookup in the BTB or RAS.
|
// Now lookup in the BTB or RAS.
|
||||||
if (pred_taken) {
|
if (pred_taken) {
|
||||||
|
@ -189,7 +215,7 @@ TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
|
||||||
if (BTB.valid(PC, tid)) {
|
if (BTB.valid(PC, tid)) {
|
||||||
++BTBHits;
|
++BTBHits;
|
||||||
|
|
||||||
//If it's anything else, use the BTB to get the target addr.
|
// If it's not a return, use the BTB to get the target addr.
|
||||||
target = BTB.lookup(PC, tid);
|
target = BTB.lookup(PC, tid);
|
||||||
|
|
||||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x predicted"
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %#x predicted"
|
||||||
|
@ -223,7 +249,7 @@ TwobitBPredUnit<Impl>::predict(DynInstPtr &inst, Addr &PC, unsigned tid)
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
|
BPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
|
||||||
{
|
{
|
||||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence"
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: Commiting branches until sequence"
|
||||||
"number %lli.\n", tid, done_sn);
|
"number %lli.\n", tid, done_sn);
|
||||||
|
@ -231,8 +257,9 @@ TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
|
||||||
while (!predHist[tid].empty() &&
|
while (!predHist[tid].empty() &&
|
||||||
predHist[tid].back().seqNum <= done_sn) {
|
predHist[tid].back().seqNum <= done_sn) {
|
||||||
// Update the branch predictor with the correct results.
|
// Update the branch predictor with the correct results.
|
||||||
BP.update(predHist[tid].back().PC,
|
BPUpdate(predHist[tid].back().PC,
|
||||||
predHist[tid].back().predTaken);
|
predHist[tid].back().predTaken,
|
||||||
|
predHist[tid].back().bpHistory);
|
||||||
|
|
||||||
predHist[tid].pop_back();
|
predHist[tid].pop_back();
|
||||||
}
|
}
|
||||||
|
@ -240,13 +267,13 @@ TwobitBPredUnit<Impl>::update(const InstSeqNum &done_sn, unsigned tid)
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
|
BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
|
||||||
{
|
{
|
||||||
History &pred_hist = predHist[tid];
|
History &pred_hist = predHist[tid];
|
||||||
|
|
||||||
while (!pred_hist.empty() &&
|
while (!pred_hist.empty() &&
|
||||||
pred_hist.front().seqNum > squashed_sn) {
|
pred_hist.front().seqNum > squashed_sn) {
|
||||||
if (pred_hist.front().usedRAS) {
|
if (pred_hist.front().usedRAS) {
|
||||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i,"
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i,"
|
||||||
" target: %#x.\n",
|
" target: %#x.\n",
|
||||||
tid,
|
tid,
|
||||||
|
@ -257,12 +284,15 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
|
||||||
pred_hist.front().RASTarget);
|
pred_hist.front().RASTarget);
|
||||||
|
|
||||||
} else if (pred_hist.front().wasCall) {
|
} else if (pred_hist.front().wasCall) {
|
||||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry added "
|
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry "
|
||||||
"to the RAS.\n",tid);
|
"added to the RAS.\n",tid);
|
||||||
|
|
||||||
RAS[tid].pop();
|
RAS[tid].pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This call should delete the bpHistory.
|
||||||
|
BPSquash(pred_hist.front().bpHistory);
|
||||||
|
|
||||||
pred_hist.pop_front();
|
pred_hist.pop_front();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -270,10 +300,10 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, unsigned tid)
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void
|
void
|
||||||
TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
|
BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
|
||||||
const Addr &corr_target,
|
const Addr &corr_target,
|
||||||
const bool actually_taken,
|
const bool actually_taken,
|
||||||
unsigned tid)
|
unsigned tid)
|
||||||
{
|
{
|
||||||
// Now that we know that a branch was mispredicted, we need to undo
|
// Now that we know that a branch was mispredicted, we need to undo
|
||||||
// all the branches that have been seen up until this branch and
|
// all the branches that have been seen up until this branch and
|
||||||
|
@ -287,40 +317,96 @@ TwobitBPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
|
||||||
"setting target to %#x.\n",
|
"setting target to %#x.\n",
|
||||||
tid, squashed_sn, corr_target);
|
tid, squashed_sn, corr_target);
|
||||||
|
|
||||||
while (!pred_hist.empty() &&
|
squash(squashed_sn, tid);
|
||||||
pred_hist.front().seqNum > squashed_sn) {
|
|
||||||
if (pred_hist.front().usedRAS) {
|
|
||||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i, "
|
|
||||||
"target: %#x.\n",
|
|
||||||
tid,
|
|
||||||
pred_hist.front().RASIndex,
|
|
||||||
pred_hist.front().RASTarget);
|
|
||||||
|
|
||||||
RAS[tid].restore(pred_hist.front().RASIndex,
|
|
||||||
pred_hist.front().RASTarget);
|
|
||||||
} else if (pred_hist.front().wasCall) {
|
|
||||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing speculative entry"
|
|
||||||
" added to the RAS.\n", tid);
|
|
||||||
|
|
||||||
RAS[tid].pop();
|
|
||||||
}
|
|
||||||
|
|
||||||
pred_hist.pop_front();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If there's a squash due to a syscall, there may not be an entry
|
// If there's a squash due to a syscall, there may not be an entry
|
||||||
// corresponding to the squash. In that case, don't bother trying to
|
// corresponding to the squash. In that case, don't bother trying to
|
||||||
// fix up the entry.
|
// fix up the entry.
|
||||||
if (!pred_hist.empty()) {
|
if (!pred_hist.empty()) {
|
||||||
pred_hist.front().predTaken = actually_taken;
|
assert(pred_hist.front().seqNum == squashed_sn);
|
||||||
|
|
||||||
if (pred_hist.front().usedRAS) {
|
if (pred_hist.front().usedRAS) {
|
||||||
++RASIncorrect;
|
++RASIncorrect;
|
||||||
}
|
}
|
||||||
|
|
||||||
BP.update(pred_hist.front().PC, actually_taken);
|
BPUpdate(pred_hist.front().PC, actually_taken,
|
||||||
|
pred_hist.front().bpHistory);
|
||||||
|
|
||||||
BTB.update(pred_hist.front().PC, corr_target, tid);
|
BTB.update(pred_hist.front().PC, corr_target, tid);
|
||||||
pred_hist.pop_front();
|
pred_hist.pop_front();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
BPredUnit<Impl>::BPUncond(void * &bp_history)
|
||||||
|
{
|
||||||
|
// Only the tournament predictor cares about unconditional branches.
|
||||||
|
if (predictor == Tournament) {
|
||||||
|
tournamentBP->uncondBr(bp_history);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
BPredUnit<Impl>::BPSquash(void *bp_history)
|
||||||
|
{
|
||||||
|
if (predictor == Local) {
|
||||||
|
localBP->squash(bp_history);
|
||||||
|
} else if (predictor == Tournament) {
|
||||||
|
tournamentBP->squash(bp_history);
|
||||||
|
} else {
|
||||||
|
panic("Predictor type is unexpected value!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
bool
|
||||||
|
BPredUnit<Impl>::BPLookup(Addr &inst_PC, void * &bp_history)
|
||||||
|
{
|
||||||
|
if (predictor == Local) {
|
||||||
|
return localBP->lookup(inst_PC, bp_history);
|
||||||
|
} else if (predictor == Tournament) {
|
||||||
|
return tournamentBP->lookup(inst_PC, bp_history);
|
||||||
|
} else {
|
||||||
|
panic("Predictor type is unexpected value!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
BPredUnit<Impl>::BPUpdate(Addr &inst_PC, bool taken, void *bp_history)
|
||||||
|
{
|
||||||
|
if (predictor == Local) {
|
||||||
|
localBP->update(inst_PC, taken, bp_history);
|
||||||
|
} else if (predictor == Tournament) {
|
||||||
|
tournamentBP->update(inst_PC, taken, bp_history);
|
||||||
|
} else {
|
||||||
|
panic("Predictor type is unexpected value!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class Impl>
|
||||||
|
void
|
||||||
|
BPredUnit<Impl>::dump()
|
||||||
|
{
|
||||||
|
typename History::iterator pred_hist_it;
|
||||||
|
|
||||||
|
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||||
|
if (!predHist[i].empty()) {
|
||||||
|
pred_hist_it = predHist[i].begin();
|
||||||
|
|
||||||
|
cprintf("predHist[%i].size(): %i\n", i, predHist[i].size());
|
||||||
|
|
||||||
|
while (pred_hist_it != predHist[i].end()) {
|
||||||
|
cprintf("[sn:%lli], PC:%#x, tid:%i, predTaken:%i, "
|
||||||
|
"bpHistory:%#x\n",
|
||||||
|
(*pred_hist_it).seqNum, (*pred_hist_it).PC,
|
||||||
|
(*pred_hist_it).tid, (*pred_hist_it).predTaken,
|
||||||
|
(*pred_hist_it).bpHistory);
|
||||||
|
pred_hist_it++;
|
||||||
|
}
|
||||||
|
|
||||||
|
cprintf("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -43,6 +43,7 @@
|
||||||
// typedef yet are not templated on the Impl. For now it will be defined here.
|
// typedef yet are not templated on the Impl. For now it will be defined here.
|
||||||
typedef short int PhysRegIndex;
|
typedef short int PhysRegIndex;
|
||||||
|
|
||||||
|
/** Struct that defines the information passed from fetch to decode. */
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
struct DefaultFetchDefaultDecode {
|
struct DefaultFetchDefaultDecode {
|
||||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||||
|
@ -55,6 +56,7 @@ struct DefaultFetchDefaultDecode {
|
||||||
bool clearFetchFault;
|
bool clearFetchFault;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Struct that defines the information passed from decode to rename. */
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
struct DefaultDecodeDefaultRename {
|
struct DefaultDecodeDefaultRename {
|
||||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||||
|
@ -64,6 +66,7 @@ struct DefaultDecodeDefaultRename {
|
||||||
DynInstPtr insts[Impl::MaxWidth];
|
DynInstPtr insts[Impl::MaxWidth];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Struct that defines the information passed from rename to IEW. */
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
struct DefaultRenameDefaultIEW {
|
struct DefaultRenameDefaultIEW {
|
||||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||||
|
@ -73,6 +76,7 @@ struct DefaultRenameDefaultIEW {
|
||||||
DynInstPtr insts[Impl::MaxWidth];
|
DynInstPtr insts[Impl::MaxWidth];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Struct that defines the information passed from IEW to commit. */
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
struct DefaultIEWDefaultCommit {
|
struct DefaultIEWDefaultCommit {
|
||||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||||
|
@ -100,6 +104,7 @@ struct IssueStruct {
|
||||||
DynInstPtr insts[Impl::MaxWidth];
|
DynInstPtr insts[Impl::MaxWidth];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Struct that defines all backwards communication. */
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
struct TimeBufStruct {
|
struct TimeBufStruct {
|
||||||
struct decodeComm {
|
struct decodeComm {
|
||||||
|
@ -121,13 +126,7 @@ struct TimeBufStruct {
|
||||||
|
|
||||||
decodeComm decodeInfo[Impl::MaxThreads];
|
decodeComm decodeInfo[Impl::MaxThreads];
|
||||||
|
|
||||||
// Rename can't actually tell anything to squash or send a new PC back
|
|
||||||
// because it doesn't do anything along those lines. But maybe leave
|
|
||||||
// these fields in here to keep the stages mostly orthagonal.
|
|
||||||
struct renameComm {
|
struct renameComm {
|
||||||
bool squash;
|
|
||||||
|
|
||||||
uint64_t nextPC;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
renameComm renameInfo[Impl::MaxThreads];
|
renameComm renameInfo[Impl::MaxThreads];
|
||||||
|
|
|
@ -85,6 +85,9 @@ class DefaultCommit
|
||||||
|
|
||||||
typedef O3ThreadState<Impl> Thread;
|
typedef O3ThreadState<Impl> Thread;
|
||||||
|
|
||||||
|
/** Event class used to schedule a squash due to a trap (fault or
|
||||||
|
* interrupt) to happen on a specific cycle.
|
||||||
|
*/
|
||||||
class TrapEvent : public Event {
|
class TrapEvent : public Event {
|
||||||
private:
|
private:
|
||||||
DefaultCommit<Impl> *commit;
|
DefaultCommit<Impl> *commit;
|
||||||
|
@ -162,7 +165,7 @@ class DefaultCommit
|
||||||
|
|
||||||
Fetch *fetchStage;
|
Fetch *fetchStage;
|
||||||
|
|
||||||
/** Sets the poitner to the IEW stage. */
|
/** Sets the pointer to the IEW stage. */
|
||||||
void setIEWStage(IEW *iew_stage);
|
void setIEWStage(IEW *iew_stage);
|
||||||
|
|
||||||
/** The pointer to the IEW stage. Used solely to ensure that
|
/** The pointer to the IEW stage. Used solely to ensure that
|
||||||
|
@ -183,10 +186,13 @@ class DefaultCommit
|
||||||
/** Initializes stage by sending back the number of free entries. */
|
/** Initializes stage by sending back the number of free entries. */
|
||||||
void initStage();
|
void initStage();
|
||||||
|
|
||||||
|
/** Initializes the switching out of commit. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Completes the switch out of commit. */
|
||||||
void doSwitchOut();
|
void doSwitchOut();
|
||||||
|
|
||||||
|
/** Takes over from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Ticks the commit stage, which tries to commit instructions. */
|
/** Ticks the commit stage, which tries to commit instructions. */
|
||||||
|
@ -200,11 +206,18 @@ class DefaultCommit
|
||||||
/** Returns the number of free ROB entries for a specific thread. */
|
/** Returns the number of free ROB entries for a specific thread. */
|
||||||
unsigned numROBFreeEntries(unsigned tid);
|
unsigned numROBFreeEntries(unsigned tid);
|
||||||
|
|
||||||
|
/** Generates an event to schedule a squash due to a trap. */
|
||||||
|
void generateTrapEvent(unsigned tid);
|
||||||
|
|
||||||
|
/** Records that commit needs to initiate a squash due to an
|
||||||
|
* external state update through the XC.
|
||||||
|
*/
|
||||||
void generateXCEvent(unsigned tid);
|
void generateXCEvent(unsigned tid);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/** Updates the overall status of commit with the nextStatus, and
|
/** Updates the overall status of commit with the nextStatus, and
|
||||||
* tell the CPU if commit is active/inactive. */
|
* tell the CPU if commit is active/inactive.
|
||||||
|
*/
|
||||||
void updateStatus();
|
void updateStatus();
|
||||||
|
|
||||||
/** Sets the next status based on threads' statuses, which becomes the
|
/** Sets the next status based on threads' statuses, which becomes the
|
||||||
|
@ -223,10 +236,13 @@ class DefaultCommit
|
||||||
*/
|
*/
|
||||||
bool changedROBEntries();
|
bool changedROBEntries();
|
||||||
|
|
||||||
|
/** Squashes all in flight instructions. */
|
||||||
void squashAll(unsigned tid);
|
void squashAll(unsigned tid);
|
||||||
|
|
||||||
|
/** Handles squashing due to a trap. */
|
||||||
void squashFromTrap(unsigned tid);
|
void squashFromTrap(unsigned tid);
|
||||||
|
|
||||||
|
/** Handles squashing due to an XC write. */
|
||||||
void squashFromXC(unsigned tid);
|
void squashFromXC(unsigned tid);
|
||||||
|
|
||||||
/** Commits as many instructions as possible. */
|
/** Commits as many instructions as possible. */
|
||||||
|
@ -237,8 +253,6 @@ class DefaultCommit
|
||||||
*/
|
*/
|
||||||
bool commitHead(DynInstPtr &head_inst, unsigned inst_num);
|
bool commitHead(DynInstPtr &head_inst, unsigned inst_num);
|
||||||
|
|
||||||
void generateTrapEvent(unsigned tid);
|
|
||||||
|
|
||||||
/** Gets instructions from rename and inserts them into the ROB. */
|
/** Gets instructions from rename and inserts them into the ROB. */
|
||||||
void getInsts();
|
void getInsts();
|
||||||
|
|
||||||
|
@ -260,12 +274,16 @@ class DefaultCommit
|
||||||
*/
|
*/
|
||||||
uint64_t readPC() { return PC[0]; }
|
uint64_t readPC() { return PC[0]; }
|
||||||
|
|
||||||
|
/** Returns the PC of a specific thread. */
|
||||||
uint64_t readPC(unsigned tid) { return PC[tid]; }
|
uint64_t readPC(unsigned tid) { return PC[tid]; }
|
||||||
|
|
||||||
|
/** Sets the PC of a specific thread. */
|
||||||
void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
|
void setPC(uint64_t val, unsigned tid) { PC[tid] = val; }
|
||||||
|
|
||||||
|
/** Reads the PC of a specific thread. */
|
||||||
uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
|
uint64_t readNextPC(unsigned tid) { return nextPC[tid]; }
|
||||||
|
|
||||||
|
/** Sets the next PC of a specific thread. */
|
||||||
void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
|
void setNextPC(uint64_t val, unsigned tid) { nextPC[tid] = val; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -302,6 +320,7 @@ class DefaultCommit
|
||||||
/** Pointer to FullCPU. */
|
/** Pointer to FullCPU. */
|
||||||
FullCPU *cpu;
|
FullCPU *cpu;
|
||||||
|
|
||||||
|
/** Vector of all of the threads. */
|
||||||
std::vector<Thread *> thread;
|
std::vector<Thread *> thread;
|
||||||
|
|
||||||
Fault fetchFault;
|
Fault fetchFault;
|
||||||
|
@ -360,17 +379,27 @@ class DefaultCommit
|
||||||
/** Number of Active Threads */
|
/** Number of Active Threads */
|
||||||
unsigned numThreads;
|
unsigned numThreads;
|
||||||
|
|
||||||
|
/** Is a switch out pending. */
|
||||||
bool switchPending;
|
bool switchPending;
|
||||||
|
|
||||||
|
/** Is commit switched out. */
|
||||||
bool switchedOut;
|
bool switchedOut;
|
||||||
|
|
||||||
|
/** The latency to handle a trap. Used when scheduling trap
|
||||||
|
* squash event.
|
||||||
|
*/
|
||||||
Tick trapLatency;
|
Tick trapLatency;
|
||||||
|
|
||||||
Tick fetchTrapLatency;
|
Tick fetchTrapLatency;
|
||||||
|
|
||||||
Tick fetchFaultTick;
|
Tick fetchFaultTick;
|
||||||
|
|
||||||
|
/** The commit PC of each thread. Refers to the instruction that
|
||||||
|
* is currently being processed/committed.
|
||||||
|
*/
|
||||||
Addr PC[Impl::MaxThreads];
|
Addr PC[Impl::MaxThreads];
|
||||||
|
|
||||||
|
/** The next PC of each thread. */
|
||||||
Addr nextPC[Impl::MaxThreads];
|
Addr nextPC[Impl::MaxThreads];
|
||||||
|
|
||||||
/** The sequence number of the youngest valid instruction in the ROB. */
|
/** The sequence number of the youngest valid instruction in the ROB. */
|
||||||
|
@ -382,6 +411,7 @@ class DefaultCommit
|
||||||
/** Rename map interface. */
|
/** Rename map interface. */
|
||||||
RenameMap *renameMap[Impl::MaxThreads];
|
RenameMap *renameMap[Impl::MaxThreads];
|
||||||
|
|
||||||
|
/** Updates commit stats based on this instruction. */
|
||||||
void updateComInstStats(DynInstPtr &inst);
|
void updateComInstStats(DynInstPtr &inst);
|
||||||
|
|
||||||
/** Stat for the total number of committed instructions. */
|
/** Stat for the total number of committed instructions. */
|
||||||
|
@ -415,7 +445,9 @@ class DefaultCommit
|
||||||
/** Total number of committed branches. */
|
/** Total number of committed branches. */
|
||||||
Stats::Vector<> statComBranches;
|
Stats::Vector<> statComBranches;
|
||||||
|
|
||||||
|
/** Number of cycles where the commit bandwidth limit is reached. */
|
||||||
Stats::Scalar<> commitEligibleSamples;
|
Stats::Scalar<> commitEligibleSamples;
|
||||||
|
/** Number of instructions not committed due to bandwidth limits. */
|
||||||
Stats::Vector<> commitEligible;
|
Stats::Vector<> commitEligible;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -692,7 +692,7 @@ DefaultCommit<Impl>::commit()
|
||||||
|
|
||||||
while (threads != (*activeThreads).end()) {
|
while (threads != (*activeThreads).end()) {
|
||||||
unsigned tid = *threads++;
|
unsigned tid = *threads++;
|
||||||
|
/*
|
||||||
if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
|
if (fromFetch->fetchFault && commitStatus[0] != TrapPending) {
|
||||||
// Record the fault. Wait until it's empty in the ROB.
|
// Record the fault. Wait until it's empty in the ROB.
|
||||||
// Then handle the trap. Ignore it if there's already a
|
// Then handle the trap. Ignore it if there's already a
|
||||||
|
@ -714,7 +714,7 @@ DefaultCommit<Impl>::commit()
|
||||||
commitStatus[0] = Running;
|
commitStatus[0] = Running;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
// Not sure which one takes priority. I think if we have
|
// Not sure which one takes priority. I think if we have
|
||||||
// both, that's a bad sign.
|
// both, that's a bad sign.
|
||||||
if (trapSquash[tid] == true) {
|
if (trapSquash[tid] == true) {
|
||||||
|
@ -926,7 +926,7 @@ DefaultCommit<Impl>::commitInsts()
|
||||||
numCommittedDist.sample(num_committed);
|
numCommittedDist.sample(num_committed);
|
||||||
|
|
||||||
if (num_committed == commitWidth) {
|
if (num_committed == commitWidth) {
|
||||||
commitEligible[0]++;
|
commitEligibleSamples++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -948,6 +948,7 @@ DefaultCommit<Impl>::commitHead(DynInstPtr &head_inst, unsigned inst_num)
|
||||||
head_inst->reachedCommit = true;
|
head_inst->reachedCommit = true;
|
||||||
|
|
||||||
if (head_inst->isNonSpeculative() ||
|
if (head_inst->isNonSpeculative() ||
|
||||||
|
head_inst->isStoreConditional() ||
|
||||||
head_inst->isMemBarrier() ||
|
head_inst->isMemBarrier() ||
|
||||||
head_inst->isWriteBarrier()) {
|
head_inst->isWriteBarrier()) {
|
||||||
|
|
||||||
|
|
|
@ -72,6 +72,11 @@ class BaseFullCPU : public BaseCPU
|
||||||
int cpu_id;
|
int cpu_id;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* FullO3CPU class, has each of the stages (fetch through commit)
|
||||||
|
* within it, as well as all of the time buffers between stages. The
|
||||||
|
* tick() function for the CPU is defined here.
|
||||||
|
*/
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
class FullO3CPU : public BaseFullCPU
|
class FullO3CPU : public BaseFullCPU
|
||||||
{
|
{
|
||||||
|
@ -202,17 +207,13 @@ class FullO3CPU : public BaseFullCPU
|
||||||
*/
|
*/
|
||||||
virtual void syscall(int tid) { panic("Unimplemented!"); }
|
virtual void syscall(int tid) { panic("Unimplemented!"); }
|
||||||
|
|
||||||
/** Check if there are any system calls pending. */
|
/** Switches out this CPU. */
|
||||||
void checkSyscalls();
|
|
||||||
|
|
||||||
/** Switches out this CPU.
|
|
||||||
*/
|
|
||||||
void switchOut(Sampler *sampler);
|
void switchOut(Sampler *sampler);
|
||||||
|
|
||||||
|
/** Signals to this CPU that a stage has completed switching out. */
|
||||||
void signalSwitched();
|
void signalSwitched();
|
||||||
|
|
||||||
/** Takes over from another CPU.
|
/** Takes over from another CPU. */
|
||||||
*/
|
|
||||||
void takeOverFrom(BaseCPU *oldCPU);
|
void takeOverFrom(BaseCPU *oldCPU);
|
||||||
|
|
||||||
/** Get the current instruction sequence number, and increment it. */
|
/** Get the current instruction sequence number, and increment it. */
|
||||||
|
@ -244,9 +245,7 @@ class FullO3CPU : public BaseFullCPU
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
//
|
/** Register accessors. Index refers to the physical register index. */
|
||||||
// New accessors for new decoder.
|
|
||||||
//
|
|
||||||
uint64_t readIntReg(int reg_idx);
|
uint64_t readIntReg(int reg_idx);
|
||||||
|
|
||||||
FloatReg readFloatReg(int reg_idx);
|
FloatReg readFloatReg(int reg_idx);
|
||||||
|
@ -275,6 +274,11 @@ class FullO3CPU : public BaseFullCPU
|
||||||
|
|
||||||
uint64_t readArchFloatRegInt(int reg_idx, unsigned tid);
|
uint64_t readArchFloatRegInt(int reg_idx, unsigned tid);
|
||||||
|
|
||||||
|
/** Architectural register accessors. Looks up in the commit
|
||||||
|
* rename table to obtain the true physical index of the
|
||||||
|
* architected register first, then accesses that physical
|
||||||
|
* register.
|
||||||
|
*/
|
||||||
void setArchIntReg(int reg_idx, uint64_t val, unsigned tid);
|
void setArchIntReg(int reg_idx, uint64_t val, unsigned tid);
|
||||||
|
|
||||||
void setArchFloatRegSingle(int reg_idx, float val, unsigned tid);
|
void setArchFloatRegSingle(int reg_idx, float val, unsigned tid);
|
||||||
|
@ -283,13 +287,17 @@ class FullO3CPU : public BaseFullCPU
|
||||||
|
|
||||||
void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
|
void setArchFloatRegInt(int reg_idx, uint64_t val, unsigned tid);
|
||||||
|
|
||||||
|
/** Reads the commit PC of a specific thread. */
|
||||||
uint64_t readPC(unsigned tid);
|
uint64_t readPC(unsigned tid);
|
||||||
|
|
||||||
void setPC(Addr new_PC,unsigned tid);
|
/** Sets the commit PC of a specific thread. */
|
||||||
|
void setPC(Addr new_PC, unsigned tid);
|
||||||
|
|
||||||
|
/** Reads the next PC of a specific thread. */
|
||||||
uint64_t readNextPC(unsigned tid);
|
uint64_t readNextPC(unsigned tid);
|
||||||
|
|
||||||
void setNextPC(uint64_t val,unsigned tid);
|
/** Sets the next PC of a specific thread. */
|
||||||
|
void setNextPC(uint64_t val, unsigned tid);
|
||||||
|
|
||||||
/** Function to add instruction onto the head of the list of the
|
/** Function to add instruction onto the head of the list of the
|
||||||
* instructions. Used when new instructions are fetched.
|
* instructions. Used when new instructions are fetched.
|
||||||
|
@ -313,21 +321,15 @@ class FullO3CPU : public BaseFullCPU
|
||||||
/** Remove all instructions younger than the given sequence number. */
|
/** Remove all instructions younger than the given sequence number. */
|
||||||
void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
|
void removeInstsUntil(const InstSeqNum &seq_num,unsigned tid);
|
||||||
|
|
||||||
|
/** Removes the instruction pointed to by the iterator. */
|
||||||
inline void squashInstIt(const ListIt &instIt, const unsigned &tid);
|
inline void squashInstIt(const ListIt &instIt, const unsigned &tid);
|
||||||
|
|
||||||
|
/** Cleans up all instructions on the remove list. */
|
||||||
void cleanUpRemovedInsts();
|
void cleanUpRemovedInsts();
|
||||||
|
|
||||||
/** Remove all instructions from the list. */
|
/** Debug function to print all instructions on the list. */
|
||||||
// void removeAllInsts();
|
|
||||||
|
|
||||||
void dumpInsts();
|
void dumpInsts();
|
||||||
|
|
||||||
/** Basically a wrapper function so that instructions executed at
|
|
||||||
* commit can tell the instruction queue that they have
|
|
||||||
* completed. Eventually this hack should be removed.
|
|
||||||
*/
|
|
||||||
// void wakeDependents(DynInstPtr &inst);
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/** List of all the instructions in flight. */
|
/** List of all the instructions in flight. */
|
||||||
std::list<DynInstPtr> instList;
|
std::list<DynInstPtr> instList;
|
||||||
|
@ -338,6 +340,9 @@ class FullO3CPU : public BaseFullCPU
|
||||||
std::queue<ListIt> removeList;
|
std::queue<ListIt> removeList;
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
/** Debug structure to keep track of the sequence numbers still in
|
||||||
|
* flight.
|
||||||
|
*/
|
||||||
std::set<InstSeqNum> snList;
|
std::set<InstSeqNum> snList;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -424,14 +429,22 @@ class FullO3CPU : public BaseFullCPU
|
||||||
/** The IEW stage's instruction queue. */
|
/** The IEW stage's instruction queue. */
|
||||||
TimeBuffer<IEWStruct> iewQueue;
|
TimeBuffer<IEWStruct> iewQueue;
|
||||||
|
|
||||||
public:
|
private:
|
||||||
|
/** The activity recorder; used to tell if the CPU has any
|
||||||
|
* activity remaining or if it can go to idle and deschedule
|
||||||
|
* itself.
|
||||||
|
*/
|
||||||
ActivityRecorder activityRec;
|
ActivityRecorder activityRec;
|
||||||
|
|
||||||
|
public:
|
||||||
|
/** Records that there was time buffer activity this cycle. */
|
||||||
void activityThisCycle() { activityRec.activity(); }
|
void activityThisCycle() { activityRec.activity(); }
|
||||||
|
|
||||||
|
/** Changes a stage's status to active within the activity recorder. */
|
||||||
void activateStage(const StageIdx idx)
|
void activateStage(const StageIdx idx)
|
||||||
{ activityRec.activateStage(idx); }
|
{ activityRec.activateStage(idx); }
|
||||||
|
|
||||||
|
/** Changes a stage's status to inactive within the activity recorder. */
|
||||||
void deactivateStage(const StageIdx idx)
|
void deactivateStage(const StageIdx idx)
|
||||||
{ activityRec.deactivateStage(idx); }
|
{ activityRec.deactivateStage(idx); }
|
||||||
|
|
||||||
|
@ -442,7 +455,7 @@ class FullO3CPU : public BaseFullCPU
|
||||||
int getFreeTid();
|
int getFreeTid();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/** Temporary function to get pointer to exec context. */
|
/** Returns a pointer to a thread's exec context. */
|
||||||
ExecContext *xcBase(unsigned tid)
|
ExecContext *xcBase(unsigned tid)
|
||||||
{
|
{
|
||||||
return thread[tid]->getXCProxy();
|
return thread[tid]->getXCProxy();
|
||||||
|
@ -451,6 +464,10 @@ class FullO3CPU : public BaseFullCPU
|
||||||
/** The global sequence number counter. */
|
/** The global sequence number counter. */
|
||||||
InstSeqNum globalSeqNum;
|
InstSeqNum globalSeqNum;
|
||||||
|
|
||||||
|
/** Pointer to the checker, which can dynamically verify
|
||||||
|
* instruction results at run time. This can be set to NULL if it
|
||||||
|
* is not being used.
|
||||||
|
*/
|
||||||
Checker<DynInstPtr> *checker;
|
Checker<DynInstPtr> *checker;
|
||||||
|
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
|
@ -466,11 +483,13 @@ class FullO3CPU : public BaseFullCPU
|
||||||
/** Pointer to memory. */
|
/** Pointer to memory. */
|
||||||
MemObject *mem;
|
MemObject *mem;
|
||||||
|
|
||||||
|
/** Pointer to the sampler */
|
||||||
Sampler *sampler;
|
Sampler *sampler;
|
||||||
|
|
||||||
|
/** Counter of how many stages have completed switching out. */
|
||||||
int switchCount;
|
int switchCount;
|
||||||
|
|
||||||
// List of all ExecContexts.
|
/** Pointers to all of the threads in the CPU. */
|
||||||
std::vector<Thread *> thread;
|
std::vector<Thread *> thread;
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
|
|
@ -50,24 +50,50 @@
|
||||||
|
|
||||||
#include "cpu/o3/comm.hh"
|
#include "cpu/o3/comm.hh"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Struct that defines the key classes to be used by the CPU. All
|
||||||
|
* classes use the typedefs defined here to determine what are the
|
||||||
|
* classes of the other stages and communication buffers. In order to
|
||||||
|
* change a structure such as the IQ, simply change the typedef here
|
||||||
|
* to use the desired class instead, and recompile. In order to
|
||||||
|
* create a different CPU to be used simultaneously with this one, see
|
||||||
|
* the alpha_impl.hh file for instructions.
|
||||||
|
*/
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
struct SimpleCPUPolicy
|
struct SimpleCPUPolicy
|
||||||
{
|
{
|
||||||
typedef TwobitBPredUnit<Impl> BPredUnit;
|
/** Typedef for the branch prediction unit (which includes the BP,
|
||||||
|
* RAS, and BTB).
|
||||||
|
*/
|
||||||
|
typedef BPredUnit<Impl> BPredUnit;
|
||||||
|
/** Typedef for the register file. Most classes assume a unified
|
||||||
|
* physical register file.
|
||||||
|
*/
|
||||||
typedef PhysRegFile<Impl> RegFile;
|
typedef PhysRegFile<Impl> RegFile;
|
||||||
|
/** Typedef for the freelist of registers. */
|
||||||
typedef SimpleFreeList FreeList;
|
typedef SimpleFreeList FreeList;
|
||||||
|
/** Typedef for the rename map. */
|
||||||
typedef SimpleRenameMap RenameMap;
|
typedef SimpleRenameMap RenameMap;
|
||||||
|
/** Typedef for the ROB. */
|
||||||
typedef ROB<Impl> ROB;
|
typedef ROB<Impl> ROB;
|
||||||
|
/** Typedef for the instruction queue/scheduler. */
|
||||||
typedef InstructionQueue<Impl> IQ;
|
typedef InstructionQueue<Impl> IQ;
|
||||||
|
/** Typedef for the memory dependence unit. */
|
||||||
typedef MemDepUnit<StoreSet, Impl> MemDepUnit;
|
typedef MemDepUnit<StoreSet, Impl> MemDepUnit;
|
||||||
|
/** Typedef for the LSQ. */
|
||||||
typedef LSQ<Impl> LSQ;
|
typedef LSQ<Impl> LSQ;
|
||||||
|
/** Typedef for the thread-specific LSQ units. */
|
||||||
typedef LSQUnit<Impl> LSQUnit;
|
typedef LSQUnit<Impl> LSQUnit;
|
||||||
|
|
||||||
|
/** Typedef for fetch. */
|
||||||
typedef DefaultFetch<Impl> Fetch;
|
typedef DefaultFetch<Impl> Fetch;
|
||||||
|
/** Typedef for decode. */
|
||||||
typedef DefaultDecode<Impl> Decode;
|
typedef DefaultDecode<Impl> Decode;
|
||||||
|
/** Typedef for rename. */
|
||||||
typedef DefaultRename<Impl> Rename;
|
typedef DefaultRename<Impl> Rename;
|
||||||
|
/** Typedef for Issue/Execute/Writeback. */
|
||||||
typedef DefaultIEW<Impl> IEW;
|
typedef DefaultIEW<Impl> IEW;
|
||||||
|
/** Typedef for commit. */
|
||||||
typedef DefaultCommit<Impl> Commit;
|
typedef DefaultCommit<Impl> Commit;
|
||||||
|
|
||||||
/** The struct for communication between fetch and decode. */
|
/** The struct for communication between fetch and decode. */
|
||||||
|
|
|
@ -109,9 +109,12 @@ class DefaultDecode
|
||||||
/** Sets pointer to list of active threads. */
|
/** Sets pointer to list of active threads. */
|
||||||
void setActiveThreads(std::list<unsigned> *at_ptr);
|
void setActiveThreads(std::list<unsigned> *at_ptr);
|
||||||
|
|
||||||
|
/** Switches out the decode stage. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Takes over from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Ticks decode, processing all input signals and decoding as many
|
/** Ticks decode, processing all input signals and decoding as many
|
||||||
* instructions as possible.
|
* instructions as possible.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -43,6 +43,7 @@ DefaultDecode<Impl>::DefaultDecode(Params *params)
|
||||||
{
|
{
|
||||||
_status = Inactive;
|
_status = Inactive;
|
||||||
|
|
||||||
|
// Setup status, make sure stall signals are clear.
|
||||||
for (int i = 0; i < numThreads; ++i) {
|
for (int i = 0; i < numThreads; ++i) {
|
||||||
decodeStatus[i] = Idle;
|
decodeStatus[i] = Idle;
|
||||||
|
|
||||||
|
@ -167,6 +168,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultDecode<Impl>::switchOut()
|
DefaultDecode<Impl>::switchOut()
|
||||||
{
|
{
|
||||||
|
// Decode can immediately switch out.
|
||||||
cpu->signalSwitched();
|
cpu->signalSwitched();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -176,6 +178,7 @@ DefaultDecode<Impl>::takeOverFrom()
|
||||||
{
|
{
|
||||||
_status = Inactive;
|
_status = Inactive;
|
||||||
|
|
||||||
|
// Be sure to reset state and clear out any old instructions.
|
||||||
for (int i = 0; i < numThreads; ++i) {
|
for (int i = 0; i < numThreads; ++i) {
|
||||||
decodeStatus[i] = Idle;
|
decodeStatus[i] = Idle;
|
||||||
|
|
||||||
|
@ -224,22 +227,22 @@ DefaultDecode<Impl>::block(unsigned tid)
|
||||||
{
|
{
|
||||||
DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);
|
DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);
|
||||||
|
|
||||||
// If the decode status is blocked or unblocking then decode has not yet
|
|
||||||
// signalled fetch to unblock. In that case, there is no need to tell
|
|
||||||
// fetch to block.
|
|
||||||
if (decodeStatus[tid] != Blocked &&
|
|
||||||
decodeStatus[tid] != Unblocking) {
|
|
||||||
toFetch->decodeBlock[tid] = true;
|
|
||||||
wroteToTimeBuffer = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add the current inputs to the skid buffer so they can be
|
// Add the current inputs to the skid buffer so they can be
|
||||||
// reprocessed when this stage unblocks.
|
// reprocessed when this stage unblocks.
|
||||||
skidInsert(tid);
|
skidInsert(tid);
|
||||||
|
|
||||||
|
// If the decode status is blocked or unblocking then decode has not yet
|
||||||
|
// signalled fetch to unblock. In that case, there is no need to tell
|
||||||
|
// fetch to block.
|
||||||
if (decodeStatus[tid] != Blocked) {
|
if (decodeStatus[tid] != Blocked) {
|
||||||
// Set the status to Blocked.
|
// Set the status to Blocked.
|
||||||
decodeStatus[tid] = Blocked;
|
decodeStatus[tid] = Blocked;
|
||||||
|
|
||||||
|
if (decodeStatus[tid] != Unblocking) {
|
||||||
|
toFetch->decodeBlock[tid] = true;
|
||||||
|
wroteToTimeBuffer = true;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -272,13 +275,16 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
|
||||||
DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction "
|
DPRINTF(Decode, "[tid:%i]: Squashing due to incorrect branch prediction "
|
||||||
"detected at decode.\n", tid);
|
"detected at decode.\n", tid);
|
||||||
|
|
||||||
|
// Send back mispredict information.
|
||||||
toFetch->decodeInfo[tid].branchMispredict = true;
|
toFetch->decodeInfo[tid].branchMispredict = true;
|
||||||
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
|
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
|
||||||
toFetch->decodeInfo[tid].predIncorrect = true;
|
toFetch->decodeInfo[tid].predIncorrect = true;
|
||||||
toFetch->decodeInfo[tid].squash = true;
|
toFetch->decodeInfo[tid].squash = true;
|
||||||
toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
|
toFetch->decodeInfo[tid].nextPC = inst->readNextPC();
|
||||||
toFetch->decodeInfo[tid].branchTaken = true;
|
toFetch->decodeInfo[tid].branchTaken =
|
||||||
|
inst->readNextPC() != (inst->readPC() + sizeof(TheISA::MachInst));
|
||||||
|
|
||||||
|
// Might have to tell fetch to unblock.
|
||||||
if (decodeStatus[tid] == Blocked ||
|
if (decodeStatus[tid] == Blocked ||
|
||||||
decodeStatus[tid] == Unblocking) {
|
decodeStatus[tid] == Unblocking) {
|
||||||
toFetch->decodeUnblock[tid] = 1;
|
toFetch->decodeUnblock[tid] = 1;
|
||||||
|
@ -294,11 +300,12 @@ DefaultDecode<Impl>::squash(DynInstPtr &inst, unsigned tid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clear the instruction list and skid buffer in case they have any
|
||||||
|
// insts in them.
|
||||||
while (!insts[tid].empty()) {
|
while (!insts[tid].empty()) {
|
||||||
insts[tid].pop();
|
insts[tid].pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear the skid buffer in case it has any data in it.
|
|
||||||
while (!skidBuffer[tid].empty()) {
|
while (!skidBuffer[tid].empty()) {
|
||||||
skidBuffer[tid].pop();
|
skidBuffer[tid].pop();
|
||||||
}
|
}
|
||||||
|
@ -343,11 +350,12 @@ DefaultDecode<Impl>::squash(unsigned tid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clear the instruction list and skid buffer in case they have any
|
||||||
|
// insts in them.
|
||||||
while (!insts[tid].empty()) {
|
while (!insts[tid].empty()) {
|
||||||
insts[tid].pop();
|
insts[tid].pop();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear the skid buffer in case it has any data in it.
|
|
||||||
while (!skidBuffer[tid].empty()) {
|
while (!skidBuffer[tid].empty()) {
|
||||||
skidBuffer[tid].pop();
|
skidBuffer[tid].pop();
|
||||||
}
|
}
|
||||||
|
@ -723,6 +731,7 @@ DefaultDecode<Impl>::decodeInsts(unsigned tid)
|
||||||
// Might want to set some sort of boolean and just do
|
// Might want to set some sort of boolean and just do
|
||||||
// a check at the end
|
// a check at the end
|
||||||
squash(inst, inst->threadNumber);
|
squash(inst, inst->threadNumber);
|
||||||
|
inst->setPredTarg(inst->branchTarget());
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include "cpu/o3/comm.hh"
|
#include "cpu/o3/comm.hh"
|
||||||
|
|
||||||
|
/** Node in a linked list. */
|
||||||
template <class DynInstPtr>
|
template <class DynInstPtr>
|
||||||
class DependencyEntry
|
class DependencyEntry
|
||||||
{
|
{
|
||||||
|
@ -18,32 +19,50 @@ class DependencyEntry
|
||||||
DependencyEntry<DynInstPtr> *next;
|
DependencyEntry<DynInstPtr> *next;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Array of linked list that maintains the dependencies between
|
||||||
|
* producing instructions and consuming instructions. Each linked
|
||||||
|
* list represents a single physical register, having the future
|
||||||
|
* producer of the register's value, and all consumers waiting on that
|
||||||
|
* value on the list. The head node of each linked list represents
|
||||||
|
* the producing instruction of that register. Instructions are put
|
||||||
|
* on the list upon reaching the IQ, and are removed from the list
|
||||||
|
* either when the producer completes, or the instruction is squashed.
|
||||||
|
*/
|
||||||
template <class DynInstPtr>
|
template <class DynInstPtr>
|
||||||
class DependencyGraph
|
class DependencyGraph
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef DependencyEntry<DynInstPtr> DepEntry;
|
typedef DependencyEntry<DynInstPtr> DepEntry;
|
||||||
|
|
||||||
|
/** Default construction. Must call resize() prior to use. */
|
||||||
DependencyGraph()
|
DependencyGraph()
|
||||||
: numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
|
: numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
|
/** Resize the dependency graph to have num_entries registers. */
|
||||||
void resize(int num_entries);
|
void resize(int num_entries);
|
||||||
|
|
||||||
|
/** Clears all of the linked lists. */
|
||||||
void reset();
|
void reset();
|
||||||
|
|
||||||
|
/** Inserts an instruction to be dependent on the given index. */
|
||||||
void insert(PhysRegIndex idx, DynInstPtr &new_inst);
|
void insert(PhysRegIndex idx, DynInstPtr &new_inst);
|
||||||
|
|
||||||
|
/** Sets the producing instruction of a given register. */
|
||||||
void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
|
void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
|
||||||
{ dependGraph[idx].inst = new_inst; }
|
{ dependGraph[idx].inst = new_inst; }
|
||||||
|
|
||||||
|
/** Clears the producing instruction. */
|
||||||
void clearInst(PhysRegIndex idx)
|
void clearInst(PhysRegIndex idx)
|
||||||
{ dependGraph[idx].inst = NULL; }
|
{ dependGraph[idx].inst = NULL; }
|
||||||
|
|
||||||
|
/** Removes an instruction from a single linked list. */
|
||||||
void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
|
void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
|
||||||
|
|
||||||
|
/** Removes and returns the newest dependent of a specific register. */
|
||||||
DynInstPtr pop(PhysRegIndex idx);
|
DynInstPtr pop(PhysRegIndex idx);
|
||||||
|
|
||||||
|
/** Checks if there are any dependents on a specific register. */
|
||||||
bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
|
bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
|
||||||
|
|
||||||
/** Debugging function to dump out the dependency graph.
|
/** Debugging function to dump out the dependency graph.
|
||||||
|
@ -59,13 +78,16 @@ class DependencyGraph
|
||||||
*/
|
*/
|
||||||
DepEntry *dependGraph;
|
DepEntry *dependGraph;
|
||||||
|
|
||||||
|
/** Number of linked lists; identical to the number of registers. */
|
||||||
int numEntries;
|
int numEntries;
|
||||||
|
|
||||||
// Debug variable, remove when done testing.
|
// Debug variable, remove when done testing.
|
||||||
unsigned memAllocCounter;
|
unsigned memAllocCounter;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
// Debug variable, remove when done testing.
|
||||||
uint64_t nodesTraversed;
|
uint64_t nodesTraversed;
|
||||||
|
// Debug variable, remove when done testing.
|
||||||
uint64_t nodesRemoved;
|
uint64_t nodesRemoved;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@ class Sampler;
|
||||||
* width is specified by the parameters; each cycle it tries to fetch
|
* width is specified by the parameters; each cycle it tries to fetch
|
||||||
* that many instructions. It supports using a branch predictor to
|
* that many instructions. It supports using a branch predictor to
|
||||||
* predict direction and targets.
|
* predict direction and targets.
|
||||||
* It supports the idling functionalitiy of the CPU by indicating to
|
* It supports the idling functionality of the CPU by indicating to
|
||||||
* the CPU when it is active and inactive.
|
* the CPU when it is active and inactive.
|
||||||
*/
|
*/
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -172,14 +172,19 @@ class DefaultFetch
|
||||||
/** Processes cache completion event. */
|
/** Processes cache completion event. */
|
||||||
void processCacheCompletion(PacketPtr pkt);
|
void processCacheCompletion(PacketPtr pkt);
|
||||||
|
|
||||||
|
/** Begins the switch out of the fetch stage. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Completes the switch out of the fetch stage. */
|
||||||
void doSwitchOut();
|
void doSwitchOut();
|
||||||
|
|
||||||
|
/** Takes over from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
|
/** Checks if the fetch stage is switched out. */
|
||||||
bool isSwitchedOut() { return switchedOut; }
|
bool isSwitchedOut() { return switchedOut; }
|
||||||
|
|
||||||
|
/** Tells fetch to wake up from a quiesce instruction. */
|
||||||
void wakeFromQuiesce();
|
void wakeFromQuiesce();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -312,8 +317,10 @@ class DefaultFetch
|
||||||
/** BPredUnit. */
|
/** BPredUnit. */
|
||||||
BPredUnit branchPred;
|
BPredUnit branchPred;
|
||||||
|
|
||||||
|
/** Per-thread fetch PC. */
|
||||||
Addr PC[Impl::MaxThreads];
|
Addr PC[Impl::MaxThreads];
|
||||||
|
|
||||||
|
/** Per-thread next PC. */
|
||||||
Addr nextPC[Impl::MaxThreads];
|
Addr nextPC[Impl::MaxThreads];
|
||||||
|
|
||||||
/** Memory packet used to access cache. */
|
/** Memory packet used to access cache. */
|
||||||
|
@ -380,8 +387,12 @@ class DefaultFetch
|
||||||
/** Thread ID being fetched. */
|
/** Thread ID being fetched. */
|
||||||
int threadFetched;
|
int threadFetched;
|
||||||
|
|
||||||
|
/** Checks if there is an interrupt pending. If there is, fetch
|
||||||
|
* must stop once it is not fetching PAL instructions.
|
||||||
|
*/
|
||||||
bool interruptPending;
|
bool interruptPending;
|
||||||
|
|
||||||
|
/** Records if fetch is switched out. */
|
||||||
bool switchedOut;
|
bool switchedOut;
|
||||||
|
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
|
@ -405,17 +416,23 @@ class DefaultFetch
|
||||||
* the pipeline.
|
* the pipeline.
|
||||||
*/
|
*/
|
||||||
Stats::Scalar<> fetchIdleCycles;
|
Stats::Scalar<> fetchIdleCycles;
|
||||||
|
/** Total number of cycles spent blocked. */
|
||||||
Stats::Scalar<> fetchBlockedCycles;
|
Stats::Scalar<> fetchBlockedCycles;
|
||||||
|
/** Total number of cycles spent in any other state. */
|
||||||
Stats::Scalar<> fetchMiscStallCycles;
|
Stats::Scalar<> fetchMiscStallCycles;
|
||||||
/** Stat for total number of fetched cache lines. */
|
/** Stat for total number of fetched cache lines. */
|
||||||
Stats::Scalar<> fetchedCacheLines;
|
Stats::Scalar<> fetchedCacheLines;
|
||||||
|
/** Total number of outstanding icache accesses that were dropped
|
||||||
|
* due to a squash.
|
||||||
|
*/
|
||||||
Stats::Scalar<> fetchIcacheSquashes;
|
Stats::Scalar<> fetchIcacheSquashes;
|
||||||
/** Distribution of number of instructions fetched each cycle. */
|
/** Distribution of number of instructions fetched each cycle. */
|
||||||
Stats::Distribution<> fetchNisnDist;
|
Stats::Distribution<> fetchNisnDist;
|
||||||
|
/** Rate of how often fetch was idle. */
|
||||||
Stats::Formula idleRate;
|
Stats::Formula idleRate;
|
||||||
|
/** Number of branch fetches per cycle. */
|
||||||
Stats::Formula branchRate;
|
Stats::Formula branchRate;
|
||||||
|
/** Number of instruction fetched per cycle. */
|
||||||
Stats::Formula fetchRate;
|
Stats::Formula fetchRate;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -188,59 +188,59 @@ void
|
||||||
DefaultFetch<Impl>::regStats()
|
DefaultFetch<Impl>::regStats()
|
||||||
{
|
{
|
||||||
icacheStallCycles
|
icacheStallCycles
|
||||||
.name(name() + ".FETCH:icacheStallCycles")
|
.name(name() + ".icacheStallCycles")
|
||||||
.desc("Number of cycles fetch is stalled on an Icache miss")
|
.desc("Number of cycles fetch is stalled on an Icache miss")
|
||||||
.prereq(icacheStallCycles);
|
.prereq(icacheStallCycles);
|
||||||
|
|
||||||
fetchedInsts
|
fetchedInsts
|
||||||
.name(name() + ".FETCH:Insts")
|
.name(name() + ".Insts")
|
||||||
.desc("Number of instructions fetch has processed")
|
.desc("Number of instructions fetch has processed")
|
||||||
.prereq(fetchedInsts);
|
.prereq(fetchedInsts);
|
||||||
|
|
||||||
fetchedBranches
|
fetchedBranches
|
||||||
.name(name() + ".FETCH:Branches")
|
.name(name() + ".Branches")
|
||||||
.desc("Number of branches that fetch encountered")
|
.desc("Number of branches that fetch encountered")
|
||||||
.prereq(fetchedBranches);
|
.prereq(fetchedBranches);
|
||||||
|
|
||||||
predictedBranches
|
predictedBranches
|
||||||
.name(name() + ".FETCH:predictedBranches")
|
.name(name() + ".predictedBranches")
|
||||||
.desc("Number of branches that fetch has predicted taken")
|
.desc("Number of branches that fetch has predicted taken")
|
||||||
.prereq(predictedBranches);
|
.prereq(predictedBranches);
|
||||||
|
|
||||||
fetchCycles
|
fetchCycles
|
||||||
.name(name() + ".FETCH:Cycles")
|
.name(name() + ".Cycles")
|
||||||
.desc("Number of cycles fetch has run and was not squashing or"
|
.desc("Number of cycles fetch has run and was not squashing or"
|
||||||
" blocked")
|
" blocked")
|
||||||
.prereq(fetchCycles);
|
.prereq(fetchCycles);
|
||||||
|
|
||||||
fetchSquashCycles
|
fetchSquashCycles
|
||||||
.name(name() + ".FETCH:SquashCycles")
|
.name(name() + ".SquashCycles")
|
||||||
.desc("Number of cycles fetch has spent squashing")
|
.desc("Number of cycles fetch has spent squashing")
|
||||||
.prereq(fetchSquashCycles);
|
.prereq(fetchSquashCycles);
|
||||||
|
|
||||||
fetchIdleCycles
|
fetchIdleCycles
|
||||||
.name(name() + ".FETCH:IdleCycles")
|
.name(name() + ".IdleCycles")
|
||||||
.desc("Number of cycles fetch was idle")
|
.desc("Number of cycles fetch was idle")
|
||||||
.prereq(fetchIdleCycles);
|
.prereq(fetchIdleCycles);
|
||||||
|
|
||||||
fetchBlockedCycles
|
fetchBlockedCycles
|
||||||
.name(name() + ".FETCH:BlockedCycles")
|
.name(name() + ".BlockedCycles")
|
||||||
.desc("Number of cycles fetch has spent blocked")
|
.desc("Number of cycles fetch has spent blocked")
|
||||||
.prereq(fetchBlockedCycles);
|
.prereq(fetchBlockedCycles);
|
||||||
|
|
||||||
fetchedCacheLines
|
fetchedCacheLines
|
||||||
.name(name() + ".FETCH:CacheLines")
|
.name(name() + ".CacheLines")
|
||||||
.desc("Number of cache lines fetched")
|
.desc("Number of cache lines fetched")
|
||||||
.prereq(fetchedCacheLines);
|
.prereq(fetchedCacheLines);
|
||||||
|
|
||||||
fetchMiscStallCycles
|
fetchMiscStallCycles
|
||||||
.name(name() + ".FETCH:MiscStallCycles")
|
.name(name() + ".MiscStallCycles")
|
||||||
.desc("Number of cycles fetch has spent waiting on interrupts, or "
|
.desc("Number of cycles fetch has spent waiting on interrupts, or "
|
||||||
"bad addresses, or out of MSHRs")
|
"bad addresses, or out of MSHRs")
|
||||||
.prereq(fetchMiscStallCycles);
|
.prereq(fetchMiscStallCycles);
|
||||||
|
|
||||||
fetchIcacheSquashes
|
fetchIcacheSquashes
|
||||||
.name(name() + ".FETCH:IcacheSquashes")
|
.name(name() + ".IcacheSquashes")
|
||||||
.desc("Number of outstanding Icache misses that were squashed")
|
.desc("Number of outstanding Icache misses that were squashed")
|
||||||
.prereq(fetchIcacheSquashes);
|
.prereq(fetchIcacheSquashes);
|
||||||
|
|
||||||
|
@ -248,24 +248,24 @@ DefaultFetch<Impl>::regStats()
|
||||||
.init(/* base value */ 0,
|
.init(/* base value */ 0,
|
||||||
/* last value */ fetchWidth,
|
/* last value */ fetchWidth,
|
||||||
/* bucket size */ 1)
|
/* bucket size */ 1)
|
||||||
.name(name() + ".FETCH:rateDist")
|
.name(name() + ".rateDist")
|
||||||
.desc("Number of instructions fetched each cycle (Total)")
|
.desc("Number of instructions fetched each cycle (Total)")
|
||||||
.flags(Stats::pdf);
|
.flags(Stats::pdf);
|
||||||
|
|
||||||
idleRate
|
idleRate
|
||||||
.name(name() + ".FETCH:idleRate")
|
.name(name() + ".idleRate")
|
||||||
.desc("Percent of cycles fetch was idle")
|
.desc("Percent of cycles fetch was idle")
|
||||||
.prereq(idleRate);
|
.prereq(idleRate);
|
||||||
idleRate = fetchIdleCycles * 100 / cpu->numCycles;
|
idleRate = fetchIdleCycles * 100 / cpu->numCycles;
|
||||||
|
|
||||||
branchRate
|
branchRate
|
||||||
.name(name() + ".FETCH:branchRate")
|
.name(name() + ".branchRate")
|
||||||
.desc("Number of branch fetches per cycle")
|
.desc("Number of branch fetches per cycle")
|
||||||
.flags(Stats::total);
|
.flags(Stats::total);
|
||||||
branchRate = predictedBranches / cpu->numCycles;
|
branchRate = fetchedBranches / cpu->numCycles;
|
||||||
|
|
||||||
fetchRate
|
fetchRate
|
||||||
.name(name() + ".FETCH:rate")
|
.name(name() + ".rate")
|
||||||
.desc("Number of inst fetches per cycle")
|
.desc("Number of inst fetches per cycle")
|
||||||
.flags(Stats::total);
|
.flags(Stats::total);
|
||||||
fetchRate = fetchedInsts / cpu->numCycles;
|
fetchRate = fetchedInsts / cpu->numCycles;
|
||||||
|
@ -337,6 +337,7 @@ template<class Impl>
|
||||||
void
|
void
|
||||||
DefaultFetch<Impl>::initStage()
|
DefaultFetch<Impl>::initStage()
|
||||||
{
|
{
|
||||||
|
// Setup PC and nextPC with initial state.
|
||||||
for (int tid = 0; tid < numThreads; tid++) {
|
for (int tid = 0; tid < numThreads; tid++) {
|
||||||
PC[tid] = cpu->readPC(tid);
|
PC[tid] = cpu->readPC(tid);
|
||||||
nextPC[tid] = cpu->readNextPC(tid);
|
nextPC[tid] = cpu->readNextPC(tid);
|
||||||
|
@ -353,8 +354,6 @@ DefaultFetch<Impl>::processCacheCompletion(PacketPtr pkt)
|
||||||
|
|
||||||
// Only change the status if it's still waiting on the icache access
|
// Only change the status if it's still waiting on the icache access
|
||||||
// to return.
|
// to return.
|
||||||
// Can keep track of how many cache accesses go unused due to
|
|
||||||
// misspeculation here.
|
|
||||||
if (fetchStatus[tid] != IcacheWaitResponse ||
|
if (fetchStatus[tid] != IcacheWaitResponse ||
|
||||||
pkt != memPkt[tid] ||
|
pkt != memPkt[tid] ||
|
||||||
isSwitchedOut()) {
|
isSwitchedOut()) {
|
||||||
|
@ -391,6 +390,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultFetch<Impl>::switchOut()
|
DefaultFetch<Impl>::switchOut()
|
||||||
{
|
{
|
||||||
|
// Fetch is ready to switch out at any time.
|
||||||
switchedOut = true;
|
switchedOut = true;
|
||||||
cpu->signalSwitched();
|
cpu->signalSwitched();
|
||||||
}
|
}
|
||||||
|
@ -399,6 +399,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultFetch<Impl>::doSwitchOut()
|
DefaultFetch<Impl>::doSwitchOut()
|
||||||
{
|
{
|
||||||
|
// Branch predictor needs to have its state cleared.
|
||||||
branchPred.switchOut();
|
branchPred.switchOut();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -429,6 +430,7 @@ DefaultFetch<Impl>::wakeFromQuiesce()
|
||||||
{
|
{
|
||||||
DPRINTF(Fetch, "Waking up from quiesce\n");
|
DPRINTF(Fetch, "Waking up from quiesce\n");
|
||||||
// Hopefully this is safe
|
// Hopefully this is safe
|
||||||
|
// @todo: Allow other threads to wake from quiesce.
|
||||||
fetchStatus[0] = Running;
|
fetchStatus[0] = Running;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1213,7 +1215,7 @@ DefaultFetch<Impl>::lsqCount()
|
||||||
|
|
||||||
if (fetchStatus[high_pri] == Running ||
|
if (fetchStatus[high_pri] == Running ||
|
||||||
fetchStatus[high_pri] == IcacheAccessComplete ||
|
fetchStatus[high_pri] == IcacheAccessComplete ||
|
||||||
fetchStatus[high_pri] == Idle)
|
fetchStatus[high_pri] == Idle)
|
||||||
return high_pri;
|
return high_pri;
|
||||||
else
|
else
|
||||||
PQ.pop();
|
PQ.pop();
|
||||||
|
|
|
@ -183,6 +183,8 @@ FUPool::getUnit(OpClass capability)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
assert(fu_idx < numFU);
|
||||||
|
|
||||||
unitBusy[fu_idx] = true;
|
unitBusy[fu_idx] = true;
|
||||||
|
|
||||||
return fu_idx;
|
return fu_idx;
|
||||||
|
|
|
@ -155,7 +155,10 @@ class FUPool : public SimObject
|
||||||
return maxIssueLatencies[capability];
|
return maxIssueLatencies[capability];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Switches out functional unit pool. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Takes over from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -143,12 +143,16 @@ class DefaultIEW
|
||||||
/** Sets pointer to the scoreboard. */
|
/** Sets pointer to the scoreboard. */
|
||||||
void setScoreboard(Scoreboard *sb_ptr);
|
void setScoreboard(Scoreboard *sb_ptr);
|
||||||
|
|
||||||
|
/** Starts switch out of IEW stage. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Completes switch out of IEW stage. */
|
||||||
void doSwitchOut();
|
void doSwitchOut();
|
||||||
|
|
||||||
|
/** Takes over from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
|
/** Returns if IEW is switched out. */
|
||||||
bool isSwitchedOut() { return switchedOut; }
|
bool isSwitchedOut() { return switchedOut; }
|
||||||
|
|
||||||
/** Sets page table pointer within LSQ. */
|
/** Sets page table pointer within LSQ. */
|
||||||
|
@ -270,6 +274,7 @@ class DefaultIEW
|
||||||
void tick();
|
void tick();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/** Updates execution stats based on the instruction. */
|
||||||
void updateExeInstStats(DynInstPtr &inst);
|
void updateExeInstStats(DynInstPtr &inst);
|
||||||
|
|
||||||
/** Pointer to main time buffer used for backwards communication. */
|
/** Pointer to main time buffer used for backwards communication. */
|
||||||
|
@ -412,6 +417,7 @@ class DefaultIEW
|
||||||
/** Maximum size of the skid buffer. */
|
/** Maximum size of the skid buffer. */
|
||||||
unsigned skidBufferMax;
|
unsigned skidBufferMax;
|
||||||
|
|
||||||
|
/** Is this stage switched out. */
|
||||||
bool switchedOut;
|
bool switchedOut;
|
||||||
|
|
||||||
/** Stat for total number of idle cycles. */
|
/** Stat for total number of idle cycles. */
|
||||||
|
@ -453,9 +459,13 @@ class DefaultIEW
|
||||||
/** Stat for total number of mispredicted branches detected at execute. */
|
/** Stat for total number of mispredicted branches detected at execute. */
|
||||||
Stats::Formula branchMispredicts;
|
Stats::Formula branchMispredicts;
|
||||||
|
|
||||||
|
/** Number of executed software prefetches. */
|
||||||
Stats::Vector<> exeSwp;
|
Stats::Vector<> exeSwp;
|
||||||
|
/** Number of executed nops. */
|
||||||
Stats::Vector<> exeNop;
|
Stats::Vector<> exeNop;
|
||||||
|
/** Number of executed memory references. */
|
||||||
Stats::Vector<> exeRefs;
|
Stats::Vector<> exeRefs;
|
||||||
|
/** Number of executed branches. */
|
||||||
Stats::Vector<> exeBranches;
|
Stats::Vector<> exeBranches;
|
||||||
|
|
||||||
// Stats::Vector<> issued_ops;
|
// Stats::Vector<> issued_ops;
|
||||||
|
@ -465,19 +475,30 @@ class DefaultIEW
|
||||||
Stats::Vector<> dist_unissued;
|
Stats::Vector<> dist_unissued;
|
||||||
Stats::Vector2d<> stat_issued_inst_type;
|
Stats::Vector2d<> stat_issued_inst_type;
|
||||||
*/
|
*/
|
||||||
|
/** Number of instructions issued per cycle. */
|
||||||
Stats::Formula issueRate;
|
Stats::Formula issueRate;
|
||||||
|
/** Number of executed store instructions. */
|
||||||
Stats::Formula iewExecStoreInsts;
|
Stats::Formula iewExecStoreInsts;
|
||||||
// Stats::Formula issue_op_rate;
|
// Stats::Formula issue_op_rate;
|
||||||
// Stats::Formula fu_busy_rate;
|
// Stats::Formula fu_busy_rate;
|
||||||
|
/** Number of instructions sent to commit. */
|
||||||
Stats::Vector<> iewInstsToCommit;
|
Stats::Vector<> iewInstsToCommit;
|
||||||
|
/** Number of instructions that writeback. */
|
||||||
Stats::Vector<> writebackCount;
|
Stats::Vector<> writebackCount;
|
||||||
|
/** Number of instructions that wake consumers. */
|
||||||
Stats::Vector<> producerInst;
|
Stats::Vector<> producerInst;
|
||||||
|
/** Number of instructions that wake up from producers. */
|
||||||
Stats::Vector<> consumerInst;
|
Stats::Vector<> consumerInst;
|
||||||
|
/** Number of instructions that were delayed in writing back due
|
||||||
|
* to resource contention.
|
||||||
|
*/
|
||||||
Stats::Vector<> wbPenalized;
|
Stats::Vector<> wbPenalized;
|
||||||
|
|
||||||
|
/** Number of instructions per cycle written back. */
|
||||||
Stats::Formula wbRate;
|
Stats::Formula wbRate;
|
||||||
|
/** Average number of woken instructions per writeback. */
|
||||||
Stats::Formula wbFanout;
|
Stats::Formula wbFanout;
|
||||||
|
/** Number of instructions per cycle delayed in writing back. */
|
||||||
Stats::Formula wbPenalizedRate;
|
Stats::Formula wbPenalizedRate;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -383,6 +383,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultIEW<Impl>::switchOut()
|
DefaultIEW<Impl>::switchOut()
|
||||||
{
|
{
|
||||||
|
// IEW is ready to switch out at any time.
|
||||||
cpu->signalSwitched();
|
cpu->signalSwitched();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -390,6 +391,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultIEW<Impl>::doSwitchOut()
|
DefaultIEW<Impl>::doSwitchOut()
|
||||||
{
|
{
|
||||||
|
// Clear any state.
|
||||||
switchedOut = true;
|
switchedOut = true;
|
||||||
|
|
||||||
instQueue.switchOut();
|
instQueue.switchOut();
|
||||||
|
@ -408,6 +410,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultIEW<Impl>::takeOverFrom()
|
DefaultIEW<Impl>::takeOverFrom()
|
||||||
{
|
{
|
||||||
|
// Reset all state.
|
||||||
_status = Active;
|
_status = Active;
|
||||||
exeStatus = Running;
|
exeStatus = Running;
|
||||||
wbStatus = Idle;
|
wbStatus = Idle;
|
||||||
|
@ -521,6 +524,7 @@ DefaultIEW<Impl>::squashDueToMemBlocked(DynInstPtr &inst, unsigned tid)
|
||||||
toCommit->squashedSeqNum[tid] = inst->seqNum;
|
toCommit->squashedSeqNum[tid] = inst->seqNum;
|
||||||
toCommit->nextPC[tid] = inst->readPC();
|
toCommit->nextPC[tid] = inst->readPC();
|
||||||
|
|
||||||
|
// Must include the broadcasted SN in the squash.
|
||||||
toCommit->includeSquashInst[tid] = true;
|
toCommit->includeSquashInst[tid] = true;
|
||||||
|
|
||||||
ldstQueue.setLoadBlockedHandled(tid);
|
ldstQueue.setLoadBlockedHandled(tid);
|
||||||
|
@ -1054,6 +1058,7 @@ DefaultIEW<Impl>::dispatchInsts(unsigned tid)
|
||||||
// Store conditionals need to be set as "canCommit()"
|
// Store conditionals need to be set as "canCommit()"
|
||||||
// so that commit can process them when they reach the
|
// so that commit can process them when they reach the
|
||||||
// head of commit.
|
// head of commit.
|
||||||
|
// @todo: This is somewhat specific to Alpha.
|
||||||
inst->setCanCommit();
|
inst->setCanCommit();
|
||||||
instQueue.insertNonSpec(inst);
|
instQueue.insertNonSpec(inst);
|
||||||
add_to_iq = false;
|
add_to_iq = false;
|
||||||
|
@ -1313,6 +1318,7 @@ DefaultIEW<Impl>::executeInsts()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update and record activity if we processed any instructions.
|
||||||
if (inst_num) {
|
if (inst_num) {
|
||||||
if (exeStatus == Idle) {
|
if (exeStatus == Idle) {
|
||||||
exeStatus = Running;
|
exeStatus = Running;
|
||||||
|
@ -1363,8 +1369,10 @@ DefaultIEW<Impl>::writebackInsts()
|
||||||
scoreboard->setReg(inst->renamedDestRegIdx(i));
|
scoreboard->setReg(inst->renamedDestRegIdx(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
producerInst[tid]++;
|
if (dependents) {
|
||||||
consumerInst[tid]+= dependents;
|
producerInst[tid]++;
|
||||||
|
consumerInst[tid]+= dependents;
|
||||||
|
}
|
||||||
writebackCount[tid]++;
|
writebackCount[tid]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1435,6 +1443,7 @@ DefaultIEW<Impl>::tick()
|
||||||
|
|
||||||
DPRINTF(IEW,"Processing [tid:%i]\n",tid);
|
DPRINTF(IEW,"Processing [tid:%i]\n",tid);
|
||||||
|
|
||||||
|
// Update structures based on instructions committed.
|
||||||
if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
|
if (fromCommit->commitInfo[tid].doneSeqNum != 0 &&
|
||||||
!fromCommit->commitInfo[tid].squash &&
|
!fromCommit->commitInfo[tid].squash &&
|
||||||
!fromCommit->commitInfo[tid].robSquashing) {
|
!fromCommit->commitInfo[tid].robSquashing) {
|
||||||
|
|
|
@ -94,6 +94,9 @@ class InstructionQueue
|
||||||
/** Pointer back to the instruction queue. */
|
/** Pointer back to the instruction queue. */
|
||||||
InstructionQueue<Impl> *iqPtr;
|
InstructionQueue<Impl> *iqPtr;
|
||||||
|
|
||||||
|
/** Should the FU be added to the list to be freed upon
|
||||||
|
* completing this event.
|
||||||
|
*/
|
||||||
bool freeFU;
|
bool freeFU;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -118,6 +121,7 @@ class InstructionQueue
|
||||||
/** Registers statistics. */
|
/** Registers statistics. */
|
||||||
void regStats();
|
void regStats();
|
||||||
|
|
||||||
|
/** Resets all instruction queue state. */
|
||||||
void resetState();
|
void resetState();
|
||||||
|
|
||||||
/** Sets CPU pointer. */
|
/** Sets CPU pointer. */
|
||||||
|
@ -135,10 +139,13 @@ class InstructionQueue
|
||||||
/** Sets the global time buffer. */
|
/** Sets the global time buffer. */
|
||||||
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
||||||
|
|
||||||
|
/** Switches out the instruction queue. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Takes over execution from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
|
/** Returns if the IQ is switched out. */
|
||||||
bool isSwitchedOut() { return switchedOut; }
|
bool isSwitchedOut() { return switchedOut; }
|
||||||
|
|
||||||
/** Number of entries needed for given amount of threads. */
|
/** Number of entries needed for given amount of threads. */
|
||||||
|
@ -173,6 +180,9 @@ class InstructionQueue
|
||||||
*/
|
*/
|
||||||
void insertBarrier(DynInstPtr &barr_inst);
|
void insertBarrier(DynInstPtr &barr_inst);
|
||||||
|
|
||||||
|
/** Returns the oldest scheduled instruction, and removes it from
|
||||||
|
* the list of instructions waiting to execute.
|
||||||
|
*/
|
||||||
DynInstPtr getInstToExecute();
|
DynInstPtr getInstToExecute();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -276,13 +286,15 @@ class InstructionQueue
|
||||||
/** List of all the instructions in the IQ (some of which may be issued). */
|
/** List of all the instructions in the IQ (some of which may be issued). */
|
||||||
std::list<DynInstPtr> instList[Impl::MaxThreads];
|
std::list<DynInstPtr> instList[Impl::MaxThreads];
|
||||||
|
|
||||||
|
/** List of instructions that are ready to be executed. */
|
||||||
std::list<DynInstPtr> instsToExecute;
|
std::list<DynInstPtr> instsToExecute;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Struct for comparing entries to be added to the priority queue. This
|
* Struct for comparing entries to be added to the priority queue.
|
||||||
* gives reverse ordering to the instructions in terms of sequence
|
* This gives reverse ordering to the instructions in terms of
|
||||||
* numbers: the instructions with smaller sequence numbers (and hence
|
* sequence numbers: the instructions with smaller sequence
|
||||||
* are older) will be at the top of the priority queue.
|
* numbers (and hence are older) will be at the top of the
|
||||||
|
* priority queue.
|
||||||
*/
|
*/
|
||||||
struct pqCompare {
|
struct pqCompare {
|
||||||
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
|
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
|
||||||
|
@ -395,6 +407,7 @@ class InstructionQueue
|
||||||
*/
|
*/
|
||||||
unsigned commitToIEWDelay;
|
unsigned commitToIEWDelay;
|
||||||
|
|
||||||
|
/** Is the IQ switched out. */
|
||||||
bool switchedOut;
|
bool switchedOut;
|
||||||
|
|
||||||
/** The sequence number of the squashed instruction. */
|
/** The sequence number of the squashed instruction. */
|
||||||
|
@ -462,19 +475,28 @@ class InstructionQueue
|
||||||
*/
|
*/
|
||||||
Stats::Scalar<> iqSquashedNonSpecRemoved;
|
Stats::Scalar<> iqSquashedNonSpecRemoved;
|
||||||
|
|
||||||
|
/** Distribution of number of instructions in the queue. */
|
||||||
Stats::VectorDistribution<> queueResDist;
|
Stats::VectorDistribution<> queueResDist;
|
||||||
|
/** Distribution of the number of instructions issued. */
|
||||||
Stats::Distribution<> numIssuedDist;
|
Stats::Distribution<> numIssuedDist;
|
||||||
|
/** Distribution of the cycles it takes to issue an instruction. */
|
||||||
Stats::VectorDistribution<> issueDelayDist;
|
Stats::VectorDistribution<> issueDelayDist;
|
||||||
|
|
||||||
|
/** Number of times an instruction could not be issued because a
|
||||||
|
* FU was busy.
|
||||||
|
*/
|
||||||
Stats::Vector<> statFuBusy;
|
Stats::Vector<> statFuBusy;
|
||||||
// Stats::Vector<> dist_unissued;
|
// Stats::Vector<> dist_unissued;
|
||||||
|
/** Stat for total number issued for each instruction type. */
|
||||||
Stats::Vector2d<> statIssuedInstType;
|
Stats::Vector2d<> statIssuedInstType;
|
||||||
|
|
||||||
|
/** Number of instructions issued per cycle. */
|
||||||
Stats::Formula issueRate;
|
Stats::Formula issueRate;
|
||||||
// Stats::Formula issue_stores;
|
// Stats::Formula issue_stores;
|
||||||
// Stats::Formula issue_op_rate;
|
// Stats::Formula issue_op_rate;
|
||||||
Stats::Vector<> fuBusy; //cumulative fu busy
|
/** Number of times the FU was busy. */
|
||||||
|
Stats::Vector<> fuBusy;
|
||||||
|
/** Number of times the FU was busy per instruction issued. */
|
||||||
Stats::Formula fuBusyRate;
|
Stats::Formula fuBusyRate;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -152,8 +152,10 @@ template <class Impl>
|
||||||
InstructionQueue<Impl>::~InstructionQueue()
|
InstructionQueue<Impl>::~InstructionQueue()
|
||||||
{
|
{
|
||||||
dependGraph.reset();
|
dependGraph.reset();
|
||||||
|
#ifdef DEBUG
|
||||||
cprintf("Nodes traversed: %i, removed: %i\n",
|
cprintf("Nodes traversed: %i, removed: %i\n",
|
||||||
dependGraph.nodesTraversed, dependGraph.nodesRemoved);
|
dependGraph.nodesTraversed, dependGraph.nodesRemoved);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
|
@ -670,14 +672,8 @@ InstructionQueue<Impl>::processFUCompletion(DynInstPtr &inst, int fu_idx)
|
||||||
// @todo: Ensure that these FU Completions happen at the beginning
|
// @todo: Ensure that these FU Completions happen at the beginning
|
||||||
// of a cycle, otherwise they could add too many instructions to
|
// of a cycle, otherwise they could add too many instructions to
|
||||||
// the queue.
|
// the queue.
|
||||||
// @todo: This could break if there's multiple multi-cycle ops
|
|
||||||
// finishing on this cycle. Maybe implement something like
|
|
||||||
// instToCommit in iew_impl.hh.
|
|
||||||
issueToExecuteQueue->access(0)->size++;
|
issueToExecuteQueue->access(0)->size++;
|
||||||
instsToExecute.push_back(inst);
|
instsToExecute.push_back(inst);
|
||||||
// int &size = issueToExecuteQueue->access(0)->size;
|
|
||||||
|
|
||||||
// issueToExecuteQueue->access(0)->insts[size++] = inst;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// @todo: Figure out a better way to remove the squashed items from the
|
// @todo: Figure out a better way to remove the squashed items from the
|
||||||
|
@ -743,9 +739,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we have an instruction that doesn't require a FU, or a
|
||||||
|
// valid FU, then schedule for execution.
|
||||||
if (idx == -2 || idx != -1) {
|
if (idx == -2 || idx != -1) {
|
||||||
if (op_latency == 1) {
|
if (op_latency == 1) {
|
||||||
// i2e_info->insts[exec_queue_slot++] = issuing_inst;
|
|
||||||
i2e_info->size++;
|
i2e_info->size++;
|
||||||
instsToExecute.push_back(issuing_inst);
|
instsToExecute.push_back(issuing_inst);
|
||||||
|
|
||||||
|
@ -763,14 +760,10 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
|
|
||||||
// @todo: Enforce that issue_latency == 1 or op_latency
|
// @todo: Enforce that issue_latency == 1 or op_latency
|
||||||
if (issue_latency > 1) {
|
if (issue_latency > 1) {
|
||||||
|
// If FU isn't pipelined, then it must be freed
|
||||||
|
// upon the execution completing.
|
||||||
execution->setFreeFU();
|
execution->setFreeFU();
|
||||||
} else {
|
} else {
|
||||||
// @todo: Not sure I'm accounting for the
|
|
||||||
// multi-cycle op in a pipelined FU properly, or
|
|
||||||
// the number of instructions issued in one cycle.
|
|
||||||
// i2e_info->insts[exec_queue_slot++] = issuing_inst;
|
|
||||||
// i2e_info->size++;
|
|
||||||
|
|
||||||
// Add the FU onto the list of FU's to be freed next cycle.
|
// Add the FU onto the list of FU's to be freed next cycle.
|
||||||
fuPool->freeUnitNextCycle(idx);
|
fuPool->freeUnitNextCycle(idx);
|
||||||
}
|
}
|
||||||
|
@ -815,6 +808,7 @@ InstructionQueue<Impl>::scheduleReadyInsts()
|
||||||
numIssuedDist.sample(total_issued);
|
numIssuedDist.sample(total_issued);
|
||||||
iqInstsIssued+= total_issued;
|
iqInstsIssued+= total_issued;
|
||||||
|
|
||||||
|
// If we issued any instructions, tell the CPU we had activity.
|
||||||
if (total_issued) {
|
if (total_issued) {
|
||||||
cpu->activityThisCycle();
|
cpu->activityThisCycle();
|
||||||
} else {
|
} else {
|
||||||
|
@ -1365,4 +1359,45 @@ InstructionQueue<Impl>::dumpInsts()
|
||||||
++num;
|
++num;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cprintf("Insts to Execute list:\n");
|
||||||
|
|
||||||
|
int num = 0;
|
||||||
|
int valid_num = 0;
|
||||||
|
ListIt inst_list_it = instsToExecute.begin();
|
||||||
|
|
||||||
|
while (inst_list_it != instsToExecute.end())
|
||||||
|
{
|
||||||
|
cprintf("Instruction:%i\n",
|
||||||
|
num);
|
||||||
|
if (!(*inst_list_it)->isSquashed()) {
|
||||||
|
if (!(*inst_list_it)->isIssued()) {
|
||||||
|
++valid_num;
|
||||||
|
cprintf("Count:%i\n", valid_num);
|
||||||
|
} else if ((*inst_list_it)->isMemRef() &&
|
||||||
|
!(*inst_list_it)->memOpDone) {
|
||||||
|
// Loads that have not been marked as executed
|
||||||
|
// still count towards the total instructions.
|
||||||
|
++valid_num;
|
||||||
|
cprintf("Count:%i\n", valid_num);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cprintf("PC:%#x\n[sn:%lli]\n[tid:%i]\n"
|
||||||
|
"Issued:%i\nSquashed:%i\n",
|
||||||
|
(*inst_list_it)->readPC(),
|
||||||
|
(*inst_list_it)->seqNum,
|
||||||
|
(*inst_list_it)->threadNumber,
|
||||||
|
(*inst_list_it)->isIssued(),
|
||||||
|
(*inst_list_it)->isSquashed());
|
||||||
|
|
||||||
|
if ((*inst_list_it)->isMemRef()) {
|
||||||
|
cprintf("MemOpDone:%i\n", (*inst_list_it)->memOpDone);
|
||||||
|
}
|
||||||
|
|
||||||
|
cprintf("\n");
|
||||||
|
|
||||||
|
inst_list_it++;
|
||||||
|
++num;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,6 +49,7 @@ class LSQ {
|
||||||
typedef typename Impl::CPUPol::IEW IEW;
|
typedef typename Impl::CPUPol::IEW IEW;
|
||||||
typedef typename Impl::CPUPol::LSQUnit LSQUnit;
|
typedef typename Impl::CPUPol::LSQUnit LSQUnit;
|
||||||
|
|
||||||
|
/** SMT policy. */
|
||||||
enum LSQPolicy {
|
enum LSQPolicy {
|
||||||
Dynamic,
|
Dynamic,
|
||||||
Partitioned,
|
Partitioned,
|
||||||
|
@ -69,8 +70,9 @@ class LSQ {
|
||||||
void setIEW(IEW *iew_ptr);
|
void setIEW(IEW *iew_ptr);
|
||||||
/** Sets the page table pointer. */
|
/** Sets the page table pointer. */
|
||||||
// void setPageTable(PageTable *pt_ptr);
|
// void setPageTable(PageTable *pt_ptr);
|
||||||
|
/** Switches out the LSQ. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
/** Takes over execution from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Number of entries needed for the given amount of threads.*/
|
/** Number of entries needed for the given amount of threads.*/
|
||||||
|
@ -95,9 +97,6 @@ class LSQ {
|
||||||
/** Executes a load. */
|
/** Executes a load. */
|
||||||
Fault executeLoad(DynInstPtr &inst);
|
Fault executeLoad(DynInstPtr &inst);
|
||||||
|
|
||||||
Fault executeLoad(int lq_idx, unsigned tid)
|
|
||||||
{ return thread[tid].executeLoad(lq_idx); }
|
|
||||||
|
|
||||||
/** Executes a store. */
|
/** Executes a store. */
|
||||||
Fault executeStore(DynInstPtr &inst);
|
Fault executeStore(DynInstPtr &inst);
|
||||||
|
|
||||||
|
|
|
@ -87,10 +87,13 @@ class LSQUnit {
|
||||||
/** Sets the page table pointer. */
|
/** Sets the page table pointer. */
|
||||||
// void setPageTable(PageTable *pt_ptr);
|
// void setPageTable(PageTable *pt_ptr);
|
||||||
|
|
||||||
|
/** Switches out LSQ unit. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Takes over from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
|
/** Returns if the LSQ is switched out. */
|
||||||
bool isSwitchedOut() { return switchedOut; }
|
bool isSwitchedOut() { return switchedOut; }
|
||||||
|
|
||||||
/** Ticks the LSQ unit, which in this case only resets the number of
|
/** Ticks the LSQ unit, which in this case only resets the number of
|
||||||
|
@ -159,12 +162,15 @@ class LSQUnit {
|
||||||
bool loadBlocked()
|
bool loadBlocked()
|
||||||
{ return isLoadBlocked; }
|
{ return isLoadBlocked; }
|
||||||
|
|
||||||
|
/** Clears the signal that a load became blocked. */
|
||||||
void clearLoadBlocked()
|
void clearLoadBlocked()
|
||||||
{ isLoadBlocked = false; }
|
{ isLoadBlocked = false; }
|
||||||
|
|
||||||
|
/** Returns if the blocked load was handled. */
|
||||||
bool isLoadBlockedHandled()
|
bool isLoadBlockedHandled()
|
||||||
{ return loadBlockedHandled; }
|
{ return loadBlockedHandled; }
|
||||||
|
|
||||||
|
/** Records the blocked load as being handled. */
|
||||||
void setLoadBlockedHandled()
|
void setLoadBlockedHandled()
|
||||||
{ loadBlockedHandled = true; }
|
{ loadBlockedHandled = true; }
|
||||||
|
|
||||||
|
@ -339,6 +345,7 @@ class LSQUnit {
|
||||||
/** The number of used cache ports in this cycle. */
|
/** The number of used cache ports in this cycle. */
|
||||||
int usedPorts;
|
int usedPorts;
|
||||||
|
|
||||||
|
/** Is the LSQ switched out. */
|
||||||
bool switchedOut;
|
bool switchedOut;
|
||||||
|
|
||||||
//list<InstSeqNum> mshrSeqNums;
|
//list<InstSeqNum> mshrSeqNums;
|
||||||
|
@ -358,8 +365,10 @@ class LSQUnit {
|
||||||
/** Whether or not a load is blocked due to the memory system. */
|
/** Whether or not a load is blocked due to the memory system. */
|
||||||
bool isLoadBlocked;
|
bool isLoadBlocked;
|
||||||
|
|
||||||
|
/** Has the blocked load been handled. */
|
||||||
bool loadBlockedHandled;
|
bool loadBlockedHandled;
|
||||||
|
|
||||||
|
/** The sequence number of the blocked load. */
|
||||||
InstSeqNum blockedLoadSeqNum;
|
InstSeqNum blockedLoadSeqNum;
|
||||||
|
|
||||||
/** The oldest load that caused a memory ordering violation. */
|
/** The oldest load that caused a memory ordering violation. */
|
||||||
|
|
|
@ -78,8 +78,12 @@ LSQUnit<Impl>::completeStoreDataAccess(DynInstPtr &inst)
|
||||||
|
|
||||||
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
|
//lsqPtr->removeMSHR(lsqPtr->storeQueue[storeIdx].inst->seqNum);
|
||||||
|
|
||||||
if (lsqPtr->isSwitchedOut())
|
if (lsqPtr->isSwitchedOut()) {
|
||||||
|
if (wbEvent)
|
||||||
|
delete wbEvent;
|
||||||
|
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
lsqPtr->cpu->wakeCPU();
|
lsqPtr->cpu->wakeCPU();
|
||||||
|
|
||||||
|
@ -500,7 +504,6 @@ LSQUnit<Impl>::commitLoad()
|
||||||
DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
|
DPRINTF(LSQUnit, "Committing head load instruction, PC %#x\n",
|
||||||
loadQueue[loadHead]->readPC());
|
loadQueue[loadHead]->readPC());
|
||||||
|
|
||||||
|
|
||||||
loadQueue[loadHead] = NULL;
|
loadQueue[loadHead] = NULL;
|
||||||
|
|
||||||
incrLdIdx(loadHead);
|
incrLdIdx(loadHead);
|
||||||
|
|
|
@ -86,8 +86,10 @@ class MemDepUnit {
|
||||||
/** Registers statistics. */
|
/** Registers statistics. */
|
||||||
void regStats();
|
void regStats();
|
||||||
|
|
||||||
|
/** Switches out the memory dependence predictor. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Takes over from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Sets the pointer to the IQ. */
|
/** Sets the pointer to the IQ. */
|
||||||
|
@ -157,10 +159,12 @@ class MemDepUnit {
|
||||||
: inst(new_inst), regsReady(false), memDepReady(false),
|
: inst(new_inst), regsReady(false), memDepReady(false),
|
||||||
completed(false), squashed(false)
|
completed(false), squashed(false)
|
||||||
{
|
{
|
||||||
|
#ifdef DEBUG
|
||||||
++memdep_count;
|
++memdep_count;
|
||||||
|
|
||||||
DPRINTF(MemDepUnit, "Memory dependency entry created. "
|
DPRINTF(MemDepUnit, "Memory dependency entry created. "
|
||||||
"memdep_count=%i\n", memdep_count);
|
"memdep_count=%i\n", memdep_count);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Frees any pointers. */
|
/** Frees any pointers. */
|
||||||
|
@ -169,11 +173,12 @@ class MemDepUnit {
|
||||||
for (int i = 0; i < dependInsts.size(); ++i) {
|
for (int i = 0; i < dependInsts.size(); ++i) {
|
||||||
dependInsts[i] = NULL;
|
dependInsts[i] = NULL;
|
||||||
}
|
}
|
||||||
|
#ifdef DEBUG
|
||||||
--memdep_count;
|
--memdep_count;
|
||||||
|
|
||||||
DPRINTF(MemDepUnit, "Memory dependency entry deleted. "
|
DPRINTF(MemDepUnit, "Memory dependency entry deleted. "
|
||||||
"memdep_count=%i\n", memdep_count);
|
"memdep_count=%i\n", memdep_count);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the name of the memory dependence entry. */
|
/** Returns the name of the memory dependence entry. */
|
||||||
|
@ -198,9 +203,11 @@ class MemDepUnit {
|
||||||
bool squashed;
|
bool squashed;
|
||||||
|
|
||||||
/** For debugging. */
|
/** For debugging. */
|
||||||
|
#ifdef DEBUG
|
||||||
static int memdep_count;
|
static int memdep_count;
|
||||||
static int memdep_insert;
|
static int memdep_insert;
|
||||||
static int memdep_erase;
|
static int memdep_erase;
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Finds the memory dependence entry in the hash map. */
|
/** Finds the memory dependence entry in the hash map. */
|
||||||
|
@ -229,9 +236,13 @@ class MemDepUnit {
|
||||||
*/
|
*/
|
||||||
MemDepPred depPred;
|
MemDepPred depPred;
|
||||||
|
|
||||||
|
/** Is there an outstanding load barrier that loads must wait on. */
|
||||||
bool loadBarrier;
|
bool loadBarrier;
|
||||||
|
/** The sequence number of the load barrier. */
|
||||||
InstSeqNum loadBarrierSN;
|
InstSeqNum loadBarrierSN;
|
||||||
|
/** Is there an outstanding store barrier that stores must wait on. */
|
||||||
bool storeBarrier;
|
bool storeBarrier;
|
||||||
|
/** The sequence number of the store barrier. */
|
||||||
InstSeqNum storeBarrierSN;
|
InstSeqNum storeBarrierSN;
|
||||||
|
|
||||||
/** Pointer to the IQ. */
|
/** Pointer to the IQ. */
|
||||||
|
|
|
@ -107,6 +107,7 @@ template <class MemDepPred, class Impl>
|
||||||
void
|
void
|
||||||
MemDepUnit<MemDepPred, Impl>::switchOut()
|
MemDepUnit<MemDepPred, Impl>::switchOut()
|
||||||
{
|
{
|
||||||
|
// Clear any state.
|
||||||
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||||
instList[i].clear();
|
instList[i].clear();
|
||||||
}
|
}
|
||||||
|
@ -118,6 +119,7 @@ template <class MemDepPred, class Impl>
|
||||||
void
|
void
|
||||||
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
|
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
|
||||||
{
|
{
|
||||||
|
// Be sure to reset all state.
|
||||||
loadBarrier = storeBarrier = false;
|
loadBarrier = storeBarrier = false;
|
||||||
loadBarrierSN = storeBarrierSN = 0;
|
loadBarrierSN = storeBarrierSN = 0;
|
||||||
depPred.clear();
|
depPred.clear();
|
||||||
|
@ -148,7 +150,7 @@ MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
|
||||||
inst_entry->listIt = --(instList[tid].end());
|
inst_entry->listIt = --(instList[tid].end());
|
||||||
|
|
||||||
// Check any barriers and the dependence predictor for any
|
// Check any barriers and the dependence predictor for any
|
||||||
// producing stores.
|
// producing memrefs/stores.
|
||||||
InstSeqNum producing_store;
|
InstSeqNum producing_store;
|
||||||
if (inst->isLoad() && loadBarrier) {
|
if (inst->isLoad() && loadBarrier) {
|
||||||
producing_store = loadBarrierSN;
|
producing_store = loadBarrierSN;
|
||||||
|
@ -255,6 +257,7 @@ void
|
||||||
MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
|
MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
|
||||||
{
|
{
|
||||||
InstSeqNum barr_sn = barr_inst->seqNum;
|
InstSeqNum barr_sn = barr_inst->seqNum;
|
||||||
|
// Memory barriers block loads and stores, write barriers only stores.
|
||||||
if (barr_inst->isMemBarrier()) {
|
if (barr_inst->isMemBarrier()) {
|
||||||
loadBarrier = true;
|
loadBarrier = true;
|
||||||
loadBarrierSN = barr_sn;
|
loadBarrierSN = barr_sn;
|
||||||
|
@ -332,6 +335,7 @@ MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
|
||||||
DynInstPtr temp_inst;
|
DynInstPtr temp_inst;
|
||||||
bool found_inst = false;
|
bool found_inst = false;
|
||||||
|
|
||||||
|
// For now this replay function replays all waiting memory ops.
|
||||||
while (!instsToReplay.empty()) {
|
while (!instsToReplay.empty()) {
|
||||||
temp_inst = instsToReplay.front();
|
temp_inst = instsToReplay.front();
|
||||||
|
|
||||||
|
|
|
@ -157,10 +157,13 @@ class DefaultRename
|
||||||
/** Sets pointer to the scoreboard. */
|
/** Sets pointer to the scoreboard. */
|
||||||
void setScoreboard(Scoreboard *_scoreboard);
|
void setScoreboard(Scoreboard *_scoreboard);
|
||||||
|
|
||||||
|
/** Switches out the rename stage. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Completes the switch out. */
|
||||||
void doSwitchOut();
|
void doSwitchOut();
|
||||||
|
|
||||||
|
/** Takes over from another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Squashes all instructions in a thread. */
|
/** Squashes all instructions in a thread. */
|
||||||
|
@ -245,8 +248,10 @@ class DefaultRename
|
||||||
/** Checks if any stages are telling rename to block. */
|
/** Checks if any stages are telling rename to block. */
|
||||||
bool checkStall(unsigned tid);
|
bool checkStall(unsigned tid);
|
||||||
|
|
||||||
|
/** Gets the number of free entries for a specific thread. */
|
||||||
void readFreeEntries(unsigned tid);
|
void readFreeEntries(unsigned tid);
|
||||||
|
|
||||||
|
/** Checks the signals and updates the status. */
|
||||||
bool checkSignalsAndUpdate(unsigned tid);
|
bool checkSignalsAndUpdate(unsigned tid);
|
||||||
|
|
||||||
/** Either serializes on the next instruction available in the InstQueue,
|
/** Either serializes on the next instruction available in the InstQueue,
|
||||||
|
@ -456,8 +461,11 @@ class DefaultRename
|
||||||
Stats::Scalar<> renameCommittedMaps;
|
Stats::Scalar<> renameCommittedMaps;
|
||||||
/** Stat for total number of mappings that were undone due to a squash. */
|
/** Stat for total number of mappings that were undone due to a squash. */
|
||||||
Stats::Scalar<> renameUndoneMaps;
|
Stats::Scalar<> renameUndoneMaps;
|
||||||
|
/** Number of serialize instructions handled. */
|
||||||
Stats::Scalar<> renamedSerializing;
|
Stats::Scalar<> renamedSerializing;
|
||||||
|
/** Number of instructions marked as temporarily serializing. */
|
||||||
Stats::Scalar<> renamedTempSerializing;
|
Stats::Scalar<> renamedTempSerializing;
|
||||||
|
/** Number of instructions inserted into skid buffers. */
|
||||||
Stats::Scalar<> renameSkidInsts;
|
Stats::Scalar<> renameSkidInsts;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -260,6 +260,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultRename<Impl>::switchOut()
|
DefaultRename<Impl>::switchOut()
|
||||||
{
|
{
|
||||||
|
// Rename is ready to switch out at any time.
|
||||||
cpu->signalSwitched();
|
cpu->signalSwitched();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -267,6 +268,7 @@ template <class Impl>
|
||||||
void
|
void
|
||||||
DefaultRename<Impl>::doSwitchOut()
|
DefaultRename<Impl>::doSwitchOut()
|
||||||
{
|
{
|
||||||
|
// Clear any state, fix up the rename map.
|
||||||
for (int i = 0; i < numThreads; i++) {
|
for (int i = 0; i < numThreads; i++) {
|
||||||
typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin();
|
typename list<RenameHistory>::iterator hb_it = historyBuffer[i].begin();
|
||||||
|
|
||||||
|
|
|
@ -64,12 +64,13 @@ class SimpleRenameMap
|
||||||
typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo;
|
typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
//Constructor
|
/** Default constructor. init() must be called prior to use. */
|
||||||
SimpleRenameMap() {};
|
SimpleRenameMap() {};
|
||||||
|
|
||||||
/** Destructor. */
|
/** Destructor. */
|
||||||
~SimpleRenameMap();
|
~SimpleRenameMap();
|
||||||
|
|
||||||
|
/** Initializes rename map with given parameters. */
|
||||||
void init(unsigned _numLogicalIntRegs,
|
void init(unsigned _numLogicalIntRegs,
|
||||||
unsigned _numPhysicalIntRegs,
|
unsigned _numPhysicalIntRegs,
|
||||||
PhysRegIndex &_int_reg_start,
|
PhysRegIndex &_int_reg_start,
|
||||||
|
@ -86,6 +87,7 @@ class SimpleRenameMap
|
||||||
int id,
|
int id,
|
||||||
bool bindRegs);
|
bool bindRegs);
|
||||||
|
|
||||||
|
/** Sets the free list used with this rename map. */
|
||||||
void setFreeList(SimpleFreeList *fl_ptr);
|
void setFreeList(SimpleFreeList *fl_ptr);
|
||||||
|
|
||||||
//Tell rename map to get a free physical register for a given
|
//Tell rename map to get a free physical register for a given
|
||||||
|
@ -151,7 +153,6 @@ class SimpleRenameMap
|
||||||
{ }
|
{ }
|
||||||
};
|
};
|
||||||
|
|
||||||
//Change this to private
|
|
||||||
private:
|
private:
|
||||||
/** Integer rename map. */
|
/** Integer rename map. */
|
||||||
std::vector<RenameEntry> intRenameMap;
|
std::vector<RenameEntry> intRenameMap;
|
||||||
|
|
|
@ -97,8 +97,10 @@ class ROB
|
||||||
*/
|
*/
|
||||||
void setActiveThreads(std::list<unsigned>* at_ptr);
|
void setActiveThreads(std::list<unsigned>* at_ptr);
|
||||||
|
|
||||||
|
/** Switches out the ROB. */
|
||||||
void switchOut();
|
void switchOut();
|
||||||
|
|
||||||
|
/** Takes over another CPU's thread. */
|
||||||
void takeOverFrom();
|
void takeOverFrom();
|
||||||
|
|
||||||
/** Function to insert an instruction into the ROB. Note that whatever
|
/** Function to insert an instruction into the ROB. Note that whatever
|
||||||
|
@ -300,6 +302,7 @@ class ROB
|
||||||
/** Number of instructions in the ROB. */
|
/** Number of instructions in the ROB. */
|
||||||
int numInstsInROB;
|
int numInstsInROB;
|
||||||
|
|
||||||
|
/** Dummy instruction returned if there are no insts left. */
|
||||||
DynInstPtr dummyInst;
|
DynInstPtr dummyInst;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
* Authors: Kevin Lim
|
* Authors: Kevin Lim
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "base/intmath.hh"
|
||||||
#include "base/trace.hh"
|
#include "base/trace.hh"
|
||||||
#include "cpu/o3/store_set.hh"
|
#include "cpu/o3/store_set.hh"
|
||||||
|
|
||||||
|
@ -38,6 +39,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
|
||||||
DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
|
DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
|
||||||
SSITSize, LFSTSize);
|
SSITSize, LFSTSize);
|
||||||
|
|
||||||
|
if (!isPowerOf2(SSITSize)) {
|
||||||
|
fatal("Invalid SSIT size!\n");
|
||||||
|
}
|
||||||
|
|
||||||
SSIT.resize(SSITSize);
|
SSIT.resize(SSITSize);
|
||||||
|
|
||||||
validSSIT.resize(SSITSize);
|
validSSIT.resize(SSITSize);
|
||||||
|
@ -45,6 +50,10 @@ StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
|
||||||
for (int i = 0; i < SSITSize; ++i)
|
for (int i = 0; i < SSITSize; ++i)
|
||||||
validSSIT[i] = false;
|
validSSIT[i] = false;
|
||||||
|
|
||||||
|
if (!isPowerOf2(LFSTSize)) {
|
||||||
|
fatal("Invalid LFST size!\n");
|
||||||
|
}
|
||||||
|
|
||||||
LFST.resize(LFSTSize);
|
LFST.resize(LFSTSize);
|
||||||
|
|
||||||
validLFST.resize(LFSTSize);
|
validLFST.resize(LFSTSize);
|
||||||
|
@ -320,3 +329,19 @@ StoreSet::clear()
|
||||||
|
|
||||||
storeList.clear();
|
storeList.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
StoreSet::dump()
|
||||||
|
{
|
||||||
|
cprintf("storeList.size(): %i\n", storeList.size());
|
||||||
|
SeqNumMapIt store_list_it = storeList.begin();
|
||||||
|
|
||||||
|
int num = 0;
|
||||||
|
|
||||||
|
while (store_list_it != storeList.end()) {
|
||||||
|
cprintf("%i: [sn:%lli] SSID:%i\n",
|
||||||
|
num, (*store_list_it).first, (*store_list_it).second);
|
||||||
|
num++;
|
||||||
|
store_list_it++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -46,58 +46,98 @@ struct ltseqnum {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements a store set predictor for determining if memory
|
||||||
|
* instructions are dependent upon each other. See paper "Memory
|
||||||
|
* Dependence Prediction using Store Sets" by Chrysos and Emer. SSID
|
||||||
|
* stands for Store Set ID, SSIT stands for Store Set ID Table, and
|
||||||
|
* LFST is Last Fetched Store Table.
|
||||||
|
*/
|
||||||
class StoreSet
|
class StoreSet
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef unsigned SSID;
|
typedef unsigned SSID;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/** Default constructor. init() must be called prior to use. */
|
||||||
StoreSet() { };
|
StoreSet() { };
|
||||||
|
|
||||||
|
/** Creates store set predictor with given table sizes. */
|
||||||
StoreSet(int SSIT_size, int LFST_size);
|
StoreSet(int SSIT_size, int LFST_size);
|
||||||
|
|
||||||
|
/** Default destructor. */
|
||||||
~StoreSet();
|
~StoreSet();
|
||||||
|
|
||||||
|
/** Initializes the store set predictor with the given table sizes. */
|
||||||
void init(int SSIT_size, int LFST_size);
|
void init(int SSIT_size, int LFST_size);
|
||||||
|
|
||||||
|
/** Records a memory ordering violation between the younger load
|
||||||
|
* and the older store. */
|
||||||
void violation(Addr store_PC, Addr load_PC);
|
void violation(Addr store_PC, Addr load_PC);
|
||||||
|
|
||||||
|
/** Inserts a load into the store set predictor. This does nothing but
|
||||||
|
* is included in case other predictors require a similar function.
|
||||||
|
*/
|
||||||
void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
|
void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
|
||||||
|
|
||||||
|
/** Inserts a store into the store set predictor. Updates the
|
||||||
|
* LFST if the store has a valid SSID. */
|
||||||
void insertStore(Addr store_PC, InstSeqNum store_seq_num,
|
void insertStore(Addr store_PC, InstSeqNum store_seq_num,
|
||||||
unsigned tid);
|
unsigned tid);
|
||||||
|
|
||||||
|
/** Checks if the instruction with the given PC is dependent upon
|
||||||
|
* any store. @return Returns the sequence number of the store
|
||||||
|
* instruction this PC is dependent upon. Returns 0 if none.
|
||||||
|
*/
|
||||||
InstSeqNum checkInst(Addr PC);
|
InstSeqNum checkInst(Addr PC);
|
||||||
|
|
||||||
|
/** Records this PC/sequence number as issued. */
|
||||||
void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
|
void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
|
||||||
|
|
||||||
|
/** Squashes for a specific thread until the given sequence number. */
|
||||||
void squash(InstSeqNum squashed_num, unsigned tid);
|
void squash(InstSeqNum squashed_num, unsigned tid);
|
||||||
|
|
||||||
|
/** Resets all tables. */
|
||||||
void clear();
|
void clear();
|
||||||
|
|
||||||
|
/** Debug function to dump the contents of the store list. */
|
||||||
|
void dump();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/** Calculates the index into the SSIT based on the PC. */
|
||||||
inline int calcIndex(Addr PC)
|
inline int calcIndex(Addr PC)
|
||||||
{ return (PC >> offsetBits) & indexMask; }
|
{ return (PC >> offsetBits) & indexMask; }
|
||||||
|
|
||||||
|
/** Calculates a Store Set ID based on the PC. */
|
||||||
inline SSID calcSSID(Addr PC)
|
inline SSID calcSSID(Addr PC)
|
||||||
{ return ((PC ^ (PC >> 10)) % LFSTSize); }
|
{ return ((PC ^ (PC >> 10)) % LFSTSize); }
|
||||||
|
|
||||||
|
/** The Store Set ID Table. */
|
||||||
std::vector<SSID> SSIT;
|
std::vector<SSID> SSIT;
|
||||||
|
|
||||||
|
/** Bit vector to tell if the SSIT has a valid entry. */
|
||||||
std::vector<bool> validSSIT;
|
std::vector<bool> validSSIT;
|
||||||
|
|
||||||
|
/** Last Fetched Store Table. */
|
||||||
std::vector<InstSeqNum> LFST;
|
std::vector<InstSeqNum> LFST;
|
||||||
|
|
||||||
|
/** Bit vector to tell if the LFST has a valid entry. */
|
||||||
std::vector<bool> validLFST;
|
std::vector<bool> validLFST;
|
||||||
|
|
||||||
|
/** Map of stores that have been inserted into the store set, but
|
||||||
|
* not yet issued or squashed.
|
||||||
|
*/
|
||||||
std::map<InstSeqNum, int, ltseqnum> storeList;
|
std::map<InstSeqNum, int, ltseqnum> storeList;
|
||||||
|
|
||||||
typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;
|
typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;
|
||||||
|
|
||||||
|
/** Store Set ID Table size, in entries. */
|
||||||
int SSITSize;
|
int SSITSize;
|
||||||
|
|
||||||
|
/** Last Fetched Store Table size, in entries. */
|
||||||
int LFSTSize;
|
int LFSTSize;
|
||||||
|
|
||||||
|
/** Mask to obtain the index. */
|
||||||
int indexMask;
|
int indexMask;
|
||||||
|
|
||||||
// HACK: Hardcoded for now.
|
// HACK: Hardcoded for now.
|
||||||
|
|
|
@ -58,16 +58,26 @@ struct O3ThreadState : public ThreadState {
|
||||||
typedef ExecContext::Status Status;
|
typedef ExecContext::Status Status;
|
||||||
typedef typename Impl::FullCPU FullCPU;
|
typedef typename Impl::FullCPU FullCPU;
|
||||||
|
|
||||||
|
/** Current status of the thread. */
|
||||||
Status _status;
|
Status _status;
|
||||||
|
|
||||||
// Current instruction
|
/** Current instruction the thread is committing. Only set and
|
||||||
|
* used for DTB faults currently.
|
||||||
|
*/
|
||||||
TheISA::MachInst inst;
|
TheISA::MachInst inst;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/** Pointer to the CPU. */
|
||||||
FullCPU *cpu;
|
FullCPU *cpu;
|
||||||
public:
|
public:
|
||||||
|
/** Whether or not the thread is currently in syscall mode, and
|
||||||
|
* thus able to be externally updated without squashing.
|
||||||
|
*/
|
||||||
bool inSyscall;
|
bool inSyscall;
|
||||||
|
|
||||||
|
/** Whether or not the thread is currently waiting on a trap, and
|
||||||
|
* thus able to be externally updated without squashing.
|
||||||
|
*/
|
||||||
bool trapPending;
|
bool trapPending;
|
||||||
|
|
||||||
#if FULL_SYSTEM
|
#if FULL_SYSTEM
|
||||||
|
@ -88,23 +98,34 @@ struct O3ThreadState : public ThreadState {
|
||||||
{ }
|
{ }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/** Pointer to the ExecContext of this thread. @todo: Don't call
|
||||||
|
this a proxy.*/
|
||||||
ExecContext *xcProxy;
|
ExecContext *xcProxy;
|
||||||
|
|
||||||
|
/** Returns a pointer to the XC of this thread. */
|
||||||
ExecContext *getXCProxy() { return xcProxy; }
|
ExecContext *getXCProxy() { return xcProxy; }
|
||||||
|
|
||||||
|
/** Returns the status of this thread. */
|
||||||
Status status() const { return _status; }
|
Status status() const { return _status; }
|
||||||
|
|
||||||
|
/** Sets the status of this thread. */
|
||||||
void setStatus(Status new_status) { _status = new_status; }
|
void setStatus(Status new_status) { _status = new_status; }
|
||||||
|
|
||||||
bool misspeculating() { return false; }
|
/** Sets the current instruction being committed. */
|
||||||
|
|
||||||
void setInst(TheISA::MachInst _inst) { inst = _inst; }
|
void setInst(TheISA::MachInst _inst) { inst = _inst; }
|
||||||
|
|
||||||
|
/** Reads the number of instructions functionally executed and
|
||||||
|
* committed.
|
||||||
|
*/
|
||||||
Counter readFuncExeInst() { return funcExeInst; }
|
Counter readFuncExeInst() { return funcExeInst; }
|
||||||
|
|
||||||
|
/** Sets the total number of instructions functionally executed
|
||||||
|
* and committed.
|
||||||
|
*/
|
||||||
void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
|
void setFuncExeInst(Counter new_val) { funcExeInst = new_val; }
|
||||||
|
|
||||||
#if !FULL_SYSTEM
|
#if !FULL_SYSTEM
|
||||||
|
/** Handles the syscall. */
|
||||||
void syscall(int64_t callnum) { process->syscall(callnum, xcProxy); }
|
void syscall(int64_t callnum) { process->syscall(callnum, xcProxy); }
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -28,6 +28,7 @@
|
||||||
* Authors: Kevin Lim
|
* Authors: Kevin Lim
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "base/intmath.hh"
|
||||||
#include "cpu/o3/tournament_pred.hh"
|
#include "cpu/o3/tournament_pred.hh"
|
||||||
|
|
||||||
TournamentBP::TournamentBP(unsigned _localPredictorSize,
|
TournamentBP::TournamentBP(unsigned _localPredictorSize,
|
||||||
|
@ -51,7 +52,9 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
|
||||||
choiceCtrBits(_choiceCtrBits),
|
choiceCtrBits(_choiceCtrBits),
|
||||||
instShiftAmt(_instShiftAmt)
|
instShiftAmt(_instShiftAmt)
|
||||||
{
|
{
|
||||||
//Should do checks here to make sure sizes are correct (powers of 2)
|
if (!isPowerOf2(localPredictorSize)) {
|
||||||
|
fatal("Invalid local predictor size!\n");
|
||||||
|
}
|
||||||
|
|
||||||
//Setup the array of counters for the local predictor
|
//Setup the array of counters for the local predictor
|
||||||
localCtrs.resize(localPredictorSize);
|
localCtrs.resize(localPredictorSize);
|
||||||
|
@ -59,6 +62,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
|
||||||
for (int i = 0; i < localPredictorSize; ++i)
|
for (int i = 0; i < localPredictorSize; ++i)
|
||||||
localCtrs[i].setBits(localCtrBits);
|
localCtrs[i].setBits(localCtrBits);
|
||||||
|
|
||||||
|
if (!isPowerOf2(localHistoryTableSize)) {
|
||||||
|
fatal("Invalid local history table size!\n");
|
||||||
|
}
|
||||||
|
|
||||||
//Setup the history table for the local table
|
//Setup the history table for the local table
|
||||||
localHistoryTable.resize(localHistoryTableSize);
|
localHistoryTable.resize(localHistoryTableSize);
|
||||||
|
|
||||||
|
@ -68,6 +75,10 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
|
||||||
// Setup the local history mask
|
// Setup the local history mask
|
||||||
localHistoryMask = (1 << localHistoryBits) - 1;
|
localHistoryMask = (1 << localHistoryBits) - 1;
|
||||||
|
|
||||||
|
if (!isPowerOf2(globalPredictorSize)) {
|
||||||
|
fatal("Invalid global predictor size!\n");
|
||||||
|
}
|
||||||
|
|
||||||
//Setup the array of counters for the global predictor
|
//Setup the array of counters for the global predictor
|
||||||
globalCtrs.resize(globalPredictorSize);
|
globalCtrs.resize(globalPredictorSize);
|
||||||
|
|
||||||
|
@ -79,12 +90,17 @@ TournamentBP::TournamentBP(unsigned _localPredictorSize,
|
||||||
// Setup the global history mask
|
// Setup the global history mask
|
||||||
globalHistoryMask = (1 << globalHistoryBits) - 1;
|
globalHistoryMask = (1 << globalHistoryBits) - 1;
|
||||||
|
|
||||||
|
if (!isPowerOf2(choicePredictorSize)) {
|
||||||
|
fatal("Invalid choice predictor size!\n");
|
||||||
|
}
|
||||||
|
|
||||||
//Setup the array of counters for the choice predictor
|
//Setup the array of counters for the choice predictor
|
||||||
choiceCtrs.resize(choicePredictorSize);
|
choiceCtrs.resize(choicePredictorSize);
|
||||||
|
|
||||||
for (int i = 0; i < choicePredictorSize; ++i)
|
for (int i = 0; i < choicePredictorSize; ++i)
|
||||||
choiceCtrs[i].setBits(choiceCtrBits);
|
choiceCtrs[i].setBits(choiceCtrBits);
|
||||||
|
|
||||||
|
// @todo: Allow for different thresholds between the predictors.
|
||||||
threshold = (1 << (localCtrBits - 1)) - 1;
|
threshold = (1 << (localCtrBits - 1)) - 1;
|
||||||
threshold = threshold / 2;
|
threshold = threshold / 2;
|
||||||
}
|
}
|
||||||
|
@ -93,165 +109,185 @@ inline
|
||||||
unsigned
|
unsigned
|
||||||
TournamentBP::calcLocHistIdx(Addr &branch_addr)
|
TournamentBP::calcLocHistIdx(Addr &branch_addr)
|
||||||
{
|
{
|
||||||
|
// Get low order bits after removing instruction offset.
|
||||||
return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1);
|
return (branch_addr >> instShiftAmt) & (localHistoryTableSize - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
void
|
void
|
||||||
TournamentBP::updateHistoriesTaken(unsigned local_history_idx)
|
TournamentBP::updateGlobalHistTaken()
|
||||||
{
|
{
|
||||||
globalHistory = (globalHistory << 1) | 1;
|
globalHistory = (globalHistory << 1) | 1;
|
||||||
globalHistory = globalHistory & globalHistoryMask;
|
globalHistory = globalHistory & globalHistoryMask;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void
|
||||||
|
TournamentBP::updateGlobalHistNotTaken()
|
||||||
|
{
|
||||||
|
globalHistory = (globalHistory << 1);
|
||||||
|
globalHistory = globalHistory & globalHistoryMask;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline
|
||||||
|
void
|
||||||
|
TournamentBP::updateLocalHistTaken(unsigned local_history_idx)
|
||||||
|
{
|
||||||
localHistoryTable[local_history_idx] =
|
localHistoryTable[local_history_idx] =
|
||||||
(localHistoryTable[local_history_idx] << 1) | 1;
|
(localHistoryTable[local_history_idx] << 1) | 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
void
|
void
|
||||||
TournamentBP::updateHistoriesNotTaken(unsigned local_history_idx)
|
TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx)
|
||||||
{
|
{
|
||||||
globalHistory = (globalHistory << 1);
|
|
||||||
globalHistory = globalHistory & globalHistoryMask;
|
|
||||||
|
|
||||||
localHistoryTable[local_history_idx] =
|
localHistoryTable[local_history_idx] =
|
||||||
(localHistoryTable[local_history_idx] << 1);
|
(localHistoryTable[local_history_idx] << 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
TournamentBP::lookup(Addr &branch_addr)
|
TournamentBP::lookup(Addr &branch_addr, void * &bp_history)
|
||||||
{
|
{
|
||||||
uint8_t local_prediction;
|
bool local_prediction;
|
||||||
unsigned local_history_idx;
|
unsigned local_history_idx;
|
||||||
unsigned local_predictor_idx;
|
unsigned local_predictor_idx;
|
||||||
|
|
||||||
uint8_t global_prediction;
|
bool global_prediction;
|
||||||
uint8_t choice_prediction;
|
bool choice_prediction;
|
||||||
|
|
||||||
//Lookup in the local predictor to get its branch prediction
|
//Lookup in the local predictor to get its branch prediction
|
||||||
local_history_idx = calcLocHistIdx(branch_addr);
|
local_history_idx = calcLocHistIdx(branch_addr);
|
||||||
local_predictor_idx = localHistoryTable[local_history_idx]
|
local_predictor_idx = localHistoryTable[local_history_idx]
|
||||||
& localHistoryMask;
|
& localHistoryMask;
|
||||||
local_prediction = localCtrs[local_predictor_idx].read();
|
local_prediction = localCtrs[local_predictor_idx].read() > threshold;
|
||||||
|
|
||||||
//Lookup in the global predictor to get its branch prediction
|
//Lookup in the global predictor to get its branch prediction
|
||||||
global_prediction = globalCtrs[globalHistory].read();
|
global_prediction = globalCtrs[globalHistory].read() > threshold;
|
||||||
|
|
||||||
//Lookup in the choice predictor to see which one to use
|
//Lookup in the choice predictor to see which one to use
|
||||||
choice_prediction = choiceCtrs[globalHistory].read();
|
choice_prediction = choiceCtrs[globalHistory].read() > threshold;
|
||||||
|
|
||||||
//@todo Put a threshold value in for the three predictors that can
|
// Create BPHistory and pass it back to be recorded.
|
||||||
// be set through the constructor (so this isn't hard coded).
|
BPHistory *history = new BPHistory;
|
||||||
//Also should put some of this code into functions.
|
history->globalHistory = globalHistory;
|
||||||
if (choice_prediction > threshold) {
|
history->localPredTaken = local_prediction;
|
||||||
if (global_prediction > threshold) {
|
history->globalPredTaken = global_prediction;
|
||||||
updateHistoriesTaken(local_history_idx);
|
history->globalUsed = choice_prediction;
|
||||||
|
bp_history = (void *)history;
|
||||||
|
|
||||||
assert(globalHistory < globalPredictorSize &&
|
assert(globalHistory < globalPredictorSize &&
|
||||||
local_history_idx < localPredictorSize);
|
local_history_idx < localPredictorSize);
|
||||||
|
|
||||||
globalCtrs[globalHistory].increment();
|
|
||||||
localCtrs[local_history_idx].increment();
|
|
||||||
|
|
||||||
|
// Commented code is for doing speculative update of counters and
|
||||||
|
// all histories.
|
||||||
|
if (choice_prediction) {
|
||||||
|
if (global_prediction) {
|
||||||
|
// updateHistoriesTaken(local_history_idx);
|
||||||
|
// globalCtrs[globalHistory].increment();
|
||||||
|
// localCtrs[local_history_idx].increment();
|
||||||
|
updateGlobalHistTaken();
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
updateHistoriesNotTaken(local_history_idx);
|
// updateHistoriesNotTaken(local_history_idx);
|
||||||
|
// globalCtrs[globalHistory].decrement();
|
||||||
assert(globalHistory < globalPredictorSize &&
|
// localCtrs[local_history_idx].decrement();
|
||||||
local_history_idx < localPredictorSize);
|
updateGlobalHistNotTaken();
|
||||||
|
|
||||||
globalCtrs[globalHistory].decrement();
|
|
||||||
localCtrs[local_history_idx].decrement();
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (local_prediction > threshold) {
|
if (local_prediction) {
|
||||||
updateHistoriesTaken(local_history_idx);
|
// updateHistoriesTaken(local_history_idx);
|
||||||
|
// globalCtrs[globalHistory].increment();
|
||||||
assert(globalHistory < globalPredictorSize &&
|
// localCtrs[local_history_idx].increment();
|
||||||
local_history_idx < localPredictorSize);
|
updateGlobalHistTaken();
|
||||||
|
|
||||||
globalCtrs[globalHistory].increment();
|
|
||||||
localCtrs[local_history_idx].increment();
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
updateHistoriesNotTaken(local_history_idx);
|
// updateHistoriesNotTaken(local_history_idx);
|
||||||
|
// globalCtrs[globalHistory].decrement();
|
||||||
assert(globalHistory < globalPredictorSize &&
|
// localCtrs[local_history_idx].decrement();
|
||||||
local_history_idx < localPredictorSize);
|
updateGlobalHistNotTaken();
|
||||||
|
|
||||||
globalCtrs[globalHistory].decrement();
|
|
||||||
localCtrs[local_history_idx].decrement();
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the branch predictor if it predicted a branch wrong.
|
|
||||||
void
|
void
|
||||||
TournamentBP::update(Addr &branch_addr, unsigned correct_gh, bool taken)
|
TournamentBP::uncondBr(void * &bp_history)
|
||||||
{
|
{
|
||||||
|
// Create BPHistory and pass it back to be recorded.
|
||||||
|
BPHistory *history = new BPHistory;
|
||||||
|
history->globalHistory = globalHistory;
|
||||||
|
history->localPredTaken = true;
|
||||||
|
history->globalPredTaken = true;
|
||||||
|
bp_history = static_cast<void *>(history);
|
||||||
|
|
||||||
uint8_t local_prediction;
|
updateGlobalHistTaken();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
TournamentBP::update(Addr &branch_addr, bool taken, void *bp_history)
|
||||||
|
{
|
||||||
unsigned local_history_idx;
|
unsigned local_history_idx;
|
||||||
unsigned local_predictor_idx;
|
unsigned local_predictor_idx;
|
||||||
bool local_pred_taken;
|
unsigned local_predictor_hist;
|
||||||
|
|
||||||
uint8_t global_prediction;
|
// Get the local predictor's current prediction
|
||||||
bool global_pred_taken;
|
|
||||||
|
|
||||||
// Load the correct global history into the register.
|
|
||||||
globalHistory = correct_gh;
|
|
||||||
|
|
||||||
// Get the local predictor's current prediction, remove the incorrect
|
|
||||||
// update, and update the local predictor
|
|
||||||
local_history_idx = calcLocHistIdx(branch_addr);
|
local_history_idx = calcLocHistIdx(branch_addr);
|
||||||
local_predictor_idx = localHistoryTable[local_history_idx];
|
local_predictor_hist = localHistoryTable[local_history_idx];
|
||||||
local_predictor_idx = (local_predictor_idx >> 1) & localHistoryMask;
|
local_predictor_idx = local_predictor_hist & localHistoryMask;
|
||||||
|
|
||||||
local_prediction = localCtrs[local_predictor_idx].read();
|
// Update the choice predictor to tell it which one was correct if
|
||||||
local_pred_taken = local_prediction > threshold;
|
// there was a prediction.
|
||||||
|
if (bp_history) {
|
||||||
//Get the global predictor's current prediction, and update the
|
BPHistory *history = static_cast<BPHistory *>(bp_history);
|
||||||
//global predictor
|
if (history->localPredTaken != history->globalPredTaken) {
|
||||||
global_prediction = globalCtrs[globalHistory].read();
|
// If the local prediction matches the actual outcome,
|
||||||
global_pred_taken = global_prediction > threshold;
|
// decerement the counter. Otherwise increment the
|
||||||
|
// counter.
|
||||||
//Update the choice predictor to tell it which one was correct
|
if (history->localPredTaken == taken) {
|
||||||
if (local_pred_taken != global_pred_taken) {
|
choiceCtrs[globalHistory].decrement();
|
||||||
//If the local prediction matches the actual outcome, decerement
|
} else if (history->globalPredTaken == taken){
|
||||||
//the counter. Otherwise increment the counter.
|
choiceCtrs[globalHistory].increment();
|
||||||
if (local_pred_taken == taken) {
|
}
|
||||||
choiceCtrs[globalHistory].decrement();
|
|
||||||
} else {
|
|
||||||
choiceCtrs[globalHistory].increment();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We're done with this history, now delete it.
|
||||||
|
delete history;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (taken) {
|
assert(globalHistory < globalPredictorSize &&
|
||||||
assert(globalHistory < globalPredictorSize &&
|
local_predictor_idx < localPredictorSize);
|
||||||
local_predictor_idx < localPredictorSize);
|
|
||||||
|
|
||||||
|
// Update the counters and local history with the proper
|
||||||
|
// resolution of the branch. Global history is updated
|
||||||
|
// speculatively and restored upon squash() calls, so it does not
|
||||||
|
// need to be updated.
|
||||||
|
if (taken) {
|
||||||
localCtrs[local_predictor_idx].increment();
|
localCtrs[local_predictor_idx].increment();
|
||||||
globalCtrs[globalHistory].increment();
|
globalCtrs[globalHistory].increment();
|
||||||
|
|
||||||
globalHistory = (globalHistory << 1) | 1;
|
updateLocalHistTaken(local_history_idx);
|
||||||
globalHistory = globalHistory & globalHistoryMask;
|
|
||||||
|
|
||||||
localHistoryTable[local_history_idx] |= 1;
|
|
||||||
} else {
|
} else {
|
||||||
assert(globalHistory < globalPredictorSize &&
|
|
||||||
local_predictor_idx < localPredictorSize);
|
|
||||||
|
|
||||||
localCtrs[local_predictor_idx].decrement();
|
localCtrs[local_predictor_idx].decrement();
|
||||||
globalCtrs[globalHistory].decrement();
|
globalCtrs[globalHistory].decrement();
|
||||||
|
|
||||||
globalHistory = (globalHistory << 1);
|
updateLocalHistNotTaken(local_history_idx);
|
||||||
globalHistory = globalHistory & globalHistoryMask;
|
|
||||||
|
|
||||||
localHistoryTable[local_history_idx] &= ~1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
TournamentBP::squash(void *bp_history)
|
||||||
|
{
|
||||||
|
BPHistory *history = static_cast<BPHistory *>(bp_history);
|
||||||
|
|
||||||
|
// Restore global history to state prior to this branch.
|
||||||
|
globalHistory = history->globalHistory;
|
||||||
|
|
||||||
|
// Delete this BPHistory now that we're done with it.
|
||||||
|
delete history;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
int
|
||||||
|
TournamentBP::BPHistory::newCount = 0;
|
||||||
|
#endif
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -36,6 +36,15 @@
|
||||||
#include "cpu/o3/sat_counter.hh"
|
#include "cpu/o3/sat_counter.hh"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Implements a tournament branch predictor, hopefully identical to the one
|
||||||
|
* used in the 21264. It has a local predictor, which uses a local history
|
||||||
|
* table to index into a table of counters, and a global predictor, which
|
||||||
|
* uses a global history to index into a table of counters. A choice
|
||||||
|
* predictor chooses between the two. Only the global history register
|
||||||
|
* is speculatively updated, the rest are updated upon branches committing
|
||||||
|
* or misspeculating.
|
||||||
|
*/
|
||||||
class TournamentBP
|
class TournamentBP
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -55,30 +64,95 @@ class TournamentBP
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Looks up the given address in the branch predictor and returns
|
* Looks up the given address in the branch predictor and returns
|
||||||
* a true/false value as to whether it is taken.
|
* a true/false value as to whether it is taken. Also creates a
|
||||||
|
* BPHistory object to store any state it will need on squash/update.
|
||||||
* @param branch_addr The address of the branch to look up.
|
* @param branch_addr The address of the branch to look up.
|
||||||
|
* @param bp_history Pointer that will be set to the BPHistory object.
|
||||||
* @return Whether or not the branch is taken.
|
* @return Whether or not the branch is taken.
|
||||||
*/
|
*/
|
||||||
bool lookup(Addr &branch_addr);
|
bool lookup(Addr &branch_addr, void * &bp_history);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Records that there was an unconditional branch, and modifies
|
||||||
|
* the bp history to point to an object that has the previous
|
||||||
|
* global history stored in it.
|
||||||
|
* @param bp_history Pointer that will be set to the BPHistory object.
|
||||||
|
*/
|
||||||
|
void uncondBr(void * &bp_history);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Updates the branch predictor with the actual result of a branch.
|
* Updates the branch predictor with the actual result of a branch.
|
||||||
* @param branch_addr The address of the branch to update.
|
* @param branch_addr The address of the branch to update.
|
||||||
* @param taken Whether or not the branch was taken.
|
* @param taken Whether or not the branch was taken.
|
||||||
|
* @param bp_history Pointer to the BPHistory object that was created
|
||||||
|
* when the branch was predicted.
|
||||||
*/
|
*/
|
||||||
void update(Addr &branch_addr, unsigned global_history, bool taken);
|
void update(Addr &branch_addr, bool taken, void *bp_history);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Restores the global branch history on a squash.
|
||||||
|
* @param bp_history Pointer to the BPHistory object that has the
|
||||||
|
* previous global branch history in it.
|
||||||
|
*/
|
||||||
|
void squash(void *bp_history);
|
||||||
|
|
||||||
|
/** Returns the global history. */
|
||||||
inline unsigned readGlobalHist() { return globalHistory; }
|
inline unsigned readGlobalHist() { return globalHistory; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/**
|
||||||
|
* Returns if the branch should be taken or not, given a counter
|
||||||
|
* value.
|
||||||
|
* @param count The counter value.
|
||||||
|
*/
|
||||||
inline bool getPrediction(uint8_t &count);
|
inline bool getPrediction(uint8_t &count);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the local history index, given a branch address.
|
||||||
|
* @param branch_addr The branch's PC address.
|
||||||
|
*/
|
||||||
inline unsigned calcLocHistIdx(Addr &branch_addr);
|
inline unsigned calcLocHistIdx(Addr &branch_addr);
|
||||||
|
|
||||||
inline void updateHistoriesTaken(unsigned local_history_idx);
|
/** Updates global history as taken. */
|
||||||
|
inline void updateGlobalHistTaken();
|
||||||
|
|
||||||
inline void updateHistoriesNotTaken(unsigned local_history_idx);
|
/** Updates global history as not taken. */
|
||||||
|
inline void updateGlobalHistNotTaken();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates local histories as taken.
|
||||||
|
* @param local_history_idx The local history table entry that
|
||||||
|
* will be updated.
|
||||||
|
*/
|
||||||
|
inline void updateLocalHistTaken(unsigned local_history_idx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Updates local histories as not taken.
|
||||||
|
* @param local_history_idx The local history table entry that
|
||||||
|
* will be updated.
|
||||||
|
*/
|
||||||
|
inline void updateLocalHistNotTaken(unsigned local_history_idx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The branch history information that is created upon predicting
|
||||||
|
* a branch. It will be passed back upon updating and squashing,
|
||||||
|
* when the BP can use this information to update/restore its
|
||||||
|
* state properly.
|
||||||
|
*/
|
||||||
|
struct BPHistory {
|
||||||
|
#ifdef DEBUG
|
||||||
|
BPHistory()
|
||||||
|
{ newCount++; }
|
||||||
|
~BPHistory()
|
||||||
|
{ newCount--; }
|
||||||
|
|
||||||
|
static int newCount;
|
||||||
|
#endif
|
||||||
|
unsigned globalHistory;
|
||||||
|
bool localPredTaken;
|
||||||
|
bool globalPredTaken;
|
||||||
|
bool globalUsed;
|
||||||
|
};
|
||||||
|
|
||||||
/** Local counters. */
|
/** Local counters. */
|
||||||
std::vector<SatCounter> localCtrs;
|
std::vector<SatCounter> localCtrs;
|
||||||
|
@ -103,7 +177,6 @@ class TournamentBP
|
||||||
/** Mask to get the proper local history. */
|
/** Mask to get the proper local history. */
|
||||||
unsigned localHistoryMask;
|
unsigned localHistoryMask;
|
||||||
|
|
||||||
|
|
||||||
/** Array of counters that make up the global predictor. */
|
/** Array of counters that make up the global predictor. */
|
||||||
std::vector<SatCounter> globalCtrs;
|
std::vector<SatCounter> globalCtrs;
|
||||||
|
|
||||||
|
@ -122,7 +195,6 @@ class TournamentBP
|
||||||
/** Mask to get the proper global history. */
|
/** Mask to get the proper global history. */
|
||||||
unsigned globalHistoryMask;
|
unsigned globalHistoryMask;
|
||||||
|
|
||||||
|
|
||||||
/** Array of counters that make up the choice predictor. */
|
/** Array of counters that make up the choice predictor. */
|
||||||
std::vector<SatCounter> choiceCtrs;
|
std::vector<SatCounter> choiceCtrs;
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,30 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
@ -103,6 +130,7 @@ Param<unsigned> renameToROBDelay;
|
||||||
Param<unsigned> commitWidth;
|
Param<unsigned> commitWidth;
|
||||||
Param<unsigned> squashWidth;
|
Param<unsigned> squashWidth;
|
||||||
|
|
||||||
|
Param<std::string> predType;
|
||||||
Param<unsigned> localPredictorSize;
|
Param<unsigned> localPredictorSize;
|
||||||
Param<unsigned> localCtrBits;
|
Param<unsigned> localCtrBits;
|
||||||
Param<unsigned> localHistoryTableSize;
|
Param<unsigned> localHistoryTableSize;
|
||||||
|
@ -236,6 +264,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(DerivOzoneCPU)
|
||||||
INIT_PARAM(commitWidth, "Commit width"),
|
INIT_PARAM(commitWidth, "Commit width"),
|
||||||
INIT_PARAM(squashWidth, "Squash width"),
|
INIT_PARAM(squashWidth, "Squash width"),
|
||||||
|
|
||||||
|
INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
|
||||||
INIT_PARAM(localPredictorSize, "Size of local predictor"),
|
INIT_PARAM(localPredictorSize, "Size of local predictor"),
|
||||||
INIT_PARAM(localCtrBits, "Bits per counter"),
|
INIT_PARAM(localCtrBits, "Bits per counter"),
|
||||||
INIT_PARAM(localHistoryTableSize, "Size of local history table"),
|
INIT_PARAM(localHistoryTableSize, "Size of local history table"),
|
||||||
|
@ -375,7 +404,7 @@ CREATE_SIM_OBJECT(DerivOzoneCPU)
|
||||||
params->commitWidth = commitWidth;
|
params->commitWidth = commitWidth;
|
||||||
params->squashWidth = squashWidth;
|
params->squashWidth = squashWidth;
|
||||||
|
|
||||||
|
params->predType = predType;
|
||||||
params->localPredictorSize = localPredictorSize;
|
params->localPredictorSize = localPredictorSize;
|
||||||
params->localCtrBits = localCtrBits;
|
params->localCtrBits = localCtrBits;
|
||||||
params->localHistoryTableSize = localHistoryTableSize;
|
params->localHistoryTableSize = localHistoryTableSize;
|
||||||
|
@ -504,6 +533,7 @@ Param<unsigned> renameToROBDelay;
|
||||||
Param<unsigned> commitWidth;
|
Param<unsigned> commitWidth;
|
||||||
Param<unsigned> squashWidth;
|
Param<unsigned> squashWidth;
|
||||||
|
|
||||||
|
Param<std::string> predType;
|
||||||
Param<unsigned> localPredictorSize;
|
Param<unsigned> localPredictorSize;
|
||||||
Param<unsigned> localCtrBits;
|
Param<unsigned> localCtrBits;
|
||||||
Param<unsigned> localHistoryTableSize;
|
Param<unsigned> localHistoryTableSize;
|
||||||
|
@ -636,6 +666,7 @@ BEGIN_INIT_SIM_OBJECT_PARAMS(SimpleOzoneCPU)
|
||||||
INIT_PARAM(commitWidth, "Commit width"),
|
INIT_PARAM(commitWidth, "Commit width"),
|
||||||
INIT_PARAM(squashWidth, "Squash width"),
|
INIT_PARAM(squashWidth, "Squash width"),
|
||||||
|
|
||||||
|
INIT_PARAM(predType, "Type of branch predictor ('local', 'tournament')"),
|
||||||
INIT_PARAM(localPredictorSize, "Size of local predictor"),
|
INIT_PARAM(localPredictorSize, "Size of local predictor"),
|
||||||
INIT_PARAM(localCtrBits, "Bits per counter"),
|
INIT_PARAM(localCtrBits, "Bits per counter"),
|
||||||
INIT_PARAM(localHistoryTableSize, "Size of local history table"),
|
INIT_PARAM(localHistoryTableSize, "Size of local history table"),
|
||||||
|
@ -774,7 +805,7 @@ CREATE_SIM_OBJECT(SimpleOzoneCPU)
|
||||||
params->commitWidth = commitWidth;
|
params->commitWidth = commitWidth;
|
||||||
params->squashWidth = squashWidth;
|
params->squashWidth = squashWidth;
|
||||||
|
|
||||||
|
params->predType = predType;
|
||||||
params->localPredictorSize = localPredictorSize;
|
params->localPredictorSize = localPredictorSize;
|
||||||
params->localCtrBits = localCtrBits;
|
params->localCtrBits = localCtrBits;
|
||||||
params->localHistoryTableSize = localHistoryTableSize;
|
params->localHistoryTableSize = localHistoryTableSize;
|
||||||
|
|
|
@ -52,7 +52,7 @@ struct OzoneImpl {
|
||||||
|
|
||||||
// Would like to put these into their own area.
|
// Would like to put these into their own area.
|
||||||
// typedef NullPredictor BranchPred;
|
// typedef NullPredictor BranchPred;
|
||||||
typedef TwobitBPredUnit<OzoneImpl> BranchPred;
|
typedef BPredUnit<OzoneImpl> BranchPred;
|
||||||
typedef FrontEnd<OzoneImpl> FrontEnd;
|
typedef FrontEnd<OzoneImpl> FrontEnd;
|
||||||
// Will need IQ, LSQ eventually
|
// Will need IQ, LSQ eventually
|
||||||
typedef LWBackEnd<OzoneImpl> BackEnd;
|
typedef LWBackEnd<OzoneImpl> BackEnd;
|
||||||
|
|
|
@ -51,7 +51,7 @@ struct SimpleImpl {
|
||||||
|
|
||||||
// Would like to put these into their own area.
|
// Would like to put these into their own area.
|
||||||
// typedef NullPredictor BranchPred;
|
// typedef NullPredictor BranchPred;
|
||||||
typedef TwobitBPredUnit<SimpleImpl> BranchPred;
|
typedef BPredUnit<SimpleImpl> BranchPred;
|
||||||
typedef FrontEnd<SimpleImpl> FrontEnd;
|
typedef FrontEnd<SimpleImpl> FrontEnd;
|
||||||
// Will need IQ, LSQ eventually
|
// Will need IQ, LSQ eventually
|
||||||
typedef InorderBackEnd<SimpleImpl> BackEnd;
|
typedef InorderBackEnd<SimpleImpl> BackEnd;
|
||||||
|
|
|
@ -1,4 +1,30 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met: redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer;
|
||||||
|
* redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution;
|
||||||
|
* neither the name of the copyright holders nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__
|
#ifndef __CPU_OZONE_SIMPLE_PARAMS_HH__
|
||||||
#define __CPU_OZONE_SIMPLE_PARAMS_HH__
|
#define __CPU_OZONE_SIMPLE_PARAMS_HH__
|
||||||
|
@ -29,7 +55,6 @@ class SimpleParams : public BaseCPU::Params
|
||||||
AlphaITB *itb; AlphaDTB *dtb;
|
AlphaITB *itb; AlphaDTB *dtb;
|
||||||
#else
|
#else
|
||||||
std::vector<Process *> workload;
|
std::vector<Process *> workload;
|
||||||
// Process *process;
|
|
||||||
#endif // FULL_SYSTEM
|
#endif // FULL_SYSTEM
|
||||||
|
|
||||||
//Page Table
|
//Page Table
|
||||||
|
@ -103,6 +128,7 @@ class SimpleParams : public BaseCPU::Params
|
||||||
//
|
//
|
||||||
// Branch predictor (BP & BTB)
|
// Branch predictor (BP & BTB)
|
||||||
//
|
//
|
||||||
|
std::string predType;
|
||||||
unsigned localPredictorSize;
|
unsigned localPredictorSize;
|
||||||
unsigned localCtrBits;
|
unsigned localCtrBits;
|
||||||
unsigned localHistoryTableSize;
|
unsigned localHistoryTableSize;
|
||||||
|
|
|
@ -55,6 +55,7 @@ class DerivAlphaFullCPU(BaseCPU):
|
||||||
trapLatency = Param.Tick("Trap latency")
|
trapLatency = Param.Tick("Trap latency")
|
||||||
fetchTrapLatency = Param.Tick("Fetch trap latency")
|
fetchTrapLatency = Param.Tick("Fetch trap latency")
|
||||||
|
|
||||||
|
predType = Param.String("Branch predictor type ('local', 'tournament')")
|
||||||
localPredictorSize = Param.Unsigned("Size of local predictor")
|
localPredictorSize = Param.Unsigned("Size of local predictor")
|
||||||
localCtrBits = Param.Unsigned("Bits per counter")
|
localCtrBits = Param.Unsigned("Bits per counter")
|
||||||
localHistoryTableSize = Param.Unsigned("Size of local history table")
|
localHistoryTableSize = Param.Unsigned("Size of local history table")
|
||||||
|
|
|
@ -57,6 +57,7 @@ class DerivOzoneCPU(BaseCPU):
|
||||||
commitWidth = Param.Unsigned("Commit width")
|
commitWidth = Param.Unsigned("Commit width")
|
||||||
squashWidth = Param.Unsigned("Squash width")
|
squashWidth = Param.Unsigned("Squash width")
|
||||||
|
|
||||||
|
predType = Param.String("Type of branch predictor ('local', 'tournament')")
|
||||||
localPredictorSize = Param.Unsigned("Size of local predictor")
|
localPredictorSize = Param.Unsigned("Size of local predictor")
|
||||||
localCtrBits = Param.Unsigned("Bits per counter")
|
localCtrBits = Param.Unsigned("Bits per counter")
|
||||||
localHistoryTableSize = Param.Unsigned("Size of local history table")
|
localHistoryTableSize = Param.Unsigned("Size of local history table")
|
||||||
|
|
Loading…
Reference in a new issue