config,cpu: Add SMT support to Atomic and Timing CPUs

Adds SMT support to the "simple" CPU models so that they can be
used with other SMT-supported CPUs. Example usage: this enables
the TimingSimpleCPU to be used to warm up caches before swapping to
detailed mode with the in-order or out-of-order based CPU models.
This commit is contained in:
Mitch Hayenga 2015-09-30 11:14:19 -05:00
parent 52d521e433
commit 582a0148b4
9 changed files with 952 additions and 669 deletions

View file

@ -178,6 +178,9 @@ system = System(cpu = [CPUClass(cpu_id=i) for i in xrange(np)],
mem_ranges = [AddrRange(options.mem_size)],
cache_line_size = options.cacheline_size)
if numThreads > 1:
system.multi_thread = True
# Create a top-level voltage domain
system.voltage_domain = VoltageDomain(voltage = options.sys_voltage)

View file

@ -1,6 +1,6 @@
/*
* Copyright 2014 Google, Inc.
* Copyright (c) 2012-2013 ARM Limited
* Copyright (c) 2012-2013,2015 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@ -84,24 +84,11 @@ AtomicSimpleCPU::TickEvent::description() const
void
AtomicSimpleCPU::init()
{
BaseCPU::init();
BaseSimpleCPU::init();
// Initialise the ThreadContext's memory proxies
tcBase()->initMemProxies(tcBase());
if (FullSystem && !params()->switched_out) {
ThreadID size = threadContexts.size();
for (ThreadID i = 0; i < size; ++i) {
ThreadContext *tc = threadContexts[i];
// initialize CPU, including PC
TheISA::initCPU(tc, tc->contextId());
}
}
// Atomic doesn't do MT right now, so contextId == threadId
ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
ifetch_req.setThreadContext(_cpuId, 0);
data_read_req.setThreadContext(_cpuId, 0);
data_write_req.setThreadContext(_cpuId, 0);
}
AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@ -131,12 +118,13 @@ AtomicSimpleCPU::drain()
return DrainState::Drained;
if (!isDrained()) {
DPRINTF(Drain, "Requesting drain: %s\n", pcState());
DPRINTF(Drain, "Requesting drain.\n");
return DrainState::Draining;
} else {
if (tickEvent.scheduled())
deschedule(tickEvent);
activeThreads.clear();
DPRINTF(Drain, "Not executing microcode, no need to drain.\n");
return DrainState::Drained;
}
@ -153,16 +141,22 @@ AtomicSimpleCPU::drainResume()
verifyMemoryMode();
assert(!threadContexts.empty());
if (threadContexts.size() > 1)
fatal("The atomic CPU only supports one thread.\n");
if (thread->status() == ThreadContext::Active) {
schedule(tickEvent, nextCycle());
_status = BaseSimpleCPU::Running;
notIdleFraction = 1;
} else {
_status = BaseSimpleCPU::Idle;
notIdleFraction = 0;
for (ThreadID tid = 0; tid < numThreads; tid++) {
if (threadInfo[tid]->thread->status() == ThreadContext::Active) {
threadInfo[tid]->notIdleFraction = 1;
activeThreads.push_back(tid);
_status = BaseSimpleCPU::Running;
// Tick if any threads active
if (!tickEvent.scheduled()) {
schedule(tickEvent, nextCycle());
}
} else {
threadInfo[tid]->notIdleFraction = 0;
}
}
}
@ -172,7 +166,7 @@ AtomicSimpleCPU::tryCompleteDrain()
if (drainState() != DrainState::Draining)
return false;
DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState());
DPRINTF(Drain, "tryCompleteDrain.\n");
if (!isDrained())
return false;
@ -201,10 +195,6 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
// The tick event should have been descheduled by drain()
assert(!tickEvent.scheduled());
ifetch_req.setThreadContext(_cpuId, 0); // Add thread ID if we add MT
data_read_req.setThreadContext(_cpuId, 0); // Add thread ID here too
data_write_req.setThreadContext(_cpuId, 0); // Add thread ID here too
}
void
@ -221,20 +211,23 @@ AtomicSimpleCPU::activateContext(ThreadID thread_num)
{
DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num);
assert(thread_num == 0);
assert(thread);
assert(thread_num < numThreads);
assert(_status == Idle);
assert(!tickEvent.scheduled());
notIdleFraction = 1;
Cycles delta = ticksToCycles(thread->lastActivate - thread->lastSuspend);
threadInfo[thread_num]->notIdleFraction = 1;
Cycles delta = ticksToCycles(threadInfo[thread_num]->thread->lastActivate -
threadInfo[thread_num]->thread->lastSuspend);
numCycles += delta;
ppCycles->notify(delta);
if (!tickEvent.scheduled()) {
//Make sure ticks are still on multiples of cycles
schedule(tickEvent, clockEdge(Cycles(0)));
}
_status = BaseSimpleCPU::Running;
if (std::find(activeThreads.begin(), activeThreads.end(), thread_num)
== activeThreads.end()) {
activeThreads.push_back(thread_num);
}
}
@ -243,21 +236,24 @@ AtomicSimpleCPU::suspendContext(ThreadID thread_num)
{
DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
assert(thread_num == 0);
assert(thread);
assert(thread_num < numThreads);
activeThreads.remove(thread_num);
if (_status == Idle)
return;
assert(_status == BaseSimpleCPU::Running);
// tick event may not be scheduled if this gets called from inside
// an instruction's execution, e.g. "quiesce"
if (tickEvent.scheduled())
deschedule(tickEvent);
threadInfo[thread_num]->notIdleFraction = 0;
notIdleFraction = 0;
if (activeThreads.empty()) {
_status = Idle;
if (tickEvent.scheduled()) {
deschedule(tickEvent);
}
}
}
@ -269,7 +265,7 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
// X86 ISA: Snooping an invalidation for monitor/mwait
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
if(cpu->getAddrMonitor()->doMonitor(pkt)) {
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
cpu->wakeup();
}
@ -277,7 +273,9 @@ AtomicSimpleCPU::AtomicCPUDPort::recvAtomicSnoop(PacketPtr pkt)
if (pkt->isInvalidate()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
pkt->getAddr());
TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
for (auto &t_info : cpu->threadInfo) {
TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
}
}
return 0;
@ -291,7 +289,7 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
// X86 ISA: Snooping an invalidation for monitor/mwait
AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
if(cpu->getAddrMonitor()->doMonitor(pkt)) {
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
cpu->wakeup();
}
@ -299,7 +297,9 @@ AtomicSimpleCPU::AtomicCPUDPort::recvFunctionalSnoop(PacketPtr pkt)
if (pkt->isInvalidate()) {
DPRINTF(SimpleCPU, "received invalidation for addr:%#x\n",
pkt->getAddr());
TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
for (auto &t_info : cpu->threadInfo) {
TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
}
}
}
@ -307,6 +307,9 @@ Fault
AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
unsigned size, unsigned flags)
{
SimpleExecContext& t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
// use the CPU's statically allocated read request and packet objects
Request *req = &data_read_req;
@ -330,7 +333,8 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
// translate to physical address
Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
Fault fault = thread->dtb->translateAtomic(req, thread->getTC(),
BaseTLB::Read);
// Now do the access.
if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
@ -370,6 +374,7 @@ AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
assert(!locked);
locked = true;
}
return fault;
}
@ -391,7 +396,8 @@ Fault
AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
Addr addr, unsigned flags, uint64_t *res)
{
SimpleExecContext& t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
static uint8_t zero_array[64] = {};
if (data == NULL) {
@ -424,7 +430,7 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
// translate to physical address
Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
Fault fault = thread->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Write);
// Now do the access.
if (fault == NoFault) {
@ -477,6 +483,8 @@ AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
assert(locked);
locked = false;
}
if (fault != NoFault && req->isPrefetch()) {
return NoFault;
} else {
@ -503,6 +511,19 @@ AtomicSimpleCPU::tick()
{
DPRINTF(SimpleCPU, "Tick\n");
// Change thread if multi-threaded
swapActiveThread();
// Set memory request ids to current thread
if (numThreads > 1) {
ifetch_req.setThreadContext(_cpuId, curThread);
data_read_req.setThreadContext(_cpuId, curThread);
data_write_req.setThreadContext(_cpuId, curThread);
}
SimpleExecContext& t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
Tick latency = 0;
for (int i = 0; i < width || locked; ++i) {
@ -529,7 +550,7 @@ AtomicSimpleCPU::tick()
if (needToFetch) {
ifetch_req.taskId(taskId());
setupFetchRequest(&ifetch_req);
fault = thread->itb->translateAtomic(&ifetch_req, tc,
fault = thread->itb->translateAtomic(&ifetch_req, thread->getTC(),
BaseTLB::Execute);
}
@ -565,7 +586,7 @@ AtomicSimpleCPU::tick()
preExecute();
if (curStaticInst) {
fault = curStaticInst->execute(this, traceData);
fault = curStaticInst->execute(&t_info, traceData);
// keep an instruction count
if (fault == NoFault) {
@ -601,7 +622,7 @@ AtomicSimpleCPU::tick()
}
}
if(fault != NoFault || !stayAtPC)
if(fault != NoFault || !t_info.stayAtPC)
advancePC(fault);
}
@ -613,7 +634,7 @@ AtomicSimpleCPU::tick()
latency = clockPeriod();
if (_status != Idle)
schedule(tickEvent, curTick() + latency);
reschedule(tickEvent, curTick() + latency, true);
}
void
@ -638,8 +659,5 @@ AtomicSimpleCPU::printAddr(Addr a)
AtomicSimpleCPU *
AtomicSimpleCPUParams::create()
{
numThreads = 1;
if (!FullSystem && workload.size() != 1)
panic("only one workload allowed");
return new AtomicSimpleCPU(this);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012-2013 ARM Limited
* Copyright (c) 2012-2013,2015 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
@ -44,6 +44,7 @@
#define __CPU_SIMPLE_ATOMIC_HH__
#include "cpu/simple/base.hh"
#include "cpu/simple/exec_context.hh"
#include "params/AtomicSimpleCPU.hh"
#include "sim/probe/probe.hh"
@ -96,9 +97,11 @@ class AtomicSimpleCPU : public BaseSimpleCPU
* </ul>
*/
bool isDrained() {
return microPC() == 0 &&
SimpleExecContext &t_info = *threadInfo[curThread];
return t_info.thread->microPC() == 0 &&
!locked &&
!stayAtPC;
!t_info.stayAtPC;
}
/**

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010-2012 ARM Limited
* Copyright (c) 2010-2012,2015 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@ -62,6 +62,7 @@
#include "cpu/exetrace.hh"
#include "cpu/pred/bpred_unit.hh"
#include "cpu/profile.hh"
#include "cpu/simple/exec_context.hh"
#include "cpu/simple_thread.hh"
#include "cpu/smt.hh"
#include "cpu/static_inst.hh"
@ -87,46 +88,121 @@ using namespace TheISA;
BaseSimpleCPU::BaseSimpleCPU(BaseSimpleCPUParams *p)
: BaseCPU(p),
curThread(0),
branchPred(p->branchPred),
traceData(NULL), thread(NULL), _status(Idle), interval_stats(false),
inst()
traceData(NULL),
inst(),
_status(Idle)
{
if (FullSystem)
thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb,
p->isa[0]);
else
thread = new SimpleThread(this, /* thread_num */ 0, p->system,
p->workload[0], p->itb, p->dtb, p->isa[0]);
SimpleThread *thread;
thread->setStatus(ThreadContext::Halted);
tc = thread->getTC();
for (unsigned i = 0; i < numThreads; i++) {
if (FullSystem) {
thread = new SimpleThread(this, i, p->system,
p->itb, p->dtb, p->isa[i]);
} else {
thread = new SimpleThread(this, i, p->system, p->workload[i],
p->itb, p->dtb, p->isa[i]);
}
threadInfo.push_back(new SimpleExecContext(this, thread));
ThreadContext *tc = thread->getTC();
threadContexts.push_back(tc);
}
if (p->checker) {
if (numThreads != 1)
fatal("Checker currently does not support SMT");
BaseCPU *temp_checker = p->checker;
checker = dynamic_cast<CheckerCPU *>(temp_checker);
checker->setSystem(p->system);
// Manipulate thread context
ThreadContext *cpu_tc = tc;
tc = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker);
ThreadContext *cpu_tc = threadContexts[0];
threadContexts[0] = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker);
} else {
checker = NULL;
}
}
numInst = 0;
startNumInst = 0;
numOp = 0;
startNumOp = 0;
numLoad = 0;
startNumLoad = 0;
lastIcacheStall = 0;
lastDcacheStall = 0;
void
BaseSimpleCPU::init()
{
BaseCPU::init();
threadContexts.push_back(tc);
for (auto tc : threadContexts) {
// Initialise the ThreadContext's memory proxies
tc->initMemProxies(tc);
if (FullSystem && !params()->switched_out) {
// initialize CPU, including PC
TheISA::initCPU(tc, tc->contextId());
}
}
}
fetchOffset = 0;
stayAtPC = false;
void
BaseSimpleCPU::checkPcEventQueue()
{
Addr oldpc, pc = threadInfo[curThread]->thread->instAddr();
do {
oldpc = pc;
system->pcEventQueue.service(threadContexts[curThread]);
pc = threadInfo[curThread]->thread->instAddr();
} while (oldpc != pc);
}
/**
 * Pick the next thread to execute when the CPU has more than one thread.
 *
 * The thread at the head of activeThreads becomes curThread and is moved
 * to the tail of the list. Nothing changes when the CPU is
 * single-threaded, when the current static instruction is flagged as
 * delayed-commit, when the current thread has stayAtPC set, or when no
 * thread is active.
 */
void
BaseSimpleCPU::swapActiveThread()
{
    if (numThreads <= 1)
        return;

    // Only switch when there is no current instruction or it is not a
    // delayed-commit instruction, and the thread is not holding its PC.
    const bool inst_allows_swap =
        !curStaticInst || !curStaticInst->isDelayedCommit();
    if (!inst_allows_swap || threadInfo[curThread]->stayAtPC)
        return;

    if (activeThreads.empty())
        return;

    // Rotate: take the head of the list and re-queue it at the tail.
    curThread = activeThreads.front();
    activeThreads.pop_front();
    activeThreads.push_back(curThread);
}
void
BaseSimpleCPU::countInst()
{
SimpleExecContext& t_info = *threadInfo[curThread];
if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
t_info.numInst++;
t_info.numInsts++;
}
t_info.numOp++;
t_info.numOps++;
system->totalNumInsts++;
t_info.thread->funcExeInst++;
}
/**
 * @return the sum of the numInst counters of every entry in threadInfo.
 */
Counter
BaseSimpleCPU::totalInsts() const
{
    Counter inst_sum = 0;
    for (auto it = threadInfo.begin(); it != threadInfo.end(); ++it)
        inst_sum += (*it)->numInst;

    return inst_sum;
}
/**
 * @return the sum of the numOp counters of every entry in threadInfo.
 */
Counter
BaseSimpleCPU::totalOps() const
{
    Counter op_sum = 0;
    const auto thread_count = threadInfo.size();
    for (auto idx = thread_count * 0; idx < thread_count; ++idx)
        op_sum += threadInfo[idx]->numOp;

    return op_sum;
}
BaseSimpleCPU::~BaseSimpleCPU()
@ -148,177 +224,184 @@ BaseSimpleCPU::regStats()
BaseCPU::regStats();
numInsts
.name(name() + ".committedInsts")
for (ThreadID tid = 0; tid < numThreads; tid++) {
SimpleExecContext& t_info = *threadInfo[tid];
std::string thread_str = name();
if (numThreads > 1)
thread_str += ".thread" + std::to_string(tid);
t_info.numInsts
.name(thread_str + ".committedInsts")
.desc("Number of instructions committed")
;
numOps
.name(name() + ".committedOps")
t_info.numOps
.name(thread_str + ".committedOps")
.desc("Number of ops (including micro ops) committed")
;
numIntAluAccesses
.name(name() + ".num_int_alu_accesses")
t_info.numIntAluAccesses
.name(thread_str + ".num_int_alu_accesses")
.desc("Number of integer alu accesses")
;
numFpAluAccesses
.name(name() + ".num_fp_alu_accesses")
t_info.numFpAluAccesses
.name(thread_str + ".num_fp_alu_accesses")
.desc("Number of float alu accesses")
;
numCallsReturns
.name(name() + ".num_func_calls")
t_info.numCallsReturns
.name(thread_str + ".num_func_calls")
.desc("number of times a function call or return occured")
;
numCondCtrlInsts
.name(name() + ".num_conditional_control_insts")
t_info.numCondCtrlInsts
.name(thread_str + ".num_conditional_control_insts")
.desc("number of instructions that are conditional controls")
;
numIntInsts
.name(name() + ".num_int_insts")
t_info.numIntInsts
.name(thread_str + ".num_int_insts")
.desc("number of integer instructions")
;
numFpInsts
.name(name() + ".num_fp_insts")
t_info.numFpInsts
.name(thread_str + ".num_fp_insts")
.desc("number of float instructions")
;
numIntRegReads
.name(name() + ".num_int_register_reads")
t_info.numIntRegReads
.name(thread_str + ".num_int_register_reads")
.desc("number of times the integer registers were read")
;
numIntRegWrites
.name(name() + ".num_int_register_writes")
t_info.numIntRegWrites
.name(thread_str + ".num_int_register_writes")
.desc("number of times the integer registers were written")
;
numFpRegReads
.name(name() + ".num_fp_register_reads")
t_info.numFpRegReads
.name(thread_str + ".num_fp_register_reads")
.desc("number of times the floating registers were read")
;
numFpRegWrites
.name(name() + ".num_fp_register_writes")
t_info.numFpRegWrites
.name(thread_str + ".num_fp_register_writes")
.desc("number of times the floating registers were written")
;
numCCRegReads
.name(name() + ".num_cc_register_reads")
t_info.numCCRegReads
.name(thread_str + ".num_cc_register_reads")
.desc("number of times the CC registers were read")
.flags(nozero)
;
numCCRegWrites
.name(name() + ".num_cc_register_writes")
t_info.numCCRegWrites
.name(thread_str + ".num_cc_register_writes")
.desc("number of times the CC registers were written")
.flags(nozero)
;
numMemRefs
.name(name()+".num_mem_refs")
t_info.numMemRefs
.name(thread_str + ".num_mem_refs")
.desc("number of memory refs")
;
numStoreInsts
.name(name() + ".num_store_insts")
t_info.numStoreInsts
.name(thread_str + ".num_store_insts")
.desc("Number of store instructions")
;
numLoadInsts
.name(name() + ".num_load_insts")
t_info.numLoadInsts
.name(thread_str + ".num_load_insts")
.desc("Number of load instructions")
;
notIdleFraction
.name(name() + ".not_idle_fraction")
t_info.notIdleFraction
.name(thread_str + ".not_idle_fraction")
.desc("Percentage of non-idle cycles")
;
idleFraction
.name(name() + ".idle_fraction")
t_info.idleFraction
.name(thread_str + ".idle_fraction")
.desc("Percentage of idle cycles")
;
numBusyCycles
.name(name() + ".num_busy_cycles")
t_info.numBusyCycles
.name(thread_str + ".num_busy_cycles")
.desc("Number of busy cycles")
;
numIdleCycles
.name(name()+".num_idle_cycles")
t_info.numIdleCycles
.name(thread_str + ".num_idle_cycles")
.desc("Number of idle cycles")
;
icacheStallCycles
.name(name() + ".icache_stall_cycles")
t_info.icacheStallCycles
.name(thread_str + ".icache_stall_cycles")
.desc("ICache total stall cycles")
.prereq(icacheStallCycles)
.prereq(t_info.icacheStallCycles)
;
dcacheStallCycles
.name(name() + ".dcache_stall_cycles")
t_info.dcacheStallCycles
.name(thread_str + ".dcache_stall_cycles")
.desc("DCache total stall cycles")
.prereq(dcacheStallCycles)
.prereq(t_info.dcacheStallCycles)
;
statExecutedInstType
t_info.statExecutedInstType
.init(Enums::Num_OpClass)
.name(name() + ".op_class")
.name(thread_str + ".op_class")
.desc("Class of executed instruction")
.flags(total | pdf | dist)
;
for (unsigned i = 0; i < Num_OpClasses; ++i) {
statExecutedInstType.subname(i, Enums::OpClassStrings[i]);
t_info.statExecutedInstType.subname(i, Enums::OpClassStrings[i]);
}
idleFraction = constant(1.0) - notIdleFraction;
numIdleCycles = idleFraction * numCycles;
numBusyCycles = (notIdleFraction)*numCycles;
t_info.idleFraction = constant(1.0) - t_info.notIdleFraction;
t_info.numIdleCycles = t_info.idleFraction * numCycles;
t_info.numBusyCycles = t_info.notIdleFraction * numCycles;
numBranches
.name(name() + ".Branches")
t_info.numBranches
.name(thread_str + ".Branches")
.desc("Number of branches fetched")
.prereq(numBranches);
.prereq(t_info.numBranches);
numPredictedBranches
.name(name() + ".predictedBranches")
t_info.numPredictedBranches
.name(thread_str + ".predictedBranches")
.desc("Number of branches predicted as taken")
.prereq(numPredictedBranches);
.prereq(t_info.numPredictedBranches);
numBranchMispred
.name(name() + ".BranchMispred")
t_info.numBranchMispred
.name(thread_str + ".BranchMispred")
.desc("Number of branch mispredictions")
.prereq(numBranchMispred);
.prereq(t_info.numBranchMispred);
}
}
void
BaseSimpleCPU::resetStats()
{
// startNumInst = numInst;
notIdleFraction = (_status != Idle);
for (auto &thread_info : threadInfo) {
thread_info->notIdleFraction = (_status != Idle);
}
}
void
BaseSimpleCPU::serializeThread(CheckpointOut &cp, ThreadID tid) const
{
assert(_status == Idle || _status == Running);
assert(tid == 0);
thread->serialize(cp);
threadInfo[tid]->thread->serialize(cp);
}
void
BaseSimpleCPU::unserializeThread(CheckpointIn &cp, ThreadID tid)
{
if (tid != 0)
fatal("Trying to load more than one thread into a SimpleCPU\n");
thread->unserialize(cp);
threadInfo[tid]->thread->unserialize(cp);
}
void
@ -329,29 +412,34 @@ change_thread_state(ThreadID tid, int activate, int priority)
Addr
BaseSimpleCPU::dbg_vtophys(Addr addr)
{
return vtophys(tc, addr);
return vtophys(threadContexts[curThread], addr);
}
void
BaseSimpleCPU::wakeup()
{
getAddrMonitor()->gotWakeup = true;
if (thread->status() != ThreadContext::Suspended)
return;
getCpuAddrMonitor()->gotWakeup = true;
for (ThreadID tid = 0; tid < numThreads; tid++) {
if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) {
DPRINTF(Quiesce,"Suspended Processor awoke\n");
thread->activate();
threadInfo[tid]->thread->activate();
}
}
}
void
BaseSimpleCPU::checkForInterrupts()
{
SimpleExecContext&t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
ThreadContext* tc = thread->getTC();
if (checkInterrupts(tc)) {
Fault interrupt = interrupts->getInterrupt(tc);
if (interrupt != NoFault) {
fetchOffset = 0;
t_info.fetchOffset = 0;
interrupts->updateIntrInfo(tc);
interrupt->invoke(tc);
thread->decoder.reset();
@ -363,12 +451,15 @@ BaseSimpleCPU::checkForInterrupts()
void
BaseSimpleCPU::setupFetchRequest(Request *req)
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
Addr instAddr = thread->instAddr();
// set up memory request for instruction fetch
DPRINTF(Fetch, "Fetch: PC:%08p\n", instAddr);
Addr fetchPC = (instAddr & PCMask) + fetchOffset;
Addr fetchPC = (instAddr & PCMask) + t_info.fetchOffset;
req->setVirt(0, fetchPC, sizeof(MachInst), Request::INST_FETCH, instMasterId(),
instAddr);
}
@ -377,6 +468,9 @@ BaseSimpleCPU::setupFetchRequest(Request *req)
void
BaseSimpleCPU::preExecute()
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
// maintain $r0 semantics
thread->setIntReg(ZeroReg, 0);
#if THE_ISA == ALPHA_ISA
@ -384,7 +478,7 @@ BaseSimpleCPU::preExecute()
#endif // ALPHA_ISA
// check for instruction-count-based events
comInstEventQueue[0]->serviceEvents(numInst);
comInstEventQueue[curThread]->serviceEvents(t_info.numInst);
system->instEventQueue.serviceEvents(system->totalNumInsts);
// decode the instruction
@ -393,7 +487,7 @@ BaseSimpleCPU::preExecute()
TheISA::PCState pcState = thread->pcState();
if (isRomMicroPC(pcState.microPC())) {
stayAtPC = false;
t_info.stayAtPC = false;
curStaticInst = microcodeRom.fetchMicroop(pcState.microPC(),
curMacroStaticInst);
} else if (!curMacroStaticInst) {
@ -404,7 +498,7 @@ BaseSimpleCPU::preExecute()
//Predecode, ie bundle up an ExtMachInst
//If more fetch data is needed, pass it in.
Addr fetchPC = (pcState.instAddr() & PCMask) + fetchOffset;
Addr fetchPC = (pcState.instAddr() & PCMask) + t_info.fetchOffset;
//if(decoder->needMoreBytes())
decoder->moreBytes(pcState, fetchPC, inst);
//else
@ -414,18 +508,19 @@ BaseSimpleCPU::preExecute()
//fetch beyond the MachInst at the current pc.
instPtr = decoder->decode(pcState);
if (instPtr) {
stayAtPC = false;
t_info.stayAtPC = false;
thread->pcState(pcState);
} else {
stayAtPC = true;
fetchOffset += sizeof(MachInst);
t_info.stayAtPC = true;
t_info.fetchOffset += sizeof(MachInst);
}
//If we decoded an instruction and it's microcoded, start pulling
//out micro ops
if (instPtr && instPtr->isMacroop()) {
curMacroStaticInst = instPtr;
curStaticInst = curMacroStaticInst->fetchMicroop(pcState.microPC());
curStaticInst =
curMacroStaticInst->fetchMicroop(pcState.microPC());
} else {
curStaticInst = instPtr;
}
@ -437,7 +532,7 @@ BaseSimpleCPU::preExecute()
//If we decoded an instruction this "tick", record information about it.
if (curStaticInst) {
#if TRACING_ON
traceData = tracer->getInstRecord(curTick(), tc,
traceData = tracer->getInstRecord(curTick(), thread->getTC(),
curStaticInst, thread->pcState(), curMacroStaticInst);
DPRINTF(Decode,"Decode: Decoded %s instruction: %#x\n",
@ -445,86 +540,91 @@ BaseSimpleCPU::preExecute()
#endif // TRACING_ON
}
if (branchPred && curStaticInst && curStaticInst->isControl()) {
if (branchPred && curStaticInst &&
curStaticInst->isControl()) {
// Use a fake sequence number since we only have one
// instruction in flight at the same time.
const InstSeqNum cur_sn(0);
const ThreadID tid(0);
pred_pc = thread->pcState();
t_info.predPC = thread->pcState();
const bool predict_taken(
branchPred->predict(curStaticInst, cur_sn, pred_pc, tid));
branchPred->predict(curStaticInst, cur_sn, t_info.predPC,
curThread));
if (predict_taken)
++numPredictedBranches;
++t_info.numPredictedBranches;
}
}
void
BaseSimpleCPU::postExecute()
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
assert(curStaticInst);
TheISA::PCState pc = tc->pcState();
TheISA::PCState pc = threadContexts[curThread]->pcState();
Addr instAddr = pc.instAddr();
if (FullSystem && thread->profile) {
bool usermode = TheISA::inUserMode(tc);
bool usermode = TheISA::inUserMode(threadContexts[curThread]);
thread->profilePC = usermode ? 1 : instAddr;
ProfileNode *node = thread->profile->consume(tc, curStaticInst);
ProfileNode *node = thread->profile->consume(threadContexts[curThread],
curStaticInst);
if (node)
thread->profileNode = node;
}
if (curStaticInst->isMemRef()) {
numMemRefs++;
t_info.numMemRefs++;
}
if (curStaticInst->isLoad()) {
++numLoad;
comLoadEventQueue[0]->serviceEvents(numLoad);
++t_info.numLoad;
comLoadEventQueue[curThread]->serviceEvents(t_info.numLoad);
}
if (CPA::available()) {
CPA::cpa()->swAutoBegin(tc, pc.nextInstAddr());
CPA::cpa()->swAutoBegin(threadContexts[curThread], pc.nextInstAddr());
}
if (curStaticInst->isControl()) {
++numBranches;
++t_info.numBranches;
}
/* Power model statistics */
//integer alu accesses
if (curStaticInst->isInteger()){
numIntAluAccesses++;
numIntInsts++;
t_info.numIntAluAccesses++;
t_info.numIntInsts++;
}
//float alu accesses
if (curStaticInst->isFloating()){
numFpAluAccesses++;
numFpInsts++;
t_info.numFpAluAccesses++;
t_info.numFpInsts++;
}
//number of function calls/returns to get window accesses
if (curStaticInst->isCall() || curStaticInst->isReturn()){
numCallsReturns++;
t_info.numCallsReturns++;
}
//the number of branch predictions that will be made
if (curStaticInst->isCondCtrl()){
numCondCtrlInsts++;
t_info.numCondCtrlInsts++;
}
//result bus accesses
if (curStaticInst->isLoad()){
numLoadInsts++;
t_info.numLoadInsts++;
}
if (curStaticInst->isStore()){
numStoreInsts++;
t_info.numStoreInsts++;
}
/* End power model statistics */
statExecutedInstType[curStaticInst->opClass()]++;
t_info.statExecutedInstType[curStaticInst->opClass()]++;
if (FullSystem)
traceFunctions(instAddr);
@ -542,13 +642,16 @@ BaseSimpleCPU::postExecute()
void
BaseSimpleCPU::advancePC(const Fault &fault)
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
const bool branching(thread->pcState().branching());
//Since we're moving to a new pc, zero out the offset
fetchOffset = 0;
t_info.fetchOffset = 0;
if (fault != NoFault) {
curMacroStaticInst = StaticInst::nullStaticInstPtr;
fault->invoke(tc, curStaticInst);
fault->invoke(threadContexts[curThread], curStaticInst);
thread->decoder.reset();
} else {
if (curStaticInst) {
@ -564,16 +667,14 @@ BaseSimpleCPU::advancePC(const Fault &fault)
// Use a fake sequence number since we only have one
// instruction in flight at the same time.
const InstSeqNum cur_sn(0);
const ThreadID tid(0);
if (pred_pc == thread->pcState()) {
if (t_info.predPC == thread->pcState()) {
// Correctly predicted branch
branchPred->update(cur_sn, tid);
branchPred->update(cur_sn, curThread);
} else {
// Mis-predicted branch
branchPred->squash(cur_sn, pcState(),
branching, tid);
++numBranchMispred;
branchPred->squash(cur_sn, thread->pcState(), branching, curThread);
++t_info.numBranchMispred;
}
}
}
@ -582,5 +683,6 @@ void
BaseSimpleCPU::startup()
{
BaseCPU::startup();
thread->startup();
for (auto& t_info : threadInfo)
t_info->thread->startup();
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2012 ARM Limited
* Copyright (c) 2011-2012,2015 ARM Limited
* Copyright (c) 2013 Advanced Micro Devices, Inc.
* All rights reserved
*
@ -79,57 +79,35 @@ namespace Trace {
struct BaseSimpleCPUParams;
class BPredUnit;
class SimpleExecContext;
class BaseSimpleCPU : public BaseCPU, public ExecContext
class BaseSimpleCPU : public BaseCPU
{
protected:
typedef TheISA::MiscReg MiscReg;
typedef TheISA::FloatReg FloatReg;
typedef TheISA::FloatRegBits FloatRegBits;
typedef TheISA::CCReg CCReg;
ThreadID curThread;
BPredUnit *branchPred;
protected:
Trace::InstRecord *traceData;
inline void checkPcEventQueue() {
Addr oldpc, pc = thread->instAddr();
do {
oldpc = pc;
system->pcEventQueue.service(tc);
pc = thread->instAddr();
} while (oldpc != pc);
}
public:
void wakeup();
void zero_fill_64(Addr addr) {
static int warned = 0;
if (!warned) {
warn ("WH64 is not implemented");
warned = 1;
}
};
void checkPcEventQueue();
void swapActiveThread();
public:
BaseSimpleCPU(BaseSimpleCPUParams *params);
virtual ~BaseSimpleCPU();
void wakeup();
virtual void init();
public:
/** SimpleThread object, provides all the architectural state. */
SimpleThread *thread;
/** ThreadContext object, provides an interface for external
* objects to modify this thread's state.
*/
ThreadContext *tc;
Trace::InstRecord *traceData;
CheckerCPU *checker;
protected:
std::vector<SimpleExecContext*> threadInfo;
std::list<ThreadID> activeThreads;
/** Current instruction */
TheISA::MachInst inst;
StaticInstPtr curStaticInst;
StaticInstPtr curMacroStaticInst;
protected:
enum Status {
Idle,
Running,
@ -147,22 +125,8 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext
Status _status;
public:
Addr dbg_vtophys(Addr addr);
bool interval_stats;
// current instruction
TheISA::MachInst inst;
StaticInstPtr curStaticInst;
StaticInstPtr curMacroStaticInst;
//This is the offset from the current pc that fetch should be performed at
Addr fetchOffset;
//This flag says to stay at the current pc. This is useful for
//instructions which go beyond MachInst boundaries.
bool stayAtPC;
void checkForInterrupts();
void setupFetchRequest(Request *req);
@ -178,289 +142,20 @@ class BaseSimpleCPU : public BaseCPU, public ExecContext
virtual void startup();
// number of simulated instructions
Counter numInst;
Counter startNumInst;
Stats::Scalar numInsts;
Counter numOp;
Counter startNumOp;
Stats::Scalar numOps;
virtual Fault readMem(Addr addr, uint8_t* data, unsigned size,
unsigned flags) = 0;
void countInst()
{
if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
numInst++;
numInsts++;
}
numOp++;
numOps++;
virtual Fault writeMem(uint8_t* data, unsigned size, Addr addr,
unsigned flags, uint64_t* res) = 0;
system->totalNumInsts++;
thread->funcExeInst++;
}
virtual Counter totalInsts() const
{
return numInst - startNumInst;
}
virtual Counter totalOps() const
{
return numOp - startNumOp;
}
//number of integer alu accesses
Stats::Scalar numIntAluAccesses;
//number of float alu accesses
Stats::Scalar numFpAluAccesses;
//number of function calls/returns
Stats::Scalar numCallsReturns;
//conditional control instructions;
Stats::Scalar numCondCtrlInsts;
//number of int instructions
Stats::Scalar numIntInsts;
//number of float instructions
Stats::Scalar numFpInsts;
//number of integer register file accesses
Stats::Scalar numIntRegReads;
Stats::Scalar numIntRegWrites;
//number of float register file accesses
Stats::Scalar numFpRegReads;
Stats::Scalar numFpRegWrites;
//number of condition code register file accesses
Stats::Scalar numCCRegReads;
Stats::Scalar numCCRegWrites;
// number of simulated memory references
Stats::Scalar numMemRefs;
Stats::Scalar numLoadInsts;
Stats::Scalar numStoreInsts;
// number of idle cycles
Stats::Formula numIdleCycles;
// number of busy cycles
Stats::Formula numBusyCycles;
// number of simulated loads
Counter numLoad;
Counter startNumLoad;
// number of idle cycles
Stats::Average notIdleFraction;
Stats::Formula idleFraction;
// number of cycles stalled for I-cache responses
Stats::Scalar icacheStallCycles;
Counter lastIcacheStall;
// number of cycles stalled for D-cache responses
Stats::Scalar dcacheStallCycles;
Counter lastDcacheStall;
/// @{
/// Total number of branches fetched
Stats::Scalar numBranches;
/// Number of branches predicted as taken
Stats::Scalar numPredictedBranches;
/// Number of mispredicted branches
Stats::Scalar numBranchMispred;
/// @}
// instruction mix histogram by OpClass
Stats::Vector statExecutedInstType;
void countInst();
virtual Counter totalInsts() const;
virtual Counter totalOps() const;
void serializeThread(CheckpointOut &cp,
ThreadID tid) const M5_ATTR_OVERRIDE;
void unserializeThread(CheckpointIn &cp, ThreadID tid) M5_ATTR_OVERRIDE;
// These functions are only used in CPU models that split
// effective address computation from the actual memory access.
/** Effective-address recording is not provided by this model: always panics. */
void
setEA(Addr EA)
{
    panic("BaseSimpleCPU::setEA() not implemented\n");
}
/** Effective-address lookup is not provided by this model: always panics. */
Addr
getEA() const
{
    panic("BaseSimpleCPU::getEA() not implemented\n");
}
// The register accessor methods provide the index of the
// instruction's operand (e.g., 0 or 1), not the architectural
// register index, to simplify the implementation of register
// renaming. We find the architectural register index by indexing
// into the instruction's own operand index table. Note that a
// raw pointer to the StaticInst is provided instead of a
// ref-counted StaticInstPtr to reduce overhead. This is fine as
// long as these methods don't copy the pointer into any long-term
// storage (which is pretty hard to imagine they would have reason
// to do).
/**
 * Read integer source operand number idx of instruction si.
 * Bumps numIntRegReads, then fetches the register that
 * si->srcRegIdx(idx) names from the thread.
 */
IntReg
readIntRegOperand(const StaticInst *si, int idx)
{
    numIntRegReads++;

    return thread->readIntReg(si->srcRegIdx(idx));
}
/**
 * Read floating-point source operand number idx of instruction si.
 * Bumps numFpRegReads; the operand's register index is rebased by
 * TheISA::FP_Reg_Base before the thread is queried.
 */
FloatReg
readFloatRegOperand(const StaticInst *si, int idx)
{
    numFpRegReads++;

    const int fp_reg = si->srcRegIdx(idx) - TheISA::FP_Reg_Base;
    return thread->readFloatReg(fp_reg);
}
/**
 * Read the raw bits of floating-point source operand number idx of si.
 * Bumps numFpRegReads; the operand's register index is rebased by
 * TheISA::FP_Reg_Base before the thread is queried.
 */
FloatRegBits
readFloatRegOperandBits(const StaticInst *si, int idx)
{
    numFpRegReads++;

    const int fp_reg = si->srcRegIdx(idx) - TheISA::FP_Reg_Base;
    return thread->readFloatRegBits(fp_reg);
}
/**
 * Read condition-code source operand number idx of instruction si.
 * Bumps numCCRegReads; the operand's register index is rebased by
 * TheISA::CC_Reg_Base before the thread is queried.
 */
CCReg
readCCRegOperand(const StaticInst *si, int idx)
{
    numCCRegReads++;

    const int cc_reg = si->srcRegIdx(idx) - TheISA::CC_Reg_Base;
    return thread->readCCReg(cc_reg);
}
/**
 * Write val to integer destination operand number idx of instruction si.
 * Bumps numIntRegWrites before storing through the thread.
 */
void
setIntRegOperand(const StaticInst *si, int idx, IntReg val)
{
    numIntRegWrites++;

    thread->setIntReg(si->destRegIdx(idx), val);
}
/**
 * Write val to floating-point destination operand number idx of si.
 * Bumps numFpRegWrites; the operand's register index is rebased by
 * TheISA::FP_Reg_Base before the thread is updated.
 */
void
setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
{
    numFpRegWrites++;

    const int fp_reg = si->destRegIdx(idx) - TheISA::FP_Reg_Base;
    thread->setFloatReg(fp_reg, val);
}
/**
 * Write the raw bits val to floating-point destination operand idx of si.
 * Bumps numFpRegWrites; the operand's register index is rebased by
 * TheISA::FP_Reg_Base before the thread is updated.
 */
void
setFloatRegOperandBits(const StaticInst *si, int idx, FloatRegBits val)
{
    numFpRegWrites++;

    const int fp_reg = si->destRegIdx(idx) - TheISA::FP_Reg_Base;
    thread->setFloatRegBits(fp_reg, val);
}
/**
 * Write val to condition-code destination operand number idx of si.
 * Bumps numCCRegWrites; the operand's register index is rebased by
 * TheISA::CC_Reg_Base before the thread is updated.
 */
void
setCCRegOperand(const StaticInst *si, int idx, CCReg val)
{
    numCCRegWrites++;

    const int cc_reg = si->destRegIdx(idx) - TheISA::CC_Reg_Base;
    thread->setCCReg(cc_reg, val);
}
/** Return the predicate flag as reported by the thread. */
bool
readPredicate()
{
    return thread->readPredicate();
}
/**
 * Store the predicate flag in the thread and, when an instruction
 * trace record is attached (traceData is non-null), mirror it there.
 */
void
setPredicate(bool val)
{
    thread->setPredicate(val);

    if (traceData)
        traceData->setPredicate(val);
}
/** Return a copy of the thread's current PC state. */
TheISA::PCState
pcState() const
{
    return thread->pcState();
}
/** Overwrite the thread's PC state with val. */
void
pcState(const TheISA::PCState &val)
{
    thread->pcState(val);
}
/** Return the thread's current instruction address. */
Addr
instAddr()
{
    return thread->instAddr();
}
/** Return the thread's next instruction address. */
Addr
nextInstAddr()
{
    return thread->nextInstAddr();
}
/** Return the thread's current micro-PC. */
MicroPC
microPC()
{
    return thread->microPC();
}
/**
 * Read miscellaneous register misc_reg through the thread's
 * no-effect accessor. No statistics counter is updated here.
 */
MiscReg
readMiscRegNoEffect(int misc_reg) const
{
    return thread->readMiscRegNoEffect(misc_reg);
}
/**
 * Read miscellaneous register misc_reg through the thread.
 * The access is accounted for in numIntRegReads.
 */
MiscReg
readMiscReg(int misc_reg)
{
    numIntRegReads++;

    return thread->readMiscReg(misc_reg);
}
/**
 * Write val to miscellaneous register misc_reg through the thread.
 * The access is accounted for in numIntRegWrites.
 */
void
setMiscReg(int misc_reg, const MiscReg &val)
{
    numIntRegWrites++;

    thread->setMiscReg(misc_reg, val);
}
/**
 * Read miscellaneous source operand number idx of instruction si.
 * Bumps numIntRegReads; the operand's register index is rebased by
 * TheISA::Misc_Reg_Base before the thread is queried.
 */
MiscReg
readMiscRegOperand(const StaticInst *si, int idx)
{
    numIntRegReads++;

    const int misc_idx = si->srcRegIdx(idx) - TheISA::Misc_Reg_Base;
    return thread->readMiscReg(misc_idx);
}
/**
 * Write val to miscellaneous destination operand number idx of si.
 * Bumps numIntRegWrites; the operand's register index is rebased by
 * TheISA::Misc_Reg_Base before the thread is updated.
 */
void
setMiscRegOperand(const StaticInst *si, int idx, const MiscReg &val)
{
    numIntRegWrites++;

    const int misc_idx = si->destRegIdx(idx) - TheISA::Misc_Reg_Base;
    thread->setMiscReg(misc_idx, val);
}
/** Pass a demap request for (vaddr, asn) on to the thread. */
void
demapPage(Addr vaddr, uint64_t asn)
{
    thread->demapPage(vaddr, asn);
}
/** Pass an instruction-side demap request for (vaddr, asn) on to the thread. */
void
demapInstPage(Addr vaddr, uint64_t asn)
{
    thread->demapInstPage(vaddr, asn);
}
/** Pass a data-side demap request for (vaddr, asn) on to the thread. */
void
demapDataPage(Addr vaddr, uint64_t asn)
{
    thread->demapDataPage(vaddr, asn);
}
/** Return the store-conditional failure count kept by the thread. */
unsigned int
readStCondFailures() const
{
    return thread->readStCondFailures();
}
/** Set the store-conditional failure count kept by the thread. */
void
setStCondFailures(unsigned int sc_failures)
{
    thread->setStCondFailures(sc_failures);
}
/**
 * Cross-thread register read. Not supported by the simple CPU models:
 * the call always panics, whatever regIdx and tid are.
 */
MiscReg
readRegOtherThread(int regIdx, ThreadID tid = InvalidThreadID)
{
    panic("Simple CPU models do not support multithreaded "
          "register access.\n");
}
/**
 * Cross-thread register write. Not supported by the simple CPU models:
 * the call always panics, whatever the arguments are.
 */
void
setRegOtherThread(int regIdx, MiscReg val, ThreadID tid = InvalidThreadID)
{
    panic("Simple CPU models do not support multithreaded "
          "register access.\n");
}
//Fault CacheOp(uint8_t Op, Addr EA);
/** Delegate hwrei handling to the thread and return its fault. */
Fault
hwrei()
{
    return thread->hwrei();
}
/** Delegate the PAL-call check for palFunc to the thread. */
bool
simPalCheck(int palFunc)
{
    return thread->simPalCheck(palFunc);
}
/**
 * Have the thread execute system call number callnum.
 * Panics when the simulator runs in full-system mode.
 */
void
syscall(int64_t callnum)
{
    if (FullSystem) {
        panic("Syscall emulation isn't available in FS mode.\n");
    }

    thread->syscall(callnum);
}
/** Return the ThreadContext pointer held in the tc member. */
ThreadContext *
tcBase()
{
    return tc;
}
private:
TheISA::PCState pred_pc;
public:
// monitor/mwait functions
/** Forward to BaseCPU::armMonitor() for the given address. */
void
armMonitor(Addr address)
{
    BaseCPU::armMonitor(address);
}
/** Forward to BaseCPU::mwait() and return its result. */
bool
mwait(PacketPtr pkt)
{
    return BaseCPU::mwait(pkt);
}
/**
 * Forward to BaseCPU::mwaitAtomic(), supplying the thread's data TLB.
 * Note that the tc parameter shadows the member of the same name.
 */
void
mwaitAtomic(ThreadContext *tc)
{
    BaseCPU::mwaitAtomic(tc, thread->dtb);
}
/** Return the address monitor obtained from BaseCPU::getCpuAddrMonitor(). */
AddressMonitor *
getAddrMonitor()
{
    return BaseCPU::getCpuAddrMonitor();
}
};
#endif // __CPU_SIMPLE_BASE_HH__

View file

@ -0,0 +1,416 @@
/*
* Copyright (c) 2014-2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2002-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Andreas Sandberg
* Mitch Hayenga
*/
#ifndef __CPU_SIMPLE_EXEC_CONTEXT_HH__
#define __CPU_SIMPLE_EXEC_CONTEXT_HH__
#include "arch/registers.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
#include "cpu/base.hh"
#include "cpu/exec_context.hh"
#include "cpu/simple/base.hh"
#include "cpu/static_inst_fwd.hh"
#include "cpu/translation.hh"
class BaseSimpleCPU;
/**
 * Per-thread execution context used by the simple CPU models.
 *
 * Bundles the owning CPU, the SimpleThread holding architectural state,
 * a few pieces of per-thread fetch state and the per-thread statistics.
 * The ExecContext operations are implemented by forwarding to the thread
 * (register, PC and TLB operations) or to the CPU (memory accesses and
 * monitor/mwait), updating the access counters along the way.
 */
class SimpleExecContext : public ExecContext
{
  protected:
    typedef TheISA::MiscReg MiscReg;
    typedef TheISA::FloatReg FloatReg;
    typedef TheISA::FloatRegBits FloatRegBits;
    typedef TheISA::CCReg CCReg;

  public:
    /** CPU this context belongs to. */
    BaseSimpleCPU *cpu;
    /** Thread whose state this context operates on. */
    SimpleThread* thread;

    /** Offset from the current PC at which fetch should be performed. */
    Addr fetchOffset;

    /**
     * When set, stay at the current PC. Useful for instructions that
     * extend beyond MachInst boundaries.
     */
    bool stayAtPC;

    /** Branch prediction state. */
    TheISA::PCState predPC;

    /** PER-THREAD STATS */

    /** Simulated instruction counts (raw counter and statistic). */
    Counter numInst;
    Stats::Scalar numInsts;
    /** Simulated op counts (raw counter and statistic). */
    Counter numOp;
    Stats::Scalar numOps;

    /** Integer ALU accesses. */
    Stats::Scalar numIntAluAccesses;

    /** Floating-point ALU accesses. */
    Stats::Scalar numFpAluAccesses;

    /** Function calls and returns. */
    Stats::Scalar numCallsReturns;

    /** Conditional control instructions. */
    Stats::Scalar numCondCtrlInsts;

    /** Integer instructions. */
    Stats::Scalar numIntInsts;

    /** Floating-point instructions. */
    Stats::Scalar numFpInsts;

    /** Integer register file accesses. */
    Stats::Scalar numIntRegReads;
    Stats::Scalar numIntRegWrites;

    /** Floating-point register file accesses. */
    Stats::Scalar numFpRegReads;
    Stats::Scalar numFpRegWrites;

    /** Condition-code register file accesses. */
    Stats::Scalar numCCRegReads;
    Stats::Scalar numCCRegWrites;

    /** Simulated memory references. */
    Stats::Scalar numMemRefs;
    Stats::Scalar numLoadInsts;
    Stats::Scalar numStoreInsts;

    /** Idle cycles. */
    Stats::Formula numIdleCycles;

    /** Busy cycles. */
    Stats::Formula numBusyCycles;

    /** Simulated loads. */
    Counter numLoad;

    /** Not-idle average and the idle-fraction formula. */
    Stats::Average notIdleFraction;
    Stats::Formula idleFraction;

    /** Cycles stalled waiting for I-cache responses. */
    Stats::Scalar icacheStallCycles;
    Counter lastIcacheStall;

    /** Cycles stalled waiting for D-cache responses. */
    Stats::Scalar dcacheStallCycles;
    Counter lastDcacheStall;

    /// @{
    /// Total number of branches fetched
    Stats::Scalar numBranches;
    /// Number of branches predicted as taken
    Stats::Scalar numPredictedBranches;
    /// Number of mispredicted branches
    Stats::Scalar numBranchMispred;
    /// @}

    /** Instruction mix histogram, by OpClass. */
    Stats::Vector statExecutedInstType;

  public:
    /**
     * Bind the context to its CPU and thread. The fetch state and the
     * raw counters start out cleared.
     */
    SimpleExecContext(BaseSimpleCPU* _cpu, SimpleThread* _thread)
        : cpu(_cpu),
          thread(_thread),
          fetchOffset(0),
          stayAtPC(false),
          numInst(0),
          numOp(0),
          numLoad(0),
          lastIcacheStall(0),
          lastDcacheStall(0)
    {
    }

    /** Read integer source operand idx of si; counts an int reg read. */
    IntReg
    readIntRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE
    {
        numIntRegReads++;

        return thread->readIntReg(si->srcRegIdx(idx));
    }

    /** Write integer destination operand idx of si; counts an int reg write. */
    void
    setIntRegOperand(const StaticInst *si, int idx, IntReg val)
        M5_ATTR_OVERRIDE
    {
        numIntRegWrites++;

        thread->setIntReg(si->destRegIdx(idx), val);
    }

    /**
     * Read floating-point source operand idx of si by value.
     * The register index is rebased by TheISA::FP_Reg_Base.
     */
    FloatReg
    readFloatRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE
    {
        numFpRegReads++;

        const int fp_reg = si->srcRegIdx(idx) - TheISA::FP_Reg_Base;
        return thread->readFloatReg(fp_reg);
    }

    /**
     * Read floating-point source operand idx of si in its binary
     * format rather than by value.
     */
    FloatRegBits
    readFloatRegOperandBits(const StaticInst *si, int idx) M5_ATTR_OVERRIDE
    {
        numFpRegReads++;

        const int fp_reg = si->srcRegIdx(idx) - TheISA::FP_Reg_Base;
        return thread->readFloatRegBits(fp_reg);
    }

    /**
     * Write floating-point destination operand idx of si by value.
     * The register index is rebased by TheISA::FP_Reg_Base.
     */
    void
    setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
        M5_ATTR_OVERRIDE
    {
        numFpRegWrites++;

        const int fp_reg = si->destRegIdx(idx) - TheISA::FP_Reg_Base;
        thread->setFloatReg(fp_reg, val);
    }

    /**
     * Write the binary representation val to floating-point
     * destination operand idx of si.
     */
    void
    setFloatRegOperandBits(const StaticInst *si, int idx, FloatRegBits val)
        M5_ATTR_OVERRIDE
    {
        numFpRegWrites++;

        const int fp_reg = si->destRegIdx(idx) - TheISA::FP_Reg_Base;
        thread->setFloatRegBits(fp_reg, val);
    }

    /**
     * Read condition-code source operand idx of si.
     * The register index is rebased by TheISA::CC_Reg_Base.
     */
    CCReg
    readCCRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE
    {
        numCCRegReads++;

        const int cc_reg = si->srcRegIdx(idx) - TheISA::CC_Reg_Base;
        return thread->readCCReg(cc_reg);
    }

    /**
     * Write condition-code destination operand idx of si.
     * The register index is rebased by TheISA::CC_Reg_Base.
     */
    void
    setCCRegOperand(const StaticInst *si, int idx, CCReg val)
        M5_ATTR_OVERRIDE
    {
        numCCRegWrites++;

        const int cc_reg = si->destRegIdx(idx) - TheISA::CC_Reg_Base;
        thread->setCCReg(cc_reg, val);
    }

    /**
     * Read miscellaneous source operand idx of si. The register index
     * is rebased by TheISA::Misc_Reg_Base; the access is counted as an
     * integer register read.
     */
    MiscReg
    readMiscRegOperand(const StaticInst *si, int idx) M5_ATTR_OVERRIDE
    {
        numIntRegReads++;

        const int misc_idx = si->srcRegIdx(idx) - TheISA::Misc_Reg_Base;
        return thread->readMiscReg(misc_idx);
    }

    /**
     * Write miscellaneous destination operand idx of si. The register
     * index is rebased by TheISA::Misc_Reg_Base; the access is counted
     * as an integer register write.
     */
    void
    setMiscRegOperand(const StaticInst *si, int idx, const MiscReg &val)
        M5_ATTR_OVERRIDE
    {
        numIntRegWrites++;

        const int misc_idx = si->destRegIdx(idx) - TheISA::Misc_Reg_Base;
        thread->setMiscReg(misc_idx, val);
    }

    /**
     * Read a miscellaneous register, including any architectural side
     * effects of the read. Counted as an integer register read.
     */
    MiscReg
    readMiscReg(int misc_reg) M5_ATTR_OVERRIDE
    {
        numIntRegReads++;

        return thread->readMiscReg(misc_reg);
    }

    /**
     * Write a miscellaneous register, including any architectural side
     * effects of the write. Counted as an integer register write.
     */
    void
    setMiscReg(int misc_reg, const MiscReg &val) M5_ATTR_OVERRIDE
    {
        numIntRegWrites++;

        thread->setMiscReg(misc_reg, val);
    }

    /** Return the thread's current PC state. */
    PCState
    pcState() const M5_ATTR_OVERRIDE
    {
        return thread->pcState();
    }

    /** Replace the thread's PC state with val. */
    void
    pcState(const PCState &val) M5_ATTR_OVERRIDE
    {
        thread->pcState(val);
    }

    /**
     * Record the effective address of the instruction. Not implemented
     * for this context: the call panics.
     *
     * @note Only valid for memory ops.
     */
    void
    setEA(Addr EA) M5_ATTR_OVERRIDE
    {
        panic("BaseSimpleCPU::setEA() not implemented\n");
    }

    /**
     * Get the effective address of the instruction. Not implemented
     * for this context: the call panics.
     *
     * @note Only valid for memory ops.
     */
    Addr
    getEA() const M5_ATTR_OVERRIDE
    {
        panic("BaseSimpleCPU::getEA() not implemented\n");
    }

    /** Hand a memory read over to the owning CPU's readMem(). */
    Fault
    readMem(Addr addr, uint8_t *data, unsigned int size,
            unsigned int flags) M5_ATTR_OVERRIDE
    {
        return cpu->readMem(addr, data, size, flags);
    }

    /** Hand a memory write over to the owning CPU's writeMem(). */
    Fault
    writeMem(uint8_t *data, unsigned int size, Addr addr,
             unsigned int flags, uint64_t *res) M5_ATTR_OVERRIDE
    {
        return cpu->writeMem(data, size, addr, flags, res);
    }

    /** Set the number of consecutive store conditional failures. */
    void
    setStCondFailures(unsigned int sc_failures) M5_ATTR_OVERRIDE
    {
        thread->setStCondFailures(sc_failures);
    }

    /** Return the number of consecutive store conditional failures. */
    unsigned int
    readStCondFailures() const M5_ATTR_OVERRIDE
    {
        return thread->readStCondFailures();
    }

    /**
     * Execute the syscall identified by callnum on the thread.
     * Panics when running in full-system mode.
     */
    void
    syscall(int64_t callnum) M5_ATTR_OVERRIDE
    {
        if (FullSystem) {
            panic("Syscall emulation isn't available in FS mode.");
        }

        thread->syscall(callnum);
    }

    /** Return the ThreadContext obtained from the thread. */
    ThreadContext *
    tcBase() M5_ATTR_OVERRIDE
    {
        return thread->getTC();
    }

    /**
     * Somewhat Alpha-specific hook for returning from an error or
     * interrupt; handled by the thread.
     */
    Fault
    hwrei() M5_ATTR_OVERRIDE
    {
        return thread->hwrei();
    }

    /**
     * Check for special simulator handling of specific PAL calls. A
     * false return value suppresses the actual PAL call.
     */
    bool
    simPalCheck(int palFunc) M5_ATTR_OVERRIDE
    {
        return thread->simPalCheck(palFunc);
    }

    /** Return the predicate flag as reported by the thread. */
    bool
    readPredicate() M5_ATTR_OVERRIDE
    {
        return thread->readPredicate();
    }

    /**
     * Store the predicate flag in the thread and mirror it into the
     * CPU's trace record when one is attached.
     */
    void
    setPredicate(bool val) M5_ATTR_OVERRIDE
    {
        thread->setPredicate(val);

        if (cpu->traceData)
            cpu->traceData->setPredicate(val);
    }

    /** Invalidate a page in the DTLB <i>and</i> ITLB. */
    void
    demapPage(Addr vaddr, uint64_t asn) M5_ATTR_OVERRIDE
    {
        thread->demapPage(vaddr, asn);
    }

    /** Arm the owning CPU's address monitor for address. */
    void
    armMonitor(Addr address) M5_ATTR_OVERRIDE
    {
        cpu->armMonitor(address);
    }

    /** Forward an mwait on pkt to the owning CPU. */
    bool
    mwait(PacketPtr pkt) M5_ATTR_OVERRIDE
    {
        return cpu->mwait(pkt);
    }

    /** Forward an atomic mwait to the owning CPU, using the thread's DTB. */
    void
    mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE
    {
        cpu->mwaitAtomic(tc, thread->dtb);
    }

    /** Return the address monitor obtained from the owning CPU. */
    AddressMonitor *
    getAddrMonitor() M5_ATTR_OVERRIDE
    {
        return cpu->getCpuAddrMonitor();
    }

#if THE_ISA == MIPS_ISA
    /** Cross-thread register read; unsupported here, always panics. */
    MiscReg
    readRegOtherThread(int regIdx, ThreadID tid = InvalidThreadID)
        M5_ATTR_OVERRIDE
    {
        panic("Simple CPU models do not support multithreaded "
              "register access.");
    }

    /** Cross-thread register write; unsupported here, always panics. */
    void
    setRegOtherThread(int regIdx, MiscReg val,
                      ThreadID tid = InvalidThreadID) M5_ATTR_OVERRIDE
    {
        panic("Simple CPU models do not support multithreaded "
              "register access.");
    }
#endif
};
#endif // __CPU_SIMPLE_EXEC_CONTEXT_HH__

View file

@ -1,6 +1,6 @@
/*
* Copyright 2014 Google, Inc.
* Copyright (c) 2010-2013 ARM Limited
* Copyright (c) 2010-2013,2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@ -67,18 +67,7 @@ using namespace TheISA;
void
TimingSimpleCPU::init()
{
BaseCPU::init();
// Initialise the ThreadContext's memory proxies
tcBase()->initMemProxies(tcBase());
if (FullSystem && !params()->switched_out) {
for (int i = 0; i < threadContexts.size(); ++i) {
ThreadContext *tc = threadContexts[i];
// initialize CPU, including PC
TheISA::initCPU(tc, _cpuId);
}
}
BaseSimpleCPU::init();
}
void
@ -111,9 +100,10 @@ TimingSimpleCPU::drain()
if (_status == Idle ||
(_status == BaseSimpleCPU::Running && isDrained())) {
DPRINTF(Drain, "No need to drain.\n");
activeThreads.clear();
return DrainState::Drained;
} else {
DPRINTF(Drain, "Requesting drain: %s\n", pcState());
DPRINTF(Drain, "Requesting drain.\n");
// The fetch event can become descheduled if a drain didn't
// succeed on the first attempt. We need to reschedule it if
@ -136,17 +126,27 @@ TimingSimpleCPU::drainResume()
verifyMemoryMode();
assert(!threadContexts.empty());
if (threadContexts.size() > 1)
fatal("The timing CPU only supports one thread.\n");
if (thread->status() == ThreadContext::Active) {
schedule(fetchEvent, nextCycle());
_status = BaseSimpleCPU::Running;
notIdleFraction = 1;
} else {
_status = BaseSimpleCPU::Idle;
notIdleFraction = 0;
for (ThreadID tid = 0; tid < numThreads; tid++) {
if (threadInfo[tid]->thread->status() == ThreadContext::Active) {
threadInfo[tid]->notIdleFraction = 1;
activeThreads.push_back(tid);
_status = BaseSimpleCPU::Running;
// Fetch if any threads active
if (!fetchEvent.scheduled()) {
schedule(fetchEvent, nextCycle());
}
} else {
threadInfo[tid]->notIdleFraction = 0;
}
}
system->totalNumInsts = 0;
}
bool
@ -155,7 +155,7 @@ TimingSimpleCPU::tryCompleteDrain()
if (drainState() != DrainState::Draining)
return false;
DPRINTF(Drain, "tryCompleteDrain: %s\n", pcState());
DPRINTF(Drain, "tryCompleteDrain.\n");
if (!isDrained())
return false;
@ -168,12 +168,15 @@ TimingSimpleCPU::tryCompleteDrain()
void
TimingSimpleCPU::switchOut()
{
SimpleExecContext& t_info = *threadInfo[curThread];
M5_VAR_USED SimpleThread* thread = t_info.thread;
BaseSimpleCPU::switchOut();
assert(!fetchEvent.scheduled());
assert(_status == BaseSimpleCPU::Running || _status == Idle);
assert(!stayAtPC);
assert(microPC() == 0);
assert(!t_info.stayAtPC);
assert(thread->microPC() == 0);
updateCycleCounts();
}
@ -201,16 +204,20 @@ TimingSimpleCPU::activateContext(ThreadID thread_num)
{
DPRINTF(SimpleCPU, "ActivateContext %d\n", thread_num);
assert(thread_num == 0);
assert(thread);
assert(thread_num < numThreads);
assert(_status == Idle);
notIdleFraction = 1;
threadInfo[thread_num]->notIdleFraction = 1;
if (_status == BaseSimpleCPU::Idle)
_status = BaseSimpleCPU::Running;
// kick things off by initiating the fetch of the next instruction
if (!fetchEvent.scheduled())
schedule(fetchEvent, clockEdge(Cycles(0)));
if (std::find(activeThreads.begin(), activeThreads.end(), thread_num)
== activeThreads.end()) {
activeThreads.push_back(thread_num);
}
}
@ -219,24 +226,31 @@ TimingSimpleCPU::suspendContext(ThreadID thread_num)
{
DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
assert(thread_num == 0);
assert(thread);
assert(thread_num < numThreads);
activeThreads.remove(thread_num);
if (_status == Idle)
return;
assert(_status == BaseSimpleCPU::Running);
// just change status to Idle... if status != Running,
// completeInst() will not initiate fetch of next instruction.
threadInfo[thread_num]->notIdleFraction = 0;
notIdleFraction = 0;
if (activeThreads.empty()) {
_status = Idle;
if (fetchEvent.scheduled()) {
deschedule(fetchEvent);
}
}
}
bool
TimingSimpleCPU::handleReadPacket(PacketPtr pkt)
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
RequestPtr req = pkt->req;
// We're about to issue a locked load, so tell the monitor
@ -264,6 +278,9 @@ void
TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res,
bool read)
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
PacketPtr pkt = buildPacket(req, read);
pkt->dataDynamic<uint8_t>(data);
if (req->getFlags().isSet(Request::NO_ACCESS)) {
@ -389,9 +406,12 @@ Fault
TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
unsigned size, unsigned flags)
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
Fault fault;
const int asid = 0;
const ThreadID tid = 0;
const ThreadID tid = curThread;
const Addr pc = thread->instAddr();
unsigned block_size = cacheLineSize();
BaseTLB::Mode mode = BaseTLB::Read;
@ -400,7 +420,8 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
traceData->setMem(addr, size, flags);
RequestPtr req = new Request(asid, addr, size,
flags, dataMasterId(), pc, _cpuId, tid);
flags, dataMasterId(), pc,
thread->contextId(), tid);
req->taskId(taskId());
@ -421,14 +442,14 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
DataTranslation<TimingSimpleCPU *> *trans2 =
new DataTranslation<TimingSimpleCPU *>(this, state, 1);
thread->dtb->translateTiming(req1, tc, trans1, mode);
thread->dtb->translateTiming(req2, tc, trans2, mode);
thread->dtb->translateTiming(req1, thread->getTC(), trans1, mode);
thread->dtb->translateTiming(req2, thread->getTC(), trans2, mode);
} else {
WholeTranslationState *state =
new WholeTranslationState(req, new uint8_t[size], NULL, mode);
DataTranslation<TimingSimpleCPU *> *translation
= new DataTranslation<TimingSimpleCPU *>(this, state);
thread->dtb->translateTiming(req, tc, translation, mode);
thread->dtb->translateTiming(req, thread->getTC(), translation, mode);
}
return NoFault;
@ -437,6 +458,9 @@ TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
bool
TimingSimpleCPU::handleWritePacket()
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
RequestPtr req = dcache_pkt->req;
if (req->isMmappedIpr()) {
Cycles delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
@ -457,9 +481,12 @@ Fault
TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
Addr addr, unsigned flags, uint64_t *res)
{
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
uint8_t *newData = new uint8_t[size];
const int asid = 0;
const ThreadID tid = 0;
const ThreadID tid = curThread;
const Addr pc = thread->instAddr();
unsigned block_size = cacheLineSize();
BaseTLB::Mode mode = BaseTLB::Write;
@ -476,7 +503,8 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
traceData->setMem(addr, size, flags);
RequestPtr req = new Request(asid, addr, size,
flags, dataMasterId(), pc, _cpuId, tid);
flags, dataMasterId(), pc,
thread->contextId(), tid);
req->taskId(taskId());
@ -496,14 +524,14 @@ TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
DataTranslation<TimingSimpleCPU *> *trans2 =
new DataTranslation<TimingSimpleCPU *>(this, state, 1);
thread->dtb->translateTiming(req1, tc, trans1, mode);
thread->dtb->translateTiming(req2, tc, trans2, mode);
thread->dtb->translateTiming(req1, thread->getTC(), trans1, mode);
thread->dtb->translateTiming(req2, thread->getTC(), trans2, mode);
} else {
WholeTranslationState *state =
new WholeTranslationState(req, newData, res, mode);
DataTranslation<TimingSimpleCPU *> *translation =
new DataTranslation<TimingSimpleCPU *>(this, state);
thread->dtb->translateTiming(req, tc, translation, mode);
thread->dtb->translateTiming(req, thread->getTC(), translation, mode);
}
// Translation faults will be returned via finishTranslation()
@ -540,6 +568,12 @@ TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
void
TimingSimpleCPU::fetch()
{
// Change thread if multi-threaded
swapActiveThread();
SimpleExecContext &t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
DPRINTF(SimpleCPU, "Fetch\n");
if (!curStaticInst || !curStaticInst->isDelayedCommit()) {
@ -552,17 +586,18 @@ TimingSimpleCPU::fetch()
return;
TheISA::PCState pcState = thread->pcState();
bool needToFetch = !isRomMicroPC(pcState.microPC()) && !curMacroStaticInst;
bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
!curMacroStaticInst;
if (needToFetch) {
_status = BaseSimpleCPU::Running;
Request *ifetch_req = new Request();
ifetch_req->taskId(taskId());
ifetch_req->setThreadContext(_cpuId, /* thread ID */ 0);
ifetch_req->setThreadContext(thread->contextId(), curThread);
setupFetchRequest(ifetch_req);
DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr());
thread->itb->translateTiming(ifetch_req, tc, &fetchTranslation,
BaseTLB::Execute);
thread->itb->translateTiming(ifetch_req, thread->getTC(),
&fetchTranslation, BaseTLB::Execute);
} else {
_status = IcacheWaitResponse;
completeIfetch(NULL);
@ -607,6 +642,8 @@ TimingSimpleCPU::sendFetch(const Fault &fault, RequestPtr req,
void
TimingSimpleCPU::advanceInst(const Fault &fault)
{
SimpleExecContext &t_info = *threadInfo[curThread];
if (_status == Faulting)
return;
@ -619,7 +656,7 @@ TimingSimpleCPU::advanceInst(const Fault &fault)
}
if (!stayAtPC)
if (!t_info.stayAtPC)
advancePC(fault);
if (tryCompleteDrain())
@ -637,6 +674,8 @@ TimingSimpleCPU::advanceInst(const Fault &fault)
void
TimingSimpleCPU::completeIfetch(PacketPtr pkt)
{
SimpleExecContext& t_info = *threadInfo[curThread];
DPRINTF(SimpleCPU, "Complete ICache Fetch for addr %#x\n", pkt ?
pkt->getAddr() : 0);
@ -656,7 +695,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
preExecute();
if (curStaticInst && curStaticInst->isMemRef()) {
// load or store: just send to dcache
Fault fault = curStaticInst->initiateAcc(this, traceData);
Fault fault = curStaticInst->initiateAcc(&t_info, traceData);
// If we're not running now the instruction will complete in a dcache
// response callback or the instruction faulted and has started an
@ -677,7 +716,7 @@ TimingSimpleCPU::completeIfetch(PacketPtr pkt)
}
} else if (curStaticInst) {
// non-memory instruction: execute completely now
Fault fault = curStaticInst->execute(this, traceData);
Fault fault = curStaticInst->execute(&t_info, traceData);
// keep an instruction count
if (fault == NoFault)
@ -776,7 +815,8 @@ TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
_status = BaseSimpleCPU::Running;
Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
Fault fault = curStaticInst->completeAcc(pkt, threadInfo[curThread],
traceData);
// keep an instruction count
if (fault == NoFault)
@ -810,17 +850,20 @@ void
TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
{
// X86 ISA: Snooping an invalidation for monitor/mwait
if(cpu->getAddrMonitor()->doMonitor(pkt)) {
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
cpu->wakeup();
}
TheISA::handleLockedSnoop(cpu->thread, pkt, cacheBlockMask);
for (auto &t_info : cpu->threadInfo) {
TheISA::handleLockedSnoop(t_info->thread, pkt, cacheBlockMask);
}
}
void
TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
{
// X86 ISA: Snooping an invalidation for monitor/mwait
if(cpu->getAddrMonitor()->doMonitor(pkt)) {
if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
cpu->wakeup();
}
}
@ -930,8 +973,5 @@ TimingSimpleCPU::printAddr(Addr a)
/**
 * Build the TimingSimpleCPU described by these parameters.
 * Forces numThreads to 1 and, outside full-system mode, panics unless
 * exactly one workload is configured.
 */
TimingSimpleCPU *
TimingSimpleCPUParams::create()
{
    numThreads = 1;

    if (!(FullSystem || workload.size() == 1)) {
        panic("only one workload allowed");
    }

    return new TimingSimpleCPU(this);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012-2013 ARM Limited
* Copyright (c) 2012-2013,2015 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
@ -44,6 +44,7 @@
#define __CPU_SIMPLE_TIMING_HH__
#include "cpu/simple/base.hh"
#include "cpu/simple/exec_context.hh"
#include "cpu/translation.hh"
#include "params/TimingSimpleCPU.hh"
@ -342,7 +343,11 @@ class TimingSimpleCPU : public BaseSimpleCPU
* </ul>
*/
bool isDrained() {
return microPC() == 0 && !stayAtPC && !fetchEvent.scheduled();
SimpleExecContext& t_info = *threadInfo[curThread];
SimpleThread* thread = t_info.thread;
return thread->microPC() == 0 && !t_info.stayAtPC &&
!fetchEvent.scheduled();
}
/**

View file

@ -29,5 +29,6 @@
# Two separate LiveProcess instances, both running the 'hello' binary.
process1 = LiveProcess(cmd = 'hello', executable = binpath('hello'))
process2 = LiveProcess(cmd = 'hello', executable = binpath('hello'))
# Flag the system as multi-threaded; cpu[0] below is given two threads.
root.system.multi_thread = True
# Attach both processes to cpu[0] and configure it with two threads.
root.system.cpu[0].workload = [process1, process2]
root.system.cpu[0].numThreads = 2