First pass at snooping stuff that compiles and doesn't break.

Still need:
-Handle NACKs on the receive side
-Distinguish top level caches
-Handle responses from caches failing the fast path
-Handle BusError and propagate it
-Fix the invalidate packet associated with snooping in the cache

src/mem/bus.cc:
    Make sure to snoop on functional accesses
src/mem/cache/base_cache.cc:
    Wait to make a request into a response until it is ready to be issued
src/mem/cache/base_cache.hh:
    Support range changes for snoops
    Set up snoop responses for cache->cache transfers
src/mem/cache/cache_impl.hh:
    Only access the cache if it wasn't satisfied by cache->cache transfer
    Handle snoop phases (detect block, then snoop)
    Fix functional access to work properly (still need to fix snoop path for functional accesses)

--HG--
extra : convert_revision : 4c25f11d7a996c1f56f4f7b55dde87a344e5fdf8
This commit is contained in:
Ron Dreslinski 2006-10-05 21:10:03 -04:00
parent 868d112578
commit 45f881a4ce
6 changed files with 237 additions and 20 deletions

View file

@ -252,6 +252,7 @@ Bus::recvFunctional(Packet *pkt)
DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n", DPRINTF(Bus, "recvFunctional: packet src %d dest %d addr 0x%x cmd %s\n",
pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString()); pkt->getSrc(), pkt->getDest(), pkt->getAddr(), pkt->cmdString());
assert(pkt->getDest() == Packet::Broadcast); assert(pkt->getDest() == Packet::Broadcast);
atomicSnoop(pkt);
findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt); findPort(pkt->getAddr(), pkt->getSrc())->sendFunctional(pkt);
} }

View file

@ -199,7 +199,9 @@ BaseCache::CacheEvent::process()
return; return;
} }
//Response //Response
//Know the packet to send, no need to mark in service (must succed) //Know the packet to send
pkt->result = Packet::Success;
pkt->makeTimingResponse();
assert(cachePort->sendTiming(pkt)); assert(cachePort->sendTiming(pkt));
} }

View file

@ -127,6 +127,8 @@ class BaseCache : public MemObject
CachePort *cpuSidePort; CachePort *cpuSidePort;
CachePort *memSidePort; CachePort *memSidePort;
bool snoopRangesSent;
public: public:
virtual Port *getPort(const std::string &if_name, int idx = -1); virtual Port *getPort(const std::string &if_name, int idx = -1);
@ -149,17 +151,22 @@ class BaseCache : public MemObject
void recvStatusChange(Port::Status status, bool isCpuSide) void recvStatusChange(Port::Status status, bool isCpuSide)
{ {
if (status == Port::RangeChange) if (status == Port::RangeChange){
{ if (!isCpuSide) {
if (!isCpuSide)
{
cpuSidePort->sendStatusChange(Port::RangeChange); cpuSidePort->sendStatusChange(Port::RangeChange);
if (topLevelCache && !snoopRangesSent) {
snoopRangesSent = true;
memSidePort->sendStatusChange(Port::RangeChange);
}
} }
else else {
{
memSidePort->sendStatusChange(Port::RangeChange); memSidePort->sendStatusChange(Port::RangeChange);
} }
} }
else if (status == Port::SnoopSquash) {
assert(snoopPhase2);
snoopPhase2 = false;
}
} }
virtual Packet *getPacket() virtual Packet *getPacket()
@ -205,6 +212,10 @@ class BaseCache : public MemObject
/** True if this cache is connected to the CPU. */ /** True if this cache is connected to the CPU. */
bool topLevelCache; bool topLevelCache;
/** True if we are now in phase 2 of the snoop process. */
bool snoopPhase2;
/** Stores time the cache blocked for statistics. */ /** Stores time the cache blocked for statistics. */
Tick blockedCycle; Tick blockedCycle;
@ -332,6 +343,7 @@ class BaseCache : public MemObject
//Start ports at null if more than one is created we should panic //Start ports at null if more than one is created we should panic
cpuSidePort = NULL; cpuSidePort = NULL;
memSidePort = NULL; memSidePort = NULL;
snoopRangesSent = false;
} }
virtual void init(); virtual void init();
@ -519,8 +531,6 @@ class BaseCache : public MemObject
if (!pkt->req->isUncacheable()) { if (!pkt->req->isUncacheable()) {
missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time; missLatency[pkt->cmdToIndex()][pkt->req->getThreadNum()] += time - pkt->time;
} }
pkt->makeTimingResponse();
pkt->result = Packet::Success;
CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt); CacheEvent *reqCpu = new CacheEvent(cpuSidePort, pkt);
reqCpu->schedule(time); reqCpu->schedule(time);
} }
@ -529,10 +539,12 @@ class BaseCache : public MemObject
* Suppliess the data if cache to cache transfers are enabled. * Suppliess the data if cache to cache transfers are enabled.
* @param pkt The bus transaction to fulfill. * @param pkt The bus transaction to fulfill.
*/ */
void respondToSnoop(Packet *pkt) void respondToSnoop(Packet *pkt, Tick time)
{ {
assert("Implement\n" && 0); // assert("Implement\n" && 0);
// mi->respond(pkt,curTick + hitLatency); // mi->respond(pkt,curTick + hitLatency);
CacheEvent *reqMem = new CacheEvent(memSidePort, pkt);
reqMem->schedule(time);
} }
/** /**
@ -551,6 +563,16 @@ class BaseCache : public MemObject
else else
{ {
//This is where snoops get updated //This is where snoops get updated
AddrRangeList dummy;
if (!topLevelCache)
{
cpuSidePort->getPeerAddressRanges(dummy, snoop);
}
else
{
snoop.push_back(RangeSize(0,-1));
}
return; return;
} }
} }

View file

@ -63,14 +63,26 @@ doTimingAccess(Packet *pkt, CachePort *cachePort, bool isCpuSide)
if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) { if (pkt->isWrite() && (pkt->req->getFlags() & LOCKED)) {
pkt->req->setScResult(1); pkt->req->setScResult(1);
} }
access(pkt); if (!(pkt->flags & SATISFIED)) {
access(pkt);
}
} }
else else
{ {
if (pkt->isResponse()) if (pkt->isResponse())
handleResponse(pkt); handleResponse(pkt);
else else {
snoop(pkt); //Check if we are in phase1
if (!snoopPhase2) {
snoopPhase2 = true;
}
else {
//Check if we should do the snoop
if (pkt->flags && SNOOP_COMMIT)
snoop(pkt);
snoopPhase2 = false;
}
}
} }
return true; return true;
} }
@ -117,7 +129,7 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
assert("Can't handle LL/SC on functional path\n"); assert("Can't handle LL/SC on functional path\n");
} }
probe(pkt, true); probe(pkt, false);
//TEMP ALWAYS SUCCESFUL FOR NOW //TEMP ALWAYS SUCCESFUL FOR NOW
pkt->result = Packet::Success; pkt->result = Packet::Success;
} }
@ -126,7 +138,7 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
if (pkt->isResponse()) if (pkt->isResponse())
handleResponse(pkt); handleResponse(pkt);
else else
snoopProbe(pkt, true); snoopProbe(pkt, false);
} }
} }
@ -372,7 +384,7 @@ template<class TagStore, class Buffering, class Coherence>
void void
Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt) Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
{ {
DPRINTF(Cache, "SNOOPING");
Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1)); Addr blk_addr = pkt->getAddr() & ~(Addr(blkSize-1));
BlkType *blk = tags->findBlock(pkt); BlkType *blk = tags->findBlock(pkt);
MSHR *mshr = missQueue->findMSHR(blk_addr); MSHR *mshr = missQueue->findMSHR(blk_addr);
@ -385,7 +397,10 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
//If the outstanding request was an invalidate (upgrade,readex,..) //If the outstanding request was an invalidate (upgrade,readex,..)
//Then we need to ACK the request until we get the data //Then we need to ACK the request until we get the data
//Also NACK if the outstanding request is not a cachefill (writeback) //Also NACK if the outstanding request is not a cachefill (writeback)
pkt->flags |= SATISFIED;
pkt->flags |= NACKED_LINE; pkt->flags |= NACKED_LINE;
assert("Don't detect these on the other side yet\n");
respondToSnoop(pkt, curTick + hitLatency);
return; return;
} }
else { else {
@ -398,6 +413,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
//@todo Make it so that a read to a pending read can't be exclusive now. //@todo Make it so that a read to a pending read can't be exclusive now.
//Set the address so find match works //Set the address so find match works
assert("Don't have invalidates yet\n");
invalidatePkt->addrOverride(pkt->getAddr()); invalidatePkt->addrOverride(pkt->getAddr());
//Append the invalidate on //Append the invalidate on
@ -433,7 +449,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
assert(offset + pkt->getSize() <=blkSize); assert(offset + pkt->getSize() <=blkSize);
memcpy(pkt->getPtr<uint8_t>(), mshr->pkt->getPtr<uint8_t>() + offset, pkt->getSize()); memcpy(pkt->getPtr<uint8_t>(), mshr->pkt->getPtr<uint8_t>() + offset, pkt->getSize());
respondToSnoop(pkt); respondToSnoop(pkt, curTick + hitLatency);
} }
if (pkt->isInvalidate()) { if (pkt->isInvalidate()) {
@ -449,7 +465,7 @@ Cache<TagStore,Buffering,Coherence>::snoop(Packet * &pkt)
bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state); bool satisfy = coherence->handleBusRequest(pkt,blk,mshr, new_state);
if (satisfy) { if (satisfy) {
tags->handleSnoop(blk, new_state, pkt); tags->handleSnoop(blk, new_state, pkt);
respondToSnoop(pkt); respondToSnoop(pkt, curTick + hitLatency);
return; return;
} }
tags->handleSnoop(blk, new_state); tags->handleSnoop(blk, new_state);
@ -517,7 +533,7 @@ Cache<TagStore,Buffering,Coherence>::probe(Packet * &pkt, bool update)
missQueue->findWrites(blk_addr, writes); missQueue->findWrites(blk_addr, writes);
if (!update) { if (!update) {
memSidePort->sendFunctional(pkt); memSidePort->sendFunctional(pkt);
// Check for data in MSHR and writebuffer. // Check for data in MSHR and writebuffer.
if (mshr) { if (mshr) {
warn("Found outstanding miss on an non-update probe"); warn("Found outstanding miss on an non-update probe");

View file

@ -0,0 +1,90 @@
# Copyright (c) 2006 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ron Dreslinski
import m5
from m5.objects import *
m5.AddToPath('../configs/common')
from FullO3Config import *
# --------------------
# Base L1 Cache
# ====================
class L1(BaseCache):
    # Settings shared by every L1 cache in this script; the size and
    # associativity are passed in where the caches are instantiated.

    # Block size and access latency.
    block_size = 64
    latency = 1

    # Miss-handling settings (MSHR count and targets per MSHR).
    mshrs = 4
    tgts_per_mshr = 8

    # Coherence protocol used by the L1 caches.
    protocol = CoherenceProtocol(protocol='moesi')
# ----------------------
# Base L2 Cache
# ----------------------
class L2(BaseCache):
    # Settings for the L2 cache; the size and associativity are passed in
    # where the cache is instantiated.

    # Block size and access latency.
    block_size = 64
    latency = 100

    # Miss-handling settings (MSHR count and targets per MSHR).
    mshrs = 92
    tgts_per_mshr = 16

    # Number of write buffers.
    write_buffers = 8
# Number of CPUs to create; one DetailedO3CPU is built per core.
nb_cores = 4
cpus = [DetailedO3CPU() for i in xrange(nb_cores)]

# The simulated system: the CPUs, a physical memory and the memory bus.
system = System(cpu=cpus,
                physmem=PhysicalMemory(),
                membus=Bus())

# Bus sitting in front of the L2, and the L2 cache attached to it.
system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port

# The memory side of the L2 goes to the memory bus.
system.l2c.mem_side = system.membus.port

# Give each CPU its own pair of 32kB L1 caches (associativity 1 and 4).
for cpu in cpus:
    cpu.addPrivateSplitL1Caches(
        L1(size='32kB', assoc=1),
        L1(size='32kB', assoc=4),
    )
    cpu.mem = cpu.dcache
    # Attach this CPU's L1 caches to the bus in front of the shared L2.
    cpu.connectMemPorts(system.toL2Bus)

# Physical memory hangs off the memory bus.
system.physmem.port = system.membus.port

# -----------------------
# Run setup: build the root object, select the 'timing' memory mode and
# set the trace flags.
# -----------------------
root = Root(system=system)
root.system.mem_mode = 'timing'
root.trace.flags = "Bus Cache"
#root.trace.flags = "BusAddrRanges"

View file

@ -0,0 +1,86 @@
# Copyright (c) 2006 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Ron Dreslinski
import m5
from m5.objects import *
# --------------------
# Base L1 Cache
# ====================
class L1(BaseCache):
    # Settings shared by every L1 cache in this script; the size and
    # associativity are passed in where the caches are instantiated.

    # Block size and access latency.
    block_size = 64
    latency = 1

    # Miss-handling settings (MSHR count and targets per MSHR).
    mshrs = 4
    tgts_per_mshr = 8

    # Coherence protocol used by the L1 caches.
    protocol = CoherenceProtocol(protocol='moesi')
# ----------------------
# Base L2 Cache
# ----------------------
class L2(BaseCache):
    # Settings for the L2 cache; the size and associativity are passed in
    # where the cache is instantiated.

    # Block size and access latency.
    block_size = 64
    latency = 100

    # Miss-handling settings (MSHR count and targets per MSHR).
    mshrs = 92
    tgts_per_mshr = 16

    # Number of write buffers.
    write_buffers = 8
# Number of CPUs to create; one AtomicSimpleCPU is built per core.
nb_cores = 4
cpus = [AtomicSimpleCPU() for i in xrange(nb_cores)]

# The simulated system: the CPUs, a physical memory and the memory bus.
system = System(cpu=cpus,
                physmem=PhysicalMemory(),
                membus=Bus())

# Bus sitting in front of the L2, and the L2 cache attached to it.
system.toL2Bus = Bus()
system.l2c = L2(size='4MB', assoc=8)
system.l2c.cpu_side = system.toL2Bus.port

# The memory side of the L2 goes to the memory bus.
system.l2c.mem_side = system.membus.port

# Give each CPU its own pair of 32kB L1 caches (associativity 1 and 4).
for cpu in cpus:
    cpu.addPrivateSplitL1Caches(
        L1(size='32kB', assoc=1),
        L1(size='32kB', assoc=4),
    )
    cpu.mem = cpu.dcache
    # Attach this CPU's L1 caches to the bus in front of the shared L2.
    cpu.connectMemPorts(system.toL2Bus)

# Physical memory hangs off the memory bus.
system.physmem.port = system.membus.port

# -----------------------
# Run setup: build the root object and select the 'atomic' memory mode.
# -----------------------
root = Root(system=system)
root.system.mem_mode = 'atomic'