From 6629d9b2bc58a885bfebce1517fd12483497b6e4 Mon Sep 17 00:00:00 2001 From: Steve Reinhardt Date: Wed, 16 Jul 2008 11:10:33 -0700 Subject: [PATCH] mem: use single BadAddr responder per system. Previously there was one per bus, which caused some coherence problems when more than one decided to respond. Now there is just one on the main memory bus. The default bus responder on all other buses is now the downstream cache's cpu_side port. Caches no longer need to do address range filtering; instead, we just have a simple flag to prevent snoops from propagating to the I/O bus. --- configs/common/Caches.py | 1 + configs/common/FSConfig.py | 13 ++- configs/example/fs.py | 3 +- src/dev/Device.py | 1 + src/mem/Bus.py | 6 +- src/mem/bus.cc | 3 +- src/mem/cache/BaseCache.py | 8 +- src/mem/cache/base.cc | 9 +- src/mem/cache/base.hh | 20 +++-- src/mem/cache/cache.hh | 6 +- src/mem/cache/cache_impl.hh | 92 ++++++++++----------- tests/configs/tsunami-o3-dual.py | 4 +- tests/configs/tsunami-o3.py | 4 +- tests/configs/tsunami-simple-atomic-dual.py | 4 +- tests/configs/tsunami-simple-atomic.py | 4 +- tests/configs/tsunami-simple-timing-dual.py | 4 +- tests/configs/tsunami-simple-timing.py | 4 +- 17 files changed, 92 insertions(+), 94 deletions(-) diff --git a/configs/common/Caches.py b/configs/common/Caches.py index f1ea957b5..1c3b089c7 100644 --- a/configs/common/Caches.py +++ b/configs/common/Caches.py @@ -50,3 +50,4 @@ class IOCache(BaseCache): mshrs = 20 size = '1kB' tgts_per_mshr = 12 + forward_snoops = False diff --git a/configs/common/FSConfig.py b/configs/common/FSConfig.py index 974003005..a9cd24ba3 100644 --- a/configs/common/FSConfig.py +++ b/configs/common/FSConfig.py @@ -38,6 +38,11 @@ class CowIdeDisk(IdeDisk): def childImage(self, ci): self.image.child.image_file = ci +class MemBus(Bus): + badaddr_responder = BadAddr() + default = Self.badaddr_responder.pio + + def makeLinuxAlphaSystem(mem_mode, mdesc = None): class BaseTsunami(Tsunami): ethernet = NSGigE(pci_bus=0, pci_dev=1, pci_func=0) @@ -50,7 +55,7 @@ def makeLinuxAlphaSystem(mem_mode, mdesc = None): mdesc = SysConfig() self.readfile = mdesc.script() self.iobus = Bus(bus_id=0) - self.membus = Bus(bus_id=1) + self.membus = MemBus(bus_id=1) self.bridge = Bridge(delay='50ns', nack_delay='4ns') self.physmem = PhysicalMemory(range = AddrRange(mdesc.mem())) self.bridge.side_a = self.iobus.port @@ -90,7 +95,7 @@ def makeSparcSystem(mem_mode, mdesc = None): mdesc = SysConfig() self.readfile = mdesc.script() self.iobus = Bus(bus_id=0) - self.membus = Bus(bus_id=1) + self.membus = MemBus(bus_id=1) self.bridge = Bridge(delay='50ns', nack_delay='4ns') self.t1000 = T1000() self.t1000.attachOnChipIO(self.membus) @@ -130,7 +135,7 @@ def makeLinuxMipsSystem(mem_mode, mdesc = None): mdesc = SysConfig() self.readfile = mdesc.script() self.iobus = Bus(bus_id=0) - self.membus = Bus(bus_id=1) + self.membus = MemBus(bus_id=1) self.bridge = Bridge(delay='50ns', nack_delay='4ns') self.physmem = PhysicalMemory(range = AddrRange('1GB')) self.bridge.side_a = self.iobus.port @@ -170,7 +175,7 @@ def makeX86System(mem_mode, mdesc = None, self = None): self.readfile = mdesc.script() # Physical memory - self.membus = Bus(bus_id=1) + self.membus = MemBus(bus_id=1) self.physmem = PhysicalMemory(range = AddrRange(mdesc.mem())) self.physmem.port = self.membus.port diff --git a/configs/example/fs.py b/configs/example/fs.py index c155d0222..c013a97ae 100644 --- a/configs/example/fs.py +++ b/configs/example/fs.py @@ -126,8 +126,7 @@ test_sys.cpu = [TestCPUClass(cpu_id=i) for i in xrange(np)] if options.caches: test_sys.bridge.filter_ranges_a=[AddrRange(0, Addr.max)] test_sys.bridge.filter_ranges_b=[AddrRange(0, size='8GB')] - test_sys.iocache = IOCache(mem_side_filter_ranges=[AddrRange(0, Addr.max)], - cpu_side_filter_ranges=[AddrRange(0x8000000000, Addr.max)]) + test_sys.iocache = IOCache(addr_range=AddrRange(0, size='8GB')) test_sys.iocache.cpu_side = test_sys.iobus.port test_sys.iocache.mem_side = test_sys.membus.port diff --git a/src/dev/Device.py b/src/dev/Device.py index adf262f26..4babffa18 100644 --- a/src/dev/Device.py +++ b/src/dev/Device.py @@ -66,6 +66,7 @@ class IsaFake(BasicPioDevice): warn_access = Param.String("", "String to print when device is accessed") class BadAddr(IsaFake): + pio_addr = 0 ret_bad_addr = Param.Bool(True, "Return pkt status bad address on access") diff --git a/src/mem/Bus.py b/src/mem/Bus.py index f4ea9a73b..0f113cc09 100644 --- a/src/mem/Bus.py +++ b/src/mem/Bus.py @@ -43,8 +43,4 @@ class Bus(MemObject): width = Param.Int(64, "bus width (bytes)") responder_set = Param.Bool(False, "Did the user specify a default responder.") block_size = Param.Int(64, "The default block size if one isn't set by a device attached to the bus.") - if build_env['FULL_SYSTEM']: - responder = BadAddr(pio_addr=0x0, pio_latency="1ps") - default = Port(Self.responder.pio, "Default port for requests that aren't handled by a device.") - else: - default = Port("Default port for requests that aren't handled by a device.") + default = Port("Default port for requests that aren't handled by a device.") diff --git a/src/mem/bus.cc b/src/mem/bus.cc index 2eb823051..b9cdff242 100644 --- a/src/mem/bus.cc +++ b/src/mem/bus.cc @@ -219,7 +219,7 @@ Bus::recvTiming(PacketPtr pkt) } } } else { - assert(dest >= 0 && dest < maxId); + assert(dest < maxId); assert(dest != src); // catch infinite loops dest_port_id = dest; if (dest_port_id == defaultId) @@ -238,7 +238,6 @@ Bus::recvTiming(PacketPtr pkt) if (dest_port_id == src) { // Must be forwarded snoop up from below... assert(dest == Packet::Broadcast); - assert(src != defaultId); // catch infinite loops } else { // send to actual target if (!dest_port->sendTiming(pkt)) { diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py index bef1b45d2..bdef07cb4 100644 --- a/src/mem/cache/BaseCache.py +++ b/src/mem/cache/BaseCache.py @@ -45,6 +45,8 @@ class BaseCache(MemObject): "always service demand misses first") repl = Param.Repl(NULL, "replacement policy") size = Param.MemorySize("capacity in bytes") + forward_snoops = Param.Bool(True, + "forward snoops from mem side to cpu side") subblock_size = Param.Int(0, "Size of subblock in IIC used for compression") tgts_per_mshr = Param.Int("max number of accesses per MSHR") @@ -74,8 +76,4 @@ class BaseCache(MemObject): "Only prefetch on data not on instruction accesses") cpu_side = Port("Port on side closer to CPU") mem_side = Port("Port on side closer to MEM") - cpu_side_filter_ranges = VectorParam.AddrRange([], - "What addresses shouldn't be passed through the side of the bridge") - mem_side_filter_ranges = VectorParam.AddrRange([], - "What addresses shouldn't be passed through the side of the bridge") - addr_range = VectorParam.AddrRange(AllMemory, "The address range in bytes") + addr_range = Param.AddrRange(AllMemory, "The address range for the CPU-side port") diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc index 956375530..29fa97544 100644 --- a/src/mem/cache/base.cc +++ b/src/mem/cache/base.cc @@ -41,11 +41,10 @@ using namespace std; BaseCache::CachePort::CachePort(const std::string &_name, BaseCache *_cache, - const std::string &_label, - std::vector > filter_ranges) + const std::string &_label) : SimpleTimingPort(_name, _cache), cache(_cache), label(_label), otherPort(NULL), - blocked(false), mustSendRetry(false), filterRanges(filter_ranges) + blocked(false), mustSendRetry(false) { } @@ -58,10 +57,12 @@ BaseCache::BaseCache(const Params *p) blkSize(p->block_size), hitLatency(p->latency), numTarget(p->tgts_per_mshr), + forwardSnoops(p->forward_snoops), blocked(0), noTargetMSHR(NULL), missCount(p->max_miss_count), - drainEvent(NULL) + drainEvent(NULL), + addrRange(p->addr_range) { } diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh index 4319717e5..d33c655d7 100644 --- a/src/mem/cache/base.hh +++ b/src/mem/cache/base.hh @@ -100,8 +100,7 @@ class BaseCache : public MemObject protected: CachePort(const std::string &_name, BaseCache *_cache, - const std::string &_label, - std::vector > filter_ranges); + const std::string &_label); virtual void recvStatusChange(Status status); @@ -129,9 +128,6 @@ class BaseCache : public MemObject bool mustSendRetry; - /** filter ranges */ - std::vector > filterRanges; - void requestBus(RequestCause cause, Tick time) { DPRINTF(CachePort, "Asserting bus request for cause %d\n", cause); @@ -194,8 +190,8 @@ class BaseCache : public MemObject /** The number of targets for each MSHR. */ const int numTarget; - /** Increasing order number assigned to each incoming request. */ - uint64_t order; + /** Do we forward snoops from mem side port through to cpu side port? */ + bool forwardSnoops; /** * Bit vector of the blocking reasons for the access path. @@ -203,6 +199,9 @@ class BaseCache : public MemObject */ uint8_t blocked; + /** Increasing order number assigned to each incoming request. */ + uint64_t order; + /** Stores time the cache blocked for statistics. */ Tick blockedCycle; @@ -215,6 +214,11 @@ class BaseCache : public MemObject /** The drain event. */ Event *drainEvent; + /** + * The address range to which the cache responds on the CPU side. + * Normally this is all possible memory addresses. */ + Range addrRange; + public: // Statistics /** @@ -377,6 +381,8 @@ class BaseCache : public MemObject Addr blockAlign(Addr addr) const { return (addr & ~(blkSize - 1)); } + const Range &getAddrRange() const { return addrRange; } + MSHR *allocateMissBuffer(PacketPtr pkt, Tick time, bool requestBus) { assert(!pkt->req->isUncacheable()); diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh index 4570b067b..6cb6233f5 100644 --- a/src/mem/cache/cache.hh +++ b/src/mem/cache/cache.hh @@ -71,8 +71,7 @@ class Cache : public BaseCache public: CpuSidePort(const std::string &_name, Cache *_cache, - const std::string &_label, - std::vector > filterRanges); + const std::string &_label); // BaseCache::CachePort just has a BaseCache *; this function // lets us get back the type info we lost when we stored the @@ -96,8 +95,7 @@ class Cache : public BaseCache public: MemSidePort(const std::string &_name, Cache *_cache, - const std::string &_label, - std::vector > filterRanges); + const std::string &_label); // BaseCache::CachePort just has a BaseCache *; this function // lets us get back the type info we lost when we stored the diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh index ea8ae0046..82db7750c 100644 --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -40,7 +40,7 @@ #include "sim/host.hh" #include "base/fast_alloc.hh" #include "base/misc.hh" -#include "base/range_ops.hh" +#include "base/range.hh" #include "mem/cache/cache.hh" #include "mem/cache/blk.hh" @@ -62,11 +62,9 @@ Cache::Cache(const Params *p, TagStore *tags, BasePrefetcher *pf) tempBlock->data = new uint8_t[blkSize]; cpuSidePort = new CpuSidePort(p->name + "-cpu_side_port", this, - "CpuSidePort", - p->cpu_side_filter_ranges); + "CpuSidePort"); memSidePort = new MemSidePort(p->name + "-mem_side_port", this, - "MemSidePort", - p->mem_side_filter_ranges); + "MemSidePort"); cpuSidePort->setOtherPort(memSidePort); memSidePort->setOtherPort(cpuSidePort); @@ -96,8 +94,7 @@ Cache::getPort(const std::string &if_name, int idx) } else if (if_name == "functional") { CpuSidePort *funcPort = new CpuSidePort(name() + "-cpu_side_funcport", this, - "CpuSideFuncPort", - std::vector >()); + "CpuSideFuncPort"); funcPort->setOtherPort(memSidePort); return funcPort; } else { @@ -1063,35 +1060,37 @@ Cache::handleSnoop(PacketPtr pkt, BlkType *blk, assert(!(pending_inval && !is_deferred)); assert(pkt->isRequest()); - // first propagate snoop upward to see if anyone above us wants to - // handle it. save & restore packet src since it will get - // rewritten to be relative to cpu-side bus (if any) - bool alreadyResponded = pkt->memInhibitAsserted(); - if (is_timing) { - Packet *snoopPkt = new Packet(pkt, true); // clear flags - snoopPkt->setExpressSnoop(); - snoopPkt->senderState = new ForwardResponseRecord(pkt, this); - cpuSidePort->sendTiming(snoopPkt); - if (snoopPkt->memInhibitAsserted()) { - // cache-to-cache response from some upper cache - assert(!alreadyResponded); - pkt->assertMemInhibit(); + if (forwardSnoops) { + // first propagate snoop upward to see if anyone above us wants to + // handle it. save & restore packet src since it will get + // rewritten to be relative to cpu-side bus (if any) + bool alreadyResponded = pkt->memInhibitAsserted(); + if (is_timing) { + Packet *snoopPkt = new Packet(pkt, true); // clear flags + snoopPkt->setExpressSnoop(); + snoopPkt->senderState = new ForwardResponseRecord(pkt, this); + cpuSidePort->sendTiming(snoopPkt); + if (snoopPkt->memInhibitAsserted()) { + // cache-to-cache response from some upper cache + assert(!alreadyResponded); + pkt->assertMemInhibit(); + } else { + delete snoopPkt->senderState; + } + if (snoopPkt->sharedAsserted()) { + pkt->assertShared(); + } + delete snoopPkt; } else { - delete snoopPkt->senderState; + int origSrc = pkt->getSrc(); + cpuSidePort->sendAtomic(pkt); + if (!alreadyResponded && pkt->memInhibitAsserted()) { + // cache-to-cache response from some upper cache: + // forward response to original requester + assert(pkt->isResponse()); + } + pkt->setSrc(origSrc); } - if (snoopPkt->sharedAsserted()) { - pkt->assertShared(); - } - delete snoopPkt; - } else { - int origSrc = pkt->getSrc(); - cpuSidePort->sendAtomic(pkt); - if (!alreadyResponded && pkt->memInhibitAsserted()) { - // cache-to-cache response from some upper cache: - // forward response to original requester - assert(pkt->isResponse()); - } - pkt->setSrc(origSrc); } if (!blk || !blk->isValid()) { @@ -1385,11 +1384,10 @@ void Cache::CpuSidePort:: getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { - // CPU side port doesn't snoop; it's a target only. - bool dummy; - otherPort->getPeerAddressRanges(resp, dummy); - FilterRangeList(filterRanges, resp); + // CPU side port doesn't snoop; it's a target only. It can + // potentially respond to any address. snoop = false; + resp.push_back(myCache()->getAddrRange()); } @@ -1428,9 +1426,8 @@ Cache::CpuSidePort::recvFunctional(PacketPtr pkt) template Cache:: CpuSidePort::CpuSidePort(const std::string &_name, Cache *_cache, - const std::string &_label, - std::vector > filterRanges) - : BaseCache::CachePort(_name, _cache, _label, filterRanges) + const std::string &_label) + : BaseCache::CachePort(_name, _cache, _label) { } @@ -1445,11 +1442,9 @@ void Cache::MemSidePort:: getDeviceAddressRanges(AddrRangeList &resp, bool &snoop) { - otherPort->getPeerAddressRanges(resp, snoop); - FilterRangeList(filterRanges, resp); - - // Memory-side port always snoops, so unconditionally set flag for - // caller. + // Memory-side port always snoops, but never passes requests + // through to targets on the cpu side (so we don't add anything to + // the address range list). snoop = true; } @@ -1581,9 +1576,8 @@ Cache::MemSidePort::processSendEvent() template Cache:: MemSidePort::MemSidePort(const std::string &_name, Cache *_cache, - const std::string &_label, - std::vector > filterRanges) - : BaseCache::CachePort(_name, _cache, _label, filterRanges) + const std::string &_label) + : BaseCache::CachePort(_name, _cache, _label) { // override default send event from SimpleTimingPort delete sendEvent; diff --git a/tests/configs/tsunami-o3-dual.py b/tests/configs/tsunami-o3-dual.py index 5dbfa5a8b..3044f5433 100644 --- a/tests/configs/tsunami-o3-dual.py +++ b/tests/configs/tsunami-o3-dual.py @@ -63,8 +63,8 @@ class IOCache(BaseCache): mshrs = 20 size = '1kB' tgts_per_mshr = 12 - mem_side_filter_ranges=[AddrRange(0, Addr.max)] - cpu_side_filter_ranges=[AddrRange(0x8000000000, Addr.max)] + addr_range=AddrRange(0, size='8GB') + forward_snoops = False #cpu cpus = [ DerivO3CPU(cpu_id=i) for i in xrange(2) ] diff --git a/tests/configs/tsunami-o3.py b/tests/configs/tsunami-o3.py index ee60ea8ae..34fa235bd 100644 --- a/tests/configs/tsunami-o3.py +++ b/tests/configs/tsunami-o3.py @@ -63,8 +63,8 @@ class IOCache(BaseCache): mshrs = 20 size = '1kB' tgts_per_mshr = 12 - mem_side_filter_ranges=[AddrRange(0, Addr.max)] - cpu_side_filter_ranges=[AddrRange(0x8000000000, Addr.max)] + addr_range=AddrRange(0, size='8GB') + forward_snoops = False #cpu cpu = DerivO3CPU(cpu_id=0) diff --git a/tests/configs/tsunami-simple-atomic-dual.py b/tests/configs/tsunami-simple-atomic-dual.py index 0e58d39af..593b02680 100644 --- a/tests/configs/tsunami-simple-atomic-dual.py +++ b/tests/configs/tsunami-simple-atomic-dual.py @@ -62,8 +62,8 @@ class IOCache(BaseCache): mshrs = 20 size = '1kB' tgts_per_mshr = 12 - mem_side_filter_ranges=[AddrRange(0, Addr.max)] - cpu_side_filter_ranges=[AddrRange(0x8000000000, Addr.max)] + addr_range=AddrRange(0, size='8GB') + forward_snoops = False #cpu cpus = [ AtomicSimpleCPU(cpu_id=i) for i in xrange(2) ] diff --git a/tests/configs/tsunami-simple-atomic.py b/tests/configs/tsunami-simple-atomic.py index 2374734ec..0c6feaeac 100644 --- a/tests/configs/tsunami-simple-atomic.py +++ b/tests/configs/tsunami-simple-atomic.py @@ -62,8 +62,8 @@ class IOCache(BaseCache): mshrs = 20 size = '1kB' tgts_per_mshr = 12 - mem_side_filter_ranges=[AddrRange(0, Addr.max)] - cpu_side_filter_ranges=[AddrRange(0x8000000000, Addr.max)] + addr_range=AddrRange(0, size='8GB') + forward_snoops = False #cpu cpu = AtomicSimpleCPU(cpu_id=0) diff --git a/tests/configs/tsunami-simple-timing-dual.py b/tests/configs/tsunami-simple-timing-dual.py index d7c4bb6e8..212449914 100644 --- a/tests/configs/tsunami-simple-timing-dual.py +++ b/tests/configs/tsunami-simple-timing-dual.py @@ -62,8 +62,8 @@ class IOCache(BaseCache): mshrs = 20 size = '1kB' tgts_per_mshr = 12 - mem_side_filter_ranges=[AddrRange(0, Addr.max)] - cpu_side_filter_ranges=[AddrRange(0x8000000000, Addr.max)] + addr_range=AddrRange(0, size='8GB') + forward_snoops = False #cpu cpus = [ TimingSimpleCPU(cpu_id=i) for i in xrange(2) ] diff --git a/tests/configs/tsunami-simple-timing.py b/tests/configs/tsunami-simple-timing.py index 96cd27111..f0eaa08d7 100644 --- a/tests/configs/tsunami-simple-timing.py +++ b/tests/configs/tsunami-simple-timing.py @@ -63,8 +63,8 @@ class IOCache(BaseCache): mshrs = 20 size = '1kB' tgts_per_mshr = 12 - mem_side_filter_ranges=[AddrRange(0, Addr.max)] - cpu_side_filter_ranges=[AddrRange(0x8000000000, Addr.max)] + addr_range=AddrRange(0, size='8GB') + forward_snoops = False #cpu cpu = TimingSimpleCPU(cpu_id=0)