sim: Add a system-global option to bypass caches

Virtualized CPUs and the fastmem mode of the atomic CPU require direct
access to physical memory. We currently require caches to be disabled
when using them to prevent chaos. This is not ideal when switching
between hardware virtualized CPUs and other CPU models as it would
require a configuration change on each switch. This changeset
introduces a new version of the atomic memory mode,
'atomic_noncaching', where memory accesses are inserted into the
memory system as atomic accesses, but bypass caches.

To make memory mode tests cleaner, the following methods are added to
the System class:

 * isAtomicMode() -- True if the memory mode is 'atomic' or 'atomic_noncaching'.
 * isTimingMode() -- True if the memory mode is 'timing'.
 * bypassCaches() -- True if caches should be bypassed.

The old getMemoryMode() and setMemoryMode() methods should never be
used from the C++ world anymore.
This commit is contained in:
Andreas Sandberg 2013-02-15 17:40:09 -05:00
parent 1eec115c31
commit b904bd5437
18 changed files with 131 additions and 33 deletions

View file

@ -104,7 +104,7 @@ void
TableWalker::drainResume() TableWalker::drainResume()
{ {
Drainable::drainResume(); Drainable::drainResume();
if ((params()->sys->getMemoryMode() == Enums::timing) && currState) { if (params()->sys->isTimingMode() && currState) {
delete currState; delete currState;
currState = NULL; currState = NULL;
} }

View file

@ -510,7 +510,7 @@ X86ISA::Interrupts::setReg(ApicRegIndex reg, uint32_t val)
message.destMode = low.destMode; message.destMode = low.destMode;
message.level = low.level; message.level = low.level;
message.trigger = low.trigger; message.trigger = low.trigger;
bool timing = sys->getMemoryMode() == Enums::timing; bool timing(sys->isTimingMode());
// Be careful no updates of the delivery status bit get lost. // Be careful no updates of the delivery status bit get lost.
regs[APIC_INTERRUPT_COMMAND_LOW] = low; regs[APIC_INTERRUPT_COMMAND_LOW] = low;
ApicList apics; ApicList apics;

View file

@ -88,7 +88,7 @@ Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
// outstanding requests, see if this request can be coalesced with // outstanding requests, see if this request can be coalesced with
// another one (i.e. either coalesce or start walk) // another one (i.e. either coalesce or start walk)
WalkerState * newState = new WalkerState(this, _translation, _req); WalkerState * newState = new WalkerState(this, _translation, _req);
newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing); newState->initState(_tc, _mode, sys->isTimingMode());
if (currStates.size()) { if (currStates.size()) {
assert(newState->isTiming()); assert(newState->isTiming());
DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size()); DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());

View file

@ -812,7 +812,7 @@ InOrderCPU::init()
void void
InOrderCPU::verifyMemoryMode() const InOrderCPU::verifyMemoryMode() const
{ {
if (system->getMemoryMode() != Enums::timing) { if (!system->isTimingMode()) {
fatal("The in-order CPU requires the memory system to be in " fatal("The in-order CPU requires the memory system to be in "
"'timing' mode.\n"); "'timing' mode.\n");
} }

View file

@ -1316,7 +1316,7 @@ template <class Impl>
void void
FullO3CPU<Impl>::verifyMemoryMode() const FullO3CPU<Impl>::verifyMemoryMode() const
{ {
if (system->getMemoryMode() != Enums::timing) { if (!system->isTimingMode()) {
fatal("The O3 CPU requires the memory system to be in " fatal("The O3 CPU requires the memory system to be in "
"'timing' mode.\n"); "'timing' mode.\n");
} }

View file

@ -212,7 +212,7 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
void void
AtomicSimpleCPU::verifyMemoryMode() const AtomicSimpleCPU::verifyMemoryMode() const
{ {
if (system->getMemoryMode() != Enums::atomic) { if (!system->isAtomicMode()) {
fatal("The atomic CPU requires the memory system to be in " fatal("The atomic CPU requires the memory system to be in "
"'atomic' mode.\n"); "'atomic' mode.\n");
} }

View file

@ -191,7 +191,7 @@ TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
void void
TimingSimpleCPU::verifyMemoryMode() const TimingSimpleCPU::verifyMemoryMode() const
{ {
if (system->getMemoryMode() != Enums::timing) { if (!system->isTimingMode()) {
fatal("The timing CPU requires the memory system to be in " fatal("The timing CPU requires the memory system to be in "
"'timing' mode.\n"); "'timing' mode.\n");
} }

View file

@ -83,10 +83,8 @@ TrafficGen::init()
if (!port.isConnected()) if (!port.isConnected())
fatal("The port of %s is not connected!\n", name()); fatal("The port of %s is not connected!\n", name());
Enums::MemoryMode mode = system->getMemoryMode();
// if the system is in timing mode active the request generator // if the system is in timing mode active the request generator
if (mode == Enums::timing) { if (system->isTimingMode()) {
DPRINTF(TrafficGen, "Timing mode, activating request generator\n"); DPRINTF(TrafficGen, "Timing mode, activating request generator\n");
// enter initial state // enter initial state
@ -101,7 +99,7 @@ void
TrafficGen::initState() TrafficGen::initState()
{ {
// when not restoring from a checkpoint, make sure we kick things off // when not restoring from a checkpoint, make sure we kick things off
if (system->getMemoryMode() == Enums::timing) { if (system->isTimingMode()) {
Tick nextStateGraphEvent = stateGraph.nextEventTick(); Tick nextStateGraphEvent = stateGraph.nextEventTick();
schedule(updateStateGraphEvent, nextStateGraphEvent); schedule(updateStateGraphEvent, nextStateGraphEvent);
} else { } else {

View file

@ -232,8 +232,7 @@ DmaPort::sendDma()
// switching actually work // switching actually work
assert(transmitList.size()); assert(transmitList.size());
Enums::MemoryMode state = sys->getMemoryMode(); if (sys->isTimingMode()) {
if (state == Enums::timing) {
// if we are either waiting for a retry or are still waiting // if we are either waiting for a retry or are still waiting
// after sending the last packet, then do not proceed // after sending the last packet, then do not proceed
if (inRetry || sendEvent.scheduled()) { if (inRetry || sendEvent.scheduled()) {
@ -242,7 +241,7 @@ DmaPort::sendDma()
} }
trySendTimingReq(); trySendTimingReq();
} else if (state == Enums::atomic) { } else if (sys->isAtomicMode()) {
// send everything there is to send in zero time // send everything there is to send in zero time
while (!transmitList.empty()) { while (!transmitList.empty()) {
PacketPtr pkt = transmitList.front(); PacketPtr pkt = transmitList.front();

View file

@ -222,8 +222,7 @@ X86ISA::I82094AA::signalInterrupt(int line)
apics.push_back(selected); apics.push_back(selected);
} }
} }
intMasterPort.sendMessage(apics, message, intMasterPort.sendMessage(apics, message, sys->isTimingMode());
sys->getMemoryMode() == Enums::timing);
} }
} }

View file

@ -40,7 +40,9 @@
# Andreas Hansson # Andreas Hansson
from MemObject import MemObject from MemObject import MemObject
from System import System
from m5.params import * from m5.params import *
from m5.proxy import *
class BaseBus(MemObject): class BaseBus(MemObject):
type = 'BaseBus' type = 'BaseBus'
@ -72,3 +74,5 @@ class NoncoherentBus(BaseBus):
class CoherentBus(BaseBus): class CoherentBus(BaseBus):
type = 'CoherentBus' type = 'CoherentBus'
cxx_header = "mem/coherent_bus.hh" cxx_header = "mem/coherent_bus.hh"
system = Param.System(Parent.any, "System that the bus belongs to.")

View file

@ -390,6 +390,7 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
// must be cache-to-cache response from upper to lower level // must be cache-to-cache response from upper to lower level
ForwardResponseRecord *rec = ForwardResponseRecord *rec =
dynamic_cast<ForwardResponseRecord *>(pkt->senderState); dynamic_cast<ForwardResponseRecord *>(pkt->senderState);
assert(!system->bypassCaches());
if (rec == NULL) { if (rec == NULL) {
assert(pkt->cmd == MemCmd::HardPFResp); assert(pkt->cmd == MemCmd::HardPFResp);
@ -409,6 +410,12 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
assert(pkt->isRequest()); assert(pkt->isRequest());
// Just forward the packet if caches are disabled.
if (system->bypassCaches()) {
memSidePort->sendTimingReq(pkt);
return true;
}
if (pkt->memInhibitAsserted()) { if (pkt->memInhibitAsserted()) {
DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n", DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
pkt->getAddr()); pkt->getAddr());
@ -629,6 +636,10 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
// @TODO: make this a parameter // @TODO: make this a parameter
bool last_level_cache = false; bool last_level_cache = false;
// Forward the request if the system is in cache bypass mode.
if (system->bypassCaches())
return memSidePort->sendAtomic(pkt);
if (pkt->memInhibitAsserted()) { if (pkt->memInhibitAsserted()) {
assert(!pkt->req->isUncacheable()); assert(!pkt->req->isUncacheable());
// have to invalidate ourselves and any lower caches even if // have to invalidate ourselves and any lower caches even if
@ -744,6 +755,17 @@ template<class TagStore>
void void
Cache<TagStore>::functionalAccess(PacketPtr pkt, bool fromCpuSide) Cache<TagStore>::functionalAccess(PacketPtr pkt, bool fromCpuSide)
{ {
if (system->bypassCaches()) {
// Packets from the memory side are snoop requests and
// shouldn't happen in bypass mode.
assert(fromCpuSide);
// The cache should be flushed if we are in cache bypass mode,
// so we don't need to check if we need to update anything.
memSidePort->sendFunctional(pkt);
return;
}
Addr blk_addr = blockAlign(pkt->getAddr()); Addr blk_addr = blockAlign(pkt->getAddr());
BlkType *blk = tags->findBlock(pkt->getAddr()); BlkType *blk = tags->findBlock(pkt->getAddr());
MSHR *mshr = mshrQueue.findMatch(blk_addr); MSHR *mshr = mshrQueue.findMatch(blk_addr);
@ -1354,6 +1376,9 @@ template<class TagStore>
void void
Cache<TagStore>::snoopTiming(PacketPtr pkt) Cache<TagStore>::snoopTiming(PacketPtr pkt)
{ {
// Snoops shouldn't happen when bypassing caches
assert(!system->bypassCaches());
// Note that some deferred snoops don't have requests, since the // Note that some deferred snoops don't have requests, since the
// original access may have already completed // original access may have already completed
if ((pkt->req && pkt->req->isUncacheable()) || if ((pkt->req && pkt->req->isUncacheable()) ||
@ -1438,6 +1463,9 @@ template<class TagStore>
Cycles Cycles
Cache<TagStore>::snoopAtomic(PacketPtr pkt) Cache<TagStore>::snoopAtomic(PacketPtr pkt)
{ {
// Snoops shouldn't happen when bypassing caches
assert(!system->bypassCaches());
if (pkt->req->isUncacheable() || pkt->cmd == MemCmd::Writeback) { if (pkt->req->isUncacheable() || pkt->cmd == MemCmd::Writeback) {
// Can't get a hit on an uncacheable address // Can't get a hit on an uncacheable address
// Revisit this for multi level coherence // Revisit this for multi level coherence
@ -1683,6 +1711,7 @@ Cache<TagStore>::CpuSidePort::recvTimingReq(PacketPtr pkt)
{ {
// always let inhibited requests through even if blocked // always let inhibited requests through even if blocked
if (!pkt->memInhibitAsserted() && blocked) { if (!pkt->memInhibitAsserted() && blocked) {
assert(!cache->system->bypassCaches());
DPRINTF(Cache,"Scheduling a retry while blocked\n"); DPRINTF(Cache,"Scheduling a retry while blocked\n");
mustSendRetry = true; mustSendRetry = true;
return false; return false;

View file

@ -52,11 +52,13 @@
#include "debug/BusAddrRanges.hh" #include "debug/BusAddrRanges.hh"
#include "debug/CoherentBus.hh" #include "debug/CoherentBus.hh"
#include "mem/coherent_bus.hh" #include "mem/coherent_bus.hh"
#include "sim/system.hh"
CoherentBus::CoherentBus(const CoherentBusParams *p) CoherentBus::CoherentBus(const CoherentBusParams *p)
: BaseBus(p), reqLayer(*this, ".reqLayer", p->clock), : BaseBus(p), reqLayer(*this, ".reqLayer", p->clock),
respLayer(*this, ".respLayer", p->clock), respLayer(*this, ".respLayer", p->clock),
snoopRespLayer(*this, ".snoopRespLayer", p->clock) snoopRespLayer(*this, ".snoopRespLayer", p->clock),
system(p->system)
{ {
// create the ports based on the size of the master and slave // create the ports based on the size of the master and slave
// vector ports, and the presence of the default port, the ports // vector ports, and the presence of the default port, the ports
@ -137,7 +139,7 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
Tick packetFinishTime = is_express_snoop ? 0 : pkt->finishTime; Tick packetFinishTime = is_express_snoop ? 0 : pkt->finishTime;
// uncacheable requests need never be snooped // uncacheable requests need never be snooped
if (!pkt->req->isUncacheable()) { if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
// the packet is a memory-mapped request and should be // the packet is a memory-mapped request and should be
// broadcasted to our snoopers but the source // broadcasted to our snoopers but the source
forwardTiming(pkt, slave_port_id); forwardTiming(pkt, slave_port_id);
@ -323,6 +325,9 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
void void
CoherentBus::forwardTiming(PacketPtr pkt, PortID exclude_slave_port_id) CoherentBus::forwardTiming(PacketPtr pkt, PortID exclude_slave_port_id)
{ {
// snoops should only happen if the system isn't bypassing caches
assert(!system->bypassCaches());
for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) { for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) {
SlavePort *p = *s; SlavePort *p = *s;
// we could have gotten this request from a snooping master // we could have gotten this request from a snooping master
@ -357,7 +362,7 @@ CoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id)
Tick snoop_response_latency = 0; Tick snoop_response_latency = 0;
// uncacheable requests need never be snooped // uncacheable requests need never be snooped
if (!pkt->req->isUncacheable()) { if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
// forward to all snoopers but the source // forward to all snoopers but the source
std::pair<MemCmd, Tick> snoop_result = std::pair<MemCmd, Tick> snoop_result =
forwardAtomic(pkt, slave_port_id); forwardAtomic(pkt, slave_port_id);
@ -414,6 +419,9 @@ CoherentBus::forwardAtomic(PacketPtr pkt, PortID exclude_slave_port_id)
MemCmd snoop_response_cmd = MemCmd::InvalidCmd; MemCmd snoop_response_cmd = MemCmd::InvalidCmd;
Tick snoop_response_latency = 0; Tick snoop_response_latency = 0;
// snoops should only happen if the system isn't bypassing caches
assert(!system->bypassCaches());
for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) { for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) {
SlavePort *p = *s; SlavePort *p = *s;
// we could have gotten this request from a snooping master // we could have gotten this request from a snooping master
@ -458,7 +466,7 @@ CoherentBus::recvFunctional(PacketPtr pkt, PortID slave_port_id)
} }
// uncacheable requests need never be snooped // uncacheable requests need never be snooped
if (!pkt->req->isUncacheable()) { if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
// forward to all snoopers but the source // forward to all snoopers but the source
forwardFunctional(pkt, slave_port_id); forwardFunctional(pkt, slave_port_id);
} }
@ -490,6 +498,9 @@ CoherentBus::recvFunctionalSnoop(PacketPtr pkt, PortID master_port_id)
void void
CoherentBus::forwardFunctional(PacketPtr pkt, PortID exclude_slave_port_id) CoherentBus::forwardFunctional(PacketPtr pkt, PortID exclude_slave_port_id)
{ {
// snoops should only happen if the system isn't bypassing caches
assert(!system->bypassCaches());
for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) { for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) {
SlavePort *p = *s; SlavePort *p = *s;
// we could have gotten this request from a snooping master // we could have gotten this request from a snooping master

View file

@ -224,6 +224,12 @@ class CoherentBus : public BaseBus
*/ */
std::set<RequestPtr> outstandingReq; std::set<RequestPtr> outstandingReq;
/**
* Keep a pointer to the system to allow querying memory system
* properties.
*/
System *system;
/** Function called by the port when the bus is recieving a Timing /** Function called by the port when the bus is recieving a Timing
request packet.*/ request packet.*/
virtual bool recvTimingReq(PacketPtr pkt, PortID slave_port_id); virtual bool recvTimingReq(PacketPtr pkt, PortID slave_port_id);

View file

@ -63,6 +63,7 @@ MaxTick = 2**63 - 1
_memory_modes = { _memory_modes = {
"atomic" : objects.params.atomic, "atomic" : objects.params.atomic,
"timing" : objects.params.timing, "timing" : objects.params.timing,
"atomic_noncaching" : objects.params.atomic_noncaching,
} }
# The final hook to generate .ini files. Called from the user script # The final hook to generate .ini files. Called from the user script
@ -288,6 +289,13 @@ def switchCpus(system, cpuList, do_drain=True):
# Change the memory mode if required. We check if this is needed # Change the memory mode if required. We check if this is needed
# to avoid printing a warning if no switch was performed. # to avoid printing a warning if no switch was performed.
if system.getMemoryMode() != memory_mode: if system.getMemoryMode() != memory_mode:
# Flush the memory system if we are switching to a memory mode
# that disables caches. This typically happens when switching to a
# hardware virtualized CPU.
if memory_mode == objects.params.atomic_noncaching:
memWriteback(system)
memInvalidate(system)
_changeMemoryMode(system, memory_mode) _changeMemoryMode(system, memory_mode)
for old_cpu, new_cpu in cpuList: for old_cpu, new_cpu in cpuList:

View file

@ -35,7 +35,8 @@ from m5.proxy import *
from SimpleMemory import * from SimpleMemory import *
class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing'] class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing',
'atomic_noncaching']
class System(MemObject): class System(MemObject):
type = 'System' type = 'System'
@ -55,7 +56,7 @@ class System(MemObject):
@classmethod @classmethod
def export_methods(cls, code): def export_methods(cls, code):
code(''' code('''
Enums::MemoryMode getMemoryMode(); Enums::MemoryMode getMemoryMode() const;
void setMemoryMode(Enums::MemoryMode mode); void setMemoryMode(Enums::MemoryMode mode);
''') ''')

View file

@ -454,8 +454,8 @@ System::getMasterName(MasterID master_id)
return masterIds[master_id]; return masterIds[master_id];
} }
const char *System::MemoryModeStrings[3] = {"invalid", "atomic", const char *System::MemoryModeStrings[4] = {"invalid", "atomic", "timing",
"timing"}; "atomic_noncaching"};
System * System *
SystemParams::create() SystemParams::create()

View file

@ -120,20 +120,63 @@ class System : public MemObject
BaseMasterPort& getMasterPort(const std::string &if_name, BaseMasterPort& getMasterPort(const std::string &if_name,
PortID idx = InvalidPortID); PortID idx = InvalidPortID);
static const char *MemoryModeStrings[3]; static const char *MemoryModeStrings[4];
Enums::MemoryMode /** @{ */
getMemoryMode() /**
{ * Is the system in atomic mode?
assert(memoryMode); *
return memoryMode; * There are currently two different atomic memory modes:
* 'atomic', which supports caches; and 'atomic_noncaching', which
* bypasses caches. The latter is used by hardware virtualized
* CPUs. SimObjects are expected to use Port::sendAtomic() and
* Port::recvAtomic() when accessing memory in this mode.
*/
bool isAtomicMode() const {
return memoryMode == Enums::atomic ||
memoryMode == Enums::atomic_noncaching;
} }
/** Change the memory mode of the system. This should only be called by the /**
* python!! * Is the system in timing mode?
* @param mode Mode to change to (atomic/timing) *
* SimObjects are expected to use Port::sendTiming() and
* Port::recvTiming() when accessing memory in this mode.
*/
bool isTimingMode() const {
return memoryMode == Enums::timing;
}
/**
* Should caches be bypassed?
*
* Some CPUs need to bypass caches to allow direct memory
* accesses, which is required for hardware virtualization.
*/
bool bypassCaches() const {
return memoryMode == Enums::atomic_noncaching;
}
/** @} */
/** @{ */
/**
* Get the memory mode of the system.
*
* \warn This should only be used by the Python world. The C++
* world should use one of the query functions above
* (isAtomicMode(), isTimingMode(), bypassCaches()).
*/
Enums::MemoryMode getMemoryMode() const { return memoryMode; }
/**
* Change the memory mode of the system.
*
* \warn This should only be called by the Python!
*
* @param mode Mode to change to (atomic/timing/...)
*/ */
void setMemoryMode(Enums::MemoryMode mode); void setMemoryMode(Enums::MemoryMode mode);
/** @} */
PCEventQueue pcEventQueue; PCEventQueue pcEventQueue;