diff --git a/src/arch/arm/table_walker.cc b/src/arch/arm/table_walker.cc
index 99a7592c1..44f12833b 100644
--- a/src/arch/arm/table_walker.cc
+++ b/src/arch/arm/table_walker.cc
@@ -104,7 +104,7 @@ void
 TableWalker::drainResume()
 {
     Drainable::drainResume();
-    if ((params()->sys->getMemoryMode() == Enums::timing) && currState) {
+    if (params()->sys->isTimingMode() && currState) {
         delete currState;
         currState = NULL;
     }
diff --git a/src/arch/x86/interrupts.cc b/src/arch/x86/interrupts.cc
index b34124ce7..8eae2d390 100644
--- a/src/arch/x86/interrupts.cc
+++ b/src/arch/x86/interrupts.cc
@@ -510,7 +510,7 @@ X86ISA::Interrupts::setReg(ApicRegIndex reg, uint32_t val)
             message.destMode = low.destMode;
             message.level = low.level;
             message.trigger = low.trigger;
-            bool timing = sys->getMemoryMode() == Enums::timing;
+            bool timing(sys->isTimingMode());
             // Be careful no updates of the delivery status bit get lost.
             regs[APIC_INTERRUPT_COMMAND_LOW] = low;
             ApicList apics;
diff --git a/src/arch/x86/pagetable_walker.cc b/src/arch/x86/pagetable_walker.cc
index 1e42e5593..b096fbfe8 100644
--- a/src/arch/x86/pagetable_walker.cc
+++ b/src/arch/x86/pagetable_walker.cc
@@ -88,7 +88,7 @@ Walker::start(ThreadContext * _tc, BaseTLB::Translation *_translation,
     // outstanding requests, see if this request can be coalesced with
     // another one (i.e. either coalesce or start walk)
     WalkerState * newState = new WalkerState(this, _translation, _req);
-    newState->initState(_tc, _mode, sys->getMemoryMode() == Enums::timing);
+    newState->initState(_tc, _mode, sys->isTimingMode());
     if (currStates.size()) {
         assert(newState->isTiming());
         DPRINTF(PageTableWalker, "Walks in progress: %d\n", currStates.size());
diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc
index 5490cb3f2..1ba8e55b6 100644
--- a/src/cpu/inorder/cpu.cc
+++ b/src/cpu/inorder/cpu.cc
@@ -812,7 +812,7 @@ InOrderCPU::init()
 void
 InOrderCPU::verifyMemoryMode() const
 {
-    if (system->getMemoryMode() != Enums::timing) {
+    if (!system->isTimingMode()) {
         fatal("The in-order CPU requires the memory system to be in "
               "'timing' mode.\n");
     }
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 53250d495..9caa49ad6 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -1316,7 +1316,7 @@ template <class Impl>
 void
 FullO3CPU<Impl>::verifyMemoryMode() const
 {
-    if (system->getMemoryMode() != Enums::timing) {
+    if (!system->isTimingMode()) {
         fatal("The O3 CPU requires the memory system to be in "
               "'timing' mode.\n");
     }
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
index 7a0778961..d7c4190ee 100644
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -212,7 +212,7 @@ AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
 void
 AtomicSimpleCPU::verifyMemoryMode() const
 {
-    if (system->getMemoryMode() != Enums::atomic) {
+    if (!system->isAtomicMode()) {
         fatal("The atomic CPU requires the memory system to be in "
               "'atomic' mode.\n");
     }
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
index 7423d082c..ab4ea9256 100644
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -191,7 +191,7 @@ TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
 void
 TimingSimpleCPU::verifyMemoryMode() const
 {
-    if (system->getMemoryMode() != Enums::timing) {
+    if (!system->isTimingMode()) {
         fatal("The timing CPU requires the memory system to be in "
               "'timing' mode.\n");
     }
diff --git a/src/cpu/testers/traffic_gen/traffic_gen.cc b/src/cpu/testers/traffic_gen/traffic_gen.cc
index 34e3b2c1e..d9d040858 100644
--- a/src/cpu/testers/traffic_gen/traffic_gen.cc
+++ b/src/cpu/testers/traffic_gen/traffic_gen.cc
@@ -83,10 +83,8 @@ TrafficGen::init()
     if (!port.isConnected())
         fatal("The port of %s is not connected!\n", name());
 
-    Enums::MemoryMode mode = system->getMemoryMode();
-
     // if the system is in timing mode active the request generator
-    if (mode == Enums::timing) {
+    if (system->isTimingMode()) {
         DPRINTF(TrafficGen, "Timing mode, activating request generator\n");
 
         // enter initial state
@@ -101,7 +99,7 @@ void
 TrafficGen::initState()
 {
     // when not restoring from a checkpoint, make sure we kick things off
-    if (system->getMemoryMode() == Enums::timing) {
+    if (system->isTimingMode()) {
         Tick nextStateGraphEvent = stateGraph.nextEventTick();
         schedule(updateStateGraphEvent, nextStateGraphEvent);
     } else {
diff --git a/src/dev/dma_device.cc b/src/dev/dma_device.cc
index 770370320..f25f52334 100644
--- a/src/dev/dma_device.cc
+++ b/src/dev/dma_device.cc
@@ -232,8 +232,7 @@ DmaPort::sendDma()
     // switching actually work
     assert(transmitList.size());
 
-    Enums::MemoryMode state = sys->getMemoryMode();
-    if (state == Enums::timing) {
+    if (sys->isTimingMode()) {
         // if we are either waiting for a retry or are still waiting
         // after sending the last packet, then do not proceed
         if (inRetry || sendEvent.scheduled()) {
@@ -242,7 +241,7 @@ DmaPort::sendDma()
         }
 
         trySendTimingReq();
-    } else if (state == Enums::atomic) {
+    } else if (sys->isAtomicMode()) {
         // send everything there is to send in zero time
         while (!transmitList.empty()) {
             PacketPtr pkt = transmitList.front();
diff --git a/src/dev/x86/i82094aa.cc b/src/dev/x86/i82094aa.cc
index 54824c778..0692718bf 100644
--- a/src/dev/x86/i82094aa.cc
+++ b/src/dev/x86/i82094aa.cc
@@ -222,8 +222,7 @@ X86ISA::I82094AA::signalInterrupt(int line)
                 apics.push_back(selected);
             }
         }
-        intMasterPort.sendMessage(apics, message,
-                                  sys->getMemoryMode() == Enums::timing);
+        intMasterPort.sendMessage(apics, message, sys->isTimingMode());
     }
 }
 
diff --git a/src/mem/Bus.py b/src/mem/Bus.py
index 4637b0ebc..ca0f40e1e 100644
--- a/src/mem/Bus.py
+++ b/src/mem/Bus.py
@@ -40,7 +40,9 @@
 #          Andreas Hansson
 
 from MemObject import MemObject
+from System import System
 from m5.params import *
+from m5.proxy import *
 
 class BaseBus(MemObject):
     type = 'BaseBus'
@@ -72,3 +74,5 @@ class NoncoherentBus(BaseBus):
 class CoherentBus(BaseBus):
     type = 'CoherentBus'
     cxx_header = "mem/coherent_bus.hh"
+
+    system = Param.System(Parent.any, "System that the bus belongs to.")
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index a1d945103..21c8e16d6 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -390,6 +390,7 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
         // must be cache-to-cache response from upper to lower level
         ForwardResponseRecord *rec =
             dynamic_cast<ForwardResponseRecord *>(pkt->senderState);
+        assert(!system->bypassCaches());
 
         if (rec == NULL) {
             assert(pkt->cmd == MemCmd::HardPFResp);
@@ -409,6 +410,12 @@ Cache<TagStore>::timingAccess(PacketPtr pkt)
 
     assert(pkt->isRequest());
 
+    // Just forward the packet if caches are disabled.
+    if (system->bypassCaches()) {
+        memSidePort->sendTimingReq(pkt);
+        return true;
+    }
+
     if (pkt->memInhibitAsserted()) {
         DPRINTF(Cache, "mem inhibited on 0x%x: not responding\n",
                 pkt->getAddr());
@@ -629,6 +636,10 @@ Cache<TagStore>::atomicAccess(PacketPtr pkt)
     // @TODO: make this a parameter
     bool last_level_cache = false;
 
+    // Forward the request if the system is in cache bypass mode.
+    if (system->bypassCaches())
+        return memSidePort->sendAtomic(pkt);
+
     if (pkt->memInhibitAsserted()) {
         assert(!pkt->req->isUncacheable());
         // have to invalidate ourselves and any lower caches even if
@@ -744,6 +755,17 @@ template<class TagStore>
 void
 Cache<TagStore>::functionalAccess(PacketPtr pkt, bool fromCpuSide)
 {
+    if (system->bypassCaches()) {
+        // Packets from the memory side are snoop requests and
+        // shouldn't happen in bypass mode.
+        assert(fromCpuSide);
+
+        // The cache should be flushed if we are in cache bypass mode,
+        // so we don't need to check if we need to update anything.
+        memSidePort->sendFunctional(pkt);
+        return;
+    }
+
     Addr blk_addr = blockAlign(pkt->getAddr());
     BlkType *blk = tags->findBlock(pkt->getAddr());
     MSHR *mshr = mshrQueue.findMatch(blk_addr);
@@ -1354,6 +1376,9 @@ template<class TagStore>
 void
 Cache<TagStore>::snoopTiming(PacketPtr pkt)
 {
+    // Snoops shouldn't happen when bypassing caches
+    assert(!system->bypassCaches());
+
     // Note that some deferred snoops don't have requests, since the
     // original access may have already completed
     if ((pkt->req && pkt->req->isUncacheable()) ||
@@ -1438,6 +1463,9 @@ template<class TagStore>
 Cycles
 Cache<TagStore>::snoopAtomic(PacketPtr pkt)
 {
+    // Snoops shouldn't happen when bypassing caches
+    assert(!system->bypassCaches());
+
     if (pkt->req->isUncacheable() || pkt->cmd == MemCmd::Writeback) {
         // Can't get a hit on an uncacheable address
         // Revisit this for multi level coherence
@@ -1683,6 +1711,7 @@ Cache<TagStore>::CpuSidePort::recvTimingReq(PacketPtr pkt)
 {
     // always let inhibited requests through even if blocked
     if (!pkt->memInhibitAsserted() && blocked) {
+        assert(!cache->system->bypassCaches());
         DPRINTF(Cache,"Scheduling a retry while blocked\n");
         mustSendRetry = true;
         return false;
diff --git a/src/mem/coherent_bus.cc b/src/mem/coherent_bus.cc
index b1ac6dbcf..f74ca48e9 100644
--- a/src/mem/coherent_bus.cc
+++ b/src/mem/coherent_bus.cc
@@ -52,11 +52,13 @@
 #include "debug/BusAddrRanges.hh"
 #include "debug/CoherentBus.hh"
 #include "mem/coherent_bus.hh"
+#include "sim/system.hh"
 
 CoherentBus::CoherentBus(const CoherentBusParams *p)
     : BaseBus(p), reqLayer(*this, ".reqLayer", p->clock),
       respLayer(*this, ".respLayer", p->clock),
-      snoopRespLayer(*this, ".snoopRespLayer", p->clock)
+      snoopRespLayer(*this, ".snoopRespLayer", p->clock),
+      system(p->system)
 {
     // create the ports based on the size of the master and slave
     // vector ports, and the presence of the default port, the ports
@@ -137,7 +139,7 @@ CoherentBus::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
     Tick packetFinishTime = is_express_snoop ? 0 : pkt->finishTime;
 
     // uncacheable requests need never be snooped
-    if (!pkt->req->isUncacheable()) {
+    if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
         // the packet is a memory-mapped request and should be
         // broadcasted to our snoopers but the source
         forwardTiming(pkt, slave_port_id);
@@ -323,6 +325,9 @@ CoherentBus::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
 void
 CoherentBus::forwardTiming(PacketPtr pkt, PortID exclude_slave_port_id)
 {
+    // snoops should only happen if the system isn't bypassing caches
+    assert(!system->bypassCaches());
+
     for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) {
         SlavePort *p = *s;
         // we could have gotten this request from a snooping master
@@ -357,7 +362,7 @@ CoherentBus::recvAtomic(PacketPtr pkt, PortID slave_port_id)
     Tick snoop_response_latency = 0;
 
     // uncacheable requests need never be snooped
-    if (!pkt->req->isUncacheable()) {
+    if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
         // forward to all snoopers but the source
         std::pair<MemCmd, Tick> snoop_result =
             forwardAtomic(pkt, slave_port_id);
@@ -414,6 +419,9 @@ CoherentBus::forwardAtomic(PacketPtr pkt, PortID exclude_slave_port_id)
     MemCmd snoop_response_cmd = MemCmd::InvalidCmd;
     Tick snoop_response_latency = 0;
 
+    // snoops should only happen if the system isn't bypassing caches
+    assert(!system->bypassCaches());
+
     for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) {
         SlavePort *p = *s;
         // we could have gotten this request from a snooping master
@@ -458,7 +466,7 @@ CoherentBus::recvFunctional(PacketPtr pkt, PortID slave_port_id)
     }
 
     // uncacheable requests need never be snooped
-    if (!pkt->req->isUncacheable()) {
+    if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
         // forward to all snoopers but the source
         forwardFunctional(pkt, slave_port_id);
     }
@@ -490,6 +498,9 @@ CoherentBus::recvFunctionalSnoop(PacketPtr pkt, PortID master_port_id)
 void
 CoherentBus::forwardFunctional(PacketPtr pkt, PortID exclude_slave_port_id)
 {
+    // snoops should only happen if the system isn't bypassing caches
+    assert(!system->bypassCaches());
+
     for (SlavePortIter s = snoopPorts.begin(); s != snoopPorts.end(); ++s) {
         SlavePort *p = *s;
         // we could have gotten this request from a snooping master
diff --git a/src/mem/coherent_bus.hh b/src/mem/coherent_bus.hh
index 61406608b..05c45f69a 100644
--- a/src/mem/coherent_bus.hh
+++ b/src/mem/coherent_bus.hh
@@ -224,6 +224,12 @@ class CoherentBus : public BaseBus
      */
     std::set<RequestPtr> outstandingReq;
 
+    /**
+     * Keep a pointer to the system to allow querying memory system
+     * properties.
+     */
+    System *system;
+
     /** Function called by the port when the bus is recieving a Timing
       request packet.*/
     virtual bool recvTimingReq(PacketPtr pkt, PortID slave_port_id);
diff --git a/src/python/m5/simulate.py b/src/python/m5/simulate.py
index 3583e8264..682104c26 100644
--- a/src/python/m5/simulate.py
+++ b/src/python/m5/simulate.py
@@ -63,6 +63,7 @@ MaxTick = 2**63 - 1
 _memory_modes = {
     "atomic" : objects.params.atomic,
     "timing" : objects.params.timing,
+    "atomic_noncaching" : objects.params.atomic_noncaching,
     }
 
 # The final hook to generate .ini files.  Called from the user script
@@ -288,6 +289,13 @@ def switchCpus(system, cpuList, do_drain=True):
     # Change the memory mode if required. We check if this is needed
     # to avoid printing a warning if no switch was performed.
     if system.getMemoryMode() != memory_mode:
+        # Flush the memory system if we are switching to a memory mode
+        # that disables caches. This typically happens when switching to a
+        # hardware virtualized CPU.
+        if memory_mode == objects.params.atomic_noncaching:
+            memWriteback(system)
+            memInvalidate(system)
+
         _changeMemoryMode(system, memory_mode)
 
     for old_cpu, new_cpu in cpuList:
diff --git a/src/sim/System.py b/src/sim/System.py
index 69ae61e8f..031331375 100644
--- a/src/sim/System.py
+++ b/src/sim/System.py
@@ -35,7 +35,8 @@ from m5.proxy import *
 
 from SimpleMemory import *
 
-class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing']
+class MemoryMode(Enum): vals = ['invalid', 'atomic', 'timing',
+                                'atomic_noncaching']
 
 class System(MemObject):
     type = 'System'
@@ -55,7 +56,7 @@ class System(MemObject):
     @classmethod
     def export_methods(cls, code):
         code('''
-      Enums::MemoryMode getMemoryMode();
+      Enums::MemoryMode getMemoryMode() const;
       void setMemoryMode(Enums::MemoryMode mode);
 ''')
 
diff --git a/src/sim/system.cc b/src/sim/system.cc
index 259ed3e88..03f8f8180 100644
--- a/src/sim/system.cc
+++ b/src/sim/system.cc
@@ -454,8 +454,8 @@ System::getMasterName(MasterID master_id)
     return masterIds[master_id];
 }
 
-const char *System::MemoryModeStrings[3] = {"invalid", "atomic",
-    "timing"};
+const char *System::MemoryModeStrings[4] = {"invalid", "atomic", "timing",
+                                            "atomic_noncaching"};
 
 System *
 SystemParams::create()
diff --git a/src/sim/system.hh b/src/sim/system.hh
index d1b79bbf4..05b1f2077 100644
--- a/src/sim/system.hh
+++ b/src/sim/system.hh
@@ -120,20 +120,63 @@ class System : public MemObject
     BaseMasterPort& getMasterPort(const std::string &if_name,
                                   PortID idx = InvalidPortID);
 
-    static const char *MemoryModeStrings[3];
+    static const char *MemoryModeStrings[4];
 
-    Enums::MemoryMode
-    getMemoryMode()
-    {
-        assert(memoryMode);
-        return memoryMode;
+    /** @{ */
+    /**
+     * Is the system in atomic mode?
+     *
+     * There are currently two different atomic memory modes:
+     * 'atomic', which supports caches; and 'atomic_noncaching', which
+     * bypasses caches. The latter is used by hardware virtualized
+     * CPUs. SimObjects are expected to use Port::sendAtomic() and
+     * Port::recvAtomic() when accessing memory in this mode.
+     */
+    bool isAtomicMode() const {
+        return memoryMode == Enums::atomic ||
+            memoryMode == Enums::atomic_noncaching;
     }
 
-    /** Change the memory mode of the system. This should only be called by the
-     * python!!
-     * @param mode Mode to change to (atomic/timing)
+    /**
+     * Is the system in timing mode?
+     *
+     * SimObjects are expected to use Port::sendTiming() and
+     * Port::recvTiming() when accessing memory in this mode.
+     */
+    bool isTimingMode() const {
+        return memoryMode == Enums::timing;
+    }
+
+    /**
+     * Should caches be bypassed?
+     *
+     * Some CPUs need to bypass caches to allow direct memory
+     * accesses, which is required for hardware virtualization.
+     */
+    bool bypassCaches() const {
+        return memoryMode == Enums::atomic_noncaching;
+    }
+    /** @} */
+
+    /** @{ */
+    /**
+     * Get the memory mode of the system.
+     *
+     * \warning This should only be used by the Python world. The C++
+     * world should use one of the query functions above
+     * (isAtomicMode(), isTimingMode(), bypassCaches()).
+     */
+    Enums::MemoryMode getMemoryMode() const { return memoryMode; }
+
+    /**
+     * Change the memory mode of the system.
+     *
+     * \warning This should only be called from Python!
+     *
+     * @param mode Mode to change to (atomic/timing/...)
      */
     void setMemoryMode(Enums::MemoryMode mode);
+    /** @} */
 
     PCEventQueue pcEventQueue;