diff --git a/configs/example/fs.py b/configs/example/fs.py
index f0e32e97f..a5b8772af 100644
--- a/configs/example/fs.py
+++ b/configs/example/fs.py
@@ -72,6 +72,7 @@ class MyCache(BaseCache):
     latency = 1
     mshrs = 10
     tgts_per_mshr = 5
+    protocol = CoherenceProtocol(protocol='moesi')
 
 # client system CPU is always simple... note this is an assignment of
 # a class, not an instance.
diff --git a/src/base/traceflags.py b/src/base/traceflags.py
index 757c9e7b7..298d22c2b 100644
--- a/src/base/traceflags.py
+++ b/src/base/traceflags.py
@@ -182,7 +182,8 @@ compoundFlagMap = {
     'EthernetNoData' : [ 'Ethernet', 'EthernetPIO', 'EthernetDesc', 'EthernetIntr', 'EthernetSM', 'EthernetCksum' ],
     'IdeAll' : [ 'IdeCtrl', 'IdeDisk' ],
     'O3CPUAll' : [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit', 'IQ', 'ROB', 'FreeList', 'RenameMap', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit', 'DynInst', 'FullCPU', 'O3CPU', 'Activity','Scoreboard','Writeback'],
-    'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU']
+    'OzoneCPUAll' : [ 'BE', 'FE', 'IBE', 'OzoneLSQ', 'OzoneCPU'],
+    'All' : baseFlags
 }
 
 #############################################################
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
index 4c9a8e91f..367508288 100644
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -850,9 +850,6 @@ template <class Impl>
 void
 FullO3CPU<Impl>::resume()
 {
-#if FULL_SYSTEM
-    assert(system->getMemoryMode() == System::Timing);
-#endif
     fetch.resume();
     decode.resume();
     rename.resume();
@@ -864,6 +861,10 @@ FullO3CPU<Impl>::resume()
     if (_status == SwitchedOut || _status == Idle)
         return;
 
+#if FULL_SYSTEM
+    assert(system->getMemoryMode() == System::Timing);
+#endif
+
     if (!tickEvent.scheduled())
         tickEvent.schedule(curTick);
     _status = Running;
diff --git a/src/mem/cache/cache_blk.hh b/src/mem/cache/cache_blk.hh
index 078c82d82..7b999e4b1 100644
--- a/src/mem/cache/cache_blk.hh
+++ b/src/mem/cache/cache_blk.hh
@@ -35,8 +35,11 @@
 #ifndef __CACHE_BLK_HH__
 #define __CACHE_BLK_HH__
 
+#include <list>
+
 #include "sim/root.hh"          // for Tick
 #include "arch/isa_traits.hh"   // for Addr
+#include "mem/request.hh"
 
 /**
  * Cache block status bit assignments
@@ -96,6 +99,35 @@ class CacheBlk
     /** Number of references to this block since it was brought in. */
     int refCount;
 
+  protected:
+    /**
+     * Represents that the indicated thread context has a "lock" on
+     * the block, in the LL/SC sense.
+     */
+    class Lock {
+      public:
+        int cpuNum;       // locking CPU
+        int threadNum;    // locking thread ID within CPU
+
+        // check for matching execution context
+        bool matchesContext(Request *req)
+        {
+            return (cpuNum == req->getCpuNum() &&
+                    threadNum == req->getThreadNum());
+        }
+
+        Lock(Request *req)
+            : cpuNum(req->getCpuNum()), threadNum(req->getThreadNum())
+        {
+        }
+    };
+
+    /** List of thread contexts that have performed a load-locked (LL)
+     * on the block since the last store. */
+    std::list<Lock> lockList;
+
+  public:
+
     CacheBlk()
         : asid(-1), tag(0), data(0) ,size(0), status(0), whenReady(0),
           set(-1), refCount(0)
@@ -175,7 +207,58 @@ class CacheBlk
         return (status & BlkHWPrefetched) != 0;
     }
 
+    /**
+     * Track the fact that a load-locked was issued to the block.  If
+     * multiple LLs get issued from the same context we could have
+     * redundant records on the list, but that's OK, as they'll all
+     * get blown away at the next store.
+     */
+    void trackLoadLocked(Request *req)
+    {
+        assert(req->isLocked());
+        lockList.push_front(Lock(req));
+    }
+
+    /**
+     * Clear the list of valid load locks.  Should be called whenever
+     * block is written to or invalidated.
+     */
+    void clearLoadLocks() { lockList.clear(); }
+
+    /**
+     * Handle interaction of load-locked operations and stores.
+     * @return True if write should proceed, false otherwise.  Returns
+     * false only in the case of a failed store conditional.
+     */
+    bool checkWrite(Request *req)
+    {
+        if (req->isLocked()) {
+            // it's a store conditional... have to check for matching
+            // load locked.
+            bool success = false;
+
+            for (std::list<Lock>::iterator i = lockList.begin();
+                 i != lockList.end(); ++i)
+            {
+                if (i->matchesContext(req)) {
+                    // it's a store conditional, and as far as the memory
+                    // system can tell, the requesting context's lock is
+                    // still valid.
+                    success = true;
+                    break;
+                }
+            }
+
+            req->setScResult(success ? 1 : 0);
+            clearLoadLocks();
+            return success;
+        } else {
+            // for *all* stores (conditional or otherwise) we have to
+            // clear the list of load-locks as they're all invalid now.
+            clearLoadLocks();
+            return true;
+        }
+    }
 };
 
 #endif //__CACHE_BLK_HH__
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
index 8683352db..b5d7e1960 100644
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -86,11 +86,6 @@ doAtomicAccess(Packet *pkt, bool isCpuSide)
 {
     if (isCpuSide)
     {
-        //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->isLocked())) {
-            pkt->req->setScResult(1);
-        }
-
         probe(pkt, true, NULL);
         //TEMP ALWAYS SUCCES FOR NOW
         pkt->result = Packet::Success;
@@ -116,11 +111,6 @@ doFunctionalAccess(Packet *pkt, bool isCpuSide)
         //TEMP USE CPU?THREAD 0 0
         pkt->req->setThreadContext(0,0);
 
-        //Temporary solution to LL/SC
-        if (pkt->isWrite() && (pkt->req->isLocked())) {
-            assert("Can't handle LL/SC on functional path\n");
-        }
-
         probe(pkt, false, memSidePort);
         //TEMP ALWAYS SUCCESFUL FOR NOW
         pkt->result = Packet::Success;
diff --git a/src/mem/cache/tags/lru.cc b/src/mem/cache/tags/lru.cc
index a9ae049c3..3eb083327 100644
--- a/src/mem/cache/tags/lru.cc
+++ b/src/mem/cache/tags/lru.cc
@@ -246,6 +246,7 @@ LRU::invalidateBlk(Addr addr)
     if (blk) {
         blk->status = 0;
         blk->isTouched = false;
+        blk->clearLoadLocks();
         tagsInUse--;
     }
 }
diff --git a/src/mem/page_table.cc b/src/mem/page_table.cc
index 32da4282c..fa6d5ddfb 100644
--- a/src/mem/page_table.cc
+++ b/src/mem/page_table.cc
@@ -167,13 +167,14 @@ void
 PageTable::serialize(std::ostream &os)
 {
     paramOut(os, "ptable.size", pTable.size());
 
+    int count = 0;
     m5::hash_map<Addr,Addr>::iterator iter;
-    while (iter != pTable.end()) {
+    m5::hash_map<Addr,Addr>::iterator end = pTable.end();
+    for (iter = pTable.begin(); iter != end; ++iter, ++count) {
         paramOut(os, csprintf("ptable.entry%dvaddr", count),iter->first);
         paramOut(os, csprintf("ptable.entry%dpaddr", count),iter->second);
-        ++count;
     }
     assert(count == pTable.size());
 }
diff --git a/src/python/m5/SimObject.py b/src/python/m5/SimObject.py
index 716f584b0..18b3fff55 100644
--- a/src/python/m5/SimObject.py
+++ b/src/python/m5/SimObject.py
@@ -730,9 +730,8 @@ class SimObject(object):
         # i don't know if there's a better way to do this - calling
         # setMemoryMode directly from self._ccObject results in calling
         # SimObject::setMemoryMode, not the System::setMemoryMode
-##        system_ptr = cc_main.convertToSystemPtr(self._ccObject)
-##        system_ptr.setMemoryMode(mode)
-        self._ccObject.setMemoryMode(mode)
+        system_ptr = cc_main.convertToSystemPtr(self._ccObject)
+        system_ptr.setMemoryMode(mode)
         for child in self._children.itervalues():
             child.changeTiming(mode)
 
diff --git a/src/sim/eventq.hh b/src/sim/eventq.hh
index 537bfb918..fa65b08af 100644
--- a/src/sim/eventq.hh
+++ b/src/sim/eventq.hh
@@ -120,10 +120,22 @@ class Event : public Serializable, public FastAlloc
     /// priority; these values are used to control events that need to
     /// be ordered within a cycle.
     enum Priority {
-        /// Breakpoints should happen before anything else, so we
-        /// don't miss any action when debugging.
+        /// If we enable tracing on a particular cycle, do that as the
+        /// very first thing so we don't miss any of the events on
+        /// that cycle (even if we enter the debugger).
+        Trace_Enable_Pri        = -101,
+
+        /// Breakpoints should happen before anything else (except
+        /// enabling trace output), so we don't miss any action when
+        /// debugging.
         Debug_Break_Pri         = -100,
 
+        /// CPU switches schedule the new CPU's tick event for the
+        /// same cycle (after unscheduling the old CPU's tick event).
+        /// The switch needs to come before any tick events to make
+        /// sure we don't tick both CPUs in the same cycle.
+        CPU_Switch_Pri          = -31,
+
         /// For some reason "delayed" inter-cluster writebacks are
         /// scheduled before regular writebacks (which have default
         /// priority).  Steve?
@@ -132,12 +144,6 @@ class Event : public Serializable, public FastAlloc
         /// Default is zero for historical reasons.
         Default_Pri             = 0,
 
-        /// CPU switches schedule the new CPU's tick event for the
-        /// same cycle (after unscheduling the old CPU's tick event).
-        /// The switch needs to come before any tick events to make
-        /// sure we don't tick both CPUs in the same cycle.
-        CPU_Switch_Pri          = -31,
-
         /// Serailization needs to occur before tick events also, so
         /// that a serialize/unserialize is identical to an on-line
         /// CPU switch.
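
The LL/SC bookkeeping added to cache_blk.hh above is self-contained enough to model in isolation. The sketch below exercises that logic outside the simulator; SimpleRequest, Block, and the main() driver are hypothetical stand-ins (gem5's real Request and CacheBlk carry far more state), but trackLoadLocked() and checkWrite() follow the patch's semantics: a load-locked pushes a reservation onto the block's lock list, any store clears every reservation, and a store conditional succeeds only if its context still holds one.

// llsc_sketch.cc: minimal, standalone model of the lock-list logic.
#include <cassert>
#include <list>

// Hypothetical stand-in for gem5's Request; only the fields the
// lock list cares about are modeled here.
struct SimpleRequest {
    int cpuNum;       // requesting CPU
    int threadNum;    // requesting thread within that CPU
    bool locked;      // LL for loads, SC for stores
    int scResult;     // filled in by checkWrite() for an SC
};

// Mirrors CacheBlk::Lock: records which context holds a reservation.
struct Lock {
    int cpuNum, threadNum;
    Lock(const SimpleRequest &req)
        : cpuNum(req.cpuNum), threadNum(req.threadNum) {}
    bool matchesContext(const SimpleRequest &req) const {
        return cpuNum == req.cpuNum && threadNum == req.threadNum;
    }
};

struct Block {
    std::list<Lock> lockList;

    // LL: push a reservation; duplicates from the same context are
    // harmless, since the next store clears them all anyway.
    void trackLoadLocked(const SimpleRequest &req) {
        assert(req.locked);
        lockList.push_front(Lock(req));
    }

    // Store: returns true if the write proceeds; false only for a
    // store conditional whose context holds no valid reservation.
    bool checkWrite(SimpleRequest &req) {
        bool success = !req.locked;  // ordinary stores always proceed
        if (req.locked) {
            for (std::list<Lock>::iterator i = lockList.begin();
                 i != lockList.end(); ++i) {
                if (i->matchesContext(req)) {
                    success = true;
                    break;
                }
            }
            req.scResult = success ? 1 : 0;
        }
        lockList.clear();  // every store invalidates all reservations
        return success;
    }
};

int main()
{
    Block blk;
    SimpleRequest ll  = {0, 0, true, -1};  // LL from cpu 0, thread 0
    SimpleRequest sc  = {0, 0, true, -1};  // matching SC: succeeds
    SimpleRequest sc2 = {0, 0, true, -1};  // SC after locks cleared: fails

    blk.trackLoadLocked(ll);
    bool first = blk.checkWrite(sc);
    assert(first && sc.scResult == 1);

    bool second = blk.checkWrite(sc2);     // first SC cleared the list
    assert(!second && sc2.scResult == 0);
    return 0;
}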