From 443da2c030c0ad5b22f86a471ae757cbbd491bc1 Mon Sep 17 00:00:00 2001
From: Michael LeBeane <michael.lebeane@amd.com>
Date: Tue, 13 Sep 2016 23:20:03 -0400
Subject: [PATCH] kvm: Support timing accesses for KVM cpu

This patch enables timing accesses for KVM cpu.  A new state,
RunningMMIOPending, is added to indicate that there are outstanding timing
requests generated by KVM in the system.  KVM's tick() is disabled and the
simulation does not enter into KVM until all outstanding timing requests have
completed.  The main motivation for this is to allow KVM CPU to perform MMIO
in Ruby, since Ruby does not support atomic accesses.

---
 src/cpu/kvm/base.cc    | 129 ++++++++++++++++++++++++++++++++++-------
 src/cpu/kvm/base.hh    |  56 +++++++++++++-----
 src/cpu/kvm/x86_cpu.cc |  14 ++---
 3 files changed, 159 insertions(+), 40 deletions(-)

diff --git a/src/cpu/kvm/base.cc b/src/cpu/kvm/base.cc
index 0670f61c6..ddf80dbaa 100644
--- a/src/cpu/kvm/base.cc
+++ b/src/cpu/kvm/base.cc
@@ -170,6 +170,76 @@ BaseKvmCPU::startup()
     schedule(startupEvent, curTick());
 }
 
+BaseKvmCPU::Status
+BaseKvmCPU::KVMCpuPort::nextIOState() const
+{
+    const bool quiescent(activeMMIOReqs == 0 && pendingMMIOPkts.empty());
+    return quiescent ? RunningServiceCompletion : RunningMMIOPending;
+}
+
+Tick
+BaseKvmCPU::KVMCpuPort::submitIO(PacketPtr pkt)
+{
+    if (cpu->system->isAtomicMode()) {
+        // Atomic mode: the packet and its request are released after the call.
+        const Tick latency = sendAtomic(pkt);
+        delete pkt->req;
+        delete pkt;
+        return latency;
+    }
+
+    // Timing mode: queue behind already-stalled packets, or on send failure.
+    if (!pendingMMIOPkts.empty() || !sendTimingReq(pkt))
+        pendingMMIOPkts.push(pkt);
+    else
+        activeMMIOReqs++;
+    return 0;  // The delay is not meaningful for timing-mode accesses.
+}
+
+bool
+BaseKvmCPU::KVMCpuPort::recvTimingResp(PacketPtr pkt)
+{
+    DPRINTF(KvmIO, "KVM: Finished timing request\n");
+
+    // The response is consumed here, so release the packet and its request.
+    delete pkt->req;
+    delete pkt;
+    --activeMMIOReqs;
+
+    // Notify the CPU once nothing is in flight and nothing is queued.
+    if (activeMMIOReqs == 0 && pendingMMIOPkts.empty()) {
+        DPRINTF(KvmIO, "KVM: Finished all outstanding timing requests\n");
+        cpu->finishMMIOPending();
+    }
+    return true;
+}
+
+void
+BaseKvmCPU::KVMCpuPort::recvReqRetry()
+{
+    DPRINTF(KvmIO, "KVM: Retry for timing request\n");
+    assert(pendingMMIOPkts.size());
+
+    // Re-issue queued packets in order until one is refused.  Sending an
+    // unbounded number per cycle is unrealistic, but good enough for KVM.
+    while (!pendingMMIOPkts.empty()) {
+        if (!sendTimingReq(pendingMMIOPkts.front()))
+            break;
+        pendingMMIOPkts.pop();
+        ++activeMMIOReqs;
+    }
+}
+
+void
+BaseKvmCPU::finishMMIOPending()
+{
+    assert(_status == RunningMMIOPending);
+    assert(!tickEvent.scheduled());
+    // All timing MMIO has completed; resume ticking so KVM can be re-entered.
+    _status = RunningServiceCompletion;
+    schedule(tickEvent, nextCycle());
+}
+
 void
 BaseKvmCPU::startupThread()
 {
@@ -329,6 +399,12 @@ BaseKvmCPU::drain()
                 "requesting drain.\n");
         return DrainState::Draining;
 
+      case RunningMMIOPending:
+        // We need to drain since there are in-flight timing accesses
+        DPRINTF(Drain, "KVM CPU is waiting for timing accesses to complete, "
+                "requesting drain.\n");
+        return DrainState::Draining;
+
       case RunningService:
         // We need to drain since the CPU is waiting for service (e.g., MMIOs)
         DPRINTF(Drain, "KVM CPU is waiting for service, requesting drain.\n");
@@ -425,9 +501,9 @@ BaseKvmCPU::takeOverFrom(BaseCPU *cpu)
 void
 BaseKvmCPU::verifyMemoryMode() const
 {
-    if (!(system->isAtomicMode() && system->bypassCaches())) {
+    if (!(system->bypassCaches())) {
         fatal("The KVM-based CPUs requires the memory system to be in the "
-              "'atomic_noncaching' mode.\n");
+              "'noncaching' mode.\n");
     }
 }
 
@@ -536,7 +612,7 @@ void
 BaseKvmCPU::tick()
 {
     Tick delay(0);
-    assert(_status != Idle);
+    assert(_status != Idle && _status != RunningMMIOPending);
 
     switch (_status) {
       case RunningService:
@@ -620,7 +696,7 @@ BaseKvmCPU::tick()
     }
 
     // Schedule a new tick if we are still running
-    if (_status != Idle)
+    if (_status != Idle && _status != RunningMMIOPending)
         schedule(tickEvent, clockEdge(ticksToCycles(delay)));
 }
 
@@ -629,8 +705,9 @@ BaseKvmCPU::kvmRunDrain()
 {
     // By default, the only thing we need to drain is a pending IO
     // operation which assumes that we are in the
-    // RunningServiceCompletion state.
-    assert(_status == RunningServiceCompletion);
+    // RunningServiceCompletion or RunningMMIOPending state.
+    assert(_status == RunningServiceCompletion ||
+           _status == RunningMMIOPending);
 
     // Deliver the data from the pending IO operation and immediately
     // exit.
@@ -922,9 +999,12 @@ BaseKvmCPU::handleKvmExit()
         return handleKvmExitException();
 
       case KVM_EXIT_IO:
-        _status = RunningServiceCompletion;
+      {
         ++numIO;
-        return handleKvmExitIO();
+        Tick ticks = handleKvmExitIO();
+        _status = dataPort.nextIOState();
+        return ticks;
+      }
 
       case KVM_EXIT_HYPERCALL:
         ++numHypercalls;
@@ -942,15 +1022,21 @@ BaseKvmCPU::handleKvmExit()
         return 0;
 
       case KVM_EXIT_MMIO:
-        _status = RunningServiceCompletion;
+      {
         /* Service memory mapped IO requests */
         DPRINTF(KvmIO, "KVM: Handling MMIO (w: %u, addr: 0x%x, len: %u)\n",
                 _kvmRun->mmio.is_write,
                 _kvmRun->mmio.phys_addr, _kvmRun->mmio.len);
 
         ++numMMIO;
-        return doMMIOAccess(_kvmRun->mmio.phys_addr, _kvmRun->mmio.data,
-                            _kvmRun->mmio.len, _kvmRun->mmio.is_write);
+        Tick ticks = doMMIOAccess(_kvmRun->mmio.phys_addr, _kvmRun->mmio.data,
+                                  _kvmRun->mmio.len, _kvmRun->mmio.is_write);
+        // doMMIOAccess could have triggered a suspend, in which case we don't
+        // want to overwrite the _status.
+        if (_status != Idle)
+            _status = dataPort.nextIOState();
+        return ticks;
+      }
 
       case KVM_EXIT_IRQ_WINDOW_OPEN:
         return handleKvmExitIRQWindowOpen();
@@ -1026,30 +1112,33 @@ BaseKvmCPU::doMMIOAccess(Addr paddr, void *data, int size, bool write)
     ThreadContext *tc(thread->getTC());
     syncThreadContext();
 
-    Request mmio_req(paddr, size, Request::UNCACHEABLE, dataMasterId());
-    mmio_req.setContext(tc->contextId());
+    RequestPtr mmio_req = new Request(paddr, size, Request::UNCACHEABLE,
+                                      dataMasterId());
+    mmio_req->setContext(tc->contextId());
     // Some architectures do need to massage physical addresses a bit
     // before they are inserted into the memory system. This enables
     // APIC accesses on x86 and m5ops where supported through a MMIO
     // interface.
     BaseTLB::Mode tlb_mode(write ? BaseTLB::Write : BaseTLB::Read);
-    Fault fault(tc->getDTBPtr()->finalizePhysical(&mmio_req, tc, tlb_mode));
+    Fault fault(tc->getDTBPtr()->finalizePhysical(mmio_req, tc, tlb_mode));
     if (fault != NoFault)
         warn("Finalization of MMIO address failed: %s\n", fault->name());
 
 
     const MemCmd cmd(write ? MemCmd::WriteReq : MemCmd::ReadReq);
-    Packet pkt(&mmio_req, cmd);
-    pkt.dataStatic(data);
+    PacketPtr pkt = new Packet(mmio_req, cmd);
+    pkt->dataStatic(data);
 
-    if (mmio_req.isMmappedIpr()) {
+    if (mmio_req->isMmappedIpr()) {
         // We currently assume that there is no need to migrate to a
         // different event queue when doing IPRs. Currently, IPRs are
         // only used for m5ops, so it should be a valid assumption.
         const Cycles ipr_delay(write ?
-                             TheISA::handleIprWrite(tc, &pkt) :
-                             TheISA::handleIprRead(tc, &pkt));
+                             TheISA::handleIprWrite(tc, pkt) :
+                             TheISA::handleIprRead(tc, pkt));
         threadContextDirty = true;
+        delete pkt->req;
+        delete pkt;
         return clockPeriod() * ipr_delay;
     } else {
         // Temporarily lock and migrate to the event queue of the
@@ -1057,7 +1146,7 @@ BaseKvmCPU::doMMIOAccess(Addr paddr, void *data, int size, bool write)
         // access if running in multi-core mode.
         EventQueue::ScopedMigration migrate(vm.eventQueue());
 
-        return dataPort.sendAtomic(&pkt);
+        return dataPort.submitIO(pkt);
     }
 }
 
diff --git a/src/cpu/kvm/base.hh b/src/cpu/kvm/base.hh
index 7ff0cab5c..ef500974f 100644
--- a/src/cpu/kvm/base.hh
+++ b/src/cpu/kvm/base.hh
@@ -44,6 +44,7 @@
 
 #include <csignal>
 #include <memory>
+#include <queue>
 
 #include "base/statistics.hh"
 #include "cpu/kvm/perfevent.hh"
@@ -110,6 +111,12 @@ class BaseKvmCPU : public BaseCPU
     Counter totalInsts() const override;
     Counter totalOps() const override;
 
+    /**
+     * Callback from KVMCpuPort to transition the CPU out of RunningMMIOPending
+     * when all timing requests have completed.
+     */
+    void finishMMIOPending();
+
     /** Dump the internal state to the terminal. */
     virtual void dump() const;
 
@@ -152,6 +159,7 @@ class BaseKvmCPU : public BaseCPU
      *     Running;
      *     RunningService;
      *     RunningServiceCompletion;
+     *     RunningMMIOPending;
      *
      *     Idle -> Idle;
      *     Idle -> Running [label="activateContext()", URL="\ref activateContext"];
@@ -161,6 +169,8 @@ class BaseKvmCPU : public BaseCPU
      *     Running -> Idle [label="drain()", URL="\ref drain"];
      *     Idle -> Running [label="drainResume()", URL="\ref drainResume"];
      *     RunningService -> RunningServiceCompletion [label="handleKvmExit()", URL="\ref handleKvmExit"];
+     *     RunningService -> RunningMMIOPending [label="handleKvmExit()", URL="\ref handleKvmExit"];
+     *     RunningMMIOPending -> RunningServiceCompletion [label="finishMMIOPending()", URL="\ref finishMMIOPending"];
      *     RunningServiceCompletion -> Running [label="tick()", URL="\ref tick"];
      *     RunningServiceCompletion -> RunningService [label="tick()", URL="\ref tick"];
      *   }
@@ -190,12 +200,21 @@ class BaseKvmCPU : public BaseCPU
          * after running service is determined in handleKvmExit() and
          * depends on what kind of service the guest requested:
          * <ul>
-         *   <li>IO/MMIO: RunningServiceCompletion
+         *   <li>IO/MMIO (Atomic): RunningServiceCompletion
+         *   <li>IO/MMIO (Timing): RunningMMIOPending
          *   <li>Halt: Idle
          *   <li>Others: Running
          * </ul>
          */
         RunningService,
+        /** Timing MMIO request in flight or stalled.
+         *
+         *  The VM has requested IO/MMIO and we are in timing mode.  A timing
+         *  request is either stalled (and will be retried with recvReqRetry())
+         *  or it is in flight.  After the timing request is complete, the CPU
+         *  will transition to the RunningServiceCompletion state.
+         */
+        RunningMMIOPending,
         /** Service completion in progress.
          *
          * The VM has requested service that requires KVM to be
@@ -543,28 +562,39 @@ class BaseKvmCPU : public BaseCPU
 
 
     /**
-     * KVM memory port. Uses the default MasterPort behavior, but
-     * panics on timing accesses.
+     * KVM memory port.  Uses default MasterPort behavior and provides an
+     * interface for KVM to transparently submit atomic or timing requests.
      */
     class KVMCpuPort : public MasterPort
     {
 
       public:
         KVMCpuPort(const std::string &_name, BaseKvmCPU *_cpu)
-            : MasterPort(_name, _cpu)
+            : MasterPort(_name, _cpu), cpu(_cpu), activeMMIOReqs(0)
         { }
+        /**
+         * Interface to send Atomic or Timing IO request.  Assumes that the pkt
+         * and corresponding req have been dynamically allocated and deletes
+         * them both if the system is in atomic mode.
+         */
+        Tick submitIO(PacketPtr pkt);
+
+        /** Returns next valid state after one or more IO accesses */
+        Status nextIOState() const;
 
       protected:
-        bool recvTimingResp(PacketPtr pkt)
-        {
-            panic("The KVM CPU doesn't expect recvTimingResp!\n");
-            return true;
-        }
+        /** KVM cpu pointer for finishMMIOPending() callback */
+        BaseKvmCPU *cpu;
 
-        void recvReqRetry()
-        {
-            panic("The KVM CPU doesn't expect recvReqRetry!\n");
-        }
+        /** Pending MMIO packets */
+        std::queue<PacketPtr> pendingMMIOPkts;
+
+        /** Number of MMIO requests in flight */
+        unsigned int activeMMIOReqs;
+
+        bool recvTimingResp(PacketPtr pkt) override;
+
+        void recvReqRetry() override;
 
     };
 
diff --git a/src/cpu/kvm/x86_cpu.cc b/src/cpu/kvm/x86_cpu.cc
index 9e9115ef5..f8f66af2f 100644
--- a/src/cpu/kvm/x86_cpu.cc
+++ b/src/cpu/kvm/x86_cpu.cc
@@ -1344,20 +1344,20 @@ X86KvmCPU::handleKvmExitIO()
         pAddr = X86ISA::x86IOAddress(port);
     }
 
-    Request io_req(pAddr, kvm_run.io.size, Request::UNCACHEABLE,
-                   dataMasterId());
-    io_req.setContext(tc->contextId());
-
     const MemCmd cmd(isWrite ? MemCmd::WriteReq : MemCmd::ReadReq);
     // Temporarily lock and migrate to the event queue of the
     // VM. This queue is assumed to "own" all devices we need to
     // access if running in multi-core mode.
     EventQueue::ScopedMigration migrate(vm.eventQueue());
     for (int i = 0; i < count; ++i) {
-        Packet pkt(&io_req, cmd);
+        RequestPtr io_req = new Request(pAddr, kvm_run.io.size,
+                                        Request::UNCACHEABLE, dataMasterId());
+        io_req->setContext(tc->contextId());
 
-        pkt.dataStatic(guestData);
-        delay += dataPort.sendAtomic(&pkt);
+        PacketPtr pkt = new Packet(io_req, cmd);
+
+        pkt->dataStatic(guestData);
+        delay += dataPort.submitIO(pkt);
 
         guestData += kvm_run.io.size;
     }