mem: Clarification of packet crossbar timings

This patch clarifies the packet timings annotated when going through a crossbar. The old 'firstWordDelay' is replaced by 'headerDelay', which represents the delay associated with the delivery of the header of the packet. The old 'lastWordDelay' is replaced by 'payloadDelay', which represents the delay needed to process the payload of the packet. For now the uses and values remain identical. However, going forward the payloadDelay will be additive, and will not include the headerDelay. Follow-on patches will make the headerDelay capture the pipeline latency incurred in the crossbar, whereas the payloadDelay will capture the additional serialisation delay.
2015-02-11 10:23:47 -05:00 · 2015-02-11 10:23:47 -05:00 · 268d9e59c5
commit 268d9e59c5
parent e2828587b3
15 changed files with 65 additions and 61 deletions
--- a/src/arch/x86/pagetable_walker.cc
+++ b/src/arch/x86/pagetable_walker.cc
@ -601,7 +601,7 @@ Walker::WalkerState::recvPacket(PacketPtr pkt)
        assert(!read);

        // @todo someone should pay for this
-        pkt->firstWordDelay = pkt->lastWordDelay = 0;
+        pkt->headerDelay = pkt->payloadDelay = 0;

        state = nextState;
        nextState = Ready;
--- a/src/dev/io_device.cc
+++ b/src/dev/io_device.cc
@ -55,7 +55,7 @@ Tick
 PioPort::recvAtomic(PacketPtr pkt)
 {
    // @todo: We need to pay for this and not just zero it out
-    pkt->firstWordDelay = pkt->lastWordDelay = 0;
+    pkt->headerDelay = pkt->payloadDelay = 0;

    const Tick delay(pkt->isRead() ? device->read(pkt) : device->write(pkt));
    assert(pkt->isResponse() || pkt->isError());
--- a/src/dev/pcidev.cc
+++ b/src/dev/pcidev.cc
@ -80,7 +80,7 @@ PciDevice::PciConfigPort::recvAtomic(PacketPtr pkt)
    assert(pkt->getAddr() >= configAddr &&
           pkt->getAddr() < configAddr + PCI_CONFIG_SIZE);
    // @todo someone should pay for this
-    pkt->firstWordDelay = pkt->lastWordDelay = 0;
+    pkt->headerDelay = pkt->payloadDelay = 0;
    return pkt->isRead() ? device->readConfig(pkt) : device->writeConfig(pkt);
 }

--- a/src/dev/x86/intdev.hh
+++ b/src/dev/x86/intdev.hh
@ -82,7 +82,7 @@ class IntDevice
        Tick recvMessage(PacketPtr pkt)
        {
            // @todo someone should pay for this
-            pkt->firstWordDelay = pkt->lastWordDelay = 0;
+            pkt->headerDelay = pkt->payloadDelay = 0;
            return device->recvMessage(pkt);
        }
    };
--- a/src/mem/bridge.cc
+++ b/src/mem/bridge.cc
@ -137,7 +137,7 @@ Bridge::BridgeMasterPort::recvTimingResp(PacketPtr pkt)
    DPRINTF(Bridge, "Request queue size: %d\n", transmitList.size());

    // @todo: We need to pay for this and not just zero it out
-    pkt->firstWordDelay = pkt->lastWordDelay = 0;
+    pkt->headerDelay = pkt->payloadDelay = 0;

    slavePort.schedTimingResp(pkt, bridge.clockEdge(delay));

@ -181,7 +181,7 @@ Bridge::BridgeSlavePort::recvTimingReq(PacketPtr pkt)

        if (!retryReq) {
            // @todo: We need to pay for this and not just zero it out
-            pkt->firstWordDelay = pkt->lastWordDelay = 0;
+            pkt->headerDelay = pkt->payloadDelay = 0;

            masterPort.schedTimingReq(pkt, bridge.clockEdge(delay));
        }
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@ -419,7 +419,7 @@ Cache<TagStore>::recvTimingSnoopResp(PacketPtr pkt)
    pkt->popSenderState();
    delete rec;
    // @todo someone should pay for this
-    pkt->firstWordDelay = pkt->lastWordDelay = 0;
+    pkt->headerDelay = pkt->payloadDelay = 0;
    // forwardLatency is set here because there is a response from an
    // upper level cache.
    memSidePort->schedTimingSnoopResp(pkt, clockEdge(forwardLatency));
@ -486,7 +486,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)

            // also reset the bus time that the original packet has
            // not yet paid for
-            snoop_pkt->firstWordDelay = snoop_pkt->lastWordDelay = 0;
+            snoop_pkt->headerDelay = snoop_pkt->payloadDelay = 0;

            // make this an instantaneous express snoop, and let the
            // other caches in the system know that the packet is
@ -521,7 +521,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
        uncacheableFlush(pkt);

        // @todo: someone should pay for this
-        pkt->firstWordDelay = pkt->lastWordDelay = 0;
+        pkt->headerDelay = pkt->payloadDelay = 0;

        // writes go in write buffer, reads use MSHR,
        // prefetches are acknowledged (responded to) and dropped
@ -579,7 +579,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
        if (needsResponse) {
            pkt->makeTimingResponse();
            // @todo: Make someone pay for this
-            pkt->firstWordDelay = pkt->lastWordDelay = 0;
+            pkt->headerDelay = pkt->payloadDelay = 0;

            // In this case we are considering lat neglecting
            // responseLatency, modelling hit latency just as
@ -598,7 +598,7 @@ Cache<TagStore>::recvTimingReq(PacketPtr pkt)
        // miss

        // @todo: Make someone pay for this
-        pkt->firstWordDelay = pkt->lastWordDelay = 0;
+        pkt->headerDelay = pkt->payloadDelay = 0;

        Addr blk_addr = blockAlign(pkt->getAddr());
        MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure());
@ -1146,8 +1146,8 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
                // from lower level caches/memory to an upper level cache or
                // the core.
                completion_time = clockEdge(responseLatency) +
-                    (transfer_offset ? pkt->lastWordDelay :
-                     pkt->firstWordDelay);
+                    (transfer_offset ? pkt->payloadDelay :
+                     pkt->headerDelay);

                assert(!target->pkt->req->isUncacheable());

@ -1163,14 +1163,14 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
                // from lower level caches/memory to an upper level cache or
                // the core.
                completion_time = clockEdge(responseLatency) +
-                    pkt->lastWordDelay;
+                    pkt->payloadDelay;
                target->pkt->req->setExtraData(0);
            } else {
                // not a cache fill, just forwarding response
                // responseLatency is the latency of the return path
                // from lower level cahces/memory to the core.
                completion_time = clockEdge(responseLatency) +
-                    pkt->lastWordDelay;
+                    pkt->payloadDelay;
                if (pkt->isRead() && !is_error) {
                    target->pkt->setData(pkt->getConstPtr<uint8_t>());
                }
@ -1190,7 +1190,7 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
                        target->pkt->getAddr());
            }
            // reset the bus additional time as it is now accounted for
-            target->pkt->firstWordDelay = target->pkt->lastWordDelay = 0;
+            target->pkt->headerDelay = target->pkt->payloadDelay = 0;
            cpuSidePort->schedTimingResp(target->pkt, completion_time);
            break;

@ -1239,7 +1239,7 @@ Cache<TagStore>::recvTimingResp(PacketPtr pkt)
        mq = mshr->queue;
        mq->markPending(mshr);
        requestMemSideBus((RequestCause)mq->index, clockEdge() +
-                          pkt->lastWordDelay);
+                          pkt->payloadDelay);
    } else {
        mq->deallocate(mshr);
        if (wasFull && !mq->isFull()) {
@ -1512,7 +1512,7 @@ Cache<TagStore>::handleFill(PacketPtr pkt, BlkType *blk,
    }
    // We pay for fillLatency here.
    blk->whenReady = clockEdge() + fillLatency * clockPeriod() +
-        pkt->lastWordDelay;
+        pkt->payloadDelay;

    return blk;
 }
@ -1548,7 +1548,7 @@ doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
    assert(req_pkt->isInvalidate() || pkt->sharedAsserted());
    pkt->makeTimingResponse();
    // @todo Make someone pay for this
-    pkt->firstWordDelay = pkt->lastWordDelay = 0;
+    pkt->headerDelay = pkt->payloadDelay = 0;
    if (pkt->isRead()) {
        pkt->setDataFromBlock(blk_data, blkSize);
    }
@ -1599,7 +1599,7 @@ Cache<TagStore>::handleSnoop(PacketPtr pkt, BlkType *blk,
            snoopPkt.pushSenderState(new ForwardResponseRecord());
            // the snoop packet does not need to wait any additional
            // time
-            snoopPkt.firstWordDelay = snoopPkt.lastWordDelay = 0;
+            snoopPkt.headerDelay = snoopPkt.payloadDelay = 0;
            cpuSidePort->sendTimingSnoopReq(&snoopPkt);
            if (snoopPkt.memInhibitAsserted()) {
                // cache-to-cache response from some upper cache
--- a/src/mem/coherent_xbar.cc
+++ b/src/mem/coherent_xbar.cc
@ -168,7 +168,7 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
    unsigned int pkt_cmd = pkt->cmdToIndex();

    calcPacketTiming(pkt);
-    Tick packetFinishTime = pkt->lastWordDelay + curTick();
+    Tick packetFinishTime = curTick() + pkt->payloadDelay;

    // uncacheable requests need never be snooped
    if (!pkt->req->isUncacheable() && !system->bypassCaches()) {
@ -222,7 +222,7 @@ CoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
        assert(!pkt->memInhibitAsserted());

        // undo the calculation so we can check for 0 again
-        pkt->firstWordDelay = pkt->lastWordDelay = 0;
+        pkt->headerDelay = pkt->payloadDelay = 0;

        DPRINTF(CoherentXBar, "recvTimingReq: src %s %s 0x%x RETRY\n",
                src_port->name(), pkt->cmdString(), pkt->getAddr());
@ -301,7 +301,7 @@ CoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
    unsigned int pkt_cmd = pkt->cmdToIndex();

    calcPacketTiming(pkt);
-    Tick packetFinishTime = pkt->lastWordDelay + curTick();
+    Tick packetFinishTime = curTick() + pkt->payloadDelay;

    if (snoopFilter && !pkt->req->isUncacheable() && !system->bypassCaches()) {
        // let the snoop filter inspect the response and update its state
@ -427,7 +427,7 @@ CoherentXBar::recvTimingSnoopResp(PacketPtr pkt, PortID slave_port_id)
    assert(!pkt->isExpressSnoop());

    calcPacketTiming(pkt);
-    Tick packetFinishTime = pkt->lastWordDelay + curTick();
+    Tick packetFinishTime = curTick() + pkt->payloadDelay;

    // forward it either as a snoop response or a normal response
    if (forwardAsSnoop) {
@ -608,8 +608,8 @@ CoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
        transDist[pkt_cmd]++;
    }

-    // @todo: Not setting first-word time
-    pkt->lastWordDelay = response_latency;
+    // @todo: Not setting header time
+    pkt->payloadDelay = response_latency;
    return response_latency;
 }

@ -648,8 +648,8 @@ CoherentXBar::recvAtomicSnoop(PacketPtr pkt, PortID master_port_id)
        snoops++;
    }

-    // @todo: Not setting first-word time
-    pkt->lastWordDelay = snoop_response_latency;
+    // @todo: Not setting header time
+    pkt->payloadDelay = snoop_response_latency;
    return snoop_response_latency;
 }

--- a/src/mem/dram_ctrl.cc
+++ b/src/mem/dram_ctrl.cc
@ -879,7 +879,7 @@ DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency)
        assert(pkt->isResponse());

        // @todo someone should pay for this
-        pkt->firstWordDelay = pkt->lastWordDelay = 0;
+        pkt->headerDelay = pkt->payloadDelay = 0;

        // queue the packet in the response queue to be sent out after
        // the static latency has passed
--- a/src/mem/dramsim2.cc
+++ b/src/mem/dramsim2.cc
@ -270,7 +270,7 @@ DRAMSim2::accessAndRespond(PacketPtr pkt)
        assert(pkt->isResponse());

        // @todo someone should pay for this
-        pkt->firstWordDelay = pkt->lastWordDelay = 0;
+        pkt->headerDelay = pkt->payloadDelay = 0;

        DPRINTF(DRAMSim2, "Queuing response for address %lld\n",
                pkt->getAddr());
--- a/src/mem/external_slave.cc
+++ b/src/mem/external_slave.cc
@ -124,8 +124,8 @@ void
 StubSlavePort::ResponseEvent::process()
 {
    owner.responsePacket->makeResponse();
-    owner.responsePacket->firstWordDelay = 0;
-    owner.responsePacket->lastWordDelay = 0;
+    owner.responsePacket->headerDelay = 0;
+    owner.responsePacket->payloadDelay = 0;

    if (owner.sendTimingResp(owner.responsePacket)) {
        owner.responsePacket = NULL;
--- a/src/mem/noncoherent_xbar.cc
+++ b/src/mem/noncoherent_xbar.cc
@ -128,7 +128,7 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
    unsigned int pkt_cmd = pkt->cmdToIndex();

    calcPacketTiming(pkt);
-    Tick packetFinishTime = pkt->lastWordDelay + curTick();
+    Tick packetFinishTime = curTick() + pkt->payloadDelay;

    // before forwarding the packet (and possibly altering it),
    // remember if we are expecting a response
@ -146,7 +146,7 @@ NoncoherentXBar::recvTimingReq(PacketPtr pkt, PortID slave_port_id)
                src_port->name(), pkt->cmdString(), pkt->getAddr());

        // undo the calculation so we can check for 0 again
-        pkt->firstWordDelay = pkt->lastWordDelay = 0;
+        pkt->headerDelay = pkt->payloadDelay = 0;

        // occupy until the header is sent
        reqLayers[master_port_id]->failedTiming(src_port,
@ -201,7 +201,7 @@ NoncoherentXBar::recvTimingResp(PacketPtr pkt, PortID master_port_id)
    unsigned int pkt_cmd = pkt->cmdToIndex();

    calcPacketTiming(pkt);
-    Tick packetFinishTime = pkt->lastWordDelay + curTick();
+    Tick packetFinishTime = curTick() + pkt->payloadDelay;

    // send the packet through the destination slave port
    bool success M5_VAR_USED = slavePorts[slave_port_id]->sendTimingResp(pkt);
@ -265,7 +265,7 @@ NoncoherentXBar::recvAtomic(PacketPtr pkt, PortID slave_port_id)
    }

    // @todo: Not setting first-word time
-    pkt->lastWordDelay = response_latency;
+    pkt->payloadDelay = response_latency;
    return response_latency;
 }

--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2014 ARM Limited
+ * Copyright (c) 2012-2015 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
@ -313,23 +313,23 @@ class Packet : public Printable
  public:

    /**
-     * The extra delay from seeing the packet until the first word is
+     * The extra delay from seeing the packet until the header is
     * transmitted. This delay is used to communicate the crossbar
     * forwarding latency to the neighbouring object (e.g. a cache)
     * that actually makes the packet wait. As the delay is relative,
     * a 32-bit unsigned should be sufficient.
     */
-    uint32_t firstWordDelay;
+    uint32_t headerDelay;

    /**
-     * The extra pipelining delay from seeing the packet until the
-     * last word is transmitted by the component that provided it (if
-     * any). This includes the first word delay. Similar to the first
-     * word delay, this is used to make up for the fact that the
+     * The extra pipelining delay from seeing the packet until the end of
+     * payload is transmitted by the component that provided it (if
+     * any). This includes the header delay. Similar to the header
+     * delay, this is used to make up for the fact that the
     * crossbar does not make the packet wait. As the delay is
     * relative, a 32-bit unsigned should be sufficient.
     */
-    uint32_t lastWordDelay;
+    uint32_t payloadDelay;

    /**
     * A virtual base opaque structure used to hold state associated
@ -574,7 +574,7 @@ class Packet : public Printable
    Packet(const RequestPtr _req, MemCmd _cmd)
        :  cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
           size(0), bytesValidStart(0), bytesValidEnd(0),
-           firstWordDelay(0), lastWordDelay(0),
+           headerDelay(0), payloadDelay(0),
           senderState(NULL)
    {
        if (req->hasPaddr()) {
@ -596,7 +596,7 @@ class Packet : public Printable
    Packet(const RequestPtr _req, MemCmd _cmd, int _blkSize)
        :  cmd(_cmd), req(_req), data(nullptr), addr(0), _isSecure(false),
           bytesValidStart(0), bytesValidEnd(0),
-           firstWordDelay(0), lastWordDelay(0),
+           headerDelay(0), payloadDelay(0),
           senderState(NULL)
    {
        if (req->hasPaddr()) {
@ -621,8 +621,8 @@ class Packet : public Printable
           addr(pkt->addr), _isSecure(pkt->_isSecure), size(pkt->size),
           bytesValidStart(pkt->bytesValidStart),
           bytesValidEnd(pkt->bytesValidEnd),
-           firstWordDelay(pkt->firstWordDelay),
-           lastWordDelay(pkt->lastWordDelay),
+           headerDelay(pkt->headerDelay),
+           payloadDelay(pkt->payloadDelay),
           senderState(pkt->senderState)
    {
        if (!clear_flags)
--- a/src/mem/simple_mem.cc
+++ b/src/mem/simple_mem.cc
@ -125,7 +125,7 @@ SimpleMemory::recvTimingReq(PacketPtr pkt)
    }

    // @todo someone should pay for this
-    pkt->firstWordDelay = pkt->lastWordDelay = 0;
+    pkt->headerDelay = pkt->payloadDelay = 0;

    // update the release time according to the bandwidth limit, and
    // do so with respect to the time it takes to finish this request
--- a/src/mem/xbar.cc
+++ b/src/mem/xbar.cc
@ -109,23 +109,27 @@ BaseXBar::calcPacketTiming(PacketPtr pkt)
    // until the next clock edge (could be zero)
    Tick offset = clockEdge() - curTick();

-    // determine how many cycles are needed to send the data
+    // Determine how many cycles are needed to send the data
+    // If the packet has no data we take into account just the cycle to send
+    // the header.
    unsigned dataCycles = pkt->hasData() ? divCeil(pkt->getSize(), width) : 0;

    // before setting the bus delay fields of the packet, ensure that
    // the delay from any previous crossbar has been accounted for
-    if (pkt->firstWordDelay != 0 || pkt->lastWordDelay != 0)
+    if (pkt->headerDelay != 0 || pkt->payloadDelay != 0)
        panic("Packet %s already has delay (%d, %d) that should be "
-              "accounted for.\n", pkt->cmdString(), pkt->firstWordDelay,
-              pkt->lastWordDelay);
+              "accounted for.\n", pkt->cmdString(), pkt->headerDelay,
+              pkt->payloadDelay);

-    // The first word will be delivered on the cycle after the header.
-    pkt->firstWordDelay = (headerCycles + 1) * clockPeriod() + offset;
+    // The headerDelay takes into account the relative time to deliver the
+    // header of the packet. It will be charged of the additional delay of
+    // the xbar if the packet goes through it.
+    pkt->headerDelay = (headerCycles + 1) * clockPeriod() + offset;

-    // Note that currently lastWordDelay can be smaller than
-    // firstWordDelay if the packet has no data
-    pkt->lastWordDelay = (headerCycles + dataCycles) * clockPeriod() +
-        offset;
+    // The payloadDelay takes into account the relative time to deliver the
+    // payload of the packet. If the packet has no data its value is just the
+    // header cycles (converted to ticks) plus the offset value.
+    pkt->payloadDelay = (headerCycles + dataCycles) * clockPeriod() + offset;
 }

 template <typename SrcType, typename DstType>
--- a/src/mem/xbar.hh
+++ b/src/mem/xbar.hh
@ -333,9 +333,9 @@ class BaseXBar : public MemObject

    /**
     * Calculate the timing parameters for the packet. Updates the
-     * firstWordDelay and lastWordDelay fields of the packet
+     * headerDelay and payloadDelay fields of the packet
     * object with the relative number of ticks required to transmit
-     * the header and the first word, and the last word, respectively.
+     * the header and the payload, respectively.
     */
    void calcPacketTiming(PacketPtr pkt);